|
| 1 | +""" |
| 2 | +API for itinerary generation. |
| 3 | +""" |
| 4 | + |
| 5 | +from __future__ import annotations |
| 6 | + |
| 7 | +from typing import Any |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import pandas as pd |
| 11 | + |
| 12 | +from ..core.itinerary import ( |
| 13 | + greedy_grow_itineraries, |
| 14 | + kmeans_tsp_itineraries, |
| 15 | + random_partition_itineraries, |
| 16 | + round_robin_itineraries, |
| 17 | + softmax_greedy_itineraries, |
| 18 | + stratified_itineraries, |
| 19 | +) |
| 20 | +from ..distances import get_distance_matrix |
| 21 | +from ..io.data_handler import DataHandler |
| 22 | +from .types import BUDGET_METHODS, PARTITION_METHODS, VALID_METHODS, ItineraryResult |
| 23 | + |
| 24 | + |
def create_itineraries(
    data: str | pd.DataFrame | np.ndarray | list[Any],
    max_distance: float | None = None,
    n_itineraries: int | None = None,
    method: str = "greedy_nn",
    distance: str = "haversine",
    start_method: str = "random",
    temperature: float = 0.1,
    n_strata: int = 4,
    optimize_routes: bool = True,
    seed: int | None = None,
    **kwargs: Any,
) -> ItineraryResult:
    """
    Create multiple itineraries from a set of points.

    Args:
        data: Input data (file path, DataFrame, numpy array, or list)
        max_distance: Maximum total distance per itinerary (in meters for haversine/osrm/google).
            Required for the methods listed in BUDGET_METHODS; also forwarded to
            kmeans_tsp when given.
        n_itineraries: Number of itineraries to create. Required for the methods
            listed in PARTITION_METHODS.
        method: Itinerary generation method:
            - "greedy_nn": Greedy nearest-neighbor (default, most efficient)
            - "random_partition": Random assignment (theoretical baseline)
            - "stratified": Stratified by distance from centroid
            - "round_robin": Round-robin assignment
            - "softmax_greedy": Greedy with softmax sampling
            - "kmeans_tsp": K-means clustering with TSP optimization
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        start_method: How to pick starting point for greedy methods
            - "random": Random unvisited point
            - "furthest": Point furthest from centroid of remaining points
            - "first": First available unvisited point (index order)
        temperature: Softmax temperature for softmax_greedy method (default 0.1)
        n_strata: Number of strata for stratified method (default 4)
        optimize_routes: Whether to TSP-optimize routes for partition methods (default True)
        seed: Random seed for reproducibility
        **kwargs: Additional arguments for distance calculation:
            - api_key: Required for 'google' distance
            - osrm_base_url: Custom OSRM server URL

    Returns:
        ItineraryResult containing:
            - itineraries: List of routes (each route is list of point indices)
            - distances: Total distance for each itinerary
            - data: Copy of the loaded DataFrame with an integer itinerary_id
              column added (-1 for points that are in no itinerary)
            - metadata: Algorithm details and summary statistics

        For empty input no distance matrix is computed; the result has no
        itineraries, but its metadata carries the same keys as a regular run
        (with zeroed statistics).

    Raises:
        ValueError: If ``method`` is not in VALID_METHODS, if ``max_distance``
            is missing for a method in BUDGET_METHODS, or if ``n_itineraries``
            is missing for a method in PARTITION_METHODS.

    Example:
        >>> result = create_itineraries('points.csv', max_distance=20000, method='greedy_nn')
        >>> result = create_itineraries('points.csv', n_itineraries=10, method='random_partition')
    """
    # Validate the arguments before touching the data so bad calls fail fast.
    if method not in VALID_METHODS:
        raise ValueError(f"Unknown method: {method}. Use one of {VALID_METHODS}")

    if method in BUDGET_METHODS and max_distance is None:
        raise ValueError(f"max_distance is required for method '{method}'")
    if method in PARTITION_METHODS and n_itineraries is None:
        raise ValueError(f"n_itineraries is required for method '{method}'")

    df = DataHandler.load_data(data)

    def _package(routes: list[list[int]], route_distances: list[float]) -> ItineraryResult:
        """Attach itinerary ids to a copy of ``df`` and build the result object."""
        # -1 marks points that were not placed in any itinerary.
        itinerary_ids = np.full(len(df), -1, dtype=int)
        for itinerary_idx, route in enumerate(routes):
            itinerary_ids[np.asarray(route, dtype=np.intp)] = itinerary_idx

        result_df = df.copy()
        result_df["itinerary_id"] = itinerary_ids

        return ItineraryResult(
            itineraries=routes,
            distances=route_distances,
            data=result_df,
            metadata={
                "n_points": len(df),
                "n_itineraries": len(routes),
                "max_distance": max_distance,
                "n_itineraries_requested": n_itineraries,
                "method": method,
                "distance": distance,
                # Method-specific settings are reported only where they apply.
                "start_method": start_method if method in BUDGET_METHODS else None,
                "temperature": temperature if method == "softmax_greedy" else None,
                "n_strata": n_strata if method == "stratified" else None,
                "optimize_routes": optimize_routes if method in PARTITION_METHODS else None,
                "seed": seed,
                "total_distance": float(sum(route_distances)) if route_distances else 0.0,
                "avg_distance": float(np.mean(route_distances)) if route_distances else 0.0,
                "avg_points_per_itinerary": (
                    float(np.mean([len(it) for it in routes])) if routes else 0.0
                ),
            },
        )

    if len(df) == 0:
        # Nothing to route: skip the distance computation (which may hit an
        # external service) but return the same result shape as a normal run.
        return _package([], [])

    points: np.ndarray = df[["longitude", "latitude"]].to_numpy()
    distance_matrix = get_distance_matrix(points, points, method=distance, **kwargs)

    rng = np.random.default_rng(seed)

    itineraries: list[list[int]]
    distances: list[float]

    if method == "greedy_nn":
        itineraries, distances = greedy_grow_itineraries(
            distance_matrix,
            max_distance=max_distance,  # type: ignore[arg-type]
            start_method=start_method,
            rng=rng,
        )
    elif method == "random_partition":
        itineraries, distances = random_partition_itineraries(
            distance_matrix,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            optimize_routes=optimize_routes,
            rng=rng,
        )
    elif method == "stratified":
        itineraries, distances = stratified_itineraries(
            distance_matrix,
            points=points,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            n_strata=n_strata,
            optimize_routes=optimize_routes,
            rng=rng,
        )
    elif method == "round_robin":
        itineraries, distances = round_robin_itineraries(
            distance_matrix,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            optimize_routes=optimize_routes,
            rng=rng,
        )
    elif method == "softmax_greedy":
        itineraries, distances = softmax_greedy_itineraries(
            distance_matrix,
            max_distance=max_distance,  # type: ignore[arg-type]
            temperature=temperature,
            start_method=start_method,
            rng=rng,
        )
    else:
        # Any other validated method is handled as kmeans_tsp; max_distance is
        # passed through as-is and may be None here.
        itineraries, distances = kmeans_tsp_itineraries(
            distance_matrix,
            points=points,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            max_distance=max_distance,
            rng=rng,
        )

    return _package(itineraries, distances)
0 commit comments