Skip to content

Commit 5bd2280

Browse files
committed
other classic methods
1 parent 06c55bd commit 5bd2280

11 files changed

Lines changed: 1017 additions & 67 deletions

File tree

.github/workflows/ci.yml

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,27 @@ jobs:
2828
- name: Install dev dependencies
2929
run: |
3030
uv pip install -e ".[dev]"
31-
uv pip install types-requests pandas-stubs
32-
31+
uv pip install types-requests pandas-stubs deptry vulture
32+
3333
- name: Run ruff linting
3434
run: |
3535
uv run ruff check allocator/ tests/
36-
36+
3737
- name: Run ruff formatting check
3838
run: |
3939
uv run ruff format --check allocator/ tests/
40-
40+
4141
- name: Run mypy type checking
4242
run: |
43-
uv run mypy allocator/ || echo "Type checking has known issues, continuing..."
43+
uv run mypy allocator/
44+
45+
- name: Run deptry (dependency checks)
46+
run: |
47+
uv run deptry allocator/
48+
49+
- name: Run vulture (dead code detection)
50+
run: |
51+
uv run vulture allocator/ allocator/vulture_whitelist.py --min-confidence 80
4452
4553
4654
test:

.pre-commit-config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Pre-commit hooks: lint/format (ruff), static types (mypy),
# dependency hygiene (deptry), and dead-code detection (vulture).
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.10
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.15.0
    hooks:
      - id: mypy
        # Stub packages mypy needs to type-check requests/pandas usage.
        additional_dependencies:
          - types-requests
          - pandas-stubs
        args: [--ignore-missing-imports]

  - repo: https://github.com/fpgmaas/deptry
    rev: 0.23.0
    hooks:
      - id: deptry
        args: [allocator/]

  # Vulture runs from the locally installed binary (language: system),
  # scanning the package plus its whitelist of intentionally "unused" names.
  - repo: local
    hooks:
      - id: vulture
        name: vulture
        entry: vulture
        args: [allocator/, allocator/vulture_whitelist.py, --min-confidence, "80"]
        language: system
        types: [python]
        pass_filenames: false

allocator/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,25 @@
3232
For more examples: https://geosensing.github.io/allocator/
3333
"""
3434

35+
import warnings
36+
37+
warnings.filterwarnings("ignore", message=".*SwigPyPacked.*")
38+
warnings.filterwarnings("ignore", message=".*SwigPyObject.*")
39+
warnings.filterwarnings("ignore", message=".*swigvarlink.*")
40+
3541
import logging
3642
import sys
3743

3844
# Import modern API
3945
from .api import (
4046
ClusterResult,
4147
ComparisonResult,
48+
ItineraryResult,
4249
RouteResult,
4350
SortResult,
4451
assign_to_closest,
4552
cluster,
53+
create_itineraries,
4654
distance_assignment,
4755
kmeans,
4856
shortest_path,
@@ -75,11 +83,13 @@
7583
# Result types
7684
"ClusterResult",
7785
"ComparisonResult",
86+
"ItineraryResult",
7887
"RouteResult",
7988
"SortResult",
8089
"assign_to_closest",
8190
# Main functions
8291
"cluster",
92+
"create_itineraries",
8393
"distance_assignment",
8494
"euclidean_distance_matrix",
8595
# Distance utilities
@@ -108,7 +118,7 @@
108118
]
109119

110120

111-
def setup_logging(level=logging.INFO):
121+
def setup_logging(level: int = logging.INFO) -> logging.Logger:
112122
"""
113123
Set up logging configuration for the allocator package.
114124
@@ -137,7 +147,7 @@ def setup_logging(level=logging.INFO):
137147
return logger
138148

139149

140-
def get_logger(name):
150+
def get_logger(name: str) -> logging.Logger:
141151
"""
142152
Get a logger instance for a specific module.
143153

allocator/api/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,22 @@
66

77
from .cluster import cluster, kmeans
88
from .distance import assign_to_closest, distance_assignment, sort_by_distance
9+
from .itinerary import create_itineraries
910
from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
10-
from .types import ClusterResult, ComparisonResult, RouteResult, SortResult
11+
from .types import ClusterResult, ComparisonResult, ItineraryResult, RouteResult, SortResult
1112

1213
__all__ = [
1314
# Result types
1415
"ClusterResult",
1516
"ComparisonResult",
17+
"ItineraryResult",
1618
"RouteResult",
1719
"SortResult",
1820
# Distance assignment methods
1921
"assign_to_closest",
2022
# Main high-level functions
2123
"cluster",
24+
"create_itineraries",
2225
"distance_assignment",
2326
# Specific clustering methods
2427
"kmeans",

allocator/api/itinerary.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
"""
2+
API for itinerary generation.
3+
"""
4+
5+
from __future__ import annotations
6+
7+
from typing import Any
8+
9+
import numpy as np
10+
import pandas as pd
11+
12+
from ..core.itinerary import (
13+
greedy_grow_itineraries,
14+
kmeans_tsp_itineraries,
15+
random_partition_itineraries,
16+
round_robin_itineraries,
17+
softmax_greedy_itineraries,
18+
stratified_itineraries,
19+
)
20+
from ..distances import get_distance_matrix
21+
from ..io.data_handler import DataHandler
22+
from .types import BUDGET_METHODS, PARTITION_METHODS, VALID_METHODS, ItineraryResult
23+
24+
25+
def create_itineraries(
    data: str | pd.DataFrame | np.ndarray | list[Any],
    max_distance: float | None = None,
    n_itineraries: int | None = None,
    method: str = "greedy_nn",
    distance: str = "haversine",
    start_method: str = "random",
    temperature: float = 0.1,
    n_strata: int = 4,
    optimize_routes: bool = True,
    seed: int | None = None,
    **kwargs: Any,
) -> ItineraryResult:
    """
    Build one or more itineraries over a set of points.

    Two families of algorithms are supported:

    * Budget methods (``greedy_nn``, ``softmax_greedy``) grow each
      itinerary until a per-itinerary distance budget is exhausted;
      they require ``max_distance``.
    * Partition methods (``random_partition``, ``stratified``,
      ``round_robin``, ``kmeans_tsp``) split the points into a fixed
      number of groups; they require ``n_itineraries``.

    Args:
        data: Input data (file path, DataFrame, numpy array, or list).
        max_distance: Distance budget per itinerary (meters for
            haversine/osrm/google). Required by budget methods.
        n_itineraries: Number of itineraries. Required by partition methods.
        method: One of ``greedy_nn`` (default), ``random_partition``,
            ``stratified``, ``round_robin``, ``softmax_greedy``,
            ``kmeans_tsp``.
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google').
        start_method: Start-point rule for greedy methods
            ('random', 'furthest', or 'first').
        temperature: Softmax temperature for ``softmax_greedy`` (default 0.1).
        n_strata: Number of strata for the ``stratified`` method (default 4).
        optimize_routes: Whether partition methods TSP-optimize each route.
        seed: Random seed for reproducibility.
        **kwargs: Forwarded to the distance backend (e.g. ``api_key`` for
            'google', ``osrm_base_url`` for a custom OSRM server).

    Returns:
        ItineraryResult with the routes (lists of point indices), each
        route's total distance, the input data annotated with an
        ``itinerary_id`` column, and run metadata.

    Raises:
        ValueError: On an unknown method or a missing required argument.

    Example:
        >>> result = create_itineraries('points.csv', max_distance=20000, method='greedy_nn')
        >>> result = create_itineraries('points.csv', n_itineraries=10, method='random_partition')
    """
    # Validate the method name and its required companion argument up front.
    if method not in VALID_METHODS:
        raise ValueError(f"Unknown method: {method}. Use one of {VALID_METHODS}")
    if method in BUDGET_METHODS and max_distance is None:
        raise ValueError(f"max_distance is required for method '{method}'")
    if method in PARTITION_METHODS and n_itineraries is None:
        raise ValueError(f"n_itineraries is required for method '{method}'")

    frame = DataHandler.load_data(data)

    # Degenerate case: no points means no itineraries.
    if len(frame) == 0:
        return ItineraryResult(
            itineraries=[],
            distances=[],
            data=frame.assign(itinerary_id=[]),
            metadata={
                "n_points": 0,
                "n_itineraries": 0,
                "max_distance": max_distance,
                "method": method,
                "distance": distance,
            },
        )

    coords: np.ndarray = frame[["longitude", "latitude"]].to_numpy()
    pairwise = get_distance_matrix(coords, coords, method=distance, **kwargs)
    generator = np.random.default_rng(seed)

    # One zero-argument builder per method; each closure captures the
    # shared distance matrix, coordinates, and RNG. Only the selected
    # builder is ever invoked.
    builders = {
        "greedy_nn": lambda: greedy_grow_itineraries(
            pairwise,
            max_distance=max_distance,  # type: ignore[arg-type]
            start_method=start_method,
            rng=generator,
        ),
        "random_partition": lambda: random_partition_itineraries(
            pairwise,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            optimize_routes=optimize_routes,
            rng=generator,
        ),
        "stratified": lambda: stratified_itineraries(
            pairwise,
            points=coords,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            n_strata=n_strata,
            optimize_routes=optimize_routes,
            rng=generator,
        ),
        "round_robin": lambda: round_robin_itineraries(
            pairwise,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            optimize_routes=optimize_routes,
            rng=generator,
        ),
        "softmax_greedy": lambda: softmax_greedy_itineraries(
            pairwise,
            max_distance=max_distance,  # type: ignore[arg-type]
            temperature=temperature,
            start_method=start_method,
            rng=generator,
        ),
        "kmeans_tsp": lambda: kmeans_tsp_itineraries(
            pairwise,
            points=coords,
            n_itineraries=n_itineraries,  # type: ignore[arg-type]
            max_distance=max_distance,
            rng=generator,
        ),
    }
    itineraries, distances = builders[method]()

    # Points left unassigned by a budget method keep -1 so they are easy
    # to spot downstream.
    labels = np.full(len(frame), -1, dtype=int)
    for route_idx, route in enumerate(itineraries):
        labels[list(route)] = route_idx

    annotated = frame.copy()
    annotated["itinerary_id"] = labels

    return ItineraryResult(
        itineraries=itineraries,
        distances=distances,
        data=annotated,
        metadata={
            "n_points": len(frame),
            "n_itineraries": len(itineraries),
            "max_distance": max_distance,
            "n_itineraries_requested": n_itineraries,
            "method": method,
            "distance": distance,
            "start_method": start_method if method in BUDGET_METHODS else None,
            "temperature": temperature if method == "softmax_greedy" else None,
            "n_strata": n_strata if method == "stratified" else None,
            "optimize_routes": optimize_routes if method in PARTITION_METHODS else None,
            "seed": seed,
            "total_distance": float(sum(distances)) if distances else 0.0,
            "avg_distance": float(np.mean(distances)) if distances else 0.0,
            "avg_points_per_itinerary": (
                float(np.mean([len(it) for it in itineraries])) if itineraries else 0.0
            ),
        },
    )

allocator/api/types.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
import numpy as np
1111
import pandas as pd
1212

13+
# Methods that grow itineraries under a per-itinerary distance budget;
# callers must supply max_distance for these.
BUDGET_METHODS = ("greedy_nn", "softmax_greedy")
# Methods that partition the points into a fixed number of groups;
# callers must supply n_itineraries for these.
PARTITION_METHODS = ("random_partition", "stratified", "round_robin", "kmeans_tsp")
# Every itinerary-generation method accepted by the API.
VALID_METHODS = BUDGET_METHODS + PARTITION_METHODS
16+
1317

1418
@dataclass
1519
class ClusterResult:
@@ -50,3 +54,13 @@ class ComparisonResult:
5054
results: dict[str, ClusterResult]
5155
statistics: pd.DataFrame
5256
metadata: dict[str, Any]
57+
58+
59+
@dataclass
60+
class ItineraryResult:
61+
"""Result of budget-constrained itinerary generation."""
62+
63+
itineraries: list[list[int]]
64+
distances: list[float]
65+
data: pd.DataFrame
66+
metadata: dict[str, Any]

0 commit comments

Comments
 (0)