Skip to content

Commit 0019380

Browse files
committed
v.1.2
1 parent 5bd2280 commit 0019380

7 files changed

Lines changed: 434 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,36 @@
22

33
All notable changes to the allocator project are documented in this file.
44

5+
## [1.2.0] - 2025-04-12
6+
7+
### Changed
8+
9+
**Simplified K-Means Implementation:**
10+
- Removed `CustomKMeans` class - now uses sklearn's `KMeans` directly
11+
- Removed unused helper functions (`initialize_centroids`, `move_centroids`, `_kmeans_cluster_original`)
12+
- The `distance` parameter in the cluster API is now stored in metadata only (clustering always uses Euclidean distance)
13+
- Returns sklearn's `inertia_` directly instead of manual calculation
14+
15+
**Exposed Simulation Module:**
16+
- Added simulation exports to main `allocator` package for discoverability
17+
- New top-level exports: `InferenceResult`, `SimulationConfig`, `estimate_mean`, `estimate_proportion`, `generate_binary_outcomes`, `generate_survey_points`, `run_simulation`, `summarize_results`
18+
19+
### Fixed
20+
21+
- Removed duplicated Haversine calculation in `simulation/harness.py` - now uses `allocator.distances.haversine_distance_matrix`
22+
- Fixed import ordering issues flagged by ruff
23+
- Synced version between `pyproject.toml` and `__init__.py`
24+
25+
### Removed
26+
27+
- `CustomKMeans` class (use sklearn's `KMeans` directly)
28+
- `initialize_centroids` function
29+
- `move_centroids` function
30+
- `_kmeans_cluster_original` function
31+
- `_compute_distance_matrix_km` function from simulation harness
32+
33+
---
34+
535
## [1.1.0] - 2024-12-08 🚀
636

737
### ✨ New Features

allocator/viz/plotting.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@
22
Visualization utilities for allocator package.
33
"""
44

5-
from __future__ import annotations
6-
75
import matplotlib.pyplot as plt
86
import numpy as np
97
import pandas as pd
@@ -227,7 +225,7 @@ def plot_clusters_interactive(
227225
centroids: np.ndarray | None = None,
228226
title: str = "Interactive Clustering Results",
229227
save_path: str | None = None,
230-
) -> folium.Map:
228+
) -> "folium.Map":
231229
"""
232230
Create an interactive map visualization of clustering results using folium.
233231
@@ -368,7 +366,7 @@ def plot_route_interactive(
368366
route_geometry: str | None = None,
369367
title: str = "Interactive Route",
370368
save_path: str | None = None,
371-
) -> folium.Map:
369+
) -> "folium.Map":
372370
"""
373371
Create an interactive map visualization of TSP/routing results using folium.
374372
@@ -490,7 +488,7 @@ def plot_route_interactive(
490488

491489

492490
def _add_straight_line_route(
493-
m: folium.Map, route_points: np.ndarray, route_order: list[int]
491+
m: "folium.Map", route_points: np.ndarray, route_order: list[int]
494492
) -> None:
495493
"""Add straight line connections between route points."""
496494
ordered_points = route_points[route_order]

allocator/vulture_whitelist.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
# Vulture whitelist for false positives
2+
# These are variables/functions that appear unused but are actually needed
3+
4+
# sklearn API compatibility - sample_weight parameter required for fit() signature
5+
sample_weight # noqa

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ module-root = ""
88

99
[project]
1010
name = "allocator"
11-
version = "1.1.0"
11+
version = "1.2.0"
1212
description = "Modern Python package for geographic task allocation, clustering, and routing optimization"
1313
readme = "README.md"
1414
requires-python = ">=3.11"
@@ -168,6 +168,9 @@ ignore = [
168168
"B008", # do not perform function calls in argument defaults
169169
]
170170

171+
[tool.ruff.lint.per-file-ignores]
172+
"allocator/__init__.py" = ["E402", "RUF022"]
173+
171174
[tool.ruff.lint.isort]
172175
known-first-party = ["allocator"]
173176

tests/api/test_itinerary_api.py

Lines changed: 284 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,284 @@
1+
"""
2+
Tests for the budget-constrained itinerary generation API.
3+
"""
4+
5+
import unittest
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
from allocator.api import create_itineraries
11+
from allocator.api.types import ItineraryResult
12+
from allocator.core.itinerary import greedy_grow_itineraries
13+
14+
15+
class TestItineraryAPI(unittest.TestCase):
    """Exercise the public ``create_itineraries`` entry point.

    Each test feeds a small fixture (five points, or a degenerate empty /
    single-point frame) to the API and inspects the returned
    ``ItineraryResult``.
    """

    def setUp(self):
        # A single coordinate list backs both fixtures, so the DataFrame and
        # the ndarray always describe the same five (longitude, latitude) rows.
        coords = [
            (101.0, 13.0),
            (101.1, 13.1),
            (101.2, 13.0),
            (101.3, 13.1),
            (101.4, 13.0),
        ]
        lons, lats = zip(*coords)

        self.test_points = pd.DataFrame(
            {
                "longitude": list(lons),
                "latitude": list(lats),
                "point_id": list("ABCDE"),
            }
        )
        self.array_points = np.array(coords)

    def _haversine_run(self, points, **options):
        # Shared call path: the haversine tests differ only in their options.
        return create_itineraries(points, distance="haversine", **options)

    def test_create_itineraries_with_dataframe(self):
        # DataFrame input: check result type, container types, parallel
        # lengths, the itinerary_id column, and that no input row is lost.
        outcome = self._haversine_run(self.test_points, max_distance=50000, seed=42)

        self.assertIsInstance(outcome, ItineraryResult)
        self.assertIsInstance(outcome.itineraries, list)
        self.assertIsInstance(outcome.distances, list)
        self.assertEqual(len(outcome.itineraries), len(outcome.distances))
        self.assertIn("itinerary_id", outcome.data.columns)
        self.assertEqual(len(outcome.data), len(self.test_points))

    def test_create_itineraries_with_numpy(self):
        # ndarray input must be accepted and yield parallel lists as well.
        outcome = self._haversine_run(self.array_points, max_distance=50000, seed=42)

        self.assertIsInstance(outcome, ItineraryResult)
        self.assertEqual(len(outcome.itineraries), len(outcome.distances))

    def test_budget_enforcement(self):
        # No reported itinerary distance may exceed the requested budget.
        outcome = self._haversine_run(self.test_points, max_distance=15000, seed=42)

        for travelled in outcome.distances:
            self.assertLessEqual(travelled, 15000)

    def test_all_points_assigned(self):
        # The union of all routes must be exactly the set of row indices.
        outcome = self._haversine_run(self.test_points, max_distance=100000, seed=42)

        visited = set().union(*outcome.itineraries)

        self.assertEqual(visited, set(range(len(self.test_points))))

    def test_small_budget_creates_more_itineraries(self):
        # A tighter budget can never produce fewer itineraries than a loose one.
        tight = self._haversine_run(self.test_points, max_distance=5000, seed=42)
        loose = self._haversine_run(self.test_points, max_distance=100000, seed=42)

        self.assertGreaterEqual(len(tight.itineraries), len(loose.itineraries))

    def test_start_method_first(self):
        # start_method="first": the first itinerary begins at index 0.
        outcome = self._haversine_run(
            self.test_points, max_distance=50000, start_method="first"
        )

        self.assertEqual(outcome.itineraries[0][0], 0)

    def test_start_method_furthest(self):
        # start_method="furthest" is only checked for returning a result.
        outcome = self._haversine_run(
            self.test_points, max_distance=50000, start_method="furthest"
        )

        self.assertIsInstance(outcome, ItineraryResult)

    def test_reproducibility_with_seed(self):
        # Two runs with the same seed and random start must agree exactly.
        first, second = (
            self._haversine_run(
                self.test_points,
                max_distance=20000,
                start_method="random",
                seed=42,
            )
            for _ in range(2)
        )

        self.assertEqual(first.itineraries, second.itineraries)
        self.assertEqual(first.distances, second.distances)

    def test_empty_data(self):
        # A frame with the right columns but no rows gives an empty result.
        no_rows = pd.DataFrame(columns=["longitude", "latitude"])

        outcome = self._haversine_run(no_rows, max_distance=10000)

        self.assertEqual(outcome.itineraries, [])
        self.assertEqual(outcome.distances, [])
        self.assertEqual(outcome.metadata["n_points"], 0)
        self.assertEqual(outcome.metadata["n_itineraries"], 0)

    def test_single_point(self):
        # One point forms one itinerary containing index 0 with zero distance.
        lone = pd.DataFrame({"longitude": [101.0], "latitude": [13.0]})

        outcome = self._haversine_run(lone, max_distance=10000)

        self.assertEqual(len(outcome.itineraries), 1)
        self.assertEqual(outcome.itineraries[0], [0])
        self.assertEqual(outcome.distances[0], 0.0)

    def test_metadata_populated(self):
        # Metadata must expose every expected key and echo the call arguments.
        outcome = self._haversine_run(
            self.test_points, max_distance=30000, start_method="first", seed=123
        )

        required_keys = (
            "n_points",
            "n_itineraries",
            "max_distance",
            "distance",
            "start_method",
            "seed",
            "avg_distance",
            "avg_points_per_itinerary",
        )
        for name in required_keys:
            self.assertIn(name, outcome.metadata)

        echoed = (
            ("n_points", len(self.test_points)),
            ("max_distance", 30000),
            ("distance", "haversine"),
            ("start_method", "first"),
            ("seed", 123),
        )
        for name, expected in echoed:
            self.assertEqual(outcome.metadata[name], expected)

    def test_euclidean_distance(self):
        # Same budget check as above, with distance="euclidean" and a
        # budget of 0.5.
        outcome = create_itineraries(
            self.test_points, max_distance=0.5, distance="euclidean", seed=42
        )

        self.assertIsInstance(outcome, ItineraryResult)
        for travelled in outcome.distances:
            self.assertLessEqual(travelled, 0.5)
209+
210+
211+
class TestGreedyGrowCore(unittest.TestCase):
    """Exercise ``greedy_grow_itineraries`` directly on distance matrices."""

    def setUp(self):
        # Four points at positions 0..3 on a line: entry (i, j) is |i - j|,
        # i.e. [[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]].
        positions = np.arange(4)
        self.simple_matrix = np.abs(positions[:, None] - positions[None, :])

    def _grow(self, matrix, max_distance=10, start_method="first"):
        # Shared call path; the defaults match what most tests here pass.
        return greedy_grow_itineraries(
            matrix,
            max_distance=max_distance,
            start_method=start_method,
        )

    def test_greedy_grow_basic(self):
        # With a budget of 10, the routes together must cover all four indices.
        routes, _ = self._grow(self.simple_matrix)

        covered = set().union(*routes)

        self.assertEqual(covered, {0, 1, 2, 3})

    def test_greedy_grow_small_budget(self):
        # With a budget of 1, no reported route distance may exceed 1.
        _, lengths = self._grow(self.simple_matrix, max_distance=1)

        for length in lengths:
            self.assertLessEqual(length, 1)

    def test_greedy_grow_empty_matrix(self):
        # A 0x0 matrix yields no routes and no distances.
        no_points = np.empty((0, 0))

        routes, lengths = self._grow(no_points)

        self.assertEqual(routes, [])
        self.assertEqual(lengths, [])

    def test_greedy_grow_single_point(self):
        # A 1x1 zero matrix yields one route [0] with distance 0.0.
        one_point = np.zeros((1, 1), dtype=int)

        routes, lengths = self._grow(one_point)

        self.assertEqual(len(routes), 1)
        self.assertEqual(routes[0], [0])
        self.assertEqual(lengths[0], 0.0)

    def test_invalid_start_method(self):
        # An unrecognised start_method must raise ValueError naming the problem.
        with self.assertRaises(ValueError) as caught:
            self._grow(self.simple_matrix, start_method="invalid")

        self.assertIn("Unknown start_method", str(caught.exception))
281+
282+
283+
# Allow this test module to be executed directly as a script; when it is
# imported by a test runner the guard keeps unittest.main() from firing.
if __name__ == "__main__":
    unittest.main()

tests/stats/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)