Skip to content

Commit 0458a87

Browse files
committed
with random walk
1 parent a0f8e3b commit 0458a87

16 files changed

Lines changed: 1150 additions & 182 deletions

README.md

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,87 @@
1-
# allocator: Efficiently collect data from geographically distributed locations
1+
# allocator
22

33
[![PyPI version](https://img.shields.io/pypi/v/allocator.svg)](https://pypi.python.org/pypi/allocator)
44
[![Downloads](https://pepy.tech/badge/allocator)](https://pepy.tech/project/allocator)
55
[![CI](https://github.com/geosensing/allocator/actions/workflows/ci.yml/badge.svg)](https://github.com/geosensing/allocator/actions/workflows/ci.yml)
66
[![Documentation](https://img.shields.io/badge/docs-github.io-blue)](https://geosensing.github.io/allocator/)
77

8-
**Allocator** provides a modern, Pythonic API for geographic task allocation, clustering, and routing optimization.
8+
Field teams, delivery services, and survey organizations waste time and money on inefficient routes. When you have 100+ locations to visit, manual planning fails. Allocator solves this.
99

10-
## Key Features
10+
## What It Does
1111

12-
- **🎯 Clustering**: Group geographic points into balanced zones
13-
- **🛣️ Routing**: Find optimal paths through locations (TSP solving)
14-
- **📍 Assignment**: Connect points to closest workers/centers
15-
- **🚀 Performance**: Optimized algorithms with NumPy and scikit-learn
16-
- **📦 Modern API**: Clean Python interface + unified CLI
12+
- **Cluster**: Divide locations into balanced work zones
13+
- **Route**: Find the shortest path through locations (TSP)
14+
- **Assign**: Match locations to nearest workers or depots
15+
- **Random Walk**: Generate survey itineraries on road networks
1716

18-
## Quick Start
17+
## Install
1918

2019
```bash
2120
pip install allocator
2221
```
2322

23+
## Python API
24+
25+
### Cluster locations into zones
26+
2427
```python
2528
import allocator
2629
import pandas as pd
2730

28-
# Geographic locations
2931
locations = pd.DataFrame({
30-
'longitude': [100.5018, 100.5065, 100.5108],
31-
'latitude': [13.7563, 13.7590, 13.7633]
32+
'longitude': [100.501, 100.506, 100.510, 100.515, 100.520],
33+
'latitude': [13.756, 13.759, 13.763, 13.768, 13.772]
3234
})
3335

34-
# Group into zones
35-
clusters = allocator.cluster(locations, n_clusters=2)
36+
result = allocator.cluster(locations, n_clusters=2)
37+
print(result.labels) # [0 0 0 1 1]
38+
```
39+
40+
### Find shortest route
41+
42+
```python
43+
route = allocator.shortest_path(locations, method='ortools')
44+
print(route.route) # [0, 1, 2, 4, 3, 0]
45+
```
3646

37-
# Find optimal route
38-
route = allocator.shortest_path(locations)
47+
### Assign to nearest depot
3948

40-
# Assign to service centers
41-
centers = pd.DataFrame({
49+
```python
50+
depots = pd.DataFrame({
4251
'longitude': [100.50, 100.52],
4352
'latitude': [13.75, 13.77]
4453
})
45-
assignments = allocator.assign(locations, centers)
54+
55+
assignments = allocator.assign_to_closest(locations, depots)
56+
print(assignments.data['assigned_worker'].tolist()) # [0, 0, 1, 1, 1]
57+
```
58+
59+
### Generate random walk itineraries
60+
61+
```python
62+
import networkx as nx
63+
64+
# Load road network graph (from OSMnx or similar)
65+
G = nx.read_graphml("road_network.graphml")
66+
67+
result = allocator.random_walk(G, n_walks=10, walk_length_m=5000)
68+
print(result.data) # DataFrame with waypoints
69+
```
70+
71+
## CLI
72+
73+
```bash
74+
allocator cluster kmeans locations.csv -n 5 -o zones.csv
75+
allocator route tsp locations.csv --method ortools -o route.csv
76+
allocator sort locations.csv --workers depots.csv -o assignments.csv
77+
allocator random-walk road_network.graphml -n 10 -l 5000 -o waypoints.csv
4678
```
4779

48-
## Documentation & Examples
80+
## Documentation
4981

50-
- **📖 [Full Documentation](https://geosensing.github.io/allocator/)**
51-
- **🚀 [Installation & Tutorial](https://geosensing.github.io/allocator/quickstart.html)**
52-
- **🔧 [API Reference](https://geosensing.github.io/allocator/api/clustering.html)**
53-
- **💡 [Real-World Examples](https://geosensing.github.io/allocator/examples/overview.html)**
82+
- [Full Documentation](https://geosensing.github.io/allocator/)
83+
- [API Reference](https://geosensing.github.io/allocator/api/clustering.html)
5484

55-
## License & Contributing
85+
## License
5686

57-
MIT License. Contributions welcome - see [Contributing Guide](https://geosensing.github.io/allocator/contributing.html).
87+
MIT

allocator/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,15 @@
4444
ClusterResult,
4545
ComparisonResult,
4646
ItineraryResult,
47+
RandomWalkResult,
4748
RouteResult,
4849
SortResult,
4950
assign_to_closest,
5051
cluster,
5152
create_itineraries,
5253
distance_assignment,
5354
kmeans,
55+
random_walk,
5456
shortest_path,
5557
sort_by_distance,
5658
tsp_christofides,
@@ -76,6 +78,7 @@
7678
"ClusterResult",
7779
"ComparisonResult",
7880
"ItineraryResult",
81+
"RandomWalkResult",
7982
"RouteResult",
8083
"SortResult",
8184
# Main functions
@@ -98,6 +101,8 @@
98101
"plot_clusters",
99102
"plot_comparison",
100103
"plot_route",
104+
# Random walk
105+
"random_walk",
101106
# Logging utilities
102107
"setup_logging",
103108
"shortest_path",

allocator/api/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,23 @@
77
from .cluster import cluster, kmeans
88
from .distance import assign_to_closest, distance_assignment, sort_by_distance
99
from .itinerary import create_itineraries
10+
from .random_walk import random_walk
1011
from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
11-
from .types import ClusterResult, ComparisonResult, ItineraryResult, RouteResult, SortResult
12+
from .types import (
13+
ClusterResult,
14+
ComparisonResult,
15+
ItineraryResult,
16+
RandomWalkResult,
17+
RouteResult,
18+
SortResult,
19+
)
1220

1321
__all__ = [
1422
# Result types
1523
"ClusterResult",
1624
"ComparisonResult",
1725
"ItineraryResult",
26+
"RandomWalkResult",
1827
"RouteResult",
1928
"SortResult",
2029
# Distance assignment methods
@@ -25,6 +34,8 @@
2534
"distance_assignment",
2635
# Specific clustering methods
2736
"kmeans",
37+
# Random walk
38+
"random_walk",
2839
"shortest_path",
2940
"sort_by_distance",
3041
# Specific routing methods

allocator/api/random_walk.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""
2+
API for random walk itinerary generation on road networks.
3+
"""
4+
5+
from typing import Any
6+
7+
import networkx as nx
8+
import numpy as np
9+
import pandas as pd
10+
11+
from ..core.random_walk import (
12+
generate_walks,
13+
get_largest_connected_component,
14+
validate_graph,
15+
)
16+
from .types import RandomWalkResult
17+
18+
19+
def random_walk(
    graph: nx.Graph,
    n_walks: int = 15,
    walk_length_m: float = 5000.0,
    start_points: list[Any] | None = None,
    seed: int | None = None,
    use_largest_component: bool = True,
) -> RandomWalkResult:
    """
    Generate self-weighting random walk itineraries on a road network graph.

    Random walks on road networks have a self-weighting property: at each
    intersection of degree d, choosing the next edge uniformly (probability 1/d)
    ensures that the time-average along the walk converges to a length-weighted
    spatial average. This eliminates the need for explicit inclusion probabilities.

    Args:
        graph: NetworkX graph from OSMnx or geo-sampling. Must have:
            - Node attributes: x/y, lon/lat, or longitude/latitude
            - Edge attributes: length (in meters)
        n_walks: Number of independent walks to generate (default 15)
        walk_length_m: Target length of each walk in meters (default 5000.0)
        start_points: Optional list of starting node IDs. If provided, walks
            cycle through these points (useful for GRTS-selected starting locations).
            If None, random nodes are chosen uniformly.
        seed: Random seed for reproducibility
        use_largest_component: If True (default), use only the largest connected
            component of the graph to avoid getting stuck in disconnected regions.

    Returns:
        RandomWalkResult containing:
            - walks: List of walk dicts, each with:
                - waypoints: List of (lon, lat, cumulative_distance_m) tuples
                - edges_traversed: List of (from_node, to_node, length_m) tuples
                - total_distance_m: Actual distance walked
            - data: DataFrame with all waypoints (always carries the columns
              below, even when no waypoint was generated):
                - walk_id: Walk index
                - sequence: Waypoint sequence number within walk
                - longitude: Waypoint longitude
                - latitude: Waypoint latitude
                - cumulative_distance_m: Distance from walk start
            - metadata: Dict with:
                - n_walks: Number of walks generated
                - walk_length_m: Target walk length
                - total_network_length_m: Sum of all edge lengths
                - n_nodes: Number of nodes in graph
                - n_edges: Number of edges in graph
                - seed: Random seed used
                - avg_actual_distance_m: Mean actual walk distance
                  (0.0 when no walk was generated)
                - start_points_provided: Whether start_points was provided

    Raises:
        ValueError: If ``validate_graph`` reports the input graph as invalid

    Example:
        >>> import networkx as nx
        >>> import allocator
        >>>
        >>> # Create a simple test graph
        >>> G = nx.Graph()
        >>> G.add_node(0, longitude=100.0, latitude=13.0)
        >>> G.add_node(1, longitude=100.1, latitude=13.0)
        >>> G.add_edge(0, 1, length=1000.0)
        >>>
        >>> result = allocator.random_walk(G, n_walks=5, walk_length_m=500.0, seed=42)
        >>> len(result.walks)
        5
    """
    validation = validate_graph(graph)
    if not validation["valid"]:
        raise ValueError(f"Invalid graph: {'; '.join(validation['errors'])}")

    working_graph = graph
    if use_largest_component:
        working_graph = get_largest_connected_component(graph)
        # Only re-validate when nodes were actually dropped, so the metadata
        # below describes the graph the walks really ran on.
        if working_graph.number_of_nodes() < graph.number_of_nodes():
            validation = validate_graph(working_graph)

    rng = np.random.default_rng(seed)

    walks = generate_walks(
        working_graph,
        n_walks=n_walks,
        walk_length_m=walk_length_m,
        start_points=start_points,
        rng=rng,
    )

    # Flatten every waypoint of every walk into one long-format row.
    rows = [
        {
            "walk_id": walk_id,
            "sequence": seq,
            "longitude": lon,
            "latitude": lat,
            "cumulative_distance_m": cum_dist,
        }
        for walk_id, walk in enumerate(walks)
        for seq, (lon, lat, cum_dist) in enumerate(walk["waypoints"])
    ]

    # Passing the columns explicitly keeps the documented schema even when
    # ``rows`` is empty (e.g. n_walks=0); a bare ``pd.DataFrame([])`` would
    # have no columns at all and break downstream column access.
    columns = [
        "walk_id",
        "sequence",
        "longitude",
        "latitude",
        "cumulative_distance_m",
    ]
    data = pd.DataFrame(rows, columns=columns)

    actual_distances = [w["total_distance_m"] for w in walks]

    metadata = {
        "n_walks": len(walks),
        "walk_length_m": walk_length_m,
        "total_network_length_m": validation["total_network_length_m"],
        "n_nodes": validation["n_nodes"],
        "n_edges": validation["n_edges"],
        "seed": seed,
        # np.mean of an empty list is NaN (with a warning); report 0.0 instead.
        "avg_actual_distance_m": float(np.mean(actual_distances)) if walks else 0.0,
        "start_points_provided": start_points is not None,
    }

    return RandomWalkResult(
        walks=walks,
        data=data,
        metadata=metadata,
    )

allocator/api/types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,12 @@ class ItineraryResult:
6262
distances: list[float]
6363
data: pd.DataFrame
6464
metadata: dict[str, Any]
65+
66+
67+
@dataclass
class RandomWalkResult:
    """Container for the output of random walk itinerary generation.

    Groups the per-walk records, a flat tabular view of the waypoints, and
    summary information about the run into a single return value.
    """

    # One dict per generated walk.
    walks: list[dict[str, Any]]
    # Tabular view of the generated waypoints.
    data: pd.DataFrame
    # Free-form summary values describing the run.
    metadata: dict[str, Any]

allocator/cli/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .. import __version__
1212
from .cluster_cmd import kmeans
1313
from .itinerary_cmd import itinerary
14+
from .random_walk_cmd import random_walk_cmd
1415
from .route_cmd import christofides, ortools, tsp
1516

1617
console = Console()
@@ -53,6 +54,7 @@ def route() -> None:
5354
route.add_command(christofides)
5455
route.add_command(ortools)
5556
cli.add_command(itinerary)
57+
cli.add_command(random_walk_cmd, name="random-walk")
5658

5759

5860
@cli.command()

0 commit comments

Comments
 (0)