1
+ import logging
2
+ import math
3
+ import json
4
+ import time
5
+ from haversine import haversine , Unit
6
+
7
def get_stop_group(route_list, stop_list, stop_seq_mapping, stop_list_grid, stop_id):
    """Collect the stops that lie close to ``stop_id`` and head in a similar direction.

    The search is transitive: every stop that is accepted is itself used as a
    new search centre, so a chain of stops that are each within the distance
    threshold of the previous one ends up in the same group.  Bearings are
    always compared against the bearings of the *original* ``stop_id``.

    Args:
        route_list: Not used by this function; kept so existing callers keep
            working.
        stop_list: Mapping of stop id -> stop dict; only
            ``stop['location']['lat']`` and ``stop['location']['lng']`` are read.
        stop_seq_mapping: Mapping of stop id -> dict; the ``'bearings'`` list
            and the ``'co'`` value are read (missing entries are tolerated).
        stop_list_grid: Mapping of grid-cell key -> list of stop ids registered
            in that cell.
        stop_id: Id of the stop the group is built for.

    Returns:
        A list of ``[co, stop_id]`` pairs in discovery order.  ``stop_id``
        itself is left out to keep the serialised stop map small.

    Raises:
        KeyError: If ``stop_id`` (or an id found in the grid) is missing from
            ``stop_list``.
    """
    DISTANCE_THRESHOLD = 50  # in metres
    BEARING_THRESHOLD = 45  # in degrees
    STOP_LIST_LIMIT = 50  # stop expanding once this many stops were collected

    def get_stops_haversine_distance(stop_a, stop_b):
        # haversine() defaults to kilometres, so ask for metres explicitly.
        return haversine(
            (stop_a['location']['lat'], stop_a['location']['lng']),
            (stop_b['location']['lat'], stop_b['location']['lng']),
            unit=Unit.METERS,
        )

    bearing_targets = stop_seq_mapping.get(stop_id, {}).get('bearings', [])

    def is_bearing_in_range(bearing):
        # No reference bearings (or a threshold covering the whole circle)
        # means every direction is acceptable.
        if BEARING_THRESHOLD >= 180 or not bearing_targets:
            return True
        for target in bearing_targets:
            # Shortest angular distance on the circle; handles the 0/360 wrap
            # (including the exact boundary) without special cases.
            gap = abs(bearing - target) % 360
            if min(gap, 360 - gap) <= BEARING_THRESHOLD:
                return True
        return False

    def search_nearby_stops(target_stop_id, excluded_stop_ids):
        target_stop = stop_list[target_stop_id]
        # take lat/lng up to 3 decimal places, that's about 100m x 100m square
        lat = int(target_stop['location']['lat'] * 1000)
        lng = int(target_stop['location']['lng'] * 1000)

        nearby_stops = []
        # NOTE: the key format must match the one used to build stop_list_grid.
        for candidate_id in stop_list_grid.get(f"{lat} _{lng} ", []):
            if candidate_id in excluded_stop_ids:
                continue
            if get_stops_haversine_distance(target_stop, stop_list[candidate_id]) > DISTANCE_THRESHOLD:
                continue
            candidate_info = stop_seq_mapping.get(candidate_id, {})
            # A candidate without any bearing never matches (any() of nothing).
            if any(is_bearing_in_range(b) for b in candidate_info.get('bearings', [])):
                nearby_stops.append({
                    'id': candidate_id,
                    'co': candidate_info.get('co', ''),
                })
        return nearby_stops

    stop_list_entries = search_nearby_stops(stop_id, set())
    # Ids already collected; kept as a set so the exclusion test is O(1)
    # instead of rebuilding and scanning a list on every expansion.
    seen_ids = {entry['id'] for entry in stop_list_entries}
    stop_group = []

    # Breadth-first expansion: the list grows while it is being walked.
    # Expansion stops when no new stops turn up or once the list has reached
    # STOP_LIST_LIMIT (the final expansion may push it slightly above).
    i = 0
    while i < len(stop_list_entries):
        entry = stop_list_entries[i]
        i += 1
        stop_group.append([entry['co'], entry['id']])
        if len(stop_list_entries) < STOP_LIST_LIMIT:
            new_entries = search_nearby_stops(entry['id'], seen_ids)
            seen_ids.update(new_entry['id'] for new_entry in new_entries)
            stop_list_entries.extend(new_entries)

    # to reduce size of routeFareList.min.json, excl current stop_id from final output stopMap
    return [stop for stop in stop_group if stop[1] != stop_id]
69
+
70
def get_bearing(a, b):
    """Return the initial great-circle bearing from point ``a`` to point ``b``.

    Both arguments are mappings with ``'lat'`` and ``'lng'`` given in decimal
    degrees.  The result is expressed in degrees and normalised into
    ``[0, 360)``; due north yields 0 and due east yields 90.
    """
    lat_from = math.radians(a['lat'])
    lat_to = math.radians(b['lat'])
    delta_lng = math.radians(b['lng']) - math.radians(a['lng'])

    # Components of the direction vector as seen from ``a``.
    east = math.sin(delta_lng) * math.cos(lat_to)
    north = (math.cos(lat_from) * math.sin(lat_to) -
             math.sin(lat_from) * math.cos(lat_to) * math.cos(delta_lng))

    # atan2 yields (-180, 180]; shift it into the compass range.
    return (math.degrees(math.atan2(east, north)) + 360) % 360
82
+
83
def get_stop_bearings(route_stops):
    """Pick the dominant travel direction(s) of a stop.

    ``route_stops`` is an iterable of dicts with the keys ``'co'``,
    ``'routeKey'`` and ``'bearing'``.  Entries whose bearing is ``-1`` are
    ignored, and entries that share company, the first ``'+'``-separated part
    of the route key and the bearing are counted once.

    The remaining bearings are clustered greedily: a bearing joins the first
    cluster that already holds a bearing within ``BEARING_THRESHOLD`` degrees
    (measured around the circle), is dropped when that cluster already holds
    a practically identical bearing, and otherwise starts a new cluster.

    Returns:
        The bearings of the largest cluster.  When several clusters tie for
        the largest size, all of their bearings are returned (in cluster
        order).  An empty list is returned when no usable bearing exists.
    """
    BEARING_THRESHOLD = 45  # in degrees
    BEARING_EPSILON = 10e-6  # bearings closer than this count as duplicates

    def angular_gap(x, y):
        # Shortest distance between two bearings, so 359.9 and 0.1 are close.
        gap = abs(x - y) % 360
        return min(gap, 360 - gap)

    seen_routes = set()
    bearings = []
    for route_stop in route_stops:
        bearing = route_stop['bearing']
        if bearing == -1:
            continue
        unique_route = f"{route_stop['co']} _{route_stop['routeKey'].split('+')[0]} _{bearing} "
        if unique_route not in seen_routes:
            seen_routes.add(unique_route)
            bearings.append(bearing)

    bearing_groups = []
    for bearing in bearings:
        for group in bearing_groups:
            closest = min(angular_gap(member, bearing) for member in group)
            if closest < BEARING_EPSILON:
                break  # already represented in this group
            if closest <= BEARING_THRESHOLD:
                group.append(bearing)
                break
        else:
            # No existing group accepted the bearing (or there is none yet).
            bearing_groups.append([bearing])

    if not bearing_groups:
        return []

    longest_length = max(len(group) for group in bearing_groups)
    return [b for group in bearing_groups if len(group) == longest_length for b in group]
121
+
122
def _dump_json(filename, data, indent=None):
    """Write ``data`` as JSON to ``filename`` using UTF-8."""
    with open(filename, 'w', encoding='UTF-8') as f:
        json.dump(data, f, indent=indent)


def _build_stop_seq_mapping(route_list, stop_list):
    """Record, per stop, every route position it occupies and its bearings.

    For each stop of each route the bearing towards the next stop on that
    route is stored; the last stop of a route has no successor and gets -1.
    The ``'co'`` of a stop is the company under which it was first seen.
    """
    stop_seq_mapping = {}
    for route_key, route_list_entry in route_list.items():
        for co, co_stops in route_list_entry.get('stops', {}).items():
            last_pos = len(co_stops) - 1
            for stop_pos, stop_id in enumerate(co_stops):
                if stop_pos == last_pos:
                    bearing = -1
                else:
                    bearing = get_bearing(
                        stop_list[stop_id]['location'],
                        stop_list[co_stops[stop_pos + 1]]['location'],
                    )
                entry = stop_seq_mapping.setdefault(
                    stop_id, {"routeStops": [], "co": co, "bearings": []}
                )
                entry['routeStops'].append({
                    'routeKey': route_key,
                    'co': co,
                    'seq': stop_pos,
                    'bearing': bearing,
                })

    for entry in stop_seq_mapping.values():
        entry['bearings'] = get_stop_bearings(entry['routeStops'])
    return stop_seq_mapping


def _build_stop_list_grid(stop_list):
    """Bucket stop ids into ~100m x ~100m cells to keep neighbour searches small.

    Each stop is registered in its own cell and in the eight surrounding
    cells, so looking up a single cell yields every stop around it.
    """
    stop_list_grid = {}
    for stop_id, stop in stop_list.items():
        # take lat/lng up to 3 decimal places, that's about 100m x 100m square
        lat = int(stop['location']['lat'] * 1000)
        lng = int(stop['location']['lng'] * 1000)
        for lng_offset in (-1, 0, 1):
            for lat_offset in (-1, 0, 1):
                grid_id = f"{lat + lat_offset} _{lng + lng_offset} "
                stop_list_grid.setdefault(grid_id, []).append(stop_id)
    return stop_list_grid


# Main function to process stops
def merge_stop_list():
    """Build the stop map and write the final route/fare JSON files.

    Reads ``routeFareList.mergeRoutes.min.json`` from the working directory,
    computes for every stop the group of nearby stops with a similar bearing,
    stores the result under ``db['stopMap']`` and writes:

    * ``stopMap.routeStopsSequence.json`` (trouble-shooting dump only),
    * ``stopMap.json``,
    * ``routeFareList.json`` (full-precision coordinates),
    * ``routeFareList.alpha.json``, ``routeFareList.min.json`` and
      ``routeFareList.alpha.min.json`` (coordinates rounded to 5 d.p.).

    Progress is reported through the global ``logger``, which must be defined
    before this function is called.
    """
    # Read the result from previous pipeline
    with open('routeFareList.mergeRoutes.min.json', 'r', encoding='UTF-8') as f:
        db = json.load(f)

    route_list = db['routeList']
    stop_list = db['stopList']
    start_time = time.time()

    # Preprocess the list of bearings for each stop
    stop_seq_mapping = _build_stop_seq_mapping(route_list, stop_list)

    # Just dump the json in case of a need for trouble-shooting, but otherwise we do not need this file
    _dump_json('stopMap.routeStopsSequence.json', stop_seq_mapping)

    logger.info(f'Processed routeStopsSequence in {(time.time() - start_time) * 1000:.2f} ms')

    # Preprocess stopList, organise stops into ~100m x ~100m squares to reduce size of nested loop later
    stop_list_grid = _build_stop_list_grid(stop_list)

    # Only stops that actually have neighbours get an entry in the stop map.
    stop_map = {}
    for stop_id in stop_list:
        stop_group = get_stop_group(route_list, stop_list, stop_seq_mapping, stop_list_grid, stop_id)
        if stop_group:
            stop_map[stop_id] = stop_group

    count = len(stop_list)
    group_count = len(stop_map)
    logger.info(f"Processed {count} stops ({group_count} groups) at {(time.time() - start_time) * 1000:.2f} ms")

    _dump_json('stopMap.json', stop_map, indent=4)

    db['stopMap'] = stop_map

    # Written before the rounding below, so this file keeps full precision.
    _dump_json('routeFareList.json', db, indent=4)

    # reduce size of routeFareList.min.json by rounding lat/lng values to 5 decimal places
    # 5 d.p. is roughly one-metre accuracy, it is good enough for this project
    # saves around 50kb in size for 14,000 stops
    # stop_list is the same object as db['stopList'], so db is updated in place.
    for stop in stop_list.values():
        location = stop['location']
        location['lat'] = float('%.5f' % (location['lat']))
        location['lng'] = float('%.5f' % (location['lng']))

    logger.info(f"Reduced location lat/lng to 5 d.p. at {(time.time() - start_time) * 1000:.2f} ms")

    _dump_json('routeFareList.alpha.json', db, indent=4)
    _dump_json('routeFareList.min.json', db)
    _dump_json('routeFareList.alpha.min.json', db)
232
+
233
# Created at import time (not only when run as a script) so that
# merge_stop_list() can log when this module is imported and called directly.
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # Script entry point: show INFO-level progress messages, then run the merge.
    logging.basicConfig(level=logging.INFO)
    merge_stop_list()
0 commit comments