-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Switch to use async emitRequest for lrtfeeder, mtr and parseGtfs
- Loading branch information
Showing
4 changed files
with
377 additions
and
331 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,62 +1,75 @@ | ||
# -*- coding: utf-8 -*- | ||
# MTR Bus fetching | ||
|
||
import asyncio | ||
import csv | ||
import requests | ||
import json | ||
|
||
routeList = {} | ||
stopList = {} | ||
|
||
r = requests.get('https://opendata.mtr.com.hk/data/mtr_bus_routes.csv') | ||
r.encoding = 'utf-8' | ||
reader = csv.reader(r.text.split("\n") ) | ||
headers = next(reader,None) | ||
routes = [route for route in reader if len(route) == 4] | ||
for [route, chn, eng, circular] in routes: | ||
if route == '': | ||
continue | ||
start = { | ||
"zh": chn.split('至')[0], | ||
"en": eng.split(' to ')[0] | ||
} | ||
end = { | ||
"zh": chn.split('至')[1], | ||
"en": eng.split(' to ')[1] | ||
} | ||
for bound in ['I', 'O']: | ||
routeList[route+"_"+bound] = { | ||
"route": route, | ||
"bound": bound, | ||
"service_type": "1", | ||
"orig_tc": start['zh'] if bound == 'O' else end['zh'], | ||
"dest_tc": end["zh"] if bound == 'O' else start['zh'], | ||
"orig_en": start['en'] if bound == 'O' else end['en'], | ||
"dest_en": end["en"] if bound == 'O' else start['en'], | ||
"stops": [] | ||
import logging | ||
import httpx | ||
|
||
from crawl_utils import emitRequest | ||
|
||
async def getRouteStop(co = 'lrtfeeder'):
    """Download the MTR bus route and stop CSVs and write them out as JSON.

    Two files are written to the current working directory:
    ``routeList.lrtfeeder.json`` (a list of route records, one per route and
    bound, limited to routes that ended up with at least one stop) and
    ``stopList.lrtfeeder.json`` (a mapping of station id to stop record).

    Args:
        co: Company code. Not read by this function; the output file names
            are fixed to ``lrtfeeder``.

    Raises:
        IndexError: If a route name lacks the ``至`` / `` to `` separator
            that is used to split the origin from the destination.
    """
    routeList = {}
    stopList = {}

    # The client is scoped with ``async with`` so that it is closed on every
    # exit path, including exceptions raised while fetching or parsing.
    # NOTE(review): both URLs below go through a workers.dev host in front of
    # opendata.mtr.com.hk -- confirm that this is intended.
    async with httpx.AsyncClient(timeout=httpx.Timeout(30.0, pool=None)) as a_client:
        r = await emitRequest('https://opendata.mtr.com.hk/data/mtr_bus_routes.csv', a_client)
        r.encoding = 'utf-8'
        reader = csv.reader(r.text.split("\n"))
        next(reader, None)  # skip the header row
        for row in reader:
            # Rows that do not have exactly 4 columns are ignored.
            if len(row) != 4:
                continue
            route, chn, eng, _circular = row
            if route == '':
                continue
            zhParts = chn.split('至')
            enParts = eng.split(' to ')
            start = {"zh": zhParts[0], "en": enParts[0]}
            end = {"zh": zhParts[1], "en": enParts[1]}
            for bound in ['I', 'O']:
                # Bound 'O' keeps the order of the CSV name (start -> end);
                # bound 'I' swaps origin and destination.
                orig, dest = (start, end) if bound == 'O' else (end, start)
                routeList[route+"_"+bound] = {
                    "route": route,
                    "bound": bound,
                    "service_type": "1",
                    "orig_tc": orig['zh'],
                    "dest_tc": dest['zh'],
                    "orig_en": orig['en'],
                    "dest_en": dest['en'],
                    "stops": []
                }

        # Parse stops
        r = await emitRequest('https://opendata.mtr.com.hk/data/mtr_bus_stops.csv', a_client)
        r.encoding = 'utf-8'
        reader = csv.reader(r.text.split("\n"))
        next(reader, None)  # skip the header row
        for row in reader:
            # Rows that do not have exactly 8 columns are ignored.
            if len(row) != 8:
                continue
            route, bound, _seq, stationId, lat, lng, name_zh, name_en = row
            routeKey = route+"_"+bound
            if routeKey in routeList:
                # Stops are appended in the order the CSV lists them.
                routeList[routeKey]['stops'].append(stationId)
            else:
                print ("error", routeKey)
            # The stop is recorded even when its route is unknown; lat/long
            # are stored as the strings read from the CSV.
            stopList[stationId] = {
                "stop": stationId,
                "name_en": name_en,
                "name_tc": name_zh,
                "lat": lat,
                "long": lng
            }

    # Routes that received no stops are left out of the route file.
    with open('routeList.lrtfeeder.json', 'w', encoding='UTF-8') as f:
        f.write(json.dumps([route for route in routeList.values() if len(route['stops']) > 0], ensure_ascii=False))
    with open('stopList.lrtfeeder.json', 'w', encoding='UTF-8') as f:
        f.write(json.dumps(stopList, ensure_ascii=False))
|
||
# NOTE(review): these rows use the blocking `requests` client and appear to be
# the removed side of the diff; the same steps also appear in getRouteStop.
# Parse stops | ||
r = requests.get('https://opendata.mtr.com.hk/data/mtr_bus_stops.csv') | ||
r.encoding = 'utf-8' | ||
reader = csv.reader(r.text.split("\n") ) | ||
# Skip the header row, then keep only rows with exactly 8 columns.
headers = next(reader,None) | ||
stops = [stop for stop in reader if len(stop) == 8] | ||
for [route, bound, seq, stationId, lat, lng, name_zh, name_en] in stops: | ||
routeKey = route+"_"+bound | ||
# Attach the stop to its route when the route key is known; otherwise print the key.
if routeKey in routeList: | ||
routeList[routeKey]['stops'].append(stationId) | ||
else: | ||
print ("error", routeKey) | ||
# lat/long are stored as the strings read from the CSV.
stopList[stationId] = { | ||
"stop": stationId, | ||
"name_en": name_en, | ||
"name_tc": name_zh, | ||
"lat": lat, | ||
"long": lng | ||
} | ||
|
||
# Write the routes that have at least one stop, then the stop mapping, as JSON.
with open('routeList.lrtfeeder.json', 'w', encoding='UTF-8') as f: | ||
f.write(json.dumps([route for route in routeList.values() if len(route['stops']) > 0], ensure_ascii=False)) | ||
with open('stopList.lrtfeeder.json', 'w', encoding='UTF-8') as f: | ||
f.write(json.dumps(stopList, ensure_ascii=False)) | ||
if __name__ == '__main__':
    # Script entry point: INFO-level logging for the process, httpx's own
    # logger limited to warnings, then run the crawl to completion.
    logger = logging.getLogger(__name__)
    logging.getLogger('httpx').setLevel(logging.WARNING)
    logging.basicConfig(level=logging.INFO)
    asyncio.run(getRouteStop())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,72 @@ | ||
# -*- coding: utf-8 -*- | ||
# MTR Bus fetching | ||
|
||
import asyncio | ||
import csv | ||
import requests | ||
import json | ||
from pyproj import Transformer | ||
|
||
# NOTE(review): these rows use the blocking `requests` client and appear to be
# the removed side of the diff; the same steps also appear in getRouteStop.
# Transformer from EPSG:2326 to EPSG:4326, used on the geodata x/y values below.
epsgTransformer = Transformer.from_crs('epsg:2326', 'epsg:4326') | ||
|
||
routeList = {} | ||
stopList = {} | ||
|
||
r = requests.get('https://opendata.mtr.com.hk/data/mtr_lines_and_stations.csv') | ||
r.encoding = 'utf-8' | ||
reader = csv.reader(r.text.split("\n") ) | ||
# Skip the header row, then keep only rows with exactly 7 columns.
headers = next(reader,None) | ||
routes = [route for route in reader if len(route) == 7] | ||
for [route, bound, stopCode, stopId, chn, eng, seq] in routes: | ||
if route == "": | ||
continue | ||
# Create the route record the first time a route/bound pair is seen.
if route+"_"+bound not in routeList: | ||
routeList[route+"_"+bound] = { | ||
"gtfsId": None, | ||
"route": route, | ||
"bound": bound, | ||
"service_type": "1", | ||
"orig_tc": None, | ||
"orig_en": None, | ||
"dest_tc": None, | ||
"dest_en": None, | ||
# 100 pre-allocated slots; a slot is filled by index from the row's seq value below.
"stops": [None] * 100, | ||
"fare": [] | ||
} | ||
if int(float(seq)) == 1: | ||
routeList[route+"_"+bound]["orig_tc"] = chn | ||
routeList[route+"_"+bound]["orig_en"] = eng | ||
# NOTE(review): indentation is lost in this view; the dest_* and stops
# assignments presumably run for every row, outside the seq == 1 check -- confirm.
routeList[route+"_"+bound]["dest_tc"] = chn | ||
routeList[route+"_"+bound]["dest_en"] = eng | ||
routeList[route+"_"+bound]["stops"][int(float(seq))] = stopCode | ||
# Look up coordinates once per stop code; the first search result is used.
if stopCode not in stopList: | ||
r = requests.get('https://geodata.gov.hk/gs/api/v1.0.0/locationSearch?q=港鐵'+chn+"站", headers={'Accept': 'application/json'}) | ||
# r.json() is called twice on the same response here.
lat, lng = epsgTransformer.transform( r.json()[0]['y'], r.json()[0]['x'] ) | ||
stopList[stopCode] = { | ||
"stop": stopCode, | ||
"name_en": eng, | ||
"name_tc": chn, | ||
"lat": lat, | ||
"long": lng | ||
} | ||
import logging | ||
import httpx | ||
|
||
from crawl_utils import emitRequest | ||
|
||
def filterStops(route):
    """Drop the ``None`` entries from ``route['stops']`` and return ``route``.

    The ``stops`` key of the passed-in dict is rebound to a new list that
    keeps the remaining entries in their original order; the same dict
    object is returned.
    """
    compacted = []
    for stopCode in route['stops']:
        if stopCode is not None:
            compacted.append(stopCode)
    route['stops'] = compacted
    return route
|
||
# Write every route (after filterStops has removed the None stop slots) and
# the stop mapping as JSON files in the current working directory.
with open('routeList.mtr.json', 'w', encoding='UTF-8') as f: | ||
f.write(json.dumps(list(map(filterStops, [route for route in routeList.values() if len(route['stops']) > 0])), ensure_ascii=False)) | ||
with open('stopList.mtr.json', 'w', encoding='UTF-8') as f: | ||
f.write(json.dumps(stopList, ensure_ascii=False)) | ||
async def getRouteStop(co = 'mtr'):
    """Download the MTR lines-and-stations CSV and write routes/stops as JSON.

    For every stop code seen for the first time, the geodata.gov.hk location
    search is queried and the first result's ``y``/``x`` values are passed
    through an EPSG:2326 -> EPSG:4326 transformer to obtain ``lat``/``long``.

    Two files are written to the current working directory:
    ``routeList.mtr.json`` (a list of route records, one per route and bound)
    and ``stopList.mtr.json`` (a mapping of stop code to stop record).

    Args:
        co: Company code. Not read by this function; the output file names
            are fixed to ``mtr``.

    Raises:
        IndexError: If a sequence number does not fit the 100 pre-allocated
            stop slots, or the location search returns an empty list.
    """
    epsgTransformer = Transformer.from_crs('epsg:2326', 'epsg:4326')

    routeList = {}
    stopList = {}

    # The client is scoped with ``async with`` so that it is closed on every
    # exit path, including exceptions raised while fetching or parsing.
    # NOTE(review): both URLs below go through a workers.dev host in front of
    # the upstream services -- confirm that this is intended.
    async with httpx.AsyncClient(timeout=httpx.Timeout(30.0, pool=None)) as a_client:
        r = await emitRequest('https://opendata.mtr.com.hk/data/mtr_lines_and_stations.csv', a_client)
        r.encoding = 'utf-8'
        reader = csv.reader(r.text.split("\n"))
        next(reader, None)  # skip the header row
        for row in reader:
            # Rows that do not have exactly 7 columns are ignored.
            if len(row) != 7:
                continue
            route, bound, stopCode, _stopId, chn, eng, seq = row
            if route == "":
                continue
            routeKey = route+"_"+bound
            if routeKey not in routeList:
                routeList[routeKey] = {
                    "gtfsId": None,
                    "route": route,
                    "bound": bound,
                    "service_type": "1",
                    "orig_tc": None,
                    "orig_en": None,
                    "dest_tc": None,
                    "dest_en": None,
                    # Slots indexed by sequence number; the unused None slots
                    # are removed by filterStops before the file is written.
                    "stops": [None] * 100,
                    "fare": []
                }
            entry = routeList[routeKey]
            # The CSV value goes through float() first, so "1.0" is accepted.
            seqNo = int(float(seq))
            if seqNo == 1:
                entry["orig_tc"] = chn
                entry["orig_en"] = eng
            # Overwritten on every row, so the row processed last for a
            # route/bound supplies the destination names.
            entry["dest_tc"] = chn
            entry["dest_en"] = eng
            entry["stops"][seqNo] = stopCode
            if stopCode not in stopList:
                r = await emitRequest('https://geodata.gov.hk/gs/api/v1.0.0/locationSearch?q=港鐵'+chn+"站", a_client, headers={'Accept': 'application/json'})
                # Parse the body once and use the first search result.
                location = r.json()[0]
                lat, lng = epsgTransformer.transform(location['y'], location['x'])
                stopList[stopCode] = {
                    "stop": stopCode,
                    "name_en": eng,
                    "name_tc": chn,
                    "lat": lat,
                    "long": lng
                }

    with open('routeList.mtr.json', 'w', encoding='UTF-8') as f:
        f.write(json.dumps([filterStops(route) for route in routeList.values() if len(route['stops']) > 0], ensure_ascii=False))
    with open('stopList.mtr.json', 'w', encoding='UTF-8') as f:
        f.write(json.dumps(stopList, ensure_ascii=False))
|
||
if __name__ == '__main__':
    # Script entry point: INFO-level logging for the process, httpx's own
    # logger limited to warnings, then run the crawl to completion.
    logger = logging.getLogger(__name__)
    logging.getLogger('httpx').setLevel(logging.WARNING)
    logging.basicConfig(level=logging.INFO)
    asyncio.run(getRouteStop())
Oops, something went wrong.