Given an input dataset of zone-to-zone travel demand, map each origin and destination zone to the network node nearest to that zone's centroid.
import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox
import time
# start the wall-clock timer for the whole run
start_time = time.time()

# input files
network_file = 'data/bayarea_tertiary_simplified.graphml'  # street network to model
taz_shapefile = 'Travel_Analysis_Zones/Travel_Analysis_Zones.shp'  # TAZ geometries
taz_travel_demand_file = 'data/pm_peak.csv'  # zone-to-zone travel demand

# output file
output_file = 'data/pm_peak_nodes.csv'  # where the final table is saved
# load the street network graph that we are modeling from its GraphML file
# NOTE(review): the `folder` keyword looks specific to older OSMnx releases - confirm against the installed version
G = ox.load_graphml(network_file, folder='.')
# read the TAZ shapefile into a GeoDataFrame, then show its (rows, columns) shape
gdf = gpd.read_file(taz_shapefile)
gdf.shape
(1454, 6)
# read the zone-to-zone travel demand table, then show its (rows, columns) shape
df = pd.read_csv(filepath_or_buffer=taz_travel_demand_file)
df.shape
(86563, 3)
# grab the CRS of the TAZ shapefile and the CRS of the network, then print both
original_crs, target_crs = gdf.crs, G.graph['crs']
print(original_crs)
print(target_crs)
{'init': 'epsg:26910'} {'init': 'epsg:4326'}
# calculate TAZ centroids while the geometries are still in the shapefile's
# own CRS, then reproject the centroid points to the network's CRS. taking
# the centroid after reprojecting would do planar geometry on whatever
# coordinates the network CRS uses, which is distorted when those are
# latitude/longitude degrees
centroids = gdf.centroid.to_crs(target_crs)
# project the TAZ geodataframe to the network's CRS
gdf = gdf.to_crs(target_crs)
# store the reprojected centroids alongside the reprojected zone geometries
gdf['centroid'] = centroids
gdf['centroid'].head()
0 POINT (-121.857633666841 37.28926565704089) 1 POINT (-121.853324889967 37.27757719988241) 2 POINT (-121.8155657776301 37.29462130901247) 3 POINT (-121.8156334258242 37.28067749895624) 4 POINT (-121.8326218122455 37.3119281358813) Name: centroid, dtype: object
# turn every centroid point into a (lat, lng) tuple, i.e. (y, x)
points = gdf['centroid'].map(lambda centroid: (centroid.y, centroid.x))
# wrapper function for OSMnx to get network node nearest to some point
def nearest_node(point):
    """
    Find the network node nearest to a point.

    Thin wrapper around ox.get_nearest_node, bound to the module-level graph
    G, so that it can be mapped over a series of points.

    Parameters
    ----------
    point : tuple
        the point to search from; this file passes (lat, lng) tuples

    Returns
    -------
    whatever ox.get_nearest_node returns when called with return_dist=True;
    this file unpacks each result as a (node, distance) pair
    """
    result = ox.get_nearest_node(G, point, method='greatcircle', return_dist=True)
    return result
%%time
# run the nearest-node search once per centroid point; each result is later unpacked as a (node, distance) pair
nodes_distances = points.map(nearest_node)
Wall time: 1min 35s
# split the (node, distance) pairs into two parallel sequences
nodes, distances = zip(*nodes_distances)
# attach both sequences to the TAZ geodataframe as new columns
gdf = gdf.assign(nearest_node=nodes, nearest_node_distance=distances)
# preview the zone ID, centroid, nearest node and distance of the first few zones
gdf.head()[['TAZ1454', 'centroid', 'nearest_node', 'nearest_node_distance']]
TAZ1454 | centroid | nearest_node | nearest_node_distance | |
---|---|---|---|---|
0 | 566 | POINT (-121.857633666841 37.28926565704089) | 1105792616 | 205.079915 |
1 | 565 | POINT (-121.853324889967 37.27757719988241) | 1099259828 | 64.051030 |
2 | 573 | POINT (-121.8155657776301 37.29462130901247) | 1097706630 | 277.684824 |
3 | 571 | POINT (-121.8156334258242 37.28067749895624) | 65475266 | 205.125176 |
4 | 576 | POINT (-121.8326218122455 37.3119281358813) | 65555564 | 475.644355 |
# build the lookup dict: zone ID -> ID of the node nearest to that zone's centroid
zone_nodes = gdf.set_index('TAZ1454')['nearest_node'].to_dict()
# cast the zone IDs and the trip counts to 64-bit integers
df = df.astype({'DTAZ': np.int64, 'OTAZ': np.int64, 'trips': np.int64})
df.head()
OTAZ | DTAZ | trips | |
---|---|---|---|
0 | 1 | 7 | 1 |
1 | 1 | 8 | 2 |
2 | 1 | 9 | 3 |
3 | 1 | 11 | 1 |
4 | 1 | 15 | 2 |
# given a zone, return the node nearest to its centroid
def lookup_nearest_node(zone):
    """
    Return the network node nearest to a zone's centroid.

    The zone ID is looked up in the module-level zone_nodes dict. If the zone
    is not a key of that dict, an error message is printed and None is
    returned.

    Parameters
    ----------
    zone : the zone ID to look up

    Returns
    -------
    the value stored in zone_nodes for this zone, or None if it is absent
    """
    try:
        return zone_nodes[zone]
    except KeyError:
        print('ERROR: zone "{}" not found in dict'.format(zone))
        return None
# get the origin nodes for all the origin zones
df['orig'] = df['OTAZ'].map(lookup_nearest_node)
# get the destination nodes for all the destination zones
df['dest'] = df['DTAZ'].map(lookup_nearest_node)
# a zone that is missing from the lookup yields a null node ID, and nulls
# cannot be cast to integers: stop here with an explicit message rather than
# letting the cast below fail with an opaque conversion error
unmatched = df['orig'].isnull() | df['dest'].isnull()
if unmatched.any():
    raise ValueError('{} trip records have an origin or destination zone with no nearest node'.format(unmatched.sum()))
# clean up the data into the format LBNL desires: integer node IDs, and only
# the orig, dest and trips columns, in that order
df['dest'] = df['dest'].astype(np.int64)
df['orig'] = df['orig'].astype(np.int64)
df = df[['orig', 'dest', 'trips']]
# display a subset of the final data
df.head()
orig | dest | trips | |
---|---|---|---|
0 | 65295278 | 65334120 | 1 |
1 | 65295278 | 65343958 | 2 |
2 | 65295278 | 65352337 | 3 |
3 | 65295278 | 65325032 | 1 |
4 | 65295278 | 65309522 | 2 |
# write the final table to the output CSV, leaving out the index column
df.to_csv(path_or_buf=output_file, encoding='utf-8', index=False)
# everything is finished: compute and show the elapsed wall-clock seconds
elapsed_time = time.time() - start_time
elapsed_time
104.20572972297668