import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns
import geopandas as gpd
import contextily as ctx
plt.rcParams['savefig.facecolor'] = 'white'
%matplotlib inline
Printing verions of Python modules and packages with watermark - the IPython magic extension.
Documention for installing watermark: https://github.com/rasbt/watermark
%reload_ext watermark
%watermark -v -p numpy,pandas,geopandas,matplotlib,matplotlib,seaborn,contextily
Python implementation: CPython Python version : 3.11.0 IPython version : 8.6.0 numpy : 1.23.4 pandas : 1.5.1 geopandas : 0.12.1 matplotlib: 3.6.2 seaborn : 0.12.1 contextily: 1.2.0
# list items in data folder
%ls data/
README.md streets-clipped.gpkg street-flooding-complaints-cleaned.csv streets.gpkg street-flooding-complaints.csv water-main-breaks.csv
# read data as a dataframe
path = 'data/street-flooding-complaints.csv'
df = pd.read_csv(path, low_memory=False)
# preview data
print(f'shape of data: {df.shape}')
df.head()
shape of data: (25747, 32)
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | city | x_coordinate_state_plane | y_coordinate_state_plane | latitude | longitude | location | incident_address | street_name | bbl | due_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 18265181 | 2010-07-14T08:38:00.000 | 2010-07-14T08:38:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | PELHAM PKWY | STILLWELL AVE | PELHAM PKWY | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 34783066 | 2016-11-15T09:27:00.000 | 2016-11-15T10:05:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | LAFAYETTE AVENUE | ... | STATEN ISLAND | 958594.0 | 170855.0 | 40.635597 | -74.092438 | {'latitude': '40.635596930697716', 'longitude'... | NaN | NaN | NaN | NaN |
2 | 21549616 | 2011-09-29T10:34:00.000 | 2011-09-30T10:40:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | THURSBY AVE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 35839080 | 2017-03-31T20:24:00.000 | 2017-04-01T02:25:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | 3 AVENUE | 2 AVENUE | NaN | ... | NEW YORK | NaN | NaN | NaN | NaN | NaN | EAST 106 STREET | EAST 106 STREET | NaN | NaN |
4 | 29443390 | 2014-12-06T10:23:00.000 | 2014-12-06T11:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NAGLE AVE | DYCKMAN ST | NAGLE AVE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 32 columns
# column info
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 25747 entries, 0 to 25746 Data columns (total 32 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 unique_key 25747 non-null int64 1 created_date 25747 non-null object 2 closed_date 25746 non-null object 3 agency 25747 non-null object 4 agency_name 25747 non-null object 5 complaint_type 25747 non-null object 6 descriptor 25747 non-null object 7 cross_street_1 22472 non-null object 8 cross_street_2 22464 non-null object 9 intersection_street_1 9616 non-null object 10 intersection_street_2 9616 non-null object 11 address_type 25741 non-null object 12 facility_type 0 non-null float64 13 status 25747 non-null object 14 resolution_description 25743 non-null object 15 resolution_action_updated_date 25747 non-null object 16 community_board 25747 non-null object 17 borough 25747 non-null object 18 open_data_channel_type 25747 non-null object 19 park_facility_name 25747 non-null object 20 park_borough 25747 non-null object 21 incident_zip 24899 non-null float64 22 city 24901 non-null object 23 x_coordinate_state_plane 24817 non-null float64 24 y_coordinate_state_plane 24817 non-null float64 25 latitude 24817 non-null float64 26 longitude 24817 non-null float64 27 location 24817 non-null object 28 incident_address 16188 non-null object 29 street_name 16188 non-null object 30 bbl 14603 non-null float64 31 due_date 1 non-null object dtypes: float64(7), int64(1), object(24) memory usage: 6.3+ MB
# importing nta boundaries
url = 'https://data.cityofnewyork.us/resource/9nt8-h7nd.geojson'
nta_gdf = gpd.read_file(url).to_crs(epsg=2263)
# full name
nta_gdf['ntaname_full'] = nta_gdf['ntaname'] + ', ' + nta_gdf['boroname']
# previewing first five rows in data
print(f'shape of data: {nta_gdf.shape}')
nta_gdf.head()
shape of data: (262, 13)
shape_area | ntaname | cdtaname | shape_leng | boroname | ntatype | nta2020 | borocode | countyfips | ntaabbrev | cdta2020 | geometry | ntaname_full | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 35321204.8204 | Greenpoint | BK01 Williamsburg-Greenpoint (CD 1 Equivalent) | 28912.5653122 | Brooklyn | 0 | BK0101 | 3 | 047 | Grnpt | BK01 | MULTIPOLYGON (((1003059.973 204572.243, 100299... | Greenpoint, Brooklyn |
1 | 28854314.555 | Williamsburg | BK01 Williamsburg-Greenpoint (CD 1 Equivalent) | 28098.0267744 | Brooklyn | 0 | BK0102 | 3 | 047 | Wllmsbrg | BK01 | MULTIPOLYGON (((995851.880 203199.535, 995969.... | Williamsburg, Brooklyn |
2 | 15208960.44 | South Williamsburg | BK01 Williamsburg-Greenpoint (CD 1 Equivalent) | 18250.2804159 | Brooklyn | 0 | BK0103 | 3 | 047 | SWllmsbrg | BK01 | MULTIPOLYGON (((998047.189 196303.521, 998157.... | South Williamsburg, Brooklyn |
3 | 52266209.4439 | East Williamsburg | BK01 Williamsburg-Greenpoint (CD 1 Equivalent) | 43184.773814 | Brooklyn | 0 | BK0104 | 3 | 047 | EWllmsbrg | BK01 | MULTIPOLYGON (((1005302.485 199455.944, 100530... | East Williamsburg, Brooklyn |
4 | 9982321.73877 | Brooklyn Heights | BK02 Downtown Brooklyn-Fort Greene (CD 2 Appro... | 14312.506134 | Brooklyn | 0 | BK0201 | 3 | 047 | BkHts | BK02 | MULTIPOLYGON (((986737.292 194249.956, 986678.... | Brooklyn Heights, Brooklyn |
# sanity check
nta_gdf.plot()
<AxesSubplot: >
path = 'data/streets.gpkg'
streets = gpd.read_file(path)
# sanity checks
print(f'shape of data: {streets.shape}')
print(f"street id is unique: {streets['physicalid'].is_unique}")
print(streets.crs)
streets.head()
shape of data: (99324, 12) street id is unique: True epsg:2263
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 262.77781228 | MULTILINESTRING ((979278.595 196555.690, 97929... |
1 | 5 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 259.415988519 | MULTILINESTRING ((979377.413 196797.951, 97950... |
2 | 6 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 280.444780871 | MULTILINESTRING ((979503.289 197024.782, 97964... |
3 | 8 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 32.0701391509 | MULTILINESTRING ((979553.746 196059.826, 97952... |
4 | 14 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 24.0 | 13 | 13 | 1 | 206.27185039 | MULTILINESTRING ((980288.092 195963.182, 98026... |
# column info
streets.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 99324 entries, 0 to 99323 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 physicalid 99324 non-null object 1 st_label 99324 non-null object 2 st_name 99324 non-null object 3 full_stree 99324 non-null object 4 rw_type 99324 non-null object 5 rw_type_name 99324 non-null object 6 st_width 99324 non-null object 7 frm_lvl_co 99324 non-null object 8 to_lvl_co 99324 non-null object 9 borocode 99324 non-null object 10 shape_leng 99324 non-null object 11 geometry 99324 non-null geometry dtypes: geometry(1), object(11) memory usage: 9.1+ MB
# sanity check, examine counts of geom types
streets.geom_type.value_counts()
MultiLineString 99324 dtype: int64
# sanity check plot
streets.plot()
<AxesSubplot: >
crs=2263
geometry = gpd.points_from_xy(
df['x_coordinate_state_plane'],
df['y_coordinate_state_plane']
)
# make geodataframe form x,y points
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
# spatial join with nta information
gdf = gpd.sjoin(
gdf,
nta_gdf,
how="inner",
predicate='within'
)
print('shape of data: {}'.format(gdf.shape))
gdf.head()
shape of data: (24814, 46)
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | cdtaname | shape_leng | boroname | ntatype | nta2020 | borocode | countyfips | ntaabbrev | cdta2020 | ntaname_full | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 34783066 | 2016-11-15T09:27:00.000 | 2016-11-15T10:05:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | LAFAYETTE AVENUE | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
994 | 16585559 | 2010-05-04T09:20:00.000 | 2010-05-08T09:00:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1335 | 18255249 | 2010-07-13T13:20:00.000 | 2010-07-13T15:10:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1422 | 18380954 | 2010-07-30T11:08:00.000 | 2010-07-30T11:20:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | DANIEL LOW TER | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1603 | 18449016 | 2010-08-09T13:49:00.000 | 2010-08-09T14:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
5 rows × 46 columns
(gdf
.groupby(by='ntaname_full')['unique_key']
.count()
.rename('count')
.reset_index()
.sort_values('count', ascending=False)
.head(10)
)
ntaname_full | count | |
---|---|---|
155 | New Dorp-Midland Beach, Staten Island | 913 |
184 | Rockaway Beach-Arverne-Edgemere, Queens | 656 |
96 | Great Kills-Eltingville, Staten Island | 630 |
72 | Far Rockaway-Bayswater, Queens | 607 |
108 | Howard Beach-Lindenwood, Queens | 566 |
200 | St. Albans, Queens | 553 |
186 | Rosedale, Queens | 437 |
91 | Grasmere-Arrochar-South Beach-Dongan Hills, St... | 437 |
20 | Borough Park, Brooklyn | 434 |
160 | Oakwood-Richmondtown, Staten Island | 396 |
Let's select the top two neighborhoods to examine as case studies.
crs = 2263
geometry = gpd.points_from_xy(
df['x_coordinate_state_plane'],
df['y_coordinate_state_plane']
)
# make geodataframe from x,y points
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
# spatial join with nta information
gdf = gpd.sjoin(
gdf,
nta_gdf,
how="inner",
predicate='within'
)
print('shape of data: {}'.format(gdf.shape))
gdf.head()
shape of data: (24814, 46)
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | cdtaname | shape_leng | boroname | ntatype | nta2020 | borocode | countyfips | ntaabbrev | cdta2020 | ntaname_full | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 34783066 | 2016-11-15T09:27:00.000 | 2016-11-15T10:05:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | LAFAYETTE AVENUE | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
994 | 16585559 | 2010-05-04T09:20:00.000 | 2010-05-08T09:00:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1335 | 18255249 | 2010-07-13T13:20:00.000 | 2010-07-13T15:10:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1422 | 18380954 | 2010-07-30T11:08:00.000 | 2010-07-30T11:20:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | DANIEL LOW TER | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
1603 | 18449016 | 2010-08-09T13:49:00.000 | 2010-08-09T14:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ST PETER'S PL | BEND | NaN | ... | SI01 North Shore (CD 1 Equivalent) | 31943.5246384 | Staten Island | 0 | SI0101 | 5 | 085 | StGrg | SI01 | St. George-New Brighton, Staten Island |
5 rows × 46 columns
# locate midland beach nta shape
name = 'New Dorp-Midland Beach, Staten Island'
midland_beach_shape = (
nta_gdf
.loc[nta_gdf['ntaname_full'] == name]
.reset_index(drop=True)
)
# preview data
midland_beach_shape.head()
shape_area | ntaname | cdtaname | shape_leng | boroname | ntatype | nta2020 | borocode | countyfips | ntaabbrev | cdta2020 | geometry | ntaname_full | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 62182628.8093 | New Dorp-Midland Beach | SI02 Mid-Island (CD 2 Approximation) | 50207.4527328 | Staten Island | 0 | SI0202 | 5 | 085 | NwDrp_MBch | SI02 | MULTIPOLYGON (((959003.673 152441.505, 959495.... | New Dorp-Midland Beach, Staten Island |
# clipping complaints that fall within Midland Beach
midland_beach_gdf = gpd.clip(gdf, midland_beach_shape)
# sanity check plot
midland_beach_gdf.plot()
<AxesSubplot: >
# clip streets within Midland Beach
streets_clipped = gpd.clip(streets, midland_beach_shape)
# recalculate street length
streets_clipped['shape_leng'] = streets_clipped['geometry'].length
# sanity check plot
print(f'shape of data: {streets_clipped.shape}')
streets_clipped.plot()
shape of data: (914, 12)
/Users/geribauer/anaconda3/envs/duckdb_env/lib/python3.11/site-packages/geopandas/tools/clip.py:67: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)` clipped.loc[
<AxesSubplot: >
# examine geom_type values
streets_clipped.geom_type.value_counts()
LineString 908 MultiLineString 6 dtype: int64
# retrieve only linestring geom types
streets_clipped = (
streets_clipped.loc[streets_clipped.geom_type == 'LineString']
.reset_index(drop=True)
)
# sanity check geom types
streets_clipped.geom_type.value_counts()
LineString 908 dtype: int64
Methodology: https://medium.com/@brendan_ward/how-to-leverage-geopandas-for-faster-snapping-of-points-to-lines-6113c94e59aa
The code below is from Brendan's awesome post.
# offset 80 ft.
offset = 80
bbox = midland_beach_gdf.bounds + [-offset, -offset, offset, offset]
hits = bbox.apply(lambda row: list(streets_clipped.sindex.intersection(row)), axis=1)
hits.head()
13387 [135] 11428 [135] 7309 [135] 9911 [135] 17781 [135] dtype: object
tmp = pd.DataFrame({
"pt_idx": np.repeat(hits.index, hits.apply(len)),
"line_i": np.concatenate(hits.values)
})
# join back to the lines on line_i; we use reset_index() to
# give us the ordinal position of each line
tmp = tmp.join(
streets_clipped.reset_index(drop=True),
on="line_i"
)
# join back to the original points to get their geometry
# rename the point geometry as "point"
tmp = tmp.join(
midland_beach_gdf['geometry'].rename("point"),
on="pt_idx"
)
# convert back to a GeoDataFrame, so we can do spatial ops
tmp = gpd.GeoDataFrame(
tmp,
geometry="geometry",
crs=midland_beach_gdf.crs
)
tmp.head()
pt_idx | line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 13387 | 135.0 | 91096 | PATTERSON AVE | PATTERSON | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 5 | 274.39715 | LINESTRING (961401.227 151136.331, 961242.291 ... | POINT (961299.000 150997.000) |
1 | 11428 | 135.0 | 91096 | PATTERSON AVE | PATTERSON | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 5 | 274.39715 | LINESTRING (961401.227 151136.331, 961242.291 ... | POINT (961299.000 150997.000) |
2 | 7309 | 135.0 | 91096 | PATTERSON AVE | PATTERSON | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 5 | 274.39715 | LINESTRING (961401.227 151136.331, 961242.291 ... | POINT (961299.000 150997.000) |
3 | 9911 | 135.0 | 91096 | PATTERSON AVE | PATTERSON | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 5 | 274.39715 | LINESTRING (961401.227 151136.331, 961242.291 ... | POINT (961299.000 150997.000) |
4 | 17781 | 135.0 | 91096 | PATTERSON AVE | PATTERSON | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 5 | 274.39715 | LINESTRING (961401.227 151136.331, 961242.291 ... | POINT (961299.000 150997.000) |
# calculate distance from line to point
tmp["snap_dist"] = tmp['geometry'].distance(gpd.GeoSeries(tmp.point))
# discard any lines that are greater than tolerance from points
tmp = (
tmp
.loc[tmp.snap_dist <= offset]
.sort_values(by=["snap_dist"])
)
tmp.head()
pt_idx | line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2997 | 8341 | 871.0 | 173628 | CEDAR GROVE AVE | CEDAR GROVE | CEDAR GROVE AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 81.013544 | LINESTRING (956774.728 144439.828, 956728.977 ... | POINT (956729.000 144373.000) | 0.001732 |
2179 | 3674 | 575.0 | 53940 | FREEBORN ST | FREEBORN | FREEBORN ST | 1 | Street | 30.0 | 13 | 13 | 5 | 379.453064 | LINESTRING (958104.215 147999.309, 957884.712 ... | POINT (958104.000 147999.000) | 0.003294 |
559 | 23408 | 552.0 | 84599 | ROSE AVE | ROSE | ROSE AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 249.365873 | LINESTRING (951137.861 148456.727, 951338.494 ... | POINT (951338.000 148309.000) | 0.003960 |
1537 | 11672 | 350.0 | 53996 | OLYMPIA BLVD | OLYMPIA | OLYMPIA BLVD | 1 | Street | 18.0 | 13 | 13 | 5 | 1008.363208 | LINESTRING (959533.977 149558.671, 959288.004 ... | POINT (958944.000 148741.000) | 0.006296 |
325 | 8235 | 138.0 | 44747 | MASON AVE | MASON | MASON AVE | 1 | Street | 54.0 | 13 | 13 | 5 | 290.157312 | LINESTRING (958857.688 151163.298, 958693.864 ... | POINT (958694.000 150924.000) | 0.006396 |
# group by the index of the points and take the first, which is the closest line
closest = tmp.groupby("pt_idx").first()
# construct a GeoDataFrame of the closest lines
closest = gpd.GeoDataFrame(closest, geometry="geometry")
closest.head()
line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pt_idx | |||||||||||||||
48 | 592.0 | 76090 | MAPLETON AVE | MAPLETON | MAPLETON AVE | 1 | Street | 32.0 | 13 | 13 | 5 | 249.179691 | LINESTRING (959135.153 148146.042, 959335.953 ... | POINT (959241.000 148064.000) | 3.438294 |
84 | 246.0 | 100415 | HAVEN AVE | HAVEN | HAVEN AVE | 1 | Street | 25.0 | 13 | 13 | 5 | 190.500775 | LINESTRING (957003.740 150288.337, 956885.362 ... | POINT (956963.000 150241.000) | 2.503578 |
193 | 434.0 | 52066 | KISWICK ST | KISWICK | KISWICK ST | 1 | Street | 32.0 | 13 | 13 | 5 | 509.998826 | LINESTRING (957795.024 148848.019, 957495.893 ... | POINT (957674.000 148687.000) | 3.577665 |
210 | 235.0 | 90075 | STOBE AVE | STOBE | STOBE AVE | 1 | Street | 38.0 | 13 | 13 | 5 | 748.398420 | LINESTRING (958293.109 150253.541, 958830.668 ... | POINT (958673.000 150008.000) | 3.070179 |
211 | 156.0 | 86099 | HUSSON ST | HUSSON | HUSSON ST | 1 | Street | 30.0 | 13 | 13 | 5 | 258.755628 | LINESTRING (956131.650 151399.706, 955957.314 ... | POINT (956065.000 151331.000) | 2.960824 |
# position of nearest point from start of the line
pos = closest['geometry'].project(gpd.GeoSeries(closest['point']))
# Get new point location geometry
new_pts = closest.geometry.interpolate(pos)
# Create a new GeoDataFrame from the columns from the closest line and
# new point geometries (which will be called "geometries")
snapped = gpd.GeoDataFrame(closest, geometry=new_pts)
snapped.head()
line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pt_idx | |||||||||||||||
48 | 592.0 | 76090 | MAPLETON AVE | MAPLETON | MAPLETON AVE | 1 | Street | 32.0 | 13 | 13 | 5 | 249.179691 | POINT (959243.036 148066.771) | POINT (959241.000 148064.000) | 3.438294 |
84 | 246.0 | 100415 | HAVEN AVE | HAVEN | HAVEN AVE | 1 | Street | 25.0 | 13 | 13 | 5 | 190.500775 | POINT (956964.962 150239.444) | POINT (956963.000 150241.000) | 2.503578 |
193 | 434.0 | 52066 | KISWICK ST | KISWICK | KISWICK ST | 1 | Street | 32.0 | 13 | 13 | 5 | 509.998826 | POINT (957676.898 148684.902) | POINT (957674.000 148687.000) | 3.577665 |
210 | 235.0 | 90075 | STOBE AVE | STOBE | STOBE AVE | 1 | Street | 38.0 | 13 | 13 | 5 | 748.398420 | POINT (958674.649 150010.590) | POINT (958673.000 150008.000) | 3.070179 |
211 | 156.0 | 86099 | HUSSON ST | HUSSON | HUSSON ST | 1 | Street | 30.0 | 13 | 13 | 5 | 258.755628 | POINT (956067.188 151329.005) | POINT (956065.000 151331.000) | 2.960824 |
# join back to the original points
updated_points = (
midland_beach_gdf.drop(columns=['geometry', 'shape_leng'])
.join(snapped.drop(columns=['borocode']))
.dropna(subset=["geometry"])
)
updated_points.head()
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
13387 | 32383025 | 2016-01-10T09:17:00.000 | 2016-01-11T07:50:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | SEAVIEW AVE | DEAD END | NaN | ... | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 274.39715 | POINT (961301.144 150995.477) | POINT (961299.000 150997.000) | 2.630045 |
11428 | 29471751 | 2014-12-09T09:29:00.000 | 2014-12-09T19:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | SEAVIEW AVE | DEAD END | NaN | ... | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 274.39715 | POINT (961301.144 150995.477) | POINT (961299.000 150997.000) | 2.630045 |
7309 | 24672672 | 2012-12-27T07:49:00.000 | 2012-12-27T10:00:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | SEAVIEW AVE | DEAD END | NaN | ... | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 274.39715 | POINT (961301.144 150995.477) | POINT (961299.000 150997.000) | 2.630045 |
9911 | 27943694 | 2014-04-30T22:14:00.000 | 2014-05-01T16:43:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | SEAVIEW AVE | DEAD END | NaN | ... | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 274.39715 | POINT (961301.144 150995.477) | POINT (961299.000 150997.000) | 2.630045 |
17781 | 37554602 | 2017-10-29T15:36:00.000 | 2017-10-29T22:55:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | SEAVIEW AVE | DEAD END | NaN | ... | PATTERSON AVE | 1 | Street | 28.0 | 13 | 13 | 274.39715 | POINT (961301.144 150995.477) | POINT (961299.000 150997.000) | 2.630045 |
5 rows × 58 columns
updated_points = gpd.GeoDataFrame(
updated_points,
geometry='point'
)
updated_points.plot()
<AxesSubplot: >
fig, ax = plt.subplots(figsize=(8, 8))
updated_points.plot(
color='black',
edgecolor='black',
alpha=.4,
ax=ax,
zorder=3
)
streets_clipped.plot(ax=ax, zorder=2)
midland_beach_shape.plot(
ax=ax,
zorder=1,
color='None',
edgecolor='black'
)
# adding basemap
ctx.add_basemap(
ax,
crs=2263,
source=ctx.providers.CartoDB.PositronNoLabels,
zorder=0
)
label = 'NYC 311 Street Flooding Complaints\n\
in New Dorp-Midland Beach, Staten Island from 2010 to 2020'
ax.set_title(label, fontsize=12, pad=10)
plt.axis('off')
plt.tight_layout()
# buffer lines
streets_clipped = (
streets_clipped
.set_geometry('geometry')
.assign(new_geom=lambda x: x.geometry.buffer(40, cap_style=2))
.set_geometry('new_geom')
)
streets_clipped.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 52332 | MIDLAND AVE | MIDLAND | MIDLAND AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 513.064094 | LINESTRING (953615.529 151406.480, 954034.853 ... | POLYGON ((954057.901 151143.532, 954011.804 15... |
1 | 50152 | RICHMOND RD | RICHMOND | RICHMOND RD | 1 | Street | 42.0 | 13 | 13 | 5 | 312.580758 | LINESTRING (953615.529 151406.480, 953340.369 ... | POLYGON ((953359.347 151222.965, 953321.391 15... |
2 | 98860 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 78.668493 | LINESTRING (954158.195 151401.054, 954222.466 ... | POLYGON ((954245.532 151388.369, 954199.400 15... |
3 | 102931 | HAMDEN AVE | HAMDEN | HAMDEN AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 590.710057 | LINESTRING (954296.799 151609.830, 954781.221 ... | POLYGON ((954804.112 151304.589, 954758.331 15... |
4 | 98861 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 311.130297 | LINESTRING (953904.291 151580.873, 954158.195 ... | POLYGON ((954181.313 151433.697, 954135.076 15... |
# group count of complaints by street
gdf_count = (
updated_points
.groupby(by='physicalid')['created_date']
.count()
.reset_index()
.rename(columns={"created_date": "count"})
)
gdf_count.head()
physicalid | count | |
---|---|---|
0 | 100411 | 1 |
1 | 100415 | 12 |
2 | 100416 | 1 |
3 | 100417 | 2 |
4 | 100451 | 1 |
# joining count of complaints to streets
streets_with_count = streets_clipped.merge(
gdf_count,
left_on='physicalid',
right_on='physicalid',
how='left'
)
streets_with_count['count'] = streets_with_count['count'].fillna(0).astype(int)
# examine df
streets_with_count.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 908 entries, 0 to 907 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 physicalid 908 non-null object 1 st_label 908 non-null object 2 st_name 908 non-null object 3 full_stree 908 non-null object 4 rw_type 908 non-null object 5 rw_type_name 908 non-null object 6 st_width 908 non-null object 7 frm_lvl_co 908 non-null object 8 to_lvl_co 908 non-null object 9 borocode 908 non-null object 10 shape_leng 908 non-null float64 11 geometry 908 non-null geometry 12 new_geom 908 non-null geometry 13 count 908 non-null int64 dtypes: float64(1), geometry(2), int64(1), object(10) memory usage: 106.4+ KB
# normalize counts
streets_with_count['count_per_100ft'] = (streets_with_count['count'] / streets_with_count['shape_leng']) * 100
streets_with_count.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | count | count_per_100ft | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 52332 | MIDLAND AVE | MIDLAND | MIDLAND AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 513.064094 | LINESTRING (953615.529 151406.480, 954034.853 ... | POLYGON ((954057.901 151143.532, 954011.804 15... | 0 | 0.000000 |
1 | 50152 | RICHMOND RD | RICHMOND | RICHMOND RD | 1 | Street | 42.0 | 13 | 13 | 5 | 312.580758 | LINESTRING (953615.529 151406.480, 953340.369 ... | POLYGON ((953359.347 151222.965, 953321.391 15... | 0 | 0.000000 |
2 | 98860 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 78.668493 | LINESTRING (954158.195 151401.054, 954222.466 ... | POLYGON ((954245.532 151388.369, 954199.400 15... | 0 | 0.000000 |
3 | 102931 | HAMDEN AVE | HAMDEN | HAMDEN AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 590.710057 | LINESTRING (954296.799 151609.830, 954781.221 ... | POLYGON ((954804.112 151304.589, 954758.331 15... | 1 | 0.169288 |
4 | 98861 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 311.130297 | LINESTRING (953904.291 151580.873, 954158.195 ... | POLYGON ((954181.313 151433.697, 954135.076 15... | 0 | 0.000000 |
# exammine distribution of counts
streets_with_count.loc[:, ['count', 'count_per_100ft']].describe()
count | count_per_100ft | |
---|---|---|
count | 908.000000 | 908.000000 |
mean | 1.001101 | 0.346161 |
std | 4.183893 | 1.639988 |
min | 0.000000 | 0.000000 |
25% | 0.000000 | 0.000000 |
50% | 0.000000 | 0.000000 |
75% | 1.000000 | 0.145356 |
max | 59.000000 | 29.875949 |
fig, ax = plt.subplots(figsize=(8, 8))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
cmap = plt.cm.Blues
# get max of array and place as top bounds
norm = mpl.colors.BoundaryNorm([0, 1, 5, 10, 30, 60], cmap.N)
streets_with_count.plot(
column='count',
ax=ax,
cax=cax,
zorder=2,
cmap=cmap,
norm=norm,
edgecolor='black',
linewidth=.3,
legend=True
)
(midland_beach_shape
.plot(
ax=ax,
zorder=1,
color='None',
edgecolor='black')
)
# adding basemap
ctx.add_basemap(
ax,
crs=2263,
source=ctx.providers.CartoDB.PositronNoLabels,
zorder=0
)
# setting title
label = 'Count of 311 Street Flooding Complaints by Street Segment\n\
in New Dorp-Midland Beach, Staten Island from 2010 to 2020'
ax.set_title(label, fontsize=12, pad=10)
ax.axis('off')
plt.tight_layout()
fig, ax = plt.subplots(figsize=(8, 8))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
cmap = plt.cm.Blues
# get max of array and place as top bounds
norm = mpl.colors.BoundaryNorm([0, 1, 3, 5, 10, 30], cmap.N)
streets_with_count.plot(
column='count_per_100ft',
ax=ax,
cax=cax,
zorder=2,
cmap=cmap,
norm=norm,
edgecolor='black',
linewidth=.3,
legend=True
)
(midland_beach_shape
.plot(
ax=ax,
zorder=1,
color='None',
edgecolor='black')
)
# adding basemap
ctx.add_basemap(
ax,
crs=2263,
source=ctx.providers.CartoDB.PositronNoLabels,
zorder=0
)
# setting title
label = 'Count of 311 Street Flooding Complaints per 100 ft.\n\
by Street Segment in New Dorp-Midland Beach, Staten Island from 2010 to 2020'
ax.set_title(label, fontsize=12, pad=10)
ax.axis('off')
plt.savefig('figures/midland-beach.png', dpi=250, bbox_inches='tight')
# create full street name
streets_with_count['street_and_id'] = (
streets_with_count['full_stree']
+ ', id:'
+ streets_with_count['physicalid']
)
streets_with_count.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | count | count_per_100ft | street_and_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 52332 | MIDLAND AVE | MIDLAND | MIDLAND AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 513.064094 | LINESTRING (953615.529 151406.480, 954034.853 ... | POLYGON ((954057.901 151143.532, 954011.804 15... | 0 | 0.000000 | MIDLAND AVE, id:52332 |
1 | 50152 | RICHMOND RD | RICHMOND | RICHMOND RD | 1 | Street | 42.0 | 13 | 13 | 5 | 312.580758 | LINESTRING (953615.529 151406.480, 953340.369 ... | POLYGON ((953359.347 151222.965, 953321.391 15... | 0 | 0.000000 | RICHMOND RD, id:50152 |
2 | 98860 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 78.668493 | LINESTRING (954158.195 151401.054, 954222.466 ... | POLYGON ((954245.532 151388.369, 954199.400 15... | 0 | 0.000000 | BEDFORD AVE, id:98860 |
3 | 102931 | HAMDEN AVE | HAMDEN | HAMDEN AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 590.710057 | LINESTRING (954296.799 151609.830, 954781.221 ... | POLYGON ((954804.112 151304.589, 954758.331 15... | 1 | 0.169288 | HAMDEN AVE, id:102931 |
4 | 98861 | BEDFORD AVE | BEDFORD | BEDFORD AVE | 1 | Street | 30.0 | 13 | 13 | 5 | 311.130297 | LINESTRING (953904.291 151580.873, 954158.195 ... | POLYGON ((954181.313 151433.697, 954135.076 15... | 0 | 0.000000 | BEDFORD AVE, id:98861 |
fig, axs = plt.subplots(2, 1, sharey=False, figsize=(8, 8))
# first plot
data = streets_with_count.sort_values(by='count', ascending=False).head(10)
sns.barplot(
data=data,
y='street_and_id',
x='count',
color='#1f77b4',
ax=axs[0]
)
label = 'Count of NYC 311 Street Flooding Complaints\n\
by Street Segment in New Dorp-Midland Beach, Staten Island from 2010 to 2020'
axs[0].set_title(label, x=.3)
axs[0].set_xlabel('Count')
axs[0].set_ylabel('Street Segment')
# second plot
data = streets_with_count.sort_values(by='count_per_100ft', ascending=False).head(10)
sns.barplot(
data=data,
y='street_and_id',
x='count_per_100ft',
color='#1f77b4',
ax=axs[1]
)
label = 'Count of NYC 311 Street Flooding Complaints per 100 ft.\n\
by Street Segment in New Dorp-Midland Beach, Staten Island from 2010 to 2020'
axs[1].set_title(label, x=.3)
axs[1].set_xlabel('Count per 100 ft.')
axs[1].set_ylabel('Street Segment')
fig.tight_layout()
# transform complaints to geodataframe
crs = 2263
geometry = gpd.points_from_xy(
df['x_coordinate_state_plane'],
df['y_coordinate_state_plane']
)
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
# sanity check
gdf.head()
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | x_coordinate_state_plane | y_coordinate_state_plane | latitude | longitude | location | incident_address | street_name | bbl | due_date | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 18265181 | 2010-07-14T08:38:00.000 | 2010-07-14T08:38:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | PELHAM PKWY | STILLWELL AVE | PELHAM PKWY | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | POINT EMPTY |
1 | 34783066 | 2016-11-15T09:27:00.000 | 2016-11-15T10:05:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | LAFAYETTE AVENUE | ... | 958594.0 | 170855.0 | 40.635597 | -74.092438 | {'latitude': '40.635596930697716', 'longitude'... | NaN | NaN | NaN | NaN | POINT (958594.000 170855.000) |
2 | 21549616 | 2011-09-29T10:34:00.000 | 2011-09-30T10:40:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | THURSBY AVE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | POINT EMPTY |
3 | 35839080 | 2017-03-31T20:24:00.000 | 2017-04-01T02:25:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | 3 AVENUE | 2 AVENUE | NaN | ... | NaN | NaN | NaN | NaN | NaN | EAST 106 STREET | EAST 106 STREET | NaN | NaN | POINT EMPTY |
4 | 29443390 | 2014-12-06T10:23:00.000 | 2014-12-06T11:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NAGLE AVE | DYCKMAN ST | NAGLE AVE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | POINT EMPTY |
5 rows × 33 columns
# streets
path = 'data/streets.gpkg'
streets = gpd.read_file(path)
streets.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 262.77781228 | MULTILINESTRING ((979278.595 196555.690, 97929... |
1 | 5 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 259.415988519 | MULTILINESTRING ((979377.413 196797.951, 97950... |
2 | 6 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 280.444780871 | MULTILINESTRING ((979503.289 197024.782, 97964... |
3 | 8 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 42.0 | 13 | 13 | 1 | 32.0701391509 | MULTILINESTRING ((979553.746 196059.826, 97952... |
4 | 14 | BATTERY PL | BATTERY | BATTERY PL | 1 | Street | 24.0 | 13 | 13 | 1 | 206.27185039 | MULTILINESTRING ((980288.092 195963.182, 98026... |
# examine streets df
streets.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 99324 entries, 0 to 99323 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 physicalid 99324 non-null object 1 st_label 99324 non-null object 2 st_name 99324 non-null object 3 full_stree 99324 non-null object 4 rw_type 99324 non-null object 5 rw_type_name 99324 non-null object 6 st_width 99324 non-null object 7 frm_lvl_co 99324 non-null object 8 to_lvl_co 99324 non-null object 9 borocode 99324 non-null object 10 shape_leng 99324 non-null object 11 geometry 99324 non-null geometry dtypes: geometry(1), object(11) memory usage: 9.1+ MB
# return only Hammels-Arverne-Edgemere, Queens
name = 'Rockaway Beach-Arverne-Edgemere, Queens'
arverne_edgemere_shape = (
nta_gdf
.loc[nta_gdf['ntaname_full'] == name]
.reset_index(drop=True)
)
arverne_edgemere_shape
shape_area | ntaname | cdtaname | shape_leng | boroname | ntatype | nta2020 | borocode | countyfips | ntaabbrev | cdta2020 | geometry | ntaname_full | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 50502113.6837 | Rockaway Beach-Arverne-Edgemere | QN14 The Rockaways (CD 14 Approximation) | 67363.5655404 | Queens | 0 | QN1402 | 4 | 081 | RckwyBch | QN14 | MULTIPOLYGON (((1043586.958 159082.823, 104359... | Rockaway Beach-Arverne-Edgemere, Queens |
# clipping complaints that fall within arverne edgemere
arverne_edgemere_gdf = gpd.clip(
gdf,
arverne_edgemere_shape
)
arverne_edgemere_gdf.plot()
<AxesSubplot: >
# clipping the points that only fall within the nta shapes
streets_clipped = gpd.clip(
streets,
arverne_edgemere_shape
)
# sanity check plot
streets_clipped.plot()
/Users/geribauer/anaconda3/envs/duckdb_env/lib/python3.11/site-packages/geopandas/tools/clip.py:67: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)` clipped.loc[
<AxesSubplot: >
# sanity check geom type
streets_clipped.geom_type.value_counts()
LineString 810 MultiLineString 2 dtype: int64
# locating only linestring geom type
streets_clipped = (
streets_clipped
.loc[streets_clipped.geom_type == 'LineString']
.reset_index(drop=True)
)
streets_clipped.geom_type.value_counts()
LineString 810 dtype: int64
# sanity check plot
streets_clipped.plot()
<AxesSubplot: >
Methodology: https://medium.com/@brendan_ward/how-to-leverage-geopandas-for-faster-snapping-of-points-to-lines-6113c94e59aa
The code below is from Brendan's awesome post.
# offset by 80 ft.
offset = 80
bbox = arverne_edgemere_gdf.bounds + [-offset, -offset, offset, offset]
hits = bbox.apply(lambda row: list(streets_clipped.sindex.intersection(row)), axis=1)
tmp = pd.DataFrame({
"pt_idx": np.repeat(hits.index, hits.apply(len)),
"line_i": np.concatenate(hits.values)
})
tmp.head()
pt_idx | line_i | |
---|---|---|
0 | 16989 | 26.0 |
1 | 16989 | 28.0 |
2 | 16989 | 36.0 |
3 | 1746 | 25.0 |
4 | 1746 | 34.0 |
# join back to the lines on line_i; we use reset_index() to
# give us the ordinal position of each line
tmp = tmp.join(
streets_clipped.reset_index(drop=True),
on="line_i"
)
tmp.head()
pt_idx | line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 16989 | 26.0 | 14454 | BCH 69 ST | 69 | BCH 69 ST | 1 | Street | 50.0 | 13 | 13 | 4 | 250.440084793 | LINESTRING (1040166.129 157046.927, 1040147.64... |
1 | 16989 | 28.0 | 14452 | DE COSTA AVE | DE COSTA | DE COSTA AVE | 1 | Street | 32.0 | 13 | 13 | 4 | 693.196917932 | LINESTRING (1040147.641 157296.684, 1039456.55... |
2 | 16989 | 36.0 | 14453 | BCH 69 ST | 69 | BCH 69 ST | 1 | Street | 50.0 | 13 | 13 | 4 | 244.881228875 | LINESTRING (1040147.641 157296.684, 1040135.23... |
3 | 1746 | 25.0 | 104183 | BCH 64 ST | 64 | BCH 64 ST | 1 | Street | 30.0 | 13 | 13 | 4 | 494.61983231 | LINESTRING (1041547.046 156907.594, 1041508.79... |
4 | 1746 | 34.0 | 80839 | DE COSTA AVE | DE COSTA | DE COSTA AVE | 1 | Street | 24.0 | 13 | 13 | 4 | 262.490460936 | LINESTRING (1041508.798 157400.733, 1041247.05... |
# join back to the original points to get their geometry
# rename the point geometry as "point"
tmp = tmp.join(
arverne_edgemere_gdf.geometry.rename("point"),
on="pt_idx"
)
# convert back to a GeoDataFrame, so we can do spatial ops
tmp = gpd.GeoDataFrame(
tmp,
geometry="geometry",
crs=arverne_edgemere_gdf.crs
)
tmp.head()
pt_idx | line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 16989 | 26.0 | 14454 | BCH 69 ST | 69 | BCH 69 ST | 1 | Street | 50.0 | 13 | 13 | 4 | 250.440084793 | LINESTRING (1040166.129 157046.927, 1040147.64... | POINT (1040148.000 157296.000) |
1 | 16989 | 28.0 | 14452 | DE COSTA AVE | DE COSTA | DE COSTA AVE | 1 | Street | 32.0 | 13 | 13 | 4 | 693.196917932 | LINESTRING (1040147.641 157296.684, 1039456.55... | POINT (1040148.000 157296.000) |
2 | 16989 | 36.0 | 14453 | BCH 69 ST | 69 | BCH 69 ST | 1 | Street | 50.0 | 13 | 13 | 4 | 244.881228875 | LINESTRING (1040147.641 157296.684, 1040135.23... | POINT (1040148.000 157296.000) |
3 | 1746 | 25.0 | 104183 | BCH 64 ST | 64 | BCH 64 ST | 1 | Street | 30.0 | 13 | 13 | 4 | 494.61983231 | LINESTRING (1041547.046 156907.594, 1041508.79... | POINT (1041513.000 157311.000) |
4 | 1746 | 34.0 | 80839 | DE COSTA AVE | DE COSTA | DE COSTA AVE | 1 | Street | 24.0 | 13 | 13 | 4 | 262.490460936 | LINESTRING (1041508.798 157400.733, 1041247.05... | POINT (1041513.000 157311.000) |
tmp["snap_dist"] = tmp['geometry'].distance(gpd.GeoSeries(tmp.point))
# discard any lines that are greater than tolerance from points
# sort on ascending snap distance, so that closest goes to top
tmp = (
tmp
.loc[tmp.snap_dist <= offset]
.sort_values(by=["snap_dist"])
)
tmp.head()
pt_idx | line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1671 | 7173 | 252.0 | 197523 | BCH 88 ST | 88 | BCH 88 ST | 1 | Street | 30.0 | 13 | 13 | 4 | 50.0912759345 | LINESTRING (1036149.133 153656.343, 1036144.96... | POINT (1036145.000 153706.000) | 0.011066 |
1007 | 762 | 356.0 | 104191 | BCH 63 ST | 63 | BCH 63 ST | 1 | Street | 30.0 | 13 | 13 | 4 | 395.08378196 | LINESTRING (1041962.214 155024.238, 1041926.94... | POINT (1041927.000 155417.000) | 0.011800 |
1219 | 490 | 171.0 | 14338 | BCH 72 ST | 72 | BCH 72 ST | 1 | Street | 26.0 | 13 | 13 | 4 | 250.157755236 | LINESTRING (1039574.795 155745.963, 1039554.95... | POINT (1039555.000 155995.000) | 0.021221 |
1223 | 21557 | 171.0 | 14338 | BCH 72 ST | 72 | BCH 72 ST | 1 | Street | 26.0 | 13 | 13 | 4 | 250.157755236 | LINESTRING (1039574.795 155745.963, 1039554.95... | POINT (1039555.000 155995.000) | 0.021221 |
1486 | 21023 | 333.0 | 92055 | BCH 70 ST | 70 | BCH 70 ST | 1 | Street | 32.0 | 13 | 13 | 4 | 673.213590352 | LINESTRING (1040105.228 154597.321, 1040050.99... | POINT (1040051.000 155268.000) | 0.023457 |
# group by the index of the points and take the first, which is the closest line
closest = tmp.groupby("pt_idx").first()
closest = gpd.GeoDataFrame(closest, geometry="geometry")
# position of nearest point from start of the line
pos = closest.geometry.project(gpd.GeoSeries(closest.point))
# get new point location geometry
new_pts = closest.geometry.interpolate(pos)
new_pts.head()
pt_idx 52 POINT (1040819.734 155329.350) 62 POINT (1043313.307 155267.403) 101 POINT (1040144.337 157361.814) 111 POINT (1041240.643 157452.700) 172 POINT (1034728.396 152132.737) dtype: geometry
# create a new GeoDataFrame from the columns from the closest line and
# new point geometries (which will be called "geometries")
snapped = gpd.GeoDataFrame(closest, geometry=new_pts)
snapped.head()
line_i | physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pt_idx | |||||||||||||||
52 | 371.0 | 14373 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 50.0 | 13 | 13 | 4 | 267.587592764 | POINT (1040819.734 155329.350) | POINT (1040820.000 155326.000) | 3.360859 |
62 | 442.0 | 14425 | ARVERNE BLVD | ARVERNE | ARVERNE BLVD | 1 | Street | 52.0 | 13 | 13 | 4 | 588.693377133 | POINT (1043313.307 155267.403) | POINT (1043312.000 155264.000) | 3.644916 |
101 | 36.0 | 14453 | BCH 69 ST | 69 | BCH 69 ST | 1 | Street | 50.0 | 13 | 13 | 4 | 244.881228875 | POINT (1040144.337 157361.814) | POINT (1040148.000 157362.000) | 3.667291 |
111 | 43.0 | 80859 | BCH 65 ST | 65 | BCH 65 ST | 1 | Street | 22.0 | 13 | 13 | 4 | 249.849131194 | POINT (1041240.643 157452.700) | POINT (1041244.000 157453.000) | 3.370776 |
172 | 780.0 | 90112 | BCH 97 ST | 97 | BCH 97 ST | 1 | Street | 30.0 | 13 | 13 | 4 | 686.882173558 | POINT (1034728.396 152132.737) | POINT (1034731.000 152134.000) | 2.894381 |
# join back to the original points:
updated_points = (
arverne_edgemere_gdf
.drop(columns=["geometry"])
.join(snapped)
.dropna(subset=["geometry"])
)
updated_points.head()
unique_key | created_date | closed_date | agency | agency_name | complaint_type | descriptor | cross_street_1 | cross_street_2 | intersection_street_1 | ... | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | point | snap_dist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
16989 | 36474856 | 2017-06-18T17:44:00.000 | 2017-06-20T12:25:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | NaN | NaN | BEACH 69 STREET | ... | 1 | Street | 50.0 | 13 | 13 | 4 | 250.440084793 | POINT (1040147.693 157295.977) | POINT (1040148.000 157296.000) | 0.307548 |
1746 | 18558693 | 2010-08-25T17:32:00.000 | 2010-08-26T10:25:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ALMEDA AVE | DE COSTA AVE | NaN | ... | 1 | Street | 30.0 | 13 | 13 | 4 | 494.61983231 | POINT (1041515.741 157311.213) | POINT (1041513.000 157311.000) | 2.749361 |
5830 | 23287828 | 2012-05-23T18:42:00.000 | 2012-05-24T05:30:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | ALMEDA AVE | DE COSTA AVE | NaN | ... | 1 | Street | 30.0 | 13 | 13 | 4 | 240.424436393 | POINT (1041773.306 157333.270) | POINT (1041770.000 157333.000) | 3.317418 |
18798 | 38698384 | 2018-03-15T13:35:00.000 | 2018-03-15T15:13:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | BEACH 66 ST | BEACH 67 ST | NaN | ... | 1 | Street | 30.0 | 13 | 13 | 4 | 267.661919031 | POINT (1040903.206 157353.043) | POINT (1040903.000 157356.000) | 2.963919 |
5384 | 22905508 | 2012-03-19T16:35:00.000 | 2012-03-20T08:40:00.000 | DEP | Department of Environmental Protection | Sewer | Street Flooding (SJ) | BEACH 66 ST | DECOSTA AVE | BEACH 66 STREET | ... | 1 | Street | 42.0 | 13 | 13 | 4 | 495.233090459 | POINT (1040981.823 157357.986) | POINT (1040982.000 157358.000) | 0.177131 |
5 rows × 47 columns
updated_points.loc[:, ['snap_dist']].describe()
snap_dist | |
---|---|
count | 652.000000 |
mean | 2.984863 |
std | 4.159109 |
min | 0.011066 |
25% | 0.469492 |
50% | 2.686694 |
75% | 3.216407 |
max | 62.831774 |
plt.figure(figsize=(6, 4))
sns.histplot(updated_points.snap_dist)
plt.title('Histogram of snap_dist (ft.)')
Text(0.5, 1.0, 'Histogram of snap_dist (ft.)')
updated_points = gpd.GeoDataFrame(
updated_points,
geometry='point'
)
updated_points.plot()
<AxesSubplot: >
fig, ax = plt.subplots(figsize=(10, 10))
updated_points.plot(
color='black',
edgecolor='black',
alpha=.4,
ax=ax,
zorder=3
)
streets_clipped.plot(ax=ax, zorder=2)
(arverne_edgemere_shape
.plot(ax=ax,
zorder=1,
color='None',
edgecolor='black')
)
# adding basemap
ctx.add_basemap(ax, crs=2263, source=ctx.providers.CartoDB.PositronNoLabels, zorder=0)
label = 'NYC 311 Street Flooding Complaints\nin Rockaway Beach-Arverne-Edgemere, \
Queens, New York from 2010 to 2020'
ax.set_title(label, fontsize=12, pad=10)
plt.axis('off')
plt.tight_layout()
# count of complaints by street
gdf_count = (
updated_points
.groupby(by='physicalid')['created_date']
.count()
.reset_index()
.rename(columns={"created_date": "count"})
)
gdf_count.head()
physicalid | count | |
---|---|---|
0 | 101236 | 2 |
1 | 102480 | 1 |
2 | 104183 | 11 |
3 | 104184 | 4 |
4 | 104185 | 1 |
# buffer lines
streets_clipped = (
streets_clipped
.set_geometry('geometry')
.assign(new_geom=lambda x: x.geometry.buffer(40, cap_style=2))
.set_geometry('new_geom')
)
streets_clipped.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 191282 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 162.978712416 | LINESTRING (1044506.797 156193.332, 1044344.50... | POLYGON ((1044348.186 156138.517, 1044340.830 ... |
1 | 191280 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 332.280615233 | LINESTRING (1044837.670 156223.884, 1044506.79... | POLYGON ((1044510.474 156153.502, 1044503.119 ... |
2 | 14360 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 256.536878099 | LINESTRING (1045093.264 156245.866, 1044837.67... | POLYGON ((1044841.098 156184.031, 1044834.243 ... |
3 | 14359 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 48.0 | 13 | 13 | 4 | 272.066919415 | LINESTRING (1045364.484 156267.317, 1045093.26... | POLYGON ((1045096.418 156205.990, 1045090.110 ... |
4 | 14358 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 46.0 | 13 | 13 | 4 | 251.664413084 | LINESTRING (1045615.107 156290.191, 1045364.48... | POLYGON ((1045368.120 156227.482, 1045360.848 ... |
# joining our nta population data to our nta shapes data
streets_with_count = streets_clipped.merge(
gdf_count,
left_on='physicalid',
right_on='physicalid',
how='left'
)
streets_with_count['shape_leng'] = streets_with_count['geometry'].length
streets_with_count['count'] = streets_with_count['count'].fillna(0).astype(int)
streets_with_count.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 810 entries, 0 to 809 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 physicalid 810 non-null object 1 st_label 810 non-null object 2 st_name 810 non-null object 3 full_stree 810 non-null object 4 rw_type 810 non-null object 5 rw_type_name 810 non-null object 6 st_width 810 non-null object 7 frm_lvl_co 810 non-null object 8 to_lvl_co 810 non-null object 9 borocode 810 non-null object 10 shape_leng 810 non-null float64 11 geometry 810 non-null geometry 12 new_geom 810 non-null geometry 13 count 810 non-null int64 dtypes: float64(1), geometry(2), int64(1), object(10) memory usage: 94.9+ KB
# normalize counts
streets_with_count['count_per_100ft'] = (streets_with_count['count'] / streets_with_count['shape_leng']) * 100
streets_with_count.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | count | count_per_100ft | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 191282 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 162.978925 | LINESTRING (1044506.797 156193.332, 1044344.50... | POLYGON ((1044348.186 156138.517, 1044340.830 ... | 0 | 0.000000 |
1 | 191280 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 332.281048 | LINESTRING (1044837.670 156223.884, 1044506.79... | POLYGON ((1044510.474 156153.502, 1044503.119 ... | 0 | 0.000000 |
2 | 14360 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 256.537209 | LINESTRING (1045093.264 156245.866, 1044837.67... | POLYGON ((1044841.098 156184.031, 1044834.243 ... | 1 | 0.389807 |
3 | 14359 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 48.0 | 13 | 13 | 4 | 272.067267 | LINESTRING (1045364.484 156267.317, 1045093.26... | POLYGON ((1045096.418 156205.990, 1045090.110 ... | 0 | 0.000000 |
4 | 14358 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 46.0 | 13 | 13 | 4 | 251.664741 | LINESTRING (1045615.107 156290.191, 1045364.48... | POLYGON ((1045368.120 156227.482, 1045360.848 ... | 0 | 0.000000 |
streets_with_count.loc[:, ['count', 'count_per_100ft']].describe()
count | count_per_100ft | |
---|---|---|
count | 810.000000 | 810.000000 |
mean | 0.804938 | 0.463891 |
std | 2.666271 | 2.178782 |
min | 0.000000 | 0.000000 |
25% | 0.000000 | 0.000000 |
50% | 0.000000 | 0.000000 |
75% | 0.000000 | 0.000000 |
max | 46.000000 | 32.548808 |
fig, ax = plt.subplots(figsize=(10, 10))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
cmap = plt.cm.Blues
# get max of array and place as top bounds
norm = mpl.colors.BoundaryNorm([0, 1, 5, 10, 30, 50], cmap.N)
streets_with_count.plot(
column='count',
ax=ax,
cax=cax,
norm=norm,
zorder=2,
cmap='Blues',
edgecolor='black',
linewidth=.3,
legend=True
)
(arverne_edgemere_shape
.plot(ax=ax,
zorder=1,
color='None',
edgecolor='black')
)
# adding basemap
ctx.add_basemap(ax, crs=2263, source=ctx.providers.CartoDB.PositronNoLabels, zorder=0)
label = 'Count of NYC 311 Street Flooding Complaints \n\
by Street Segment in Rockaway Beach-Arverne-Edgemere, Queens from 2010 to 2020'
ax.set_title(label=label, fontsize=12, pad=10)
ax.axis('off')
plt.tight_layout()
fig, ax = plt.subplots(figsize=(10, 10))
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
cmap = plt.cm.Blues
# get max of array and place as top bounds
norm = mpl.colors.BoundaryNorm([0, 1, 3, 5, 10, 30], cmap.N)
streets_with_count.plot(
column='count',
ax=ax,
cax=cax,
norm=norm,
zorder=2,
cmap='Blues',
edgecolor='black',
linewidth=.3,
legend=True
)
(arverne_edgemere_shape
.plot(ax=ax,
zorder=1,
color='None',
edgecolor='black')
)
# adding basemap
ctx.add_basemap(ax, crs=2263, source=ctx.providers.CartoDB.PositronNoLabels, zorder=0)
label = 'Count of NYC 311 Street Flooding Complaints per 100 ft.\n\
by Street Segment in Rockaway Beach-Arverne-Edgemere, Queens from 2010 to 2020'
ax.set_title(label=label, fontsize=12, pad=10)
ax.axis('off')
plt.savefig('figures/arverne.png', dpi=250, bbox_inches='tight')
streets_with_count['street_and_id'] = (
streets_with_count['full_stree']
+ ', id:'
+ streets_with_count['physicalid']
)
streets_with_count.head()
physicalid | st_label | st_name | full_stree | rw_type | rw_type_name | st_width | frm_lvl_co | to_lvl_co | borocode | shape_leng | geometry | new_geom | count | count_per_100ft | street_and_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 191282 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 162.978925 | LINESTRING (1044506.797 156193.332, 1044344.50... | POLYGON ((1044348.186 156138.517, 1044340.830 ... | 0 | 0.000000 | BCH CHANNEL DR, id:191282 |
1 | 191280 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 332.281048 | LINESTRING (1044837.670 156223.884, 1044506.79... | POLYGON ((1044510.474 156153.502, 1044503.119 ... | 0 | 0.000000 | BCH CHANNEL DR, id:191280 |
2 | 14360 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 54.0 | 13 | 13 | 4 | 256.537209 | LINESTRING (1045093.264 156245.866, 1044837.67... | POLYGON ((1044841.098 156184.031, 1044834.243 ... | 1 | 0.389807 | BCH CHANNEL DR, id:14360 |
3 | 14359 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 48.0 | 13 | 13 | 4 | 272.067267 | LINESTRING (1045364.484 156267.317, 1045093.26... | POLYGON ((1045096.418 156205.990, 1045090.110 ... | 0 | 0.000000 | BCH CHANNEL DR, id:14359 |
4 | 14358 | BCH CHANNEL DR | CHANNEL | BCH CHANNEL DR | 1 | Street | 46.0 | 13 | 13 | 4 | 251.664741 | LINESTRING (1045615.107 156290.191, 1045364.48... | POLYGON ((1045368.120 156227.482, 1045360.848 ... | 0 | 0.000000 | BCH CHANNEL DR, id:14358 |
fig, axs = plt.subplots(2, 1, sharey=False, figsize=(8, 8))
# first plot
data = streets_with_count.sort_values(by='count', ascending=False).head(10)
sns.barplot(
data=data,
y='street_and_id',
x='count',
color='#1f77b4',
ax=axs[0]
)
label = 'Count of NYC 311 Street Flooding Complaints\n\
by Street Segment in Hammels-Arverne-Edgemere, Queens from 2010 to 2020'
axs[0].set_title(label, x=.3)
axs[0].set_xlabel('Count')
axs[0].set_ylabel('Street Segment')
# second plot
data = streets_with_count.sort_values(by='count_per_100ft', ascending=False).head(10)
sns.barplot(
data=data,
y='street_and_id',
x='count_per_100ft',
color='#1f77b4',
ax=axs[1]
)
label = 'Count of NYC 311 Street Flooding Complaints per 100 ft.\n\
by Street Segment in Hammels-Arverne-Edgemere, Queens from 2010 to 2020'
axs[1].set_title(label, x=.3)
axs[1].set_xlabel('Count per 100 ft.')
axs[1].set_ylabel('Street Segment')
plt.tight_layout()