import pyarrow.parquet
import folium
# Count and draw the row groups whose bounding box overlaps Kumasi.
#
# Only the Parquet metadata (per-row-group column statistics) is inspected
# below; every row group with usable statistics is drawn on the map as a
# rectangle, blue when it overlaps the Kumasi box and gray otherwise.
pf = pyarrow.parquet.ParquetFile('ordered-100k.parquet')
m = folium.Map(location=(8, 0), zoom_start=6, tiles="cartodb positron")

# Kumasi bounding box (min corner, max corner) as (lat, lon).
kminlat, kminlon = 6.5967, -1.7276
kmaxlat, kmaxlon = 6.7965, -1.4879

meta = pf.metadata  # hoisted: invariant across the loop
kumasi_count = 0
missing_stats = 0  # row groups that cannot be tested or drawn
for i in range(meta.num_row_groups):
    rg = meta.row_group(i)

    # Columns 1-4 are read as min-lon, min-lat, max-lon, max-lat, in that
    # order. NOTE(review): this relies on the file's column order -- confirm
    # against the schema if the file is regenerated.
    stats = [rg.column(c).statistics for c in (1, 2, 3, 4)]

    # `statistics` is None when the writer stored none for a column chunk,
    # and min/max may be absent even when it is set. Skip such row groups
    # instead of crashing on `.min` / `.max`.
    if any(s is None or not s.has_min_max for s in stats):
        missing_stats += 1
        continue

    minlon = stats[0].min
    minlat = stats[1].min
    maxlon = stats[2].max
    maxlat = stats[3].max

    # Axis-aligned box overlap test (strict: boxes that merely touch along
    # an edge are not counted).
    in_kumasi = (
        maxlat > kminlat
        and maxlon > kminlon
        and minlat < kmaxlat
        and minlon < kmaxlon
    )
    kumasi_count += int(in_kumasi)

    shade = 'blue' if in_kumasi else 'gray'
    folium.vector_layers.Rectangle(
        [(minlat, minlon), (maxlat, maxlon)],
        tooltip=i,
        color=shade,
        weight=1,
        fill_color=shade,
        fill_opacity=0.05,
    ).add_to(m)

print(kumasi_count, "of", meta.num_row_groups, "total row groups in Kumasi")
if missing_stats:
    # Reported separately so the main count line keeps its original format.
    print(missing_stats, "row groups skipped: no min/max statistics")

# Bare expression: as the last statement it lets a notebook display the map.
m
# Output: 106 of 1001 total row groups in Kumasi