#!/usr/bin/env python
# coding: utf-8
"""Draw the bounding box of each Parquet row group and flag those overlapping Kumasi.

Only the row-group metadata (column min/max statistics) of
'ordered-100k.parquet' is consulted.  One rectangle per row group is added
to a folium map: blue when the box overlaps the Kumasi bounding box, gray
otherwise.  A summary count is printed at the end.

NOTE(review): columns 1-4 are read as (min lon, min lat, max lon, max lat);
this assumes the file's column order -- confirm against the schema.
"""

# In[9]:

import folium
import pyarrow.parquet

pf = pyarrow.parquet.ParquetFile('ordered-100k.parquet')
m = folium.Map(location=(8, 0), zoom_start=6, tiles="cartodb positron")

# Kumasi
kminlat, kminlon = 6.5967, -1.7276
kmaxlat, kmaxlon = 6.7965, -1.4879

metadata = pf.metadata  # hoisted: looked up once instead of on every iteration
kumasi_count = 0
skipped_count = 0

for i in range(metadata.num_row_groups):
    rg = metadata.row_group(i)

    # `statistics` is None when the writer stored no stats for a column
    # chunk, and min/max are unusable when `has_min_max` is false.  Skip
    # such row groups instead of crashing on the attribute access or on a
    # None-vs-float comparison.
    stats = [rg.column(col).statistics for col in (1, 2, 3, 4)]
    if any(s is None or not s.has_min_max for s in stats):
        skipped_count += 1
        continue

    minlon = stats[0].min
    minlat = stats[1].min
    maxlon = stats[2].max
    maxlat = stats[3].max

    # Two axis-aligned boxes overlap iff each one starts before the other
    # ends on both axes (strict comparison: merely touching edges do not count).
    in_kumasi = (
        maxlat > kminlat
        and maxlon > kminlon
        and minlat < kmaxlat
        and minlon < kmaxlon
    )
    kumasi_count += int(in_kumasi)

    shade = 'blue' if in_kumasi else 'gray'
    folium.vector_layers.Rectangle(
        [(minlat, minlon), (maxlat, maxlon)],
        tooltip=i,
        color=shade,
        weight=1,
        fill_color=shade,
        fill_opacity=0.05,
    ).add_to(m)

print(kumasi_count, "of", metadata.num_row_groups, "total row groups in Kumasi")
if skipped_count:
    # Only emitted in the case where the script would previously have crashed.
    print(skipped_count, "row groups skipped (no min/max statistics)")

# Bare expression so a notebook renders the map as the cell output.
m