import pyrasterframes
import pyrasterframes.rf_ipython # enables nicer visualizations of pandas DF
from pyrasterframes.rasterfunctions import (rf_local_add, rf_dimensions, rf_extent, rf_crs, rf_mk_crs,
st_geometry, st_reproject, rf_tile)
import pyspark.sql.functions as F
# Obtain the Spark session through pyrasterframes; the raster reader used
# below (`spark.read.raster`) is accessed from this session.
spark = pyrasterframes.get_spark_session()

# Location of a single MODIS MCD43A4 GeoTIFF (file name suffix `_B02`).
uri = (
    'https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059'
    '/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF'
)

# Load the raster into a DataFrame and print its schema.
df = spark.read.raster(uri)
df.printSchema()
root |-- proj_raster_path: string (nullable = false) |-- proj_raster: struct (nullable = true) | |-- tile_context: struct (nullable = false) | | |-- extent: struct (nullable = false) | | | |-- xmin: double (nullable = false) | | | |-- ymin: double (nullable = false) | | | |-- xmax: double (nullable = false) | | | |-- ymax: double (nullable = false) | | |-- crs: struct (nullable = false) | | | |-- crsProj4: string (nullable = false) | |-- tile: tile (nullable = false)
Do some work with the raster data; add 3 element-wise to the pixel/cell values and show some rows of the DataFrame.
# Add the literal 3 to the `proj_raster` column and show the first five
# rows without truncating the column contents.
plus_three = rf_local_add(df.proj_raster, F.lit(3))
df.select(plus_three).show(5, truncate=False)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |rf_local_add(proj_raster, 3) | +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |[[[-7783653.637667, 993342.4642358534, -7665045.582235852, 1111950.519667], [+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs ]], [int16ud32767, (256,255), [3408,3471,3110,2875,2798,2973,3255,3169,-2147483648,3217,...,-2147483648,-2147483648,-2147483648,-2147483648,-2147483648,-2147483648,-2147483648,2841,3226,-2147483648]]]| |[[[-7665045.582235853, 993342.4642358534, -7546437.526804706, 1111950.519667], [+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs ]], [int16ud32767, (256,255), [2337,2346,2581,2751,2575,2364,2223,2384,2618,2296,...,-2147483648,-2147483648,2608,2701,2713,3050,2983,2953,3252,2682]]] | |[[[-7546437.526804707, 993342.4642358534, -7427829.471373559, 1111950.519667], [+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs ]], [int16ud32767, (256,255), [2728,2784,2781,2567,2539,2254,2327,2436,2888,2589,...,2741,2515,2843,2934,2801,3044,2899,2430,2471,2645]]] | |[[[-7427829.47137356, 993342.4642358534, -7309221.415942413, 1111950.519667], [+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs ]], [int16ud32767, (256,255), 
[3058,3163,3036,3228,2877,3310,2885,2932,2931,2940,...,2634,2531,2122,1911,2229,2507,2239,2272,2499,2966]]] | |[[[-7309221.415942414, 993342.4642358534, -7190613.360511266, 1111950.519667], [+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs ]], [int16ud32767, (256,255), [3355,3502,3055,3343,3334,-2147483648,-2147483648,-2147483648,-2147483648,3058,...,2537,2851,2905,2449,2605,3025,2719,3054,3226,3052]]] | +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ only showing top 5 rows
The extent struct tells us what area, expressed in the coordinates of the CRS, the tile data covers. The granule is split into arbitrarily sized chunks. Each row is a different chunk. Let's see how many there are.
Side note: you can configure the size of these chunks, which are called Tiles, by passing a tuple of the desired columns and rows, as in: raster(uri, tile_dimensions=(96, 96)). The default is (256, 256).
# Number of rows in the DataFrame.
df.count()
100
What area does the DataFrame cover?
# Take the proj4 string of the CRS from the first row of the DataFrame.
crs_row = df.agg(
    F.first(rf_crs(df.proj_raster)).crsProj4.alias('crs')
).first()
crs = crs_row['crs']
print(crs)

# Turn each row's extent into a geometry and reproject it from the
# raster's CRS to EPSG:4326.
extent_geom = st_geometry(rf_extent(df.proj_raster))
footprint = st_reproject(
    extent_geom,
    rf_mk_crs(crs),
    rf_mk_crs('EPSG:4326'),
).alias('footprint')

coverage_area = df.select(df.proj_raster_path, footprint)
coverage_area.show(10, truncate=False)
+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |proj_raster_path |footprint | +--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-70.85954815687087 8.933333332533772, -71.07986282542622 9.999999999104968, -69.99674110618135 9.999999999104968, -69.7797836135278 8.933333332533772, -70.85954815687087 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-69.77978361352781 8.933333332533772, -69.99674110618135 9.999999999104968, -68.91361938693649 9.999999999104968, -68.70001907018472 8.933333332533772, -69.77978361352781 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-68.70001907018474 8.933333332533772, -68.9136193869365 9.999999999104968, -67.8304976676916 9.999999999104968, -67.62025452684163 8.933333332533772, -68.70001907018474 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-67.62025452684165 8.933333332533772, -67.83049766769162 9.999999999104968, -66.74737594844675 9.999999999104968, -66.54048998349857 8.933333332533772, -67.62025452684165 8.933333332533772)) | 
|https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-66.54048998349859 8.933333332533772, -66.74737594844676 9.999999999104968, -65.66425422920187 9.999999999104968, -65.4607254401555 8.933333332533772, -66.54048998349859 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-65.4607254401555 8.933333332533772, -65.66425422920187 9.999999999104968, -64.58113250995702 9.999999999104968, -64.38096089681244 8.933333332533772, -65.4607254401555 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-64.38096089681244 8.933333332533772, -64.58113250995702 9.999999999104968, -63.498010790712144 9.999999999104968, -63.30119635346936 8.933333332533772, -64.38096089681244 8.933333332533772))| |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-63.30119635346937 8.933333332533772, -63.49801079071215 9.999999999104968, -62.41488907146726 9.999999999104968, -62.221431810126276 8.933333332533772, -63.30119635346937 8.933333332533772))| |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-62.22143181012629 8.933333332533772, -62.41488907146727 9.999999999104968, -61.33176735222239 9.999999999104968, -61.14166726678321 8.933333332533772, -62.22143181012629 8.933333332533772)) | |https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF|POLYGON ((-61.14166726678322 8.933333332533772, -61.3317673522224 9.999999999104968, -60.92559670750556 9.999999999104968, -60.736755563029554 8.933333332533772, -61.14166726678322 8.933333332533772)) | 
+--------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ only showing top 10 rows
So where in the world is that? We'll generate a little visualization with Leaflet in the notebook using Folium.
import geopandas
import folium
# Collect the reprojected footprints to the driver and wrap them in a
# GeoDataFrame. The CRS is given as an authority string: the older
# `{'init': 'EPSG:4326'}` dict form is deprecated by pyproj/geopandas and
# emits a FutureWarning on current releases.
gdf = geopandas.GeoDataFrame(
    coverage_area.select('footprint').toPandas(),
    geometry='footprint',
    crs='EPSG:4326',
)

# Draw the footprints as a GeoJSON overlay on a Leaflet map. The map is the
# last expression of the cell so that the notebook renders it.
folium.Map((5, -65), zoom_start=6) \
    .add_child(folium.GeoJson(gdf.__geo_interface__))
Look at a sample of the data. You may find it useful to double-click the tile image column to see a larger or smaller rendering of the image.
# Look at a sample: the source path, extent and tile of the first five rows,
# collected into a pandas DataFrame.
sample_columns = [
    df.proj_raster_path,
    rf_extent(df.proj_raster).alias('extent'),
    rf_tile(df.proj_raster).alias('tile'),
]
pandas_df = df.select(*sample_columns).limit(5).toPandas()
pandas_df
proj_raster_path | extent | tile | |
---|---|---|---|
0 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7783653.637667, 993342.4642358534, -7665045.582235852, 1111950.519667) | |
1 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7665045.582235853, 993342.4642358534, -7546437.526804706, 1111950.519667) | |
2 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7546437.526804707, 993342.4642358534, -7427829.471373559, 1111950.519667) | |
3 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7427829.47137356, 993342.4642358534, -7309221.415942413, 1111950.519667) | |
4 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7309221.415942414, 993342.4642358534, -7190613.360511266, 1111950.519667) |