import pyrasterframes
from pyrasterframes.utils import create_rf_spark_session
import pyrasterframes.rf_ipython # enables nicer visualizations of pandas DF
from pyrasterframes.rasterfunctions import *
import pyspark.sql.functions as F
spark = create_rf_spark_session()
uri = 'https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059' \
'/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF'
df = spark.read.raster(uri)
df.printSchema()
root |-- proj_raster_path: string (nullable = false) |-- proj_raster: struct (nullable = true) | |-- tile_context: struct (nullable = false) | | |-- extent: struct (nullable = false) | | | |-- xmin: double (nullable = false) | | | |-- ymin: double (nullable = false) | | | |-- xmax: double (nullable = false) | | | |-- ymax: double (nullable = false) | | |-- crs: struct (nullable = false) | | | |-- crsProj4: string (nullable = false) | |-- tile: tile (nullable = false)
Do some work with the raster data; add 3 element-wise to the pixel/cell values and show some rows of the DataFrame.
df.select(rf_local_add(df.proj_raster, F.lit(3)))
rf_local_add(proj_raster, 3) |
---|
The extent struct tells us where in the CRS the tile data covers. The granule is split into arbitrary sized chunks. Each row is a different chunk. Let's see how many.
Side note: you can configure the default size of these chunks, which are called Tiles, by passing a tuple of desired columns and rows as: raster(uri, tile_dimensions=(96, 96))
. The default is (256, 256)
df.count()
100
What area does the DataFrame cover?
crs = df.agg(F.first(rf_crs(df.proj_raster)).crsProj4.alias('crs')).first()['crs']
print(crs)
coverage_area = df.select(
df.proj_raster_path,
st_reproject(
st_geometry(rf_extent(df.proj_raster)),
rf_mk_crs(crs),
rf_mk_crs('EPSG:4326')).alias('footprint')
)
coverage_area
+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs
proj_raster_path | footprint |
---|---|
https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | POLYGON ((-70.85954815687087 8.933333332533772, -71.07986282542622 9.999999999104968, -69.99674110618135 9.999999999104968, -69.77978361352781 8.933333332533772, -70.85954815687087 8.933333332533772)) |
https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | POLYGON ((-69.77978361352781 8.933333332533772, -69.99674110618135 9.999999999104968, -68.91361938693649 9.999999999104968, -68.70001907018472 8.933333332533772, -69.77978361352781 8.933333332533772)) |
https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | POLYGON ((-68.70001907018474 8.933333332533772, -68.9136193869365 9.999999999104968, -67.83049766769162 9.999999999104968, -67.62025452684165 8.933333332533772, -68.70001907018474 8.933333332533772)) |
https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | POLYGON ((-67.62025452684165 8.933333332533772, -67.83049766769162 9.999999999104968, -66.74737594844675 9.999999999104968, -66.54048998349857 8.933333332533772, -67.62025452684165 8.933333332533772)) |
https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | POLYGON ((-66.54048998349859 8.933333332533772, -66.74737594844676 9.999999999104968, -65.66425422920187 9.999999999104968, -65.4607254401555 8.933333332533772, -66.54048998349859 8.933333332533772)) |
So where in the world is that? We'll generate a little visualization with Leaflet in the notebook using Folium.
import geopandas
import folium
gdf = geopandas.GeoDataFrame(
coverage_area.select('footprint').toPandas(),
geometry='footprint', crs={'init':'EPSG:4326'})
folium.Map((5, -65), zoom_start=6) \
.add_child(folium.GeoJson(gdf.__geo_interface__))
Look at a sample of the data. You may find it useful to double-click the tile image column to see larger or smaller rendering of the image.
#Look at a sample
pandas_df = df.select(
df.proj_raster_path,
rf_extent(df.proj_raster).alias('extent'),
rf_geometry(df.proj_raster).alias('geo'),
rf_tile(df.proj_raster).alias('tile'),
).limit(5).toPandas()
pandas_df
proj_raster_path | extent | geo | tile | |
---|---|---|---|---|
0 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7783653.637667, 993342.4642358534, -7665045.582235853, 1111950.519667) | POLYGON ((-7783653.637667 993342.4642358534, -7... | |
1 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7665045.582235853, 993342.4642358534, -7546437.526804706, 1111950.519667) | POLYGON ((-7665045.582235853 993342.4642358534,... | |
2 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7546437.526804707, 993342.4642358534, -7427829.47137356, 1111950.519667) | POLYGON ((-7546437.526804707 993342.4642358534,... | |
3 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7427829.47137356, 993342.4642358534, -7309221.415942413, 1111950.519667) | POLYGON ((-7427829.47137356 993342.4642358534, ... | |
4 | https://modis-pds.s3.amazonaws.com/MCD43A4.006/11/08/2019059/MCD43A4.A2019059.h11v08.006.2019072203257_B02.TIF | (-7309221.415942414, 993342.4642358534, -7190613.360511267, 1111950.519667) | POLYGON ((-7309221.415942414 993342.4642358534,... |