The following blocks of code will install the required packages and download the datasets to your Colab environment.
%%capture
if 'google.colab' in str(get_ipython()):
!pip install --upgrade xee
!pip install rioxarray
import ee
import xarray
import rioxarray as rxr
import matplotlib.pyplot as plt
import pandas as pd
import os
import datetime
import numpy as np
output_folder = 'output'
if not os.path.exists(output_folder):
os.mkdir(output_folder)
ee.Authenticate()
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
We start with the MODIS Vegetation Indices Version 6.1 data. We pre-process the data by applying cloud masking and pixel scaling.
modis = ee.ImageCollection('MODIS/061/MOD13Q1')
startDate = ee.Date.fromYMD(2020, 1, 1)
endDate = startDate.advance(2, 'year')
filtered = modis.filter(ee.Filter.date(startDate, endDate))
# Cloud Masking
def bitwiseExtract(input, fromBit, toBit):
maskSize = ee.Number(1).add(toBit).subtract(fromBit)
mask = ee.Number(1).leftShift(maskSize).subtract(1)
return input.rightShift(fromBit).bitwiseAnd(mask)
def maskSnowAndClouds(image):
summaryQa = image.select('SummaryQA')
# Select pixels which are less than or equals to 1 (0 or 1)
qaMask = bitwiseExtract(summaryQa, 0, 1).lte(1)
maskedImage = image.updateMask(qaMask)
return maskedImage.copyProperties(image, ['system:index', 'system:time_start'])
# MODIS VI values come as VI x 10000 that need to be scaled by 0.0001
def scaleBands(image):
scaled = image.multiply(0.0001)
return scaled.copyProperties(image, ['system:index', 'system:time_start'])
# Apply the function to all images in the collection
maskedCol = filtered \
.map(maskSnowAndClouds) \
.map(scaleBands)
Now we have an ImageCollection that we want to get it as a XArray Dataset. We define the region of interest and extract the ImageCollection using the 'ee' engine.
Our region of interest is in South India with the UTM Zone 46N. So we request the pixels in EPSG:32643 at 250m resolution.
aoi = ee.Geometry.Rectangle(77, 12, 79, 15)
ds = xarray.open_dataset(
maskedCol,
engine='ee',
crs='EPSG:32643',
scale=250,
geometry=aoi,
)
THe result is a XArray Dataset containing the image time-series.
ds
Select the 'NDVI' band.
original_time_series = ds.NDVI.chunk(dict(X=100, Y=100, time=-1))
original_time_series
<xarray.DataArray 'NDVI' (time: 46, X: 851, Y: 1341)> dask.array<xarray-<this-array>, shape=(46, 851, 1341), dtype=float64, chunksize=(46, 100, 100), chunktype=numpy.ndarray> Coordinates: * time (time) datetime64[ns] 2020-01-01 2020-01-17 ... 2021-12-19 * X (X) float32 77.03 77.03 77.03 77.03 77.04 ... 78.99 79.0 79.0 79.0 * Y (Y) float32 15.03 15.02 15.02 15.02 15.02 ... 12.01 12.0 12.0 12.0 Attributes: id: NDVI data_type: {'type': 'PixelType', 'precision': 'double', 'min': -3.27... dimensions: [172800, 67200] crs: SR-ORG:6974 crs_transform: [231.65635826395825, 0, -20015109.354, 0, -231.6563582639...
We use XArray's excellent time-series processing functionality to fill the cloud-masked pixels with linearly interpolated values from temporal neighbors. We also apply a moving-window smoothing to remove noise.
time_series_interpolated = original_time_series.interpolate_na('time', use_coordinate=False)
time_series_smooth = time_series_interpolated.rolling(time=3, center=True).mean()
time_series_smooth
original_ts = original_time_series.interp(Y=13.16, X=77.35)
smooth_ts = time_series_smooth.interp(Y=13.16, X=77.35)
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(15, 7)
original_ts.plot.line(
ax=ax, x='time',
marker='o', color='#66c2a4', linestyle='--', linewidth=1, markersize=4)
plt.show()
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(15, 7)
original_ts.plot.line(
ax=ax, x='time',
marker='^', color='#66c2a4', linestyle='--', linewidth=1, markersize=2)
smooth_ts.plot.line(
ax=ax, x='time',
marker='o', color='#238b45', linestyle='-', linewidth=1, markersize=4)
plt.show()
df = smooth_ts.to_pandas()
df
output_filename = 'smoothed_time_series.csv'
output_filepath = os.path.join(output_folder, output_filename)
df.reset_index().to_csv(output_filepath, index=False)
Save the original time-series images using rioxarray
as GeoTIFF files.
for time in original_time_series.time.values:
image = original_time_series.sel(time=time)
# transform the image to suit rioxarray format
image = image \
.rename({'Y': 'y', 'X': 'x'}) \
.transpose('y', 'x') \
.rio.write_crs('EPSG:4326')
date = np.datetime_as_string(time, unit='D')
output_file = f'original_{date}.tif'
output_path = os.path.join(output_folder, output_file)
image.rio.to_raster(output_path, driver='COG')
Save the smoothed time-series images.
for time in time_series_smooth.time.values:
image = time_series_smooth.sel(time=time)
# transform the image to suit rioxarray format
image = image \
.rename({'Y': 'y', 'X': 'x'}) \
.transpose('y', 'x') \
.rio.write_crs('EPSG:4326')
date = np.datetime_as_string(time, unit='D')
output_file = f'smoothed_{date}.tif'
output_path = os.path.join(output_folder, output_file)
image.rio.to_raster(output_path, driver='COG')