This notebook crops a directory of tiffs to a subset area of interest using an interactive Matplotlib plot of an image in your data stack.
This notebook assumes that users have generated the original image from Prepare_Data_Stack
notebook. Users now have the option to subset the original image using the following methods:
Important Note about JupyterHub
Your JupyterHub server will automatically shutdown when left idle for more than 1 hour. Your notebooks will not be lost but you will have to restart their kernels and re-run them from the beginning. You will not be able to seamlessly continue running a partially run notebook.
# Capture this notebook's URL so we can build a link to the environment-setup
# notebook in the warning below.
import url_widget as url_w
notebookUrl = url_w.URLWidget()
display(notebookUrl)
from IPython.display import Markdown
from IPython.display import display
notebookUrl = notebookUrl.value
# IPython '!' magics: each returns the command's stdout as a list of lines.
user = !echo $JUPYTERHUB_USER
env = !echo $CONDA_PREFIX
# An empty CONDA_PREFIX means we are in the default (base) kernel.
if env[0] == '':
    env[0] = 'Python 3 (base)'
# Warn (but do not stop) if the notebook is not running in the expected
# "rtc_analysis" conda environment.
if env[0] != '/home/jovyan/.local/envs/rtc_analysis':
    display(Markdown(f'<text style=color:red><strong>WARNING:</strong></text>'))
    display(Markdown(f'<text style=color:red>This notebook should be run using the "rtc_analysis" conda environment.</text>'))
    display(Markdown(f'<text style=color:red>It is currently using the "{env[0].split("/")[-1]}" environment.</text>'))
    display(Markdown(f'<text style=color:red>Select the "rtc_analysis" from the "Change Kernel" submenu of the "Kernel" menu.</text>'))
    display(Markdown(f'<text style=color:red>If the "rtc_analysis" environment is not present, use <a href="{notebookUrl.split("/user")[0]}/user/{user[0]}/notebooks/conda_environments/Create_OSL_Conda_Environments.ipynb"> Create_OSL_Conda_Environments.ipynb </a> to create it.</text>'))
    display(Markdown(f'<text style=color:red>Note that you must restart your server after creating a new environment before it is usable by notebooks.</text>'))
In this notebook we will use the following scientific library:
Import the necessary libraries and modules:
%%capture
from pathlib import Path
import json # for loads
import shutil
from osgeo import gdal
import pyproj
from IPython.display import Markdown
from IPython.display import display
%matplotlib widget
from ipyfilechooser import FileChooser
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 12})
import opensarlab_lib as asfn
asfn.jupytertheme_matplotlib_format()
Write functions to gather and print individual tiff paths:
def get_tiff_paths(paths):
    """Return a sorted list of tiff paths matching a glob pattern.

    `paths` is a Path whose final component is a glob (e.g. dir/"*.tif*");
    the search recurses under the pattern's parent directory.
    """
    return sorted(paths.parent.rglob(paths.name))
def print_tiff_paths(tiff_paths):
    """Print a header followed by each tiff path, blank-line separated."""
    print("Tiff paths:")
    for tiff in tiff_paths:
        print(f"{tiff}\n")
Select the directory holding your tiffs
- Click the "Select" button to make a selection
- Click the "Change" button to alter your selection
- Use the Prepare_Data_Stack_Hyp3 notebook to generate the directory; tiffs should be in the RTC_GAMMA_tiffs
# File chooser for picking the directory that holds the tiffs.
# (The extracted source had the markdown word "directory." fused onto this
# line; the actual code assigns the chooser to `fc`, used below.)
fc = FileChooser('/home/jovyan/notebooks')
display(fc)
Determine the path to the analysis directory containing the tiff directory:
# Resolve the chosen tiff directory, its parent (the analysis directory), and
# a recursive glob pattern matching every tiff under it.
tiff_dir = Path(fc.selected_path)
analysis_dir = tiff_dir.parent
print(f"analysis_dir: {analysis_dir}")
paths = tiff_dir/"*.tif*"
tiff_paths = get_tiff_paths(paths)
Determine the UTM zone for your images.
This assumes you have already reprojected multiple UTM projections to a single predominant projection using the Prepare_Data_Stack_Hyp3
notebook.
# Note - you should only have files that end with .tif, all in the same format
# Read the first tiff's coordinate system WKT and scrape the EPSG code of its
# UTM zone out of the string.
info = gdal.Info(str(tiff_paths[0]), format='json')
info = info['coordinateSystem']['wkt']
# NOTE(review): string-slicing the WKT for the trailing authority ID is
# fragile — it assumes the last 'ID[...]' entry is the horizontal EPSG code.
utm = info.split('ID')[-1].split(',')[1][0:-2]
print(f"UTM Zone: {utm}")
tiff_paths = get_tiff_paths(paths)
print_tiff_paths(tiff_paths)
Create a string containing paths to one image for each area represented in the stack:
# Collect one representative tiff per distinct footprint so overlapping
# duplicates are merged only once.
to_merge = {}
for pth in tiff_paths:
    # Skip products left over from a previous subsetting run.
    if 'subset' in str(pth):
        continue
    # gdal.Info with '-json' already returns a dict; the original
    # json.dumps -> json.loads round-trip was a no-op and is removed.
    info = gdal.Info(str(pth), options=['-json'])
    corners = info['wgs84Extent']['coordinates']
    # Two opposite corners, rounded to whole degrees, key the footprint.
    coords = [corners[0][0], corners[0][3]]
    for i in range(2):
        for j in range(2):
            coords[i][j] = round(coords[i][j])
    str_coords = f"{str(coords[0])}{str(coords[1])}"
    if str_coords not in to_merge:
        to_merge[str_coords] = pth
# Space-separated path list (leading space preserved) for gdal_merge below.
merge_paths = "".join(f" {p}" for p in to_merge.values())
print(merge_paths)
Merge the images for display in the Area-Of-Interest selector:
# Merge one tiff per footprint into a single full-scene image, then wrap it
# in a VRT for display in the AOI selector.
full_scene = analysis_dir/'full_scene.tif'
# Remove a stale output from a previous run; gdal_merge will not overwrite.
if full_scene.exists():
    full_scene.unlink()
gdal_command = f"gdal_merge.py -o {full_scene} {merge_paths}"
!{gdal_command}
image_file = f"{analysis_dir}/raster_stack.vrt"
!gdalbuildvrt -separate $image_file -overwrite $full_scene
Convert the VRT into an array:
# Open the VRT and load the whole raster into a numpy array for display.
img = gdal.Open(image_file)
rasterstack = img.ReadAsArray()
Print the number of bands, pixels, and lines:
# Basic raster dimensions of the merged stack.
print(img.RasterCount) # Number of Bands
print(img.RasterXSize) # Number of Pixels
print(img.RasterYSize) # Number of Lines
Choose the best method to cut out the image
# Present the three subsetting methods and record the user's choice (1-3).
print("Please choose one of three options:")
option_key = [
    None,
    'Option 1: Draw rectangle with drag and drop.',
    'Option 2: Write/paste Well-Known Text (WKT).',
    'Option 3: Upload shapefile.'
]
# Index 0 is a placeholder so option_key.index(...) maps straight to the
# option number; offer every real option via a slice.
option = asfn.select_parameter(option_key[1:], '')
display(option)
choice = option_key.index(option.value)
## this generates interactive plots (AOI subset) (option 1)
if choice == 1:
    # 7x7-inch interactive figure for drawing the area of interest.
    aoi = asfn.AOI_Selector(rasterstack, 7, 7)
Choose a .tif
file to generate your shapefile
# Options 2 & 3: warn about WKT/shapefile limitations and pick the first
# tiff as the image to subset.
if choice != 1:
    display(Markdown(f'<text style=color:red>NOTE: As of now, your WKT/shapefile will work if it meets following conditions: </text>'))
    display(Markdown(f'<ol text style=color:red><li>Simple polygons </li> <li>Has same projection as the original image.</li></ol>'))
    display(Markdown(f'<text style=color:red>Using complex shapes (e.g. MULTIPOLYGON, POLYGON with holes, etc.) may cause an unexpected results.</text>'))
    try:
        infile = tiff_paths[0]
    # Narrowed from a bare `except:` — only an empty path list is expected.
    except IndexError:
        raise OSError('Directory that contains your .tif files are empty.')
    print(infile)
Check if you have a correct infile
# Validate the chosen infile and derive the subset output path.
if choice != 1:
    try:
        suffix = infile.suffix
    # Narrowed from a bare `except:` — a non-Path object lacks .suffix.
    except AttributeError:
        raise TypeError(f'{infile} is not a valid path.')
    # NOTE(review): this rejects '.tiff' files even though the glob above
    # matches '*.tif*' — confirm that is intended.
    if suffix != '.tif':
        raise ValueError(f'File you chose is not a ".tif" file. Pick a valid ".tif" file.')
    # path to your subset image file
    outfile = str(infile.parent/f'subset_{infile.stem}.tif')
Useful functions used in option 2 & 3:
# Helper functions used by options 2 & 3.
if choice != 1:
    from osgeo import osr

    def oldSubsetExist(directory_path) -> bool:
        """Return True if directory_path contains any 'subset_*.tif' file."""
        for path in directory_path.iterdir():
            if 'subset_' in str(path) and path.suffix == '.tif':
                return True
        return False

    def removeOldSubset(directory_path) -> None:
        """Delete every 'subset_*.tif' file found in directory_path."""
        for path in directory_path.iterdir():
            if 'subset_' in str(path) and path.suffix == '.tif':
                print(f'Removed {path}...')
                path.unlink()

    # Gets EPSG from your infile
    def getEPSG(tif_path) -> int:
        """Return the EPSG code of the tif's projection (e.g. 4326)."""
        # Path.exists() returns a bool and never raises, so the original
        # try/except around it was dead code — test the result instead.
        if not Path(tif_path).exists():
            print('That .tif file does not exist. Please enter a valid path.')
        ds = gdal.OpenEx(tif_path)
        prj = ds.GetProjection()
        return int(osr.SpatialReference(prj).GetAttrValue('AUTHORITY', 1))
Clean up previous shapefile components (.shp, .prj, .dbf, .shx)
# Option 2 only: ask whether to delete shapefile components from earlier runs.
if choice == 2:
    print("Would you like to remove all instances of previous/unused shapely files?")
    shp_option = asfn.select_parameter(['Yes', 'No',], '')
    display(shp_option)
# Delete leftover shapefile components if the user opted in.
if choice == 2 and shp_option.value == 'Yes':
    shapefile_extensions = {'.shp', '.dbf', '.prj', '.shx'}
    for path in infile.parent.iterdir():
        # Membership test replaces the original nested suffix loop.
        if path.suffix in shapefile_extensions:
            print(f'Removed {path}')
            path.unlink()
Choose a name for your .shp
file:
# Option 2: ask for a shapefile name, defaulting to 'shape' on empty input.
if choice == 2:
    shp_name = input('Choose a name for your shapefly file: ') or 'shape'
    shp = str(infile.parent/f'{shp_name}.shp')
    print(shp)
Generate the .shp, .prj, and other relevant files.
# Let user input WKT (option 2)
if choice == 2:
    from osgeo import ogr
    import shapely.wkt as sWkt
    import geopandas
    print("When inputting your WKT, here are few things to note:\n"\
          "\t1) If you don't already have WKT, you can use GIS software (e.g. ArcGIS) to obtain WKT for your image.\n"\
          "\t2) It will only accept single polygon (e.g. POLYGON((x y, x y, ...)). \n"\
          "\t3) Your WKT has to be based off of your original image that you wish to subset.")
    # Re-prompt until the WKT parses and describes a valid (simple) polygon.
    correctWktInput = False
    while not correctWktInput:
        wkt = input("Please enter your WKT: ")
        try:
            geoShape = sWkt.loads(wkt)
            series = geopandas.GeoSeries([geoShape])
            # is_valid is False for self-intersecting / malformed polygons.
            if not series.is_valid[0]:
                # Bug fix: the original `raise('...')` raised a bare string,
                # which is a TypeError in Python 3 (it only "worked" because
                # the except below swallowed it). Raise a real exception.
                raise ValueError('Obsecure shape detected.')
        except Exception:
            print('Error due to an unclosed shape, obsecure shape, or bad input. Try again...')
            continue
        correctWktInput = True

    outfile = str(infile.parent/f'subset_{infile.stem}.tif')
    epsg = getEPSG(str(infile))
    # For reference, latlong epsg = 4326
    driver = ogr.GetDriverByName('Esri Shapefile')
    ds = driver.CreateDataSource(shp)
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(epsg)
    layer = ds.CreateLayer('', srs, ogr.wkbPolygon)
    defn = layer.GetLayerDefn()
    # Create a new feature (attribute and geometry)
    feat = ogr.Feature(defn)
    # Make a geometry from Shapely object
    geom = ogr.CreateGeometryFromWkt(wkt)
    feat.SetGeometry(geom)
    layer.CreateFeature(feat)
    feat = geom = None # destroy these
    # Save and close everything so OGR flushes the shapefile to disk
    ds = layer = feat = geom = None
Upload your shapefile (.shp) along with its related files (.prj, .shx, and .dbf). Once they are uploaded, select the .shp file using the file selector. If you select an existing .shp file, it will be highlighted in orange.
Note: the .shp, .shx, .dbf, and .prj files must be in the same directory.
# Option 3: warn about projection requirements, then open a file chooser for
# the uploaded shapefile.
if choice == 3:
    display(Markdown(f'<text style=color:red>WARNING: The UPLOADED .shp FILE MUST HAVE A SAME EPSG/PROJECTION AS THE ORIGINAL IMAGE.</text>'))
    display(Markdown(f'<text style=color:red>IF YOUR .shp FILE DOES NOT HAVE PROJECTION OR HAVE DIFFERENT PROJECTION FROM ORIGINAL IMAGE,</text>'))
    display(Markdown(f'<text style=color:red>IT MAY CAUSE AN UNEXPECTED RESULTS.</text>'))
    shpfc = FileChooser('/home/jovyan/notebooks/SAR_Training/English/Master')
    display(shpfc)
# Option 3: resolve the chooser's selection into a Path.
if choice == 3:
    try:
        shp = Path(shpfc.selected)
    # Narrowed from a bare `except:` — shpfc.selected is None until the user
    # picks a file, and Path(None) raises TypeError.
    except TypeError:
        raise TypeError('Please choose the file path before proceeding.')
    print(shp)
# Extract wkt from shapefile:
if choice == 3:
    if shp.suffix == '.shp':
        # Open with OGR and export the first feature's geometry as WKT.
        gInfo = gdal.OpenEx(str(shp))
        layer = gInfo.GetLayer()
        feature = layer.GetFeature(0)
        wkt = feature.GetGeometryRef().ExportToWkt()
If a subset image exists from a previous run, you will be asked to either keep the old files or remove them and generate new ones.
# Options 2 & 3: if subset files from a previous run exist, ask whether to
# keep or regenerate them.
if choice != 1:
    # Check if 'subset' files from previous run exists
    if oldSubsetExist(infile.parent):
        print('Items from previous run exists. Would you like to keep them or remove them to generate new items?')
        clean_option = asfn.select_parameter(['Clean and Generate New Items', 'Keep Old Items', ], '')
        display(clean_option)
# Options 2 & 3: crop the chosen tiff to the shapefile outline with gdalwarp.
if choice != 1:
    print(shp)
    generate_command = f'gdalwarp -cutline {shp} -crop_to_cutline {infile} {outfile}'
    if oldSubsetExist(infile.parent):
        if clean_option.value == 'Clean and Generate New Items':
            print('Cleaning previously generated subset file(s)...')
            removeOldSubset(infile.parent)
            print('\n')
        else:
            # User kept the old subsets: replace the warp with a no-op echo.
            generate_command = f'echo Kept previous subset.'
    !{generate_command}
Gather and define projection details:
# Geotransform of the merged stack, plus source/target projections used by
# the coordinate-conversion helpers below.
geotrans = img.GetGeoTransform()
projlatlon = pyproj.Proj('EPSG:4326') # WGS84
projimg = pyproj.Proj(f'EPSG:{utm}')
Write a function to convert the pixel, line coordinates from the AOI selector into geographic coordinates in the stack's EPSG projection:
# xy -> geocoords
def xy_to_geocoord(x, y, geotrans,latlon=True):
    """Convert pixel/line (x, y) to map coordinates via the geotransform.

    With latlon=True the point is additionally reprojected from the stack's
    UTM zone to WGS84; otherwise it stays in EPSG:{utm}.
    """
    ref_x = geotrans[0] + (x * geotrans[1])
    ref_y = geotrans[3] + (y * geotrans[5])
    if latlon:
        # NOTE(review): pyproj.transform is deprecated (pyproj 2+); the
        # Transformer API is the modern replacement. Also note the swapped
        # (ref_y, ref_x) assignment — confirm the intended axis order
        # before changing anything here.
        ref_y, ref_x = pyproj.transform(projimg, projlatlon, ref_x, ref_y)
    return [ref_x, ref_y]
# geocoords -> xy
def geocoord_to_xy(ref_x, ref_y, geotrans):
    """Convert map coordinates (ref_x, ref_y) — e.g. taken from a WKT —
    into fractional pixel/line indices using the GDAL geotransform."""
    col = (ref_x - geotrans[0]) / geotrans[1]
    row = (ref_y - geotrans[3]) / geotrans[5]
    return [col, row]
Option 2 & 3 - Write a function to display the cut out image:
# Display cropped image & place cropped image on top on original image.
import numpy as np
import matplotlib.patches as patches
import re
# Display image (different option depending on options)
# Display image (different option depending on options)
def displayImage(rasterstack,
                 fig_xsize=None, fig_ysize=None,
                 wkt=None,
                 cmap=plt.cm.gist_gray,
                 vmin=None, vmax=None,):
    """Display a raster; when `wkt` is given, outline the crop region on it.

    vmin/vmax default to the 1st/99th percentiles of the data.
    """
    # Compare against None (not truthiness) so an explicit 0 is honored.
    if vmin is None:
        vmin = np.nanpercentile(rasterstack, 1)
    if vmax is None:
        vmax = np.nanpercentile(rasterstack, 99)
    if fig_xsize and fig_ysize:
        fig, current_ax = plt.subplots(figsize=(fig_xsize, fig_ysize))
    else:
        fig, current_ax = plt.subplots()
    # Bug fix: honor the `cmap` argument (it was previously ignored and
    # gist_gray was hard-coded here).
    current_ax.imshow(rasterstack, cmap=cmap, vmin=vmin, vmax=vmax)
    # If it's original image, show where it's been cropped
    if wkt:
        showCropped(wkt, current_ax)
# Place cropped image on top of the image
def showCropped(wkt, ax):
    """Outline the WKT polygon on `ax`, converting geocoords to pixel xy."""
    numbers = [float(tok) for tok in re.findall(r"[-+]?\d*\.\d+|\d+", wkt)]
    # Since "patches.Polygon" does not require last coordinate, remove last coordinate (x,y)
    numbers = numbers[:-2]
    # Alternating values are x and y coordinates.
    x = numbers[0::2]
    y = numbers[1::2]
    if len(x) == len(y):
        # Convert every vertex from map coordinates to pixel coordinates.
        for i in range(len(x)):
            x[i], y[i] = geocoord_to_xy(x[i], y[i], geotrans)
        poly = patches.Polygon(xy=list(zip(x, y)), linestyle='-')
        # Add the patches to the Axes
        ax.add_patch(poly)
    plt.show()
Option 2 & 3 - Displaying original image and cropped image. If you are satisfied with the cropped image, proceed.
# Options 2 & 3: sanity-check the subset size, then show the full image with
# the crop outline followed by the cropped result.
if choice != 1:
    # If file size is too big, it will not display image:
    fileSize = Path(outfile).stat().st_size
    fileSizeGB = fileSize/(1024 ** 3) # in GB
    if fileSizeGB > 10.00:
        raise MemoryError('Subset file too big and will crash. Please regenerate your subset image using correct values.')
    # First, show the entire map, and highlight the parts where it has been cropped.
    displayImage(rasterstack,7.5,7.5,wkt)
    # generalized - there should only be one 'subset' file in your directory
    cropped_img = gdal.Open(str(outfile)).ReadAsArray()
    # Display cropped img
    displayImage(cropped_img,7.5,7.5)
Option 1: Call xy_to_geocoord
to gather the aoi_coords
:
# Option 1: convert the AOI selector's pixel corners to map coordinates;
# otherwise show guidance for verifying the option-2/3 result.
try:
    if choice == 1:
        # Upper-left and lower-right corners, kept in the stack's UTM zone.
        aoi_coords = [xy_to_geocoord(aoi.x1, aoi.y1, geotrans, latlon=False), xy_to_geocoord(aoi.x2, aoi.y2, geotrans, latlon=False)]
        print(f"aoi_coords in EPSG {utm}: {aoi_coords}")
    else:
        # make warning more visable?
        display(Markdown(f'<text style=color:red>If the image above: </text>'))
        display(Markdown(f'<ol text style=color:red><li>Does not show blue shape on the first image.</li> <li>and/or it displays entirely black image on the second</li></ol>'))
        display(Markdown(f'<text style=color:red>It indicates that there is something wrong with your input. Please use WKT or .shp file that has the same projection as original image and generate the subset image.</text>'))
        display(Markdown(f'<text style=color:blue>If both images displayed properly as you expected, please proceed ahead.</text>'))
except TypeError:
    # AOI_Selector's x1/y1/x2/y2 stay None until a rectangle is drawn.
    print('TypeError')
    display(Markdown(f'<text style=color:red>This error occurs if an AOI was not selected.</text>'))
    display(Markdown(f'<text style=color:red>Note that the square tool icon in the AOI selector menu is <b>NOT</b> the selection tool. It is the zoom tool.</text>'))
    display(Markdown(f'<text style=color:red>Read the tips above the AOI selector carefully.</text>'))
Collect the paths to the tiffs:
tiff_paths = get_tiff_paths(paths)
Create a subdirectory in which to store the subset tiffs:
# Prompt until the user supplies a non-empty directory name.
print("Choose a directory name in which to store the subset geotiffs.")
print("Note: this will sit alongside the directory containing your pre-subset geotiffs.")
sub_name = input()
while sub_name == "":
    print("Please enter a valid directory name")
    sub_name = input()
Subset the tiffs and move them from the individual product directories into their own directory, /tiffs:
# List the tiffs that will be subset below.
tiff_paths = get_tiff_paths(paths)
for p in tiff_paths:
    print(p)
Create the subset directory. For options 2 & 3, also check whether the subset directory is empty.
# Create the subset output directory alongside the tiff directory.
subset_dir = analysis_dir/sub_name
# mkdir(exist_ok=True) replaces the racy exists()/mkdir() pair.
subset_dir.mkdir(exist_ok=True)
if choice != 1:
    # True unless a previous run already wrote subset tiffs here.
    isEmptyDir = not any(subset_dir.rglob('*.tiff'))
    if not isEmptyDir:
        print("Tiff files from previous run exists. Would you like to remove them and generate new tiff files?")
        reset_option = asfn.select_parameter(['Generate New Tiffs', 'Keep Previous Tiffs', ], '')
        display(reset_option)
# sometimes, tiff doesn't follow '[0-9]{7}T[0-9]6' format, hence just get the numbers in those cases
# option 2 & 3
if choice != 1:
    # Re-check emptiness in case subsets appeared since the cell above ran.
    if isEmptyDir and any(subset_dir.rglob('*.tiff')):
        isEmptyDir = False
# Track date_polarity names already used so duplicates get an index suffix.
dup_date_polar = set()
for i, tiff_path in enumerate(tiff_paths):
    if 'subset' not in str(tiff_path):
        # Acquisition date (before the 'T' time separator) and polarization
        # scraped from the product name/path.
        date = Path(asfn.date_from_product_name(str(tiff_path))).name.split('T')[0]
        polar = asfn.get_polarity_from_path(str(tiff_path))
        print(f"Product #{i+1}:")
        print(f'Path: {tiff_path}\n')
        date_polar = f'{date}_{polar}'
        if date_polar in dup_date_polar:
            date_polar += f'_{i}'
        else:
            dup_date_polar.add(date_polar)
        outfile = subset_dir/f'{date_polar}.tiff'
        if choice == 1:
            # Crop to the AOI rectangle in the stack's UTM projection.
            gdal_command = f"gdal_translate -projwin {aoi_coords[0][0]} {aoi_coords[0][1]} {aoi_coords[1][0]} {aoi_coords[1][1]} -projwin_srs 'EPSG:{utm}' -co \"COMPRESS=DEFLATE\" -eco -a_nodata 0 {tiff_path} {outfile}"
        else: # choice 2 & 3
            gdal_command = f'gdalwarp -cutline {shp} -crop_to_cutline {tiff_path} {outfile}'
        if not isEmptyDir:
            if reset_option.value == 'Generate New Tiffs':
                # NOTE(review): unlink() assumes every outfile already exists
                # from the previous run — confirm, else this raises.
                print(f'Removed: {outfile}\n')
                outfile.unlink()
            else:
                # Keep previous tiffs: replace the command with a no-op echo.
                gdal_command = f'echo Nothing was executed...\n'
        !{gdal_command} # runs command
        print(f"Calling the command: {gdal_command}\n")
Delete any subset tifs
that are filled with NaNs
and contain no data.
# Remove any subset tiffs that contain only NaN (no-data) pixels.
subset_paths = subset_dir/f"*.tif*"
tiff_paths = get_tiff_paths(subset_paths)
asfn.remove_nan_filled_tifs(tiff_paths)
Sometimes, when using gdal translate to subset a stack of images, there will be slight differences in sizes of the resulting images, off by a single pixel in either direction. The following code checks the newly subset stack for this problem, and if found, it re-subsets all the images to the size of the smallest image in the stack.
# Align geotiffs to an integer resolution value
# list of new subsets
fnames = list(subset_dir.rglob('*.tiff'))
fnames.sort()
# Use the first subset's pixel size as the common resolution.
# NOTE(review): fnames[0] raises IndexError if no subsets were produced.
resolution = int(gdal.Info(str(fnames[0]), format='json')['geoTransform'][1])
for fname in fnames:
    # In-place warp snapping every subset to the same aligned pixel grid so
    # all images in the stack end up with identical dimensions.
    gdal.Warp(str(fname), str(fname),
              dstSRS=f'EPSG:{utm}', srcSRS=f'EPSG:{utm}',
              xRes=resolution, yRes=resolution, targetAlignedPixels=True)
Decide whether or not to cleanup the original tiffs:
# Optionally delete the original (pre-subset) tiff directory.
cleanup = asfn.select_parameter(["Save original tiffs", "Delete original tiffs"], '')
cleanup  # last expression in the cell: renders the selection widget
if cleanup.value == 'Delete original tiffs':
    shutil.rmtree(tiff_dir)
Print the path to your subset directory:
print(subset_dir)
Relevant notebooks:
# Run this cell to display links
from IPython.display import display, HTML

current = Path.cwd()
abs_path = [
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/Change_Detection_From_Prepared_Data_Stack.ipynb'),
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/Explore_SAR_Time_Series_From_Prepared_Data_Stack.ipynb'),
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/SARChangeDetectionMethods_From_Prepared_Data_Stack.ipynb'),
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/Time_Series_From_Prepared_Stack.ipynb')
]
# NOTE: descriptions are listed in REVERSE path order and consumed from the
# end of the list by pop(), which pairs each notebook with its description.
details = [
    'Introduce you to the analysis of deep multi-temporal SAR image data stacks.',
    'Introduces you to a some popular change detection methods that can be applied on SAR time series data.',
    'Introduces you to the time series signatures associated with flooding.',
    'Applies Change Point Detection on a deep multi-temporal SAR image data stack acquired by Sentinel-1.'
]
for notebook_path in abs_path:
    detail = details.pop()
    relative_path = notebook_path.relative_to(current)
    link = f"<li><a href='{relative_path}'> {notebook_path.stem} </a>: {detail} </li>"
    display(HTML(link))
GEOS 657 Microwave Remote Sensing - Version 2.0.0 - January 2022
Version Changes:
- Added the option to subset the image using WKT as well as a .shp file.