Your JupyterHub server will automatically shut down when left idle for more than 1 hour. Your notebooks will not be lost, but you will have to restart their kernels and re-run them from the beginning. You will not be able to seamlessly continue running a partially run notebook.
# Create and show the URL widget; a later cell reads its `.value`
# (presumably the address of this notebook - confirm against url_widget docs).
import url_widget as url_w
notebookUrl = url_w.URLWidget()
display(notebookUrl)
from IPython.display import Markdown
from IPython.display import display
notebookUrl = notebookUrl.value
user = !echo $JUPYTERHUB_USER
env = !echo $CONDA_PREFIX
if env[0] == '':
env[0] = 'Python 3 (base)'
if env[0] != '/home/jovyan/.local/envs/rtc_analysis':
display(Markdown(f'<text style=color:red><strong>WARNING:</strong></text>'))
display(Markdown(f'<text style=color:red>This notebook should be run using the "rtc_analysis" conda environment.</text>'))
display(Markdown(f'<text style=color:red>It is currently using the "{env[0].split("/")[-1]}" environment.</text>'))
display(Markdown(f'<text style=color:red>Select the "rtc_analysis" from the "Change Kernel" submenu of the "Kernel" menu.</text>'))
display(Markdown(f'<text style=color:red>If the "rtc_analysis" environment is not present, use <a href="{notebookUrl.split("/user")[0]}/user/{user[0]}/notebooks/conda_environments/Create_OSL_Conda_Environments.ipynb"> Create_OSL_Conda_Environments.ipynb </a> to create it.</text>'))
display(Markdown(f'<text style=color:red>Note that you must restart your server after creating a new environment before it is usable by notebooks.</text>'))
In this notebook we will use the following scientific libraries:
Our first step is to import them:
%%capture
import copy
from datetime import datetime, timedelta, timezone
import json # for loads
from pathlib import Path
import re
import shutil
import warnings
from osgeo import gdal
gdal.UseExceptions()
import numpy as np
from IPython.display import display, clear_output, Markdown
import opensarlab_lib as asfn
from hyp3_sdk import Batch, HyP3
This notebook assumes that you've created your own data stack over your personal area of interest using the Alaska Satellite Facility's value-added product system HyP3, available via ASF Data Search (Vertex). HyP3 is an ASF service used to prototype value added products and provide them to users to collect feedback.
We will retrieve HyP3 data via the hyp3_sdk. As both HyP3 and the Notebook environment sit in the Amazon Web Services (AWS) cloud, data transfer is quick and cost effective.
Before we download anything, create a working directory for this analysis.
Select or create a working directory for the analysis:
# Keep prompting until a usable data directory exists, then leave the loop.
while True:
    print(f"Current working directory: {Path.cwd()}")
    data_dir = Path(input(f"\nPlease enter the name of a directory in which to store your data for this analysis. This should be in the same parent directory as this notebook."))

    # An empty answer is Path('.'), i.e. the current directory: ask again.
    if data_dir == Path('.'):
        continue

    # Directory does not exist yet: create it and stop.
    if not data_dir.is_dir():
        data_dir.mkdir()
        break

    # Directory exists and is empty: use it as is.
    if len(list(data_dir.glob('*'))) == 0:
        break

    # Directory exists and holds files: defer to the helper's answer.
    # 1 -> wipe and recreate, 2 -> keep contents, anything else -> prompt again.
    choice = asfn.handle_old_data(data_dir)
    if choice == 1:
        if data_dir.exists():
            shutil.rmtree(data_dir)
        data_dir.mkdir()
        break
    if choice == 2:
        break
    clear_output()
Define absolute path to analysis directory:
# Anchor the chosen data directory at the current working directory.
analysis_directory = Path.cwd() / data_dir
print(f"analysis_directory: {analysis_directory}")
Create a HyP3 object and authenticate:
# HyP3 client used by the later cells to query and download jobs.
# NOTE(review): prompt=True presumably asks for credentials interactively - confirm in hyp3_sdk docs.
hyp3 = HyP3(prompt=True)
Select a product type to download:
# Product types offered for download; the selection widget's `.value` is read by later cells.
job_types = ['RTC_GAMMA', 'INSAR_GAMMA', 'AUTORIFT']
job_type = asfn.select_parameter(job_types)
# Bare expression so the notebook renders the widget as the cell output.
job_type
Decide whether to search for a HyP3 project or jobs unattached to a project:
# Choose between searching a named HyP3 project and jobs that have no project name.
options = ['project', 'projectless jobs']
search_type = asfn.select_parameter(options, '')
print("Select whether to search for HyP3 Project or HyP3 Jobs unattached to a project")
display(search_type)
List projects containing active products of the type chosen in the previous cell and select one:
my_hyp3_info = hyp3.my_info()
# Maps project name -> Batch of finished, unexpired jobs of the chosen type.
active_projects = dict()

if search_type.value == 'project':
    for project in my_hyp3_info['job_names']:
        # (The original created a throwaway Batch() here that was overwritten at once.)
        batch = hyp3.find_jobs(name=project, job_type=job_type.value).filter_jobs(running=False, include_expired=False)
        if len(batch) > 0:
            active_projects.update({batch.jobs[0].name: batch})

    if len(active_projects) > 0:
        display(Markdown("<text style='color:darkred;'>Note: After selecting a project, you must select the next cell before hitting the 'Run' button or typing Shift/Enter.</text>"))
        display(Markdown("<text style='color:darkred;'>Otherwise, you will rerun this code cell.</text>"))
        print('\nSelect a Project:')
        project_select = asfn.select_parameter(active_projects.keys())
        display(project_select)

# Fall back to project-less jobs when asked to, or when no project had usable jobs.
if search_type.value == 'projectless jobs' or len(active_projects) == 0:
    # False (not a widget) tells the next cell to build the batch from jobs_select.
    project_select = False
    if search_type.value == 'project':
        print(f"There were no {job_type.value} jobs found in any current projects.\n")

    jobs = hyp3.find_jobs(job_type=job_type.value).filter_jobs(running=False, include_expired=False)
    # Keep only the jobs that carry no project name.
    orphaned_jobs = Batch()
    for j in jobs:
        if not j.name:
            orphaned_jobs += j
    jobs = orphaned_jobs

    if len(jobs) > 0:
        print(f"Found {len(jobs)} {job_type.value} jobs that are not part of a project.")
        print("Select the jobs you wish to download")
        # Re-key the jobs by the filename of their first file so they can be listed and looked up.
        jobs = {i.files[0]['filename']: i for i in jobs}
        jobs_select = asfn.select_mult_parameters(jobs.keys(), '', width='500px')
        display(jobs_select)
    else:
        print(f"There were no {job_type.value} jobs found that are not part of a project either.")
Select a date range of products to download:
# Build the working batch: a whole project, or the individually selected jobs.
if project_select:
    batch = active_projects[project_select.value]
else:
    batch = Batch()
    for selected_name in jobs_select.value:
        batch += jobs[selected_name]

display(Markdown("<text style='color:darkred;'>Note: After selecting a date range, you should select the next cell before hitting the 'Run' button or typing Shift/Enter.</text>"))
display(Markdown("<text style='color:darkred;'>Otherwise, you may simply rerun this code cell.</text>"))
print('\nSelect a Date Range:')

# Date picker built from the dates of the jobs in the batch.
dates = asfn.get_job_dates(batch)
date_picker = asfn.gui_date_picker(dates)
# Bare expression so the notebook renders the widget as the cell output.
date_picker
Save the selected date range and remove products falling outside of it:
# Read both slider endpoints and reduce them to calendar dates in place.
date_range = asfn.get_slider_vals(date_picker)
date_range[:2] = [endpoint.date() for endpoint in date_range[:2]]
print(f"Date Range: {date_range[0]} to {date_range[1]}")
# Drop the jobs that fall outside the selected range.
batch = asfn.filter_jobs_by_date(batch, date_range)
Gather the available paths and orbit directions for the remaining products:
display(Markdown("<text style='color:darkred;'><text style='font-size:150%;'>This may take some time for projects containing many jobs...</text></text>"))

# Have the helper attach `path` and `orbit_direction` to each job, then gather the distinct values.
asfn.set_paths_orbits(batch)
paths = {job.path for job in batch}
orbit_directions = {job.orbit_direction for job in batch}
# Extra menu entry that stands for every path.
paths.add('All Paths')

display(Markdown("<text style=color:blue><text style='font-size:175%;'>Done.</text></text>"))
Select a path or paths (use shift or ctrl to select multiple paths):
# Tell the user to move to the next cell after choosing, so this cell is not rerun.
display(Markdown("<text style='color:darkred;'>Note: After selecting a path, you must select the next cell before hitting the 'Run' button or typing Shift/Enter.</text>"))
display(Markdown("<text style='color:darkred;'>Otherwise, you will simply rerun this code cell.</text>"))
print('\nSelect a Path:')
# Multi-select widget over the collected paths (includes the 'All Paths' entry).
path_choice = asfn.select_mult_parameters(paths)
# Bare expression so the notebook renders the widget as the cell output.
path_choice
Save the selected flight path/s:
# Selected entries of the multi-select widget; empty when nothing was chosen.
flight_path = path_choice.value
if not flight_path:
    print("WARNING: You must select a flight path in the previous cell, then rerun this cell.")
elif 'All Paths' in flight_path:
    # The original repeated `if flight_path:` here, so this message could never print.
    # NOTE(review): assumes asfn.filter_jobs_by_path treats 'All Paths' as "no filtering" - confirm.
    print('Flight Path: All Paths')
else:
    print(f"Flight Path: {flight_path}")
Select an orbit direction:
# Only show the instructions when there is an actual choice to make.
if len(orbit_directions) > 1:
    display(Markdown("<text style='color:red;'>Note: After selecting a flight direction, you must select the next cell before hitting the 'Run' button or typing Shift/Enter.</text>"))
    display(Markdown("<text style='color:red;'>Otherwise, you will simply rerun this code cell.</text>"))
    print('\nSelect a Flight Direction:')

# Create the widget unconditionally: the next cell reads direction_choice.value
# even when only a single orbit direction is available.
direction_choice = asfn.select_parameter(orbit_directions, 'Direction:')
# Bare expression so the notebook renders the widget as the cell output.
direction_choice
Save the selected orbit direction:
# Capture the orbit direction currently selected in the widget.
direction = direction_choice.value
print(f"Orbit Direction: {direction}")
Filter jobs by path and orbit direction:
# Narrow the batch to the chosen flight path(s), then to the chosen orbit direction.
batch = asfn.filter_jobs_by_path(batch, flight_path)
batch = asfn.filter_jobs_by_orbit(batch, direction)
print(f"There are {len(batch)} products to download.")
Download the products, unzip them into a directory named after the product type, and delete the zip files:
# Products are stored in a folder named after the job type, inside the analysis directory.
products_path = analysis_directory / job_type.value
print(products_path)
products_path.mkdir(exist_ok=True)

first_job = batch.jobs[0]
print(f"\nProject: {first_job.name}")

# Download everything, then unpack and delete each archive; NetCDF files are left as downloaded.
project_zips = batch.download_files(products_path)
for z in project_zips:
    if z.suffix != '.nc':
        asfn.asf_unzip(str(products_path), str(z))
        z.unlink()

# Flags for the product type, taken from the first job in the batch.
rtc = first_job.job_type == 'RTC_GAMMA'
insar = first_job.job_type == 'INSAR_GAMMA'
autorift = first_job.job_type == 'AUTORIFT'
Determine the available polarizations if downloading RTC products:
# RTC only: find the polarizations present in the download folder and build
# the set of combinations offered in the next cell.
if rtc:
    polarizations = asfn.get_RTC_polarizations(str(products_path))
    polarization_power_set = asfn.get_power_set(polarizations)
Select a polarization:
# A polarization widget exists only for RTC products; otherwise the name is bound to None.
polarization_choice = (
    asfn.select_parameter(sorted(polarization_power_set), 'Polarizations:')
    if rtc
    else None
)
# Bare expression so the notebook renders the widget (or nothing) as the cell output.
polarization_choice
Create a paths variable, holding the relative path to the tiffs or NetCDFs:
# Build the filename regex used to pick out the product rasters.
# Raw strings avoid invalid-escape warnings for \w and \-; the dot before the
# extension is escaped so it only matches a literal '.'.
if rtc:
    polarization = polarization_choice.value
    print(polarization)
    if len(polarization) == 2:
        # Single polarization, e.g. 'VV': match that exact suffix.
        regex = r"\w[\--~]{{5,300}}(_|-){}\.(tif|tiff)$".format(polarization)
        dbl_polar = False
    else:
        # Polarization pair: match the shared first letter followed by either V or H.
        regex = r"\w[\--~]{{5,300}}(_|-){}(v|V|h|H)\.(tif|tiff)$".format(polarization[0])
        dbl_polar = True
elif insar:
    regex = r"\w*_ueF_\w*\.tif$"
elif autorift:
    regex = r"\w*ASF_OD.*$"
Write functions to collect and print the paths of the tiffs or NetCDFs:
def get_product_paths(regex, pths):
    """Return the paths matching glob *pths* whose string form matches *regex*.

    The glob is evaluated relative to the current working directory and the
    regex is applied with ``re.search``. Path.glob yields entries in arbitrary
    order, so the result is sorted to keep listings and index-based processing
    reproducible between runs.

    Args:
        regex: Regular expression searched for in each candidate path string.
        pths: Relative glob pattern handed to ``Path().glob``.

    Returns:
        A sorted list of ``Path`` objects; empty when nothing matches.
    """
    pattern = re.compile(regex)  # compile once instead of once per candidate
    return sorted(pth for pth in Path().glob(pths) if pattern.search(str(pth)))
def print_product_paths(product_paths):
    """Print a 'Tiff paths:' header, then each path followed by a blank line."""
    print("Tiff paths:")
    for entry in product_paths:
        print(entry, end="\n\n")
Write a function to collect the product acquisition dates:
def get_dates(product_paths):
    """Return, for each path, the part before 'T' of the date string that
    asfn.date_from_product_name extracts from it, in input order."""
    return [
        asfn.date_from_product_name(str(pth)).split('T')[0]
        for pth in product_paths
    ]
Convert NetCDFs to geotiffs:
if autorift:
import xarray as xr
import re
def ncToGeoTiff(path):
prevPath = ''
for p in path.rglob('*.nc'):
layers = ['v', 'vx', 'vy', 'v_error', 'vr', 'va', 'M11', 'M12']
fname = p.stem
dates = list(set(re.findall(r'\d{8}', fname)))
for layer in layers:
layer_dir = products_path/layer
if prevPath != p: # reduces number of runs
prevPath = p
ds = xr.open_dataset(p)
t1 = re.findall('\d*', ds.img_pair_info.acquisition_date_img1)
acq_date_1 = f'{t1[0]}T' + ''.join(t1[2:7])
t2 = re.findall('\d*', ds.img_pair_info.acquisition_date_img2)
acq_date_2 = f'{t2[0]}T' + ''.join(t2[2:7])
name = f'{fname[0:10]}_{acq_date_1}_{acq_date_2}_{fname[-6:]}_{layer}.tif'
outfile = layer_dir/name
if not layer_dir.exists():
layer_dir.mkdir()
if not outfile.exists():
!gdal_translate NETCDF:{p}:{layer} {outfile}
print('\n')
def removeNC(path):
for p in path.rglob('*.nc'):
p.unlink()
Collect and print the paths of the tiffs or NetCDFs:
# The glob patterns below are relative, so express the products folder relative to the cwd.
rel_prod_path = products_path.relative_to(Path.cwd())

if rtc:
    product_pth = f"{rel_prod_path}/*/*{polarization[0]}*.tif*"
elif insar:
    product_pth = f"{rel_prod_path}/*/*.tif*"
elif autorift:
    product_pth = f"{rel_prod_path}/*.tif*"
    # AutoRIFT arrives as NetCDF: export the layers to GeoTIFF, then delete the NetCDFs.
    ncToGeoTiff(products_path)
    removeNC(products_path)

if autorift:
    # List the GeoTIFFs grouped by the entries directly under products_path.
    print('Tiff paths:\n')
    for layer_entry in products_path.glob('*'):
        print(f'{layer_entry.name}:')
        for tiff_file in layer_entry.rglob('*.tif'):
            print(tiff_file)
        print('\n')
else:
    product_paths = get_product_paths(regex, product_pth)
    print_product_paths(product_paths)
if autorift: - convert to geotiff - delete netcdfs - re-glob paths for geotiffs
Fix multiple UTM Zone-related issues should they exist in your data set. If multiple UTM zones are found, the following code cells will identify the predominant UTM zone and reproject the rest into that zone. This step must be completed prior to merging frames or performing any analysis. AutoRIFT products do not come with projection metadata and so will not be reprojected.
Use gdal.Info to determine the UTM definition types and zones in each product:
# Coordinate-system selector. AutoRIFT products are not reprojected, so no widget
# is built for them; binding None keeps the bare expression below from raising
# NameError in that case.
coord_choice = (
    asfn.select_parameter(["UTM", "Lat/Long"], description='Coord Systems:')
    if not autorift
    else None
)
# Bare expression so the notebook renders the widget (or nothing) as the cell output.
coord_choice
if not autorift:
    # Parallel lists: entry i describes product_paths[i].
    utm_zones = []
    utm_types = []
    print('Checking UTM Zones in the data stack ...\n')
    for product in product_paths:
        info = gdal.Info(str(product), options=['-json'])
        # JSON round trip kept from the original before indexing into the result.
        wkt = json.loads(json.dumps(info))['coordinateSystem']['wkt']
        # Everything after the last 'ID' in the WKT, presumably of the form
        # ["EPSG",32606]] - the code after the comma, the authority name in quotes.
        last_id = wkt.split('ID')[-1]
        utm_zones.append(last_id.split(',')[1][0:-2])
        utm_types.append(last_id.split('"')[1])
    print(f"UTM Zones:\n {utm_zones}\n")
    print(f"UTM Types:\n {utm_types}")
Identify the most commonly used UTM Zone in the data:
if not autorift:
    if coord_choice.value != 'UTM':
        # Lat/Long was requested: target EPSG code 4326.
        predominant_utm = '4326'
    else:
        # Most frequent zone code; argmax returns the first one when counts tie.
        utm_unique, counts = np.unique(utm_zones, return_counts=True)
        predominant_utm = utm_unique[np.argmax(counts)]
        print(f"Predominant UTM Zone: {predominant_utm}")
Reproject all tiffs to the predominant UTM zone:
if not autorift:
# Reproject (if needed) and Mosaic DEM Files in Preparation for Subsequent HAND Calculation
# print(DEM_paths)
reproject_indicies = [i for i, j in enumerate(utm_zones) if j != predominant_utm] #makes list of indicies in utm_zones that need to be reprojected
print('--------------------------------------------')
print('Reprojecting %4.1f files' %(len(reproject_indicies)))
print('--------------------------------------------')
for k in reproject_indicies:
temppath = f"{str(product_paths[k].parent)}/r{product_paths[k].name}"
print(temppath)
cmd = f"gdalwarp -overwrite {product_paths[k]} {temppath} -s_srs {utm_types[k]}:{utm_zones[k]} -t_srs EPSG:{predominant_utm}"
# print(cmd)
!{cmd}
product_paths[k].unlink()
Update product_paths with any new filenames created during reprojection:
# Glob again so the list reflects the files that exist after the reprojection step.
if not autorift:
    product_paths = get_product_paths(regex, product_pth)
    print_product_paths(product_paths)
You may notice duplicates in your acquisition dates. As HyP3 processes SAR data on a frame-by-frame basis, duplicates may occur if your area of interest is covered by two consecutive image frames. In this case, two separate images are generated that need to be merged together before time series processing can commence. Currently we only merge RTCs.
Create a directory in which to store the reprojected and merged RTCs:
if not autorift:
    # Output folder is named '<job type>_tiffs' and lives inside the analysis directory.
    output_dir_path = analysis_directory / f"{job_type.value}_tiffs"
    print(output_dir_path)
    output_dir_path.mkdir(exist_ok=True)
Create a set from the date list, removing any duplicates:
if rtc:
    # One date per product path; the set collapses the repeats.
    dates = get_dates(product_paths)
    unique_dates = set(dates)
    print(dates)
    print(unique_dates)
Determine which dates have multiple frames. Create a dictionary with each date as a key linked to a value set as an empty string:
if rtc:
    # A date needs merging when it has more rasters than one frame would produce:
    # more than 2 for a polarization pair, more than 1 for a single polarization.
    frames_per_date = 2 if dbl_polar else 1
    dup_date_batches = [{}]
    for date in unique_dates:
        if dates.count(date) > frames_per_date:
            dup_date_batches[0][date] = ""
    # A polarization pair gets its own independent copy of the date dictionary.
    if dbl_polar:
        dup_date_batches.append(copy.deepcopy(dup_date_batches[0]))
    print(dup_date_batches)
Update the values in dup_date_batches with the space-separated string paths to all the tiffs for each date:
if rtc:
    # A polarization pair is a string like 'XX <word> YY': take words 0 and 2.
    if dbl_polar:
        polar_list = [polarization.split(' ')[0], polarization.split(' ')[2]]
    else:
        polar_list = [polarization]

    for i, polar in enumerate(polar_list):
        # Raw f-string avoids the invalid '\w' escape; the dot before the
        # extension is escaped so it only matches a literal '.'.
        polar_path_regex = rf"(\w|/)*_{polar}\.(tif|tiff)$"
        polar_paths = get_product_paths(polar_path_regex, product_pth)
        for pth in polar_paths:
            date = asfn.date_from_product_name(str(pth)).split('T')[0]
            if date in dup_date_batches[i]:
                # Append to the space-separated path list kept for this date.
                dup_date_batches[i][date] = f"{dup_date_batches[i][date]} {str(pth)}"

    for d in dup_date_batches:
        print(d)
        print("\n")
Merge all the frames for each date, save the results to the output directory, and delete the original tiffs.
if rtc and len(dup_date_batches[0]) > 0:
for i, dup_dates in enumerate(dup_date_batches):
polar_regex = "(?<=_)(vh|VH|vv|VV)(?=.tif{1,2})"
polar = re.search(polar_regex, dup_dates[list(dup_dates)[0]])
if polar:
polar = f'_{polar.group(0)}'
else:
polar = ''
for dup_date in dup_dates:
# print(f"\n\n{dup_dates[dup_date]}")
output = f"{str(output_dir_path)}/merged_{dup_date}T999999{polar}{product_paths[0].suffix}"
gdal_command = f"gdal_merge.py -o {output} {dup_dates[dup_date]}"
print(f"\n\nCalling the command: {gdal_command}\n")
!$gdal_command
for pth in dup_dates[dup_date].split(' '):
path = Path(pth)
if path and path.is_file():
path.unlink()
print(f"Deleting: {str(pth)}")
Verify that all duplicate dates were resolved:
if rtc:
    product_paths = get_product_paths(regex, product_pth)
    for polar in polar_list:
        # Narrow the glob from '<first letter>*' to this exact polarization. The
        # original replaced the literal 'V*', which did nothing for H-polarized
        # stacks and mixed both polarizations, producing false duplicate reports.
        polar_product_pth = product_pth.replace(f'{polarization[0]}*', polar)
        polar_product_paths = get_product_paths(regex, polar_product_pth)
        dates = get_dates(polar_product_paths)
        if len(dates) != len(set(dates)):
            print("Duplicate dates still present!")
        else:
            print(f"No duplicate dates are associated with {polar} polarization.")
Print the updated paths to all remaining non-merged tiffs:
# Show the rasters still in the download folder after merging.
if rtc:
    print_product_paths(product_paths)
Move all remaining unmerged tiffs into the output directory, and choose whether to save or delete the directory holding the remaining downloaded product files. AutoRIFT NetCDFs will remain in their original directory:
# AutoRIFT gets an empty option list; the other product types choose between keeping
# and deleting the download folder.
if autorift:
    choices = []
else:
    choices = ['save', 'delete']
    print("Do you wish to save or delete the directory containing auxiliary product files?")

save_or_del = asfn.select_parameter(choices)
# Bare expression so the notebook renders the widget as the cell output.
save_or_del
if not autorift:
    # Move every remaining raster into the output folder, keeping its filename.
    for tiff in product_paths:
        tiff.rename(output_dir_path / tiff.name)
    # Remove the download folder when the user asked for it.
    if save_or_del.value == 'delete':
        shutil.rmtree(products_path)
    # Glob the download location again now that the rasters have been moved.
    product_paths = get_product_paths(regex, product_pth)
Print the path where you saved your tiffs or NetCDFs.
# RTC/InSAR rasters were moved to the output folder; AutoRIFT rasters are still
# under the download folder.
if rtc or insar:
    print(output_dir_path)
elif autorift:
    print(products_path)
Relevant notebooks:
# Run this code to display notebook links
import os
from IPython.display import display, HTML

current = Path.cwd()
abs_path = [
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/Subset_Data_Stack.ipynb'),
    Path('/home/jovyan/notebooks/SAR_Training/English/Master/100k_MGRS_Geotiff_Subsetter.ipynb')
]
# Listed in the same order as abs_path. The original kept them reversed and
# paired them by popping from the end, which emptied the list as it went.
details = [
    'Crops a directory of tiffs to a subset area of interest using an interactive Matplotlib plot of an image in your data stack.',
    'Subsets a tiff stack into MGRS tiles.'
]
for a, detail in zip(abs_path, details):
    name = a.stem
    # os.path.relpath also works when the target is not below the current
    # directory; Path.relative_to raises ValueError in that case.
    relative_path = os.path.relpath(a, current)
    link_t = f"<li><a href='{relative_path}'>{name}</a>: {detail}</li>"
    html = HTML(link_t)
    display(html)
Prepare_RTC_Stack_HyP3_v2.ipynb - Version 2.0.1 - February 2023
Version Changes: