%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import subprocess
import requests
import boto3
import s3fs
import pandas as pd
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geopandas
import pyproj
from pyproj import Proj
from shapely.ops import transform
import geoviews as gv
from cartopy import crs
import hvplot.xarray
import holoviews as hv
gv.extension('bokeh', 'matplotlib')
from pystac_client import Client
# Get credentials
# NASA Earthdata endpoints that hand out temporary S3 credentials for
# direct in-region (us-west-2) access to each DAAC's cloud archive.
s3_cred_endpoint = {
    'podaac':'https://archive.podaac.earthdata.nasa.gov/s3credentials',
    'lpdaac':'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
}
def get_temp_creds(provider='podaac'):
    """Fetch temporary S3 credentials from the given DAAC.

    Parameters
    ----------
    provider : str, default 'podaac'
        Key into ``s3_cred_endpoint`` ('podaac' or 'lpdaac').

    Returns
    -------
    dict with 'accessKeyId', 'secretAccessKey', 'sessionToken' (and expiry),
    as returned by the credentials endpoint.

    Raises
    ------
    KeyError if *provider* is unknown; requests.HTTPError on a failed request.
    """
    temp_creds_url = s3_cred_endpoint[provider]
    resp = requests.get(temp_creds_url)
    # Fail fast on an auth/network error instead of trying to JSON-decode
    # an HTML error page.
    resp.raise_for_status()
    return resp.json()
temp_creds_req = get_temp_creds()
Search the STAC catalog for files in the desired collection, using the collection id and a date range.
# Open the CMR-STAC catalog for the POCLOUD (PO.DAAC cloud) provider.
podaac_cat = Client.open('https://cmr.earthdata.nasa.gov/stac/POCLOUD/')
# Find every granule of the AVHRR OI L4 SST collection for 2016-2020 (inclusive).
search = podaac_cat.search(
collections=['AVHRR_OI-NCEI-L4-GLOB-v2.1'],
datetime='2016/2020'
)
Here is the number of files matched. The range spans 5 years and contains two leap years (2016 and 2020), so there should be 365*5 + 2 = 1827 daily files.
# Expected count: 5 years of daily files plus 2 leap days (2016 and 2020).
365*5+2
1827
# Granules matched by the STAC search -- should agree with the count above.
search.matched()
1827
These lines get the HTTPS urls and convert them to S3 urls.
# Materialize all matched STAC items and inspect one granule's data-asset URL.
items = search.get_all_items()
sst_https = items[1].get_assets()['data'].href
sst_https
'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20160102120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc'
# Direct in-region access needs the s3:// form of the same object path.
sst_s3 = sst_https.replace('https://archive.podaac.earthdata.nasa.gov/', 's3://')
sst_s3
's3://podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20160102120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc'
Create the https urls from the items.
# Collect the HTTPS data-asset URL of every matched granule.
sst_https_urls = [x.get_assets()['data'].href for x in items]
sst_https_urls[0]
'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20160101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc'
# Rewrite each URL to its s3:// equivalent for direct S3 reads.
sst_s3_urls = [x.replace('https://archive.podaac.earthdata.nasa.gov/', 's3://') for x in sst_https_urls]
sst_s3_urls[0]
's3://podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20160101120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc'
# Should be one URL per day, 2016-2020.
len(sst_s3_urls)
1827
fs_s3 = s3fs.S3FileSystem(anon=False, key=temp_creds_req['accessKeyId'], secret=temp_creds_req['secretAccessKey'], token=temp_creds_req['sessionToken'])
%%time
# Open the first granule directly from S3 and load it lazily with xarray.
s3_file_obj = fs_s3.open(sst_s3_urls[0], mode='rb')
sst_xr = xr.open_dataset(s3_file_obj, engine='h5netcdf')
CPU times: user 112 ms, sys: 8.17 ms, total: 120 ms Wall time: 240 ms
sst_xr
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, time: 1, nv: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9 * time (time) datetime64[ns] 2016-01-01 Dimensions without coordinates: nv Data variables: lat_bnds (lat, nv) float32 -90.0 -89.75 -89.75 ... 89.75 89.75 90.0 lon_bnds (lon, nv) float32 -180.0 -179.8 -179.8 ... 179.8 180.0 analysed_sst (time, lat, lon) float32 ... analysis_error (time, lat, lon) float32 ... mask (time, lat, lon) float32 ... sea_ice_fraction (time, lat, lon) float32 ... Attributes: (12/47) Conventions: CF-1.6, ACDD-1.3 title: NOAA/NCEI 1/4 Degree Daily Optimum Interpolat... id: NCEI-L4LRblend-GLOB-AVHRR_OI references: Reynolds, et al.(2009) What is New in Version... institution: NOAA/NESDIS/NCEI creator_name: NCEI Products and Services ... ... Metadata_Link.: http://doi.org/10.7289/V5SQ8XB5 keywords: Oceans>Ocean Temperature>Sea Surface Temperature keywords_vocabulary: NASA Global Change Master Directory (GCMD) Sc... standard_name_vocabulary: CF Standard Name Table v29 processing_level: L4 cdm_data_type: Grid
array([-89.875, -89.625, -89.375, ..., 89.375, 89.625, 89.875], dtype=float32)
array([-179.875, -179.625, -179.375, ..., 179.375, 179.625, 179.875], dtype=float32)
array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
array([[-90. , -89.75], [-89.75, -89.5 ], [-89.5 , -89.25], ..., [ 89.25, 89.5 ], [ 89.5 , 89.75], [ 89.75, 90. ]], dtype=float32)
array([[-180. , -179.75], [-179.75, -179.5 ], [-179.5 , -179.25], ..., [ 179.25, 179.5 ], [ 179.5 , 179.75], [ 179.75, 180. ]], dtype=float32)
[1036800 values with dtype=float32]
[1036800 values with dtype=float32]
[1036800 values with dtype=float32]
[1036800 values with dtype=float32]
At roughly 54 ms per file, reading one year of daily files should take about 20 seconds.
# ~54 ms per granule x 365 daily files = seconds needed per year of data.
54*0.001*365
19.71
%%time
# Iterate through remote_files to create a fileset
fileset = [fs_s3.open(file) for file in sst_s3_urls]
# This works
# Concatenate all 1827 daily granules into one dataset along time.
sst_xr_ts = xr.open_mfdataset(fileset, engine='h5netcdf')
# chunks doesn't seem to make it faster
#sst_xr_ts = xr.open_mfdataset(fileset, engine='h5netcdf', chunks= {'time':1096, 'lat':100, 'lon':100})
CPU times: user 2min 7s, sys: 4.95 s, total: 2min 11s Wall time: 5min 45s
#sst_xr_ts.analysed_sst.hvplot.image()
Read in point locations. Has the lat/lon values of the samples along the coast and the corresponding offshore point to compare that point to: basically a point 280km from the nearshore point and perpendicular to the coast. The lat/lon columns are the ones we want.
# Paired sample points: nearshore (.ns) and offshore (.os) lon/lat columns,
# one row per coastal sample location.
df = pd.read_csv('../data/sample_point_pairs_trim.csv')
df.head(1)
x.km.ns | y.km.ns | x.km.os | y.km.os | lon.ns | lat.ns | lon.os | lat.os | |
---|---|---|---|---|---|---|---|---|
0 | -14655.998881 | 6958.759684 | -14782.212703 | 7186.052086 | -165.064195 | 54.796769 | -169.532303 | 56.432301 |
Create the column names by appending the location suffix ('.ns' or '.os') to the base name:
# Quick check of the string concatenation used to build column names below.
'abc' + '.ns'
'abc.ns'
def getdf2(ras, pts, loc="ns"):
    """Sample ``ras.analysed_sst`` at the point set given in ``pts``.

    Parameters
    ----------
    ras : xarray Dataset with an ``analysed_sst`` variable on (time, lat, lon)
    pts : DataFrame with 'lon.<loc>' and 'lat.<loc>' columns
          (e.g. the sample-point-pairs table read above)
    loc : str, default "ns"
        Which point set to sample: "ns" (nearshore) or "os" (offshore).

    Returns
    -------
    DataArray of nearest-neighbour SST time series, with a new 'i'
    dimension holding one entry per row of ``pts``.
    """
    # Bug fix: the original read the module-level `df` instead of the `pts`
    # argument, silently ignoring whatever table the caller passed in.
    ind_x = xr.DataArray(pts["lon." + loc], dims=['i'])
    ind_y = xr.DataArray(pts["lat." + loc], dims=['i'])
    # Sharing the 'i' dimension makes .sel pair ind_x[i] with ind_y[i]
    # (vectorized pointwise selection rather than an outer product).
    xr_new = ras.analysed_sst.sel(lon=ind_x, lat=ind_y, method='nearest')
    return xr_new
# Sample the SST time series at the nearshore and offshore point sets.
sst_ns = getdf2(sst_xr_ts, df, loc="ns")
sst_os = getdf2(sst_xr_ts, df, loc="os")
# Offshore minus nearshore: positive where the offshore point is warmer.
sst_dif = sst_os - sst_ns
# Cache the DataArrays in IPython's store so other notebooks can reuse them.
%store sst_ns
%store sst_os
%store sst_dif
Stored 'sst_ns' (DataArray) Stored 'sst_os' (DataArray) Stored 'sst_dif' (DataArray)
%%time
# Binary upwelling index: 1 where offshore is more than 2 degrees warmer
# than nearshore, else 0. .compute() forces evaluation of the lazy dask graph.
upwelling_index = xr.where(sst_dif > 2, 1, 0).compute()
CPU times: user 49.9 s, sys: 830 ms, total: 50.7 s Wall time: 41.5 s
upwelling_index.hvplot(colorbar=False)
sst_dif.hvplot(cmap="Spectral")
import geopandas
%matplotlib inline
# Build point geometries from the nearshore lon/lat columns.
gdf = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df['lon.ns'], df['lat.ns']))
# Declare the coordinates as plain WGS84 lon/lat.
gdf = gdf.set_crs("+proj=longlat +datum=WGS84 +no_defs", allow_override=True)
gdf.plot(color='red')
<AxesSubplot:>
Step 1. Create a GeoDataFrame with a geometry column that holds the points in the Winkel Tripel CRS.
# Use the row index as a label for each point.
gdf['name']=gdf.index
# Label every 10th point so the plot stays readable.
# Generalized from the hard-coded np.arange(0, 567, 10): derive the stop from
# the data so this works whatever the number of sample points.
gdf_sub = gdf.iloc[np.arange(0, len(gdf), 10)]
ax = gdf.plot(figsize=(10,8), markersize=1)
# Annotate each subsampled point, nudged 3pt up-right of its marker.
for x, y, label in zip(gdf_sub.geometry.x, gdf_sub.geometry.y, gdf_sub.name):
    ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points")