# filter some warning messages
import warnings
warnings.filterwarnings("ignore")
#libraries
import datetime as dt
import xarray as xr
import fsspec
import s3fs
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
# render xarray objects with the HTML repr
xr.set_options(display_style="html")
import os.path
# IPython magics: draw matplotlib figures inline in the notebook output,
# set the default figure size, and request 'retina' figure output
%matplotlib inline
plt.rcParams['figure.figsize'] = 12, 6
%config InlineBackend.figure_format = 'retina'
def get_geo_data(sat,lyr,idyjl):
    """Open one day of geostationary SST files from the NOAA S3 buckets.

    Arguments
    ---------
    sat   : 'goes-east', 'goes-west' or 'himawari'
    lyr   : year (int)
    idyjl : day of year (int, 1 = January 1st)

    Returns
    -------
    (ds, iexist) : ds is the xarray Dataset of all files found for that day,
    concatenated along 'time'; iexist is True when at least one file was
    found. When no file is found the function returns ([], False).

    Raises
    ------
    ValueError if sat is not one of the three supported names.

    The SST variable is named 'SST' in the GOES files and
    'sea_surface_temperature' in the Himawari files.
    NOTE: no unit conversion is applied here (presumably the data are in
    Kelvin, i.e. subtract 273.15 for Celsius -- TODO confirm).
    """
    # day-of-year 1 is January 1st, so the offset from Jan 1 is idyjl-1 days
    d = dt.datetime(lyr,1,1) + dt.timedelta(days=idyjl-1)
    # create zero-padded strings for the year, day of year, month and day of month
    syr,sjdy = str(lyr).zfill(4),str(idyjl).zfill(3)
    smon,sdym = str(d.month).zfill(2),str(d.day).zfill(2)
    # GOES buckets are organised by year/day-of-year, Himawari by year/month/day
    patterns = {
        'goes-east': 's3://noaa-goes16/ABI-L2-SSTF/'+syr+'/'+sjdy+'/*/*.nc',
        'goes-west': 's3://noaa-goes17/ABI-L2-SSTF/'+syr+'/'+sjdy+'/*/*.nc',
        'himawari': 's3://noaa-himawari8/AHI-L2-FLDK-SST/'+syr+'/'+smon+'/'+sdym+'/*/*L2P*.nc',
    }
    # fail early, before touching the network, on an unsupported satellite name
    if sat not in patterns:
        raise ValueError('unknown sat %r, expected one of %s' % (sat,sorted(patterns)))
    fs = s3fs.S3FileSystem(anon=True) #connect to s3 bucket!
    #use glob to list all the files in the directory
    file_location = fs.glob(patterns[sat])
    if not file_location:
        return [],False #no data for this day
    file_ob = [fs.open(file) for file in file_location] #open connection to files
    #open all the day's data
    #not opened in a `with` block on purpose: the dataset is returned to the
    #caller, and leaving a `with` block would close it before it can be used
    ds = xr.open_mfdataset(file_ob,combine='nested',concat_dim='time')
    #clean up coordinates which are a MESS in GOES
    if sat=='himawari':
        ds = ds.rename({'ni':'x','nj':'y'})
    else:
        #rename one of the coordinates that doesn't match a dim & should
        ds = ds.rename({'t':'time'})
        ds = ds.reset_coords()
    return ds,True
%%time
lyr = 2020
satlist = ['goes-east','goes-west','himawari']
for sat in satlist:
init = 0 #reset new data store
for idyjl in range(180,201): #6/28/2020-7/18/2020
print('starting ', idyjl)
ds,iexist = get_geo_data(sat,lyr,idyjl)
if not iexist:
continue
print('writing zarr store')
if init == 0:
ds.to_zarr(sat)
init = 1
else:
ds.to_zarr(sat,append_dim='time')
Note that uploading the Zarr stores to S3 requires the AWS command line tools, which can be installed and then used by running the following from the command line:
pip install awscli
aws s3 sync ./goes-east s3://ohw-bucket/goes_east
aws s3 sync ./goes-west s3://ohw-bucket/goes_west
aws s3 sync ./himawari s3://ohw-bucket/himawari
! pip install awscli
! aws s3 sync ./goes_east s3://ohw-bucket/goes_east
! aws s3 sync ./goes_west s3://ohw-bucket/goes_west
! aws s3 sync ./goes_west s3://ohw-bucket/himawari
%%time
file_location = 's3://ohw-bucket/goes_east'
ds = xr.open_zarr(fsspec.get_mapper(file_location,anon=False))
ds