#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
from salishsea_tools import evaltools as et, viz_tools, places
import cmocean as cmo
import datetime as dt
import netCDF4 as nc
import matplotlib.dates as mdates
import matplotlib as mpl
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm
import cmocean
import f90nml
import os
import pickle
import xarray as xr
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


places.PLACES.keys()


# In[3]:


places.PLACES['Central SJDF']


# In[4]:


places.PLACES['S3']


# In[5]:


pl=places.PLACES


# In[6]:


# path to model files:
PATH= '/results2/SalishSea/nowcast-green.201905/'
start = dt.datetime(2015,1,1)
end = dt.datetime(2019,1,1)
#end = dt.datetime(2019,6,1)
namfmt='nowcast'


# In[7]:


flist=et.index_model_files(start,end,PATH,namfmt,flen=1,ftype='ptrc_T',tres=24)


# In[8]:


int(365/10)


# In[9]:


pl.keys()


# In[10]:


pl['Cluster_5']


# In[11]:


mesh=nc.Dataset('/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc')


# In[96]:


mesh.variables['e3t_1d'][:]


# In[97]:


np.sum(mesh.variables['e3t_1d'][:])


# In[87]:


fig,ax=plt.subplots(1,1,figsize=(5,5))
ax.contour(mesh.variables['tmask'][0,0,:,:],[.5,],
           colors='black',zorder=2,linewidths=.5)
j,i=pl['S3']['NEMO grid ji']
ax.plot(i,j,'r*')
j,i=pl['Cluster_3']['NEMO grid ji']
ax.plot(i,j,'r*')
ax.set_aspect(1)
ax.axis('off')


# In[89]:


fig,ax=plt.subplots(1,1,figsize=(5,5))
ax.contour(mesh.variables['tmask'][0,0,:,:],[.5,],
           colors='black',zorder=2,linewidths=.5)
j,i=pl['Cluster_1']['NEMO grid ji']
ax.plot(i,j,'r*')
j,i=pl['Central SJDF']['NEMO grid ji']
ax.plot(i,j,'r*')
ax.set_aspect(1)
ax.axis('off')


# In[13]:


mesh.variables['e3t_0'][0,0:10,j,i]


# In[14]:


flist.loc[0:5,['paths']].values[:]


# In[15]:


f=xr.open_mfdataset(flist.loc[0:29,['paths']].values[:].flatten())


# In[16]:


test=np.mean(f['flagellates'][:,:10,j,i],1)


# In[17]:


test.values


# In[18]:


f.close()


# In[19]:


# upper 10 m average at each location, read in ~monthly chunks of daily files;
# the result is cached to a pickle so the loop only needs to run once
if False:
    dd=29
    chls={'S3':list(),'Central SJDF':list(),'Cluster_1':list(),'Cluster_3':list()}
    for ind in range(0,int(np.ceil(len(flist)/dd))):
        start=ind*dd
        end=min((ind+1)*dd,len(flist))
        print(start,end)
        with xr.open_mfdataset(flist.iloc[start:end]['paths']) as f:
            for ipl in chls.keys():
                j,i=pl[ipl]['NEMO grid ji']
                phy=np.mean(f['diatoms'][:,:10,j,i]+f['ciliates'][:,:10,j,i]+f['flagellates'][:,:10,j,i],1)
                chls[ipl].append(phy.values)
    for ipl in chls.keys():
        chls[ipl]=1.8*np.concatenate(chls[ipl])
    with open('chls.pkl','wb') as pkf:
        pickle.dump(chls,pkf)
else:
    with open('chls.pkl','rb') as pkf:
        chls=pickle.load(pkf)


# In[52]:


dts=np.array([ii for ii in flist.t_0])
yds=np.array(et.datetimeToYD(flist.t_0))
yrs=np.array([ii.year for ii in flist.t_0])


# In[59]:


cols=plt.get_cmap('Accent',6)


# In[90]:


fig,ax=plt.subplots(1,1,figsize=(12,1.8))
for yr in np.arange(2015,2019):
    ii=yrs==yr
    ax.plot(yds[ii],chls['S3'][ii],color=cols((yr-2015)/(2019-2015)))
    ax.plot(yds[ii],chls['Cluster_3'][ii],color=cols((yr-2015)/(2019-2015)))
ax.set_xlim(0,366)
ax.set_ylim(0,12)
ax.set_ylabel('Chl (mg/m$^3$)',fontsize=14)


# In[91]:


fig,ax=plt.subplots(1,1,figsize=(12,1.8))
for yr in np.arange(2015,2019):
    ii=yrs==yr
    ax.plot(yds[ii],chls['Central SJDF'][ii],color=cols((yr-2015)/(2019-2015)))
    ax.plot(yds[ii],chls['Cluster_1'][ii],color=cols((yr-2015)/(2019-2015)))
ax.set_xlim(0,366)
ax.set_ylim(0,12)
ax.set_ylabel('Chl (mg/m$^3$)',fontsize=14)


# In[47]:


cols(0/6),cols(1/6),cols(2/6),cols(3/6),cols(4/6),cols(5/6)
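
# In[ ]:


# Added sketch (not in the original notebook): annual means of the cached
# upper-10 m chlorophyll series at each station, as a quick consistency check
# on the chls and yrs arrays built above.
for ipl in chls.keys():
    for yr in np.arange(2015,2019):
        print(ipl,yr,np.nanmean(chls[ipl][yrs==yr]))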

# In[2]:


# path to model files:
PATH= '/results2/SalishSea/nowcast-green.201905/'

# start and end dates for analysis
# (the assignments were lost in this export; values below are assumed to span
#  2013-2016, consistent with the note in the section heading below):
start_date = dt.datetime(2013,1,1)
end_date = dt.datetime(2017,1,1)

# number of days per model file:
flen=1

# dictionary mapping desired model variables to the file types where they are found
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T',
         'ciliates':'ptrc_T','flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'}

# dictionary mapping model file types to their time resolution in hours
# (1 means hourly files, 24 means daily files)
fdict={'ptrc_T':1,'grid_T':1}

# results format:
# -- nowcast: files like 01jan15/SalishSea_1h_20150101_20150101_ptrc_T.nc
# -- long: files like SalishSea_1h_20150206_20150804_ptrc_T_20150427-20150506.nc, all in one directory

# path to directory containing database:
# obsDir='/ocean/shared/SalishSeaCastData/DFO/BOT/' # normal obs dir
obsDir='/data/eolson/MEOPAR/OBS/' # temporary obs dir
# (dbname defaults to 'DFO_OcProfDB.sqlite')

# where to store the pickle file produced below:
pkldir='/data/eolson/results/MEOPAR/clusterGroups/'


# ### 1) load DFO data and match it to model output: don't repeat this part unless you want to look at different years (outside 2013-2016)

# In[3]:


# load DFO bottle data (returns a pandas dataframe)
# AbsSal is Absolute (actually Reference) Salinity, and ConsT is Conservative Temperature
# N is nitrate+nitrite, Si is silicate; Chlorophyll_Extracted; Z is depth (m); dtUTC is datetime in UTC
# excludeSaanich=True -> do not include data from Saanich Inlet
df1=et.loadDFO(basedir=obsDir,datelims=(start_date,end_date),excludeSaanich=True)
print(len(df1))
df1.head()
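
# In[ ]:


# Added sketch (not in the original notebook): confirm the temporal coverage of
# the loaded bottle data using the dtUTC column described above.
print(df1['dtUTC'].min(),df1['dtUTC'].max())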

# In[4]:


# match model output to observations and return both in a dataframe;
# the model variables will have their original names prefixed by mod_, eg mod_vosaline;
# the observation column names are unchanged
data=et.matchData(data=df1,filemap=filemap,fdict=fdict,mod_start=start_date,mod_end=end_date,
                  mod_nam_fmt=namfmt,mod_basedir=PATH,mod_flen=flen,
                  meshPath='/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc')


# In[5]:


# Chl:N ratio used later in plots; get value from namelist associated with model output
mod_chl_N=et.getChlNRatio(basedir=PATH,nam_fmt='nowcast')
print(mod_chl_N)


# In[6]:


# plot matched data locations
fig, ax = plt.subplots(figsize = (6,6))
viz_tools.set_aspect(ax, coords = 'map')
ax.plot(data['Lon'], data['Lat'], 'ro', label='matched obs')
grid = nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc')
viz_tools.plot_coastline(ax, grid, coords = 'map')
ax.set_ylim(48, 50.5)
ax.legend()
ax.set_xlim(-125.7, -122.5);


# In[7]:


# add column for day of year
data['YD']=et.datetimeToYD(data['dtUTC'])


# In[8]:


# drop columns that contain no data
data.dropna(axis=1,how='all',inplace=True)


# In[9]:


def logt(x):
    # log10 with a small offset so zero values do not produce -inf
    return np.log10(x+.001)


# In[10]:


# chlorophyll calculations
data['l10_obsChl']=logt(data['Chlorophyll_Extracted'])
data['l10_modChl']=logt(mod_chl_N*(data['mod_diatoms']+data['mod_ciliates']+data['mod_flagellates']))
data['mod_Chl']=mod_chl_N*(data['mod_diatoms']+data['mod_ciliates']+data['mod_flagellates'])
data['Chl']=data['Chlorophyll_Extracted']


# ### 2) add Cluster info: also no need to repeat this unless you want other years

# In[11]:


clusterD='/data/tjarniko/MEOPAR/analysis_tereza/notebooks/CLUSTER_PAPER/CLEAN/KEY_PAPERFIGURES/pkls/'
cxf='Xcoords_for571_stations.pkl'
cyf='Ycoords_for571_stations.pkl'
cfile='BIO_clno_5_2015_reass.pkl' # only 2015 and 2016 overlap with hplc
cver='BIO'


# In[12]:


data['Year']=[ii.year for ii in data['dtUTC']]


# In[13]:


cx=pickle.load(open(clusterD+cxf,'rb'))
cy=pickle.load(open(clusterD+cyf,'rb'))
cf=dict()
for iyear in np.unique(data.Year):
    cf[iyear]=pickle.load(open(clusterD+cfile,'rb'))


# In[14]:


def round2(num):
    # map a model grid index to the nearest cluster-station grid coordinate;
    # the cluster stations lie every 10 grid points, offset by 2 (2, 12, 22, ...)
    return int(np.trunc((num+2)/10)*10+2)


# In[15]:


data['Cluster']=np.zeros(len(data))


# In[16]:


# assign each observation the cluster of its nearest cluster station, where one exists
for ir, row in data.iterrows():
    ii=(cx==round2(row['i']))&(cy==round2(row['j']))
    if sum(ii)==1:
        cluster=cf[row['Year']][ii]
        data.at[ir,'Cluster']=int(cluster)


# In[17]:


# number of points not assigned a cluster, total length of dataframe:
np.sum(data['Cluster']==0),len(data)


# In[18]:


pickle.dump(data,open(os.path.join(pkldir,'DFODataModelClusterBIO_1905temp.pkl'),'wb'))


# # Jump to here to load pandas dataframe containing DFO Obs, model values, and Cluster ID

# In[3]:


data=pickle.load(open(os.path.join(pkldir,'DFODataModelClusterBIO_1905temp.pkl'),'rb'))


# In[7]:


data.loc[(data.Cluster==5)&(data.mod_Chl<0)]


# In[18]:


# cluster 5 points where both model and observed chlorophyll are available;
# defined here (ahead of its original execution order) so the cells below can use ii
ii=(data.Cluster==5)&(~np.isnan(data.mod_Chl))&(~np.isnan(data.Chl))
plt.hist(data.loc[ii,['mod_Chl']].values-data.loc[ii,['Chl']].values,50)


# In[12]:


(np.min(data.loc[ii,['mod_Chl']].values),np.max(data.loc[ii,['mod_Chl']].values),
 np.min(data.loc[ii,['Chl']].values),np.max(data.loc[ii,['Chl']].values))


# In[13]:


(np.min(data.loc[ii,['Z']].values),np.max(data.loc[ii,['Z']].values),
 np.min(data.loc[ii,['Chl']].values),np.max(data.loc[ii,['Chl']].values))


# In[14]:


plt.hist(data.loc[ii,['Z']].values)


# In[19]:


plt.hist(data.loc[ii,['mod_Chl','Chl']],50)


# In[20]:


et.printstats(data.loc[ii,:],'Chl','mod_Chl')


# In[19]:


data.keys()


# In[20]:


# create dictionary of dataframe views by year
datyr=dict()
yy=np.array([ii.year for ii in data['dtUTC']])
yys=np.unique(yy)
for yr in yys:
    datyr[yr]=data.loc[yy==yr]
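
# In[ ]:


# Added sketch (not in the original notebook): number of matched samples per
# year, using the per-year views built above.
for yr in yys:
    print(yr,len(datyr[yr]))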

# In[21]:


# plot matched data sampling times
clist=('gold','aqua','plum','c','m','r','g','b','brown','gray')
fig,axL=plt.subplots(1,1,figsize=(12,1))
for ii, yr in enumerate(yys):
    # shift each year's sample times onto a common one-year axis
    dshift=dt.datetime(yys[0],1,1)-dt.datetime(yr,1,1)
    axL.plot(datyr[yr].dtUTC+dshift,np.zeros(np.shape(datyr[yr].dtUTC))+.1*ii,'.',
             color=clist[ii],markersize=4,label=str(yr))
axL.set_yticks(())
yearsFmt = mdates.DateFormatter('%d %b')
axL.xaxis.set_major_formatter(yearsFmt)
axL.xaxis.set_ticks([dt.datetime(int(yys[0]),1,1),dt.datetime(int(yys[0]),3,1),
                     dt.datetime(int(yys[0]),5,1),dt.datetime(int(yys[0]),7,1),
                     dt.datetime(int(yys[0]),9,1),dt.datetime(int(yys[0]),11,1),
                     dt.datetime(int(yys[0])+1,1,1)])
for tick in axL.get_xticklabels():
    tick.set_rotation(90)
    tick.set_horizontalalignment('center')
axL.set_ylim(-.1,.1*(len(datyr.keys())+1))
axL.set_xlim(dt.datetime(yys[0],1,1),dt.datetime(yys[0],12,31))
axL.legend()
axL.set_frame_on(False)
xmin, xmax = axL.get_xaxis().get_view_interval()
ymin, ymax = axL.get_yaxis().get_view_interval()
axL.add_artist(mpl.lines.Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))


# #### Display stats

# In[22]:


print('Chl depth<15:')
et.printstats(data.loc[data.Z<15,:],'Chl','mod_Chl')
for icl in range(0,6):
    print('cluster',icl)
    et.printstats(data.loc[data.Cluster==icl,:],'Chl','mod_Chl')


# In[22]:


# as above, but with the per-cluster stats restricted to the upper 10 m
print('Chl depth<15:')
et.printstats(data.loc[data.Z<15,:],'Chl','mod_Chl')
for icl in range(0,6):
    print('cluster',icl)
    et.printstats(data.loc[(data.Cluster==icl)&(data.Z<10),:],'Chl','mod_Chl')


# ### mod vs. obs plots

# In[23]:


cm1=cmocean.cm.thermal
bounds = np.arange(0,30)
norm = mpl.colors.BoundaryNorm(boundaries=bounds, ncolors=200)
fig,ax=plt.subplots(1,1,figsize=(3,3))
args={'marker':'.','s':8,'norm':norm}
ps=et.varvarScatter(ax,data,'l10_obsChl','l10_modChl','Z',cm=cm1,args=args)
cb=fig.colorbar(ps,ax=ax,boundaries=np.arange(0,30))
cb.set_label('Depth (m)')
ax.set_ylabel('Model')
ax.set_xlabel('Obs')
ax.set_title('log10[Chl (mg/m3)]')
ax.set_xlim(-3,2)
ax.set_ylim(-3,2)
ax.plot((-3,2),(-3,2),'k-')


# In[24]:


# map clusters
fig, ax = plt.subplots(figsize = (8,6))
viz_tools.set_aspect(ax, coords = 'map')
m=ax.scatter(data['Lon'], data['Lat'],c=data['Cluster'],s=5,cmap=plt.get_cmap('tab10',6))
grid = nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc')
viz_tools.plot_coastline(ax, grid, coords = 'map')
plt.colorbar(m)
ax.set_ylim(48, 50.5)
ax.set_xlim(-125.7, -122.5);


# In[25]:


## Plot residuals by year day for each cluster


# In[29]:


fig,ax=plt.subplots(6,1,figsize=(16,16))
fig.subplots_adjust(hspace=.6)
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    ix=ax[icl]
    m=ix.scatter(icld['YD'],icld['mod_Chl']-icld['Chl'],c=icld['Year'],s=10,cmap=plt.get_cmap('Accent',6))
    ix.set_xlabel('Year Day')
    ix.set_ylabel('Model-Obs\n Chl (mg/m3)')
    ix.set_xlim(0,366)
    ix.set_ylim(-30,30)
    ix.set_title(f'Cluster {icl}')
    fig.colorbar(m,ax=ix)
    ix.plot((0,366),(0,0),'-',color='lightgray')


# In[30]:


# same residuals, on the log scale used for the stats
fig,ax=plt.subplots(6,1,figsize=(16,16))
fig.subplots_adjust(hspace=.6)
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    ix=ax[icl]
    m=ix.scatter(icld['YD'],icld['l10_modChl']-icld['l10_obsChl'],c=icld['Year'],s=10,cmap=plt.get_cmap('Accent',6))
    ix.set_xlabel('Year Day')
    ix.set_ylabel('Model-Obs\n log10[Chl (mg/m3)+.001]')
    ix.set_xlim(0,366)
    ix.set_ylim(-3,3)
    ix.set_title(f'Cluster {icl}')
    fig.colorbar(m,ax=ix)
    ix.plot((0,366),(0,0),'-',color='lightgray')
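
# In[ ]:


# Added sketch (not in the original notebook): numeric summary of the residual
# plots above -- mean model-obs chlorophyll bias per cluster (cluster 0 holds
# the points that were not assigned a cluster).
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    print(icl,np.nanmean(icld['mod_Chl']-icld['Chl']))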