#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
from salishsea_tools import evaltools as et, viz_tools, places
import cmocean as cmo
import datetime as dt
import netCDF4 as nc
import matplotlib.dates as mdates
import matplotlib as mpl
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm
import cmocean
import f90nml
import os
import pickle
import xarray as xr
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


places.PLACES.keys()


# In[3]:


places.PLACES['Central SJDF']


# In[4]:


places.PLACES['S3']


# In[5]:


pl=places.PLACES


# In[6]:


# path to model files:
PATH= '/results2/SalishSea/nowcast-green.201905/'
start = dt.datetime(2015,1,1)
end = dt.datetime(2019,1,1)
#end = dt.datetime(2019,6,1)
namfmt='nowcast'


# In[7]:


flist=et.index_model_files(start,end,PATH,namfmt,flen=1,ftype='ptrc_T',tres=24)


# In[8]:


int(365/10)


# In[9]:


pl.keys()


# In[10]:


pl['Cluster_5']


# In[11]:


mesh=nc.Dataset('/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc')


# In[96]:


mesh.variables['e3t_1d'][:]


# In[97]:


np.sum(mesh.variables['e3t_1d'][:])


# In[87]:


fig,ax=plt.subplots(1,1,figsize=(5,5))
ax.contour(mesh.variables['tmask'][0,0,:,:],[.5,],
           colors='black',zorder=2,linewidths=.5)
j,i=pl['S3']['NEMO grid ji']
ax.plot(i,j,'r*')
j,i=pl['Cluster_3']['NEMO grid ji']
ax.plot(i,j,'r*')
ax.set_aspect(1)
ax.axis('off')


# In[89]:


fig,ax=plt.subplots(1,1,figsize=(5,5))
ax.contour(mesh.variables['tmask'][0,0,:,:],[.5,],
           colors='black',zorder=2,linewidths=.5)
j,i=pl['Cluster_1']['NEMO grid ji']
ax.plot(i,j,'r*')
j,i=pl['Central SJDF']['NEMO grid ji']
ax.plot(i,j,'r*')
ax.set_aspect(1)
ax.axis('off')


# In[13]:


mesh.variables['e3t_0'][0,0:10,j,i]


# In[14]:


flist.loc[0:5,['paths']].values[:]


# In[15]:


f=xr.open_mfdataset(flist.loc[0:29,['paths']].values[:].flatten())


# In[16]:


test=np.mean(f['flagellates'][:,:10,j,i],1)


# In[17]:


test.values


# In[18]:


f.close()


# In[19]:


# upper 10 m average at each location, read in ~monthly chunks of daily files;
# the result is cached to a pickle so the loop only needs to run once
if False:
    dd=29
    chls={'S3':list(),'Central SJDF':list(),'Cluster_1':list(),'Cluster_3':list()}
    for ind in range(0,int(np.ceil(len(flist)/dd))):
        start=ind*dd
        end=min((ind+1)*dd,len(flist))
        print(start,end)
        with xr.open_mfdataset(flist.iloc[start:end]['paths']) as f:
            for ipl in chls.keys():
                j,i=pl[ipl]['NEMO grid ji']
                phy=np.mean(f['diatoms'][:,:10,j,i]+f['ciliates'][:,:10,j,i]+f['flagellates'][:,:10,j,i],1)
                chls[ipl].append(phy.values)
    for ipl in chls.keys():
        chls[ipl]=1.8*np.concatenate(chls[ipl])
    with open('chls.pkl','wb') as pkf:
        pickle.dump(chls,pkf)
else:
    with open('chls.pkl','rb') as pkf:
        chls=pickle.load(pkf)


# In[52]:


dts=np.array([ii for ii in flist.t_0])
yds=np.array(et.datetimeToYD(flist.t_0))
yrs=np.array([ii.year for ii in flist.t_0])


# In[59]:


cols=plt.get_cmap('Accent',6)


# In[90]:


fig,ax=plt.subplots(1,1,figsize=(12,1.8))
for yr in np.arange(2015,2019):
    ii=yrs==yr
    ax.plot(yds[ii],chls['S3'][ii],color=cols((yr-2015)/(2019-2015)))
    ax.plot(yds[ii],chls['Cluster_3'][ii],color=cols((yr-2015)/(2019-2015)))
ax.set_xlim(0,366)
ax.set_ylim(0,12)
ax.set_ylabel('Chl (mg/m$^3$)',fontsize=14)


# In[91]:


fig,ax=plt.subplots(1,1,figsize=(12,1.8))
for yr in np.arange(2015,2019):
    ii=yrs==yr
    ax.plot(yds[ii],chls['Central SJDF'][ii],color=cols((yr-2015)/(2019-2015)))
    ax.plot(yds[ii],chls['Cluster_1'][ii],color=cols((yr-2015)/(2019-2015)))
ax.set_xlim(0,366)
ax.set_ylim(0,12)
ax.set_ylabel('Chl (mg/m$^3$)',fontsize=14)


# In[47]:


cols(0/6),cols(1/6),cols(2/6),cols(3/6),cols(4/6),cols(5/6)
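
# In[ ]:


# Added sketch (not in the original notebook): annual means of the cached
# upper-10 m chlorophyll series at each station, as a quick consistency check
# on the chls and yrs arrays built above.
for ipl in chls.keys():
    for yr in np.arange(2015,2019):
        print(ipl,yr,np.nanmean(chls[ipl][yrs==yr]))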

# In[2]:


# path to model files:
PATH= '/results2/SalishSea/nowcast-green.201905/'

# start and end dates for analysis
# (the assignments were lost in this export; values below are assumed to span
#  2013-2016, consistent with the note in the section heading below):
start_date = dt.datetime(2013,1,1)
end_date = dt.datetime(2017,1,1)

# number of days per model file:
flen=1

# dictionary mapping desired model variables to the file types where they are found
filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T',
         'ciliates':'ptrc_T','flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'}

# dictionary mapping model file types to their time resolution in hours
# (1 means hourly files, 24 means daily files)
fdict={'ptrc_T':1,'grid_T':1}

# results format:
# -- nowcast: files like 01jan15/SalishSea_1h_20150101_20150101_ptrc_T.nc
# -- long: files like SalishSea_1h_20150206_20150804_ptrc_T_20150427-20150506.nc, all in one directory

# path to directory containing database:
# obsDir='/ocean/shared/SalishSeaCastData/DFO/BOT/' # normal obs dir
obsDir='/data/eolson/MEOPAR/OBS/' # temporary obs dir
# (dbname defaults to 'DFO_OcProfDB.sqlite')

# where to store the pickle file produced below:
pkldir='/data/eolson/results/MEOPAR/clusterGroups/'


# ### 1) load DFO data and match it to model output: don't repeat this part unless you want to look at different years (outside 2013-2016)

# In[3]:


# load DFO bottle data (returns a pandas dataframe)
# AbsSal is Absolute (actually Reference) Salinity, and ConsT is Conservative Temperature
# N is nitrate+nitrite, Si is silicate; Chlorophyll_Extracted; Z is depth (m); dtUTC is datetime in UTC
# excludeSaanich=True -> do not include data from Saanich Inlet
df1=et.loadDFO(basedir=obsDir,datelims=(start_date,end_date),excludeSaanich=True)
print(len(df1))
df1.head()
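
# In[ ]:


# Added sketch (not in the original notebook): confirm the temporal coverage of
# the loaded bottle data using the dtUTC column described above.
print(df1['dtUTC'].min(),df1['dtUTC'].max())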

# In[4]:


# match model output to observations and return both in a dataframe;
# the model variables will have their original names prefixed by mod_, eg mod_vosaline;
# the observation column names are unchanged
data=et.matchData(data=df1,filemap=filemap,fdict=fdict,mod_start=start_date,mod_end=end_date,
                  mod_nam_fmt=namfmt,mod_basedir=PATH,mod_flen=flen,
                  meshPath='/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc')


# In[5]:


# Chl:N ratio used later in plots; get value from namelist associated with model output
mod_chl_N=et.getChlNRatio(basedir=PATH,nam_fmt='nowcast')
print(mod_chl_N)


# In[6]:


# plot matched data locations
fig, ax = plt.subplots(figsize = (6,6))
viz_tools.set_aspect(ax, coords = 'map')
ax.plot(data['Lon'], data['Lat'], 'ro', label='matched obs')
grid = nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc')
viz_tools.plot_coastline(ax, grid, coords = 'map')
ax.set_ylim(48, 50.5)
ax.legend()
ax.set_xlim(-125.7, -122.5);


# In[7]:


# add column for day of year
data['YD']=et.datetimeToYD(data['dtUTC'])


# In[8]:


# drop columns that contain no data
data.dropna(axis=1,how='all',inplace=True)


# In[9]:


def logt(x):
    # log10 with a small offset so zero values do not produce -inf
    return np.log10(x+.001)


# In[10]:


# chlorophyll calculations
data['l10_obsChl']=logt(data['Chlorophyll_Extracted'])
data['l10_modChl']=logt(mod_chl_N*(data['mod_diatoms']+data['mod_ciliates']+data['mod_flagellates']))
data['mod_Chl']=mod_chl_N*(data['mod_diatoms']+data['mod_ciliates']+data['mod_flagellates'])
data['Chl']=data['Chlorophyll_Extracted']


# ### 2) add Cluster info: also no need to repeat this unless you want other years

# In[11]:


clusterD='/data/tjarniko/MEOPAR/analysis_tereza/notebooks/CLUSTER_PAPER/CLEAN/KEY_PAPERFIGURES/pkls/'
cxf='Xcoords_for571_stations.pkl'
cyf='Ycoords_for571_stations.pkl'
cfile='BIO_clno_5_2015_reass.pkl' # only 2015 and 2016 overlap with hplc
cver='BIO'


# In[12]:


data['Year']=[ii.year for ii in data['dtUTC']]


# In[13]:


cx=pickle.load(open(clusterD+cxf,'rb'))
cy=pickle.load(open(clusterD+cyf,'rb'))
cf=dict()
for iyear in np.unique(data.Year):
    cf[iyear]=pickle.load(open(clusterD+cfile,'rb'))


# In[14]:


def round2(num):
    # map a model grid index to the nearest cluster-station grid coordinate;
    # the cluster stations lie every 10 grid points, offset by 2 (2, 12, 22, ...)
    return int(np.trunc((num+2)/10)*10+2)


# In[15]:


data['Cluster']=np.zeros(len(data))


# In[16]:


# assign each observation the cluster of its nearest cluster station, where one exists
for ir, row in data.iterrows():
    ii=(cx==round2(row['i']))&(cy==round2(row['j']))
    if sum(ii)==1:
        cluster=cf[row['Year']][ii]
        data.at[ir,'Cluster']=int(cluster)


# In[17]:


# number of points not assigned a cluster, total length of dataframe:
np.sum(data['Cluster']==0),len(data)


# In[18]:


pickle.dump(data,open(os.path.join(pkldir,'DFODataModelClusterBIO_1905temp.pkl'),'wb'))


# # Jump to here to load pandas dataframe containing DFO Obs, model values, and Cluster ID

# In[3]:


data=pickle.load(open(os.path.join(pkldir,'DFODataModelClusterBIO_1905temp.pkl'),'rb'))


# In[7]:


data.loc[(data.Cluster==5)&(data.mod_Chl<0)]


# In[18]:


# cluster 5 points where both model and observed chlorophyll are available;
# defined here (ahead of its original execution order) so the cells below can use ii
ii=(data.Cluster==5)&(~np.isnan(data.mod_Chl))&(~np.isnan(data.Chl))
plt.hist(data.loc[ii,['mod_Chl']].values-data.loc[ii,['Chl']].values,50)


# In[12]:


(np.min(data.loc[ii,['mod_Chl']].values),np.max(data.loc[ii,['mod_Chl']].values),
 np.min(data.loc[ii,['Chl']].values),np.max(data.loc[ii,['Chl']].values))


# In[13]:


(np.min(data.loc[ii,['Z']].values),np.max(data.loc[ii,['Z']].values),
 np.min(data.loc[ii,['Chl']].values),np.max(data.loc[ii,['Chl']].values))


# In[14]:


plt.hist(data.loc[ii,['Z']].values)


# In[19]:


plt.hist(data.loc[ii,['mod_Chl','Chl']],50)


# In[20]:


et.printstats(data.loc[ii,:],'Chl','mod_Chl')


# In[19]:


data.keys()


# In[20]:


# create dictionary of dataframe views by year
datyr=dict()
yy=np.array([ii.year for ii in data['dtUTC']])
yys=np.unique(yy)
for yr in yys:
    datyr[yr]=data.loc[yy==yr]
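
# In[ ]:


# Added sketch (not in the original notebook): number of matched samples per
# year, using the per-year views built above.
for yr in yys:
    print(yr,len(datyr[yr]))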

# In[21]:


# plot matched data sampling times
clist=('gold','aqua','plum','c','m','r','g','b','brown','gray')
fig,axL=plt.subplots(1,1,figsize=(12,1))
for ii, yr in enumerate(yys):
    # shift each year's sample times onto a common one-year axis
    dshift=dt.datetime(yys[0],1,1)-dt.datetime(yr,1,1)
    axL.plot(datyr[yr].dtUTC+dshift,np.zeros(np.shape(datyr[yr].dtUTC))+.1*ii,'.',
             color=clist[ii],markersize=4,label=str(yr))
axL.set_yticks(())
yearsFmt = mdates.DateFormatter('%d %b')
axL.xaxis.set_major_formatter(yearsFmt)
axL.xaxis.set_ticks([dt.datetime(int(yys[0]),1,1),dt.datetime(int(yys[0]),3,1),
                     dt.datetime(int(yys[0]),5,1),dt.datetime(int(yys[0]),7,1),
                     dt.datetime(int(yys[0]),9,1),dt.datetime(int(yys[0]),11,1),
                     dt.datetime(int(yys[0])+1,1,1)])
for tick in axL.get_xticklabels():
    tick.set_rotation(90)
    tick.set_horizontalalignment('center')
axL.set_ylim(-.1,.1*(len(datyr.keys())+1))
axL.set_xlim(dt.datetime(yys[0],1,1),dt.datetime(yys[0],12,31))
axL.legend()
axL.set_frame_on(False)
xmin, xmax = axL.get_xaxis().get_view_interval()
ymin, ymax = axL.get_yaxis().get_view_interval()
axL.add_artist(mpl.lines.Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))


# #### Display stats

# In[22]:


print('Chl depth<15:')
et.printstats(data.loc[data.Z<15,:],'Chl','mod_Chl')
for icl in range(0,6):
    print('cluster',icl)
    et.printstats(data.loc[data.Cluster==icl,:],'Chl','mod_Chl')


# In[22]:


# as above, but with the per-cluster stats restricted to the upper 10 m
print('Chl depth<15:')
et.printstats(data.loc[data.Z<15,:],'Chl','mod_Chl')
for icl in range(0,6):
    print('cluster',icl)
    et.printstats(data.loc[(data.Cluster==icl)&(data.Z<10),:],'Chl','mod_Chl')


# ### mod vs. obs plots

# In[23]:


cm1=cmocean.cm.thermal
bounds = np.arange(0,30)
norm = mpl.colors.BoundaryNorm(boundaries=bounds, ncolors=200)
fig,ax=plt.subplots(1,1,figsize=(3,3))
args={'marker':'.','s':8,'norm':norm}
ps=et.varvarScatter(ax,data,'l10_obsChl','l10_modChl','Z',cm=cm1,args=args)
cb=fig.colorbar(ps,ax=ax,boundaries=np.arange(0,30))
cb.set_label('Depth (m)')
ax.set_ylabel('Model')
ax.set_xlabel('Obs')
ax.set_title('log10[Chl (mg/m3)]')
ax.set_xlim(-3,2)
ax.set_ylim(-3,2)
ax.plot((-3,2),(-3,2),'k-')


# In[24]:


# map clusters
fig, ax = plt.subplots(figsize = (8,6))
viz_tools.set_aspect(ax, coords = 'map')
m=ax.scatter(data['Lon'], data['Lat'],c=data['Cluster'],s=5,cmap=plt.get_cmap('tab10',6))
grid = nc.Dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc')
viz_tools.plot_coastline(ax, grid, coords = 'map')
plt.colorbar(m)
ax.set_ylim(48, 50.5)
ax.set_xlim(-125.7, -122.5);


# In[25]:


## Plot residuals by year day for each cluster


# In[29]:


fig,ax=plt.subplots(6,1,figsize=(16,16))
fig.subplots_adjust(hspace=.6)
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    ix=ax[icl]
    m=ix.scatter(icld['YD'],icld['mod_Chl']-icld['Chl'],c=icld['Year'],s=10,cmap=plt.get_cmap('Accent',6))
    ix.set_xlabel('Year Day')
    ix.set_ylabel('Model-Obs\n Chl (mg/m3)')
    ix.set_xlim(0,366)
    ix.set_ylim(-30,30)
    ix.set_title(f'Cluster {icl}')
    fig.colorbar(m,ax=ix)
    ix.plot((0,366),(0,0),'-',color='lightgray')


# In[30]:


# same residuals, on the log scale used for the stats
fig,ax=plt.subplots(6,1,figsize=(16,16))
fig.subplots_adjust(hspace=.6)
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    ix=ax[icl]
    m=ix.scatter(icld['YD'],icld['l10_modChl']-icld['l10_obsChl'],c=icld['Year'],s=10,cmap=plt.get_cmap('Accent',6))
    ix.set_xlabel('Year Day')
    ix.set_ylabel('Model-Obs\n log10[Chl (mg/m3)+.001]')
    ix.set_xlim(0,366)
    ix.set_ylim(-3,3)
    ix.set_title(f'Cluster {icl}')
    fig.colorbar(m,ax=ix)
    ix.plot((0,366),(0,0),'-',color='lightgray')
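
# In[ ]:


# Added sketch (not in the original notebook): numeric summary of the residual
# plots above -- mean model-obs chlorophyll bias per cluster (cluster 0 holds
# the points that were not assigned a cluster).
for icl in range(0,6):
    icld=data.loc[data.Cluster==icl]
    print(icl,np.nanmean(icld['mod_Chl']-icld['Chl']))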