#!/usr/bin/env python
# coding: utf-8

# Attempting to isolate data from Rob's analysis that might be useful for the Salish Sea model.
#
# Data is from NOAA and IOS, 2003-2014. (A bigger date range is possible.)

# **Plan**
#
# Compare monthly temperature/salinity profiles at the mouth of Juan de Fuca between observations and model boundary conditions.

# In[1]:

import ACTDR
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
from salishsea_tools import viz_tools, nc_tools
from salishsea_tools.nowcast import analyze
import seaborn as sns
import datetime


# In[2]:

ACTDR.load_dat('JuandeFucaMouth.dat')


# In[3]:

b = '/data/nsoontie/MEOPAR/NEMO-forcing/grid/bathy_meter_SalishSea2.nc'
grid_B = nc.Dataset(b)
bathy = grid_B.variables['Bathymetry'][:]
lat = grid_B.variables['nav_lat'][:]
lon = grid_B.variables['nav_lon'][:]


# In[4]:

get_ipython().run_line_magic('matplotlib', 'inline')


# In[5]:

sns.set_style("darkgrid")


# Plot casts on map

# In[6]:

fig, ax = plt.subplots(1, 1)
for cast in ACTDR.CTD_DAT:
    ax.plot(cast['Longitude'], cast['Latitude'], 'o')
viz_tools.plot_coastline(ax, grid_B, coords='map')


# In[7]:

cast = ACTDR.CTD_DAT[0]
print cast.keys()


# Use pandas to help manipulate the data

# In[8]:

import pandas as pd


# # Manipulating the Observed Data
#
# In the next few cells, I will familiarize myself with the data and how to handle it.

# In[9]:

data = pd.DataFrame(ACTDR.CTD_DAT)


# Histogram by year

# In[10]:

min_year = data['Year'].min()
max_year = data['Year'].max()
data.hist('Year', bins=np.arange(min_year-0.5, max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax = plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)


# In[11]:

data['Year'].max()


# ## Depth between 200 and 300m
#
# This seems like a reasonable first attempt at eliminating data that isn't near the open boundary.

# In[12]:

# keep casts whose maximum sampled depth is between 200 m and 300 m
data_dep200to300 = data[(data['Depth'].apply(max) > 200) & (data['Depth'].apply(max) < 300)]
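
# As a rough cross-check on the filter above, the same idea can be wrapped in a small
# helper that also allows an optional longitude window (a minimal sketch; the column
# names follow the DataFrame built above, and the longitude bounds are illustrative
# assumptions, not values used elsewhere in this notebook).

def filter_casts(df, dep_min=200, dep_max=300, lon_min=None, lon_max=None):
    """Return casts whose maximum sampled depth lies strictly between dep_min and dep_max,
    optionally restricted to a longitude window."""
    max_dep = df['Depth'].apply(max)
    keep = (max_dep > dep_min) & (max_dep < dep_max)
    if lon_min is not None:
        keep = keep & (df['Longitude'] > lon_min)
    if lon_max is not None:
        keep = keep & (df['Longitude'] < lon_max)
    return df[keep]

# e.g. filter_casts(data) should reproduce data_dep200to300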

# In[13]:

def plot_grouped_data(data, key, field,
                      xmin=-126, xmax=-124, ymin=48, ymax=49,
                      zmin=0, zmax=300, vmin=28, vmax=34):
    """Groups the data based on key, then plots the field of the grouped data over depth.
    Example: plot_grouped_data(data, 'Month', 'Salinity')"""
    group = data.groupby(key)
    num = group.ngroups
    fig, axs = plt.subplots(2, num, figsize=(15, 8))
    for ind, ax, axm in zip(group.groups.keys(), axs[0, :], axs[1, :]):
        dat = group.get_group(ind)
        for dep, var, lon, lat in zip(dat['Depth'], dat[field], dat['Longitude'], dat['Latitude']):
            ax.plot(var, dep)
            axm.plot(lon, lat, 'o')
        ax.set_title('{}: {}'.format(key, ind))
        ax.set_ylim([zmax, zmin])
        ax.set_xlim([vmin, vmax])
        ax.set_xlabel(field)
        ax.set_ylabel('Depth [m]')
        # map
        viz_tools.plot_coastline(axm, grid_B, coords='map')
        axm.set_ylim([ymin, ymax])
        axm.set_xlim([xmin, xmax])
    return fig


# ## Group by Year
#
# Look at the coverage of data in each year

# In[14]:

fig = plot_grouped_data(data_dep200to300, 'Year', 'Salinity')


# In[15]:

fig = plot_grouped_data(data_dep200to300, 'Year', 'Temperature', vmin=5, vmax=10)


# In[16]:

min_year = data_dep200to300['Year'].min()
max_year = data_dep200to300['Year'].max()
data_dep200to300.hist('Year', bins=np.arange(min_year-0.5, max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax = plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)


# ## Group by Month
#
# Look at the coverage of data in each month

# In[17]:

fig = plot_grouped_data(data_dep200to300, 'Month', 'Salinity')


# In[18]:

fig = plot_grouped_data(data_dep200to300, 'Month', 'Temperature', vmin=5, vmax=10)


# In[19]:

# bin edges at 0.5, 1.5, ..., 12.5 so that all twelve months are covered
data_dep200to300.hist('Month', bins=np.arange(0.5, 13.5))
plt.ylabel('num casts')
plt.xlabel('month')


# ## Max Salinity range of Observations

# In[20]:

print 'Highest maximum salinity in all of the casts: {0:.3}'.format(max(data_dep200to300['Salinity'].apply(max)))
print 'Lowest maximum salinity in all of the casts: {0:.3}'.format(min(data_dep200to300['Salinity'].apply(max)))
print 'Mean maximum salinity in all of the casts: {0:.3}'.format(np.mean(data_dep200to300['Salinity'].apply(max)))


# # Model boundary conditions
# Start to look at the model boundary conditions.
#
# In each month, plot the salinity/temperature at a few points along the open boundary. Include the observations on the same axis.

# In[21]:

# Loading boundary conditions
bc = nc.Dataset('/data/nsoontie/MEOPAR/NEMO-forcing/open_boundaries/west/SalishSea2_Masson_DC.nc')
nc_tools.show_variables(bc)


# ## Selection of observations
# Remove observations with longitude < -124.8 to exclude points far from the model's open boundary. Check the histograms again.

# In[22]:

data_compare = data_dep200to300[data_dep200to300['Longitude'] > -124.8]
min_year = data_compare['Year'].min()
max_year = data_compare['Year'].max()
data_compare.hist('Year', bins=np.arange(min_year-0.5, max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax = plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)


# In[23]:

data_compare.hist('Month', bins=np.arange(0.5, 13.5))
plt.ylabel('num casts')
plt.xlabel('month')
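
# As a quick cross-check on the histograms above, the same coverage can be printed as
# counts per year and per month with a groupby (a minimal sketch; it only restates the
# plots as numbers).

print data_compare.groupby('Year').size()
print data_compare.groupby('Month').size()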

# ### plotting function

# In[24]:

def compare_bc_obs(month, indices, field, data_obs, bcs,
                   xmin=-125, xmax=-124, ymin=48, ymax=49,
                   zmin=0, zmax=300, vmin=29, vmax=35):
    """Compares the observations from IOS/NOAA data near the mouth of Juan de Fuca
    with model boundary conditions. Comparisons are during month and at open boundary indices.
    field is the variable compared ('Temperature' or 'Salinity').
    Observations are stored in data_obs (DataFrame).
    Model bcs are stored in bcs (netCDF handle).
    indices is a list of model indices to examine."""
    salinity_flag = False  # print extra stuff if we are looking at salinity
    fig, [ax, axm] = plt.subplots(1, 2, figsize=(10, 3))
    # lists to store maxes of observations and model
    max_obs = []
    max_bcs = []
    # Model variables
    time = bcs.variables['time_counter']
    depth = bcs.variables['deptht']
    lats = bcs.variables['nav_lat']
    lons = bcs.variables['nav_lon']
    if field == 'Salinity':
        var = bcs.variables['vosaline']
        salinity_flag = True
    elif field == 'Temperature':
        var = bcs.variables['votemper']
    # convert times to datetimes
    start = datetime.datetime(2014, 1, 1)  # arbitrary year
    dates = [start + datetime.timedelta(days=7*t) for t in time]
    # Look up model dates in the desired month
    t_inds = []
    for t, d in enumerate(dates):
        if d.month == month:
            t_inds.append(t)
    # plotting
    # plot model
    for i in indices:  # iterate through space index
        for t in t_inds:  # iterate through time index
            ax.plot(var[t, :, 0, i], depth[:], '-ob', label='model')
            max_bcs.append(np.nanmax(var[t, :, 0, i]))
    # location on map
    axm.plot(lons[0, indices], lats[0, indices], 'ob', label='model')
    # plot obs
    data_m = data_obs[data_obs['Month'] == month]
    for dep, var_obs, lon, lat in zip(data_m['Depth'], data_m[field],
                                      data_m['Longitude'], data_m['Latitude']):
        ax.plot(var_obs, dep, '-*r', label='obs', alpha=0.1)
        max_obs.append(np.nanmax(var_obs))
        # plot location on map
        axm.plot(lon, lat, '*r', label='obs')
    ax.set_ylim([zmax, zmin])
    ax.set_xlim([vmin, vmax])
    ax.set_title('Month {}'.format(month))
    ax.set_ylabel('Depth [m]')
    ax.set_xlabel(field)
    # plot map
    viz_tools.plot_coastline(axm, grid_B, coords='map')
    axm.set_ylim([ymin, ymax])
    axm.set_xlim([xmin, xmax])
    # fake the legend
    simObs, = axm.plot(0, 0, 'r*')
    simMod, = axm.plot(0, 0, 'bo')
    axm.legend([simObs, simMod], ['obs', 'model'])
    # Maxes of obs/model
    max_obs_max = np.nanmax(max_obs)
    min_obs_max = np.nanmin(max_obs)
    mean_obs_max = np.nanmean(max_obs)
    max_bcs_max = np.nanmax(max_bcs)  # maximum of the model maxes
    if salinity_flag:
        axm.text(xmin+0.1, ymin+.25, 'Max of obs maxes {0:.4}'.format(max_obs_max))
        axm.text(xmin+0.1, ymin+.15, 'Min of obs maxes {0:.4}'.format(min_obs_max))
        axm.text(xmin+0.1, ymin+.05, 'Max of model maxes {0:.4}'.format(max_bcs_max))

    return fig, max_obs_max, min_obs_max, mean_obs_max, max_bcs_max
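
# The month lookup inside compare_bc_obs can be pulled out as a small helper for reuse.
# This is a sketch under the same assumption made above: the boundary-condition file is
# treated as a weekly climatology, so each time_counter value t is interpreted as 7*t
# days after an arbitrary January 1.

def month_time_indices(time_counter, month, year=2014):
    """Return indices of weekly records whose date falls in the given month."""
    start = datetime.datetime(year, 1, 1)
    dates = [start + datetime.timedelta(days=7 * t) for t in time_counter]
    return [t for t, d in enumerate(dates) if d.month == month]

# e.g. month_time_indices(bc.variables['time_counter'][:], 7) gives the July records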

# ## Salinity
# Month by month salinity comparisons

# In[25]:

maxes_obs = []    # list for the maximum of the observed maxes in each month
mins_obs = []     # list for the minimum of the observed maxes in each month
means_obs = []    # list for the mean of the observed maxes in each month
maxes_model = []  # list for the maximum of the model maxes in each month

locs = [20, 40, 60, 80, 790, 810, 830, 850]  # indices of boundary conditions to use in comparison

grouped = data_compare.groupby('Month')

for m in grouped.groups.keys():
    fig, max_obs_max, min_obs_max, mean_obs_max, max_bcs_max = compare_bc_obs(m, locs, 'Salinity', data_compare, bc)
    maxes_obs.append(max_obs_max)
    mins_obs.append(min_obs_max)
    means_obs.append(mean_obs_max)
    maxes_model.append(max_bcs_max)

maxes_model = np.array(maxes_model)
means_obs = np.array(means_obs)
mins_obs = np.array(mins_obs)
maxes_obs = np.array(maxes_obs)

print 'Mean difference between model max and observed max: {0:.4}'.format(np.mean(maxes_model-maxes_obs))
print 'Mean difference between model max and minimum of observed max: {0:.4}'.format(np.mean(maxes_model-mins_obs))
print 'Max difference between model max and observed max: {0:.4}'.format(np.max(maxes_model-maxes_obs))
print 'Max difference between model max and minimum of observed max: {0:.4}'.format(np.max(maxes_model-mins_obs))
print 'Min difference between model max and observed max: {0:.4}'.format(np.min(maxes_model-maxes_obs))
print 'Min difference between model max and minimum of observed max: {0:.4}'.format(np.min(maxes_model-mins_obs))


# * Generally, the model is too fresh at depth. More summary below.

# ### Monthly max observed and modelled salinities

# In[26]:

months = grouped.groups.keys()
plt.plot(months, maxes_model, 'o-', label='model')
plt.plot(months, maxes_obs, '*-', label='obs')
plt.plot(months, mins_obs, '*-', label='Min of obs maxes')
plt.plot(months, means_obs, '*-', label='Mean of obs maxes')
ax = plt.gca()
ax.set_xlabel('Month')
ax.set_ylabel('Max Salinity')
ax.set_title('Monthly maximum salinity')
ax.legend(loc=0)


# * Hmm, there is a strong seasonal trend in the model max salinity but not in the observations. Why?
# * Are there enough observations in each month to see a significant seasonal trend? Some months don't have many observations (July/August).
# * There is more variability in the observed minimum of the max salinities.

# ## Temperature
# Month by month temperature comparisons

# In[27]:

grouped = data_compare.groupby('Month')
for m in grouped.groups.keys():
    fig = compare_bc_obs(m, locs, 'Temperature', data_compare, bc, vmin=5, vmax=10)


# # Compare salinity below a depth
# Idea: compare average salinity in a depth range between model points in a month and observed casts.
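
# For reference, the depth average used in the next cell can be approximated with a
# trapezoidal rule. The sketch below is only an illustrative stand-in for the
# salishsea_tools analyze.depth_average call used below, not its implementation.

def simple_depth_average(var, depths):
    """Approximate the depth mean of var sampled at the given depths (metres)."""
    var = np.asarray(var, dtype=float)
    depths = np.asarray(depths, dtype=float)
    return np.trapz(var, depths) / (depths[-1] - depths[0])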

# In[28]:

def compare_average_salinity_depth(month, dmin, dmax, indices, data_obs, bcs):
    """Compares the average salinity in a depth range for observations and
    boundary conditions in a month.
    month is the month for comparison
    dmin, dmax define the depth range (metres)
    indices is a list of model indices
    data_obs are the observations (DataFrame)
    bcs are the model bcs (netCDF handle)
    returns a dict means with keys:
    mean_obs - depth-averaged salinity of each observation cast
    mean_bcs - depth-averaged model salinity at each index and time
    """
    # initialize lists
    mean_bcs = []
    mean_obs = []
    # Model averaging
    # Model variables
    time = bcs.variables['time_counter']
    depth = bcs.variables['deptht']
    var = bcs.variables['vosaline'][:]
    # convert times to datetimes
    start = datetime.datetime(2014, 1, 1)  # arbitrary year
    dates = [start + datetime.timedelta(days=7*t) for t in time]
    # Look up model dates in the desired month
    t_inds = []
    for t, d in enumerate(dates):
        if d.month == month:
            t_inds.append(t)
    # depth average the model
    depth_inds = np.where(np.logical_and(depth[:] < dmax, depth[:] > dmin))
    dep_range = depth[depth_inds]
    for i in indices:  # iterate through space index
        for t in t_inds:  # iterate through time index
            sal_in_drange = var[t, depth_inds, 0, i]
            avg_sal = analyze.depth_average(sal_in_drange, dep_range, depth_axis=1)
            mean_bcs.append(avg_sal[0])
    # observation averaging
    data_m = data_obs[data_obs['Month'] == month]
    for dep, var_obs in zip(data_m['Depth'], data_m['Salinity']):
        dep = np.array(dep)
        var_obs = np.array(var_obs)
        depth_inds = np.where(np.logical_and(dep < dmax, dep > dmin))
        dep_range = dep[depth_inds]
        sal_in_drange = var_obs[depth_inds]
        avg_sal = analyze.depth_average(sal_in_drange, dep_range, depth_axis=0)
        mean_obs.append(avg_sal)
    # collect results in a dict
    means = {'mean_bcs': mean_bcs, 'mean_obs': mean_obs}
    return means


# Comparing average salinity in a depth range

# In[29]:

dmin = 150
dmax = 250
monthly_means = {'obs': [], 'bcs': []}
print 'Summary of depth averaged salinities between {} and {} m'.format(dmin, dmax)
grouped = data_compare.groupby('Month')
for m in grouped.groups.keys():
    fig, ax = plt.subplots(1, 1, figsize=(5, 2))
    means = compare_average_salinity_depth(m, dmin, dmax, locs, data_compare, bc)
    monthly_means['obs'].append(np.nanmean(means['mean_obs']))
    monthly_means['bcs'].append(np.nanmean(means['mean_bcs']))
    # list for box plots
    means_list = [means['mean_bcs'], means['mean_obs']]
    box = ax.boxplot(means_list, labels=['model', 'obs'])
    ax.set_title('Month {}'.format(m))
    ax.set_ylim([33, 34])

# monthly means
months = grouped.groups.keys()
fig, ax = plt.subplots(1, 1)
ax.plot(months, monthly_means['bcs'], label='model')
ax.plot(months, monthly_means['obs'], label='obs')
ax.legend()
ax.set_title('Monthly means')


# * The observed seasonal trend is more apparent in the depth averages between 150 m and 250 m.
# * The model is still consistently 0.2 psu to 0.5 psu fresher than the observations. Is that significant?

# In[30]:

print 'Summary of depth averaged ({}-{} m) salinity differences'.format(dmin, dmax)
print 'Mean difference {0:.4}'.format(np.nanmean(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))
print 'Max difference {0:.4}'.format(np.nanmax(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))
print 'Min difference {0:.4}'.format(np.nanmin(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))


# # Surface Salinity Comparison

# In[36]:

dmin = 0
dmax = 50
monthly_means = {'obs': [], 'bcs': []}
print 'Summary of depth averaged salinities between {} and {} m'.format(dmin, dmax)
grouped = data_compare.groupby('Month')
for m in grouped.groups.keys():
    fig, ax = plt.subplots(1, 1, figsize=(5, 2))
    means = compare_average_salinity_depth(m, dmin, dmax, locs, data_compare, bc)
    monthly_means['obs'].append(np.nanmean(means['mean_obs']))
    monthly_means['bcs'].append(np.nanmean(means['mean_bcs']))
    # list for box plots
    means_list = [means['mean_bcs'], means['mean_obs']]
    box = ax.boxplot(means_list, labels=['model', 'obs'])
    ax.set_title('Month {}'.format(m))
    ax.set_ylim([30, 34])

# monthly means
months = grouped.groups.keys()
fig, ax = plt.subplots(1, 1)
ax.plot(months, monthly_means['bcs'], label='model')
ax.plot(months, monthly_means['obs'], label='obs')
ax.legend()
ax.set_title('Monthly means')


# The observed trend is that surface waters have higher salinity in the late summer/early fall months. This trend is reflected in the model bcs.
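
# The 150-250 m and 0-50 m comparisons above repeat the same loop, so they could be
# wrapped in a single helper. The sketch below only reuses compare_average_salinity_depth
# and objects already defined in this notebook (data_compare, bc, locs).

def monthly_depth_avg_comparison(dmin, dmax, ylim, data_obs, bcs, indices):
    """Monthly box plots of depth-averaged salinity (model vs obs); returns the monthly means."""
    monthly_means = {'obs': [], 'bcs': []}
    grouped = data_obs.groupby('Month')
    for m in grouped.groups.keys():
        fig, ax = plt.subplots(1, 1, figsize=(5, 2))
        means = compare_average_salinity_depth(m, dmin, dmax, indices, data_obs, bcs)
        monthly_means['obs'].append(np.nanmean(means['mean_obs']))
        monthly_means['bcs'].append(np.nanmean(means['mean_bcs']))
        ax.boxplot([means['mean_bcs'], means['mean_obs']], labels=['model', 'obs'])
        ax.set_title('Month {}'.format(m))
        ax.set_ylim(ylim)
    return monthly_means

# e.g. monthly_depth_avg_comparison(150, 250, [33, 34], data_compare, bc, locs)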

# # Summary
#
# ### Salinity
# * The boundary condition at depth is fresher than all of the observations.
#   * The degree of freshness varies with the season.
#   * Mean difference in salinity below 150m is ~0.33 (model fresher)
#   * Max difference in salinity below 150m is ~0.53
#   * Min difference in salinity below 150m is ~0.13
# * Is that significant?
#   * Our spin up deep water renewal is about 0.2 psu too fresh (assuming 31 is the target). I think part of that can be explained by the bcs. I need to dig into the literature about the density of the upwelled coastal waters entering the system and its effect on the deep water renewal. I think I recall reading a Masson paper about this.
# * I'm interested in seeing how salty our nowcast deep water renewal will be and when it will happen. I wonder if there is a threshold for the density of the bcs in order to observe renewal. Something to play with...
#   * Will this year's DWR be about the same salinity as spin up, even though the salinity of the Haro Strait waters is much fresher than in the spin up year?
# * Our intermediate SoG is too fresh, and I don't think the salinity of the incoming waters really affects that. There is still too much mixing there, but it may be linked to the background rate. I think this because the SoG intermediate waters are much tamer in terms of mixing than the Haro Strait waters. So, I think the mixing in intermediate SoG is mostly diffusion, which would be determined by our background value. Mixing in the surface SoG is driven by wind, so that is different.
#
#
# ### Temperature
#
# * Temperature is a lot more variable, but at depth the trend is that the model is warmer.
# * Deep fall temperatures look good.

# # Questions and Next Steps
#
# * More reading about deep water renewal and the density of upwelled waters.
# * Do I have enough data to make a conclusion about the saltiness of the bcs? Should I be looking at data before 2003?
# * Is it better to just reduce mixing to get the saltier deep water renewal, or to come up with saltier bcs?
# * Would a 0.5 psu increase at the open boundary make a difference? Perhaps something to test in the small domain, but I need to re-examine whether I get deep water renewal there. A year-long sim? (A rough sketch of building such a test file follows after the final cell.)
# * How is the fresh water in the JdF surface leaving our domain?
#
#

# In[ ]:
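
# A rough sketch of the "+0.5 psu at the open boundary" test mentioned above: copy the
# boundary-condition file and shift vosaline everywhere. The output filename is
# hypothetical, and this naive version shifts every value, including any masked or land
# points, so the result would need checking before being used as forcing.

import shutil

src = '/data/nsoontie/MEOPAR/NEMO-forcing/open_boundaries/west/SalishSea2_Masson_DC.nc'
dst = 'SalishSea2_Masson_DC_plus0p5.nc'  # hypothetical output name

shutil.copy(src, dst)
pert = nc.Dataset(dst, 'r+')
pert.variables['vosaline'][:] = pert.variables['vosaline'][:] + 0.5
pert.close()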