Attempting to isolate data from Rob's analysis that might be useful for the Salish Sea model.
Data is from NOAA and IOS 2003-2014.(Can do a bigger date range)
Plan
Compare monthly temperature/salinity profile in the mouth of Juan de Fuca between observations and model boundary conditions.
import ACTDR
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
from salishsea_tools import viz_tools, nc_tools
from salishsea_tools.nowcast import analyze
import seaborn as sns
import datetime
ACTDR.load_dat('JuandeFucaMouth.dat')
> open JuandeFucaMouth.dat > load CTD_DAT > load STANDARD_KEYS > close JuandeFucaMouth.dat > complete
b = '/data/nsoontie/MEOPAR/NEMO-forcing/grid/bathy_meter_SalishSea2.nc'
grid_B = nc.Dataset(b)
bathy = grid_B.variables['Bathymetry'][:]
lat=grid_B.variables['nav_lat'][:]
lon=grid_B.variables['nav_lon'][:]
%matplotlib inline
sns.set_style("darkgrid")
Plot casts on map
fig,ax=plt.subplots(1,1)
for cast in ACTDR.CTD_DAT:
ax.plot(cast['Longitude'], cast['Latitude'], 'o' )
viz_tools.plot_coastline(ax,grid_B,coords='map')
<matplotlib.contour.QuadContourSet instance at 0x7f1e7baae560>
cast=ACTDR.CTD_DAT[0]
print cast.keys()
['Temperature', 'Longitude', 'Month', 'Depth', 'Year', 'Latitude', 'ID', 'Salinity', 'Day']
Use pandas to help manipulate the data
import pandas as pd
In the next few cells, I will familiarize myself with the data and how to handle it
data = pd.DataFrame(ACTDR.CTD_DAT)
Histogram by year
min_year = data['Year'].min()
max_year = data['Year'].max()
data.hist('Year',bins=np.arange(min_year-0.5,max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax=plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)
data['Year'].max()
2014
This seems like a reasonable first attempt at elimintating data the isn't near the open boundary
#remove data if max depth is smaller than 200 and bigger than 300
data_dep200to300 = data[(data['Depth'].apply(max)>200) & (data['Depth'].apply(max)<300)]
def plot_grouped_data(data,key, field,xmin=-126,xmax=-124,ymin=48,ymax=49,zmin=0,zmax=300,vmin=28,vmax=34):
"""Groups the data based on key. Then plots the field of the grouped data over depth
Examples: plot_grouped_data(data,'Month','Salinity')"""
group = data.groupby(key)
num=group.ngroups
fig,axs = plt.subplots(2,num,figsize=(15,8))
for ind,ax, axm in zip(group.groups.keys(),axs[0,:],axs[1,:]):
dat = group.get_group(ind)
for dep,var,lon,lat in zip(dat['Depth'], dat[field],dat['Longitude'],dat['Latitude']):
ax.plot(var,dep)
axm.plot(lon,lat,'o')
ax.set_title('{}: {}'.format(key, ind))
ax.set_ylim([zmax,zmin])
ax.set_xlim([vmin,vmax])
ax.set_xlabel(field)
ax.set_ylabel('Depth [m]')
#map
viz_tools.plot_coastline(axm,grid_B,coords='map')
axm.set_ylim([ymin,ymax])
axm.set_xlim([xmin,xmax])
Look at the coverage of data in each year
fig = plot_grouped_data(data_dep200to300,'Year','Salinity')
fig = plot_grouped_data(data_dep200to300,'Year','Temperature',vmin=5,vmax=10)
min_year = data_dep200to300['Year'].min()
max_year = data_dep200to300['Year'].max()
data_dep200to300.hist('Year',bins=np.arange(min_year-0.5,max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax=plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)
Look at the coverage of data in each month
fig = plot_grouped_data(data_dep200to300,'Month','Salinity')
fig = plot_grouped_data(data_dep200to300,'Month','Temperature', vmin=5,vmax=10)
data_dep200to300.hist('Month', bins=np.arange(0.5,12.5))
plt.ylabel('num casts')
plt.xlabel('month')
<matplotlib.text.Text at 0x7f1e7a922bd0>
print 'Highest maximum salinity in all of the casts {0:.3}'.format(max(data_dep200to300['Salinity'].apply(max)))
print 'Lowest maximum salinity in all of the casts: {0:.3}'.format(min(data_dep200to300['Salinity'].apply(max)))
print 'Mean maximum salinity in all of the casts {0:.3}'.format(np.mean(data_dep200to300['Salinity'].apply(max)))
Highest maximum salinity in all of the casts 34.0 Lowest maximum salinity in all of the casts: 33.1 Mean maximum salinity in all of the casts 33.9
Start to look at the model boundary conditions.
In each month, plot the salinity/temperature at a few points along the open boundary. Include on the same axis the observations.
#Loading boundary conditions
bc = nc.Dataset('/data/nsoontie/MEOPAR/NEMO-forcing/open_boundaries/west/SalishSea2_Masson_DC.nc')
nc_tools.show_variables(bc)
[u'deptht', u'nav_lat', u'nav_lon', u'nbidta', u'nbjdta', u'nbrdta', u'time_counter', u'vosaline', u'votemper']
Remove observations with longitude < -124.8 to exclude points far from observations. Check histogram again.
data_compare = data_dep200to300[data_dep200to300['Longitude'] > -124.8]
min_year = data_compare['Year'].min()
max_year = data_compare['Year'].max()
data_compare.hist('Year', bins = np.arange(min_year-0.5,max_year+1.5))
plt.ylabel('num casts')
plt.xlabel('year')
ax=plt.gca()
ax.get_xaxis().get_major_formatter().set_useOffset(False)
data_compare.hist('Month',bins=np.arange(0.5,12.5))
plt.ylabel('num casts')
plt.xlabel('month')
<matplotlib.text.Text at 0x7f1e7b1e2f90>
def compare_bc_obs(month, indices, field, data_obs, bcs,
xmin=-125,xmax=-124,ymin=48,ymax=49,zmin=0,zmax=300, vmin=29,vmax=35 ):
"""Compares the observations from IOS/NOAA data near the mouth of Juan de Fuca with model boundary conditions.
Comparisons are during month and at open boundary indices.
dield is compared ('Temperature' or 'Salinity' )
Observations stored in data_obs (DataFrame)
Model bcs stored in bcs (netcdf handle)
indices is a list of model indices to examine"""
salinity_flag = False #Print extra stuff if we are looking at salinity
fig, [ax, axm] = plt.subplots(1,2,figsize=(10,3))
#lists to store maxes of observations and model
max_obs=[]
max_bcs=[]
#Model variables
time = bcs.variables['time_counter']
depth = bcs.variables['deptht']
lats= bcs.variables['nav_lat']
lons = bcs.variables['nav_lon']
if field == 'Salinity':
var = bcs.variables['vosaline']
salinity_flag=True
elif field == 'Temperature':
var = bcs.variables['votemper']
#times convert to date time
start = datetime.datetime(2014,1,1) #arbitrary year.
dates = [ start + datetime.timedelta(days = 7*t) for t in time]
#Look up model dates in the desired month
t_inds = [];
for t,d in enumerate(dates):
if d.month == month:
t_inds.append(t)
#plotting
#plot model
for i in indices: #iterate through space index
for t in t_inds: #iterate through time index
ax.plot( var[t,:,0,i],depth[:],'-ob',label='model')
max_bcs.append(np.nanmax(var[t,:,0,i]))
#location on map
axm.plot(lons[0,indices],lats[0,indices],'ob',label='model')
#plot obs
data_m = data_obs[data_obs['Month']==month]
for dep, var_obs, lon, lat in zip(data_m['Depth'],data_m[field], data_m['Longitude'], data_m['Latitude']):
ax.plot(var_obs, dep,'-*r',label = 'obs',alpha=0.1)
max_obs.append(np.nanmax(var_obs))
#plot location on map
axm.plot(lon,lat,'*r',label='obs')
ax.set_ylim([zmax,zmin])
ax.set_xlim([vmin,vmax])
ax.set_title('Month {}'.format(month))
ax.set_ylabel('Depth [m]')
ax.set_xlabel(field)
#plotmap
viz_tools.plot_coastline(axm,grid_B,coords='map')
axm.set_ylim([ymin,ymax])
axm.set_xlim([xmin,xmax])
#fake the legend
simObs, = axm.plot(0,0,'r*')
simMod, = axm.plot(0,0,'bo')
axm.legend([simObs,simMod], ['obs','model'])
#Maxes of obs/model
max_obs_max = np.nanmax(max_obs)
min_obs_max = np.nanmin(max_obs)
mean_obs_max = np.nanmean(max_obs)
max_bcs_max = np.nanmean(max_bcs)
if salinity_flag:
axm.text(xmin+0.1,ymin+.25,'Max of obs maxes {0:.4}'.format(max_obs_max))
axm.text(xmin+0.1,ymin+.15,'Min of obs maxes {0:.4}'.format(min_obs_max))
axm.text(xmin+0.1,ymin+.05,'Max of model maxes {0:.4}'.format(max_bcs_max))
return fig, max_obs_max, min_obs_max, mean_obs_max, max_bcs_max
Month by month salinity comparisons
maxes_obs =[] #list for the maximum of the observed maxes in each month
mins_obs = [] #list for the minumum of the observed maxes in each month
means_obs = [] #list for the mean of the observed maxes in each month
maxes_model = [] #list for the maximum of the model maxes in each month
locs = [20,40,60,80,790,810,830,850] # indices of boundary conditions to use in comparison
grouped=data_compare.groupby('Month')
for m in grouped.groups.keys():
fig, max_obs_max, min_obs_max, mean_obs_max, max_bcs_max = compare_bc_obs(m,locs,'Salinity',data_compare,bc)
maxes_obs.append(max_obs_max)
mins_obs.append(min_obs_max)
means_obs.append(mean_obs_max)
maxes_model.append(max_bcs_max)
maxes_model = np.array(maxes_model)
means_obs = np.array(means_obs)
mins_obs = np.array(mins_obs)
maxes_obs = np.array(maxes_obs)
print 'Mean difference between model max and observed max: {0:.4}'.format(np.mean(maxes_model-maxes_obs))
print 'Mean difference between model max and minimum of observed max: {0:.4}'.format(np.mean(maxes_model-mins_obs))
print 'Max difference between model max and observed max: {0:.4}'.format(np.max(maxes_model-maxes_obs))
print 'Max difference between model max and minimum of observed max: {0:.4}'.format(np.max(maxes_model-mins_obs))
print 'Min difference between model max and observed max: {0:.4}'.format(np.min(maxes_model-maxes_obs))
print 'Min difference between model max and minimum of observed max: {0:.4}'.format(np.min(maxes_model-mins_obs))
Mean difference between model max and observed max: -0.4841 Mean difference between model max and minimum of observed max: -0.215 Max difference between model max and observed max: -0.2949 Max difference between model max and minimum of observed max: 0.1999 Min difference between model max and observed max: -0.7103 Min difference between model max and minimum of observed max: -0.5253
months = grouped.groups.keys()
plt.plot(months,maxes_model,'o-',label='model')
plt.plot(months, maxes_obs,'*-',label='obs')
plt.plot(months, mins_obs,'*-',label='Min of obs maxes')
plt.plot(months, means_obs,'*-',label='Mean of obs maxes')
ax=plt.gca()
ax.set_xlabel('Month')
ax.set_ylabel('Max Salinity')
ax.set_title('Monthly maximum salinity')
ax.legend(loc=0)
<matplotlib.legend.Legend at 0x7f1e7accbb10>
Month by month temperature comparisons
grouped=data_compare.groupby('Month')
for m in grouped.groups.keys():
fig = compare_bc_obs(m,locs,'Temperature',data_compare,bc,vmin=5,vmax=10)
Idea: compare average salinity in a depth range between model points in a month and observed casts.
def compare_average_salinity_depth(month, dmin, dmax, indices, data_obs, bcs):
"""Compares the average salinity in a depth range for observations and boundary conditions in a month
month is the month for comaprison
dmin, dmax define the depth range (metres)
indices is a list of model indicest
data_obs are the observations (DataFrame)
bcs are the model bcs (netcdf)
returns data frame means with columns:
mean_obs array of mean salinity in each observation cast
mean_bcs mean model salinity in at each index
"""
#initialize arrays
mean_bcs=[]
mean_obs =[]
#Model averaging
#Model variables
time = bcs.variables['time_counter']
depth = bcs.variables['deptht']
var = bcs.variables['vosaline'][:]
#times convert to date time
start = datetime.datetime(2014,1,1) #arbitrary year.
dates = [ start + datetime.timedelta(days = 7*t) for t in time]
#Look up model dates in the desired month
t_inds = [];
for t,d in enumerate(dates):
if d.month == month:
t_inds.append(t)
#depth average the model
depth_inds = np.where(np.logical_and(depth[:]<dmax,depth[:]>dmin))
dep_range = depth[depth_inds]
for i in indices: #iterate through space index
for t in t_inds: #iterate through time index
sal_in_drange = var[t,depth_inds,0,i]
avg_sal = analyze.depth_average(sal_in_drange,dep_range,depth_axis=1)
mean_bcs.append(avg_sal[0])
#observation averaging
data_m = data_obs[data_obs['Month']==month]
for dep, var_obs in zip(data_m['Depth'],data_m['Salinity']):
dep=np.array(dep)
var_obs = np.array(var_obs)
depth_inds = np.where(np.logical_and(dep[:]<dmax,dep[:]>dmin))
dep_range = dep[depth_inds]
sal_in_drange = var_obs[depth_inds]
avg_sal = analyze.depth_average(sal_in_drange,dep_range,depth_axis=0)
mean_obs.append(avg_sal)
#add to dict frame
means={'mean_bcs': mean_bcs, 'mean_obs': mean_obs}
return means
Comparing average salinity in a depth range
dmin=150; dmax=250;
monthly_means={'obs':[], 'bcs':[]}
print 'Summary of depth averaged salinities between {} and {} m'.format(dmin,dmax)
grouped=data_compare.groupby('Month')
for m in grouped.groups.keys():
fig,ax=plt.subplots(1,1,figsize=(5,2))
means = compare_average_salinity_depth(m,dmin,dmax,locs,data_compare,bc)
monthly_means['obs'].append(np.nanmean(means['mean_obs']))
monthly_means['bcs'].append(np.nanmean(means['mean_bcs']))
# list for box plots
means_list = [means['mean_bcs'], means['mean_obs']]
box =ax.boxplot(means_list,labels=['model','obs'])
ax.set_title('Month {}'.format(m))
ax.set_ylim([33,34])
#monthly means
months = grouped.groups.keys()
fig,ax=plt.subplots(1,1)
ax.plot(months, monthly_means['bcs'],label='model')
ax.plot(months, monthly_means['obs'],label='obs')
ax.legend()
ax.set_title('Monthly means')
Summary of depth averaged salinities between 150 and 250 m
<matplotlib.text.Text at 0x7f1e7b867b90>
print 'Summary of depth averaged ({}-{} m) salinity differences'.format(dmin,dmax)
print 'Mean difference {0:.4}'.format(np.nanmean(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))
print 'Max difference {0:.4}'.format(np.nanmax(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))
print 'Min difference {0:.4}'.format(np.nanmin(np.array(monthly_means['obs']) - np.array(monthly_means['bcs'])))
Summary of depth averaged (150-250 m) salinity differences Mean difference 0.3311 Max difference 0.5291 Min difference 0.1265
dmin=0; dmax=50;
monthly_means={'obs':[], 'bcs':[]}
print 'Summary of depth averaged salinities between {} and {} m'.format(dmin,dmax)
grouped=data_compare.groupby('Month')
for m in grouped.groups.keys():
fig,ax=plt.subplots(1,1,figsize=(5,2))
means = compare_average_salinity_depth(m,dmin,dmax,locs,data_compare,bc)
monthly_means['obs'].append(np.nanmean(means['mean_obs']))
monthly_means['bcs'].append(np.nanmean(means['mean_bcs']))
# list for box plots
means_list = [means['mean_bcs'], means['mean_obs']]
box =ax.boxplot(means_list,labels=['model','obs'])
ax.set_title('Month {}'.format(m))
ax.set_ylim([30,34])
#monthly means
months = grouped.groups.keys()
fig,ax=plt.subplots(1,1)
ax.plot(months, monthly_means['bcs'],label='model')
ax.plot(months, monthly_means['obs'],label='obs')
ax.legend()
ax.set_title('Monthly means')
Summary of depth averaged salinities between 0 and 50 m
<matplotlib.text.Text at 0x7f1e7ae4a350>
Observational trend is that surface waters have higher salinity in the lte summer/early fall months. This trend is reflected in the model bcs.