import datetime as dt
import numpy as np
import netCDF4 as nc
import pandas as pd
import glob
from salishsea_tools import geo_tools
import gsw
import os
import pytz
import matplotlib.pyplot as plt
import cmocean as cmo
import warnings
from sqlalchemy import create_engine, case, MetaData
from sqlalchemy.orm import create_session, aliased
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.sql import and_, or_, not_, func
from salishsea_tools import viz_tools
pd.set_option('display.max_colwidth', -1)
%matplotlib inline
basedir='/ocean/shared/SalishSeaCastData/DFO/CTD/'
dbname='DFO_CTD.sqlite'
datelims=()
engine = create_engine('sqlite:///' + basedir + dbname, echo = False)
grid = nc.Dataset('/data/eolson/MEOPAR/NEMO-forcing-new/grid/bathymetry_201702.nc')
md=MetaData()
md.reflect(engine)
Base = automap_base(metadata=md)
# reflect the tables in salish.sqlite:
Base.prepare()
# mapped classes have been created
# existing tables:
StationTBL=Base.classes.StationTBL
ObsTBL=Base.classes.ObsTBL
CalcsTBL=Base.classes.CalcsTBL
AncTBL=Base.classes.AncillaryTBL
#JDFLocsTBL=Base.classes.JDFLocsTBL
session = create_session(bind = engine, autocommit = False, autoflush = True)
qry=session.query(ObsTBL.Depth,ObsTBL.Pressure,CalcsTBL.Z,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,CalcsTBL.Salinity_SA,CalcsTBL.Salinity_T0_C0_SA,CalcsTBL.Salinity_T1_C1_SA,
CalcsTBL.Temperature_CT,CalcsTBL.Temperature_Primary_CT,CalcsTBL.Temperature_Secondary_CT).\
select_from(CalcsTBL).join(ObsTBL,ObsTBL.ID==CalcsTBL.ObsTBLID).filter(ObsTBL.Depth==ObsTBL.Depth).all()
df=pd.DataFrame(qry)
df.describe()
np.min((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2),np.max((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2)
df.loc[(np.abs((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2)>.1)&(np.abs((df['Depth']-df['Z']))>.3)]
sorted([x.name for x in md.tables['StationTBL'].columns])
sorted([x.name for x in md.tables['ObsTBL'].columns])
sorted([x.name for x in md.tables['CalcsTBL'].columns])
sorted([x.name for x in md.tables['AncillaryTBL'].columns])
print('Z without P:',session.query(ObsTBL.Depth).filter(ObsTBL.Pressure==None).count())
print('P without Z:',session.query(ObsTBL.Pressure).filter(ObsTBL.Depth==None).count())
print('Z min, max:',session.query(func.min(ObsTBL.Depth)).one(),session.query(func.max(ObsTBL.Depth)).one())
print('P min, max:',session.query(func.min(ObsTBL.Pressure)).one(),session.query(func.max(ObsTBL.Pressure)).one())
for var in (ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary):
print(var,'min max count:',session.query(func.min(var)).one(),session.query(func.max(var)).one(),session.query(var).filter(var!=None).count())
vlist=(ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary)
ulist=(ObsTBL.Salinity_units,ObsTBL.Salinity_T0_C0_units,ObsTBL.Salinity_T1_C1_units,ObsTBL.Temperature_units,
ObsTBL.Temperature_Primary_units,ObsTBL.Temperature_Secondary_units)
for vvar,uvar in zip(vlist,ulist):
print(uvar,'unique:')
print('\t',[i for i in session.query(uvar).group_by(uvar).all()])
print('\t','# missing units:',session.query(vvar,uvar).filter(and_(vvar!=None,uvar==None)).count())
df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Primary']))]
df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature']))]
df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature']))]
df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Secondary']))]
df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Secondary']))]
df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Primary']))]
len(df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature']))])
len(df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Primary']))])
len(df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Secondary']))])
df=pd.DataFrame(session.query(ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,
StationTBL.Lat,StationTBL.Lon).select_from(ObsTBL).join(StationTBL,StationTBL.ID==ObsTBL.StationTBLID).all())
fig,ax=plt.subplots(1,3,figsize=(18,6))
ax[0].plot(df['Salinity'],df['Temperature'],'r.')
ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.')
ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.')
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax in ax:
viz_tools.set_aspect(iax, coords = 'map')
viz_tools.plot_coastline(iax, grid, coords = 'map')
iax.set_ylim(47, 52)
iax.set_xlim(-130, -122);
ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']],
df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro')
ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']],
df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co')
ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']],
df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo')
df=pd.DataFrame(session.query(ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,
ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,
ObsTBL.Temperature_Secondary,StationTBL.Lat,StationTBL.Lon).\
select_from(ObsTBL).join(StationTBL,StationTBL.ID==ObsTBL.StationTBLID).\
filter(ObsTBL.Include==True).all())
fig,ax=plt.subplots(1,3,figsize=(18,6))
ax[0].plot(df['Salinity'],df['Temperature'],'r.')
ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.')
ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.')
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax in ax:
viz_tools.set_aspect(iax, coords = 'map')
viz_tools.plot_coastline(iax, grid, coords = 'map')
iax.set_ylim(47, 52)
iax.set_xlim(-130, -122);
ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']],
df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro')
ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']],
df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co')
ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']],
df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo')
qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
StationTBL.Lat,StationTBL.Lon,
ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\
select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\
filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5),
StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121)))
df=pd.DataFrame(qry.all())
fig,ax=plt.subplots(1,3,figsize=(18,6))
ax[0].plot(df['Salinity'],df['Temperature'],'r.')
ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.')
ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.')
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax in ax:
viz_tools.set_aspect(iax, coords = 'map')
viz_tools.plot_coastline(iax, grid, coords = 'map')
#iax.set_ylim(48, 50.5)
#iax.set_xlim(-125.7, -122.5);
iax.set_ylim(47, 52)
iax.set_xlim(-130, -122);
ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']],
df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro')
ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']],
df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co')
ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']],
df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo')
models=session.query(AncTBL.MODEL).distinct().all()
models
## Where are CastAway stations?
qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
StationTBL.Lat,StationTBL.Lon,
ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\
select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\
join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\
filter(AncTBL.MODEL=='CastAway')
df=pd.DataFrame(qry.all())
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax in ax:
viz_tools.set_aspect(iax, coords = 'map')
viz_tools.plot_coastline(iax, grid, coords = 'map')
#iax.set_ylim(48, 50.5)
#iax.set_xlim(-125.7, -122.5);
iax.set_ylim(47, 52)
iax.set_xlim(-130, -122);
ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']],
df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro')
ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']],
df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co')
ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']],
df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo')
qry=session.query(StationTBL.Include).select_from(StationTBL).join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\
filter(AncTBL.MODEL=='CastAway').distinct().all()
print('Station Include:',qry)
qry=session.query(ObsTBL.Include).select_from(ObsTBL).join(AncTBL,AncTBL.StationTBLID==ObsTBL.StationTBLID).\
filter(AncTBL.MODEL=='CastAway').distinct().all()
print('Obs Include:',qry)
test=session.query(ObsTBL.StationTBLID).distinct().order_by(ObsTBL.StationTBLID).all()
plt.plot(test)
qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'),
StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'),
StationTBL.Lat,StationTBL.Lon,
ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,
ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,
ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\
select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\
join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\
filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5),
StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121),ObsTBL.Include==True))
df=pd.DataFrame(qry.all())
fig,ax=plt.subplots(1,3,figsize=(18,6))
ax[0].plot(df['Salinity'],df['Temperature'],'r.')
ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.')
ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.')
fig,ax=plt.subplots(1,3,figsize=(18,6))
for iax in ax:
viz_tools.set_aspect(iax, coords = 'map')
viz_tools.plot_coastline(iax, grid, coords = 'map')
#iax.set_ylim(48, 50.5)
#iax.set_xlim(-125.7, -122.5);
iax.set_ylim(47, 52)
iax.set_xlim(-130, -122);
ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']],
df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro')
ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']],
df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co')
ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']],
df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo')
# search for duplicate stations and investigate:
a1=aliased(StationTBL)
a2=aliased(StationTBL)
dupsQRY=session.query(a1.ID.label('ID1'),a1.Include,a2.ID.label('ID2'),a2.Include,a1.sourceFile.label('source1'),a2.sourceFile.label('source2'),
a1.EVENT_NUMBER.label('EVENT_NUMBER1'),a2.EVENT_NUMBER.label('EVENT_NUMBER2'),
a1.PLATFORM.label('PLATFORM1'),a2.PLATFORM.label('PLATFORM2'),
a1.STATION.label('STATION1'),a2.STATION.label('STATION2'),
a1.WATER_DEPTH.label('WATER_DEPTH1'),a2.WATER_DEPTH.label('WATER_DEPTH2')).select_from(a1).join(a2,and_(
a1.StartYear==a2.StartYear,
a1.StartMonth==a2.StartMonth,
a1.StartDay==a2.StartDay,
a1.StartHour-a2.StartHour<0.001,
a1.StartHour-a2.StartHour>-0.001,
a1.Lat-a2.Lat<0.001,
a1.Lat-a2.Lat>-0.001,
a1.Lon-a2.Lon<0.001,
a1.Lon-a2.Lon>-0.001,
a1.ID!=a2.ID)).filter(a1.Include==True,a2.Include==True,a1.ID<a2.ID)
dfa=pd.DataFrame(dupsQRY.all())
dfa
# display previously eliminated duplicates:
a1=aliased(StationTBL)
a2=aliased(StationTBL)
dupsQRY=session.query(a1.ID.label('ID1'),a1.Include,a2.ID.label('ID2'),a2.Include,a1.sourceFile.label('source1'),a2.sourceFile.label('source2'),
a1.EVENT_NUMBER.label('EVENT_NUMBER1'),a2.EVENT_NUMBER.label('EVENT_NUMBER2'),
a1.PLATFORM.label('PLATFORM1'),a2.PLATFORM.label('PLATFORM2'),
a1.STATION.label('STATION1'),a2.STATION.label('STATION2'),
a1.WATER_DEPTH.label('WATER_DEPTH1'),a2.WATER_DEPTH.label('WATER_DEPTH2')).select_from(a1).join(a2,and_(
a1.StartYear==a2.StartYear,
a1.StartMonth==a2.StartMonth,
a1.StartDay==a2.StartDay,
a1.StartHour-a2.StartHour<0.001,
a1.StartHour-a2.StartHour>-0.001,
a1.Lat-a2.Lat<0.001,
a1.Lat-a2.Lat>-0.001,
a1.Lon-a2.Lon<0.001,
a1.Lon-a2.Lon>-0.001,
a1.ID!=a2.ID)).filter(a1.Include==True,a2.Include==False,a1.ID<a2.ID)
dfa=pd.DataFrame(dupsQRY.all())
dfa
for i,r in dfa.iterrows():
print(r['source1'],'\n',r['source2'],'\n')
## check for obs include true where station include false; should be empty with adjusted procedure
test=pd.DataFrame(session.query(StationTBL.ID.label('StationID'),ObsTBL.ID.label('ObsID')).select_from(StationTBL).\
join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\
filter(and_(StationTBL.Include==False,ObsTBL.Include==True)).all())
test
grid.close()