#!/usr/bin/env python # coding: utf-8 # In[ ]: import datetime as dt import numpy as np import netCDF4 as nc import pandas as pd import glob from salishsea_tools import geo_tools import gsw import os import pytz import matplotlib.pyplot as plt import cmocean as cmo import warnings from sqlalchemy import create_engine, case, MetaData from sqlalchemy.orm import create_session, aliased from sqlalchemy.ext.automap import automap_base from sqlalchemy.sql import and_, or_, not_, func from salishsea_tools import viz_tools pd.set_option('display.max_colwidth', -1) get_ipython().run_line_magic('matplotlib', 'inline') # In[ ]: basedir='/ocean/shared/SalishSeaCastData/DFO/CTD/' dbname='DFO_CTD.sqlite' datelims=() # In[ ]: engine = create_engine('sqlite:///' + basedir + dbname, echo = False) connection = engine.connect()def Sal_PSU_to_SA(S,press,lon,lat): SA=gsw.SA_from_SP(S,press,lon,lat) if SA>=40: print('Err: SA=',SA,', None entered') SA=None elif SA<0: SA=None print('Err: SA=',SA,', None entered') return(SA) def T_to_CT(SA,T,press): CT=gsw.CT_from_t(SA,T,press) if CT>=40: print('Err: CT=',CT,', None entered') CT=None elif CT<-5: CT=None print('Err: CT=',CT,', None entered') return(CT) def p_to_Z(press,lat): Z=-1.0*gsw.z_from_p(press,lat) return(Z) connection.connection.create_function("Sal_PSU_to_SA_DB",4,Sal_PSU_to_SA) connection.connection.create_function("T_to_CT_DB",3,T_to_CT) connection.connection.create_function("p_to_Z_DB",2,p_to_Z)connection.execute("""UPDATE CalcsTBL SET Temperature_CT = T_to_CT_DB(CalcsTBL.Salinity_SA,ObsTBL.Temperature,ObsTBL.Pressure), Temperature_Primary_CT = T_to_CT_DB(CalcsTBL.Salinity_T0_C0_SA,ObsTBL.Temperature,ObsTBL.Pressure), Temperature_Secondary_CT = T_to_CT_DB(CalcsTBL.Salinity_T1_C1_SA,ObsTBL.Temperature,ObsTBL.Pressure), FROM ((CalcsTBL INNER JOIN ObsTBL ON CalcsTBL.ObsID = ObsTBL.ID) INNER JOIN StationTBL ON CalcsTBL.StationID = StationTBL.ID)""")test=connection.execute("""SELECT T_to_CT_DB( CASE WHEN CalcsTBL.Salinity_T1_C1_SA IS NOT NULL THEN CalcsTBL.Salinity_T1_C1_SA WHEN CalcsTBL.Salinity_T0_C0_SA IS NOT NULL THEN CalcsTBL.Salinity_T0_C0_SA WHEN CalcsTBL.Salinity_SA IS NOT NULL THEN CalcsTBL.Salinity_SA ELSE NULL END,ObsTBL.Temperature,ObsTBL.Pressure) as T1, T_to_CT_DB( CASE WHEN CalcsTBL.Salinity_T1_C1_SA IS NOT NULL THEN CalcsTBL.Salinity_T1_C1_SA WHEN CalcsTBL.Salinity_T0_C0_SA IS NOT NULL THEN CalcsTBL.Salinity_T0_C0_SA WHEN CalcsTBL.Salinity_SA IS NOT NULL THEN CalcsTBL.Salinity_SA ELSE NULL END,ObsTBL.Temperature,ObsTBL.Pressure) as T2, T_to_CT_DB( CASE WHEN CalcsTBL.Salinity_T1_C1_SA IS NOT NULL THEN CalcsTBL.Salinity_T1_C1_SA WHEN CalcsTBL.Salinity_T0_C0_SA IS NOT NULL THEN CalcsTBL.Salinity_T0_C0_SA WHEN CalcsTBL.Salinity_SA IS NOT NULL THEN CalcsTBL.Salinity_SA ELSE NULL END,ObsTBL.Temperature,ObsTBL.Pressure) as T3 FROM CalcsTBL INNER JOIN ObsTBL ON CalcsTBL.ObsID = ObsTBL.ID INNER JOIN StationTBL ON CalcsTBL.StationID = StationTBL.ID;""") ii=0 for row in test: ii=ii+1 if (ii>100)&(ii<120): print(row) # In[ ]: grid = nc.Dataset('/data/eolson/MEOPAR/NEMO-forcing-new/grid/bathymetry_201702.nc') # In[ ]: md=MetaData() md.reflect(engine) Base = automap_base(metadata=md) # reflect the tables in salish.sqlite: Base.prepare() # mapped classes have been created # existing tables: StationTBL=Base.classes.StationTBL ObsTBL=Base.classes.ObsTBL CalcsTBL=Base.classes.CalcsTBL AncTBL=Base.classes.AncillaryTBL #JDFLocsTBL=Base.classes.JDFLocsTBL session = create_session(bind = engine, autocommit = False, autoflush = True) # In[ ]: qry=session.query(ObsTBL.Depth,ObsTBL.Pressure,CalcsTBL.Z,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,CalcsTBL.Salinity_SA,CalcsTBL.Salinity_T0_C0_SA,CalcsTBL.Salinity_T1_C1_SA, CalcsTBL.Temperature_CT,CalcsTBL.Temperature_Primary_CT,CalcsTBL.Temperature_Secondary_CT).\ select_from(CalcsTBL).join(ObsTBL,ObsTBL.ID==CalcsTBL.ObsTBLID).filter(ObsTBL.Depth==ObsTBL.Depth).all() # In[ ]: df=pd.DataFrame(qry) # In[ ]: df.describe() # ### Look at cases where Depth and Z are different: # In[ ]: np.min((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2),np.max((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2) # In[ ]: df.loc[(np.abs((df['Depth']-df['Z'])/(df['Z']+df['Depth'])*2)>.1)&(np.abs((df['Depth']-df['Z']))>.3)] # ### List column names for tables: # In[ ]: sorted([x.name for x in md.tables['StationTBL'].columns]) # In[ ]: sorted([x.name for x in md.tables['ObsTBL'].columns]) # In[ ]: sorted([x.name for x in md.tables['CalcsTBL'].columns]) # In[ ]: sorted([x.name for x in md.tables['AncillaryTBL'].columns]) # #### salinity variables: 'Salinity','Salinity_T0_C0', 'Salinity_T1_C1' # #### temperature variables:'Temperature','Temperature_Primary','Temperature_Secondary' # ### How many Depths with no Pressure and vice versa? # In[ ]: print('Z without P:',session.query(ObsTBL.Depth).filter(ObsTBL.Pressure==None).count()) print('P without Z:',session.query(ObsTBL.Pressure).filter(ObsTBL.Depth==None).count()) # ### Other depth info: # In[ ]: print('Z min, max:',session.query(func.min(ObsTBL.Depth)).one(),session.query(func.max(ObsTBL.Depth)).one()) print('P min, max:',session.query(func.min(ObsTBL.Pressure)).one(),session.query(func.max(ObsTBL.Pressure)).one()) # ### Other Variables: # In[ ]: for var in (ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary): print(var,'min max count:',session.query(func.min(var)).one(),session.query(func.max(var)).one(),session.query(var).filter(var!=None).count()) # In[ ]: vlist=(ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary) ulist=(ObsTBL.Salinity_units,ObsTBL.Salinity_T0_C0_units,ObsTBL.Salinity_T1_C1_units,ObsTBL.Temperature_units, ObsTBL.Temperature_Primary_units,ObsTBL.Temperature_Secondary_units) # In[ ]: for vvar,uvar in zip(vlist,ulist): print(uvar,'unique:') print('\t',[i for i in session.query(uvar).group_by(uvar).all()]) print('\t','# missing units:',session.query(vvar,uvar).filter(and_(vvar!=None,uvar==None)).count()) # ## Check which T&S variable combinations are present # In[ ]: df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Primary']))] # In[ ]: df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature']))] # In[ ]: df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature']))] # In[ ]: df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Secondary']))] # In[ ]: df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Secondary']))] # In[ ]: df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature_Primary']))] # In[ ]: len(df.loc[(~np.isnan(df['Salinity']))&(~np.isnan(df['Temperature']))]) # In[ ]: len(df.loc[(~np.isnan(df['Salinity_T0_C0']))&(~np.isnan(df['Temperature_Primary']))]) # In[ ]: len(df.loc[(~np.isnan(df['Salinity_T1_C1']))&(~np.isnan(df['Temperature_Secondary']))]) # ### Plot All T S Data # In[ ]: df=pd.DataFrame(session.query(ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1, ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary, StationTBL.Lat,StationTBL.Lon).select_from(ObsTBL).join(StationTBL,StationTBL.ID==ObsTBL.StationTBLID).all()) fig,ax=plt.subplots(1,3,figsize=(18,6)) ax[0].plot(df['Salinity'],df['Temperature'],'r.') ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.') ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.') # In[ ]: fig,ax=plt.subplots(1,3,figsize=(18,6)) for iax in ax: viz_tools.set_aspect(iax, coords = 'map') viz_tools.plot_coastline(iax, grid, coords = 'map') iax.set_ylim(47, 52) iax.set_xlim(-130, -122); ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']], df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro') ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']], df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co') ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']], df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo') # ### Plot all included T&S data # In[ ]: df=pd.DataFrame(session.query(ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity,ObsTBL.Salinity_T0_C0, ObsTBL.Salinity_T1_C1,ObsTBL.Temperature,ObsTBL.Temperature_Primary, ObsTBL.Temperature_Secondary,StationTBL.Lat,StationTBL.Lon).\ select_from(ObsTBL).join(StationTBL,StationTBL.ID==ObsTBL.StationTBLID).\ filter(ObsTBL.Include==True).all()) fig,ax=plt.subplots(1,3,figsize=(18,6)) ax[0].plot(df['Salinity'],df['Temperature'],'r.') ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.') ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.') # In[ ]: fig,ax=plt.subplots(1,3,figsize=(18,6)) for iax in ax: viz_tools.set_aspect(iax, coords = 'map') viz_tools.plot_coastline(iax, grid, coords = 'map') iax.set_ylim(47, 52) iax.set_xlim(-130, -122); ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']], df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro') ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']], df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co') ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']], df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo') # ### Restrict to Salish Sea # In[ ]: qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'), StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'), StationTBL.Lat,StationTBL.Lon, ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity, ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1, ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\ select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\ filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5), StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121))) df=pd.DataFrame(qry.all()) # In[ ]: fig,ax=plt.subplots(1,3,figsize=(18,6)) ax[0].plot(df['Salinity'],df['Temperature'],'r.') ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.') ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.') # In[ ]: fig,ax=plt.subplots(1,3,figsize=(18,6)) for iax in ax: viz_tools.set_aspect(iax, coords = 'map') viz_tools.plot_coastline(iax, grid, coords = 'map') #iax.set_ylim(48, 50.5) #iax.set_xlim(-125.7, -122.5); iax.set_ylim(47, 52) iax.set_xlim(-130, -122); ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']], df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro') ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']], df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co') ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']], df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo') # In[ ]: models=session.query(AncTBL.MODEL).distinct().all() models # ### Display CastAway stations # In[ ]: ## Where are CastAway stations? qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'), StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'), StationTBL.Lat,StationTBL.Lon, ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity, ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1, ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\ select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\ join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\ filter(AncTBL.MODEL=='CastAway') df=pd.DataFrame(qry.all()) fig,ax=plt.subplots(1,3,figsize=(18,6)) for iax in ax: viz_tools.set_aspect(iax, coords = 'map') viz_tools.plot_coastline(iax, grid, coords = 'map') #iax.set_ylim(48, 50.5) #iax.set_xlim(-125.7, -122.5); iax.set_ylim(47, 52) iax.set_xlim(-130, -122); ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']], df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro') ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']], df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co') ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']], df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo') # ### Check CastAway profiles are excluded: # In[ ]: qry=session.query(StationTBL.Include).select_from(StationTBL).join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\ filter(AncTBL.MODEL=='CastAway').distinct().all() print('Station Include:',qry) # In[ ]: qry=session.query(ObsTBL.Include).select_from(ObsTBL).join(AncTBL,AncTBL.StationTBLID==ObsTBL.StationTBLID).\ filter(AncTBL.MODEL=='CastAway').distinct().all() print('Obs Include:',qry) # #### check that this plot is linear or you are likely missing stations from obs table: # In[ ]: test=session.query(ObsTBL.StationTBLID).distinct().order_by(ObsTBL.StationTBLID).all() plt.plot(test) # ### Salish Sea with Include=True # In[ ]: qry=session.query(StationTBL.StartYear.label('Year'),StationTBL.StartMonth.label('Month'), StationTBL.StartDay.label('Day'),StationTBL.StartHour.label('Hour'), StationTBL.Lat,StationTBL.Lon, ObsTBL.Depth,ObsTBL.Pressure,ObsTBL.Salinity, ObsTBL.Salinity_T0_C0,ObsTBL.Salinity_T1_C1, ObsTBL.Temperature,ObsTBL.Temperature_Primary,ObsTBL.Temperature_Secondary,ObsTBL.sourceFile).\ select_from(StationTBL).join(ObsTBL,ObsTBL.StationTBLID==StationTBL.ID).\ join(AncTBL,AncTBL.StationTBLID==StationTBL.ID).\ filter(and_(StationTBL.Lat>47-3/2.5*(StationTBL.Lon+123.5), StationTBL.Lat<47-3/2.5*(StationTBL.Lon+121),ObsTBL.Include==True)) df=pd.DataFrame(qry.all()) fig,ax=plt.subplots(1,3,figsize=(18,6)) ax[0].plot(df['Salinity'],df['Temperature'],'r.') ax[1].plot(df['Salinity_T0_C0'],df['Temperature_Primary'],'c.') ax[2].plot(df['Salinity_T1_C1'],df['Temperature_Secondary'],'m.') # In[ ]: fig,ax=plt.subplots(1,3,figsize=(18,6)) for iax in ax: viz_tools.set_aspect(iax, coords = 'map') viz_tools.plot_coastline(iax, grid, coords = 'map') #iax.set_ylim(48, 50.5) #iax.set_xlim(-125.7, -122.5); iax.set_ylim(47, 52) iax.set_xlim(-130, -122); ax[0].plot(df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lon']], df.loc[(df['Salinity']>0)&(df['Temperature']>0),['Lat']],'ro') ax[1].plot(df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lon']], df.loc[(df['Salinity_T0_C0']>0)&(df['Temperature_Primary']>0),['Lat']],'co') ax[2].plot(df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lon']], df.loc[(df['Salinity_T1_C1']>0)&(df['Temperature_Secondary']>0),['Lat']],'mo') # ## Check for Duplicates # In[ ]: # search for duplicate stations and investigate: a1=aliased(StationTBL) a2=aliased(StationTBL) dupsQRY=session.query(a1.ID.label('ID1'),a1.Include,a2.ID.label('ID2'),a2.Include,a1.sourceFile.label('source1'),a2.sourceFile.label('source2'), a1.EVENT_NUMBER.label('EVENT_NUMBER1'),a2.EVENT_NUMBER.label('EVENT_NUMBER2'), a1.PLATFORM.label('PLATFORM1'),a2.PLATFORM.label('PLATFORM2'), a1.STATION.label('STATION1'),a2.STATION.label('STATION2'), a1.WATER_DEPTH.label('WATER_DEPTH1'),a2.WATER_DEPTH.label('WATER_DEPTH2')).select_from(a1).join(a2,and_( a1.StartYear==a2.StartYear, a1.StartMonth==a2.StartMonth, a1.StartDay==a2.StartDay, a1.StartHour-a2.StartHour<0.001, a1.StartHour-a2.StartHour>-0.001, a1.Lat-a2.Lat<0.001, a1.Lat-a2.Lat>-0.001, a1.Lon-a2.Lon<0.001, a1.Lon-a2.Lon>-0.001, a1.ID!=a2.ID)).filter(a1.Include==True,a2.Include==True,a1.ID-0.001, a1.Lat-a2.Lat<0.001, a1.Lat-a2.Lat>-0.001, a1.Lon-a2.Lon<0.001, a1.Lon-a2.Lon>-0.001, a1.ID!=a2.ID)).filter(a1.Include==True,a2.Include==False,a1.ID