import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import datetime as dt
from salishsea_tools import evaltools as et
# read in stored station info (links station names to locations)
dir0 = '/ocean/eolson/MEOPAR/obs/WADE/ptools_data/ecology/'
sta_df = pd.read_pickle(dir0 + 'sta_df.p')
bottle_fn = dir0 + 'raw/ParkerMacCready2019CTDDataFeb2020.xlsx'
sheet_name = '2018-2019NutrientData'
sheet_chl = '2018-2019ChlaLabData'
bot = pd.read_excel(bottle_fn, sheet_name=sheet_name, engine='openpyxl')
chl = pd.read_excel(bottle_fn, sheet_name=sheet_chl, engine='openpyxl')
# identical cleanup for both tables:
for _df in (bot, chl):
    # drop rows where all values are NaN
    _df.dropna(how='all', inplace=True)
    # drop rows with NaN sampling depths because nominal depths do not appear reliable
    _df.dropna(subset=['Sampling Depth'], inplace=True)
# drop rows with no useful data:
bot.dropna(how='all', subset=['NH4_Lab', 'NO2_Lab', 'NO3_Lab', 'PO4_Lab', 'SiOH4_Lab'], inplace=True)
chl.dropna(how='all', subset=['Chla_Lab'], inplace=True)
# drop rows that appear to have misaligned Sampling depths
#Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep Chla_Lab Chla_QC
#2019-02-26 DNA001 12.0 30 1.136 1.0 1.9216 2.0
chl.drop(chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==1.9216)].index,inplace=True)
# also remove from chl:
# 2018-02-01 HCB010 9.0 30 10.545 sample is probably actually 30 m based on nuts
# 2018-03-09 CRR001 9.0 30 101.554 "
chl.drop(chl.loc[(chl.Date==dt.datetime(2018,2,1))&(chl.Station=='HCB010')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==10.545)].index,inplace=True)
chl.drop(chl.loc[(chl.Date==dt.datetime(2018,3,9))&(chl.Station=='CRR001')&\
                 (chl.Nomdepth==30)&(chl['Sampling Depth']==101.554)].index,inplace=True)
#remove from chl and nuts:
# Date:2019-10-30 Station: ADM001 all 3 values (Nom depth 0,10,30 but sampling depth all ~126.7)
chl.drop(chl.loc[(chl.Date==dt.datetime(2019,10,30))&(chl.Station=='ADM001')].index,inplace=True)
# BUG FIX: the original built this mask from `chl` (bot.drop(chl.loc[...].index,...));
# since the matching chl rows were dropped on the line above, the index was empty
# and nothing was ever removed from `bot` (and even before the drop, chl's index
# labels need not match bot's). Mask on `bot` itself so the intended
# ADM001 / 2019-10-30 rows really are removed from the nutrient table.
bot.drop(bot.loc[(bot.Date==dt.datetime(2019,10,30))&(bot.Station=='ADM001')].index,inplace=True)
# Average replicate rows that share identical (Date, Station, Niskin, Sampling Depth).
# Nomdepth is deliberately excluded from the keys because it is not always accurate.
repKeys = ['Date', 'Station', 'Niskin', 'Sampling Depth']
chl2 = pd.DataFrame(chl.groupby(repKeys, as_index=False).mean())
chl2
Date | Station | Niskin | Sampling Depth | CTD Cast Rep | Chla_Lab | Chla_QC | Chla_QA | Chla_SampleFieldReplicateNumber | |
---|---|---|---|---|---|---|---|---|---|
0 | 2018-01-10 | PSS019 | 9 | 31.196 | 1.0 | 0.0710 | 2.0 | 3.0 | 1.0 |
1 | 2018-01-10 | PSS019 | 11 | 11.404 | 1.0 | 0.1960 | 2.0 | 3.0 | 1.0 |
2 | 2018-01-10 | PSS019 | 12 | 1.187 | 1.0 | 0.6673 | 2.0 | 3.0 | 1.0 |
3 | 2018-01-10 | SAR003 | 9 | 31.057 | 1.0 | 0.0530 | 2.0 | 3.0 | 1.0 |
4 | 2018-01-10 | SAR003 | 11 | 11.146 | 1.0 | 0.2097 | 2.0 | 3.0 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1787 | 2019-12-19 | NSQ002 | 9 | 30.676 | 1.0 | 0.3154 | 2.0 | 2.0 | 1.0 |
1788 | 2019-12-19 | NSQ002 | 10 | 10.871 | 1.0 | 0.3640 | 2.0 | 2.0 | 1.0 |
1789 | 2019-12-19 | NSQ002 | 12 | 1.214 | 1.0 | 0.4397 | 2.0 | 2.0 | 1.0 |
1790 | 2019-12-19 | OAK004 | 11 | 11.106 | 1.0 | 0.5765 | 2.0 | 2.0 | 2.0 |
1791 | 2019-12-19 | OAK004 | 12 | 1.189 | 1.0 | 0.5765 | 2.0 | 2.0 | 1.0 |
1792 rows × 9 columns
# average nutrient replicates the same way, then outer-join nutrients with chlorophyll
bot2 = pd.DataFrame(bot.groupby(['Date', 'Station', 'Niskin', 'Sampling Depth'], as_index=False).mean())
joinKeys = ['Date', 'Station', 'Niskin', 'CTD Cast Rep', 'Sampling Depth']
allbot = pd.merge(left=bot2, right=chl2, how='outer',
                  left_on=joinKeys, right_on=joinKeys)
print(f'len(allbot):{len(allbot)}, len(chl2):{len(chl2)}, len(bot2):{len(bot2)}')
len(allbot):2357, len(chl2):1792, len(bot2):2348
# rows carrying nutrient data but no chlorophyll, and the reverse
nutNoChl = len(allbot.loc[allbot.NH4_Lab.notnull() & allbot.Chla_Lab.isnull()])
chlNoNut = len(allbot.loc[allbot.NH4_Lab.isnull() & allbot.Chla_Lab.notnull()])
print(nutNoChl, chlNoNut)
565 9
# sanity check on the outer join: each side's row count plus the other side's
# unmatched rows should both equal the merged table's total length
print(len(bot2)+chlNoNut,len(chl2)+nutNoChl)
2357 2357
# inspect the merged table's column names
allbot.keys()
Index(['Date', 'Station', 'Niskin', 'Sampling Depth', 'CTD Cast Rep', 'NH4_Lab', 'NH4_QC', 'NH4_QA', 'NH4_SampleFieldReplicateNumber', 'NO2_Lab', 'NO2_QC', 'NO2_QA', 'NO2_SampleFieldReplicateNumber', 'NO3_Lab', 'NO3_QC', 'NO3_QA', 'NO3_SampleFieldReplicateNumber', 'PO4_Lab', 'PO4_QC', 'PO4_QA', 'PO4_SampleFieldReplicateNumber', 'SiOH4_Lab', 'SiOH4_QC', 'SiOH4_QA', 'SiOH4_SampleFieldReplicateNumber', 'Unnamed: 36', 'Chla_Lab', 'Chla_QC', 'Chla_QA', 'Chla_SampleFieldReplicateNumber'], dtype='object')
dfTime = pd.read_excel('/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx',
                       engine='openpyxl', sheet_name='EventDateTime')
## duplicate Station/Date entries with different times seem to be always within a couple of hours,
# so just take the first (next cell)
tcol = 'TimeDown \n(Local - PST or PDT)'
counts = dfTime.groupby(['FlightDate', 'SiteCode'])[tcol].count()
for idate, isite in counts[counts > 1].index:
    sel = (dfTime.FlightDate == idate) & (dfTime.SiteCode == isite)
    print(dfTime.loc[sel, ['FlightDate', 'SiteCode', tcol]])
FlightDate SiteCode TimeDown \n(Local - PST or PDT) 590 2001-07-09 DNA001 17:09:00 591 2001-07-09 DNA001 17:09:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 948 2003-06-25 BUD005 17:00:00 949 2003-06-25 BUD005 17:00:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 1362 2005-08-16 BUD005 10:43:00 1363 2005-08-16 BUD005 10:43:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 1429 2005-11-07 BUD005 11:55:00 1430 2005-11-07 BUD005 11:55:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 1446 2005-12-05 BUD005 11:58:00 1447 2005-12-05 BUD005 11:58:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2104 2008-08-21 SJF000 09:59:00 2105 2008-08-21 SJF000 11:05:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2129 2008-09-29 SJF000 10:00:00 2130 2008-09-29 SJF000 10:15:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2155 2008-11-19 SJF000 10:29:00 2156 2008-11-19 SJF000 10:45:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2182 2009-03-26 SJF000 09:44:00 2183 2009-03-26 SJF000 09:57:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2208 2009-04-16 SJF000 10:04:00 2209 2009-04-16 SJF000 10:19:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2261 2009-06-10 SJF002 10:40:00 2262 2009-06-10 SJF002 11:00:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2287 2009-07-21 SJF000 10:20:00 2289 2009-07-21 SJF000 10:30:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2374 2009-10-24 SJF000 10:02:00 2375 2009-10-24 SJF000 10:15:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2383 2009-11-24 SJF000 10:09:00 2384 2009-11-24 SJF000 10:19:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2476 2010-03-16 SJF000 09:48:00 2477 2010-03-16 SJF000 09:54:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2534 2010-06-01 SJF000 09:43:00 2536 2010-06-01 SJF000 09:55:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2561 2010-06-28 SJF000 09:58:00 2562 2010-06-28 SJF000 10:09:00 FlightDate SiteCode TimeDown 
\n(Local - PST or PDT) 2596 2010-08-12 SJF000 11:07:00 2597 2010-08-12 SJF000 11:16:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2623 2010-09-15 SJF000 10:28:00 2624 2010-09-15 SJF000 10:38:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2646 2010-10-15 SJF000 10:26:00 2647 2010-10-15 SJF000 10:35:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2688 2010-11-28 SJF000 10:10:00 2689 2010-11-28 SJF000 10:20:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2756 2011-03-14 SJF000 10:12:00 2757 2011-03-14 SJF000 10:20:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2772 2011-04-18 SJF000 10:00:00 2773 2011-04-18 SJF000 10:07:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2807 2011-05-16 SJF000 09:24:00 2808 2011-05-16 SJF000 09:33:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2853 2011-07-27 SJF000 10:59:00 2854 2011-07-27 SJF000 11:13:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2879 2011-08-30 SJF000 09:30:00 2880 2011-08-30 SJF000 09:36:00 FlightDate SiteCode TimeDown \n(Local - PST or PDT) 2962 2011-12-21 SJF000 08:41:00 2963 2011-12-21 SJF000 08:48:00
# drop duplicate (FlightDate, SiteCode) rows, keeping the first recorded time
# (duplicates were shown above to differ by at most a couple of hours)
dfTime.drop_duplicates(subset=['FlightDate','SiteCode'],keep='first',inplace=True)
print(dfTime.keys())
Index(['FlightYear', 'FlightMonth', 'FlightDate', 'SiteCode', 'Sampled', 'TimeDown \n(Local - PST or PDT)', 'FieldComment'], dtype='object')
# build Pacific-time datetimes from date + local time-down, then convert to UTC
dfTime['dtPac']=[dt.datetime.combine(idate, itime) for idate, itime \
    in zip(dfTime['FlightDate'],dfTime['TimeDown \n(Local - PST or PDT)'])]
dfTime['dtUTC']=[et.pac_to_utc(ii) for ii in dfTime['dtPac']]
allbot['dtUTC']=np.nan # create column and set all values to nan
# use loop to set dtUTC values, where available (where dates are unavailable, left as NaN for now)
for ind, row in allbot.iterrows():
    ix = (dfTime.FlightDate == row['Date']) & (dfTime.SiteCode == row['Station'])
    if np.sum(ix) == 1:
        idate, itime = dfTime.loc[ix, ['FlightDate', 'TimeDown \n(Local - PST or PDT)']].values[0]
        # BUG FIX: the original assigned to a new 'UTCDateTime' column here,
        # leaving the 'dtUTC' column created above all-NaN (visible in the
        # notebook output); write to 'dtUTC' as the comments intend.
        allbot.loc[ind, 'dtUTC'] = et.pac_to_utc(dt.datetime.combine(idate, itime))
# PROCESS STATION LOCATION INFO (based on Parker's code)
sta_fn = '/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname = 'Site Info'
sta_df = pd.read_excel(sta_fn, engine='openpyxl', sheet_name=sheetname)
# require station id and both coordinate strings
sta_df.dropna(how='any', subset=['Lat_NAD83 (deg / dec_min)', 'Long_NAD83 (deg / dec_min)', 'Station'],
              inplace=True)
sta_df = sta_df.set_index('Station')

def _dm2dd(coord):
    # convert a '<degrees> <decimal minutes>' string to decimal degrees
    fields = coord.split()
    return float(fields[0]) + float(fields[1]) / 60

# get locations in decimal degrees; longitudes are west of Greenwich, hence negated
for sta in sta_df.index:
    sta_df.loc[sta, 'Lat'] = _dm2dd(sta_df.loc[sta, 'Lat_NAD83 (deg / dec_min)'])
    sta_df.loc[sta, 'Lon'] = -_dm2dd(sta_df.loc[sta, 'Long_NAD83 (deg / dec_min)'])
sta_df.pop('Lat_NAD83 (deg / dec_min)');
sta_df.pop('Long_NAD83 (deg / dec_min)');
# attach station metadata to every bottle row (right join keeps all bottle rows)
allbotFinal = pd.merge(left=sta_df, right=allbot, how='right',
                       left_on='Station', right_on='Station')
allbotFinal
Station | Desig | Descrip | Basin | *Max_Depth | Lat | Lon | Date | Niskin | Sampling Depth | ... | SiOH4_QC | SiOH4_QA | SiOH4_SampleFieldReplicateNumber | Unnamed: 36 | Chla_Lab | Chla_QC | Chla_QA | Chla_SampleFieldReplicateNumber | dtUTC | UTCDateTime | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | HCB013 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 | 3 | 26.820 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | HCB013 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 | 11 | 11.252 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | HCB013 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 | 12 | 1.336 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 3 | 98.380 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 20:34:00 |
4 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 9 | 31.196 | ... | 2.0 | 3.0 | 1.0 | NaN | 0.0710 | 2.0 | 3.0 | 1.0 | NaN | 2018-01-10 20:34:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2352 | SKG003 | C | Skagit Bay - Str. Point Red Buoy | Whidbey Basin | 25.0 | 48.296488 | -122.489605 | 2018-09-05 | 9 | 10.848 | ... | NaN | NaN | NaN | NaN | 1.2393 | 2.0 | 3.0 | 1.0 | NaN | 2018-09-05 19:48:00 |
2353 | OAK004 | C | Oakland Bay - Near Eagle Point | South Basin | 28.0 | 47.213427 | -123.077650 | 2018-12-10 | 10 | 0.880 | ... | NaN | NaN | NaN | NaN | 1.0945 | 2.0 | 3.0 | 2.0 | NaN | 2018-12-10 22:14:00 |
2354 | ELB015 | C | Elliott Bay - E. of Duwamish Head | Main Basin | 91.0 | 47.596487 | -122.369572 | 2019-03-15 | 12 | 0.897 | ... | NaN | NaN | NaN | NaN | 0.8500 | 2.0 | 2.0 | 1.0 | NaN | 2019-03-15 19:15:00 |
2355 | HCB004 | C | Hood Canal - Gt. Bend, Sisters Point | Hood Canal Basin | 53.0 | 47.356205 | -123.024873 | 2019-08-16 | 4 | 26.098 | ... | NaN | NaN | NaN | NaN | 0.7460 | 2.0 | 2.0 | 1.0 | NaN | 2019-08-16 20:47:00 |
2356 | ADM003 | C | Admiralty Inlet (south) | Admiralty Inlet | 118.0 | 47.878983 | -122.483195 | 2019-11-14 | 12 | 0.379 | ... | NaN | NaN | NaN | NaN | 0.7404 | 2.0 | 2.0 | 1.0 | NaN | 2019-11-14 20:06:00 |
2357 rows × 38 columns
# list the station codes that got no Lat from the station table (unmatched in merge)
np.unique(allbotFinal.loc[np.isnan(allbotFinal.Lat),['Station']])
array(['BLL040', 'HCB013'], dtype=object)
# Two station designations (BLL040, HCB013, per the check above) are not present
# in the station list; their Lat/Lon are NaN after the merge, so drop those rows.
allbotFinal.dropna(how='any',subset=['Lat','Lon'],inplace=True)
# rename the 'Sampling Depth' column to 'Z'
allbotFinal.rename(columns={'Sampling Depth':'Z'},inplace=True)
allbotFinal
Station | Desig | Descrip | Basin | *Max_Depth | Lat | Lon | Date | Niskin | Z | ... | SiOH4_QC | SiOH4_QA | SiOH4_SampleFieldReplicateNumber | Unnamed: 36 | Chla_Lab | Chla_QC | Chla_QA | Chla_SampleFieldReplicateNumber | dtUTC | UTCDateTime | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 3 | 98.380 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 20:34:00 |
4 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 9 | 31.196 | ... | 2.0 | 3.0 | 1.0 | NaN | 0.0710 | 2.0 | 3.0 | 1.0 | NaN | 2018-01-10 20:34:00 |
5 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 11 | 11.404 | ... | 2.0 | 3.0 | 1.0 | NaN | 0.1960 | 2.0 | 3.0 | 1.0 | NaN | 2018-01-10 20:34:00 |
6 | PSS019 | C | Possession Sound - Gedney Island | Whidbey Basin | 107.0 | 48.010927 | -122.301250 | 2018-01-10 | 12 | 1.187 | ... | 2.0 | 3.0 | 1.0 | NaN | 0.6673 | 2.0 | 3.0 | 1.0 | NaN | 2018-01-10 20:34:00 |
7 | SAR003 | C | Saratoga Passage - East Point | Whidbey Basin | 123.0 | 48.107595 | -122.491542 | 2018-01-10 | 3 | 152.993 | ... | 2.0 | 3.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 2018-01-10 19:41:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2352 | SKG003 | C | Skagit Bay - Str. Point Red Buoy | Whidbey Basin | 25.0 | 48.296488 | -122.489605 | 2018-09-05 | 9 | 10.848 | ... | NaN | NaN | NaN | NaN | 1.2393 | 2.0 | 3.0 | 1.0 | NaN | 2018-09-05 19:48:00 |
2353 | OAK004 | C | Oakland Bay - Near Eagle Point | South Basin | 28.0 | 47.213427 | -123.077650 | 2018-12-10 | 10 | 0.880 | ... | NaN | NaN | NaN | NaN | 1.0945 | 2.0 | 3.0 | 2.0 | NaN | 2018-12-10 22:14:00 |
2354 | ELB015 | C | Elliott Bay - E. of Duwamish Head | Main Basin | 91.0 | 47.596487 | -122.369572 | 2019-03-15 | 12 | 0.897 | ... | NaN | NaN | NaN | NaN | 0.8500 | 2.0 | 2.0 | 1.0 | NaN | 2019-03-15 19:15:00 |
2355 | HCB004 | C | Hood Canal - Gt. Bend, Sisters Point | Hood Canal Basin | 53.0 | 47.356205 | -123.024873 | 2019-08-16 | 4 | 26.098 | ... | NaN | NaN | NaN | NaN | 0.7460 | 2.0 | 2.0 | 1.0 | NaN | 2019-08-16 20:47:00 |
2356 | ADM003 | C | Admiralty Inlet (south) | Admiralty Inlet | 118.0 | 47.878983 | -122.483195 | 2019-11-14 | 12 | 0.379 | ... | NaN | NaN | NaN | NaN | 0.7404 | 2.0 | 2.0 | 1.0 | NaN | 2019-11-14 20:06:00 |
2309 rows × 38 columns
fn='/ocean/eolson/MEOPAR/obs/WADE/WDE_Data/OlsonSuchyAllen_UBC_PDR_P003790-010721.xlsx'
sheetname='LabChlaPheo'
chlPheo = pd.read_excel(fn, engine='openpyxl', sheet_name=sheetname)
# require the grouping keys to be present
chlPheo.dropna(how='any', subset=['Date', 'Station', 'SamplingDepth'], inplace=True)
# average over replicates
chlPheo2 = pd.DataFrame(chlPheo.groupby(['Date', 'Station', 'SamplingDepth'], as_index=False).mean())
# join to station info (lat/lon)
chlPheo3 = pd.merge(left=sta_df, right=chlPheo2, how='right',
                    left_on='Station', right_on='Station')
# join to date/time (recomputes dfTime['dtUTC']; harmless if already present)
tcol = 'TimeDown \n(Local - PST or PDT)'
dfTime['dtUTC'] = [et.pac_to_utc(dt.datetime.combine(idate, itime))
                   for idate, itime in zip(dfTime['FlightDate'], dfTime[tcol])]
dfTime2 = dfTime.loc[:, ['FlightDate', 'SiteCode', 'dtUTC']]
chlPheoFinal = pd.merge(left=chlPheo3, right=dfTime2, how='left',
                        left_on=['Date', 'Station'], right_on=['FlightDate', 'SiteCode'])
len(chlPheoFinal), len(chlPheo3), len(dfTime2)
(11469, 11469, 5186)
def intna(x):
    """Return int(x), or np.nan when x is not convertible to an int.

    Used to coerce the 'Nomdepth' column, which mixes numeric strings with
    non-numeric codes (e.g. 'NB').
    """
    try:
        return int(x)
    except (ValueError, TypeError):
        # narrow the original bare `except:` so real errors (e.g.
        # KeyboardInterrupt) are not silently swallowed
        return np.nan
# scratch: relative difference between a 140 m nominal and 178 m sampled depth
# (~0.21) — presumably informing the 0.3 threshold used in `crit` below; verify
np.abs(140-178)/178
0.21348314606741572
nd = [intna(ii) for ii in chl['Nomdepth']]
# flag rows whose sampled depth differs from the nominal depth by more than
# max(5, 0.3 * sampled depth); NaN nominal depths compare False and are skipped
crit = [np.abs(isd - ind) > max(5, 0.3 * isd) for ind, isd in zip(nd, chl['Sampling Depth'])]
bot.keys()
# print the full cast from both tables for every flagged chl row
for i, row in chl.loc[crit].iterrows():
    inchl = (chl.Date == row['Date']) & (chl.Station == row.Station)
    print(chl.loc[inchl,
          ['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',
           'CTD Cast Rep', 'Chla_Lab']])
    inbot = (bot.Date == row['Date']) & (bot.Station == row.Station)
    print(bot.loc[inbot,
          ['Date', 'Station', 'Niskin', 'Nomdepth', 'Sampling Depth',
           'CTD Cast Rep', 'NH4_Lab', 'NO3_Lab']])
    print('-------\n')
Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 119 2019-02-26 DNA001 12 0 1.136 1 120 2019-02-26 DNA001 10 10 9.702 1 121 2019-02-26 DNA001 12 30 1.136 1 Chla_Lab 119 2.2268 120 1.9555 121 1.9216 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 163 2019-02-26 DNA001 12 0 1.136 1 164 2019-02-26 DNA001 10 10 9.702 1 165 2019-02-26 DNA001 4 30 29.815 1 NH4_Lab NO3_Lab 163 0.3302 27.5842 164 0.3106 27.6943 165 0.2201 27.8900 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 425 2019-05-08 SJF002 12 0 0.713 1 426 2019-05-08 SJF002 4 140 79.942 1 Chla_Lab 425 1.8423 426 0.5294 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 531 2019-05-08 SJF002 12 0 0.713 1 532 2019-05-08 SJF002 4 140 79.942 1 533 2019-05-08 SJF002 4 140 79.942 1 534 2019-05-08 SJF002 4 140 79.942 1 NH4_Lab NO3_Lab 531 1.0515 17.6690 532 0.1721 29.6922 533 0.1963 29.6108 534 0.1745 29.4867 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 776 2019-09-11 WPA003 12 0 1.140 1 777 2019-09-11 WPA003 11 10 4.075 1 Chla_Lab 776 4.5214 777 4.2727 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 966 2019-09-11 WPA003 12 0 1.140 1 967 2019-09-11 WPA003 11 10 4.075 1 NH4_Lab NO3_Lab 966 1.1365 3.1884 967 0.9785 2.9332 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 791 2019-09-24 ADM002 10 0 0.580 1 792 2019-09-24 ADM002 6 30 30.052 1 793 2019-09-24 ADM002 4 80 60.837 1 Chla_Lab 791 1.5883 792 0.6645 793 0.3328 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 981 2019-09-24 ADM002 10 0 0.580 1 982 2019-09-24 ADM002 8 10 10.043 1 983 2019-09-24 ADM002 6 30 30.052 1 984 2019-09-24 ADM002 4 80 60.837 1 NH4_Lab NO3_Lab 981 0.3724 24.7083 982 0.3567 24.8101 983 0.3554 26.3724 984 0.1539 28.6972 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 895 2019-10-29 WPA008 12 0 1.295 1 896 2019-10-29 WPA008 11 10 3.239 1 Chla_Lab 895 2.9615 896 2.9050 Date Station Niskin Nomdepth Sampling Depth CTD Cast 
Rep \ 1104 2019-10-29 WPA008 12 0 1.295 1 1105 2019-10-29 WPA008 11 10 3.239 1 NH4_Lab NO3_Lab 1104 3.0141 9.0886 1105 3.0663 9.3822 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 997 2019-12-05 HCB003 12 0 1.054 1 998 2019-12-05 HCB003 11 10 10.603 1 999 2019-12-05 HCB003 9 30 123.116 1 Chla_Lab 997 0.8365 998 0.3594 999 0.0690 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1231 2019-12-05 HCB003 12 0 1.054 1 1232 2019-12-05 HCB003 11 10 10.603 1 1233 2019-12-05 HCB003 9 30 123.116 1 1234 2019-12-05 HCB003 3 NB 125.111 1 NH4_Lab NO3_Lab 1231 0.0 27.0123 1232 0.0 28.4125 1233 0.0 28.4254 1234 0.0 28.4172 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1156 2018-02-07 PTH005 8 0 0.623 1 1157 2018-02-07 PTH005 6 10 10.019 1 1158 2018-02-07 PTH005 4 30 22.011 1 Chla_Lab 1156 0.4131 1157 0.3311 1158 0.1691 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1428 2018-02-07 PTH005 8 0 0.623 1 1429 2018-02-07 PTH005 6 10 10.019 1 1430 2018-02-07 PTH005 4 30 22.011 1 NH4_Lab NO3_Lab 1428 0.2193 24.3483 1429 0.1273 24.3710 1430 1.0114 23.6798 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1226 2018-03-07 PTH005 8 0 0.605 1 1227 2018-03-07 PTH005 6 10 9.959 1 1228 2018-03-07 PTH005 4 30 22.752 1 Chla_Lab 1226 1.6503 1227 2.7539 1228 0.6581 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1509 2018-03-07 PTH005 8 0 0.605 1 1510 2018-03-07 PTH005 6 10 9.959 1 1511 2018-03-07 PTH005 4 30 22.752 1 NH4_Lab NO3_Lab 1509 0.4032 24.4072 1510 0.3268 24.3368 1511 0.8594 26.1480 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1249 2018-03-08 HCB007 12 0 1.123 1 1250 2018-03-08 HCB007 11 10 19.809 1 Chla_Lab 1249 6.6730 1250 12.8164 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1537 2018-03-08 HCB007 12 0 1.123 1 1538 2018-03-08 HCB007 11 10 19.809 1 1539 2018-03-08 HCB007 3 NB 19.693 1 NH4_Lab NO3_Lab 1537 0.3574 5.0614 1538 1.8039 24.3578 1539 1.8137 24.2544 ------- 
Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1362 2018-04-19 PTH005 8 0 0.708 1 1363 2018-04-19 PTH005 6 10 10.013 1 1364 2018-04-19 PTH005 4 30 20.133 1 Chla_Lab 1362 2.0857 1363 2.0553 1364 1.2048 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1671 2018-04-19 PTH005 8 0 0.708 1 1672 2018-04-19 PTH005 6 10 10.013 1 1673 2018-04-19 PTH005 4 30 20.133 1 NH4_Lab NO3_Lab 1671 1.2930 18.1204 1672 1.4154 17.9189 1673 1.2144 18.5835 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1532 2018-06-05 PTH005 8 0 0.732 1 1533 2018-06-05 PTH005 6 10 10.110 1 1534 2018-06-05 PTH005 4 30 21.792 1 Chla_Lab 1532 12.6560 1533 8.6061 1534 2.0148 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1895 2018-06-05 PTH005 8 0 0.732 1 1896 2018-06-05 PTH005 6 10 10.110 1 1897 2018-06-05 PTH005 4 30 21.792 1 NH4_Lab NO3_Lab 1895 0.4617 5.6939 1896 1.3238 14.3760 1897 2.6031 18.3663 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1701 2018-08-08 PTH005 8 0 0.805 1 1702 2018-08-08 PTH005 6 10 10.232 1 1703 2018-08-08 PTH005 4 30 21.665 1 Chla_Lab 1701 13.9722 1702 10.7323 1703 5.7306 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 2146 2018-08-08 PTH005 8 0 0.805 1 2147 2018-08-08 PTH005 6 10 10.232 1 2148 2018-08-08 PTH005 4 30 21.665 1 NH4_Lab NO3_Lab 2146 0.5354 6.2933 2147 1.4481 13.6707 2148 1.8542 17.3004 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 1823 2018-09-07 PTH005 8 0 0.587 1 1824 2018-09-07 PTH005 6 10 10.196 1 1825 2018-09-07 PTH005 4 30 21.428 1 Chla_Lab 1823 8.7073 1824 4.6270 1825 2.5717 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 2288 2018-09-07 PTH005 8 0 0.587 1 2289 2018-09-07 PTH005 6 10 10.196 1 2290 2018-09-07 PTH005 4 30 21.428 1 NH4_Lab NO3_Lab 2288 1.0510 13.3424 2289 2.3574 16.6559 2290 1.3044 14.1399 ------- Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 2086 2018-12-05 SJF001 4 140 129.765 1 2088 2018-12-05 SJF001 6 80 29.955 1 
Chla_Lab 2086 0.1691 2088 0.2076 Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep \ 2611 2018-12-05 SJF001 4 140 129.765 1 2613 2018-12-05 SJF001 6 80 29.955 1 NH4_Lab NO3_Lab 2611 0.0162 27.6416 2613 0.0677 27.1655 -------
# NOTE(review): dead exploratory fragment; the original line was invalid syntax
# and raised the SyntaxError recorded below. Left commented out so the file parses.
# nomd=[ii if np.isfloat]
File "<ipython-input-37-c68cf9d828bd>", line 1 nomd=[ii if np.isfloat] ^ SyntaxError: invalid syntax
# --- exploratory/diagnostic scratch cells below ---
# NOTE(review): this re-merges the raw (un-averaged) bot and chl tables on the
# replicate-number columns and OVERWRITES the `allbot` built earlier; it was the
# analysis that motivated the row-drops near the top of the file.
allbot = pd.merge(left=bot,right=chl,how='outer',
left_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NO3_SampleFieldReplicateNumber'],
right_on = ['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','Chla_SampleFieldReplicateNumber'])
print(f'len(allbot):{len(allbot)}, len(chl):{len(chl)}, len(bot):{len(bot)}')
#,'Nomdepth'
# inspect available columns in each table
bot.keys()
chl.keys()
allbot.keys()
# helper column for counting rows per group
allbot['ones']=1
# rows with nutrient data but no chlorophyll, and vice versa
allbot.loc[(~pd.isnull(allbot.NH4_Lab))&(pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]
allbot.loc[(pd.isnull(allbot.NH4_Lab))&(~pd.isnull(allbot.Chla_Lab)),['Date','Station','Niskin','CTD Cast Rep','Sampling Depth','NH4_Lab','Chla_Lab']]
bot[['Date','Station','Niskin','Sampling Depth']].head(5)
chl[['Date','Station','Niskin','Sampling Depth']].head(5)
# count rows per (Date, Station, Niskin, Sampling Depth) to locate unexpected duplicates
allbot.groupby(['Date','Station','Niskin','Sampling Depth'])['ones'].count()
temp=allbot.groupby(['Date','Station','Niskin','Sampling Depth']).agg({'ones':['count']})
temp.columns = ['icount']
np.unique(temp.icount)
temp.loc[temp.icount>1]
# examine specific casts flagged by the duplicate counts
bot.loc[(bot.Date==dt.datetime(2018,1,12))&(bot.Station=='OAK004')]
chl.loc[(chl.Date==dt.datetime(2018,1,12))&(chl.Station=='OAK004')]
temp.loc[temp.icount==2]
bot.loc[(bot.Date==dt.datetime(2019,2,26))&(bot.Station=='DNA001'),['Date','Station','Niskin','Nomdepth','Sampling Depth','NH4_Lab','NO3_Lab']]
chl.loc[(chl.Date==dt.datetime(2019,2,26))&(chl.Station=='DNA001')]
allbot.loc[(allbot.Date==dt.datetime(2019,2,26))&(allbot.Station=='DNA001'),['Date','Station','Niskin','Sampling Depth','Chla_Lab']]
#remove this entry in chl:
#Date Station Niskin Nomdepth Sampling Depth CTD Cast Rep Chla_Lab Chla_QC
#2019-02-26 DNA001 12.0 30 1.136 1.0 1.9216 2.0
# rows where bot's and chl's Nomdepth disagree (suffixed _x/_y by the merge above)
allbot.loc[(allbot.Nomdepth_x!=allbot.Nomdepth_y)]