Notebook
## load sources files, do conversions, make CSV def subval(idf,colList): # first value in colList should be the column you are going to keep # follow with other columns that will be used to fill in when that column is NaN # in order of precedence if len(colList)==2: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] for i,r in idf.iterrows()] elif len(colList)==3: idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \ else r[colList[1]] if not pd.isna(r[colList[1]]) \ else r[colList[2]] for i,r in idf.iterrows()] else: raise NotImplementedError('Add to code to handle this case') idf.drop(columns=list(colList[1:]),inplace=True) return idf # define paths to the source files and eventual output file flist=('/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2015_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2017_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2018_NewALLO.csv', '/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2019.csv') dfs=list() for fname in flist: dfs.append(pd.read_csv(fname)) df=pd.concat(dfs,ignore_index=True,sort=False); # concatenate the list into a single table df.drop(labels=['ADM:MISSION','ADM:PROJECT','ADM:SCIENTIST','Zone','Zone.1','Temperature:Draw', 'Temperature:Draw [deg C (ITS90)]','Bottle:Firing_Sequence','Comments by sample_numbeR', 'File Name','LOC:EVENT_NUMBER','Number_of_bin_records' ],axis=1,inplace=True) df=subval(df,('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted')) df=subval(df,('Fluorescence [mg/m^3]','Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint')) df=subval(df,('Lat','LOC:LATITUDE')) df=subval(df,('Lon','LOC:LONGITUDE')) df=subval(df,('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite')) df=subval(df,('PAR [uE/m^2/sec]','PAR')) df=subval(df,('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted')) df=subval(df,('Phosphate [umol/L]','Phosphate')) df=subval(df,('Pressure [decibar]','Pressure')) df=subval(df,('Salinity','Salinity [PSS-78]','Salinity:T1:C1 [PSS-78]')) df=subval(df,('Salinity:Bottle','Salinity:Bottle [PSS-78]')) df=subval(df,('Silicate [umol/L]','Silicate')) df=subval(df,('Temperature','Temperature [deg C (ITS90)]','Temperature:Secondary [deg C (ITS90)]')) df=subval(df,('Transmissivity [*/metre]','Transmissivity')) df['Z']=np.where(pd.isna(df['Depth [metres]']), -1*gsw.z_from_p(df['Pressure [decibar]'].values,df['Lat'].values), df['Depth [metres]']) df['p']=np.where(pd.isna(df['Pressure [decibar]']), gsw.p_from_z(-1*df['Depth [metres]'].values,df['Lat'].values), df['Pressure [decibar]']) df['SA']=gsw.SA_from_SP(df['Salinity'].values,df['p'].values,df['Lon'].values,df['Lat'].values) df['CT']=gsw.CT_from_t(df['SA'].values,df['Temperature'].values,df['p'].values) df.rename({'TchlA':'TchlA (ug/L)','Raphido':'Raphidophytes','Dinoflagellates-1':'Dinoflagellates', 'Dictyo':'Dictyochophytes'},axis=1, inplace=True, errors='raise') df['dtUTC']=[dt.datetime.strptime(ii,'%Y-%m-%d %H:%M:%S') if isinstance(ii,str) else np.nan \ for ii in df['FIL:START TIME YYYY/MM/DD HH:MM:SS'] ] df.drop(columns=['FIL:START TIME YYYY/MM/DD HH:MM:SS', 'LOC:STATION','LOC:WATER DEPTH','Oxygen:Dissolved:CTD', 'pH:SBE:Nominal', 'Salinity:Bottle','Flag:Salinity:Bottle', 'Flag:Chlorophyll:Extracted', 'Flag:Nitrate_plus_Nitrite', 'Flag:Silicate', 'Flag:Phosphate','Cruise', 'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved', 'Transmissivity [*/metre]','PAR [uE/m^2/sec]', 'PAR:Reference [uE/m^2/sec]', 'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]','Temperature', 'Salinity', 'Phaeo-Pigment:Extracted [mg/m^3]','Oxygen:Dissolved [mL/L]', 'Oxygen:Dissolved [umol/kg]', 'Depth [metres]','Phosphate [umol/L]','Fluorescence [mg/m^3]','Oxygen:Dissolved:CTD [mL/L]', 'Oxygen:Dissolved:CTD [umol/kg]','Alkalinity:Total [umol/L]','Flag:Alkalinity:Total', 'Carbon:Dissolved:Inorganic [umol/kg]','Flag:Carbon:Dissolved:Inorganic', 'Bottle_Number', 'Pressure [decibar]','Depth:Nominal [metres]','Conductivity [S/m]', 'LOC:ALTIMETER (M)', 'ADM:PLATFORM', 'LOC:GEOGRAPHIC AREA', 'FIL:DATA DESCRIPTION', 'Transmissivity:Green [*/metre]', 'Date', ],inplace=True) df.dropna(how='any',subset=['dtUTC','Lat','Lon','Diatoms-1'],inplace=True) # get model indices: PATH= '/results2/SalishSea/nowcast-green.201905/' flen=1 filemap={'nitrate':'ptrc_T','silicon':'ptrc_T','ammonium':'ptrc_T','diatoms':'ptrc_T','ciliates':'ptrc_T', 'flagellates':'ptrc_T','vosaline':'grid_T','votemper':'grid_T'} fdict={'ptrc_T':1,'grid_T':1} namfmt='nowcast' data=et.matchData(df,filemap=filemap,fdict=fdict,mod_nam_fmt=namfmt,mod_basedir=PATH,mod_flen=flen) df2=data.loc[:,['dtUTC','Lat', 'Lon','Z', 'p','i','j','k', 'Sample_Number','Diatoms-1', 'Diatoms-2', 'Prasinophytes', 'Cryptophytes', 'Dinoflagellates', 'Haptophytes', 'Dictyochophytes', 'Raphidophytes', 'Cyanobacteria', 'TchlA (ug/L)', 'Chlorophyll:Extracted [mg/m^3]', 'Nitrate_plus_Nitrite [umol/L]', 'Silicate [umol/L]', 'Fluorescence:URU [mg/m^3]', 'SA', 'CT' ]] df2.to_csv('/data/eolson/results/MEOPAR/oldDBs/HPLCPhyto.csv',index=False)
for i, el in df.loc[:,['dtUTC']].iterrows(): print(i,el['dtUTC']) dt.datetime.strptime(el['dtUTC'],'%Y-%m-%d %H:%M:%S') df.dropna(how='any',axis=0,subset=['dtUTC','Lat','Lon'],inplace=True)