In [1]:

import numpy as np  # this module handles arrays, but here we need it for its NaN value
import pandas as pd # this module contains a lot of tools for handling tabular data
import re

In [2]:

# define paths to the source files and eventual output file
pathBottle='/ocean/eolson/MEOPAR/obs/NemcekHPLC/All 2016 SoG bottle.xlsx'
pathPhyto='/ocean/eolson/MEOPAR/obs/NemcekHPLC/2015-2019 SoG abs values New ALLO.xlsx'
pathOut='/ocean/eolson/MEOPAR/obs/NemcekHPLC/bottlePhytoMerged2016_NewALLO.csv'

In [3]:

#formatting function to get year-(3digit) format
def fmtCruise(istr):
    if re.fullmatch('[0-9]{4}-[0-9]{2}',istr): 
        sp=re.split('-',istr)
        rstr=sp[0]+'-0'+sp[1]
    elif re.fullmatch('[0-9]{4}-[0-9]{3}',istr):
        rstr=istr
    else:
        raise ValueError('Input had unexpected format:',istr)
    return rstr

In [4]:

# get names of sheets in notebook
with pd.ExcelFile(pathBottle,engine='openpyxl') as xl:
    sheets=xl.sheet_names
print(sheets)

['2016-05', '2016-47', '2016-07', '2016-62', '2016-010', '2016-071']

In [5]:

# load each sheet in the 2015 bottle Excel file and concatenate them together into one table
dfbotlist=list()
for sheet in sheets:
    df0=pd.read_excel(pathBottle,sheet_name=sheet,verbose=True,engine='openpyxl',
                      na_values=(-99,-99.9)) # read each sheet; include additional na values
    df0['Cruise']=fmtCruise(sheet)  # create and populate Cruise column based on sheet name
    dfbotlist.append(df0) # append the sheet to a list
dfbot=pd.concat(dfbotlist,ignore_index=True,sort=False) # concatenate the list into a single table
# Drop columns with no data in them
l1=set(dfbot.keys())
dfbot.dropna(axis=1,how='all',inplace=True)
print('removed empty columns:',l1-set(dfbot.keys()))

Reading sheet 2016-05
Reading sheet 2016-47
Reading sheet 2016-07
Reading sheet 2016-62
Reading sheet 2016-010
Reading sheet 2016-071
removed empty columns: {'Flag:Ammonium', 'Oxygen:Dissolved.1', 'Ammonium'}

In [6]:

# list the column names in the resulting table
print(dfbot.keys())

Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',
       'LOC:EVENT_NUMBER', 'LOC:LATITUDE', 'LOC:LONGITUDE', 'LOC:WATER DEPTH',
       'ADM:SCIENTIST', 'LOC:STATION', 'Sample_Number', 'Pressure',
       'Temperature:Secondary', 'Transmissivity', 'Fluorescence:URU:Seapoint',
       'PAR', 'PAR:Reference', 'pH:SBE:Nominal', 'Salinity:T1:C1',
       'Oxygen:Dissolved:SBE', 'Temperature:Draw', 'Salinity:Bottle',
       'Flag:Salinity:Bottle', 'Chlorophyll:Extracted',
       'Flag:Chlorophyll:Extracted', 'Phaeo-Pigment:Extracted',
       'Oxygen:Dissolved', 'Flag:Oxygen:Dissolved', 'Nitrate_plus_Nitrite',
       'Flag:Nitrate_plus_Nitrite', 'Silicate', 'Flag:Silicate', 'Phosphate',
       'Flag:Phosphate', 'Cruise', 'PAR1', 'Sample_Number.1',
       'Number_of_bin_records', 'YYYY/MM/DD HH:MM:SS', 'ADM:MISSION',
       'ADM:PROJECT', 'Pressure [decibar]',
       'Temperature:Secondary [deg C (ITS90)]', 'Transmissivity [*/metre]',
       'Fluorescence:URU:Seapoint [mg/m^3]', 'PAR [uE/m^2/sec]',
       'PAR:Reference [uE/m^2/sec]', 'Salinity:T1:C1 [PSS-78]',
       'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]',
       'Salinity:Bottle [PSS-78]', 'Chlorophyll:Extracted [mg/m^3]',
       'Phaeo-Pigment:Extracted [mg/m^3]', 'Nitrate_plus_Nitrite [umol/L]',
       'Silicate [umol/L]', 'Phosphate [umol/L]', 'Bottle_Number',
       'Bottle:Firing_Sequence', 'Oxygen:Dissolved [mL/L]',
       'Oxygen:Dissolved [umol/kg]'],
      dtype='object')

In [7]:

def subval(idf,colList):
    # first value in colList should be the column you are going to keep
    # follow with other columns that will be used to fill in when that column is NaN
    # in order of precedence
    if len(colList)==2:
        idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \
                         else  r[colList[1]] for i,r in idf.iterrows()]
    elif len(colList)==3:
        idf[colList[0]]=[r[colList[0]] if not pd.isna(r[colList[0]]) \
                         else  r[colList[1]] if not pd.isna(r[colList[1]]) \
                         else r[colList[2]] for i,r in idf.iterrows()]
    else:
        raise NotImplementedError('Add to code to handle this case')
    return idf

In [8]:

# there are some duplicate columns here; handle them:
print(np.max(np.abs(dfbot['Sample_Number']-dfbot['Sample_Number.1'])),
      ': zero means no difference between Sample_Number and Sample_number.1 columns containing data')
# check there are no rows missing a value in 'Sample_Number':
print(np.sum(pd.isna(dfbot['Sample_Number'])),
      ': zero means no missing values in Sample_Number')
# -> extra copy is repeat so drop it
# 'ADM:MISSION' contains cruise id so drop it
# 'YYYY/MM/DD HH:MM:SS' data should be in 'FIL:START TIME YYYY/MM/DD HH:MM:SS'; move it there:
dfbot=subval(dfbot,('FIL:START TIME YYYY/MM/DD HH:MM:SS','YYYY/MM/DD HH:MM:SS'))
dfbot=subval(dfbot,('Pressure [decibar]','Pressure'))
dfbot=subval(dfbot,('Temperature:Secondary [deg C (ITS90)]','Temperature:Secondary'))
dfbot=subval(dfbot,('Transmissivity [*/metre]','Transmissivity'))
dfbot=subval(dfbot,('Fluorescence:URU:Seapoint [mg/m^3]','Fluorescence:URU:Seapoint'))
dfbot=subval(dfbot,('PAR [uE/m^2/sec]','PAR'))
dfbot=subval(dfbot,('PAR:Reference [uE/m^2/sec]','PAR:Reference'))
dfbot=subval(dfbot,('Salinity:T1:C1 [PSS-78]','Salinity:T1:C1'))
dfbot=subval(dfbot,('Salinity:Bottle [PSS-78]','Salinity:Bottle'))
dfbot=subval(dfbot,('Chlorophyll:Extracted [mg/m^3]','Chlorophyll:Extracted'))
dfbot=subval(dfbot,('Phaeo-Pigment:Extracted [mg/m^3]','Phaeo-Pigment:Extracted'))
dfbot=subval(dfbot,('Oxygen:Dissolved:SBE [mL/L]','Oxygen:Dissolved:SBE'))
dfbot=subval(dfbot,('Oxygen:Dissolved [mL/L]','Oxygen:Dissolved'))
dfbot=subval(dfbot,('Nitrate_plus_Nitrite [umol/L]','Nitrate_plus_Nitrite'))
dfbot=subval(dfbot,('Silicate [umol/L]','Silicate'))
dfbot=subval(dfbot,('Phosphate [umol/L]','Phosphate'))
dfbot=subval(dfbot,('PAR [uE/m^2/sec]','PAR1'))

0.0 : zero means no difference between Sample_Number and Sample_number.1 columns containing data
0 : zero means no missing values in Sample_Number

In [9]:

# drop repetetive/unecessary columns:
dfbot.drop(labels=['Sample_Number.1','ADM:MISSION','YYYY/MM/DD HH:MM:SS','Transmissivity',
                   'Pressure','Temperature:Secondary','Fluorescence:URU:Seapoint','PAR',
                   'PAR:Reference','Salinity:T1:C1','Chlorophyll:Extracted',
                   'Oxygen:Dissolved:SBE','Oxygen:Dissolved',
                   'Nitrate_plus_Nitrite','Silicate','PAR1'],axis=1,inplace=True)

In [10]:

# define a function that will be applied to the values in the index column;
# this makes it easier to drop non-data rows later
def convertIndex(val):
    try:
        x =int(val)
    except ValueError:
        x=np.nan
    return x

In [11]:

# load the 2015 phytoplankton data with the following options:
#   sheet_name='2015 CHEMTAX abs results'  -> choose the 2015 sheet
#   usecols='A:I,T:AC'   -> read only columns A:I and T:AC from the Excel sheet
#   skiprows=2     -> start reading at the 3rd row of the sheet, 
#                     which contains the column headings
#   converters={'Index': convertIndex,}   -> apply the function defined above to the Index column
#   verbose = True   -> print extra information/ warnings/ errors
dfPhyto=pd.read_excel(pathPhyto,sheet_name='2016',engine='openpyxl',
                      skiprows=0,converters={'Sample#': convertIndex,},
                      verbose=True)

Reading sheet 2016

In [12]:

# display rows 48 to 59 of the resulting table
dfPhyto[48:60]

Out[12]:

	Cruise	Date	Station	Sample#	rep	depth	Diatoms-1	Diatoms-2	Prasinophytes	Cryptophytes	Dinoflagellates-1	Haptophytes	Dictyo	Raphido	Cyanobacteria	TchlA
48	2016-007	June	14	306.0	mean	0	0	0	0.215782	0.568259	0.155735	1.02837	0	0	0.00485905	1.973
49	2016-007	June	16	318.0	mean	0	0.176595	0	0.197922	0.556587	0.129862	0.704256	0.0753036	0.0859203	0.0115539	1.938
50	2016-007	June	OKE	329.0	mean	0	0.534565	1.30459	0.310359	1.03751	0.435442	2.29758	2.14958	0.247075	0.0502969	8.367
51	2016-007	June	22	333.0	mean	0	0.748735	2.2245	0.0510914	0.270685	0.417001	0.156895	3.49932	0.0870553	0.0197189	7.475
52	Cruise	Date	Station	NaN	rep	depth	Diatoms-1	Diatoms-2	Prasinophytes	Cryptophytes	Dinoflagellates-1	Haptophytes	Dictyo	Raphido	Cyanobacteria	TchlA
53	2016-047	June	14	438.0	mean	0	1.03723	0.402856	0.0826985	0.215331	0.117579	0.224789	0.037861	0.0581045	0.0265498	2.203
54	2016-047	June	11	449.0	mean	0	0.37708	0.165123	0.087552	0.113769	0.101932	0.26103	0.0118683	0.0485396	0.0161065	1.183
55	2016-047	June	CPF1	452.0	mean	0	0.00981477	0	0.125556	0.143086	0.0616604	0.287119	0	0.0535119	0.00525178	0.686
56	2016-047	June	CPF2	455.0	mean	0	0.223128	0	0.0825152	0.324354	0.067458	0.251326	0.0179209	0.125422	0.026876	1.119
57	2016-047	June	22	466.0	mean	0	0.612099	0.124002	0.0585194	0.158803	0.0759229	0.119778	0.0561802	0.0190252	0.00866994	1.233
58	2016-047	June	24	469.0	mean	0	0.696984	0.106455	0.0752098	0.160245	0.0776968	0.182455	0.0441467	0.0226451	0.00516366	1.371
59	2016-047	June	28	472.0	mean	0	0.818837	0	0.0915953	0.174323	0.055937	0.0619514	0.0265586	0.0612878	0.0205105	1.311

In [13]:

# now, drop any rows from the table that have NaN values in either of the columns
#  'Index' or 'TchlA (ug/L)'
# This is why we applied a function to the Index column to make sure all 
#  non-numeric Index values would have a consistent NaN entry, making them easy to identify
#  and remove
dfPhyto.dropna(subset=['Sample#', 'TchlA'],how='any',inplace=True)

In [14]:

# pandas creates its own index, and after dropping rows I like to reset it -
# this is just for convenience
dfPhyto.reset_index(drop=True,inplace=True)

In [15]:

# apply formatting function all rows in Cruise column to get year-3digit format
dfPhyto['Cruise']=[fmtCruise(ii) for ii in dfPhyto['Cruise']]

In [16]:

# display part of the table, confirming that non-data rows have been removed
dfPhyto[48:60]

Out[16]:

	Cruise	Date	Station	Sample#	rep	Diatoms-1	Diatoms-2	Prasinophytes	Cryptophytes	Dinoflagellates-1	Haptophytes	Dictyo	Raphido	Cyanobacteria	TchlA
48	2016-007	June	16	318.0	mean	0.176595	0	0.197922	0.556587	0.129862	0.704256	0.0753036	0.0859203	0.0115539	1.938
49	2016-007	June	OKE	329.0	mean	0.534565	1.30459	0.310359	1.03751	0.435442	2.29758	2.14958	0.247075	0.0502969	8.367
50	2016-007	June	22	333.0	mean	0.748735	2.2245	0.0510914	0.270685	0.417001	0.156895	3.49932	0.0870553	0.0197189	7.475
51	2016-047	June	14	438.0	mean	1.03723	0.402856	0.0826985	0.215331	0.117579	0.224789	0.037861	0.0581045	0.0265498	2.203
52	2016-047	June	11	449.0	mean	0.37708	0.165123	0.087552	0.113769	0.101932	0.26103	0.0118683	0.0485396	0.0161065	1.183
53	2016-047	June	CPF1	452.0	mean	0.00981477	0	0.125556	0.143086	0.0616604	0.287119	0	0.0535119	0.00525178	0.686
54	2016-047	June	CPF2	455.0	mean	0.223128	0	0.0825152	0.324354	0.067458	0.251326	0.0179209	0.125422	0.026876	1.119
55	2016-047	June	22	466.0	mean	0.612099	0.124002	0.0585194	0.158803	0.0759229	0.119778	0.0561802	0.0190252	0.00866994	1.233
56	2016-047	June	24	469.0	mean	0.696984	0.106455	0.0752098	0.160245	0.0776968	0.182455	0.0441467	0.0226451	0.00516366	1.371
57	2016-047	June	28	472.0	mean	0.818837	0	0.0915953	0.174323	0.055937	0.0619514	0.0265586	0.0612878	0.0205105	1.311
58	2016-047	June	GE01	483.0	mean	1.71173	0	0.120347	0.210731	0.0864201	0.0260724	0.0207969	0.0199069	0	2.196
59	2016-047	June	38	486.0	mean	6.43463	0	0.186531	0.432792	0.137378	0	0	0.196669	0	7.388

In [17]:

# due to repeated column names in the original spreadsheet, '.1' was appended to the names
# of the phytoplankton columns; 
# these lines correct the column names, removing the '.1':
renameDict=dict()
for colName in dfPhyto.keys():
    if colName.endswith('.1'):
        renameDict[colName]=colName.split('.1')[0]
dfPhyto.rename(columns=renameDict,inplace=True)

In [18]:

dfPhyto

Out[18]:

	Cruise	Date	Station	Sample#	rep	depth	Diatoms-1	Diatoms-2	Prasinophytes	Cryptophytes	Dinoflagellates-1	Haptophytes	Dictyo	Raphido	Cyanobacteria	TchlA
0	2016-005	April	59	23.0	mean	0	4.50144	0.438107	0.048483	0.338791	0.0478202	0	0.00362949	0.000129969	0.0086402	5.38704
1	2016-005	April	102	37.0	mean	0	3.55538	1.73326	0.385956	1.82211	0.139308	0.186364	0.0440193	0.0767997	0	7.9432
2	2016-005	April	75	51.0	mean	0	14.7795	0.563364	0.397391	1.18036	0.075357	0.0941965	0	0	0	17.0901
3	2016-005	April	72	64.0	mean	0	0.923699	0.745832	0.337639	1.20012	0.0704119	0.0893556	0.00326812	0.0523156	0	3.42264
4	2016-005	April	69	76.0	mean	0	3.88279	0.290172	0.183778	0.531872	0.0478852	0.0609817	0	0	0	4.99748
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
115	2016-071	November	9	250.0	mean	0	0.0142285	0.082281	0.0897016	0.0562649	0.000186116	0.00849243	0.017071	0.00322275	0.00355171	0.275
116	2016-071	November	6	264.0	mean	0	0.00399836	0.104654	0.145557	0.109826	0.00047569	0.040834	0.0139237	0.00948663	0.00424517	0.433
117	2016-071	November	3	281.0	mean	0	0	0.0634224	0.0775545	0.0395764	0.0314152	0.0276703	0.0142296	0.0202238	0.00390783	0.278
118	2016-071	November	2	297.0	mean	0	0	0.0636933	0.181767	0.274402	0.0402761	0.0463697	0.0196038	0.0268882	0	0.653
119	2016-071	November	27	314.0	mean	0	0.00343804	0.112049	0.343388	0.400357	0.035914	0.0469057	0.00552154	0.0769891	0.00243813	1.027

120 rows × 16 columns

In [19]:

# This is the important step- join the two tables ('left' and 'right'), 
#  matching the cruise IDs and sample numbers
#   how='outer'  -> all rows from both the left and the right tables will be included, 
#                   even if they cannot be matched; this makes it easy to check for 
#                   unmatched data later
#   left_on  specifies the name of the column to match in the left table (dfbot) 
#   right_on specifies the name of the column to match in the right table (dfPhyto)
dfout = pd.merge(dfbot, dfPhyto,  how='outer', 
                 left_on=['Cruise','Sample_Number'], right_on = ['Cruise','Sample#'])

In [20]:

# show the column names in the resulting table
dfout.keys()

Out[20]:

Index(['File Name', 'Zone', 'FIL:START TIME YYYY/MM/DD HH:MM:SS',
       'LOC:EVENT_NUMBER', 'LOC:LATITUDE', 'LOC:LONGITUDE', 'LOC:WATER DEPTH',
       'ADM:SCIENTIST', 'LOC:STATION', 'Sample_Number', 'pH:SBE:Nominal',
       'Temperature:Draw', 'Salinity:Bottle', 'Flag:Salinity:Bottle',
       'Flag:Chlorophyll:Extracted', 'Phaeo-Pigment:Extracted',
       'Flag:Oxygen:Dissolved', 'Flag:Nitrate_plus_Nitrite', 'Flag:Silicate',
       'Phosphate', 'Flag:Phosphate', 'Cruise', 'Number_of_bin_records',
       'ADM:PROJECT', 'Pressure [decibar]',
       'Temperature:Secondary [deg C (ITS90)]', 'Transmissivity [*/metre]',
       'Fluorescence:URU:Seapoint [mg/m^3]', 'PAR [uE/m^2/sec]',
       'PAR:Reference [uE/m^2/sec]', 'Salinity:T1:C1 [PSS-78]',
       'Oxygen:Dissolved:SBE [mL/L]', 'Oxygen:Dissolved:SBE [umol/kg]',
       'Salinity:Bottle [PSS-78]', 'Chlorophyll:Extracted [mg/m^3]',
       'Phaeo-Pigment:Extracted [mg/m^3]', 'Nitrate_plus_Nitrite [umol/L]',
       'Silicate [umol/L]', 'Phosphate [umol/L]', 'Bottle_Number',
       'Bottle:Firing_Sequence', 'Oxygen:Dissolved [mL/L]',
       'Oxygen:Dissolved [umol/kg]', 'Date', 'Station', 'Sample#', 'rep',
       'depth', 'Diatoms-1', 'Diatoms-2', 'Prasinophytes', 'Cryptophytes',
       'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',
       'Cyanobacteria', 'TchlA'],
      dtype='object')

Checks¶

In [21]:

# Identify cases where phytoplankton data were matched to multiple samples in bottle data:
dftest=pd.merge(dfbot, dfPhyto,how='right', left_on=['Cruise','Sample_Number'],right_on = ['Cruise','Sample#'])

In [22]:

temp=dftest.groupby(['Cruise','Sample#']).agg({'Cruise':['count']})
temp.columns = ['icount']

In [23]:

np.unique(temp.icount)

Out[23]:

array([1])

In [24]:

# check for Phyto samples matched to multiple bottle samples:
temp.loc[temp.icount>1]

Out[24]:

		icount
Cruise	Sample#

In [25]:

# check for phyto samples not matched to bottle samples:
temp.loc[temp.icount==0]

Out[25]:

		icount
Cruise	Sample#

In [26]:

temp2=dfout.groupby(['Cruise','Sample_Number']).agg({'Cruise':['count']})
temp2.columns = ['icount']
# this will catch phyto matched to multiple bottle but also bottle with duplicate sample numbers per cruise:
temp2.loc[temp2.icount>1]

Out[26]:

		icount
Cruise	Sample_Number

In [27]:

# check for phyto samples not matched to bottle samples:
temp.loc[temp.icount==0]

Out[27]:

		icount
Cruise	Sample#

In [28]:

# if the output table is longer than either of the input tables, some columns were not matched
len(dfout), len(dfPhyto), len(dfbot)

Out[28]:

(1420, 120, 1420)

In [29]:

# Check that the number of cells with data in the 'Cyanobacteria' column is 
#  the same for the input and output tables to show that no rows are missing:
np.sum(dfPhyto['Cyanobacteria']>=0), np.sum(dfout['Cyanobacteria']>=0)

Out[29]:

(120, 120)

In [30]:

# If there were data rows from the phytoplankton table that were not matched to 
#  rows from the bottle table, their indices from the phytoplankton table would be 
#  displayed below (the series [] would not be empty)
print(dfout.loc[dfout['ADM:SCIENTIST'].isna()]['Sample#'])

Series([], Name: Sample#, dtype: float64)

In [31]:

# drop repetetive/unecessary columns:
dfout.drop(labels=['Date', 'Station', 'Sample#', 'rep',
                   'depth',],axis=1,inplace=True)

In [32]:

# truncate phyto group values to 3 decimal places:
for col in ('Cyanobacteria', 'Prasinophytes', 'Cryptophytes', 'Diatoms-1',
       'Diatoms-2', 'Dinoflagellates-1', 'Haptophytes', 'Dictyo', 'Raphido',
       'TchlA'):
    dfout[col]=[np.round(ii,decimals=3) for ii in dfout[col]] # use list comprehension to set values for entire column

In [33]:

dfout['Cyanobacteria']

Out[33]:

0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
        ...  
1415      NaN
1416      NaN
1417      NaN
1418    0.002
1419      NaN
Name: Cyanobacteria, Length: 1420, dtype: float64

In [34]:

# Check for unexpected depths
dfout.loc[((dfout['Pressure [decibar]']>10)|(dfout['Depth [metres]']>10))&(dfout['Diatoms-1']>=0)]#,
#         ['FIL:START TIME YYYY/MM/DD HH:MM:SS','Pressure [decibar]','Depth [metres]','Diatoms-1']]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2897             try:
-> 2898                 return self._engine.get_loc(casted_key)
   2899             except KeyError as err:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'Depth [metres]'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-34-98a40a30babe> in <module>
      1 # Check for unexpected depths
----> 2 dfout.loc[((dfout['Pressure [decibar]']>10)|(dfout['Depth [metres]']>10))&(dfout['Diatoms-1']>=0)]#,
      3 #         ['FIL:START TIME YYYY/MM/DD HH:MM:SS','Pressure [decibar]','Depth [metres]','Diatoms-1']]

~/anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2904             if self.columns.nlevels > 1:
   2905                 return self._getitem_multilevel(key)
-> 2906             indexer = self.columns.get_loc(key)
   2907             if is_integer(indexer):
   2908                 indexer = [indexer]

~/anaconda3/envs/py39/lib/python3.9/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2898                 return self._engine.get_loc(casted_key)
   2899             except KeyError as err:
-> 2900                 raise KeyError(key) from err
   2901 
   2902         if tolerance is not None:

KeyError: 'Depth [metres]'

In [ ]:

# now write the output table to a .csv file:
dfout.to_csv(pathOut, index=False)

In [ ]:

dfout.keys()

In [ ]: