import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
p='E:/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'
stations=pd.read_csv(p+'stations.csv')
stations=stations.set_index('ID')
stations.to_csv('data/stations.csv')
0: NoCloud 1: Cloud 4: Fog 6: Rain 7: Snow 11: Hail
weather={
2:1,
3:7,
5:6,
8:6,
9:6,
100:0,
101:0,
102:0,
103:0,
104:0,
105:0,
106:0,
107:0,
108:0,
109:0,
110:4,
111:4,
112:4,
113:1,
114:6,
115:6,
116:6,
117:6,
118:6,
119:1,
120:7,
121:6,
122:7,
123:6,
124:11,
125:6,
126:7,
127:11,
128:4,
129:6,
130:1,
131:1,
132:1,
133:1,
134:1,
135:1,
136:7,
137:7,
138:7,
139:7,
140:4,
141:4,
142:4,
143:4,
144:4,
145:4,
146:4,
147:4,
148:4,
149:4,
150:6,
151:6,
152:6,
153:6,
154:6,
155:6,
156:6,
157:6,
158:6,
159:6,
160:6,
161:6,
162:6,
163:6,
164:6,
165:6,
166:11,
167:11,
168:6,
169:6,
170:7,
171:7,
172:7,
173:7,
174:7,
175:7,
176:7,
177:7,
178:7,
179:7,
180:6,
181:6,
182:6,
183:7,
184:7,
185:7,
186:7,
187:7,
188:7,
189:11,
190:11,
191:6,
192:6,
193:7,
194:7,
195:6,
196:11,
197:6,
198:1,
199:11}
measures=['XTEMP','XSPD','XPCP','XSD','XVSB','YFOG','YPCP','YSNW','YHAL']
ycolumns={1:'YCLD',4:'YFOG',6:'YPCP',7:'YSNW',10:'YCLR',11:'YHAL'}
def load_data(stn,d='high_res',p=p,stations=stations,verbose=True):
if verbose:
print('loading...',stn,stations.loc[int(stn)]['LOC'],d)
df=pd.read_csv(p+'/'+d+'/export/'+stn+'.csv',dtype={' FRSHTT':str})
df.columns=[i.strip() for i in df.columns]
df['time']=pd.to_datetime(df['time'])
df['XTEMP']=(pd.to_numeric(df['TEMP'], errors='coerce').replace(9999.9,np.nan)-32)*5/9 #Fahrenheit to Celsiu
if d=='high_res':
df['XSPD']=pd.to_numeric(df['SPD'], errors='coerce')*1.61 #MPH to Km/h
df['XVSB']=(pd.to_numeric(df['VSB'], errors='coerce')*1.61).apply(lambda x: min(x,10)) #miles to Km, max VSB=20Km
df['XPCP']=pd.to_numeric(df['PCP06'], errors='coerce')*25.4 #inch to mm
df['XSD']=pd.to_numeric(df['SD'], errors='coerce')*25.4 #inch to mm
df['PCP01']=pd.to_numeric(df['PCP01'], errors='coerce')
df['PCP06']=pd.to_numeric(df['PCP06'], errors='coerce')
df['PCP24']=pd.to_numeric(df['PCP24'], errors='coerce')
df['PCPXX']=pd.to_numeric(df['PCPXX'], errors='coerce')
df['PCP06'].loc[~df['PCP06'].isnull()] = 6
df['PCPXX'].loc[~df['PCPXX'].isnull()] = 6
df['PCP01'].loc[~df['PCP01'].isnull()] = 6
df['PCP24'].loc[~df['PCP24'].isnull()] = 6
df['AW']=pd.to_numeric(df['AW'], errors='coerce')+100
df['MW']=pd.to_numeric(df['MW'], errors='coerce')+100
df['W']=pd.to_numeric(df['W'], errors='coerce')
dz=df[['PCP01','PCP06','PCP24','PCPXX','AW','MW','W']]
df['W']=dz.ffill(axis=1)['W'].replace(weather).replace(0,10)
dz=df.groupby(['time','W']).count()['TEMP'].unstack().fillna(0)
dz.columns=[ycolumns[i] for i in dz.columns]
df=df.set_index('time').join(dz).reset_index()
else:
df['year']=df['time'].dt.year
df['month']=df['time'].dt.month
df['day']=df['time'].dt.day
df['hour']=df['time'].dt.hour
df['XSPD']=pd.to_numeric(df['WDSP'], errors='coerce').replace(999.9,np.nan)*1.85 #knots to Km/h
df['XVSB']=(pd.to_numeric(df['VISIB'], errors='coerce').replace(999.9,np.nan)*1.61).apply(lambda x: min(x,10)) #miles to Km, max VSB=20Km
df['XPCP']=pd.to_numeric(df['PRCP'].str[:-1], errors='coerce').replace(99.99,np.nan)*25.4 #inch to mm
df['XSD']=pd.to_numeric(df['SNDP'], errors='coerce').replace(999.9,np.nan)*25.4 #inch to mm
df['YFOG']=pd.to_numeric(df['FRSHTT'].str[0], errors='coerce')
df['YPCP']=pd.to_numeric(df['FRSHTT'].str[1], errors='coerce')
df['YSNW']=pd.to_numeric(df['FRSHTT'].str[2], errors='coerce')
df['YHAL']=pd.to_numeric(df['FRSHTT'].str[3], errors='coerce')
for m in measures:
if m not in df.columns:
df[m]=np.nan
return df[['time','year','month','day','hour']+measures].set_index('time')
# stn='154200' #aurel vlaicu
stn='151700' #mciuc
daily=load_data(stn,'daily')
hires=load_data(stn,'high_res')
loading... 151700 MIERCUREA CIUC daily loading... 151700 MIERCUREA CIUC high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)): C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py:205: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._setitem_with_indexer(indexer, value)
#keep only days with at least 4 readings (usually at 6,12,18,24) to false means
filt=hires.groupby(['year','month','day'])[['hour']].nunique()
filt2=filt[filt>=4].dropna().reset_index()
filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
def comparison_getter(measure,daily=daily,hires=hires,filt3=filt3):
if type(measure)!=list:
measure=[measure]
d=daily[measure]
h=hires.groupby(['year','month','day']).mean()[measure]
ymeasures=[m for m in measure if 'Y' in m]
h[ymeasures]=h[ymeasures][h[ymeasures]==0].fillna(1)
h=h.reset_index()
h['time']=pd.to_datetime(dict(year=h['year'], month=h['month'], day=h['day'])).values
h=h.set_index('time')[measure]
h=h.loc[filt3]
return d,h
def comaprison_plotter(measure,daily=daily,hires=hires,filt3=filt3):
d,h=comparison_getter(measure,daily,hires,filt3)
d.columns=['d']
h.columns=['h']
x=h.join(d,how='outer').dropna()
x['diff']=(x['h']-x['d'])
fig,ax=plt.subplots(1,3,figsize=(15,4))
x['diff'].plot(ax=ax[0],title='diff')
x['h'].plot(ax=ax[1],title='high res')
x['d'].plot(ax=ax[2],title='daily')
return x,d,h
comaprison_plotter('XTEMP');
comaprison_plotter('XSPD');
x,d,h=comaprison_plotter('XPCP')
x,d,h=comaprison_plotter('XSD');
x,d,h=comaprison_plotter('XVSB');
x,d,h=comaprison_plotter('YSNW');
x,d,h=comaprison_plotter('YPCP');
x,d,h=comaprison_plotter('YHAL');
hu=['127720',
'128050',
'128120',
'128220',
'128250',
'128300',
'128390',
'128430',
'128510',
'128600',
'128820',
'128920',
'129100',
'129150',
'129420',
'129600',
'129700',
'129820',
'129920',
'129350']
ro=['150040',
'150100',
'150140',
'150150',
'150200',
'150230',
'150800',
'150850',
'150900',
'151080',
'151200',
'151450',
'151500',
'151700',
'151970',
'152000',
'152300',
'152350',
'152470',
'152600',
'152800',
'152920',
'153100',
'153350',
'153460',
'153500',
'153600',
'154100',
'154200',
'154210',
'154500',
'154600',
'154700',
'154800',
'154810',
'154990']
hs=[]
ds=[]
for stn in ro:
try:
d_ok=True
daily=load_data(stn,'daily')
except:
d_ok=False
print('failed')
try:
h_ok=True
hires=load_data(stn,'high_res')
except:
h_ok=False
print('failed')
#keep only days with at least 4 readings (usually at 6,12,18,24) to false means
filt=hires.groupby(['year','month','day'])[['hour']].nunique()
filt2=filt[filt>=4].dropna().reset_index()
filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
d,h=comparison_getter(measures,daily,hires,filt3)
d['ID']=stn
h['ID']=stn
if d_ok: ds.append(d)
if h_ok: hs.append(h)
ds=pd.concat(ds)
hs=pd.concat(hs)
loading... 150040 SIGHETUL MARMATIEI daily loading... 150040 SIGHETUL MARMATIEI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)): C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:21: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
loading... 150100 SATU MARE daily loading... 150100 SATU MARE high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,22,23) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150140 TAUTII MAGHERAUS daily loading... 150140 TAUTII MAGHERAUS high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150150 OCNA SUGATAG daily loading... 150150 OCNA SUGATAG high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150200 BOTOSANI daily loading... 150200 BOTOSANI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150230 STEFAN CEL MARE daily loading... 150230 STEFAN CEL MARE high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,7,13,22,25) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150800 ORADEA daily loading... 150800 ORADEA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,13,22) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 150850 BISTRITA daily loading... 150850 BISTRITA high_res loading... 150900 IASI daily loading... 150900 IASI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,22,23) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 151080 CEAHLAU TOACA daily loading... 151080 CEAHLAU TOACA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,22) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 151200 CLUJ NAPOCA daily loading... 151200 CLUJ NAPOCA high_res loading... 151450 VIDRASAU daily loading... 151450 VIDRASAU high_res loading... 151500 BACAU daily loading... 151500 BACAU high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,23) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 151700 MIERCUREA CIUC daily loading... 151700 MIERCUREA CIUC high_res loading... 151970 BARLAD daily loading... 151970 BARLAD high_res loading... 152000 ARAD daily loading... 152000 ARAD high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 152300 DEVA daily loading... 152300 DEVA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 152350 FAGARAS daily loading... 152350 FAGARAS high_res loading... 152470 TRAIAN VUIA daily loading... 152470 TRAIAN VUIA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,13,22,23,25) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 152600 SIBIU daily loading... 152600 SIBIU high_res loading... 152800 VARFU OMU daily loading... 152800 VARFU OMU high_res loading... 152920 CARANSEBES daily loading... 152920 CARANSEBES high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,24) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 153100 GALATI daily loading... 153100 GALATI high_res loading... 153350 CATALOI daily loading... 153350 CATALOI high_res loading... 153460 RAMNICU VALCEA daily loading... 153460 RAMNICU VALCEA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 153500 BUZAU daily loading... 153500 BUZAU high_res loading... 153600 SULINA daily loading... 153600 SULINA high_res loading... 154100 DROBETA TURNU SEVERIN daily loading... 154100 DROBETA TURNU SEVERIN high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (12,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 154200 AUREL VLAICU daily loading... 154200 AUREL VLAICU high_res loading... 154210 HENRI COANDA daily loading... 154210 HENRI COANDA high_res loading... 154500 CRAIOVA daily loading... 154500 CRAIOVA high_res loading... 154600 CALARASI daily loading... 154600 CALARASI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 154700 ROSIORII DE VEDE daily loading... 154700 ROSIORII DE VEDE high_res loading... 154800 CONSTANTA daily loading... 154800 CONSTANTA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (12,22,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 154810 MIHAIL KOGALNICEANU daily loading... 154810 MIHAIL KOGALNICEANU high_res loading... 154990 MANGALIA daily loading... 154990 MANGALIA high_res
ds.to_csv('data/ro_ds.csv')
hs.to_csv('data/ro_hs.csv')
hs=[]
ds=[]
for stn in hu:
try:
d_ok=True
daily=load_data(stn,'daily')
except:
d_ok=False
print('failed')
try:
h_ok=True
hires=load_data(stn,'high_res')
except:
h_ok=False
print('failed')
#keep only days with at least 4 readings (usually at 6,12,18,24) to false means
filt=hires.groupby(['year','month','day'])[['hour']].nunique()
filt2=filt[filt>=4].dropna().reset_index()
filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
d,h=comparison_getter(measures,daily,hires,filt3)
d['ID']=stn
h['ID']=stn
if d_ok: ds.append(d)
if h_ok: hs.append(h)
ds=pd.concat(ds)
hs=pd.concat(hs)
loading... 127720 MISKOLC daily loading... 127720 MISKOLC high_res
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:21: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
loading... 128050 SOPRON daily loading... 128050 SOPRON high_res loading... 128120 SZOMBATHELY ARPT / VAS daily loading... 128120 SZOMBATHELY ARPT / VAS high_res loading... 128220 GYOR daily loading... 128220 GYOR high_res loading... 128250 PAPA daily loading... 128250 PAPA high_res loading... 128300 VESZPREM/SZENTKIRALYSZABADJA daily loading... 128300 VESZPREM/SZENTKIRALYSZABADJA high_res loading... 128390 FERIHEGY daily loading... 128390 FERIHEGY high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,22,23,25) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 128430 BUDAPEST/PESTSZENTLORINC daily loading... 128430 BUDAPEST/PESTSZENTLORINC high_res loading... 128510 KEKESTETO daily loading... 128510 KEKESTETO high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 128600 SZOLNOK daily loading... 128600 SZOLNOK high_res loading... 128820 DEBRECEN daily loading... 128820 DEBRECEN high_res loading... 128920 NYIREGYHAZA daily loading... 128920 NYIREGYHAZA high_res loading... 129100 SZENTGOTTHARD/FARKASFA daily loading... 129100 SZENTGOTTHARD/FARKASFA high_res loading... 129150 ZALAEGERSZEG/ANDRASHIDA daily loading... 129150 ZALAEGERSZEG/ANDRASHIDA high_res loading... 129420 PECS SOUTH daily loading... 129420 PECS SOUTH high_res loading... 129600 BAJA daily loading... 129600 BAJA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
loading... 129700 KECSKEMET daily loading... 129700 KECSKEMET high_res loading... 129820 SZEGED (AUT) daily loading... 129820 SZEGED (AUT) high_res loading... 129920 BEKESCSABA daily loading... 129920 BEKESCSABA high_res loading... 129350 SIOFOK daily loading... 129350 SIOFOK high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,21,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False. if (await self.run_code(code, result, async_=asy)):
ds.to_csv('data/hu_ds.csv')
hs.to_csv('data/hu_hs.csv')