import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# File IDs (file-name stems of the '.txt' files) read from 'daily/raw/ro/'.
# NOTE(review): 'ro' presumably stands for Romania - confirm.
ro = [
    'CDO7252998062998',
    'CDO5064618063001',
    'CDO3042698063020',
    'CDO7893378063026',
    'CDO4604228063028',
    'CDO7821968063031',
    'CDO5072238063046',
    'CDO4981038063054',
    'CDO4725178063056',
    'CDO5209078063060',
    'CDO699718063062',
    'CDO4894288063064',
    'CDO1632508063066',
    'CDO8765068063068',
    'CDO9993348063070',
]

# File IDs (file-name stems of the '.txt' files) read from 'daily/raw/hu/'.
# NOTE(review): 'hu' presumably stands for Hungary - confirm.
hu = [
    'CDO5941998062972',
    'CDO5285728062974',
    'CDO3021588062978',
    'CDO9675788062981',
]

# Base folder of the data set; every path below is built by appending to it,
# so it must keep its trailing slash.
p = 'C:/Users/csala/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'
# Table stored next to the daily files.
# NOTE(review): presumably station metadata - confirm against the CSV.
stations = pd.read_csv(p + 'stations.csv')

# Read every raw file listed in `hu` into its own DataFrame and collect them
# in `dfs`; the file ID is printed after each read as a progress indicator.
dfs = []
for file_id in hu:
    raw_path = p + 'daily/raw/hu/' + file_id + '.txt'
    # The column names carry a leading space in the raw header. Both columns
    # are kept as strings so values such as '000000' keep their leading zeros
    # and the date can be parsed explicitly later on.
    df = pd.read_csv(raw_path, dtype={' FRSHTT': str, ' YEARMODA': str})
    dfs.append(df)
    print(file_id)
CDO5941998062972 CDO5285728062974 CDO3021588062978 CDO9675788062981
# Read every raw file listed in `ro` and append it to the `dfs` list that
# already holds the 'hu' frames; the file ID is printed after each read.
for file_id in ro:
    raw_path = p + 'daily/raw/ro/' + file_id + '.txt'
    # Same dtype overrides as for the 'hu' files: keep the flag and date
    # columns (note the leading space in their names) as strings.
    df = pd.read_csv(raw_path, dtype={' FRSHTT': str, ' YEARMODA': str})
    dfs.append(df)
    print(file_id)
CDO7252998062998 CDO5064618063001 CDO3042698063020 CDO7893378063026 CDO4604228063028 CDO7821968063031 CDO5072238063046 CDO4981038063054 CDO4725178063056 CDO5209078063060 CDO699718063062 CDO4894288063064 CDO1632508063066 CDO8765068063068 CDO9993348063070
# Stack the per-file frames into one DataFrame. From here on `dfs` is a
# DataFrame, not a list; the row labels of the individual files are kept.
dfs = pd.concat(dfs)

# Whole-value substitutions applied to the date strings before parsing.
# '199710' has no day part and cannot be read as '%Y%m%d'.
# NOTE(review): presumably a truncated entry in the raw data - confirm.
year_fixer = {'199710': '19971001'}

# Strip surrounding whitespace, patch the known bad value, then parse the
# YYYYMMDD strings into a proper datetime column.
yearmoda = dfs[' YEARMODA'].str.strip().replace(year_fixer)
dfs['time'] = pd.to_datetime(yearmoda, format='%Y%m%d')

dfs.head()
STN--- | WBAN | YEARMODA | TEMP | DEWP | .1 | SLP | .2 | STP | ... | .5 | MXSPD | GUST | MAX | MIN | PRCP | SNDP | FRSHTT | Unnamed: 22 | time | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 127560 | 99999 | 20150101 | 24.2 | 6.0 | 16.9 | 6.0 | 1036.1 | 6.0 | 1016.1 | ... | 5.0 | 7.8 | 999.9 | 27.3 | 20.8* | 0.00I | 999.9 | 000000 | NaN | 2015-01-01 |
1 | 127560 | 99999 | 20150102 | 25.6 | 8.0 | 23.9 | 8.0 | 1030.7 | 8.0 | 1010.9 | ... | 6.0 | 7.8 | 999.9 | 31.8* | 21.2 | 0.02E | 999.9 | 000000 | NaN | 2015-01-02 |
2 | 127560 | 99999 | 20150103 | 34.4 | 8.0 | 30.0 | 8.0 | 1023.9 | 8.0 | 1004.6 | ... | 8.0 | 15.5 | 999.9 | 40.5 | 28.0 | 0.00I | 999.9 | 000000 | NaN | 2015-01-03 |
3 | 127560 | 99999 | 20150104 | 33.6 | 7.0 | 30.4 | 7.0 | 1016.6 | 7.0 | 997.3 | ... | 7.0 | 13.6 | 999.9 | 40.8 | 28.4* | 0.04A | 999.9 | 000000 | NaN | 2015-01-04 |
4 | 127560 | 99999 | 20150105 | 27.2 | 12.0 | 24.0 | 12.0 | 1022.6 | 12.0 | 1003.0 | ... | 10.0 | 11.7 | 999.9 | 37.4 | 18.3* | 0.00G | 999.9 | 000000 | NaN | 2015-01-05 |
5 rows × 24 columns
# Quick visual check: plot the ' TEMP' column of one station against time.
stn = 151700
d = dfs.loc[dfs['STN---'] == stn]
temp_by_time = d.set_index('time')[' TEMP']
temp_by_time.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x2274de14f88>
# Write one CSV per station into 'daily/export/', named after the station ID,
# and print each ID as it is written.
#
# A single groupby pass splits the frame once instead of re-scanning every row
# with a boolean mask for each station. `sort=False` keeps the stations in
# order of first appearance, which is the order `unique()` would give, and the
# rows inside each group keep their original order.
for stn, d in dfs.groupby('STN---', sort=False):
    d.to_csv(p + 'daily/export/' + str(stn) + '.csv')
    print(stn)
127560 127660 127720 127860 128050 128120 128150 128220 128250 128300 128360 128390 128430 128460 128470 128510 128600 128605 128660 128700 128820 128920 129100 129150 129200 129220 129250 129300 129350 129420 129500 129600 129700 129820 129920 128603 129320 697204 128305 128380 128400 128601 129400 119000 150001 150010 150850 150940 151240 151500 151630 151970 152000 152005 152080 152090 152150 152190 152210 152790 152850 152890 153410 153660 153890 154120 154200 154650 154790 154890 154940 150105 150000 150002 150040 150070 150100 150140 150150 150200 150230 150250 150330 150420 150440 150470 150560 150630 150690 150730 150800 150830 150880 150900 150950 150990 151070 151080 151090 151110 151130 151170 151180 151190 151200 151230 151270 151360 151380 151430 151450 151480 151540 151600 151620 151650 151680 151700 151790 151820 151840 151890 151940 151990 152040 152060 152170 152300 152310 152350 152380 152450 152470 152540 152600 152610 152620 152640 152650 152700 152770 152800 152840 152920 152960 152970 153000 153010 153020 153070 153100 153140 153150 153160 153170 153190 153200 153240 153250 153280 153330 153350 153360 153370 153380 153400 153440 153450 153460 153470 153490 153500 153600 153640 153690 153730 153750 153770 153870 153880 153950 154020 154050 154060 154080 154090 154100 154160 154190 154210 154215 154220 154240 154250 154280 154340 154440 154450 154500 154550 154600 154620 154690 154700 154750 154770 154800 154810 154820 154900 154910 154931 154980 154990 150090 150400 150410 150520 151400 152120 152410 152730 152820 152980 153550 154230 154290 150550 150890 151320 151340 151580 151740 150750 152590 152870 152990 153210 153560 154430 150320 151590 152670 153630 154760 150235 151205 151455 153355 154930 154470 154580 154510
Determine the 50 stations with the most daily records
# Print the IDs of the 50 stations with the most rows, in ascending ID order,
# each formatted as a quoted, comma-terminated list entry ('127720',) so the
# output can be pasted into a Python list.
#
# Only the 'time' column is counted; counting every column and then keeping
# one of them gives the same numbers with more work.
records_per_station = dfs.groupby('STN---')['time'].count()
busiest = records_per_station.sort_values(ascending=False).head(50).index
for i in np.sort(busiest):
    print("'" + str(i) + "',")
'127720', '128050', '128120', '128220', '128250', '128300', '128390', '128430', '128510', '128600', '128820', '128920', '129100', '129150', '150040', '150100', '150140', '150150', '150200', '150230', '150800', '150850', '150900', '151080', '151200', '151450', '151500', '151700', '151970', '152000', '152300', '152350', '152470', '152600', '152800', '152920', '153100', '153350', '153460', '153500', '153600', '154100', '154200', '154210', '154500', '154600', '154700', '154800', '154810', '154990',