import pandas as pd, numpy as np
from scipy import stats
import random
# Load the station table from disk and index it by its 'ID' column.
stations=pd.read_csv('data/stations.csv').set_index('ID')
Set up plot parameters
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.collections import PolyCollection
%matplotlib inline
import matplotlib as mpl
import matplotlib.font_manager as font_manager
# Font files, given as relative paths.
path = 'KulimPark-Regular.ttf'
path2 = 'Symbola.ttf'
# prop is used below for plain text and number labels,
# prop2 for the symbol/emoji glyphs.
prop = font_manager.FontProperties(fname=path)
prop2 = font_manager.FontProperties(fname=path2)
# --- Colours -----------------------------------------------------------------
# Names ending in "0" are the base values; the driver script derives the
# suffix-less working variables (color_obs_right, ...) from them.
color_ax = '#B6B6B6'
color_bg = '#ECACAC'
color_obs_right0 = '#F2B880'
color_obs_left0 = '#E7CFBC'
color_pred_right0 = '#BB8B8B'
color_pred_left0 = '#966B9D'
color_pred_talalt0 = '#59c687'      # "talalt": hit
color_pred_nem_talalt0 = '#c95498'  # "nem talalt": miss

# --- Sizes -------------------------------------------------------------------
font_size = 12
s = 40  # default marker size used by the stem plots

# --- Glyphs ------------------------------------------------------------------
obs_talalt_glyph0 = '★'
obs_nem_talalt_glyph0 = '☆'
pred_talalt_glyph0 = '✔️'
pred_nem_talalt_glyph0 = '✖️'

# Icons keyed by quantity name: right of the threshold, left of it, and the
# icon for the quantity itself.
title_icon_right = {
    'Temp': '☼',
    'Wind': '🌀',
    'Hail': '⭕️',
    'Snow': '☃️',
    'Snow Depth': '⛄️',
    'Rain': '☔️',
    'Visib': '☀️',
}
title_icon_left = {
    'Temp': '✨️',
    'Wind': '☘',
    'Hail': '⚪️',
    'Snow': '⚪️',
    'Snow Depth': '⚪️',
    'Rain': '🌂',
    'Visib': '⛈️',
}
title_icon = {
    'Temp': '♨️',
    'Rain': '☂️',
    'Hail': '✴️',
    'Snow': '❄️',
    'Snow Depth': '⛷️',
    'Wind': '⛵️',
    'Cloud': '☁️',
    'Visib': '⛅️',
}
def get_data(data, th):
    """Split ``data`` at threshold ``th`` into a 'left' and a 'right' group.

    Parameters
    ----------
    data : values to split; wrapped in a DataFrame after masking.
    th : threshold; values ``<= th`` go left, values ``> th`` go right.

    Returns
    -------
    (left, right, combined) : three DataFrames. ``left`` and ``right`` carry
    a group column ``g`` ('left' / 'right'); ``combined`` is ``left``
    followed by ``right`` with an extra constant column ``x`` set to 'x'.
    """
    at_or_below = pd.DataFrame(data[data <= th]).assign(g='left')
    above = pd.DataFrame(data[data > th]).assign(g='right')
    combined = pd.concat([at_or_below, above]).assign(x='x')
    return at_or_below, above, combined
def _violin_needs_padding(frame):
    """True when ``frame`` has at most one row or a single distinct value."""
    return len(frame) <= 1 or frame.nunique()[frame.columns[0]] == 1


def _violin_jitter_pair(frame, th):
    """Return two one-row copies of ``frame``'s first row, nudged up and down.

    The first column of each copy is shifted by an independent
    ``random.random() * 0.1 * th`` and the copy is tagged with ``x='x'``.
    """
    value_col = frame.columns[0]
    first_label = [frame.index[0]]

    raised = frame.copy().loc[first_label]
    raised[value_col] += random.random() * 0.1 * th
    raised['x'] = 'x'

    lowered = frame.copy().loc[first_label]
    lowered[value_col] -= random.random() * 0.1 * th
    lowered['x'] = 'x'
    return raised, lowered


def violin_plot(data, th, ax, color_left, color_right):
    """Draw a split violin of ``data`` on ``ax``, divided at threshold ``th``.

    Values ``<= th`` form the left half (``color_left``), values ``> th`` the
    right half (``color_right``). A side that is empty gets a placeholder row
    (value 5), and a side with at most one row or one distinct value is
    padded with two randomly jittered copies of its first row.

    Parameters
    ----------
    data : values to plot, passed to ``get_data``.
    th : split threshold; also scales the random jitter.
    ax : matplotlib axes to draw on.
    color_left, color_right : colours for the two halves.
    """
    fill_alpha = 0.3
    left, right, both = get_data(data, th)

    # Placeholder rows for empty sides, taken from the first combined row.
    if len(left) == 0:
        left = both.loc[[both.index[0]]]
        left['g'] = 'left'
        left[left.columns[0]] = 5
    if len(right) == 0:
        right = both.loc[[both.index[0]]]
        right['g'] = 'right'
        right[right.columns[0]] = 5

    # Left side first, then right, so the random draws keep their order.
    for side in (left, right):
        if _violin_needs_padding(side):
            both = pd.concat([both, *_violin_jitter_pair(side, th)])

    both = both.sort_values('g')
    ax.axvline(0, color=color_ax)

    if both.nunique()['g'] > 1:
        sns.violinplot(y=left.columns[0], x='x', hue='g', data=both, split=True, ax=ax,
                       inner=None, linewidth=1, scale="count", saturation=1)
        # The first two children of the axes are recoloured as the two halves.
        for child_pos, shade in enumerate((color_left, color_right)):
            half = ax.get_children()[child_pos]
            half.set_color(mpl.colors.colorConverter.to_rgba(shade, alpha=fill_alpha))
            half.set_edgecolor(shade)
        ax.legend().remove()
    else:
        # NOTE(review): after the padding above both groups appear to be
        # always present, so this branch looks unreachable - confirm.
        if len(left) > 0:
            only_side, shade = left, color_left
        else:
            only_side, shade = right, color_right
        sns.violinplot(y=only_side.columns[0], data=only_side, ax=ax,
                       inner=None, linewidth=1, scale="count", saturation=1)
        ax.set_xlim([-1, 0])
        body = ax.get_children()[0]
        body.set_color(mpl.colors.colorConverter.to_rgba(shade, alpha=fill_alpha))
        ax.get_children()[0].set_edgecolor(shade)
def setup_axes():
    """Create the three-panel figure and its inset axes.

    The figure has one row of three panels with width ratios 1:3:1, all with
    their axis decorations switched off. The first two entries of ``axes``
    are replaced by inset axes covering the upper 85 % of their panel; the
    third panel receives a top and a bottom inset.

    Returns
    -------
    (fig, axes, axi_top, axi_bottom)
    """
    fig, axes = plt.subplots(1, 3, figsize=(8, 10),
                             gridspec_kw={'width_ratios': [1, 3, 1]})

    right_panel = axes[2]
    axi_top = right_panel.inset_axes([0.1, 0.65, 1, 0.3])
    axi_top.axis('off')
    axi_bottom = right_panel.inset_axes([0.1, 0, 1, 0.5])
    axi_bottom.axis('off')

    for panel in axes:
        panel.axis('off')

    # Swap the two left panels for insets that leave room underneath.
    for pos in (0, 1):
        axes[pos] = axes[pos].inset_axes([0, 0.15, 1, 0.85])
        axes[pos].axis('off')

    return fig, axes, axi_top, axi_bottom
def stem_plot(data, ax, color, s=s):
    """Draw a horizontal stem plot of ``data`` on ``ax``.

    Each row becomes a line from x=0 to its value at the height of its index
    label, capped by a scatter marker.

    Parameters
    ----------
    data : anything ``pd.DataFrame`` accepts; only the first column is drawn.
    ax : matplotlib axes to draw on.
    color : colour for both stems and markers.
    s : marker size, forwarded to ``ax.scatter``.
    """
    frame = pd.DataFrame(data)
    heights = frame.index
    values = frame[frame.columns[0]].values
    for height, value in zip(heights, values):
        ax.plot([0, value], [height, height], color=color)
    # Markers sit above the stems (zorder 10).
    ax.scatter(values, heights, s, color=color, zorder=10)
def stem2_plot(data, th, ax, color_left, color_right, s=s, axv_color=None):
    """Draw a two-colour stem plot of ``data`` split at threshold ``th``.

    Values ``<= th`` are drawn in ``color_left``, values ``> th`` in
    ``color_right``. A solid line marks x=0 and a dashed line marks ``th``.

    Parameters
    ----------
    data : values to plot, passed to ``get_data``.
    th : split threshold, also the position of the dashed line.
    ax : matplotlib axes to draw on.
    color_left, color_right : colours for the two groups.
    s : marker size, forwarded to ``stem_plot``.
    axv_color : colour of the dashed threshold line; defaults to
        ``color_right`` when None.
    """
    # Identity test: "== None" misbehaves for array-like colour values.
    if axv_color is None:
        axv_color = color_right
    left, right, _ = get_data(data, th)
    stem_plot(left, ax, color_left, s)
    stem_plot(right, ax, color_right, s)
    ax.axvline(0, color=color_ax)
    # The threshold line is drawn unconditionally, including for th == 0.
    ax.axvline(th, color=axv_color, ls='--', zorder=5)
def icons_plot(axes, kondicio, mennyiseg, observation_th, prediction_th):
    """Place the quantity icons above the observation and prediction panels.

    Parameters
    ----------
    axes : sequence of axes; ``axes[0]`` is the observation panel and
        ``axes[1]`` the prediction panel.
    kondicio : key into ``title_icon`` for the observed quantity.
    mennyiseg : key into ``title_icon``, ``title_icon_right`` and
        ``title_icon_left`` for the predicted quantity.
    observation_th, prediction_th : x positions (data coordinates) of the
        centre icons.
    """
    y_low, y_high = axes[0].get_ylim()
    x_low, x_high = axes[1].get_xlim()
    # Icons sit 5 % above the top of the y-range taken from axes[0].
    icon_y = y_low + (y_high - y_low) * 1.05
    side_shift = (x_high - x_low) * 0.1
    shared = dict(horizontalalignment='center', fontproperties=prop2,
                  fontsize=font_size * 1.5)

    axes[0].text(observation_th, icon_y, title_icon[kondicio],
                 color=color_obs_right0, **shared)
    axes[1].text(prediction_th, icon_y, title_icon[mennyiseg],
                 color=color_ax, **shared)
    axes[1].text(prediction_th + side_shift, icon_y, title_icon_right[mennyiseg],
                 color=color_pred_right, **shared)
    axes[1].text(prediction_th - side_shift, icon_y, title_icon_left[mennyiseg],
                 color=color_pred_left, **shared)
def talalat_plot_line(axes, n_prediction_ts_good, n_prediction_ts_bad,
                      n_prediction_ts_good_talalt, n_prediction_ts_good_nem_talalt,
                      observation_th, prediction_th):
    """Write the hit/miss summary rows underneath the two main panels.

    Below ``axes[0]`` the two observation glyphs and their counts are placed
    around ``observation_th``; below ``axes[1]`` the two prediction glyphs
    and their counts are placed around ``prediction_th``. A second row adds
    the captions 'feltétel' and 'jóslat' and an arrow on ``axes[1]``.

    All vertical positions are derived from the y-limits of ``axes[0]``.

    Parameters
    ----------
    axes : sequence of axes; only ``axes[0]`` and ``axes[1]`` are used.
    n_prediction_ts_good, n_prediction_ts_bad : counts shown under axes[0].
    n_prediction_ts_good_talalt, n_prediction_ts_good_nem_talalt : counts
        shown under axes[1].
    observation_th, prediction_th : x positions the rows are centred on.
    """
    obs_ax, pred_ax = axes[0], axes[1]
    glyph_font = dict(horizontalalignment='center', fontproperties=prop2)
    text_font = dict(horizontalalignment='center', fontproperties=prop)

    ylim = obs_ax.get_ylim()
    y_span = ylim[1] - ylim[0]
    row_y = ylim[0] + y_span * (-0.07)

    # --- counts under the observation panel ---
    xlim = obs_ax.get_xlim()
    glyph_dx = (xlim[1] - xlim[0]) * 0.1
    count_dx = (xlim[1] - xlim[0]) * 0.27
    obs_ax.text(observation_th + glyph_dx, row_y, obs_talalt_glyph,
                color=color_obs_right, **glyph_font)
    obs_ax.text(observation_th - glyph_dx, row_y, obs_nem_talalt_glyph,
                color=color_obs_left, **glyph_font)
    obs_ax.text(observation_th + count_dx, row_y, n_prediction_ts_good,
                color=color_obs_right, **text_font)
    obs_ax.text(observation_th - count_dx, row_y, n_prediction_ts_bad,
                color=color_obs_left, **text_font)
    obs_ax.text(observation_th, row_y, '|',
                color=color_obs_right0, fontsize=19, **text_font)

    # --- counts under the prediction panel ---
    xlim = pred_ax.get_xlim()
    glyph_dx = (xlim[1] - xlim[0]) * 0.04
    count_dx = (xlim[1] - xlim[0]) * 0.1
    pred_ax.text(prediction_th + glyph_dx, row_y, pred_talalt_glyph,
                 color=color_pred_talalt, **glyph_font)
    pred_ax.text(prediction_th - glyph_dx, row_y, pred_nem_talalt_glyph,
                 color=color_pred_nem_talalt, **glyph_font)
    pred_ax.text(prediction_th + count_dx, row_y, n_prediction_ts_good_talalt,
                 color=color_pred_talalt, **text_font)
    pred_ax.text(prediction_th - count_dx, row_y, n_prediction_ts_good_nem_talalt,
                 color=color_pred_nem_talalt, **text_font)
    pred_ax.text(prediction_th, row_y, '|',
                 color=color_pred_right, fontsize=19, **text_font)

    # --- caption row ---
    caption_y = ylim[0] + y_span * (-0.14)
    obs_ax.text(observation_th, caption_y, 'feltétel',
                color=color_obs_right0, **text_font)
    pred_ax.text(prediction_th, caption_y, 'jóslat',
                 color=color_pred_right, **text_font)

    # --- arrow from the left edge of the prediction panel ---
    arrow_y = ylim[0] + y_span * (-0.13)
    arrow_tip_x = prediction_th + (prediction_th - xlim[0]) * (-0.4)
    pred_ax.annotate('', xy=(arrow_tip_x, arrow_y), xycoords='data', annotation_clip=False,
                     xytext=(xlim[0], arrow_y),
                     arrowprops=dict(arrowstyle='->', color=color_obs_right0))
def talalat_plot_violin(axes, n_prediction_ts_good, n_prediction_ts_bad,
                        n_prediction_ts_good_talalt, n_prediction_ts_good_nem_talalt):
    """Write the hit/miss summary on the third panel (``axes[2]``).

    Two rows are drawn: the observation glyphs and counts at y=0.65 and the
    prediction glyphs and counts at y=0.5. An arrow runs from the
    observation row down to the prediction row; it starts at x=0.72 when
    ``color_obs_right`` equals ``color_obs_right0`` and at x=0.47 otherwise.

    Parameters
    ----------
    axes : sequence of axes; only ``axes[2]`` is used.
    n_prediction_ts_good, n_prediction_ts_bad : counts for the top row.
    n_prediction_ts_good_talalt, n_prediction_ts_good_nem_talalt : counts
        for the bottom row.
    """
    panel = axes[2]
    y_icon_obs = 0.65
    y_icon_pred = 0.5
    arrow_start_x = 0.72 if color_obs_right == color_obs_right0 else 0.47

    # (y, right glyph, right count, right colour, left glyph, left count, left colour)
    rows = (
        (y_icon_obs, obs_talalt_glyph, n_prediction_ts_good, color_obs_right,
         obs_nem_talalt_glyph, n_prediction_ts_bad, color_obs_left),
        (y_icon_pred, pred_talalt_glyph, n_prediction_ts_good_talalt, color_pred_talalt,
         pred_nem_talalt_glyph, n_prediction_ts_good_nem_talalt, color_pred_nem_talalt),
    )
    for row_y, r_glyph, r_count, r_color, l_glyph, l_count, l_color in rows:
        panel.text(0.72, row_y, r_glyph,
                   horizontalalignment='center', color=r_color, fontproperties=prop2)
        panel.text(0.9, row_y, r_count,
                   horizontalalignment='center', color=r_color, fontproperties=prop)
        panel.text(0.47, row_y, l_glyph,
                   horizontalalignment='center', color=l_color, fontproperties=prop2)
        panel.text(0.29, row_y, l_count,
                   horizontalalignment='center', color=l_color, fontproperties=prop)

    panel.annotate('', xy=(0.59, y_icon_pred * 1.04), xycoords='data',
                   xytext=(arrow_start_x, y_icon_obs * 0.98),
                   arrowprops=dict(arrowstyle='->', color=color_obs_right0))
def talalat_plot(axes, ns, observation_th, prediction_th):
    """Draw both hit/miss summaries: the rows under the panels and the side panel.

    Parameters
    ----------
    axes : sequence of axes forwarded to the two drawing helpers.
    ns : four counts, in the order (good, bad, good-and-hit, good-and-miss).
    observation_th, prediction_th : x positions for the rows under the panels.
    """
    n_good, n_bad, n_hit, n_miss = ns
    talalat_plot_line(axes, n_good, n_bad, n_hit, n_miss,
                      observation_th, prediction_th)
    talalat_plot_violin(axes, n_good, n_bad, n_hit, n_miss)
def year_plot(data, ax, k):
    """Label the first and last points of ``data`` with their index value.

    The label is the index label converted with ``int``; it is placed next
    to the point, on the right for non-negative values and on the left for
    negative ones.

    Parameters
    ----------
    data : Series-like with ``.values`` and ``.index``; must be non-empty.
    ax : matplotlib axes to write on.
    k : label offset as a fraction of the value range (max - min).
    """
    values = data.values
    labels = data.index
    offset = abs((max(values) - min(values)) * k)

    for pos in (0, -1):
        value, label = values[pos], labels[pos]
        if value < 0:
            shift, align = -offset, 'right'
        else:
            shift, align = offset, 'left'
        ax.text(value + shift, label, str(int(label)),
                horizontalalignment=align, verticalalignment='center',
                color=color_ax, fontproperties=prop)
def spine_plot(datum, title, mondas, jelentes, kondicio, mennyiseg,
               observation_ts, observation_th, prediction_ts, prediction_th, c):
    """Plot the prediction series for one saying, save it, and return the hit rate.

    Years are split by whether the observation exceeds ``observation_th``.
    The prediction values of the selected years are drawn as a two-colour
    stem plot split at ``prediction_th``; the remaining years are drawn in
    the neutral axis colour. The first and last year are labelled, the
    figure is written to ``<c>/<mondas>_banner.png`` and shown.

    Besides its parameters the function reads these module-level names:
    ``observation_data``, ``prediction_data``, ``obs_key``, ``pred_key``,
    ``color_obs_right``, ``color_pred_left``, ``color_pred_right`` and
    ``color_pred_talalt`` (plus the ``*0`` base colours).

    Parameters
    ----------
    datum, title, jelentes, kondicio, mennyiseg : accepted for interface
        compatibility; not used by the drawing that is currently enabled.
    mondas : identifier used in the output file name.
    observation_ts, prediction_ts : Series indexed by year.
    observation_th, prediction_th : thresholds for the two series.
    c : output directory (the country code in the calling script).

    Returns
    -------
    int : percentage of selected years whose prediction value lies above
    ``prediction_th``, or 100 minus that when the prediction direction is
    reversed (``color_pred_talalt`` differs from ``color_pred_talalt0``).
    """
    # --- data ---
    met_years = observation_ts[observation_ts > observation_th].index
    unmet_years = observation_ts[observation_ts <= observation_th].index
    # reindex (not .loc) so that years absent from prediction_ts become NaN
    # rows instead of raising KeyError on current pandas.
    prediction_ts_good = prediction_ts.reindex(met_years)
    prediction_ts_bad = prediction_ts.reindex(unmet_years)
    if color_obs_right0 != color_obs_right:
        # Observation direction is reversed: the roles of the groups swap.
        prediction_ts_good, prediction_ts_bad = prediction_ts_bad, prediction_ts_good
    misses, hits, _ = get_data(prediction_ts_good, prediction_th)
    n_hits = len(hits)
    n_misses = len(misses)

    # --- plots ---
    # Only the prediction panel is drawn; the calls for the observation
    # panel, violins, icons, hit counts and titles were commented out in the
    # original and are omitted here.
    _fig, axes, _axi_top, _axi_bottom = setup_axes()
    stem2_plot(prediction_ts_good, prediction_th, axes[1], color_pred_left, color_pred_right)
    stem_plot(prediction_ts_bad, axes[1], color_ax)

    # --- year labels ---
    obs_year_index = observation_data[[obs_key, 'pyear']].dropna().groupby('pyear').mean().index
    pred_year_index = prediction_data[[pred_key, 'pyear']].dropna().groupby('pyear').mean().index
    pred_years_present = prediction_ts.reindex(pred_year_index).dropna().index
    obs_years_present = observation_ts.reindex(obs_year_index).dropna().index
    first_year = max(min(pred_years_present), min(obs_years_present))
    labelled_years = range(first_year, max(pred_years_present) + 1)
    year_plot(prediction_ts.reindex(labelled_years).dropna(), axes[1], 0.03)

    # --- verdict ---
    # NOTE(review): raises ZeroDivisionError when no selected year has a
    # prediction value; decide what the verdict should be in that case.
    verdict = int(100 * n_hits / (n_hits + n_misses))
    if color_pred_talalt != color_pred_talalt0:
        verdict = 100 - verdict

    plt.savefig(c + '/' + str(mondas) + '_banner.png', dpi=1500, facecolor='#E6E6E6')
    plt.show()
    return int(verdict)
def filter_data(dz, observation_range, prediction_range):
    """Cut the yearly observation and prediction windows out of ``dz``.

    For every year from the smallest value of ``dz['year']`` up to, but not
    including, the largest one, the month/day of the range endpoints are
    combined with that year to form two date windows. If the prediction end
    falls before the observation end it is moved one calendar year forward.
    Each window is sliced from ``dz`` and tagged with a ``pyear`` column
    holding the loop year.

    Parameters
    ----------
    dz : DataFrame with a date-like index that supports label slicing and a
        ``year`` column.
    observation_range, prediction_range : two-element sequences (start, end)
        of objects with ``.month`` and ``.day`` attributes.

    Returns
    -------
    (observation_data, prediction_data) : the concatenated per-year slices.
    ``dz`` itself is not modified.

    Raises
    ------
    ValueError : from ``pd.concat`` when the year range is empty.
    """
    def _in_year(moment, year):
        # Same month/day, transplanted into the given year.
        return pd.Timestamp(year=year, month=moment.month, day=moment.day)

    first_year = int(dz['year'].min())
    last_year = int(dz['year'].max())

    observation_parts = []
    prediction_parts = []
    for year in range(first_year, last_year):
        from_date = _in_year(observation_range[0], year)
        to_date = _in_year(observation_range[1], year)
        from_pred = _in_year(prediction_range[0], year)
        to_pred = _in_year(prediction_range[1], year)
        if to_pred < to_date:
            # A calendar-year offset: Timedelta has no unambiguous 'Y' unit
            # and current pandas rejects it.
            to_pred += pd.DateOffset(years=1)

        # assign() returns a new frame, so the slice of dz is never written to.
        observation_parts.append(dz.loc[from_date:to_date].assign(pyear=year))
        prediction_parts.append(dz.loc[from_pred:to_pred].assign(pyear=year))
    return pd.concat(observation_parts), pd.concat(prediction_parts)
def set_direction(kondicio, mennyiseg):
    """Choose colours and glyphs according to the two direction flags.

    With a truthy flag the base assignment is returned; with a falsy flag
    the two colours and the two glyphs of that group are swapped. The glyphs
    come from the module constants ``obs_talalt_glyph0``,
    ``obs_nem_talalt_glyph0``, ``pred_talalt_glyph0`` and
    ``pred_nem_talalt_glyph0`` rather than from repeated literals.

    Parameters
    ----------
    kondicio : flag for the observation group.
    mennyiseg : flag for the prediction group.

    Returns
    -------
    tuple of 8 : (color_obs_right, color_obs_left, obs_talalt_glyph,
    obs_nem_talalt_glyph, color_pred_talalt, color_pred_nem_talalt,
    pred_talalt_glyph, pred_nem_talalt_glyph).
    """
    if kondicio:
        observation = (color_obs_right0, color_obs_left0,
                       obs_talalt_glyph0, obs_nem_talalt_glyph0)
    else:
        observation = (color_obs_left0, color_obs_right0,
                       obs_nem_talalt_glyph0, obs_talalt_glyph0)

    if mennyiseg:
        prediction = (color_pred_talalt0, color_pred_nem_talalt0,
                      pred_talalt_glyph0, pred_nem_talalt_glyph0)
    else:
        prediction = (color_pred_nem_talalt0, color_pred_talalt0,
                      pred_nem_talalt_glyph0, pred_talalt_glyph0)

    return observation + prediction
def get_sign(sign, key):
    """Translate a sign specification into a boolean direction.

    Parameters
    ----------
    sign : string specification.
    key : unused.

    Returns
    -------
    bool : False when ``sign`` is exactly '-', or when it contains neither
    '-' nor '+' but does contain '<'. True in every other case, including
    strings that merely contain '-' (for example '<-5').
    """
    if '-' in sign or '+' in sign:
        # Only the bare '-' counts as negative in this family.
        return sign != '-'
    return '<' not in sign
# Column keys whose yearly series is centred on its own mean.
universal_normalize = ['XTEMP', 'XVSB', 'XSPD']


def get_ts_data(data, key, sign):
    """Build the yearly mean series of ``key`` and the threshold given by ``sign``.

    Parameters
    ----------
    data : DataFrame with a ``year`` column and a ``key`` column.
    key : name of the column to average per year.
    sign : threshold specification. If it contains '-' or '+', the threshold
        is the mean of the yearly series; otherwise everything after the
        first character is parsed as a float.

    Returns
    -------
    (ts, th) : the yearly series and the threshold. For keys listed in
    ``universal_normalize`` the series mean is subtracted from both.

    Raises
    ------
    ValueError : when the numeric part of ``sign`` cannot be parsed.
    """
    # Select the column before aggregating: only ``key`` is needed, and
    # other columns of ``data`` may not be numeric.
    ts = data.groupby('year')[key].mean()
    if '-' in sign or '+' in sign:
        th = ts.mean()
    else:
        th = float(sign[1:])
    # NOTE(review): the source lost its indentation; centring is applied for
    # every sign form here - confirm it was not meant for numeric thresholds only.
    if key in universal_normalize:
        centre = ts.mean()
        th -= centre
        ts = ts - centre
    return ts, th
# Build the observation/prediction series and thresholds for a "comparison"
# saying, i.e. one whose value code is 'A' or 'B' in the calling script.
#
# Returns (observation_ts, observation_th, prediction_ts, prediction_th,
# ertek_sign, irany_sign).
#
# NOTE(review): this file lost its indentation. The `else:` near the end can
# belong either to `if pred_sign=='A':` or to `if obs_sign=='A':`, and the two
# readings set irany_sign differently. The statements are therefore left
# exactly as found; restore the nesting from the original notebook.
def get_comp_data(observation_data,obs_key,obs_sign,prediction_data,pred_key,pred_sign):
# Both direction flags start as True; ertek_sign is never changed below.
ertek_sign=True
irany_sign=True
# Yearly means of the two selected columns.
observation_ts=observation_data.groupby('year').mean()[obs_key]
prediction_ts=prediction_data.groupby('year').mean()[pred_key]
# The prediction threshold starts as the (uncentred) observation mean ...
prediction_th=observation_ts.mean()
observation_ts-=observation_ts.mean()
# 1.01 times the minimum of the centred observation series.
observation_th=observation_ts.min()*1.01
# ... and is then shifted by the prediction mean, like the prediction series.
prediction_th-=prediction_ts.mean()
prediction_ts-=prediction_ts.mean()
if obs_sign=='A':
if pred_sign=='A':
# Both thresholds are reset to zero (the means of the centred series).
observation_th=0
prediction_th=0
else:
irany_sign=False
return observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign
# Maps the quantity names used in the sayings table to data column keys.
mennyiseg_key = {
    'Temp': 'XTEMP',
    'Snow Depth': 'XSD',
    'Wind': 'XSPD',
    'Rain': 'YPCP',
    'Visib': 'XVSB',
    'Snow': 'YSNW',
    'Hail': 'YHAL',
}

# Station IDs kept per country code; 'huro' is the 'hu' list followed by 'ro'.
stations_to_include = {
    'ro': [
        150040, 151700, 151450, 152600, 152470,
        150800, 152300, 150100, 151200, 152000,
    ],
    'hu': [
        128820, 128120, 127720, 128600, 128390, 128920,
        128430, 128250, 128220, 128050, 129150, 129420,
        129600, 129700, 129820, 129920, 129350, 129100,
    ],
}
stations_to_include['huro'] = [*stations_to_include['hu'], *stations_to_include['ro']]
def get_country(c, h='ds', plot=False):
    """Load the weather records and the sayings table for country code ``c``.

    The records are read from ``data/<c>_<h>.csv``; for ``c == 'huro'`` the
    'hu' and 'ro' files are read and concatenated. Rows are restricted to
    the station IDs in ``stations_to_include[c]``, calendar columns are
    added, and the frame is indexed and sorted by ``time``.

    Parameters
    ----------
    c : country code; must be a key of ``stations_to_include``.
    h : file-name suffix selecting the data set. The original comments call
        the default 'ds' files daily data and mention 'hs' as high-resolution.
    plot : if true, plot the number of distinct station IDs per year.

    Returns
    -------
    (df, nepi) : the prepared records and the contents of
    ``<c>/idojaras_<c>.xlsx``.
    """
    if c == 'huro':
        parts = [pd.read_csv('data/' + code + '_' + h + '.csv') for code in ('hu', 'ro')]
        records = pd.concat(parts)
    else:
        records = pd.read_csv('data/' + c + '_' + h + '.csv')

    records = records[records['ID'].isin(stations_to_include[c])]
    stamps = pd.to_datetime(records['time'])
    records = records.assign(
        time=stamps,
        year=stamps.dt.year,
        month=stamps.dt.month,
        day=stamps.dt.day,
        hour=stamps.dt.hour,
    )
    records = records.set_index('time').sort_index()

    if plot:
        records.groupby('year').nunique()['ID'].plot()

    sayings = pd.read_excel(c + '/idojaras_' + c + '.xlsx')
    return records, sayings
# --- Driver: evaluate the selected sayings for one country -------------------
# The names assigned here are module-level on purpose: spine_plot and the
# plotting helpers read several of them (obs_key, pred_key, observation_data,
# prediction_data and the colour/glyph variables) as globals.
c = 'ro'
df, nepi = get_country(c)
# One row per timestamp: the mean over all stations.
dz = df.groupby(['time']).mean()

color_pred_left = color_pred_left0
color_pred_right = color_pred_right0

shares = []
mondasok = nepi['ID'].values
# Overrides the full list above: only entry 14 is processed.
mondasok = [14]

for mondas in mondasok:
    nep = nepi.loc[mondas]
    # Skip rows without a predicted quantity.
    if str(nep['Mennyiség']) == 'nan':
        continue

    obs_key = mennyiseg_key[nep['Kondíció']]
    pred_key = mennyiseg_key[nep['Mennyiség']]
    # The observation window is widened by one day before and two days after.
    observation_range = [nep['Dátum:mettől'] + pd.to_timedelta('-1D'),
                         nep['Dátum:meddig'] + pd.to_timedelta('+2D')]
    prediction_range = [nep['Periódus:mettől'],
                        nep['Periódus:meddig'] + pd.to_timedelta('+1D')]
    observation_data, prediction_data = filter_data(dz, observation_range, prediction_range)

    if str(nep['Érték']) in ['A', 'B']:
        # comparison
        print('comp', mondas)
        (observation_ts, observation_th, prediction_ts, prediction_th,
         ertek_sign, irany_sign) = get_comp_data(observation_data, obs_key, nep['Érték'],
                                                 prediction_data, pred_key, nep['Irány'])
    else:
        # time series
        print('ts', mondas)
        ertek_sign = get_sign(nep['Érték'], obs_key)
        irany_sign = get_sign(nep['Irány'], pred_key)
        observation_ts, observation_th = get_ts_data(observation_data, obs_key, nep['Érték'])
        prediction_ts, prediction_th = get_ts_data(prediction_data, pred_key, nep['Irány'])

    (color_obs_right, color_obs_left, obs_talalt_glyph, obs_nem_talalt_glyph,
     color_pred_talalt, color_pred_nem_talalt, pred_talalt_glyph, pred_nem_talalt_glyph) = \
        set_direction(ertek_sign, irany_sign)

    datum = nep['DS']
    shares.append(spine_plot(datum, nep['Mondás'].strip(), mondas, nep['Jelentés'].strip(),
                             nep['Kondíció'], nep['Mennyiség'],
                             observation_ts, observation_th, prediction_ts, prediction_th, c))

# Mean verdict over all processed sayings.
print(np.mean(shares))
ts 14
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:42: FutureWarning: Passing list-likes to .loc or [] with any missing label will raise KeyError in the future, you can use .reindex() as an alternative. See the documentation here: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
72.0