#!/usr/bin/env python
# coding: utf-8

# Notebook export: for every weather station, score a list of folk weather
# sayings ("mondas") by the percentage of years in which the saying's
# prediction held, then draw the scores on a map.
#
# NOTE(review): the exported text had lost all of its line structure and a
# part of its content (see `_SOURCE_GAP` below).  Indentation was restored by
# reading the logic; places where more than one nesting was possible carry a
# NOTE(review) comment.

# In[1]:

import pandas as pd
import numpy as np
from scipy import stats
import random


# In[2]:

stations=pd.read_csv('data/stations.csv').set_index('ID')


# Setup plot params

# In[3]:

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.collections import PolyCollection
get_ipython().run_line_magic('matplotlib', 'inline')


# In[4]:

import matplotlib as mpl
import matplotlib.font_manager as font_manager
path = 'KulimPark-Regular.ttf'
path2 = 'Symbola.ttf'
prop = font_manager.FontProperties(fname=path)    # text font
prop2 = font_manager.FontProperties(fname=path2)  # glyph / icon font


# In[5]:

# Names ending in 0 are the defaults; the main loop binds the un-suffixed
# globals (color_obs_right, obs_talalt_glyph, ...) through set_direction().
color_ax='#E7CFBC'
color_bg='#FFF4EC'
color_obs_right0='#F2B880'
color_obs_left0=color_ax
color_pred_right0='#C98686'
color_pred_left0='#966B9D'
color_pred_talalt0='#59c687'
color_pred_nem_talalt0='#c95498'
font_size=12
s=40
obs_talalt_glyph0='★'
obs_nem_talalt_glyph0='☆'
pred_talalt_glyph0='✔️'
pred_nem_talalt_glyph0='✖️'
title_icon_right={'Temp':'☼','Wind':'🌀','Hail':'⭕️','Snow':'☃️','Snow Depth':'⛄️','Rain':'☔️','Visib':'☀️'}
title_icon_left={'Temp':'✨️','Wind':'☘','Hail':'⚪️','Snow':'⚪️','Snow Depth':'⚪️','Rain':'🌂','Visib':'⛈️'}
title_icon={'Temp':'♨️','Rain':'☂️','Hail':'✴️','Snow':'❄️','Snow Depth':'⛷️','Wind':'⛵️','Cloud':'☁️','Visib':'⛅️'}

# Message used by the three functions whose bodies were lost from the export.
_SOURCE_GAP=('this part of the notebook export was lost (everything between '
             '"if to_pred<" in filter_data and the last branch of get_sign); '
             'restore it from the original notebook')


# In[6]:

def get_data(data,th):
    """Split *data* at threshold *th*.

    Returns ``(a1, a2, a3)``: ``a1`` holds the values ``<= th`` with a column
    ``g == 'left'``, ``a2`` the values ``> th`` with ``g == 'right'`` and
    ``a3`` is their concatenation with an extra constant column ``x == 'x'``.
    """
    a1=pd.DataFrame(data[data<=th])
    a1['g']='left'
    a2=pd.DataFrame(data[data>th])
    a2['g']='right'
    a3=pd.concat([a1,a2])
    a3['x']='x'
    return a1,a2,a3


# In[7]:

def violin_plot(data,th,ax,color_left,color_right):
    """Draw a split violin of *data* on *ax*: values ``<= th`` vs ``> th``.

    A side that is empty is replaced by one placeholder row with value 5.
    A side that has a single row, or a single distinct value, gets two
    jittered copies of its first row (``+/- random()*0.1*th``) appended to the
    plotted frame - presumably so that seaborn can draw a density for it.
    """
    a=0.3  # fill alpha
    a1,a2,a3=get_data(data,th)
    a1_augment=True
    a2_augment=True
    if len(a1)==0:
        a1=a3.loc[[a3.index[0]]]
        a1['g']='left'
        a1[a1.columns[0]]=5
    if len(a2)==0:
        a2=a3.loc[[a3.index[0]]]
        a2['g']='right'
        a2[a2.columns[0]]=5
    if len(a1)>1:
        a1_augment=False
    if not a1_augment:
        if a1.nunique()[a1.columns[0]]==1:
            a1_augment=True
    if a1_augment:
        a11=a1.copy().loc[[a1.index[0]]]
        a11[a11.columns[0]]+=random.random()*0.1*th
        a11['x']='x'
        a12=a1.copy().loc[[a1.index[0]]]
        a12[a12.columns[0]]-=random.random()*0.1*th
        a12['x']='x'
        a3=pd.concat([a3,a11,a12])
    if len(a2)>1:
        a2_augment=False
    if not a2_augment:
        if a2.nunique()[a2.columns[0]]==1:
            a2_augment=True
    if a2_augment:
        a21=a2.copy().loc[[a2.index[0]]]
        a21[a21.columns[0]]+=random.random()*0.1*th
        a21['x']='x'
        a22=a2.copy().loc[[a2.index[0]]]
        a22[a22.columns[0]]-=random.random()*0.1*th
        a22['x']='x'
        a3=pd.concat([a3,a21,a22])
    a3=a3.sort_values('g')  # 'left' sorts before 'right', fixing the hue order
    ax.axvline(0,color=color_ax)
    if a3.nunique()['g']>1:
        sns.violinplot(y=a1.columns[0], x='x',hue='g', data=a3, split=True, ax=ax,
                       inner=None,linewidth=1, scale="count", saturation=1)
        # the first two children of the axes are taken to be the two halves
        ax.get_children()[0].set_color(mpl.colors.colorConverter.to_rgba(color_left, alpha=a))
        ax.get_children()[0].set_edgecolor(color_left)
        ax.get_children()[1].set_color(mpl.colors.colorConverter.to_rgba(color_right, alpha=a))
        ax.get_children()[1].set_edgecolor(color_right)
        ax.legend().remove()
    else:
        if len(a1)>0:
            w=a1
            c=color_left
        else:
            w=a2
            c=color_right
        sns.violinplot(y=w.columns[0], data=w, ax=ax,
                       inner=None,linewidth=1, scale="count", saturation=1)
        ax.set_xlim([-1,0])  # show only one half of the symmetric violin
        ax.get_children()[0].set_color(mpl.colors.colorConverter.to_rgba(c, alpha=a))
        ax.get_children()[0].set_edgecolor(c)


# In[8]:

def setup_axes():
    """Create the 1x3 figure layout.

    Returns ``(fig, axes, axi_top, axi_bottom)``.  ``axes[0]`` and ``axes[1]``
    are replaced by inset axes that leave the bottom 15% free; ``axi_top`` and
    ``axi_bottom`` are insets of ``axes[2]``.  All axes have their frame off.
    """
    fig,axes=plt.subplots(1,3,figsize=(8,5),gridspec_kw={'width_ratios': [1, 3, 1]})
    axi_top= axes[2].inset_axes([0.1, 0.65, 1, 0.3])
    axi_top.axis('off')
    axi_bottom= axes[2].inset_axes([0.1, 0, 1, 0.5])
    axi_bottom.axis('off')
    axes[0].axis('off')
    axes[1].axis('off')
    axes[2].axis('off')
    axes[0]=axes[0].inset_axes([0, 0.15, 1, 0.85])
    axes[1]=axes[1].inset_axes([0, 0.15, 1, 0.85])
    axes[0].axis('off')
    axes[1].axis('off')
    return fig, axes, axi_top, axi_bottom


# In[9]:

def stem_plot(data,ax,color,s=s):
    """Horizontal stem plot: one line from 0 to each value, plus a marker.

    The index of *data* is used as the vertical coordinate and the first
    column as the horizontal one; *s* is the marker size.
    """
    data=pd.DataFrame(data)
    x=data.index
    y=data[data.columns[0]].values
    for i,e in enumerate(y):
        ax.plot([0,e],[x[i],x[i]],color=color)
    ax.scatter(y,x,s,color=color,zorder=10)


# In[10]:

def stem2_plot(data,th,ax,color_left,color_right,s=s,axv_color=None):
    """Two-colour stem plot split at *th*, with a dashed line at the threshold.

    *axv_color* is the colour of the threshold line; it defaults to
    *color_right*.
    """
    if axv_color is None:
        axv_color=color_right
    a1,a2,a3=get_data(data,th)
    stem_plot(a1,ax,color_left,s)
    stem_plot(a2,ax,color_right,s)
    ax.axvline(0,color=color_ax)
    #if th!=0:
    if True:  # the threshold line is always drawn; the condition above is disabled
        ax.axvline(th,color=axv_color,ls='--',zorder=5)


# In[11]:

def icons_plot(axes,kondicio,mennyiseg,observation_th,prediction_th):
    """Put the title icons above the two stem plots.

    *kondicio* ("condition") and *mennyiseg* ("quantity") are keys of the
    ``title_icon*`` dictionaries.  Reads the globals ``color_pred_right`` and
    ``color_pred_left``.
    """
    ylim=axes[0].get_ylim()
    xlim=axes[1].get_xlim()
    y_max_coord=ylim[0]+(ylim[1]-ylim[0])*1.05
    y_max_coord2=ylim[0]+(ylim[1]-ylim[0])*1.05 #1.04
    x_icon_coord_shift=(xlim[1]-xlim[0])*0.1
    axes[0].text(observation_th, y_max_coord, title_icon[kondicio],
                 horizontalalignment='center', color=color_obs_right0,
                 fontproperties=prop2, fontsize=font_size*1.5)
    axes[1].text(prediction_th, y_max_coord, title_icon[mennyiseg],
                 horizontalalignment='center', color=color_ax,
                 fontproperties=prop2, fontsize=font_size*1.5)
    axes[1].text(prediction_th+x_icon_coord_shift, y_max_coord2, title_icon_right[mennyiseg],
                 horizontalalignment='center', color=color_pred_right,
                 fontproperties=prop2, fontsize=font_size*1.5)
    axes[1].text(prediction_th-x_icon_coord_shift, y_max_coord2, title_icon_left[mennyiseg],
                 horizontalalignment='center', color=color_pred_left,
                 fontproperties=prop2, fontsize=font_size*1.5)


# In[12]:

def talalat_plot_line(axes,n_prediction_ts_good,n_prediction_ts_bad,
                      n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt,
                      observation_th,prediction_th):
    """Write the hit/miss glyphs and counts under the two stem plots.

    "talalat" is Hungarian for "hit".  Reads the direction-dependent globals
    (``color_obs_*``, ``color_pred_*``, ``*_glyph``) bound by the main loop.
    """
    ylim=axes[0].get_ylim()
    xlim=axes[0].get_xlim()
    y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.07)
    x_icon_coord_shift=(xlim[1]-xlim[0])*0.1
    x_icon_coord_shift2=(xlim[1]-xlim[0])*0.27
    # observation (condition) panel
    axes[0].text(observation_th+x_icon_coord_shift, y_max_coord, obs_talalt_glyph,
                 horizontalalignment='center', color=color_obs_right, fontproperties=prop2)
    axes[0].text(observation_th-x_icon_coord_shift, y_max_coord, obs_nem_talalt_glyph,
                 horizontalalignment='center', color=color_obs_left, fontproperties=prop2)
    axes[0].text(observation_th+x_icon_coord_shift2, y_max_coord, n_prediction_ts_good,
                 horizontalalignment='center', color=color_obs_right, fontproperties=prop)
    axes[0].text(observation_th-x_icon_coord_shift2, y_max_coord, n_prediction_ts_bad,
                 horizontalalignment='center', color=color_obs_left, fontproperties=prop)
    axes[0].text(observation_th, y_max_coord, '|',
                 horizontalalignment='center', color=color_obs_right0,
                 fontproperties=prop,fontsize=19)
    # prediction panel
    xlim=axes[1].get_xlim()
    x_icon_coord_shift=(xlim[1]-xlim[0])*0.04
    x_icon_coord_shift2=(xlim[1]-xlim[0])*0.1
    axes[1].text(prediction_th+x_icon_coord_shift, y_max_coord, pred_talalt_glyph,
                 horizontalalignment='center', color=color_pred_talalt, fontproperties=prop2)
    axes[1].text(prediction_th-x_icon_coord_shift, y_max_coord, pred_nem_talalt_glyph,
                 horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop2)
    axes[1].text(prediction_th+x_icon_coord_shift2, y_max_coord, n_prediction_ts_good_talalt,
                 horizontalalignment='center', color=color_pred_talalt, fontproperties=prop)
    axes[1].text(prediction_th-x_icon_coord_shift2, y_max_coord, n_prediction_ts_good_nem_talalt,
                 horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop)
    axes[1].text(prediction_th, y_max_coord, '|',
                 horizontalalignment='center', color=color_pred_right,
                 fontproperties=prop,fontsize=19)
    # captions and the arrow pointing from the condition to the prediction
    y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.14)
    axes[0].text(observation_th, y_max_coord, 'feltétel',
                 horizontalalignment='center', color=color_obs_right0, fontproperties=prop)
    axes[1].text(prediction_th, y_max_coord, 'jóslat',
                 horizontalalignment='center', color=color_pred_right, fontproperties=prop)
    y_max_coord=ylim[0]+(ylim[1]-ylim[0])*(-0.13)
    x_coord_shift=prediction_th+(prediction_th-xlim[0])*(-0.4)
    axes[1].annotate('', xy=(x_coord_shift, y_max_coord),xycoords='data',annotation_clip=False,
                     xytext=(xlim[0], y_max_coord),
                     arrowprops=dict(arrowstyle= '->',color=color_obs_right0))


# In[13]:

def talalat_plot_violin(axes,n_prediction_ts_good,n_prediction_ts_bad,
                        n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt):
    """Write the hit/miss glyphs and counts into the violin panel (axes[2]).

    The arrow starts under the "condition met" glyph, whose side depends on
    whether ``color_obs_right`` is still the default colour.
    """
    y_icon_obs=0.65
    y_icon_pred=0.5
    if color_obs_right==color_obs_right0:
        x=0.72
    else:
        x=0.47
    axes[2].text(0.72, y_icon_obs, obs_talalt_glyph,
                 horizontalalignment='center', color=color_obs_right, fontproperties=prop2)
    axes[2].text(0.9, y_icon_obs,n_prediction_ts_good,
                 horizontalalignment='center', color=color_obs_right, fontproperties=prop)
    axes[2].text(0.47, y_icon_obs, obs_nem_talalt_glyph,
                 horizontalalignment='center', color=color_obs_left, fontproperties=prop2)
    axes[2].text(0.29, y_icon_obs, n_prediction_ts_bad,
                 horizontalalignment='center', color=color_obs_left, fontproperties=prop)
    axes[2].text(0.72, y_icon_pred, pred_talalt_glyph,
                 horizontalalignment='center', color=color_pred_talalt, fontproperties=prop2)
    axes[2].text(0.9, y_icon_pred, n_prediction_ts_good_talalt,
                 horizontalalignment='center', color=color_pred_talalt, fontproperties=prop)
    axes[2].text(0.47, y_icon_pred, pred_nem_talalt_glyph,
                 horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop2)
    axes[2].text(0.29, y_icon_pred, n_prediction_ts_good_nem_talalt,
                 horizontalalignment='center', color=color_pred_nem_talalt, fontproperties=prop)
    axes[2].annotate('', xy=(0.59, y_icon_pred*1.04),xycoords='data',
                     xytext=(x, y_icon_obs*0.98),
                     arrowprops=dict(arrowstyle= '->',color=color_obs_right0))


# In[14]:

def talalat_plot(axes,ns,observation_th,prediction_th):
    """Draw both hit/miss summaries; *ns* is the 4-item list built in spine_plot."""
    n_prediction_ts_good,n_prediction_ts_bad,\
        n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt=ns
    talalat_plot_line(axes,n_prediction_ts_good,n_prediction_ts_bad,
                      n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt,
                      observation_th,prediction_th)
    talalat_plot_violin(axes,n_prediction_ts_good,n_prediction_ts_bad,
                        n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt)


# In[15]:

def year_plot(data,ax,k):
    """Label the first and the last entry of *data* with its index as a year.

    The label is shifted away from the stem by ``k`` times the value range,
    to the left for negative values and to the right otherwise.
    """
    y=data.values
    x=data.index
    ex=max(y)-min(y)
    text_off=abs(ex*k)
    text_align='left'
    if y[0]<0:
        text_off=-text_off
        text_align='right'
    ax.text(y[0]+text_off, x[0], str(int(x[0])),
            horizontalalignment=text_align, verticalalignment='center',
            color=color_ax, fontproperties=prop)
    text_off=abs(text_off)
    text_align='left'
    if y[-1]<0:
        text_off=-text_off
        text_align='right'
    ax.text(y[-1]+text_off, x[-1], str(int(x[-1])),
            horizontalalignment=text_align, verticalalignment='center',
            color=color_ax, fontproperties=prop)


# In[16]:

def spine_plot(datum,title,mondas,jelentes,kondicio,mennyiseg,
               observation_ts,observation_th,prediction_ts,prediction_th,c):
    """Score one saying for one station; all drawing is disabled in this variant.

    Years whose observation exceeds *observation_th* are the years in which
    the condition held.  Among those, predictions above *prediction_th* count
    as hits.  The two groups / the score are flipped when the direction
    globals differ from their defaults.

    Returns the hit share as an int percentage (0-100).

    Raises ZeroDivisionError when no year satisfies the condition.

    Besides its parameters the function reads the globals bound by the main
    loop: ``observation_data``, ``prediction_data``, ``obs_key``, ``pred_key``
    and the ``color_*`` direction variables.
    """
    #data
    prediction_ts_good=prediction_ts.loc[observation_ts[observation_ts>observation_th].index]
    prediction_ts_bad=prediction_ts.loc[observation_ts[observation_ts<=observation_th].index]
    # the counts are taken before the possible swap below
    n_prediction_ts_good=len(prediction_ts_good)
    n_prediction_ts_bad=len(prediction_ts_bad)
    if color_obs_right0!=color_obs_right:
        prediction_ts_good,prediction_ts_bad=prediction_ts_bad,prediction_ts_good
    prediction_ts_good_nem_talalt,prediction_ts_good_talalt,\
        prediction_ts_good_joined=get_data(prediction_ts_good,prediction_th)
    n_prediction_ts_good_talalt=len(prediction_ts_good_talalt)
    n_prediction_ts_good_nem_talalt=len(prediction_ts_good_nem_talalt)
    ns=[n_prediction_ts_good,n_prediction_ts_bad,
        n_prediction_ts_good_talalt,n_prediction_ts_good_nem_talalt]

    #plots
    #fig, axes, axi_top, axi_bottom=setup_axes()
    #stem2_plot(observation_ts,observation_th,axes[0],color_obs_left,color_obs_right,s/2,color_obs_right0)
    #stem2_plot(prediction_ts_good,prediction_th,axes[1],color_pred_left,color_pred_right)
    #stem_plot(prediction_ts_bad,axes[1],color_ax)
    #violin_plot(observation_ts,observation_th,axi_top,color_obs_left,color_obs_right)
    #violin_plot(prediction_ts_good,prediction_th,axi_bottom,color_pred_left,color_pred_right)

    #icons
    #icons_plot(axes,kondicio,mennyiseg,observation_th,prediction_th)

    #talalat
    #talalat_plot(axes,ns,observation_th,prediction_th)

    #years
    obs_year_index=observation_data[[obs_key,'pyear']].dropna().groupby('pyear').mean().index
    pred_year_index=prediction_data[[pred_key,'pyear']].dropna().groupby('pyear').mean().index
    pred_year_index_filt=prediction_ts.loc[pred_year_index].dropna().index
    obs_year_index_filt=observation_ts.loc[obs_year_index].dropna().index
    pred_year_index2=max(min(pred_year_index_filt),min(obs_year_index_filt))
    pred_year_index=range(pred_year_index2,max(pred_year_index_filt)+1)
    #year_plot(observation_ts.loc[obs_year_index].dropna(),axes[0],0.09)
    #year_plot(prediction_ts.loc[pred_year_index].dropna(),axes[1],0.03)

    #titles
    len_ratio=0.15*(-1+(len(jelentes.split(',')[0])/len(jelentes.split(',')[1])))
    #fig.text(0.5+len_ratio,0.04,jelentes.split(',')[0]+',',color=color_obs_right0,
    #        fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='right')
    if color_pred_talalt==color_pred_talalt0:
        color_pred_side=color_pred_right
    else:
        color_pred_side=color_pred_left
    #fig.text(0.5+len_ratio,0.04,jelentes.split(',')[1],color=color_pred_side,
    #        fontproperties=prop,fontsize=font_size*0.7,horizontalalignment='left')
    if n_prediction_ts_good_nem_talalt>=n_prediction_ts_good_talalt:
        color_title=color_pred_nem_talalt
    else:
        color_title=color_pred_talalt
    verdict=int(100*n_prediction_ts_good_talalt/
                (n_prediction_ts_good_talalt+n_prediction_ts_good_nem_talalt))
    if color_pred_talalt!=color_pred_talalt0:
        verdict=100-verdict
    return_verdict=int(verdict)
    verdict=str(verdict)+'%'
    #plt.suptitle(title,y=0.11,color=color_title,fontproperties=prop,fontsize=font_size)
    #fig.text(0.97,0.04,verdict, fontproperties=prop,
    #        horizontalalignment='right', color=color_title, fontsize=font_size*2, )
    #fig.text(0.03,0.04, datum, fontproperties=prop,
    #        horizontalalignment='left', color=color_obs_right0, fontsize=font_size*2, )
    #plt.savefig(c+'/map/'+str(mondas)+'.png',dpi=300, facecolor=color_bg)
    #plt.show()
    return return_verdict


# In[17]:

def filter_data(dz,observation_range,prediction_range):
    """Cut the observation and the prediction window out of every year of *dz*.

    NOTE(review): only the head of this function survived in the export.
    What the rest of the file requires of it: it is unpacked as
    ``observation_data, prediction_data``; both are later grouped by a
    ``'year'`` column and indexed by a ``'pyear'`` column.  The lists ``dgs``
    and ``dhs`` presumably collect the per-year slices - restore the body
    from the original notebook.
    """
    dgs=[]
    dhs=[]
    for year in range(int(dz.min()['year']),int(dz.max()['year'])):
        k=0
        from_date=pd.to_datetime(str(year)+'-'+str(observation_range[k].month)+'-'+str(observation_range[k].day))
        from_pred=pd.to_datetime(str(year)+'-'+str(prediction_range[k].month)+'-'+str(prediction_range[k].day))
        k=1
        to_date=pd.to_datetime(str(year)+'-'+str(observation_range[k].month)+'-'+str(observation_range[k].day))
        to_pred=pd.to_datetime(str(year)+'-'+str(prediction_range[k].month)+'-'+str(prediction_range[k].day))
        # The export breaks off here, in the middle of "if to_pred<...".
        raise NotImplementedError('filter_data: '+_SOURCE_GAP)


# In[18]:

def set_direction(ertek_sign, irany_sign):
    """Choose colours and glyphs for the direction of a saying.

    NOTE(review): the whole body was lost from the export.  The main loop
    unpacks eight values, in this order: color_obs_right, color_obs_left,
    obs_talalt_glyph, obs_nem_talalt_glyph, color_pred_talalt,
    color_pred_nem_talalt, pred_talalt_glyph, pred_nem_talalt_glyph.
    The parameter names are taken from that call site.
    """
    raise NotImplementedError('set_direction: '+_SOURCE_GAP)


# In[19]:

def get_sign(sign,key):
    """Return whether the direction encoded in *sign* is positive.

    NOTE(review): only the tail of this function survived in the export:

        ...'>' in sign)):
            if '<' in sign: positive=False
            elif '>' in sign: positive=True
        return positive

    The parameter names are taken from the call site and from the fragment.
    """
    raise NotImplementedError('get_sign: '+_SOURCE_GAP)


# In[20]:

universal_normalize=['XTEMP','XVSB','XSPD']

def get_ts_data(data,key,sign):
    """Return the yearly mean series of *key* and its threshold.

    The threshold is the series mean when *sign* contains '+' or '-';
    otherwise it is the number that follows the first character of *sign*.
    For keys listed in ``universal_normalize`` both the series and the
    threshold are shifted by the series mean.
    """
    ts=data.groupby('year').mean()[key]
    if (('-' in sign) or ('+' in sign)):
        th=ts.mean()
    else:
        th=float(sign[1:])
    if key in universal_normalize:
        th-=ts.mean()
        ts-=ts.mean()
    return ts,th


# In[21]:

def get_comp_data(observation_data,obs_key,obs_sign,prediction_data,pred_key,pred_sign):
    """Build mean-centred yearly series and thresholds for a comparison saying.

    Returns ``(observation_ts, observation_th, prediction_ts, prediction_th,
    ertek_sign, irany_sign)``.
    """
    ertek_sign=True
    irany_sign=True
    observation_ts=observation_data.groupby('year').mean()[obs_key]
    prediction_ts=prediction_data.groupby('year').mean()[pred_key]
    # NOTE(review): the prediction threshold starts from the *observation*
    # mean; this may be intended for a comparison, or a slip - confirm.
    prediction_th=observation_ts.mean()
    observation_ts-=observation_ts.mean()
    # just below the minimum when that is negative, i.e. every year passes
    observation_th=observation_ts.min()*1.01
    prediction_th-=prediction_ts.mean()
    prediction_ts-=prediction_ts.mean()
    # NOTE(review): the export lost the indentation; the `else` may belong to
    # either `if`.  It is attached to the inner one here - confirm.
    if obs_sign=='A':
        if pred_sign=='A':
            observation_th=0
            prediction_th=0
        else:
            irany_sign=False
    return observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign


# In[22]:

mennyiseg_key={'Temp':'XTEMP','Snow Depth':'XSD','Wind':'XSPD','Rain':'YPCP','Visib':'XVSB',
               'Snow':'YSNW','Hail':'YHAL'}


# In[23]:

stations_to_include={'ro':[150040,151700,151450,152600,152470,150800,152300,150100,151200,152000],
                     'hu':[128820,128120,127720,128600,128390,128920,128430,128250,128220,128050,
                           129150,129420,129600,129700,129820,129920,129350,129100]}
stations_to_include['huro']=stations_to_include['hu']+stations_to_include['ro']


# In[24]:

def get_country(c,h='ds',plot=False):
    """Load the station data and the table of sayings for country code *c*.

    ``'huro'`` concatenates the ``hu`` and ``ro`` files.  Rows are limited to
    ``stations_to_include[c]``, calendar columns are derived from ``time`` and
    the frame is indexed and sorted by ``time``.

    Returns ``(df, nepi)``; ``nepi`` is read from ``<c>/idojaras_<c>.xlsx``.
    """
    if c=='huro':
        hu=pd.read_csv('data/'+'hu'+'_'+h+'.csv') #daily data
        ro=pd.read_csv('data/'+'ro'+'_'+h+'.csv') #daily data
        df=pd.concat([hu,ro])
    else:
        df=pd.read_csv('data/'+c+'_'+h+'.csv') #daily data
        # df=pd.read_csv('data/'+c+'_hs.csv') #high_res data
    df=df[df['ID'].isin(stations_to_include[c])]
    df['time']=pd.to_datetime(df['time'])
    df['year']=df['time'].dt.year
    df['month']=df['time'].dt.month
    df['day']=df['time'].dt.day
    df['hour']=df['time'].dt.hour
    df=df.set_index('time')
    df=df.sort_index()
    if plot:
        df.groupby('year').nunique()['ID'].plot()
    nepi=pd.read_excel(c+'/idojaras_'+c+'.xlsx')
    return df,nepi


# In[25]:

c='huro'
df,nepi=get_country(c)


# In[26]:

color_pred_left=color_pred_left0
color_pred_right=color_pred_right0
mondasok=nepi['ID'].values
mondasok=[1,6]  # overrides the full list above: only these two sayings are scored
shares=[]
for st in df['ID'].unique():
    dz=df[df['ID']==st].groupby(['time']).mean()
    print(st)
    for mondas in mondasok:
        try:
            nep=nepi.loc[mondas]
            if str(nep['Mennyiség'])!='nan':
                obs_key=mennyiseg_key[nep['Kondíció']]
                pred_key=mennyiseg_key[nep['Mennyiség']]
                observation_range=[nep['Dátum:mettől']+pd.to_timedelta('-1D'),
                                   nep['Dátum:meddig']+pd.to_timedelta('+2D')]
                prediction_range=[nep['Periódus:mettől'],
                                  nep['Periódus:meddig']+pd.to_timedelta('+1D')]
                observation_data,prediction_data=filter_data(dz,observation_range,prediction_range)

                #comparison
                if str(nep['Érték']) in ['A','B']:
                    #print('comp',mondas)
                    observation_ts,observation_th,prediction_ts,prediction_th,ertek_sign,irany_sign=\
                        get_comp_data(observation_data,obs_key,nep['Érték'],
                                      prediction_data,pred_key,nep['Irány'])
                #time series
                else:
                    #print('ts',mondas)
                    ertek_sign=get_sign(nep['Érték'],obs_key)
                    irany_sign=get_sign(nep['Irány'],pred_key)
                    observation_ts,observation_th=get_ts_data(observation_data,obs_key,nep['Érték'])
                    prediction_ts,prediction_th=get_ts_data(prediction_data,pred_key,nep['Irány'])

                color_obs_right,color_obs_left,obs_talalt_glyph,obs_nem_talalt_glyph,\
                    color_pred_talalt,color_pred_nem_talalt,pred_talalt_glyph,pred_nem_talalt_glyph=\
                    set_direction(ertek_sign, irany_sign)

                #datum=str(nep['Dátums'])[:3]+'. '+str(nep['Dátum:mettől'].day)
                datum=nep['DS']
                share=spine_plot(datum,nep['Mondás'].strip(),mondas,nep['Jelentés'].strip(),
                                 nep['Kondíció'],nep['Mennyiség'],
                                 observation_ts,observation_th,prediction_ts,prediction_th,c)
                shares.append({'share':share,'station':st,'mondas':mondas})
        except NotImplementedError:
            # a missing helper is a defect of this file, not of one station
            raise
        except Exception as err:
            # Best effort per station and saying.  `st` is a numeric station
            # ID, so it has to be converted before it is concatenated.
            print('ERROR '+str(st)+' mondas '+str(mondas)+': '+repr(err))


# In[27]:

dw=pd.DataFrame(shares).set_index('station').join(stations).set_index('mondas').join(nepi.set_index('ID'))


# In[28]:

import json
with open('data/namer.json','r') as namer_file:
    namer=pd.DataFrame(json.load(namer_file),index=['name']).T


# In[29]:

dw=dw.set_index('LOC').join(namer)


# In[30]:

# !conda install geopandas


# In[31]:

# !pip install descartes


# In[32]:

import geopandas


# In[41]:

def add_basemap(ax, zoom, url='https://maps.wikimedia.org/osm-intl/{z}/{x}/{y}.png?lang=hu'):
    """Draw web map tiles behind the current content of *ax*.

    The tiles are fetched with ``ctx.bounds2img`` for the current axis limits,
    which are restored afterwards.  ``ctx`` (contextily) is imported in a
    later cell, so it must be loaded before this function is called.
    """
    xmin, xmax, ymin, ymax = ax.axis()
    basemap, extent = ctx.bounds2img(xmin, ymin, xmax, ymax, zoom=zoom, url=url)
    ax.imshow(basemap, extent=extent, interpolation='bilinear')
    # restore original x/y limits
    ax.axis((xmin, xmax, ymin, ymax))


# In[42]:

# NOTE(review): from here on `df` is the 'nybb' sample dataset, not the
# weather data loaded above.
df = geopandas.read_file(geopandas.datasets.get_path('nybb'))
ax = df.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')


# In[43]:

df = df.to_crs(epsg=3857)


# In[44]:

# !conda install contextily


# In[45]:

import contextily as ctx


# In[46]:

ax = df.plot(figsize=(10, 10), alpha=0.5, edgecolor='k')
add_basemap(ax, zoom=11)


# In[47]:

lat=dw['LAT'].values
lon=dw['LON'].values
population = dw['share'].values  # the colour values are the hit shares
dist=dw['name'].values


# In[48]:

# scatter the stations, with c reflecting the share
# NOTE(review): `ax` is in EPSG:3857 while LAT/LON presumably are degrees -
# confirm and reproject before relying on this map.
# `latlon=True` is not a matplotlib keyword, so it is no longer passed.
points=ax.scatter(lon,lat, c=population,s=700,
                  cmap='YlGnBu_r', alpha=0.5)

#create colorbar
# bound to the scatter explicitly: there is no "current image" to fall back on
ax.figure.colorbar(points, ax=ax, label=r'Population')
# spine_plot returns percentages, so the colour range is 0-100
points.set_clim(0, 100)


# In[ ]: