#!/usr/bin/env python
# coding: utf-8

# ## OBS Data Analysis from "Zu nah?" project at TH Wildau
# based on open data resources only
#
# created by Simon Metzler at Radprofessur Wildau
#
# * project page https://innohub13.de/wir-forschen/zu-nah/
# * page of Radprofessur https://www.th-wildau.de/radverkehr

# ---
# ### Index:
# * [import python packages](#import)
# * [parse data](#parse)
# * [Geo overview](#geo)
# * [pre processing data](#pre)
# * [plot histogram](#histo)
# * [plot time based analysis](#time)

# ---
# import python packages

# In[1]:
import geopandas as gpd
import pandas as pd

# In[2]:
import glob
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

# In[3]:
#from matplotlib_scalebar.scalebar import ScaleBar
import contextily as ctx

# In[4]:
import seaborn as sns
import matplotlib.ticker as ticker

# ---
# parse data

# In[5]:
# manual download from the export tab of each portal
# (automatic download is possible yet not encouraged)
#https://obs.adfc-brandenburg.de/
#https://obs.adfc-bw.de/
#https://obs.radentscheid-essen.de/
#https://portal.openbikesensor.org/
#https://obs.adfc-darmstadt.de/
# data as of 07.11.22
# os.path.join keeps the pattern working on non-Windows systems as well
# (the former hard-coded backslash only matched on Windows).
files = glob.glob(os.path.join('portale', '*.geojson'))
len(files)

# In[6]:
# Read every export and tag its rows with the portal name, which is taken
# from the part of the file name after the last underscore.
# The frames are collected in a list and concatenated once, because
# DataFrame.append is no longer available in current pandas versions.
portal_frames = []
for f in tqdm(files):
    gdf_this = gpd.read_file(f, crs='EPSG:4326')
    portal_name = f.split('_')[-1].replace('.geojson', '')
    gdf_this['portal'] = portal_name
    portal_frames.append(gdf_this)
    print(portal_name)
    print(len(gdf_this))
# pd.concat raises on an empty list, so fall back to an empty frame.
gdf = pd.concat(portal_frames) if portal_frames else gpd.GeoDataFrame()

# In[7]:
gdf

# ---
# Geo overview

# In[8]:
gdf.plot()

# In[9]:
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax)
# boundaries of the German federal states
bland = gpd.read_file("https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/2_bundeslaender/1_sehr_hoch.geo.json")
bland.boundary.plot(ax=ax, color='gray')

# In[10]:
bbb = bland[bland.name.isin(['Brandenburg', 'Berlin'])]  #.dissolve()
bbb.plot()

# In[11]:
# geographically clip all measurements to Berlin/Brandenburg (bbb)
gdf_clipped = gdf.clip(bbb)
## alternatively filter by portal, e.g.
#gdf_filtered=gdf[gdf.portal=='bbb']

# In[12]:
gdf_clipped

# In[13]:
gdf_clipped.portal.value_counts()

# #### BRANDENBURG

# In[14]:
# Subtitle used when no explicit subtitle is passed to makeLayeredMap;
# "{n}" is replaced by the number of rows of the second layer.
_SUBTITLE_ALL_PORTALS = (
    "OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n"
    " ADFC Darmstadt, ADFC Osnabrück: (n={n}, 2022-11-07)"
)


def makeLayeredMap(*args, subtitle=_SUBTITLE_ALL_PORTALS, subtitle_y=0.92,
                   subtitle_fontsize=13, subtitle_alpha=0.85):
    """Plot geodataframes on top of a Contextily basemap and show the figure.

    The first positional layer is drawn as a grey, black-outlined area, the
    second one as blue markers. Further layers are reprojected but not drawn.

    Parameters
    ----------
    *args : geodataframes
        At least two layers: args[0] the area, args[1] the measurements.
    subtitle : str
        Text of the second heading line; "{n}" is filled with len(args[1]).
        Defaults to the all-portals subtitle.
    subtitle_y, subtitle_fontsize, subtitle_alpha
        Vertical axes position, font size and opacity of the subtitle.

    Returns
    -------
    None
        The return value of ``plt.show()``; nothing is saved to disk.

    Raises
    ------
    ValueError
        If fewer than two layers are passed.
    """
    if len(args) < 2:
        raise ValueError("makeLayeredMap needs at least two layers (area, points)")

    # Convert the CRS of all layers to EPSG:3857 to match Contextily
    layers = [layer.to_crs(epsg=3857) for layer in args]

    fig, ax = plt.subplots(1, figsize=(15, 15))
    ax.set_aspect('equal')

    # Layer 1: translucent fill plus a crisp outline.
    # (No figsize here: the figure size is fixed by plt.subplots above.)
    layers[0].plot(ax=ax, facecolor="grey", alpha=0.3)
    layers[0].plot(ax=ax, facecolor="none", alpha=1, edgecolor='black', linewidths=1)
    # Layer 2: the measurement points
    layers[1].plot(ax=ax, color='#0069B4', alpha=0.3, zorder=2, markersize=5)

    # Contextily basemap
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron, reset_extent=False, zoom=10)

    ax.axis('off')

    # overall title and subtitle
    ax.text(x=0.5, y=0.955, s='Measured overtaking events', fontsize=28,
            weight='bold', ha='center', va='bottom', transform=ax.transAxes)
    ax.text(x=0.5, y=subtitle_y, s=subtitle.format(n=len(layers[1])),
            fontsize=subtitle_fontsize, alpha=subtitle_alpha,
            ha='center', va='bottom', transform=ax.transAxes)

    #fig.savefig("plots/OBS_analysis_map_80dpi.png", dpi=80)
    return plt.show()


# In[15]:
makeLayeredMap(
    bland[bland.name.isin(['Brandenburg', 'Berlin'])], gdf_clipped,
    subtitle="OBS-Data: ADFC Brandenburg : (n={n}, 2022-11-07)",
    subtitle_y=0.925, subtitle_fontsize=15, subtitle_alpha=0.75)

# #### DEUTSCHLAND

# In[16]:
# (the all-portals map uses the default subtitle of makeLayeredMap)

# In[17]:
makeLayeredMap(bland, gdf)

# ---
# pre processing
#

# In[18]:
# convert from m to cm
gdf['distance_overtaker'] = gdf['distance_overtaker'] * 100
gdf['distance_stationary'] = gdf['distance_stationary'] * 100

# In[19]:
# adding bins of distance_overtaker
# NOTE(review): the last bin ends at 400 cm although it is labelled '>250';
# larger values get no bin (NaN) and drop out of the histograms - confirm
# that this cut-off is intended.
# 50 cm steps
bins = [0, 50, 100, 150, 200, 250, 400]
labels = ['<50', '50-100', '100-150', '150-200', '200-250', '>250']
gdf['distance_overtaker_binned'] = pd.cut(gdf['distance_overtaker'], bins=bins, labels=labels)

# 25 cm steps
bins = list(range(0, 260, 25))
bins.append(400)
labels = ['<25', '25-50', '50-75', '75-100', '100-125', '125-150', '150-175',
          '175-200', '200-225', '225-250', '>250']
gdf['distance_overtaker_binned_25'] = pd.cut(gdf['distance_overtaker'], bins=bins, labels=labels)

# In[20]:
# remove all data points with measurements below 15 cm to clean wrong
# measurements due to ultrasonic sensor issues
gdf = gdf[~(gdf.distance_overtaker < 15) & ~(gdf.distance_stationary < 15)].copy()

# ---
# plot histogram

# In[21]:
# observed=False keeps empty bins in the result regardless of the pandas
# default for categorical group keys.
histo_bbb = (
    gdf[gdf.portal == 'bbb']
    .groupby('distance_overtaker_binned_25', observed=False)['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)
histo_bbb

# In[22]:
sns.set(rc={'figure.figsize': (10, 6)})
sns.set_context("talk")

x_ax = 'distance_overtaker_binned_25'
fig, ax = plt.subplots(figsize=(12, 8))

# bar chart of the binned overtaking distances (Brandenburg portal)
ax1 = sns.barplot(
    data=histo_bbb, x=x_ax, y='counts', hue=x_ax,
    palette='RdYlGn',  # palette='Spectral',
    dodge=False, ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Overtaking distance [cm]')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))
# the hue legend repeats the x axis; it may be absent depending on the
# seaborn version, hence the guard
if ax1.legend_ is not None:
    ax1.legend_.remove()
ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)

ax.text(x=0.5, y=1.15, s='Distribution of overtaking distances', fontsize=28,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.10,
        s="OBS-Data: ADFC Brandenburg (n=" + str(histo_bbb.counts.sum()) + ', 2022-11-07)',
        fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
#fig.savefig("plots/OBS_analysis_distribution_bbb.png", dpi=300, bbox_inches="tight")

# In[23]:
histo_all = (
    gdf
    .groupby('distance_overtaker_binned_25', observed=False)['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)
histo_all

# In[24]:
sns.set(rc={'figure.figsize': (10, 6)})
sns.set_context("talk")

x_ax = 'distance_overtaker_binned_25'
fig, ax = plt.subplots(figsize=(12, 8))

# bar chart of the binned overtaking distances (all portals)
ax1 = sns.barplot(
    data=histo_all, x=x_ax, y='counts', hue=x_ax,
    palette='RdYlGn',  # palette='Spectral',
    dodge=False, ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Overtaking distance [cm]')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))
# the hue legend repeats the x axis; it may be absent depending on the
# seaborn version, hence the guard
if ax1.legend_ is not None:
    ax1.legend_.remove()
ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)

ax.text(x=0.5, y=1.15, s='Distribution of overtaking distances', fontsize=28,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(histo_all.counts.sum()) + ', 2022-11-07)',
        fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
#fig.savefig("plots/OBS_analysis_distribution_bbb.png", dpi=300, bbox_inches="tight")

# ---
# plot time based analysis

# #### by month

# In[25]:
gdf['month'] = gdf['time'].dt.month.astype(str)
gdf['year'] = gdf['time'].dt.year.astype(str)

# In[26]:
# key of the form "<year>_<month>" without zero padding, e.g. "2022_3"
gdf['year_month'] = gdf['year'] + "_" + gdf['month']

# In[27]:
# integer months so that the bars are sorted numerically
gdf['month'] = gdf['time'].dt.month.astype(int)
gdf.groupby(['year', 'month'])['distance_overtaker'].count().plot(kind='bar')

# In[28]:
gdf['month'] = gdf['time'].dt.month.astype(int)
gdf[gdf.portal == 'bbb'].groupby(['month'])['distance_overtaker'].count().plot(kind='bar')

# In[29]:
#sns.set(rc={'figure.figsize':(10,6)})
sns.set_context("talk")

# string months so that they match the explicit category order below
gdf['month'] = gdf['time'].dt.month.astype(str)

data_c_plot = gdf[gdf.portal == 'bbb']
data_c_plot = (
    data_c_plot.groupby(['month'])['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)

x_ax = 'month'
x_ax_ordered_str = ['3', '4', '5', '6', '7', '8', '9', '10', '11']

fig, ax = plt.subplots(figsize=(12, 8))
ax1 = sns.barplot(
    data=data_c_plot, x=x_ax, y='counts',
    color='#0058A9', dodge=False,
    order=x_ax_ordered_str, ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Month in 2022')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))

ax.text(x=0.5, y=1.15, s='Time distribution of overtaking events', fontsize=28,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.10,
        s="OBS-Data: ADFC Brandenburg (n=" + str(len(gdf[gdf.portal == 'bbb'])) + ', 2022-11-07)',
        fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
#fig.savefig("plots/OBS_analysis_distribution_time_bbb.png", dpi=300, bbox_inches="tight")

# In[30]:
# Boxplot of the overtaking distance per year_month (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'year_month'
x_ax_ordered_str = [
    '2020_4', '2020_5', '2020_6', '2020_7', '2020_8', '2020_9', '2020_10', '2020_11', '2020_12',
    '2021_1', '2021_2', '2021_3', '2021_4', '2021_5', '2021_6', '2021_7', '2021_8', '2021_9',
    '2021_10', '2021_11', '2021_12',
    '2022_1', '2022_2', '2022_3', '2022_4', '2022_5', '2022_6', '2022_7', '2022_8', '2022_9',
    '2022_10', '2022_11']

fig, ax = plt.subplots(figsize=(32, 10))

ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)

# median labels next to each box
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02  # offset from median for display
for xtick in ax1.get_xticks():
    # tick i belongs to the i-th entry of the category order -> positional access
    median = medians.iloc[int(xtick)]
    if pd.isna(median):
        continue  # category without data: nothing to label
    ax1.text(xtick + 0.25, median + vertical_offset, int(median),
             horizontalalignment='center', size='small', color='k', weight='semibold')

# counts on top; categories without data are shown as n=0
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax1.get_xticks():
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
# NOTE(review): ci/errcolor/errwidth are flagged as deprecated by newer
# seaborn releases - check against the installed version before upgrading.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('year_month')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by month', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)

# In[31]:
# Jitterplot of the overtaking distance per year_month (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'year_month'
x_ax_ordered_str = [
    '2020_4', '2020_5', '2020_6', '2020_7', '2020_8', '2020_9', '2020_10', '2020_11', '2020_12',
    '2021_1', '2021_2', '2021_3', '2021_4', '2021_5', '2021_6', '2021_7', '2021_8', '2021_9',
    '2021_10', '2021_11', '2021_12',
    '2022_1', '2022_2', '2022_3', '2022_4', '2022_5', '2022_6', '2022_7', '2022_8', '2022_9',
    '2022_10', '2022_11']

fig, ax = plt.subplots(figsize=(32, 10))

ax3 = sns.stripplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)

# counts on top; categories without data are shown as n=0
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax3.get_xticks():
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

# Rotate the labels of THIS figure; the former code read them from ax1,
# i.e. from the axes of the previous cell.
ax.set_xticklabels(ax.get_xmajorticklabels(), rotation=30)
ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('year_month')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by month', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)

# #### by hour

# In[32]:
gdf

# In[33]:
# add local time
gdf['time_berlin'] = gdf['time'].dt.tz_convert('Europe/Berlin')

# In[34]:
gdf['hour'] = gdf['time_berlin'].dt.hour.astype(int)
gdf[gdf.portal == 'bbb'].groupby(['hour'])['distance_overtaker'].count().plot(kind='bar', title='Brandenburg')

# In[35]:
gdf.groupby(['hour'])['distance_overtaker'].count().plot(kind='bar', title='Deutschland')

# In[36]:
# Boxplot of the overtaking distance per hour of day (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'hour'
# hours that occur in the data, ascending
x_ax_ordered_str = data_c_plot[x_ax].unique()
x_ax_ordered_str.sort()

fig, ax = plt.subplots(figsize=(32, 10))

ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)

# median labels next to each box
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02  # offset from median for display
for xtick in ax1.get_xticks():
    # The index holds hour values, the ticks are positions 0..k-1; both only
    # coincide if every hour 0-23 occurs, so access by position explicitly.
    median = medians.iloc[int(xtick)]
    if pd.isna(median):
        continue  # category without data: nothing to label
    ax1.text(xtick + 0.25, median + vertical_offset, int(median),
             horizontalalignment='center', size='small', color='k', weight='semibold')

# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax1.get_xticks():
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
# NOTE(review): ci/errcolor/errwidth are flagged as deprecated by newer
# seaborn releases - check against the installed version before upgrading.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('hour')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by hour', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)

# In[37]:
# Jitterplot of the overtaking distance per hour of day (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'hour'
# hours that occur in the data, ascending
x_ax_ordered_str = data_c_plot[x_ax].unique()
x_ax_ordered_str.sort()

fig, ax = plt.subplots(figsize=(32, 10))

# same explicit category order as the count labels and the error bars below
ax3 = sns.stripplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)

# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax3.get_xticks():
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('hour')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by hour', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)

# ### by weekday

# In[38]:
gdf['weekday'] = gdf['time_berlin'].dt.day_name()


def weekday_sorter(column):
    """Map English weekday names to 0 (Monday) .. 6 (Sunday).

    Intended as ``key=`` for ``sort_values``: *column* is a Series of
    weekday names, the result a Series of their positions in the week.
    Names that are not in the list map to NaN.
    """
    order_wd = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    position_of_day = {day: position for position, day in enumerate(order_wd)}
    return column.map(position_of_day)


(gdf[gdf.portal == 'bbb']
 .groupby(['weekday'])['distance_overtaker'].count()
 .reset_index()
 .sort_values(by='weekday', key=weekday_sorter)
 .plot(kind='bar', title='Brandenburg', x='weekday', legend=None))

# In[39]:
gdf['weekday'] = gdf['time_berlin'].dt.day_name()

# all portals -> titled 'Deutschland', like the corresponding hour plot
(gdf
 .groupby(['weekday'])['distance_overtaker'].count()
 .reset_index()
 .sort_values(by='weekday', key=weekday_sorter)
 .plot(kind='bar', title='Deutschland', x='weekday', legend=None))

# In[40]:
# Jitterplot of the overtaking distance per weekday (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'weekday'
x_ax_ordered_str = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

fig, ax = plt.subplots(figsize=(18, 10))

# The explicit order is essential here: without it the points are placed in
# order of first appearance in the data, while the count labels and the
# error bars below follow Monday..Sunday.
ax3 = sns.stripplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)

# counts on top; weekdays without data are shown as n=0
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax3.get_xticks():
    # tick i belongs to the i-th weekday of the order -> positional access
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
# NOTE(review): ci/errcolor/errwidth are flagged as deprecated by newer
# seaborn releases - check against the installed version before upgrading.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by weekday', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)

# In[41]:
# Boxplot of the overtaking distance per weekday (all portals)
data_c_plot = gdf  # alias, not a copy: the loop below modifies gdf itself
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()

x_ax = 'weekday'
x_ax_ordered_str = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

fig, ax = plt.subplots(figsize=(18, 10))

ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)

# median labels next to each box
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02  # offset from median for display
for xtick in ax1.get_xticks():
    median = medians.iloc[int(xtick)]
    if pd.isna(median):
        continue  # weekday without data: nothing to label
    ax1.text(xtick + 0.25, median + vertical_offset, int(median),
             horizontalalignment='center', size='small', color='k', weight='semibold')

# counts on top; weekdays without data are shown as n=0
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax1.get_xticks():
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center', size='12', color='k', weight='semibold',
             bbox=dict(facecolor='w'))

# fully transparent bars (alpha=0): only the red 95% error bars are visible
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    order=x_ax_ordered_str,
    alpha=0.0, capsize=.1, n_boot=1000, ci=95,
    errcolor='red', errwidth=0.7,
    ax=ax)

ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('')

# overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by weekday', fontsize=22,
        weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08,
        s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum()) + ', 2022-11-07)',
        fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)