#!/usr/bin/env python
# coding: utf-8
# ## OBS Data Analysis from "Zu nah?" project at TH Wildau
# based on open data resources only
#
# created by Simon Metzler at Radprofessur Wildau
#
# * project page https://innohub13.de/wir-forschen/zu-nah/
# * page of Radprofessur https://www.th-wildau.de/radverkehr
# ---
# ### Index:
# * [import python packages](#import)
# * [parse data](#parse)
# * [Geo overview](#geo)
# * [pre processing data](#pre)
# * [plot histogram](#histo)
# * [plot time based analysis](#time)
# ---
# import python packages
# In[1]:
import geopandas as gpd
import pandas as pd
# In[2]:
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
# In[3]:
#from matplotlib_scalebar.scalebar import ScaleBar
import contextily as ctx
# In[4]:
import seaborn as sns
import matplotlib.ticker as ticker
# ---
# parse data
# In[5]:
# manual download from the export tab (automatic download is possible, yet not encouraged)
#https://obs.adfc-brandenburg.de/
#https://obs.adfc-bw.de/
#https://obs.radentscheid-essen.de/
#https://portal.openbikesensor.org/
#https://obs.adfc-darmstadt.de/
# Stand 07.11.22
# Collect every GeoJSON export in the "portale" folder. A forward slash is
# accepted as separator on Windows and POSIX alike, whereas the former
# backslash pattern only matched on Windows.
files = glob.glob('portale/*.geojson')
len(files)
# In[6]:
# Read each export, tag its rows with the portal name (the part of the file
# name after the last underscore) and combine everything in one step.
# DataFrame.append was removed in pandas 2.0, so the per-file frames are
# gathered in a list and concatenated once after the loop.
portal_frames = []
for f in tqdm(files):
    gdf_this = gpd.read_file(f, crs='EPSG:4326')
    portal_name = f.split('_')[-1].replace('.geojson', '')
    gdf_this['portal'] = portal_name
    portal_frames.append(gdf_this)
    print(portal_name)
    print(len(gdf_this))
if portal_frames:
    # ignore_index gives every measurement its own row label instead of
    # repeating 0..n-1 for each portal.
    gdf = pd.concat(portal_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the empty-frame result the
    # original loop produced when no export file was found.
    gdf = gpd.GeoDataFrame()
# In[7]:
# Notebook preview of the combined table.
gdf
# ---
# Geo overview
# In[8]:
# Bare plot of everything that was loaded.
gdf.plot()
# In[9]:
# Same data on a larger canvas, overlaid with the grey boundaries of the
# layer read from the deutschlandGeoJSON repository.
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax)
bland = gpd.read_file(
    "https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/2_bundeslaender/1_sehr_hoch.geo.json"
)
state_outlines = bland.boundary
state_outlines.plot(color='gray', ax=ax)
# In[10]:
# Keep only the Brandenburg and Berlin rows of the state layer
# (append .dissolve() to merge them into a single shape).
in_bbb = bland['name'].isin(['Brandenburg', 'Berlin'])
bbb = bland.loc[in_bbb]
bbb.plot()
# In[11]:
# Geographically clip all measurements to the bbb area.
gdf_clipped = gdf.clip(bbb)
## Alternatively, filter by portal, e.g.
#gdf_filtered=gdf[gdf.portal=='bbb']
# In[12]:
gdf_clipped
# In[13]:
# Number of clipped measurements contributed by each portal.
gdf_clipped['portal'].value_counts()
# #### BRANDENBURG
# In[14]:
# Function
def makeLayeredMap(*args):
    """Draw two geodataframes on top of a Contextily basemap.

    All positional arguments are reprojected to EPSG:3857, but only the first
    two are drawn: ``args[0]`` as translucent grey areas with a black outline
    and ``args[1]`` in blue with a marker size of 5. Layer count and styling
    are changed by editing the plotting section below.

    Returns the value of ``plt.show()``; saving the figure is commented out.
    """
    # Reproject every layer to EPSG:3857 so it lines up with the basemap.
    layers = [layer.to_crs(epsg=3857) for layer in args]

    fig, ax = plt.subplots(1, figsize=(15, 15))
    ax.set_aspect('equal')

    # Layer 1: grey fill first, then the outline on top of it.
    layers[0].plot(ax=ax, facecolor="grey", alpha=0.3, figsize=(15, 15))
    layers[0].plot(ax=ax, facecolor="none", alpha=1, edgecolor='black', linewidths=1, figsize=(15, 15))
    # Layer 2:
    layers[1].plot(ax=ax, color='#0069B4', alpha=0.3, zorder=2, markersize=5)

    #ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron, reset_extent=False, zoom=10)
    ax.axis('off')

    # Title and subtitle are positioned in axes coordinates.
    header_kw = dict(ha='center', va='bottom', transform=ax.transAxes)
    ax.text(x=0.5, y=0.955, s='Measured overtaking events', fontsize=28, weight='bold', **header_kw)
    subtitle = "OBS-Data: ADFC Brandenburg : (n=" + str(len(layers[1])) + ", 2022-11-07)"
    ax.text(x=0.5, y=0.925, s=subtitle, fontsize=15, alpha=0.75, **header_kw)

    layered_map = plt.show()
    #fig.savefig("plots\OBS_analysis_map_bbb_80dpi.png", dpi=80)
    return layered_map
# In[15]:
# Berlin/Brandenburg outlines together with the clipped measurements.
berlin_brandenburg = bland[bland['name'].isin(['Brandenburg', 'Berlin'])]
makeLayeredMap(berlin_brandenburg, gdf_clipped)
# #### DEUTSCHLAND
# In[16]:
# Function
def makeLayeredMap(*args):
    """Draw two geodataframes on a Contextily basemap with the all-portal subtitle.

    Each positional argument is converted to EPSG:3857. The first one is
    rendered as grey areas with a black outline, the second one in blue on
    top of it; further arguments are converted but not drawn. Edit the
    plotting section to change the layers or their formatting.

    Returns the value of ``plt.show()``; saving the figure is commented out.
    """
    # Convert the CRS of all layers to EPSG:3857 to match Contextily.
    projected = [frame.to_crs(epsg=3857) for frame in args]

    fig, ax = plt.subplots(1, figsize=(15, 15))
    ax.set_aspect('equal')

    # Layer 1: translucent fill, then an opaque outline.
    projected[0].plot(ax=ax, facecolor="grey", alpha=0.3, figsize=(15, 15))
    projected[0].plot(ax=ax, facecolor="none", alpha=1, edgecolor='black', linewidths=1, figsize=(15, 15))
    # Layer 2:
    projected[1].plot(ax=ax, color='#0069B4', alpha=0.3, zorder=2, markersize=5)

    # Contextily basemap:
    #ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
    ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron, reset_extent=False, zoom=10)
    ax.axis('off')

    # Headline and data-source line, both placed in axes coordinates.
    centred = dict(ha='center', va='bottom', transform=ax.transAxes)
    ax.text(x=0.5, y=0.955, s='Measured overtaking events', fontsize=28, weight='bold', **centred)
    source_line = "OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(len(projected[1])) + ", 2022-11-07)"
    ax.text(x=0.5, y=0.92, s=source_line, fontsize=13, alpha=0.85, **centred)

    layered_map = plt.show()
    #fig.savefig("plots\OBS_analysis_map_Deutschland_80dpi.png", dpi=80)
    return layered_map
# In[17]:
# All state outlines together with every loaded measurement.
germany_layers = (bland, gdf)
makeLayeredMap(*germany_layers)
# ---
# pre processing
#
# In[18]:
# convert from m to cm
for distance_col in ('distance_overtaker', 'distance_stationary'):
    gdf[distance_col] = gdf[distance_col] * 100
# In[19]:
# Add binned versions of distance_overtaker.
# pd.cut leaves values outside the outermost bin edges (here above 400)
# without a class, so they do not appear in the binned columns.
# 50 cm classes
bins = [*range(0, 300, 50), 400]
labels = [
    '<50', '50-100', '100-150', '150-200', '200-250', '>250',
]
gdf['distance_overtaker_binned'] = pd.cut(gdf['distance_overtaker'], bins=bins, labels=labels)
# 25 cm classes
bins = [*range(0, 260, 25), 400]
labels = [
    '<25', '25-50', '50-75', '75-100', '100-125', '125-150',
    '150-175', '175-200', '200-225', '225-250', '>250',
]
gdf['distance_overtaker_binned_25'] = pd.cut(gdf['distance_overtaker'], bins=bins, labels=labels)
# In[20]:
# Remove all data points with a measurement below 15 cm to clean out wrong
# readings caused by ultrasonic sensor issues.
too_close = (gdf.distance_overtaker < 15) | (gdf.distance_stationary < 15)
gdf = gdf[~too_close].copy()
# ---
# plot histogram
# In[21]:
# Number of overtaking events per 25 cm distance class for the 'bbb' portal.
histo_bbb = (
    gdf[gdf.portal=='bbb']
    .groupby('distance_overtaker_binned_25')['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)
histo_bbb
# In[22]:
sns.set(rc={'figure.figsize':(10,6)})
sns.set_context("talk")
x_ax = 'distance_overtaker_binned_25'
fig, ax = plt.subplots(
    figsize=(12,8),
)
### bar chart: one bar per distance class, coloured along the palette
ax1 = sns.barplot(
    data=histo_bbb, x=x_ax, y='counts',
    hue=x_ax,
    palette='RdYlGn',# palette='Spectral',
    dodge=False,
    ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Overtaking distance [cm]')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x) ))
# hue only colours the bars, so the legend is redundant. seaborn may not
# create one at all in this situation, hence the None check.
legend = ax1.get_legend()
if legend is not None:
    legend.remove()
ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
ax.text(x=0.5, y=1.15, s='Distribution of overtaking distances', fontsize=28, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
#ax.text(x=0.5, y=1.07, s="OBS-Data: ADFC Brandenburg: (n=" + str(counts.sum())+')', fontsize=9, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.10, s="OBS-Data: ADFC Brandenburg (n=" + str(histo_bbb.counts.sum())+', 2022-11-07)', fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
#fig.savefig("plots/OBS_analysis_distribution_bbb.png", dpi=300, bbox_inches="tight")
# In[23]:
# Number of overtaking events per 25 cm distance class across all portals.
histo_all = (
    gdf.groupby('distance_overtaker_binned_25')['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)
histo_all
# In[24]:
sns.set(rc={'figure.figsize':(10,6)})
sns.set_context("talk")
x_ax = 'distance_overtaker_binned_25'
fig, ax = plt.subplots(
    figsize=(12,8),
)
### bar chart: one bar per distance class, coloured along the palette
ax1 = sns.barplot(
    data=histo_all, x=x_ax, y='counts',
    hue=x_ax,
    palette='RdYlGn',# palette='Spectral',
    dodge=False,
    ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Overtaking distance [cm]')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x) ))
# The legend would only repeat the x-axis classes. seaborn may skip it for a
# redundant hue, so remove it only when it exists.
legend = ax1.get_legend()
if legend is not None:
    legend.remove()
ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
ax.text(x=0.5, y=1.15, s='Distribution of overtaking distances', fontsize=28, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
#ax.text(x=0.5, y=1.07, s="OBS-Data: ADFC Brandenburg: (n=" + str(counts.sum())+')', fontsize=9, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(histo_all.counts.sum())+', 2022-11-07)', fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
# NOTE: the file name below is the same as for the Brandenburg-only plot.
#fig.savefig("plots/OBS_analysis_distribution_bbb.png", dpi=300, bbox_inches="tight")
# ---
# plot time based analysis
# #### by month
# In[25]:
# String versions of month and year, used to build the combined key below.
month_number = gdf['time'].dt.month
year_number = gdf['time'].dt.year
gdf['month'] = month_number.astype(str)
gdf['year'] = year_number.astype(str)
# In[26]:
# Key of the form '<year>_<month>' without zero padding, e.g. '2022_3'.
gdf['year_month'] = gdf['year'] + "_" + gdf['month']
# In[27]:
# Integer months so the bars sort numerically; events per year and month.
gdf['month'] = gdf['time'].dt.month.astype(int)
events_per_year_month = gdf.groupby(['year', 'month'])['distance_overtaker'].count()
events_per_year_month.plot.bar()
# In[28]:
# Events per month for the 'bbb' portal only.
gdf['month'] = gdf['time'].dt.month.astype(int)
bbb_events_per_month = gdf[gdf.portal == 'bbb'].groupby(['month'])['distance_overtaker'].count()
bbb_events_per_month.plot.bar()
# In[29]:
#sns.set(rc={'figure.figsize':(10,6)})
sns.set_context("talk")
# Month as string so it can be matched against the explicit order below.
gdf['month'] = gdf['time'].dt.month.astype(str)
# Number of overtaking events per month for the 'bbb' portal.
data_c_plot = gdf[gdf.portal=='bbb']
data_c_plot = (
    data_c_plot.groupby(['month'])['distance_overtaker']
    .count()
    .reset_index()
    .rename(columns={'distance_overtaker': 'counts'})
)
x_ax = 'month'
# Explicit order so the string months sort numerically ('10' after '9').
# The former unique() lookup was overwritten immediately and has been dropped.
x_ax_ordered_str = [ '3', '4', '5', '6', '7', '8', '9','10', '11']
fig, ax = plt.subplots(
    figsize=(12,8),
)
ax1 = sns.barplot(
    data=data_c_plot, x=x_ax, y='counts',
    color='#0058A9',
    dodge=False,
    order=x_ax_ordered_str,
    ax=ax)
ax1.set_ylabel('Number of overtaking events')
ax1.set_xlabel('Month in 2022')
ax1.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}'.format(x) ))
ax.text(x=0.5, y=1.15, s='Time distribution of overtaking events', fontsize=28, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.10, s="OBS-Data: ADFC Brandenburg (n=" + str(len(gdf[gdf.portal=='bbb']))+', 2022-11-07)', fontsize=13, alpha=0.9, ha='center', va='bottom', transform=ax.transAxes)
#fig.savefig("plots/OBS_analysis_distribution_time_bbb.png", dpi=300, bbox_inches="tight")
# In[30]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'year_month'
# Fixed chronological order of the 'year_month' keys. The former unique()
# lookup was overwritten immediately and has been dropped.
x_ax_ordered_str = ['2020_4', '2020_5', '2020_6', '2020_7', '2020_8', '2020_9',
                    '2020_10', '2020_11', '2020_12', '2021_1', '2021_2', '2021_3',
                    '2021_4', '2021_5', '2021_6', '2021_7', '2021_8', '2021_9',
                    '2021_10', '2021_11', '2021_12', '2022_1', '2022_2', '2022_3',
                    '2022_4', '2022_5', '2022_6', '2022_7', '2022_8', '2022_9',
                    '2022_10', '2022_11']
fig, ax = plt.subplots(
    figsize=(32,10),
)
### box plot of the overtaking distance per month
ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)
### add labels to ax1
# medians
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02 # offset from median for display
for xtick in ax1.get_xticks():
    # The tick position is used as a position in the ordered list, so look
    # it up with iloc rather than with an integer key on a string index.
    median_value = medians.iloc[int(xtick)]
    if pd.isna(median_value):
        # A month from the fixed list without data has no median to print.
        continue
    ax1.text(xtick+0.25, median_value + vertical_offset, int(median_value),
             horizontalalignment='center',
             size='small',
             color='k',
             weight='semibold')
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
# fill_value keeps the counts integer and shows n=0 for months without data.
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax1.get_xticks():
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('year_month')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by month', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)
# In[31]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'year_month'
#x_ax_ordered_str=data_c_plot[x_ax].unique()
# Fixed chronological order of the 'year_month' keys.
x_ax_ordered_str = ['2020_4', '2020_5', '2020_6', '2020_7', '2020_8', '2020_9',
                    '2020_10', '2020_11', '2020_12', '2021_1', '2021_2', '2021_3',
                    '2021_4', '2021_5', '2021_6', '2021_7', '2021_8', '2021_9',
                    '2021_10', '2021_11', '2021_12', '2022_1', '2022_2', '2022_3',
                    '2022_4', '2022_5', '2022_6', '2022_7', '2022_8', '2022_9',
                    '2022_10', '2022_11']
fig, ax = plt.subplots(
    figsize=(32,10),
)
#### box plot (disabled in this cell)
#ax1= sns.boxplot(
#    data=data_c_plot, x=x_ax, y="distance_overtaker",
#    showcaps=False,
#    flierprops={"marker": "x"},
#    order = x_ax_ordered_str,
#    boxprops={"facecolor": (.4, .6, .8, .5)},
#    medianprops={"color": "k"},
#    ax=ax)
### jittered points of every single measurement
ax3 = sns.stripplot(data=data_c_plot, x=x_ax, y="distance_overtaker", order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)
### add labels
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
# fill_value keeps the counts integer and shows n=0 for months without data.
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax3.get_xticks():
    # The tick position is used as a position in the ordered list.
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
# Rotate the labels of THIS figure. The cell used to read them from ax1,
# which is only defined by a different cell and belongs to another figure.
ax.set_xticklabels(ax.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('year_month')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by month', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)
# #### by hour
# In[32]:
# Notebook preview of the table.
gdf
# In[33]:
# Add the timestamp converted to the Europe/Berlin time zone.
recorded_time = gdf['time']
gdf['time_berlin'] = recorded_time.dt.tz_convert('Europe/Berlin')
# In[34]:
# Hour of day taken from the Berlin timestamp; events per hour for 'bbb'.
gdf['hour'] = gdf['time_berlin'].dt.hour.astype(int)
bbb_events_per_hour = gdf[gdf.portal == 'bbb'].groupby(['hour'])['distance_overtaker'].count()
bbb_events_per_hour.plot.bar(title='Brandenburg')
# In[35]:
# Events per hour across all portals.
events_per_hour = gdf.groupby(['hour'])['distance_overtaker'].count()
events_per_hour.plot.bar(title='Deutschland')
# In[36]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'hour'
# Hours that occur in the data, in ascending order.
x_ax_ordered_str = data_c_plot[x_ax].unique()
x_ax_ordered_str.sort()
fig, ax = plt.subplots(
    figsize=(32,10),
)
### box plot of the overtaking distance per hour
ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)
### add labels to ax1
# medians
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02 # offset from median for display
for xtick in ax1.get_xticks():
    # medians is indexed by the hour value, so medians[xtick] would look up
    # the HOUR equal to the tick position. That is only right when every hour
    # from 0 upwards is present; iloc picks the entry at that position.
    median_value = medians.iloc[int(xtick)]
    if pd.isna(median_value):
        continue
    ax1.text(xtick+0.25, median_value + vertical_offset, int(median_value),
             horizontalalignment='center',
             size='small',
             color='k',
             weight='semibold')
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
counts = counts.reindex(x_ax_ordered_str)
for xtick in ax1.get_xticks():
    # Positional lookup for the same reason as for the medians.
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
#ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('hour')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by hour', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)
# In[37]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'hour'
# Hours that occur in the data, in ascending order.
x_ax_ordered_str = data_c_plot[x_ax].unique()
x_ax_ordered_str.sort()
fig, ax = plt.subplots(
    figsize=(32,10),
)
#### box plot (disabled in this cell)
#ax1= sns.boxplot(
#    data=data_c_plot, x=x_ax, y="distance_overtaker",
#    showcaps=False,
#    flierprops={"marker": "x"},
#    order = x_ax_ordered_str,
#    boxprops={"facecolor": (.4, .6, .8, .5)},
#    medianprops={"color": "k"},
#    ax=ax)
### jittered points; the explicit order matches the bar plot and the labels
ax3 = sns.stripplot(data=data_c_plot, x=x_ax, y="distance_overtaker", order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)
### add labels
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
counts = counts.reindex(x_ax_ordered_str)
for xtick in ax3.get_xticks():
    # counts is indexed by the hour value, so counts[xtick] would look up
    # the HOUR equal to the tick position. That is only right when every hour
    # from 0 upwards is present; iloc picks the entry at that position.
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
#ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('hour')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by hour', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)
# ### by weekday
# In[38]:
# Weekday name derived from the Berlin-local timestamp.
berlin_time = gdf['time_berlin']
gdf['weekday'] = berlin_time.dt.day_name()
def weekday_sorter(column):
    """Map weekday names to their Monday-first position (0-6) for use as a sort key.

    Names that are not one of the seven English weekday names map to NaN.
    """
    weekday_names = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
    rank_by_name = dict(zip(weekday_names, range(len(weekday_names))))
    return column.map(rank_by_name)
# Events per weekday for the 'bbb' portal, sorted Monday to Sunday.
bbb_by_weekday = (
    gdf[gdf.portal == 'bbb']
    .groupby(['weekday'])['distance_overtaker']
    .count()
    .reset_index()
    .sort_values(by='weekday', key=weekday_sorter)
)
bbb_by_weekday.plot(kind='bar', title='Brandenburg', x='weekday', legend=None)
# In[39]:
# Weekday name derived from the Berlin-local timestamp.
local_timestamps = gdf['time_berlin']
gdf['weekday'] = local_timestamps.dt.day_name()
def weekday_sorter(column):
    """Return the Monday-first rank (0-6) of each weekday name in *column*.

    Intended as a ``key`` for ``sort_values``; unknown names become NaN.
    """
    order_wd = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
    return column.map({day: position for position, day in enumerate(order_wd)})
# Events per weekday across ALL portals, sorted Monday to Sunday. The chart
# used to carry the 'Brandenburg' title copied from the portal-specific plot;
# the all-portal overviews in this file are titled 'Deutschland'.
all_by_weekday = (
    gdf.groupby(['weekday'])['distance_overtaker']
    .count()
    .reset_index()
    .sort_values(by='weekday', key=weekday_sorter)
)
all_by_weekday.plot(kind='bar', title='Deutschland', x='weekday', legend=None)
# In[40]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'weekday'
x_ax_ordered_str = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
fig, ax = plt.subplots(
    figsize=(18,10),
)
#### box plot (disabled in this cell)
#ax1= sns.boxplot(
#    data=data_c_plot, x=x_ax, y="distance_overtaker",
#    showcaps=False,
#    flierprops={"marker": "x"},
#    order = x_ax_ordered_str,
#    boxprops={"facecolor": (.4, .6, .8, .5)},
#    medianprops={"color": "k"},
#    ax=ax)
### jittered points. The explicit order is required here: without it the
### weekdays are placed in their order of appearance in the data, while the
### count labels and the bar plot below assume Monday..Sunday.
ax3 = sns.stripplot(data=data_c_plot, x=x_ax, y="distance_overtaker", order=x_ax_ordered_str, color="grey", alpha=0.3, ax=ax, zorder=1)
### add labels
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
# fill_value keeps the counts integer and shows n=0 for weekdays without data.
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax3.get_xticks():
    # The tick position is used as a position in the ordered list, so look
    # it up with iloc rather than with an integer key on a string index.
    ax3.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
#ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax.set(title='Jitterplot with 95% CI')
ax.set_ylabel('Distance overtaker [cm]')
ax.set_xlabel('')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by weekday', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)
# In[41]:
# data_c_plot is an alias of gdf (not a copy); unused categories are dropped
# from every categorical column.
data_c_plot = gdf
for col in data_c_plot.dtypes.loc[lambda x: x == 'category'].index:
    data_c_plot[col] = data_c_plot[col].cat.remove_unused_categories()
x_ax = 'weekday'
x_ax_ordered_str = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
fig, ax = plt.subplots(
    figsize=(18,10),
)
### box plot of the overtaking distance per weekday
ax1 = sns.boxplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    showcaps=False,
    flierprops={"marker": "x"},
    order=x_ax_ordered_str,
    boxprops={"facecolor": (.4, .6, .8, .5)},
    medianprops={"color": "k"},
    ax=ax)
### add labels to ax1
# medians
medians = data_c_plot.groupby([x_ax])['distance_overtaker'].median()
medians = medians.reindex(x_ax_ordered_str)
vertical_offset = data_c_plot['distance_overtaker'].median() * 0.02 # offset from median for display
for xtick in ax1.get_xticks():
    # The tick position is used as a position in the ordered list, so look
    # it up with iloc rather than with an integer key on a string index.
    median_value = medians.iloc[int(xtick)]
    if pd.isna(median_value):
        # A weekday without data has no median to print.
        continue
    ax1.text(xtick+0.25, median_value + vertical_offset, int(median_value),
             horizontalalignment='center',
             size='small',
             color='k',
             weight='semibold')
# counts on top
counts = data_c_plot.groupby([x_ax])['distance_overtaker'].count()#.astype(int)
# fill_value keeps the counts integer and shows n=0 for weekdays without data.
counts = counts.reindex(x_ax_ordered_str, fill_value=0)
for xtick in ax1.get_xticks():
    ax1.text(xtick, 298, "n=" + str(counts.iloc[int(xtick)]),
             horizontalalignment='center',
             size='12',
             color='k',
             weight='semibold',
             bbox=dict(facecolor='w'))
### invisible bar plot (alpha=0), drawn only for its red 95% CI error bars
# NOTE(review): ci/errcolor/errwidth are deprecated in newer seaborn
# releases - confirm against the installed version.
ax2 = sns.barplot(
    data=data_c_plot, x=x_ax, y="distance_overtaker",
    #palette="Blues",
    order=x_ax_ordered_str,
    alpha=0.0,
    capsize=.1, n_boot=1000, ci=95,
    errcolor='red', #errcolor='.26' =
    errwidth=0.7,
    ax=ax)
#ax.set_xticklabels(ax1.get_xmajorticklabels(), rotation=30)
#ax.set_xticklabels(ax2.get_xmajorticklabels(), rotation=30)
ax1.set(title='Boxplot with 95% CI')
ax1.set_ylabel('Distance overtaker [cm]')
ax1.set_xlabel('')
#add overall title
ax.text(x=0.5, y=1.13, s='Overtaking distance by weekday', fontsize=22, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.08, s="OBS-Data: ADFC Brandenburg, Radentscheid Essen, ADFC BW, OBS-Portal, \n ADFC Darmstadt, ADFC Osnabrück: (n=" + str(counts.sum())+', 2022-11-07)', fontsize=12, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
#fig.show()
#fig.savefig("OBS_analysis_osm_maxspeed_boxplot.png", dpi=300)