#!/usr/bin/env python
# coding: utf-8
"""Exploratory analysis of where survey respondents aborted.

Loads per-respondent data with a ``lastpage`` column, derives an
``abortpage`` column (NaN for respondents whose last page equals the highest
page seen in the data), and plots:

* the share of participants still present at each question page, and
* the number of aborts per question page.

NOTE(review): respondents are treated as "not aborted" when their
``lastpage`` equals the maximum ``lastpage`` in the data set. This assumes at
least one respondent finished the survey -- confirm for each input file.
"""

# In[1]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt


# In[2]:

# Fail loudly on chained assignment instead of silently writing to a copy.
pd.options.mode.chained_assignment = 'raise'
# pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 2000)

# Display full output/results of a cell, not just the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# InteractiveShell.ast_node_interactivity = "last"  # reset


# In[3]:

df_timings = pd.read_csv('../data/synth_100.csv')  # testdata, synth_100
df_timings.shape


# In[4]:

df_timings.head()


# Aborted respondents

# In[5]:

df_timings['lastpage'].describe()


# In[6]:

# Highest page reached by any respondent; computed once and reused below.
final_page = df_timings['lastpage'].max()
final_page


# In[7]:

# 'abortpage' is the last page seen by a respondent who aborted, and NaN for
# respondents who reached the final page (i.e. non-aborted respondents).
reached_final_page = df_timings['lastpage'] == final_page
df_timings['abortpage'] = df_timings['lastpage'].mask(reached_final_page, np.nan)
df_timings


# Fraction of aborted respondents

# In[8]:

df_timings['abortpage'].notna().sum() / df_timings.shape[0]


# Fraction of participants who have not aborted at the respective question

# In[9]:

fig, ax = plt.subplots()
# ecdfplot returns the Axes it drew on; do not rebind `fig` to it, otherwise
# the Figure handle from plt.subplots() is lost.
sns.ecdfplot(data=df_timings, x="lastpage", complementary=True, ax=ax)
ax.set_xlim(0, final_page)
ax.set_xlabel("Question page")
ax.set_ylabel("Participants share")
plt.show()


# Number of aborted responses per question (note binning if used)
#
# This histogram can be overlaid with the time spent per question to see
# correlations.

# In[10]:

abort_ax = sns.histplot(data=df_timings, x="abortpage", binwidth=1)
abort_ax.set_xlabel("Question page")
abort_ax.set_ylabel("No. of aborts at page")
plt.show()


# In[ ]: