#!/usr/bin/env python
# coding: utf-8

# # Event-based analytics
#
# I work at a startup that sells food products. I need to investigate user behavior for the company's app.
# First, I need to study the sales funnel and find out how users reach the purchase stage.
# - How many users actually make it to this stage?
# - How many get stuck at previous stages?
# - Which stages in particular?
#
# Then, I'll look at the results of an A/A/B test. The designers would like to change the fonts for the entire app, but the managers are afraid the users might find the new design intimidating. They decided to base the decision on the results of the test.
#
# The users are split into three groups: two control groups get the old fonts and one test group gets the new ones. I need to find out which set of fonts produces better results.
#
# Creating two A groups has certain advantages. We can make it a principle that we will only be confident in the accuracy of our testing when the two control groups are similar. If there are significant differences between the A groups, this can help us uncover factors that may be distorting the results. Comparing control groups also tells us how much time and data we'll need when running further tests.
#
# ## Description of the data
#
# */datasets/logs_exp_us.csv*
#
# Each log entry is a user action or an event:
# - **EventName** — event name
# - **DeviceIDHash** — unique user identifier
# - **EventTimestamp** — event time
# - **ExpId** — experiment number (246 & 247 are the control groups, 248 is the test group)

# ### Step 1: Download data & study general info

# #### Import libraries & data

# In[1]:

# import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
from plotly import graph_objects as go

# In[2]:

# the try-except block handles errors that occur from changing file directories
try:
    logs = pd.read_csv('logs_exp_us.csv', sep='\t')
except FileNotFoundError:
    logs = pd.read_csv('/datasets/logs_exp_us.csv', sep='\t')

# #### Study general info

# In[3]:

# study general info
display(logs.info())
display(logs.head())

# ##### Conclusion
# Immediately we can see there are a few issues we need to correct:
# - 'DeviceIDHash' & 'ExpId' should be changed to *object* dtype
# - 'EventTimestamp' should be changed to *datetime* dtype
# - We should rename the columns to something more intuitive
# - We'll add date & time columns for easier analysis later on
# - There are no missing values (no action needed)

# ### Step 2: Data preprocessing

# #### Changing data types

# In[4]:

# change 'DeviceIDHash' & 'ExpId' to object
logs['DeviceIDHash'] = logs['DeviceIDHash'].astype(str)
logs['ExpId'] = logs['ExpId'].astype(str)

# In[5]:

# change 'EventTimestamp' (Unix time in seconds) to datetime
logs['EventTimestamp'] = pd.to_datetime(logs['EventTimestamp'], unit='s')

# #### Change column names

# In[6]:

# change column names
logs.columns = ['event', 'uid', 'datetime', 'group']

# #### Add date & time columns

# In[7]:

# add separate date & time columns derived from the datetime column
logs['time'] = logs['datetime'].dt.time
logs['date'] = logs['datetime'].dt.date

# In[8]:

# verify results
display(logs.info())
display(logs.head())

# ### Step 3: Study & analyze the data

# #### Number of events & users

# In[9]:

# count number of events
total_events = logs['event'].count()
print(f'There were {total_events} total events')

# In[10]:

# count number of unique users
total_users = logs['uid'].nunique()
print(f'There were {total_users} unique users')
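# Before computing a simple average in the next cell, it can also help to look at the full distribution of events per user, since a mean alone can hide a handful of very heavy users. This is an optional check, not part of the original analysis; it only assumes the `logs` DataFrame defined above.

# In[ ]:

# optional check: distribution of events per user (the median is more robust to outliers than the mean)
events_per_user = logs.groupby('uid')['event'].count()
display(events_per_user.describe())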
# In[11]:

# average events per user
avg_events_per_user = total_events / total_users
print(f'Each user triggered an average of {avg_events_per_user:.1f} events')

# #### Data time period

# In[12]:

min_datetime = logs['datetime'].min()
max_datetime = logs['datetime'].max()
print(f'The earliest event was at {min_datetime}')
print(f'The latest event was at {max_datetime}')

# In[13]:

# plot distribution of all events over time
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

plt.figure(figsize=(8, 5))
plt.hist(logs['datetime'], bins=200)
plt.xlabel('Date')
plt.ylabel('Count')
plt.grid(True)
plt.xticks(logs['date'].unique(), rotation=45)
plt.show()

# There is a very low number of events prior to August 1st. Maybe the app hadn't been fully launched yet, or it was still in beta testing. We should ignore data from before this date by creating a new filtered DataFrame.

# In[14]:

# new filtered DataFrame keeping events from August 1st, 2019 onward
filtered_logs = logs[logs['date'] >= pd.to_datetime('2019-08-01').date()]
filtered_logs.head()

# In[15]:

# plot distribution of filtered events over time
plt.hist(filtered_logs['datetime'], bins=200)
plt.xlabel('Date')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()

# #### Number of events & users with filtered data

# In[16]:

# count number of events after filtering
filtered_total_events = filtered_logs['event'].count()
print(f'There were {filtered_total_events} total events after filtering')

percent_lost_events = round((total_events - filtered_total_events) / total_events * 100, 1)
print(f'The total number of events decreased by {percent_lost_events}% after filtering')

# In[17]:

# count number of users after filtering
filtered_total_users = filtered_logs['uid'].nunique()
print(f'There were {filtered_total_users} unique users after filtering')

percent_lost_users = round((total_users - filtered_total_users) / total_users * 100, 1)
print(f'The total number of users decreased by {percent_lost_users}% after filtering')

# #### Checking groups after filtering

# In[18]:

# checking groups in filtered data
print(filtered_logs['group'].unique())

# We still have users present in all 3 groups after filtering out the data from before August 1st, 2019. We can proceed with the analysis using the filtered data.

# ### Step 4: Study the event funnel

# #### Frequency of events

# In[19]:

# sort events by frequency (total number of occurrences)
event_counts = filtered_logs.groupby('event')['uid'].count().sort_values(ascending=False)
event_counts

# #### Events by number of users

# In[20]:

# calculate number of unique users who triggered each event
event_user_counts = filtered_logs.groupby('event')['uid'].nunique().sort_values(ascending=False)
event_user_counts

# In[21]:

# calculate the percentage of total users who triggered each event
percent_users_events = (event_user_counts / filtered_total_users) * 100
percent_users_events

# #### Sequence of events
# It looks like the main sequence is:
# 1. MainScreenAppear
# 2. OffersScreenAppear
# 3. CartScreenAppear
# 4. PaymentScreenSuccessful
#
# The tutorial is most likely optional and could appear just before or after the main screen. We can leave it out of the funnel for now, since it isn't part of the critical sequence of events.
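# Before plotting the funnel with Plotly, we can compute the conversion shares directly. This is an optional sketch, not part of the original analysis; it assumes `event_user_counts` from the cell above, with the tutorial excluded and the remaining four events already ordered by funnel step.

# In[ ]:

# optional check: share of users retained at each step, relative to the previous step and to the first step
funnel = event_user_counts[:4]
step_conversion = (funnel / funnel.shift(1) * 100).round(1)
overall_conversion = (funnel / funnel.iloc[0] * 100).round(1)
display(pd.DataFrame({'users': funnel,
                      'step_conversion_%': step_conversion,
                      'overall_conversion_%': overall_conversion}))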
# #### Plotting the funnel

# In[22]:

# plot funnel using the plotly library
fig = go.Figure(go.Funnel(
    y=event_user_counts.reset_index()['event'][:4],
    x=event_user_counts.reset_index()['uid'][:4]
))
fig.show()

# ##### Conclusion
# Plotly makes it easy to plot funnel diagrams, and even does the percentage calculations for you. Here we can see that initially we had 7419 users who triggered the MainScreenAppear event.
#
# Of those initial users, roughly 62% made it to the next step, the offers screen. This represents a 38% loss of users between these two stages, the biggest drop in the funnel. Boosting conversion at this stage would do the most to help the overall conversion rate.
#
# Of the initial 7419 users, about 48% reach the successful payment page, so nearly half of the users become customers.

# ### Step 5: Study the test results

# #### Number of users in each test group

# In[23]:

# unique users per test group
users_per_group = filtered_logs.groupby('group')['uid'].nunique()
users_per_group

# #### Statistical significance of differences in conversion rates between control groups
# We can write a function that uses a z-test to compare conversion rates at every stage and determine whether the differences between samples are statistically significant. The arguments for this function are the two sample groups, a list of events to compare conversion rates for, and the critical significance level for the z-test. The function prints the null and alternative hypotheses and returns a DataFrame summarizing the results for each test.

# In[24]:

# function computes the statistical significance of the difference in conversion rates
# at any given stage of the funnel with a two-proportion z-test
def conversions_z_test(sample_A, sample_B, events, alpha):
    import math

    event_list = []
    sample_a_conv_list = []
    sample_b_conv_list = []
    p_value_list = []
    reject_null_list = []

    # calculate total unique users for both samples
    total_users_A = sample_A['uid'].nunique()
    total_users_B = sample_B['uid'].nunique()

    # calculate unique users per event for both samples
    users_per_event_A = sample_A.groupby('event')['uid'].nunique()
    users_per_event_B = sample_B.groupby('event')['uid'].nunique()

    # calculate the share of users who triggered each event for both samples
    percent_users_events_A = users_per_event_A / total_users_A
    percent_users_events_B = users_per_event_B / total_users_B

    for event in events:
        # sample proportions and pooled proportion
        p1 = percent_users_events_A[event]
        p2 = percent_users_events_B[event]
        p = (users_per_event_A[event] + users_per_event_B[event]) / (total_users_A + total_users_B)

        # z statistic for the difference in proportions
        z = (p1 - p2) / math.sqrt(p * (1 - p) * (1 / total_users_A + 1 / total_users_B))

        # two-tailed p-value; reject the null hypothesis if it falls below alpha
        p_value = (1 - st.norm.cdf(abs(z))) * 2
        reject_null = p_value < alpha

        # append data & results to lists
        event_list.append(event)
        sample_a_conv_list.append(round(p1 * 100, 2))
        sample_b_conv_list.append(round(p2 * 100, 2))
        p_value_list.append(round(p_value, 4))
        reject_null_list.append(reject_null)

    # create dictionary of results for the DataFrame
    result_dict = {'event': event_list,
                   'sample_a_conv': sample_a_conv_list,
                   'sample_b_conv': sample_b_conv_list,
                   'difference': np.array(sample_a_conv_list) - np.array(sample_b_conv_list),
                   'p_value': p_value_list,
                   'reject_null': reject_null_list
                   }

    # print hypotheses
    print('Null hypothesis:\n There is no difference in conversion rates between the samples')
    print('Alt hypothesis:\n There is a difference in conversion rates between the samples\n')
    print('alpha =', alpha)

    # return results DataFrame
    return pd.DataFrame(result_dict)
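# As an optional sanity check on the manual calculation above, the same comparison can be run with statsmodels' built-in two-proportion z-test. This helper is not part of the original analysis and assumes the `statsmodels` package is available in the environment; it can be called once the groups are defined below, e.g. `ztest_single_event(group_A1, group_A2, 'CartScreenAppear')`.

# In[ ]:

# optional cross-check of the manual z-test using statsmodels (assumes statsmodels is installed)
from statsmodels.stats.proportion import proportions_ztest

def ztest_single_event(sample_A, sample_B, event):
    # number of users who reached the event ("successes") and total users in each sample
    successes = np.array([sample_A[sample_A['event'] == event]['uid'].nunique(),
                          sample_B[sample_B['event'] == event]['uid'].nunique()])
    totals = np.array([sample_A['uid'].nunique(), sample_B['uid'].nunique()])
    stat, p_value = proportions_ztest(successes, totals)
    return stat, p_value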
# In[25]:

# split into groups, set events to compare, & set alpha
group_A1 = filtered_logs.query('group == "246"').drop('group', axis=1)
group_A2 = filtered_logs.query('group == "247"').drop('group', axis=1)
group_B0 = filtered_logs.query('group == "248"').drop('group', axis=1)
group_A0 = filtered_logs.query('(group == "246") | (group == "247")').drop('group', axis=1)

events = percent_users_events.index[:4]
alpha = 0.05

# In[26]:

# run z-tests comparing conversion rates between the control groups for all events
conversions_z_test(group_A1, group_A2, events, alpha)

# ##### Conclusion
# After running the z-test comparing conversion rates between the control groups for each event, we failed to reject the null hypothesis at every stage of the funnel: none of the differences in conversion rates were statistically significant. This is good news and suggests that the A/B test was set up properly.

# #### Statistical significance of differences in conversion rates between control groups & test group

# In[27]:

# run z-tests comparing conversion rates for control group A1 with test group B0 for all events
conversions_z_test(group_A1, group_B0, events, alpha)

# In[28]:

# run z-tests comparing conversion rates for control group A2 with test group B0 for all events
conversions_z_test(group_A2, group_B0, events, alpha)

# ##### Conclusion
# The results differ depending on which control group was used. With group A1 (246), we failed to reject the null hypothesis at every stage. With group A2 (247), however, the 2.49% difference in conversion to the CartScreenAppear event was statistically significant, so we rejected the null hypothesis for that stage.

# #### Statistical significance of differences in conversion rates between combined control group & test group

# In[29]:

# run z-tests comparing conversion rates for combined control group A0 with test group B0 for all events
conversions_z_test(group_A0, group_B0, events, alpha)

# ##### Conclusion
# When comparing the combined control group A0 (246 & 247) with the test group B0 (248), we again fail to reject the null hypothesis: the difference in conversion rates between the combined control group and the test group is not statistically significant at any stage of the funnel.

# #### Explanation of critical significance level
#
# For these tests, we used a critical significance level of 0.05. In total, we carried out 16 tests with 4 different combinations of samples (A1 vs A2, A1 vs B0, A2 vs B0, & A0 vs B0). As the number of tests increases, so does the probability of at least one false positive. We can counteract this by adjusting our critical significance level.
#
# The probability of making at least one mistake in the course of k comparisons is:
#
# >\begin{align}
# 1 - (1 - \alpha)^k
# \end{align}
#
# We carried out 16 tests and our alpha was 0.05, so the probability that at least one of the results is incorrect is:
#
# >\begin{align}
# 1 - (1 - 0.05)^{16}
# \end{align}

# In[30]:

# calculate probability of at least one false result across 16 tests at alpha = 0.05
p_false = 1 - (1 - alpha) ** 16
print(round(p_false * 100), '%')

# There is roughly a 56% chance that at least one of our results is a false positive! That's pretty high. We should lower our critical significance level to decrease the probability that one of our test results is false. Let's change alpha to 0.01 and see what happens.
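# For reference, the standard corrections for multiple comparisons suggest an even stricter threshold. This is an optional aside, not part of the original analysis; it is pure arithmetic based on the 16 tests counted above.

# In[ ]:

# optional: formal corrections for multiple comparisons, targeting a family-wise error rate of 0.05
n_tests = 16
family_alpha = 0.05
bonferroni_alpha = family_alpha / n_tests                # Bonferroni correction
sidak_alpha = 1 - (1 - family_alpha) ** (1 / n_tests)    # Sidak correction
print(f'Bonferroni-corrected alpha: {bonferroni_alpha:.4f}')
print(f'Sidak-corrected alpha: {sidak_alpha:.4f}')

# Both come out around 0.003, stricter than the 0.01 used below; 0.01 is a compromise that keeps more statistical power.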
# In[31]:

# calculate probability of at least one false result with the new alpha
alpha = 0.01
p_false = 1 - (1 - alpha) ** 16
print(round(p_false * 100), '%')

# A 15% chance of an error across 16 tests seems more reasonable. Now, let's rerun our tests with the new alpha value to see whether it changes the results of any of the 16 z-tests.

# In[32]:

# A1 vs A2 with new alpha
conversions_z_test(group_A1, group_A2, events, alpha)

# In[33]:

# A1 vs B0 with new alpha
conversions_z_test(group_A1, group_B0, events, alpha)

# In[34]:

# A2 vs B0 with new alpha
conversions_z_test(group_A2, group_B0, events, alpha)

# In[35]:

# A0 vs B0 with new alpha
conversions_z_test(group_A0, group_B0, events, alpha)

# ##### Conclusion
# Lowering alpha from 0.05 to 0.01 changed the result of the test between A2 and B0 for the CartScreenAppear event. At alpha = 0.05 we rejected the null hypothesis and could have concluded that the new fonts made a difference at this stage of the funnel; at alpha = 0.01 we no longer reject it. The earlier result was most likely a false positive caused by a significance level that was too high for the number of tests performed.
#
# We can now say with reasonable confidence that the new fonts did not make a statistically significant difference in conversion rates at any stage of the funnel, for any combination of groups. Changing the fonts is not an effective strategy for increasing conversion rates, so there is no reason to switch away from the old design.
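# As an optional wrap-up, all four group comparisons can be re-run at the final alpha and collected into a single table. This is a sketch, not part of the original analysis; it assumes the groups, `events`, `alpha`, and `conversions_z_test` defined above.

# In[ ]:

# optional wrap-up: all four comparisons at the final alpha in one summary table
comparisons = {'A1 vs A2': (group_A1, group_A2),
               'A1 vs B0': (group_A1, group_B0),
               'A2 vs B0': (group_A2, group_B0),
               'A0 vs B0': (group_A0, group_B0)}

summary = []
for name, (sample_a, sample_b) in comparisons.items():
    result = conversions_z_test(sample_a, sample_b, events, alpha)
    result.insert(0, 'comparison', name)
    summary.append(result)

display(pd.concat(summary, ignore_index=True))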