In [1]:
import pandas as pd
import numpy as np
In [2]:
# Pin NumPy's global (legacy) random state so every simulated draw in the
# notebook is reproducible on a fresh kernel.
np.random.seed(123)

# Number of simulated users in the experiment.
num_users = 10000

# Random assignment: each user independently gets 0 or 1 with equal chance.
# Later cells treat 1 as the treatment arm and 0 as the control arm.
treated = np.random.randint(low=0, high=2, size=num_users)

# Share of users assigned to the treatment arm (should be close to 0.5).
print(treated.mean())
0.4972
In [3]:
# Simulate a binary conversion outcome for every user under each arm.
# Both arrays are drawn for all users (treated-arm draw first, then the
# control-arm draw), and np.where keeps the one matching each user's arm.
outcome_if_treated = np.random.choice([0,1], size=num_users, p=[0.35,0.65])  # P(convert) = 0.65
outcome_if_control = np.random.choice([0,1], size=num_users, p=[0.40,0.60])  # P(convert) = 0.60
convert = np.where(treated==1, outcome_if_treated, outcome_if_control)

# One row per user: assignment flag and observed conversion.
df = pd.DataFrame({'treated':treated,'convert':convert})

# Per-arm sample size, number of conversions, and conversion rate.
df.groupby(['treated'])['convert'].agg(['count','sum','mean'])
Out[3]:
count sum mean
treated
0 5028 3012 0.599045
1 4972 3273 0.658286

Two-sample t-test for difference in proportions

In [4]:
from scipy.stats import ttest_ind
In [5]:
# Split the conversion column by experiment arm.
in_treatment = df['treated']==1
in_control = df['treated']==0
treated_group = df[in_treatment]['convert']
control_group = df[in_control]['convert']

# Independent two-sample t-test with SciPy's default settings; the result
# unpacks into the test statistic and its p-value.
ttest_result = ttest_ind(treated_group, control_group)
tstat, pvalue = ttest_result

# Report both values rounded to three decimals.
for report_line in ("T-stat: {0:.3f}".format(tstat),
                    "p-value: {0:.3f}".format(pvalue)):
    print(report_line)
T-stat: 6.141
p-value: 0.000

Linear Regression

In [6]:
import statsmodels.formula.api as smf
In [7]:
# Regress the conversion indicator on the treatment dummy. The intercept
# estimates the control-arm conversion rate and the `treated` coefficient
# estimates the difference between the two arms.
formula = 'convert ~ treated'
ols_spec = smf.ols(formula=formula, data=df)
model = ols_spec.fit()

# Full regression table: coefficients, standard errors, t-stats, p-values.
print(model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                convert   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     37.71
Date:                Sat, 06 Jul 2024   Prob (F-statistic):           8.52e-10
Time:                        18:35:17   Log-Likelihood:                -6897.4
No. Observations:               10000   AIC:                         1.380e+04
Df Residuals:                    9998   BIC:                         1.381e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5990      0.007     88.064      0.000       0.586       0.612
treated        0.0592      0.010      6.141      0.000       0.040       0.078
==============================================================================
Omnibus:                    43670.760   Durbin-Watson:                   1.998
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1676.229
Skew:                          -0.529   Prob(JB):                         0.00
Kurtosis:                       1.296   Cond. No.                         2.61
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [8]:
# Fitted conversion rate for the treatment arm: the control-arm baseline
# (intercept) plus the estimated treatment effect.
baseline_rate = model.params['Intercept']
treatment_effect = model.params['treated']
baseline_rate + treatment_effect
Out[8]:
0.658286403861626
In [ ]: