import pandas as pd
import numpy as np
# Simulate random assignment for an A/B test: every user is independently
# placed in control (0) or treatment (1) with equal probability.
num_users = 10000
np.random.seed(123)  # fixed seed so the simulated data is reproducible
treated = np.random.randint(0, 2, size=num_users)
# Fraction of users that landed in the treatment arm.
print(np.mean(treated))
0.4972
# Simulate conversion outcomes. Two full-length candidate outcome vectors are
# drawn (treatment draw first, then control draw); each user's outcome is then
# taken from the vector that matches their assigned arm.
treated_outcomes = np.random.choice([0, 1], size=num_users, p=[0.35, 0.65])
control_outcomes = np.random.choice([0, 1], size=num_users, p=[0.40, 0.60])
convert = np.where(treated == 1, treated_outcomes, control_outcomes)

# One row per user: assignment flag and conversion flag.
df = pd.DataFrame({'treated': treated, 'convert': convert})

# Per-arm sample size, number of conversions, and conversion rate.
df.groupby(['treated'])['convert'].agg(['count', 'sum', 'mean'])
| treated | count | sum | mean |
|---|---|---|---|
| 0 | 5028 | 3012 | 0.599045 |
| 1 | 4972 | 3273 | 0.658286 |
from scipy.stats import ttest_ind
# Split the conversion flags by experiment arm and compare the two arms with
# an independent two-sample t-test.
treated_group = df.loc[df['treated'] == 1, 'convert']
control_group = df.loc[df['treated'] == 0, 'convert']
tstat, pvalue = ttest_ind(treated_group, control_group)

# Report both statistics rounded to three decimals.
print(f"T-stat: {tstat:.3f}")
print(f"p-value: {pvalue:.3f}")
T-stat: 6.141
p-value: 0.000
import statsmodels.formula.api as smf
# Regress the conversion flag on the treatment indicator with ordinary least
# squares and print the full fit summary.
formula = 'convert ~ treated'
model = smf.ols(formula, df).fit()
print(model.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                convert   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     37.71
Date:                Sat, 06 Jul 2024   Prob (F-statistic):           8.52e-10
Time:                        18:35:17   Log-Likelihood:                -6897.4
No. Observations:               10000   AIC:                         1.380e+04
Df Residuals:                    9998   BIC:                         1.381e+04
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5990      0.007     88.064      0.000       0.586       0.612
treated        0.0592      0.010      6.141      0.000       0.040       0.078
==============================================================================
Omnibus:                    43670.760   Durbin-Watson:                   1.998
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1676.229
Skew:                          -0.529   Prob(JB):                         0.00
Kurtosis:                       1.296   Cond. No.                         2.61
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Fitted value for a treated user: intercept plus the coefficient on `treated`.
model.params.loc['Intercept'] + model.params.loc['treated']
0.658286403861626