#!/usr/bin/env python
# coding: utf-8
"""Stratified Cox proportional-hazards model of Telco customer churn.

Notebook export; the ``# In[n]:`` markers delimit the original cells.

Steps, in order:

1. Download the Telco customer-churn CSV into a DataFrame.
2. Index by ``customerID``, drop ``TotalCharges`` and collapse every value that
   starts with ``"No "`` (e.g. "No internet service") to plain ``"No"``.
3. One-hot encode every column except the strata columns, ``tenure`` and
   ``MonthlyCharges``.
4. Fit a ``CoxPHFitter`` with ``tenure`` as the duration, ``Churn_Yes`` as the
   event indicator, stratified on the service columns.
5. Print the summary and draw the coefficient and partial-effect plots.
"""

# In[1]:

# The IPython magics only exist inside an IPython/Jupyter session.  Guard them
# so the exported script also runs under a plain ``python`` interpreter instead
# of dying with a NameError on the very first statement.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None

if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')
    _ipython.run_line_magic('config', "InlineBackend.figure_format = 'retina'")

# Imports stay after the magics to preserve the notebook's execution order.
from matplotlib import pyplot as plt
import pandas as pd
from lifelines import CoxPHFitter


# In[2]:

churn_data = pd.read_csv('https://raw.githubusercontent.com/'
                         'treselle-systems/customer_churn_analysis/'
                         'master/WA_Fn-UseC_-Telco-Customer-Churn.csv')
churn_data.head()


# In[3]:

churn_data = churn_data.set_index('customerID')
churn_data = churn_data.drop(columns=['TotalCharges'])


def _collapse_no_service(value):
    """Return "No" for values such as "No internet service", else *value*."""
    return "No" if str(value).startswith("No ") else value


# The dataset is naturally hierarchical: some columns only apply to some users.
# E.g. if you don't have internet then the column OnlineBackup isn't
# applicable, as its value is "No internet service". We map this back to "No".
# We treat the hierarchical nature by stratifying on the different services a
# user may have.
#
# A per-column ``Series.map`` is used instead of ``DataFrame.applymap``: it is
# element-wise in exactly the same way but is not deprecated in recent pandas.
churn_data = churn_data.apply(lambda column: column.map(_collapse_no_service))

strata_cols = ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService']

# Everything that is neither a stratum nor one of the two columns listed below
# is dummy-encoded; ``drop_first=True`` drops one level per encoded column.
df = pd.get_dummies(
    churn_data,
    columns=churn_data.columns.difference(strata_cols + ['tenure', 'MonthlyCharges']),
    drop_first=True,
)


# In[4]:

cph = CoxPHFitter().fit(
    df,
    duration_col='tenure',
    event_col='Churn_Yes',
    strata=strata_cols,
)


# In[5]:

cph


# In[17]:

cph.print_summary()


# In[18]:

# ``plt.subplots`` returns ``(figure, axes)``; unpack it rather than indexing
# the tuple so that ``ax`` really is the Axes object.
fig, ax = plt.subplots(figsize=(8, 6))
cph.plot(ax=ax)


# In[19]:

# Newer lifelines releases expose this plot as
# ``plot_partial_effects_on_outcome``; older ones only have
# ``plot_covariate_groups``.  Prefer the new name and fall back to the old one.
plot_partial_effects = (
    getattr(cph, 'plot_partial_effects_on_outcome', None)
    or cph.plot_covariate_groups
)
# The trailing semicolon suppresses the cell's repr output in a notebook.
plot_partial_effects('Contract_Two year', values=[0, 1]);


# In[ ]: