from __future__ import print_function import numpy as np import statsmodels.api as sm from statsmodels.formula.api import ols sm.formula.ols import statsmodels.formula.api as smf sm.OLS.from_formula dta = sm.datasets.get_rdataset("Guerry", "HistData", cache=True) df = dta.data[['Lottery', 'Literacy', 'Wealth', 'Region']].dropna() df.head() mod = ols(formula='Lottery ~ Literacy + Wealth + Region', data=df) res = mod.fit() print(res.summary()) res = ols(formula='Lottery ~ Literacy + Wealth + C(Region)', data=df).fit() print(res.params) res = ols(formula='Lottery ~ Literacy + Wealth + C(Region) -1 ', data=df).fit() print(res.params) res1 = ols(formula='Lottery ~ Literacy : Wealth - 1', data=df).fit() res2 = ols(formula='Lottery ~ Literacy * Wealth - 1', data=df).fit() print(res1.params, '\n') print(res2.params) res = smf.ols(formula='Lottery ~ np.log(Literacy)', data=df).fit() print(res.params) def log_plus_1(x): return np.log(x) + 1. res = smf.ols(formula='Lottery ~ log_plus_1(Literacy)', data=df).fit() print(res.params) import patsy f = 'Lottery ~ Literacy * Wealth' y,X = patsy.dmatrices(f, df, return_type='dataframe') print(y[:5]) print(X[:5]) f = 'Lottery ~ Literacy * Wealth' y,X = patsy.dmatrices(f, df, return_type='dataframe') print(y[:5]) print(X[:5]) print(sm.OLS(y, X).fit().summary())