#!/usr/bin/env python # coding: utf-8 # # Two-way ANOVA # # # In[1]: import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns import statsmodels.formula.api as smf # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'") # $\def\stderr#1{\mathbf{se}_{#1}}$ # $\def\stderrhat#1{\hat{\mathbf{se}}_{#1}}$ # $\newcommand{\Mean}{\textbf{Mean}}$ # $\newcommand{\Var}{\textbf{Var}}$ # $\newcommand{\Std}{\textbf{Std}}$ # $\newcommand{\Freq}{\textbf{Freq}}$ # $\newcommand{\RelFreq}{\textbf{RelFreq}}$ # $\newcommand{\DMeans}{\textbf{DMeans}}$ # $\newcommand{\Prop}{\textbf{Prop}}$ # $\newcommand{\DProps}{\textbf{DProps}}$ # ## Definitions # ## Formulas # In[ ]: # ## Example # In[3]: jobsatisfaction = pd.read_csv("../datasets/exercises/jobsatisfaction.csv") jobsatisfaction.head() # In[4]: from statsmodels.graphics.factorplots import interaction_plot js = jobsatisfaction interaction_plot(js["education_level"], js["gender"], js["score"]); # In[5]: lm_2way_anova = smf.ols("score ~ education_level*gender", data=jobsatisfaction).fit() from statsmodels.stats.anova import anova_lm anova_lm(lm_2way_anova, typ="II") # # Same results using R # # ``` # > js = read.csv("/Users/ivan/Projects/Minireference/STATSbook/noBSstatsnotebooks/datasets/exercises/jobsatisfaction.csv") # > two_way_anova <- aov(score ~ gender*education_level, data = js) # > library(car) # > Anova(two_way_anova, type=2) # Anova Table (Type II tests) # # Response: score # Sum Sq Df F value Pr(>F) # gender 0.225 1 0.7447 0.392115 # education_level 113.684 2 187.8921 < 2.2e-16 *** # gender:education_level 4.440 2 7.3379 0.001559 ** # Residuals 15.731 52 # --- # Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # ``` # In[6]: import pingouin as pg pg.anova(jobsatisfaction, dv="score", between=["gender", "education_level"], ss_type=2, detailed=True) # In[ ]: # In[7]: # Interaction term is the same as F-test for submodel with no-interaction term lm_noint = smf.ols("score ~ education_level + gender", data=jobsatisfaction).fit() lm_2way_anova.compare_f_test(lm_noint) # In[ ]: # ## Explanations # In[8]: # SUM coding (requied if want to do type III ANOVA) # lm_2way_anova_sum = smf.ols("score ~ C(education_level,Sum)*C(gender,Sum)", data=jobsatisfaction).fit() # anova_lm(lm_2way_anova_sum, typ="III") # ## Discussion # # # 2-way ANOVA types: https://mcfromnz.wordpress.com/2011/03/02/anova-type-iiiiii-ss-explained/ # # Py from scratch https://www.pybloggers.com/2016/03/three-ways-to-do-a-two-way-anova-with-python/ # # more Py examples https://www.kaggle.com/code/alexmaszanski/two-way-anova-with-python # # same data but gets different results https://rpubs.com/Corvinus/917307 # # https://www.statsmodels.org/dev/examples/notebooks/generated/interactions_anova.html # # In[ ]: