Permutation Tests

In [1]:

# boilerplate
%matplotlib inline
import math
import numpy as np
import scipy as sp
import pandas as pd
from scipy import stats  # distributions
from scipy import special # special functions
from scipy import random # random variables, distributions, etc.
from scipy.optimize import brentq
from scipy.stats import (binom, hypergeom)
import matplotlib.pyplot as plt
from ipywidgets import widgets

# Relative path of the CSV that the next cell loads with pd.read_csv
khazan_fn = './Data/khazanEtal20.csv'

In [2]:

# Load the raw data; as the preview shows, it is in long format
# (one row per respondent ID and question, with the answer in 'Likert.Score').
df = pd.read_csv(khazan_fn)
df.head()

Out[2]:

	ID	Class	Gender	Age	number.online.courses.taken	TA	Question	Likert.Score	Theme	ScoreNegPos
0	1	3	female	21	3	Jesse	Facilitated.learning	4	teaching	1
1	1	3	female	21	3	Jesse	Provided.helpful.feedback	5	teaching	2
2	1	3	female	21	3	Jesse	Is.an.expert	4	knowledge	1
3	1	3	female	21	3	Jesse	Graded.in.a.timely.manner	5	professional	2
4	1	3	female	21	3	Jesse	Graded.Fairly	5	teaching	2

In [3]:

# Distinct question labels; reused below as the names of the per-question score columns
qs = df['Question'].unique()

In [4]:

# Add one column per question: it carries the Likert score on the rows that
# belong to that question and NaN on every other row.
for q in qs:
    mask = df['Question'].eq(q)
    df[q] = df['Likert.Score'].where(mask)

In [5]:

# Keep only ID (as the index), Gender, TA and the per-question columns.
# NOTE: this cell reassigns `df`, so it cannot be re-run on its own
# (the dropped columns are gone and the sign flip below would be undone).
df = df.drop(columns=[
    'Class', 'Age', 'number.online.courses.taken', 'Question',
    'Likert.Score', 'Theme', 'ScoreNegPos',
]).set_index('ID')
# Flip the sign of the one negatively worded item (presumably so that larger
# values are "better" for every question - confirm against the survey).
df['Did.NOT.respond.to.email.promptly'] = df['Did.NOT.respond.to.email.promptly'].mul(-1)

In [6]:

# Preview: still one row per answer, with only that question's column filled in
df.head()

Out[6]:

	Gender	TA	Facilitated.learning	Provided.helpful.feedback	Is.an.expert	Graded.in.a.timely.manner	Graded.Fairly	Did.NOT.respond.to.email.promptly	Knowledgable.of.course.content	Helpful.feedback.vias.Canvas.discussion	Consistently.fulfilled.responsibilities	Considerate.in.communication	Treated.me.with.respect	Enthusiastic	Professional	TA.again
ID
1	female	Jesse	4.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	female	Jesse	NaN	5.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	female	Jesse	NaN	NaN	4.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	female	Jesse	NaN	NaN	NaN	5.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	female	Jesse	NaN	NaN	NaN	NaN	5.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

In [7]:

# Collapse to one row per respondent: keep the first TA label seen for the ID
# and NaN-ignoring-sum each question column (each answer row contributes a
# value to exactly one question column, the rest being NaN).
agg_dict = {'TA': 'first'}
agg_dict.update({q: np.nansum for q in qs})
scores = (
    df.groupby('ID')
      .agg(agg_dict)
      .reset_index()
)
scores.head()

Out[7]:

	ID	TA	Facilitated.learning	Provided.helpful.feedback	Is.an.expert	Graded.in.a.timely.manner	Graded.Fairly	Did.NOT.respond.to.email.promptly	Knowledgable.of.course.content	Helpful.feedback.vias.Canvas.discussion	Consistently.fulfilled.responsibilities	Considerate.in.communication	Treated.me.with.respect	Enthusiastic	Professional	TA.again
0	1	Jesse	4.0	5.0	4.0	5.0	5.0	-5.0	5.0	5.0	5.0	5.0	5.0	5.0	5.0	4.0
1	2	Jesse	4.0	5.0	5.0	4.0	4.0	-2.0	4.0	3.0	5.0	5.0	5.0	5.0	5.0	5.0
2	3	Jesse	5.0	5.0	5.0	5.0	5.0	-5.0	5.0	5.0	5.0	5.0	5.0	5.0	5.0	5.0
3	4	Jesse	4.0	4.0	4.0	4.0	4.0	-4.0	5.0	4.0	4.0	4.0	5.0	4.0	5.0	4.0
4	5	Emily	4.0	4.0	3.0	4.0	2.0	-4.0	3.0	3.0	5.0	3.0	3.0	3.0	4.0	3.0

In [8]:

# Split the respondents by TA; `emily` is simply every row whose TA is not
# 'Jesse' (assumes these are the only two TA labels in the data).
mask = scores['TA'].eq('Jesse')
jesse = scores[mask].copy()
emily = scores[~mask].copy()
emily.head()

Out[8]:

	ID	TA	Facilitated.learning	Provided.helpful.feedback	Is.an.expert	Graded.in.a.timely.manner	Graded.Fairly	Did.NOT.respond.to.email.promptly	Knowledgable.of.course.content	Helpful.feedback.vias.Canvas.discussion	Consistently.fulfilled.responsibilities	Considerate.in.communication	Treated.me.with.respect	Enthusiastic	Professional	TA.again
4	5	Emily	4.0	4.0	3.0	4.0	2.0	-4.0	3.0	3.0	5.0	3.0	3.0	3.0	4.0	3.0
5	6	Emily	3.0	4.0	3.0	4.0	4.0	-3.0	4.0	3.0	4.0	3.0	3.0	3.0	3.0	4.0
6	7	Emily	4.0	5.0	4.0	5.0	3.0	-5.0	4.0	4.0	4.0	4.0	4.0	4.0	5.0	3.0
10	11	Emily	3.0	3.0	4.0	3.0	2.0	-3.0	3.0	3.0	5.0	5.0	5.0	5.0	3.0	1.0
11	12	Emily	5.0	5.0	5.0	5.0	5.0	-5.0	5.0	5.0	5.0	5.0	5.0	5.0	5.0	5.0

In [9]:

# there were 11 nonresponders for "Emily" and 10 for "Jesse"
# Each nonresponder becomes a placeholder row (ID 0, every question NaN) so the
# group sizes used when permuting include them; np.nanmean ignores their scores.
emily_missing = {'ID': 0, 'TA': 'Emily'}
jesse_missing = {'ID': 0, 'TA': 'Jesse'}
for q in qs:
    emily_missing[q] = np.nan
    jesse_missing[q] = np.nan
# DataFrame.append was removed in pandas 2.0 (and copied the frame on every
# call), so build all placeholder rows at once and concatenate a single time.
# NOTE: re-running this cell adds the placeholder rows again.
emily = pd.concat(
    [emily, pd.DataFrame([emily_missing] * 11, columns=emily.columns)],
    ignore_index=True,
)
jesse = pd.concat(
    [jesse, pd.DataFrame([jesse_missing] * 10, columns=jesse.columns)],
    ignore_index=True,
)
print(emily.describe(), jesse.describe())

               ID  Facilitated.learning  Provided.helpful.feedback  \
count   66.000000             55.000000                  55.000000   
mean    48.878788              4.018182                   4.200000   
std     35.912507              1.146507                   1.128749   
min      0.000000              1.000000                   1.000000   
25%     13.750000              3.500000                   4.000000   
50%     53.000000              4.000000                   5.000000   
75%     76.250000              5.000000                   5.000000   
max    111.000000              5.000000                   5.000000   

       Is.an.expert  Graded.in.a.timely.manner  Graded.Fairly  \
count     55.000000                  55.000000      55.000000   
mean       4.018182                   4.309091       3.981818   
std        1.113734                   0.920401       1.146507   
min        1.000000                   1.000000       1.000000   
25%        3.500000                   4.000000       3.500000   
50%        4.000000                   5.000000       4.000000   
75%        5.000000                   5.000000       5.000000   
max        5.000000                   5.000000       5.000000   

       Did.NOT.respond.to.email.promptly  Knowledgable.of.course.content  \
count                          55.000000                       55.000000   
mean                           -3.909091                        4.400000   
std                             1.251262                        0.852013   
min                            -5.000000                        1.000000   
25%                            -5.000000                        4.000000   
50%                            -4.000000                        5.000000   
75%                            -3.000000                        5.000000   
max                            -1.000000                        5.000000   

       Helpful.feedback.vias.Canvas.discussion  \
count                                55.000000   
mean                                  4.000000   
std                                   1.122167   
min                                   1.000000   
25%                                   3.000000   
50%                                   4.000000   
75%                                   5.000000   
max                                   5.000000   

       Consistently.fulfilled.responsibilities  Considerate.in.communication  \
count                                55.000000                     55.000000   
mean                                  4.600000                      4.509091   
std                                   0.735351                      0.857920   
min                                   2.000000                      1.000000   
25%                                   4.000000                      4.000000   
50%                                   5.000000                      5.000000   
75%                                   5.000000                      5.000000   
max                                   5.000000                      5.000000   

       Treated.me.with.respect  Enthusiastic  Professional   TA.again  
count                55.000000     55.000000     55.000000  55.000000  
mean                  4.563636      4.309091      4.600000   3.800000  
std                   0.787956      1.034099      0.807373   1.176939  
min                   2.000000      1.000000      2.000000   1.000000  
25%                   4.000000      4.000000      5.000000   3.000000  
50%                   5.000000      5.000000      5.000000   4.000000  
75%                   5.000000      5.000000      5.000000   5.000000  
max                   5.000000      5.000000      5.000000   5.000000                  ID  Facilitated.learning  Provided.helpful.feedback  \
count   70.000000             60.000000                  60.000000   
mean    49.200000              3.966667                   4.266667   
std     38.566938              0.882344                   1.006195   
min      0.000000              1.000000                   1.000000   
25%     14.250000              3.750000                   4.000000   
50%     44.500000              4.000000                   5.000000   
75%     83.750000              5.000000                   5.000000   
max    115.000000              5.000000                   5.000000   

       Is.an.expert  Graded.in.a.timely.manner  Graded.Fairly  \
count     60.000000                  60.000000      60.000000   
mean       4.000000                   4.483333       4.066667   
std        0.802538                   0.700887       1.087162   
min        2.000000                   2.000000       1.000000   
25%        3.000000                   4.000000       4.000000   
50%        4.000000                   5.000000       4.000000   
75%        5.000000                   5.000000       5.000000   
max        5.000000                   5.000000       5.000000   

       Did.NOT.respond.to.email.promptly  Knowledgable.of.course.content  \
count                          60.000000                       60.000000   
mean                           -4.066667                        4.366667   
std                             1.205449                        0.735692   
min                            -5.000000                        3.000000   
25%                            -5.000000                        4.000000   
50%                            -5.000000                        5.000000   
75%                            -3.000000                        5.000000   
max                            -1.000000                        5.000000   

       Helpful.feedback.vias.Canvas.discussion  \
count                                60.000000   
mean                                  4.133333   
std                                   0.947193   
min                                   2.000000   
25%                                   3.750000   
50%                                   4.000000   
75%                                   5.000000   
max                                   5.000000   

       Consistently.fulfilled.responsibilities  Considerate.in.communication  \
count                                60.000000                     60.000000   
mean                                  4.533333                      4.533333   
std                                   0.724081                      0.700282   
min                                   2.000000                      2.000000   
25%                                   4.000000                      4.000000   
50%                                   5.000000                      5.000000   
75%                                   5.000000                      5.000000   
max                                   5.000000                      5.000000   

       Treated.me.with.respect  Enthusiastic  Professional   TA.again  
count                60.000000     60.000000     60.000000  60.000000  
mean                  4.666667      4.183333      4.750000   3.950000  
std                   0.680644      0.892372      0.571202   1.048405  
min                   2.000000      2.000000      3.000000   1.000000  
25%                   5.000000      3.000000      5.000000   3.000000  
50%                   5.000000      4.000000      5.000000   4.000000  
75%                   5.000000      5.000000      5.000000   5.000000  
max                   5.000000      5.000000      5.000000   5.000000

In [10]:

from scipy.stats import norm, rankdata
from cryptorandom.sample import random_sample
from cryptorandom.cryptorandom import SHA256
from permute.utils import get_prng, permute
# Seeded cryptorandom SHA256 generator; this one object is passed to every
# sim_npc call below, so (presumably, since the generator is stateful) the
# results depend on the order in which those cells are executed.
prng = SHA256(1234567890)

def abs_mean_diff(x, y):
    """Absolute difference between the NaN-ignoring means of `x` and `y`."""
    gap = np.nanmean(x) - np.nanmean(y)
    return np.abs(gap)

def mean_diff(x, y):
    """Signed difference mean(x) - mean(y), with NaNs ignored in both means."""
    mean_x = np.nanmean(x)
    mean_y = np.nanmean(y)
    return mean_x - mean_y

def sim_npc(X, Y, cols, test_fn, combine="fisher", prng=None, reps=int(10**4), verbose=False):
    """
    Simulated two-sample nonparametric-combination (NPC) permutation test.

    The rows of `X` and `Y` are pooled; in each replication `len(X)` pooled
    rows are drawn without replacement to play the role of `X` and the rest
    play the role of `Y`. The same relabelling is used for every column, and
    `test_fn` is evaluated per column on the relabelled groups.

    Parameters
    ----------
    X, Y : pandas.DataFrame
        The two groups; both must contain every column named in `cols`.
    cols : sequence of column labels
        Columns to test. `npc` requires at least two.
    test_fn : callable
        `test_fn(x, y)` returns a scalar statistic for one column. Larger
        values count as more extreme: the p-values are the fraction of
        simulated statistics that are `>=` the observed one.
    combine : str or callable
        Combining function, passed through to `npc`.
    prng : object or None
        Random number generator handed to `random_sample`.
    reps : int
        Number of random relabellings.
    verbose : bool
        If true, print the sizes and observed statistics, then the running
        per-column p-values about ten times during the simulation.

    Returns
    -------
    tuple
        `(p, ts, ps)`: the combined p-value from `npc`, a dict of observed
        statistics by column, and a dict of per-column p-values
        `(count + 1) / (reps + 1)`.
    """
    XY = pd.concat([X, Y])
    nx = len(X[cols[0]])
    n = len(XY[cols[0]])
    all_i = set(range(n))
    ts = {}
    tv = {}
    for c in cols:
        ts[c] = test_fn(X[c], Y[c])
        tv[c] = []
    if verbose:
        print('\nn: {} nx: {} ts:{}\n'.format(n, nx, ts))
    # Progress interval; never 0, so reps < 10 no longer divides by zero below.
    report_every = max(1, int(reps/10))
    for i in range(reps):
        inx = random_sample(n, size=nx, replace=False, prng=prng)
        iny = list(all_i - set(inx))
        # Take the relabelled groups once per replication, not once per column.
        perm_x = XY.iloc[inx]
        perm_y = XY.iloc[iny]
        for c in cols:
            tv[c].append(test_fn(perm_x[c], perm_y[c]))
        if verbose and i % report_every == 0:
            # i + 1 replications so far, plus one for the observed data
            print(i, [(np.sum(np.array(tv[c]) >= ts[c]) + 1)/(i+2) for c in cols])
    ps = {}
    for c in cols:
        ps[c] = (np.sum(np.array(tv[c]) >= ts[c]) + 1)/(reps+1)
    # One row per replication, one column per test; the observed statistics
    # are appended as the final row before combining.
    dist = np.array([tv[c] for c in cols]).T
    dist = np.append(dist, np.array([ts[c] for c in cols], ndmin=2), axis=0)
    p = npc(np.array([ps[c] for c in cols]), dist, combine=combine)
    return p, ts, ps

def _combfunc_is_monotone_decreasing(pvalues, combfunc):
    """Return False if raising any single p-value (0, then 0.1..0.9) ever increases `combfunc`."""
    grid = np.linspace(0.1, 0.9, 9)
    for i in range(len(pvalues)):
        probe = np.array(pvalues, dtype=float)  # fresh copy for each coordinate
        probe[i] = 0
        previous = combfunc(probe)
        for p in grid:
            probe[i] = p
            current = combfunc(probe)
            if previous < current:
                return False
            previous = current
    return True


def npc(pvalues, distr, combine="fisher", plus1=True):
    r"""
    Combines p-values from individual partial test hypotheses $H_{0i}$ against
    $H_{1i}$, $i=1,\dots,n$ to test the global null hypothesis
    .. math:: \cap_{i=1}^n H_{0i}
    against the alternative
    .. math:: \cup_{i=1}^n H_{1i}
    using an omnibus test statistic.
    Parameters
    ----------
    pvalues : array_like
        Array of p-values to combine
    distr : array_like
        Array of dimension [B, n] where B is the number of permutations and n is
        the number of partial hypothesis tests. The $i$th column of distr contains
        the simulated null distribution of the $i$th test statistic under $H_{0i}$.
    combine : {'fisher', 'liptak', 'tippett'} or function
        The combining function to use. Default is "fisher".
        Valid combining functions must take in p-values as their argument and be
        monotonically decreasing in each p-value.
    plus1 : bool
        flag for whether to add 1 to the numerator and denominator of the
        p-value based on the empirical permutation distribution. 
        Default is True.
    Returns
    -------
    float
        A single p-value for the global test
    Raises
    ------
    ValueError
        If fewer than two p-values are given, if `distr` does not have one
        column per p-value, or if a callable `combine` is not monotonically
        decreasing in each p-value.
    KeyError
        If `combine` is a string that is not one of the named functions.
    """
    # Accept any array_like, as documented (lists previously failed on .shape).
    pvalues = np.asarray(pvalues, dtype=float)
    distr = np.asarray(distr)
    n = len(pvalues)
    B = distr.shape[0]
    if n < 2:
        raise ValueError("One p-value: nothing to combine!")
    if distr.ndim != 2 or n != distr.shape[1]:
        raise ValueError("Mismatch in number of p-values and size of distr")

    if callable(combine):
        # The original called check_combfunc_monotonic, which is neither defined
        # nor imported in this notebook; the private helper above does the check.
        if not _combfunc_is_monotone_decreasing(pvalues, combine):
            raise ValueError(
                "Bad combining function: must be monotonically decreasing in each p-value")
        combine_func = combine
    else:
        combine_library = {
            "fisher": fisher,
            "liptak": liptak,
            "tippett": tippett
        }
        combine_func = combine_library[combine]

    # Convert test statistic distribution to p-values: within each column a
    # statistic's "min" rank is turned into an upper-tail proportion, so the
    # largest statistic gets the smallest value. The smallest statistics can
    # map to values >= 1, which is why Liptak needs the clamp below.
    denom = plus1 + B
    pvalues_from_distr = np.zeros((B, n))
    for j in range(n):
        pvalues_from_distr[:, j] = 1 - rankdata(distr[:, j], method="min")/denom + (1 + plus1)/denom
    if combine == "liptak":
        # keep the argument of the normal quantile strictly below 1
        pvalues_from_distr[pvalues_from_distr >= 1] = 1 - np.finfo(float).eps
    combined_stat_distr = np.apply_along_axis(
        combine_func, 1, pvalues_from_distr)

    observed_combined_stat = combine_func(pvalues)
    return (plus1 + np.sum(combined_stat_distr >= observed_combined_stat)) / denom

def fisher(pvalues):
    r"""
    Apply Fisher's combining function
    .. math:: -2 \sum_i \log(p_i)
    Parameters
    ----------
    pvalues : array_like
        Array of p-values to combine
    Returns
    -------
    float
        Fisher's combined test statistic
    """
    # Sum the logs rather than taking the log of the product: the product of
    # many (or very small) p-values underflows to 0, which would give +inf.
    return -2*np.sum(np.log(pvalues))

def liptak(pvalues):
    r"""
    Apply Liptak's combining function
    .. math:: \sum_i \Phi^{-1}(1-p_i)
    where $\Phi^{-1}$ is the inverse CDF of the standard normal distribution.
    Parameters
    ----------
    pvalues : array_like
        Array of p-values to combine
    Returns
    -------
    float
        Liptak's combined test statistic
    """
    # norm.isf(p) is the same quantity as norm.ppf(1 - p) but does not lose
    # precision for tiny p (1 - p rounds to 1.0 and ppf(1.0) is +inf).
    # np.asarray makes plain lists work, as the docstring promises.
    return np.sum(norm.isf(np.asarray(pvalues, dtype=float)))


def tippett(pvalues):
    r"""
    Apply Tippett's combining function
    .. math:: \max_i \{1-p_i\}
    Parameters
    ----------
    pvalues : array_like
        Array of p-values to combine
    Returns
    -------
    float
        Tippett's combined test statistic
    """
    # np.asarray so that plain lists work too (1 - list raises TypeError)
    return np.max(1 - np.asarray(pvalues, dtype=float))

In [23]:

# Omnibus test across all questions using the absolute difference in means
# per question, combined with Fisher's function; progress is printed.
sim_npc(
    jesse, emily, qs, abs_mean_diff,
    combine="fisher", prng=prng, reps=10**4, verbose=True,
)

n: 136 nx: 70 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': 0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': 0.17424242424242387, 'Graded.Fairly': 0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': 0.0242424242424244, 'Treated.me.with.respect': 0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': 0.15000000000000036, 'TA.again': 0.15000000000000036}

0 [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 0.5, 0.5, 0.5, 1.0]
1000 [0.8303393213572854, 0.7255489021956087, 0.8892215568862275, 0.27245508982035926, 0.7125748502994012, 0.4540918163672655, 0.8293413173652695, 0.49101796407185627, 0.6307385229540918, 0.8982035928143712, 0.4600798403193613, 0.49001996007984033, 0.27345309381237526, 0.47305389221556887]
2000 [0.8361638361638362, 0.7372627372627373, 0.8901098901098901, 0.26823176823176825, 0.7207792207792207, 0.4515484515484515, 0.8416583416583416, 0.5154845154845155, 0.6213786213786214, 0.8931068931068931, 0.476023976023976, 0.494005994005994, 0.27672327672327673, 0.4825174825174825]
3000 [0.8397734843437709, 0.7368421052631579, 0.8884077281812125, 0.2731512325116589, 0.7301798800799467, 0.4563624250499667, 0.8394403730846103, 0.5163224516988674, 0.6332445036642238, 0.896402398401066, 0.4703530979347102, 0.5036642238507661, 0.2741505662891406, 0.4793471019320453]
4000 [0.8320839580209896, 0.7348825587206397, 0.8858070964517741, 0.276111944027986, 0.7311344327836082, 0.463768115942029, 0.8340829585207397, 0.515992003998001, 0.6326836581709145, 0.8958020989505248, 0.46676661669165415, 0.5002498750624688, 0.26936531734132935, 0.47976011994003]
5000 [0.8310675729708117, 0.7313074770091963, 0.8816473410635746, 0.2720911635345862, 0.727109156337465, 0.469812075169932, 0.8348660535785686, 0.5179928028788484, 0.632546981207517, 0.8968412634946021, 0.468812475009996, 0.49780087964814074, 0.2694922031187525, 0.476609356257497]
6000 [0.8328890369876708, 0.73458847050983, 0.8815394868377208, 0.273408863712096, 0.7240919693435521, 0.47184271909363545, 0.8333888703765412, 0.5178273908697101, 0.6344551816061312, 0.8970343218927024, 0.46984338553815397, 0.5009996667777408, 0.26824391869376873, 0.475674775074975]
7000 [0.8341902313624678, 0.7356469580119965, 0.8797486432447872, 0.268494715795487, 0.7233647529277349, 0.4721508140531277, 0.8314767209368752, 0.5187089403027706, 0.6359611539560126, 0.8968866038274779, 0.4705798343330477, 0.5008568980291346, 0.2707797772065124, 0.4777206512425021]
8000 [0.8359160209947513, 0.7345663584103974, 0.8804048987753061, 0.2693076730817296, 0.725568607848038, 0.47100724818795303, 0.83104223944014, 0.5168707823044238, 0.6369657585603599, 0.8979005248687828, 0.46838290427393153, 0.5029992501874532, 0.26905773556610846, 0.4783804048987753]
9000 [0.8373694734503444, 0.7347256165296601, 0.8813596978449233, 0.2684958898022662, 0.7237280604310153, 0.4723394801155299, 0.8317040657631637, 0.5125527660519884, 0.6385247722728282, 0.8965785381026439, 0.4670073317040658, 0.503332592757165, 0.2707176183070429, 0.47833814707842703]

Out[23]:

(0.9065186962607479,
 {'Facilitated.learning': 0.051515151515151736,
  'Provided.helpful.feedback': 0.06666666666666643,
  'Is.an.expert': 0.01818181818181852,
  'Graded.in.a.timely.manner': 0.17424242424242387,
  'Graded.Fairly': 0.0848484848484845,
  'Did.NOT.respond.to.email.promptly': 0.15757575757575726,
  'Knowledgable.of.course.content': 0.0333333333333341,
  'Helpful.feedback.vias.Canvas.discussion': 0.13333333333333375,
  'Consistently.fulfilled.responsibilities': 0.06666666666666643,
  'Considerate.in.communication': 0.0242424242424244,
  'Treated.me.with.respect': 0.10303030303030347,
  'Enthusiastic': 0.12575757575757596,
  'Professional': 0.15000000000000036,
  'TA.again': 0.15000000000000036},
 {'Facilitated.learning': 0.835016498350165,
  'Provided.helpful.feedback': 0.7364263573642635,
  'Is.an.expert': 0.8813118688131187,
  'Graded.in.a.timely.manner': 0.2696730326967303,
  'Graded.Fairly': 0.7234276572342766,
  'Did.NOT.respond.to.email.promptly': 0.47305269473052697,
  'Knowledgable.of.course.content': 0.833916608339166,
  'Helpful.feedback.vias.Canvas.discussion': 0.5122487751224878,
  'Consistently.fulfilled.responsibilities': 0.6366363363663634,
  'Considerate.in.communication': 0.8963103689631037,
  'Treated.me.with.respect': 0.46935306469353066,
  'Enthusiastic': 0.5017498250174982,
  'Professional': 0.27017298270172985,
  'TA.again': 0.4781521847815218})

In [22]:

# Same comparison with the signed difference in means (so each partial test
# is one-sided), combined with Tippett's function.
# NOTE(review): the recorded output below reports nx: 66, which is the size of
# `emily` (jesse has 70 rows), and its signs match emily - jesse; it appears to
# come from a run with the groups in the other order - re-run to refresh.
sim_npc(
    jesse, emily, qs, mean_diff,
    combine="tippett", prng=prng, reps=10**4, verbose=True,
)

n: 136 nx: 66 ts:{'Facilitated.learning': 0.051515151515151736, 'Provided.helpful.feedback': -0.06666666666666643, 'Is.an.expert': 0.01818181818181852, 'Graded.in.a.timely.manner': -0.17424242424242387, 'Graded.Fairly': -0.0848484848484845, 'Did.NOT.respond.to.email.promptly': 0.15757575757575726, 'Knowledgable.of.course.content': 0.0333333333333341, 'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375, 'Consistently.fulfilled.responsibilities': 0.06666666666666643, 'Considerate.in.communication': -0.0242424242424244, 'Treated.me.with.respect': -0.10303030303030347, 'Enthusiastic': 0.12575757575757596, 'Professional': -0.15000000000000036, 'TA.again': -0.15000000000000036}

0 [0.5, 1.0, 0.5, 1.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 0.5, 1.0, 0.5]
1000 [0.43812375249500995, 0.6526946107784432, 0.45209580838323354, 0.8792415169660679, 0.6736526946107785, 0.23552894211576847, 0.43313373253493015, 0.7644710578842315, 0.312375249500998, 0.5898203592814372, 0.811377245508982, 0.24251497005988024, 0.9001996007984032, 0.7734530938123753]
2000 [0.4160839160839161, 0.6503496503496503, 0.45204795204795206, 0.8776223776223776, 0.6618381618381618, 0.24225774225774227, 0.42507492507492506, 0.7592407592407593, 0.31218781218781216, 0.583916083916084, 0.7947052947052947, 0.24675324675324675, 0.8881118881118881, 0.7672327672327672]
3000 [0.4100599600266489, 0.6465689540306462, 0.4473684210526316, 0.8704197201865423, 0.6675549633577615, 0.24550299800133243, 0.4177215189873418, 0.7551632245169887, 0.3117921385742838, 0.570286475682878, 0.7924716855429713, 0.24616922051965356, 0.8824117255163224, 0.7674883411059293]
4000 [0.41179410294852575, 0.6476761619190404, 0.4535232383808096, 0.875312343828086, 0.6619190404797601, 0.24387806096951525, 0.4222888555722139, 0.7616191904047976, 0.31284357821089454, 0.5719640179910045, 0.7913543228385808, 0.24587706146926536, 0.8820589705147426, 0.7698650674662668]
5000 [0.41203518592562977, 0.647141143542583, 0.45621751299480207, 0.8744502199120352, 0.6603358656537385, 0.23930427828868453, 0.4226309476209516, 0.7626949220311875, 0.3170731707317073, 0.5727708916433427, 0.789484206317473, 0.24710115953618553, 0.8798480607756898, 0.7704918032786885]
6000 [0.41036321226257916, 0.6429523492169277, 0.4501832722425858, 0.8755414861712763, 0.6531156281239587, 0.2465844718427191, 0.41836054648450516, 0.7582472509163612, 0.31356214595134957, 0.5659780073308897, 0.7845718093968677, 0.2437520826391203, 0.8787070976341219, 0.7665778073975341]
7000 [0.4124535846900885, 0.6452442159383034, 0.45215652670665524, 0.8774635818337618, 0.6552413596115395, 0.24478720365609827, 0.4181662382176521, 0.7587832047986289, 0.31348186232505, 0.572407883461868, 0.7863467580691231, 0.2493573264781491, 0.8803199085975436, 0.7710654098828906]
8000 [0.4157710572356911, 0.6464633841539615, 0.45601099725068733, 0.8806548362909272, 0.6567108222944263, 0.2459385153711572, 0.4202699325168708, 0.7621844538865283, 0.3189202699325169, 0.5738565358660335, 0.7866783304173957, 0.252061984503874, 0.8782804298925269, 0.7736815796050988]
9000 [0.4177960453232615, 0.6467451677405021, 0.4578982448344812, 0.8803599200177739, 0.6587425016662963, 0.24894467896023106, 0.42223950233281493, 0.762497222839369, 0.32015107753832484, 0.5735392135081093, 0.7862697178404799, 0.25261053099311265, 0.8773605865363253, 0.7739391246389691]

Out[22]:

(0.7869426114777045,
 {'Facilitated.learning': 0.051515151515151736,
  'Provided.helpful.feedback': -0.06666666666666643,
  'Is.an.expert': 0.01818181818181852,
  'Graded.in.a.timely.manner': -0.17424242424242387,
  'Graded.Fairly': -0.0848484848484845,
  'Did.NOT.respond.to.email.promptly': 0.15757575757575726,
  'Knowledgable.of.course.content': 0.0333333333333341,
  'Helpful.feedback.vias.Canvas.discussion': -0.13333333333333375,
  'Consistently.fulfilled.responsibilities': 0.06666666666666643,
  'Considerate.in.communication': -0.0242424242424244,
  'Treated.me.with.respect': -0.10303030303030347,
  'Enthusiastic': 0.12575757575757596,
  'Professional': -0.15000000000000036,
  'TA.again': -0.15000000000000036},
 {'Facilitated.learning': 0.41985801419858015,
  'Provided.helpful.feedback': 0.6459354064593541,
  'Is.an.expert': 0.45835416458354167,
  'Graded.in.a.timely.manner': 0.8809119088091191,
  'Graded.Fairly': 0.6594340565943405,
  'Did.NOT.respond.to.email.promptly': 0.24987501249875013,
  'Knowledgable.of.course.content': 0.42455754424557546,
  'Helpful.feedback.vias.Canvas.discussion': 0.7619238076192381,
  'Consistently.fulfilled.responsibilities': 0.32086791320867913,
  'Considerate.in.communication': 0.5740425957404259,
  'Treated.me.with.respect': 0.7851214878512148,
  'Enthusiastic': 0.2521747825217478,
  'Professional': 0.8780121987801219,
  'TA.again': 0.7752224777522247})

In [20]:

from scipy.stats import chi2
# Upper-tail chi-square probability (4 degrees of freedom) of -2*log(0.08 * 1e-5).
# This has the form of Fisher's combination of two p-values, 0.08 and 1e-5,
# for which df = 2 * 2 - presumably a hand check; confirm what the inputs are.
chi2.sf(-2*np.log(0.08*1.0e-5),df=4)

Out[20]:

1.203092328742278e-05

In [ ]: