Descriptive statistics¶

get overall info about participants
assess normality of response times (RTs)

In [6]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
import os

alpha = 0.05

dt = pd.read_csv(os.path.join("..", "data", "data.csv"), index_col=0)
agg = pd.read_csv(os.path.join("..", "data", "data_aggregated.csv"), index_col=0)
aggwo = pd.read_csv(os.path.join("..", "data", "data_outliers-replaced_aggregated.csv"), index_col=0)

print("Loading:", len(d), "lines", len(d["ParticipantID"].unique()), "participants")

Loading: 15768 lines 219 participants

Overall info about participants¶

In [7]:

TPP = 72  # there are 72 trials in a session/participant
TPT = 36  # there are 36 trials in a part/test
TPL = 20  # there are 20 trials in a lexical test


print("Number of participants:")
display(pd.DataFrame(d["StudyID"].value_counts() / TPP))

print()
print("Fluent vs. non-fluent:")
display(pd.DataFrame(d.groupby("StudyID")["Fluent"].value_counts() / TPP))

print()
print()
print("Different kinds of designers:")

display(pd.DataFrame(d.groupby("StudyID")["Training"].value_counts() / TPP))

print()
print()
print("Different kinds of designers and which font was first:")
for sid in [1, 2]:
    print("Study #%s" % sid)
    dtt = pd.DataFrame(columns=["Designer", "Non-designer", "total"], index=["sansforgetica", "arial", "total"])
    dtt["Designer"] = dt[(dt["StudyID"] == sid) & (dt["TestID"] == 1) & (dt["Type"] == "lexical") & (dt["Training"] != "Non-designer")]["Font"].value_counts()
    dtt["Non-designer"] = dt[(dt["StudyID"] == sid) & (dt["TestID"] == 1) & (dt["Type"] == "lexical") & (dt["Training"] == "Non-designer")]["Font"].value_counts()
    dtt /=  TPL
    dtt["total"] = dtt.T.sum()
    dtt.loc["total"] = dtt.sum()
    display(dtt)

print()
print()
print("JoM for categories of training:")
print()
display(pd.DataFrame(d.groupby(["StudyID", "Training"])["JoM"].mean()))
print()
print()
print("JoL for categories of training:")
print()
display(pd.DataFrame(d.groupby(["StudyID", "Training"])["JoL"].value_counts() / TPT))

Number of participants:

	StudyID
2	122.0
1	97.0

Fluent vs. non-fluent:

		Fluent
StudyID	Fluent
1	True	89.0
1	False	8.0
2	True	106.0
2	False	16.0


Different kinds of designers:

		Training
StudyID	Training
1	Non-designer	44.0
	Letter designer	25.0
	Graphic designer	17.0
	Other designer	6.0
	Typographer	5.0
2	Non-designer	59.0
	Graphic designer	28.0
	Letter designer	16.0
	Other designer	12.0
	Typographer	7.0


Different kinds of designers and which font was first:
Study #1

	Designer	Non-designer	total
sansforgetica	26.0	21.0	47.0
arial	27.0	23.0	50.0
total	53.0	44.0	97.0

Study #2

	Designer	Non-designer	total
sansforgetica	31.0	29.0	60.0
arial	32.0	30.0	62.0
total	63.0	59.0	122.0


JoM for categories of training:

		JoM
StudyID	Training
1	Graphic designer	56.735294
	Letter designer	55.160000
	Non-designer	53.977273
	Other designer	49.166667
	Typographer	61.000000
2	Graphic designer	53.875000
	Letter designer	59.562500
	Non-designer	52.237288
	Other designer	53.083333
	Typographer	59.000000


JoL for categories of training:

			JoL
StudyID	Training	JoL
1	Graphic designer	very easy to read	13.0
		ok	10.0
		easy to read	5.0
		difficult to read	4.0
		very difficult to read	2.0
	Letter designer	very easy to read	14.0
		difficult to read	13.0
		ok	11.0
		easy to read	9.0
		very difficult to read	3.0
	Non-designer	very easy to read	38.0
		ok	22.0
		difficult to read	14.0
		easy to read	12.0
		very difficult to read	2.0
	Other designer	very easy to read	5.0
		ok	4.0
		difficult to read	2.0
		very difficult to read	1.0
	Typographer	very easy to read	4.0
		difficult to read	3.0
		ok	2.0
		easy to read	1.0
2	Graphic designer	very easy to read	23.0
		difficult to read	12.0
		ok	11.0
		easy to read	10.0
	Letter designer	difficult to read	13.0
		easy to read	7.0
		very easy to read	7.0
		ok	4.0
		very difficult to read	1.0
	Non-designer	very easy to read	51.0
		ok	26.0
		difficult to read	18.0
		easy to read	17.0
		very difficult to read	6.0
	Other designer	very easy to read	8.0
		difficult to read	6.0
		easy to read	5.0
		ok	4.0
		very difficult to read	1.0
	Typographer	ok	6.0
		very easy to read	5.0
		easy to read	3.0

Assess normality of RTs¶

The distributions of RTs are not normal, but close enough.

In [9]:

# assess normality of RTs

fig, axes = plt.subplots(2, 4, figsize=(20, 10))
plt.subplots_adjust(wspace=0, hspace=0.2)
for i, ttype in enumerate(["lexical", "recognition"]):
    rts = agg[agg["Type"] == ttype]["RT"]
    rts.plot.hist(bins=50, ax=axes[i][0], sharey=True, title="%s" % ttype)
    rts = agg[agg["Type"] == ttype]["RTnorm"]
    rts.plot.hist(bins=50, ax=axes[i][1], sharey=True, title="%s (normalized)" % ttype)
    
    rts = aggwo[aggwo["Type"] == ttype]["RT"]
    rts.plot.hist(bins=50, ax=axes[i][2], sharey=True, title="%s (outliers replaced)" % ttype)
    rts = aggwo[aggwo["Type"] == ttype]["RTnorm"]
    rts.plot.hist(bins=50, ax=axes[i][3], sharey=True, title="%s (outliers replaced, normalized)" % ttype)
    
# test for normality
# null hypothesis: RTs come from a normal distribution
for ttype in ["lexical", "recognition"]:
    for col in ["RTnorm"]:
        print("Normality test for %ss in %s task" % (col, ttype))
        display(pg.normality(agg[col]))
        print("Normality test for %ss in %s task (outliers replaced)" % (col, ttype))
        display(pg.normality(aggwo[col]))

Normality test for RTnorms in lexical task

	W	pval	normal
RTnorm	0.95118	1.909781e-16	False

Normality test for RTnorms in lexical task (outliers replaced)

	W	pval	normal
RTnorm	0.949008	7.829168e-17	False

Normality test for RTnorms in recognition task

	W	pval	normal
RTnorm	0.95118	1.909781e-16	False

Normality test for RTnorms in recognition task (outliers replaced)

	W	pval	normal
RTnorm	0.949008	7.829168e-17	False

In [10]:

# Q-Q plots

for ttype in ["lexical", "recognition"]:
    for col in ["RTnorm"]:
        print("Q-Q plot for %s task" % ttype)
        pg.qqplot(aggwo["RTnorm"], dist="norm")

Q-Q plot for lexical task
Q-Q plot for recognition task

In [ ]: