require(ggplot2) creativity = read.csv('https://raw.githubusercontent.com/StanfordStatistics/stats191-data/main/Sleuth3/creativity.csv', header=TRUE) salaries = read.csv('https://raw.githubusercontent.com/StanfordStatistics/stats191-data/main/Sleuth3/salaries.csv', header=TRUE) set.seed(0) head(creativity) extrinsic = creativity$Score[creativity$Treatment == 'Extrinsic'] summary(extrinsic) intrinsic = creativity$Score[creativity$Treatment == 'Intrinsic'] summary(intrinsic) # this plot is for visualization only # students not expected to reproduce fig <- (ggplot(creativity, aes(x=Score, fill=Treatment)) + geom_histogram(aes(y=after_stat(density)), color="#e9ecef", alpha=0.6, position='identity', bins=10) + labs(fill="")) fig head(salaries) female = salaries$Salary[salaries$Sex == 'Female'] summary(female) male = salaries$Salary[salaries$Sex == 'Male'] summary(male) # this plot is for visualization only # students not expected to reproduce fig <- (ggplot(salaries, aes(x=Salary, fill=Sex)) + geom_histogram(aes(y=after_stat(density)), color="#e9ecef", alpha=0.6, position='identity', bins=10) + labs(fill="")) fig boxplot(Salary ~ Sex, data=salaries, col='orange') estimates = t.test(Score ~ Treatment, data=creativity)$estimate estimates treatment_effect = treatment_effect=estimates[2] - estimates[1] treatment_effect null_treatment = sample(creativity$Treatment, 47, replace=FALSE) null_data = data.frame(Score=creativity$Score, Treatment=null_treatment) null_estimates = t.test(Score ~ Treatment, data=null_data)$estimate null_estimates null_treatment_effect = null_estimates[2] - null_estimates[1] null_treatment_effect # this code/plot is for visualization only # students not expected to reproduce estimates = t.test(Score ~ Treatment, data=creativity)$estimate treatment_effect = treatment_effect=estimates[2] - estimates[1] null_treatment_effect = rep(NA, 10000) for (i in 1:10000) { null_treatment = sample(creativity$Treatment, 47, replace=FALSE) null_data = data.frame(Score=creativity$Score, Treatment=null_treatment) null_estimates = t.test(Score ~ Treatment, data=null_data)$estimate null_treatment_effect[i] = null_estimates[2] - null_estimates[1] } treatment_effect = data.frame(treatment_effect=treatment_effect) fig <- (ggplot(data.frame(null_treatment_effect), aes(x=null_treatment_effect)) + geom_histogram(aes(y=after_stat(density)), color="#e9ecef", alpha=0.6, bins=30) + geom_vline(aes(xintercept=treatment_effect), treatment_effect, color='red', linewidth=2) + geom_vline(aes(xintercept=-treatment_effect), treatment_effect, color='red', alpha=0.5, linewidth=1) + geom_density() + labs(fill="")) fig treatment_effect = treatment_effect[1,] length(null_treatment_effect) p_value = mean(abs(null_treatment_effect) > treatment_effect) p_value sex_estimates = t.test(Salary ~ Sex, data=salaries)$estimate sex_effect = sex_estimates[2] - sex_estimates[1] sex_effect # this code/plot is for visualization only # students not expected to reproduce sex_estimates = t.test(Salary ~ Sex, data=salaries)$estimate sex_effect = sex_estimates[2] - sex_estimates[1] null_sex_effect = rep(NA, 10000) for (i in 1:10000) { null_sex = sample(salaries$Sex, length(salaries$Sex), replace=FALSE) null_data = data.frame(Salary=salaries$Salary, Sex=null_sex) null_estimates = t.test(Salary ~ Sex, data=null_data)$estimate null_sex_effect[i] = null_estimates[2] - null_estimates[1] } sex_effect = data.frame(sex_effect=sex_effect) fig <- (ggplot(data.frame(null_sex_effect), aes(x=null_sex_effect)) + geom_histogram(aes(y=after_stat(density)), color="#e9ecef", alpha=0.6, bins=30) + geom_vline(aes(xintercept=sex_effect), sex_effect, color='red', linewidth=2) + geom_vline(aes(xintercept=-sex_effect), sex_effect, color='red', alpha=0.5, linewidth=1) + geom_density() + labs(fill="")) fig sex_effect = sex_effect[1,] length(null_sex_effect) p_value = mean(abs(null_sex_effect) > sex_effect) p_value