# Run this first so it's ready by the time you need it install.packages("supernova") install.packages("dplyr") install.packages("ggformula") library(supernova) library(dplyr) library(ggformula) GSS <- read.csv("https://raw.githubusercontent.com/smburns47/Psyc158/main/GSS.csv") #smaller dataset, more similar in size to most psychology studies set.seed(10) GSS_subset <- slice_sample(GSS, n=100) model_obj <- lm(highest_year_of_school_completed ~ highest_year_school_completed_mother + highest_year_school_completed_father, data = GSS_subset) summary(model_obj) model_summary <- summary(model_obj) model_summary$r.squared supernova(model_obj) #creating an empty vector of 1000 spots null_R2 <- vector(length=1000) #generate 1000 unique samples, saving each R2 for (i in 1:1000) { GSS_subset$shuffled_mother <- sample(x=GSS_subset$highest_year_school_completed_mother, size=length(GSS_subset$highest_year_school_completed_mother), replace=FALSE) GSS_subset$shuffled_father <- sample(x=GSS_subset$highest_year_school_completed_father, size=length(GSS_subset$highest_year_school_completed_father), replace=FALSE) model <- lm(highest_year_of_school_completed ~ shuffled_mother + shuffled_father, data=GSS_subset) null_R2[i] <- summary(model)$r.squared } R2_df <- data.frame(null_R2) gf_histogram( ~ null_R2, data=R2_df) #creating an empty vector of 1000 spots null_b1 <- vector(length=1000) #generate 1000 unique samples, saving each b1 for (i in 1:1000) { GSS_subset$shuffled_mother <- sample(x=GSS_subset$highest_year_school_completed_mother, size=length(GSS_subset$highest_year_school_completed_mother), replace=FALSE) model <- lm(highest_year_of_school_completed ~ shuffled_mother, data=GSS_subset) null_b1[i] <- model$coefficients[[2]] } b1_df <- data.frame(null_b1) #cut-off values for extremeness b1s_sd <- sd(b1_df$null_b1) high_cutoff <- sd(b1_df$null_b1)*1.96 low_cutoff <- sd(b1_df$null_b1)*-1.96 #marking something as extreme if it is greater than 1.96*sd or less than -1.96*sd b1_df$extreme <- b1_df$null_b1 > high_cutoff | b1_df$null_b1 < low_cutoff gf_histogram(~ null_b1, data = b1_df, fill = ~extreme) simple_model <- lm(highest_year_of_school_completed ~ highest_year_school_completed_mother, data=GSS_subset) b1 <- simple_model$coefficients[[2]] quantile(null_b1, 0.025) quantile(null_b1, 0.975) b1 b1 > quantile(null_b1, 0.975) | b1 < quantile(null_b1, 0.025) full_model <- lm(highest_year_of_school_completed ~ highest_year_school_completed_mother + highest_year_school_completed_father, data = GSS_subset) R2 <- summary(full_model)$r.squared quantile(null_R2, 0.95) R2 R2 > quantile(null_R2, 0.95) summary(model_obj) #Fit a linear model predicting highest_year_of_school_completed with just highest_year_school_completed_father #in GSS_subset father_model <- #YOUR CODE HERE summary(father_model) #Fit a linear model predicting highest_year_of_school_completed with highest_year_school_completed_father # and respondents_sex full_model <- #YOUR CODE HERE summary(full_model) #R-squared model_summary$r.squared #adjusted R-quares model_summary$adj.r.squared install.packages('performance') library(performance) test_performance(father_model, full_model)