# Exploratory summary of a claim-frequency data set ----
#
# Loads the freMTPL2freq CSV, prints descriptive statistics for every numeric
# column, caps two columns at extreme quantiles (winsorizing), and prints
# frequency tables plus group means by claim count.

# Warm-up expressions; their values are only printed when run interactively.
# Note that rnorm() advances the random number generator state.
1 + 4
rnorm(10)

# Packages ----
# Install only when the package is missing, so that re-running the script does
# not download and reinstall the packages every time.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
library(Hmisc)
if (!requireNamespace("doBy", quietly = TRUE)) {
  install.packages("doBy")
}
library(doBy)

# Data ----
dat_frq <- read.csv("https://raw.githubusercontent.com/OpenActTextDev/ActuarialRegression/refs/heads/main/CourseCSVData/freMTPL2freq.csv")
str(dat_frq)
# Drop the column named `X` (presumably a row index written with the CSV;
# TODO confirm against the source file).
dat_frq$X <- NULL
summary(dat_frq)
head(dat_frq)

# Helpers ----

# Descriptive statistics for one numeric vector, ignoring missing values.
#
# Args:
#   x: a numeric vector.
# Returns:
#   A named numeric vector of length 8 with elements
#   count (number of non-missing values), mean, sd, min, 1%, 50%, 99.99%, max.
custom_summary <- function(x) {
  # names = FALSE: quantile() otherwise returns a named vector, and c() would
  # glue both names together (e.g. "1%.1%") in the result.
  qs <- quantile(x, probs = c(0.01, 0.5, 0.9999), na.rm = TRUE, names = FALSE)
  c(
    count = sum(!is.na(x)),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    `1%` = qs[1],
    `50%` = qs[2],
    `99.99%` = qs[3],
    max = max(x, na.rm = TRUE)
  )
}

# Cap a numeric vector at its p and (1 - p) quantiles.
#
# Args:
#   x: a numeric vector; missing values are kept as NA.
#   p: tail probability, a single number in [0, 0.5]. Values below the
#      p-quantile are raised to it; values above the (1 - p)-quantile are
#      lowered to it.
# Returns:
#   A vector of the same length as `x` with both tails capped.
winsorize <- function(x, p = 0.01) {
  # p > 0.5 would make the lower bound exceed the upper bound.
  stopifnot(is.numeric(p), length(p) == 1, !is.na(p), p >= 0, p <= 0.5)
  bounds <- quantile(x, probs = c(p, 1 - p), na.rm = TRUE, names = FALSE)
  pmin(pmax(x, bounds[1]), bounds[2])
}

# Column types ----
# is.numeric() is TRUE for both integer and double columns, so the integer
# columns are a subset of the numeric ones.
numeric_id <- vapply(dat_frq, is.numeric, logical(1))
integer_id <- vapply(dat_frq, is.integer, logical(1))
char_id <- vapply(dat_frq, is.character, logical(1))
# drop = FALSE keeps a data frame even when only one column matches.
numeric_vars <- dat_frq[, numeric_id, drop = FALSE]
integer_vars <- dat_frq[, integer_id, drop = FALSE]
char_vars <- dat_frq[, char_id, drop = FALSE]
# str(numeric_vars)
# str(integer_vars)
# str(char_vars)

# Summary of the raw data ----
summary_df <- vapply(numeric_vars, custom_summary, numeric(8))
round(t(summary_df), digits = 2) # Transpose for easier viewing

# Winsorized copy ----
dat_frqMod <- dat_frq
# p = 0.0001 caps at the 0.01% and 99.99% quantiles.
dat_frqMod$VehAge <- winsorize(dat_frqMod$VehAge, p = 0.0001)
dat_frqMod$BonusMalus <- winsorize(dat_frqMod$BonusMalus, p = 0.0001)

# From here on `numeric_vars` is a logical column mask, not a data frame.
numeric_vars <- vapply(dat_frqMod, is.numeric, logical(1))
summary_df <- vapply(
  dat_frqMod[, numeric_vars, drop = FALSE],
  custom_summary,
  numeric(8)
)
round(t(summary_df), digits = 2) # Transpose for easier viewing

# Frequency tables ----
table(dat_frqMod$ClaimNb)
table(dat_frqMod$VehBrand)
table(dat_frqMod$VehGas)
table(dat_frqMod$Region)
table(dat_frqMod$Area)
table(dat_frqMod$VehPower)
table(dat_frqMod$VehGas, dat_frqMod$ClaimNb)

# Group means by claim count ----
# install.packages("Hmisc")
# library(Hmisc)
Hmisc::summarize(dat_frqMod$DrivAge, dat_frqMod$ClaimNb, mean)
Hmisc::summarize(dat_frqMod$VehAge, dat_frqMod$ClaimNb, mean)