shhh <- function(expr) suppressPackageStartupMessages(suppressWarnings(suppressMessages(expr))) shhh({ library(tidyverse); library(lubridate); library(scales); library(magrittr); library(dplyr); # Modeling library(effsize); }) library(IRdisplay) display_html( '
' ) options(repr.plot.width = 15, repr.plot.height = 10) query_language_clicks_new <- " -- sessions where new lang button was selected WITH new_button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon = false AND event.action = 'compact-language-links-open' AND event.context = 'header' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, event.isanon, wiki ), lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.context as switch_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon = false AND event.action = 'language-change' AND event.context='content-language-switcher' AND event.skinVersion = 'latest' ) SELECT new_button.button_date, lang_switches.switch_date, new_button.session_id, new_button.wiki, new_button.open_context, -- sessions with lang switch that occured after button clicks IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch, lang_switches.switch_context FROM new_button LEFT JOIN lang_switches ON new_button.session_id = lang_switches.session_id AND new_button.wiki = lang_switches.wiki " #new_language_link_clicks <- wmfdata::query_hive(query_language_clicks_new) #write_csv(new_language_link_clicks, file = 'Data_login/new_language_link_clicks.csv') #new_language_link_clicks <- read_csv('Data_login/new_language_link_clicks.csv') new_lang_link_clicks_by_date <- new_language_link_clicks %>% filter(language_switch==1) %>% group_by( wiki, switch_date) %>% summarize(n_events_new_language_change = n(), .groups = 'drop') new_lang_link_clicks_by_date$switch_date <- as.Date(new_lang_link_clicks_by_date$switch_date) query_old_language_link <- " SELECT TO_DATE(dt) AS `date`, wiki, event.web_session_id, event.usereditbucket, event.timetochangelanguage, event.interfacelanguage, event.contentlanguage, event.selectedinterfacelanguage, Count(*) AS n_events FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND event.context = 'languages-list' and event.action = 'language-change' AND event.skinVersion = 'latest' AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false GROUP BY TO_DATE(dt), wiki, event.web_session_id, event.usereditbucket, event.timetochangelanguage, event.interfacelanguage, event.contentlanguage, event.selectedinterfacelanguage " #old_lang_link_clicks <- wmfdata::query_hive(query_old_language_link) #write_csv(old_lang_link_clicks, file = 'Data_login/old_lang_link_clicks.csv') #old_lang_link_clicks <- read_csv('Data_login/old_lang_link_clicks.csv') old_lang_link_clicks_by_date <- old_lang_link_clicks %>% group_by( wiki, date) %>% summarize(n_events_language_link = sum(n_events),.groups='drop') old_lang_link_clicks_by_date$date <- as.Date(old_lang_link_clicks_by_date$date) query_n_more_lang_switch <- " -- sessions where N-more lang button was selected WITH button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'compact-language-links-open' AND event.context = 'other' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, wiki ), lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.isanon, event.context as switch_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'language-change' AND event.context='content-language-switcher' AND event.skinVersion = 'latest' ) SELECT button.button_date, lang_switches.switch_date, button.session_id, button.wiki, button.open_context, -- sessions with lang switch that occured after button clicks IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date , 1, 0) AS language_switch, lang_switches.switch_context FROM button LEFT JOIN lang_switches ON button.session_id = lang_switches.session_id AND button.wiki = lang_switches.wiki " #n_more_lang_clicks <- wmfdata::query_hive(query_n_more_lang_switch) #write_csv(n_more_lang_clicks, file = 'Data_login/n_more_switch_lang_clicks.csv') #n_more_lang_clicks <- read_csv('Data_login/n_more_switch_lang_clicks.csv') n_more_lang_clicks_by_date <- n_more_lang_clicks %>% filter(language_switch==1) %>% group_by( wiki, switch_date) %>% summarize(n_events_n_more_language_switch = sum(language_switch),.groups='drop') n_more_lang_clicks_by_date$switch_date <- as.Date(n_more_lang_clicks_by_date$switch_date) date_seq <- seq(as.Date('2021-06-22'), as.Date('2021-07-20'), by = 'days') wiki_seq <- c('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') df_lang_clicks= expand.grid(date=date_seq, wiki=wiki_seq ) df_lang_clicks<- merge(df_lang_clicks , new_lang_link_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_lang_clicks <- merge(df_lang_clicks , old_lang_link_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","date"), all.x = TRUE) df_lang_clicks <- merge(df_lang_clicks , n_more_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_lang_clicks[is.na(df_lang_clicks)] <- 0 df_lang_clicks <- mutate(df_lang_clicks, n_events_control=n_events_language_link+n_events_n_more_language_switch) df_lang_clicks <- df_lang_clicks %>% mutate( wiki_name = case_when( #clarfiy Wiki project names wiki == 'frwiktionary' ~ "French Wikitionary", wiki == 'hewiki' ~ 'Hebrew Wikipedia', wiki == 'ptwikiversity' ~ 'Portuguese Wikiversity', wiki == 'frwiki'~ "French Wikipedia", wiki == 'euwiki' ~ "Basque Wikipedia", wiki == 'fawiki' ~ 'Persian Wikipedia', wiki == 'ptwiki' ~ 'Portuguese Wikipedia', wiki == 'kowiki' ~ 'Korean Wikipedia', wiki == 'trwiki' ~ 'Turkish Wikipedia', wiki == 'srwiki' ~ 'Serbian Wikipedia', wiki == 'bnwiki' ~ 'Bengali Wikipedia', wiki == 'dewikivoyage' ~ 'German Wikivoyage', wiki == 'vecwiki' ~ 'Venetian Wikipedia'), ) df_lang_clicks_g <- ggplot(data=df_lang_clicks, mapping=aes(x=date)) + geom_line(aes(y=n_events_new_language_change , color="c2") , size = 1.5) + geom_line(aes(y=n_events_control, color="c1") , size = 1.5) + facet_wrap(~wiki_name,nrow=4,scale = 'free_y')+ scale_color_manual(values= c("c1"="#666666", "c2"="#000099"), name = "group", labels = c("Control", "Treatment")) + labs(title = 'Language Link Clicks - logged-in user', x = 'Date', y = 'Language Link Clicks', caption = "User type: logged-in User Control: clicks on language links on both sidebar and N-more suggestion window Treatment: clicks on language links on new language suggestion window" ) + theme_light(base_size=18) + theme(legend.position = "bottom", plot.caption = element_text(hjust = 0, face= "italic"), strip.text = element_text(size = 14, colour='black'), axis.text.x = element_text( size = 10 ), plot.title = element_text(size=20)) ggsave("Graphs_login/lang_link_clicks.png", plot = df_lang_clicks_g, width = 60, height = 30, units = "cm", dpi = "screen"); df_lang_clicks_g # add column classifying events in AB testing df_lang_clicks_AB <- df_lang_clicks %>% filter((date >= '2021-06-22' & date <= '2021-07-20' & wiki !='fawiki')| date >= '2021-06-28' & date <= '2021-07-20' & wiki =='fawiki') %>% group_by(wiki_name) %>% summarize(control = sum(n_events_control), treatment= sum(n_events_new_language_change), .groups = 'drop') df_lang_clicks_AB df_lang_clicks_AB_long <- pivot_longer(df_lang_clicks_AB, cols = c('control','treatment' ), names_to="group", values_to="lang_link_clicks") lang_clicks_barchart <- df_lang_clicks_AB_long %>% ggplot(aes(x = group, y= lang_link_clicks, fill=group)) + geom_bar(stat="identity", position = 'dodge') + geom_text(aes(label = paste(lang_link_clicks)), color = "white", position = position_dodge(0.9), vjust = 1.5, size = 3) + facet_wrap(~wiki_name, scale = 'free_y') + labs ( y = "Number of language link clicks", title = "Number of language link clicks in AB test", caption = "User type: logged-in User Control: clicks on language links on sidebar and N-more suggestion window Treatment: clicks on language links on new language suggestion window") + scale_fill_manual(values= c("control"="#666666", "treatment"="#000099"), name = "") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.caption = element_text(hjust = 0, face= "italic"), strip.background =element_rect(fill="white"), plot.title = element_text(hjust = 0.5), text = element_text(size=16), axis.line = element_line(colour = "black"), legend.position = "bottom") lang_clicks_barchart ggsave("Graphs_login/lang_link_clicks_bar.png", plot = lang_clicks_barchart, width = 60, height = 30, units = "cm", dpi = "screen"); df_lang_clicks_AB <- df_lang_clicks_AB %>% mutate( pct_change= case_when( control==0&treatment>0 ~ 100, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control * 100 , 2) ), pct= case_when( control==0&treatment>0 ~ 1, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control , 4) ) ) df_lang_clicks_AB pct_g <- df_lang_clicks_AB %>% ggplot() + geom_hline(yintercept = 0, linetype = "dashed") + geom_pointrange( aes(x = wiki_name, ymin = 0, ymax = pct_change, y = pct_change) ) + geom_text( aes( y = pct_change + ifelse(pct_change < 0, -4, 4), x = wiki_name, label = wiki_name, hjust = ifelse(pct_change < 0, "right", "left") ), size = 5 ) + scale_x_discrete(breaks = NULL) + coord_flip(ylim = c(-300, 300)) + labs (x = "Wiki Project", y = "Percent Change", title = "Percent change in language link clicks in the AB test for the new language switching button") + theme_bw() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.title = element_text(hjust = 0.5), text = element_text(size=16)) pct_g ggsave("Graphs_login/lang_link_clicks_pcg.png", plot = pct_g, width = 60, height = 30, units = "cm", dpi = "screen"); mean(filter(df_lang_clicks_AB, is.numeric(pct_change)&control>0)$pct_change) df_lang_clicks_AB # Represent it options(repr.plot.width = 10, repr.plot.height = 6) p <- df_lang_clicks_AB_long %>% ggplot( aes(x=lang_link_clicks, fill=group)) + geom_histogram( color="#e9ecef", alpha=0.4, position = 'identity', bins=30) + scale_fill_manual(values=c("#69b3a2", "#404080")) + labs ( title = "Check distribution normality -- number of clicks", fill="") + theme_light(base_size=18) p options(repr.plot.width = 10, repr.plot.height = 6) p <- df_lang_clicks_AB %>% ggplot( aes(x=pct_change)) + geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity', binwidth=10) + labs ( title = "Check distribution normality -- change percentage") + theme_light(base_size=18) p with(df_lang_clicks_AB, t.test(x=pct_change, y=NULL,mu=0, alternative = "less", paired = FALSE, conf.level = 0.95 )) query_input_lang_clicks_new <- " -- sessions where new lang button was selected WITH new_button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'compact-language-links-open' AND event.context = 'header' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, wiki ), input_lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.context as switch_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only anonymous users AND event.isanon = false AND event.action='ime-change' AND event.skinVersion = 'latest' ) SELECT new_button.button_date, input_lang_switches.switch_date, new_button.session_id, new_button.wiki, new_button.open_context, -- sessions with lang switch that occured after button clicks IF(input_lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch, input_lang_switches.switch_context FROM new_button LEFT JOIN input_lang_switches ON new_button.session_id = input_lang_switches.session_id AND new_button.wiki = input_lang_switches.wiki " #new_input_lang_clicks <- wmfdata::query_hive(query_input_lang_clicks_new) new_input_lang_clicks_by_date <- new_input_lang_clicks %>% filter(language_switch==1) %>% group_by( wiki, switch_date) %>% summarize(new_input_language_clicks = sum(language_switch), .groups='drop') new_input_lang_clicks$switch_date <- as.Date(new_input_lang_clicks$switch_date, format = "%Y-%m-%d") query_input_language_old <- " -- sessions where setting button was selected WITH setting_button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'settings-open' AND event.context = 'interlanguage' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, wiki ), input_lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.context as switch_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only anonymous users AND event.isanon = false AND event.action='ime-change' AND event.skinVersion = 'latest' ) SELECT setting_button.button_date, input_lang_switches.switch_date, setting_button.session_id, setting_button.wiki, setting_button.open_context, -- sessions with lang switch that occured after button clicks IF(input_lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch, input_lang_switches.switch_context FROM setting_button LEFT JOIN input_lang_switches ON setting_button.session_id = input_lang_switches.session_id AND setting_button.wiki = input_lang_switches.wiki " #old_input_lang_clicks <- wmfdata::query_hive(query_input_language_old) old_input_lang_clicks_by_date <- old_input_lang_clicks %>% filter(language_switch==1) %>% group_by( wiki, switch_date) %>% summarize(old_input_language_clicks = sum(language_switch), .groups='drop') old_input_lang_clicks_by_date$switch_date <- as.Date(old_input_lang_clicks_by_date$switch_date, format = "%Y-%m-%d") df_input_clicks= expand.grid(date=date_seq, wiki=wiki_seq ) df_input_clicks <- merge(df_input_clicks , new_input_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_input_clicks <- merge(df_input_clicks , old_input_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_input_clicks[is.na(df_input_clicks)] <- 0 df_input_clicks <- df_input_clicks %>% mutate( wiki_name = case_when( #clarfiy Wiki project names wiki == 'frwiktionary' ~ "French Wikitionary", wiki == 'hewiki' ~ 'Hebrew Wikipedia', wiki == 'ptwikiversity' ~ 'Portuguese Wikiversity', wiki == 'frwiki'~ "French Wikipedia", wiki == 'euwiki' ~ "Basque Wikipedia", wiki == 'fawiki' ~ 'Persian Wikipedia', wiki == 'ptwiki' ~ 'Portuguese Wikipedia', wiki == 'kowiki' ~ 'Korean Wikipedia', wiki == 'trwiki' ~ 'Turkish Wikipedia', wiki == 'srwiki' ~ 'Serbian Wikipedia', wiki == 'bnwiki' ~ 'Bengali Wikipedia', wiki == 'dewikivoyage' ~ 'German Wikivoyage', wiki == 'vecwiki' ~ 'Venetian Wikipedia'), ) options(repr.plot.width = 15, repr.plot.height = 10) input_lang_click_g <- ggplot(data=filter( df_input_clicks, (date >= '2021-06-22')&(date <= '2021-07-20')), mapping=aes(x=date)) + geom_line(aes(y=new_input_language_clicks , color="c2") , size = 1.5) + geom_line(aes(y=old_input_language_clicks, color="c1") , size = 1.5) + facet_wrap(~wiki_name,nrow=3,scale = 'free_y')+ scale_color_manual(values= c("c1"="#666666", "c2"="#000099"), name = "group", labels = c("Control", "Treatment")) + labs(title = 'Input Language Link Clicks - logged-in user', x = 'Date', y = 'Input Link Clicks') + theme_light(base_size = 16) + theme(legend.position = "bottom", strip.text = element_text(size = 14, colour='black'), axis.text.x = element_text( size = 10 ), plot.title = element_text(size=20)); ggsave("Graphs_login/input_lang.png", plot = input_lang_click_g , width = 60, height = 30, units = "cm", dpi = "screen"); input_lang_click_g # add column classifying events in AB testing df_input_clicks_AB <- df_input_clicks %>% filter((date >= '2021-06-22' & date <= '2021-07-20' & wiki !='fawiki')| date >= '2021-06-28' & date <= '2021-07-20' & wiki =='fawiki') %>% group_by(wiki_name) %>% summarize(control = sum(old_input_language_clicks), treatment= sum(new_input_language_clicks), .groups = 'drop') df_input_clicks_AB df_input_clicks_AB_L <- pivot_longer(df_input_clicks_AB, cols = c('control','treatment' ), names_to="group", values_to="input_lang_clicks") input_clicks_barchart <- df_input_clicks_AB_L %>% ggplot(aes(x = group, y= input_lang_clicks, fill=group)) + geom_bar(stat="identity", position = 'dodge') + geom_text(aes(label = paste(input_lang_clicks)), color = "white", position = position_dodge(0.9), vjust = 1.5, size = 3) + facet_wrap(~wiki_name, scale = 'free_y') + labs ( y = "Number of input language clicks", title = "Number of input language clicks in AB test", caption = "User type: logged-in User Control: clicks on input language links on setting window Treatment: clicks on input language links on new language suggestion window") + scale_fill_manual(values= c("treatment"="#000099", "control"="#666666"), name = "") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.caption = element_text(hjust = 0, face= "italic"), strip.background =element_rect(fill="white"), plot.title = element_text(hjust = 0.5), text = element_text(size=16), axis.line = element_line(colour = "black"), legend.position = "bottom") input_clicks_barchart ggsave("Graphs_login/input_clicks_bar.png", plot = input_clicks_barchart , width = 60, height = 30, units = "cm", dpi = "screen"); # determine percent change df_input_clicks_AB <- df_input_clicks_AB %>% mutate( pct_change= case_when( control==0&treatment>0 ~ 100, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control * 100 , 2) ), pct= case_when( control==0&treatment>0 ~ 1, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control , 4) ) ) df_input_clicks_AB # chart percent change in logged-in users between control and treatment groups for each of the early adopter wikis pct_input_g <- df_input_clicks_AB %>% ggplot() + geom_hline(yintercept = 0, linetype = "dashed") + geom_pointrange( aes(x = wiki_name, ymin = 0, ymax = pct_change, y = pct_change) ) + geom_text( aes( y = pct_change + ifelse(pct_change < 0, -4, 4), x = wiki_name, label = wiki_name, hjust = ifelse(pct_change < 0, "right", "left") ), size = 5 ) + scale_x_discrete(breaks = NULL) + coord_flip(ylim = c(-600, 600)) + labs (x = "Wiki", y = "Percent Change", title = "Percent change in input language clicks in the AB test for the new language switching button") + theme_bw() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.title = element_text(hjust = 0.5), text = element_text(size=16)) pct_input_g ggsave("Graphs_login/input_clicks_pct.png", plot = pct_input_g , width = 60, height = 30, units = "cm", dpi = "screen"); mean((filter(df_input_clicks_AB, is.numeric(pct_change) & control>0))$pct_change, na.rm=TRUE) query_interface_language_clicks_new <- " -- sessions where new lang button was selected WITH new_button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, event.isanon, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'compact-language-links-open' AND event.context = 'header' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, event.isanon, wiki ), interface_lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.context as switch_context, event.isanon, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'language-change' AND event.context='interface' AND event.skinVersion = 'latest' ) SELECT new_button.button_date, interface_lang_switches.switch_date, new_button.session_id, new_button.isanon, new_button.wiki, new_button.open_context, -- sessions with lang switch that occured after button clicks IF(interface_lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch, interface_lang_switches.switch_context FROM new_button LEFT JOIN interface_lang_switches ON new_button.session_id = interface_lang_switches.session_id AND new_button.wiki = interface_lang_switches.wiki AND new_button.isanon = interface_lang_switches.isanon " #new_interface_language_clicks <- wmfdata::query_hive(query_interface_language_clicks_new) new_interface_lang_clicks_by_date <- new_interface_language_clicks %>% filter(language_switch==1) %>% group_by( wiki, switch_date) %>% summarize(new_interface_language_clicks = sum(language_switch),.groups="drop") new_interface_lang_clicks_by_date$switch_date <- as.Date(new_interface_lang_clicks_by_date$switch_date) query_interface_language_click_old <- " -- sessions where setting button was selected WITH setting_button AS ( SELECT MIN(TO_DATE(dt)) as button_date, event.web_session_id as session_id, event.context as open_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'settings-open' AND event.context = 'interlanguage' AND event.skinVersion = 'latest' GROUP BY event.web_session_id, event.context, wiki ), interface_lang_switches AS ( SELECT TO_DATE(dt) as switch_date, event.web_session_id as session_id, event.context as switch_context, wiki as wiki FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7,8) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged-in users AND event.isanon =false AND event.action = 'language-change' AND event.context='interface' AND event.skinVersion = 'latest' ) SELECT setting_button.button_date, interface_lang_switches.switch_date, setting_button.session_id, setting_button.wiki, setting_button.open_context, -- sessions with lang switch that occured after button clicks IF(interface_lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch, interface_lang_switches.switch_context FROM setting_button LEFT JOIN interface_lang_switches ON setting_button.session_id = interface_lang_switches.session_id AND setting_button.wiki = interface_lang_switches.wiki " #old_interface_language_clicks <- wmfdata::query_hive(query_interface_language_click_old) df_interface_clicks= expand.grid(date=date_seq, wiki=wiki_seq ) df_interface_clicks <- merge(df_interface_clicks , new_interface_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_interface_clicks <- merge(df_interface_clicks , old_interface_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df_interface_clicks[is.na(df_interface_clicks)] <- 0 df_interface_clicks <- df_interface_clicks %>% mutate( wiki_name = case_when( #clarfiy Wiki project names wiki == 'frwiktionary' ~ "French Wikitionary", wiki == 'hewiki' ~ 'Hebrew Wikipedia', wiki == 'ptwikiversity' ~ 'Portuguese Wikiversity', wiki == 'frwiki'~ "French Wikipedia", wiki == 'euwiki' ~ "Basque Wikipedia", wiki == 'fawiki' ~ 'Persian Wikipedia', wiki == 'ptwiki' ~ 'Portuguese Wikipedia', wiki == 'kowiki' ~ 'Korean Wikipedia', wiki == 'trwiki' ~ 'Turkish Wikipedia', wiki == 'srwiki' ~ 'Serbian Wikipedia', wiki == 'bnwiki' ~ 'Bengali Wikipedia', wiki == 'dewikivoyage' ~ 'German Wikivoyage', wiki == 'vecwiki' ~ 'Venetian Wikipedia'), ) interface_lang_click_g <- ggplot(data=filter( df_interface_clicks, (date >= '2021-06-22')&(date <= '2021-07-20')), mapping=aes(x=date)) + geom_line(aes(y=new_interface_language_clicks , color="c2") , size = 1.5) + geom_line(aes(y=old_interface_language_clicks, color="c1") , size = 1.5) + facet_wrap(~wiki_name,nrow=3,scale = 'free_y')+ scale_color_manual(values= c("c1"="#666666", "c2"="#000099"), name = "group", labels = c("Control", "Treatment")) + labs(title = 'Interface Language Link Clicks - logged-in user', x = 'Date', y = 'Language Link Clicks') + theme_light(base_size = 16) + theme(legend.position = "bottom", strip.text = element_text(size = 14, colour='black'), axis.text.x = element_text( size = 10 ), plot.title = element_text(size=20)); ggsave("Graphs_login/interface_lang.png", plot = interface_lang_click_g , width = 60, height = 30, units = "cm", dpi = "screen"); interface_lang_click_g # add column classifying events in AB testing df_interface_clicks_AB <- df_interface_clicks %>% filter((date >= '2021-06-22' & date <= '2021-07-20' & wiki !='fawiki')| date >= '2021-06-28' & date <= '2021-07-20' & wiki =='fawiki') %>% group_by(wiki_name) %>% summarize(control = sum(old_interface_language_clicks), treatment= sum(new_interface_language_clicks), .groups = 'drop') df_interface_clicks_AB df_interface_clicks_AB_L <- pivot_longer(df_interface_clicks_AB, cols = c('control','treatment' ), names_to="group", values_to="interface_lang_clicks") interface_clicks_barchart <- df_interface_clicks_AB_L %>% ggplot(aes(x = group, y= interface_lang_clicks, fill=group)) + geom_bar(stat="identity", position = 'dodge') + geom_text(aes(label = paste(interface_lang_clicks)), color = "white", position = position_dodge(0.9), vjust = 1.5, size = 3) + facet_wrap(~wiki_name, scale = 'free_y') + labs ( y = "Number of interface language clicks", title = "Number of interface language clicks", caption = "User type: logged-in User Control: clicks on interface language links on setting window Treatment: clicks on interface language links on new language suggestion window") + scale_fill_manual(values= c("treatment"="#000099", "control"="#666666"), name = "") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.caption = element_text(hjust = 0, face= "italic"), strip.background =element_rect(fill="white"), plot.title = element_text(hjust = 0.5), text = element_text(size=16), axis.line = element_line(colour = "black"), legend.position = "bottom") interface_clicks_barchart ggsave("Graphs_login/interface_lang_bar.png", plot = interface_clicks_barchart , width = 60, height = 30, units = "cm", dpi = "screen"); # determine percent change df_interface_clicks_AB <- df_interface_clicks_AB %>% mutate( pct_change= case_when( control==0&treatment>0 ~ 100, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control * 100 , 2) ), pct= case_when( control==0&treatment>0 ~ 1, control==0&treatment==0 ~ 0, TRUE ~ round((treatment-control)/control , 4) ) ) df_interface_clicks_AB # chart percent change in logged-in user between control and treatment groups for each of the early adopter wikis pct_interface_g <- df_interface_clicks_AB %>% ggplot() + geom_hline(yintercept = 0, linetype = "dashed") + geom_pointrange( aes(x = wiki_name, ymin = 0, ymax = pct_change, y = pct_change) ) + geom_text( aes( y = pct_change + ifelse(pct_change < 0, -4, 4), x = wiki_name, label = wiki_name, hjust = ifelse(pct_change < 0, "right", "left") ), size = 5 ) + scale_x_discrete(breaks = NULL) + coord_flip(ylim = c(-600, 600)) + labs (x = "Wiki", y = "Percent Change", title = "Percent change in interface language clicks in the AB test for the new language switching button") + theme_bw() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.title = element_text(hjust = 0.5), text = element_text(size=16)) pct_interface_g ggsave("Graphs_login/interface_lang_pct.png", plot = pct_interface_g , width = 60, height = 30, units = "cm", dpi = "screen"); mean(df_interface_clicks_AB$pct_change, na.rm=TRUE) # number of clicks on new button query_new_button <- " SELECT TO_DATE(dt) as button_date, wiki as wiki, count(1) AS new_button_clicks FROM event_sanitized.universallanguageselector WHERE year = 2021 and month in (6,7) AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720') AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') -- useragent is sanitized -- AND useragent.is_bot = false -- only logged in users AND event.isanon = false AND event.action = 'compact-language-links-open' AND event.context = 'header' AND event.skinVersion = 'latest' GROUP BY TO_DATE(dt), wiki " #df_query_new_button_events <- wmfdata::query_hive(query_new_button) date_seq <- seq(as.Date('2021-06-22'), as.Date('2021-07-20'), by = 'days') wiki_seq <- c('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki', 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki') df= expand.grid(date=date_seq, wiki=wiki_seq ) df<- merge(df , df_lang_clicks, by.x=c("wiki", "date"),by.y=c("wiki","date"), all.x = TRUE ) df<- merge(df , df_query_new_button_events, by.x=c("wiki", "date"), by.y=c("wiki","button_date"), all.x = TRUE) df <- merge(df , new_input_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df <- merge(df , new_interface_lang_clicks_by_date, by.x=c("wiki", "date"), by.y=c("wiki","switch_date"), all.x = TRUE) df[is.na(df)] <- 0 df <- mutate(df, total_new_switch=n_events_new_language_change+new_input_language_clicks+new_interface_language_clicks) df <- mutate(df, not_switch_percent=round((new_button_clicks-total_new_switch)*100/new_button_clicks,2)) df <- mutate(df, non_switch_clicks=new_button_clicks-total_new_switch) write_csv(df, file = 'Data_login/non-switch.csv') df_graph <- ggplot(data=filter(df, date<'2021-07-20'), mapping=aes(x=date, y=non_switch_clicks, color=wiki)) + geom_line(size = 1.5) + labs(title = 'New button non-switch clicks', x = 'Date', y = 'Non-switch clicks') + scale_color_manual(values=c('#800000', '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080')) + theme_light(base_size = 18)+ theme(legend.position = "bottom" , legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold") ); df_graph ggsave("Graphs_login/non_switch_clicks.png", plot = df_graph , width = 60, height = 30, units = "cm", dpi = "screen"); df_nonswitch_sum <- df %>% group_by( wiki_name) %>% summarize(button_clicks = sum(new_button_clicks), non_switch_clicks=sum(non_switch_clicks), .groups = 'drop') df_nonswitch_sum <- df_nonswitch_sum %>% mutate( non_switch_rate = case_when( button_clicks==0 ~ 0, TRUE ~ round(non_switch_clicks*100/button_clicks , 2) ) ) df_nonswitch_sum df_nonswitch_sum_long <- pivot_longer(df_nonswitch_sum, cols = c('button_clicks','non_switch_clicks' ), names_to="category", values_to="clicks") nonswitch_clicks_barchart <- df_nonswitch_sum_long %>% ggplot(aes(x = category, y= clicks, fill=category)) + geom_bar(stat="identity", position = 'stack') + geom_text(aes(label = paste(clicks)), color = "white", position = position_dodge(0.9), vjust = 1.5, size = 3) + facet_wrap(~wiki_name, scale = 'free_y') + labs ( y = "Number of button clicks", title = "Number of non-switch button clicks") + scale_fill_manual(values= c("non_switch_clicks"="#000099", "button_clicks"="#666666"), name = "") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.caption = element_text(hjust = 0, face= "italic"), strip.background =element_rect(fill="white"), plot.title = element_text(hjust = 0.5), text = element_text(size=14), axis.line = element_line(colour = "black"), legend.position = "bottom") nonswitch_clicks_barchart ggsave("Graphs_login/non_switch_bar.png", plot = nonswitch_clicks_barchart , width = 60, height = 30, units = "cm", dpi = "screen"); # How frequently do users click the new button but not switch to a language pct_nonswitch_g <- df_nonswitch_sum %>% ggplot() + geom_hline(yintercept = 0, linetype = "dashed") + geom_pointrange( aes(x = wiki_name, ymin = 0, ymax = non_switch_rate, y = non_switch_rate) ) + geom_text( aes( y = non_switch_rate + ifelse(non_switch_rate < 0, -4, 4), x = wiki_name, label = wiki_name, hjust = ifelse(non_switch_rate < 0, "right", "left") ), size = 5 ) + scale_x_discrete(breaks = NULL) + coord_flip(ylim = c(0, 100)) + labs (x = "Wiki", y = "Non-switch Click Rate", title = "Non-switch Click Rate") + theme_bw() + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), plot.title = element_text(hjust = 0.5), text = element_text(size=16)) pct_nonswitch_g ggsave("Graphs_login/non_switch_pct.png", plot = pct_nonswitch_g , width = 60, height = 30, units = "cm", dpi = "screen"); mean(df_nonswitch_sum$non_switch_rate, na.rm=TRUE) mean(filter(df_nonswitch_sum, wiki_name!='German Wikivoyage'& wiki_name!='Portuguese Wikiversity' )$non_switch_rate, na.rm=TRUE)