# Evaluate `expr` quietly: package start-up messages, warnings and messages
# raised while it is evaluated are all suppressed. Returns the value of `expr`.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
shhh({
library(tidyverse);
library(lubridate);
library(scales);
library(magrittr);
library(dplyr);
# Modeling
library(effsize);
})
library(IRdisplay)
# Emit an HTML/JavaScript snippet into the notebook output. It defines
# `code_toggle()`, which alternately hides and shows the `div.input` elements,
# runs it once when the document is ready, and renders a form button that
# calls it again on demand.
display_html(
  data = '<script>
code_show=true;
function code_toggle() {
if (code_show){
$(\'div.input\').hide();
} else {
$(\'div.input\').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code.">
</form>'
)

# Plot size options read by the notebook's plot renderer.
options(
  repr.plot.width = 15,
  repr.plot.height = 10
)
As a part of the Desktop Improvements project, the Wikimedia Foundation's Web team is introducing a new way to switch to a different language wiki. The original language list appears on the sidebar. The new solution replaces it with 1) a language button at the top of the page and 2) a list, opened by selecting that button, that contains the suggested languages for each user as well as a full list of all available languages for that article. You can find more information on this change and other feature deployments on the Language switching project page.
For the logged-in users, an AB test was performed on the early adopter wikis except fawiki between June 22, 2021 and July 20, 2021. On fawiki, the AB test was performed between June 28 and July 20, 2021. This report details the analysis and results for the language switch AB test.
For the logged-out users, another report details the analysis and results for the deployment of language switch on logged-out users.
The primary goal of the AB test was to test the hypothesis that the group with the new language button and links will get more clicks because the new button is easier to discover.
As part of this analysis, we were also interested in identifying any interesting trends in user behavior.
The AB test was run on a per wiki basis on logged-in users. Users included in the test were randomly assigned to either the control (old language switch) or treatment (new language switch) based on their user ID.
We compared the total numbers of clicks between control and treatment groups. There are three use scenarios: clicks on language links, clicks on input language links, and clicks on interface language links. We reviewed each use scenario separately.
We also reviewed the difference in percentage between the treatment group and the control group, and ran a one-sample t-test to determine whether the difference is statistically significant.
New feature -- language link clicks after new button clicks
# Hive SQL for the treatment ("new feature") click funnel, logged-in users only.
#
#   new_button    - one row per (session, context, wiki) that has a
#                   'compact-language-links-open' event in the 'header' context
#                   with skinVersion 'latest'; button_date is the earliest date
#                   of such an event.
#   lang_switches - one row per 'language-change' event in the
#                   'content-language-switcher' context with skinVersion 'latest'.
#
# The final SELECT left-joins lang_switches onto new_button by session id and
# wiki, so button sessions without any switch are kept (language_switch = 0).
# language_switch is 1 when a matching switch exists and its date is on or
# after button_date; both sides are TO_DATE(dt), so the comparison is by day.
#
# Window: 2021-06-22 to 2021-07-20 (inclusive) over the 13 listed wikis.
# NOTE(review): the report text says the fawiki test started on June 28, but
# the same June 22 lower bound is applied to every wiki here - confirm intent.
query_language_clicks_new <-
"
-- sessions where new lang button was selected
WITH new_button AS (
SELECT
MIN(TO_DATE(dt)) as button_date,
event.web_session_id as session_id,
event.context as open_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- useragent is sanitized
-- AND useragent.is_bot = false
-- only logged-in users
AND event.isanon = false
AND event.action = 'compact-language-links-open'
AND event.context = 'header'
AND event.skinVersion = 'latest'
GROUP BY
event.web_session_id,
event.context,
event.isanon,
wiki
),
lang_switches AS (
SELECT
TO_DATE(dt) as switch_date,
event.web_session_id as session_id,
event.context as switch_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- useragent is sanitized
-- AND useragent.is_bot = false
-- only logged-in users
AND event.isanon = false
AND event.action = 'language-change'
AND event.context='content-language-switcher'
AND event.skinVersion = 'latest'
)
SELECT
new_button.button_date,
lang_switches.switch_date,
new_button.session_id,
new_button.wiki,
new_button.open_context,
-- sessions with lang switch that occured after button clicks
IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch,
lang_switches.switch_context
FROM new_button
LEFT JOIN lang_switches ON
new_button.session_id = lang_switches.session_id AND
new_button.wiki = lang_switches.wiki
"
# To (re)build the on-disk cache of the query result, run these two lines once:
# new_language_link_clicks <- wmfdata::query_hive(query_language_clicks_new)
# write_csv(new_language_link_clicks, file = 'Data_login/new_language_link_clicks.csv')

# Load the cached query result unless the data frame is already present in the
# session. Previously every loader line was commented out, so a fresh
# top-to-bottom run stopped with "object 'new_language_link_clicks' not found".
if (!exists("new_language_link_clicks")) {
  new_language_link_clicks <- read_csv("Data_login/new_language_link_clicks.csv")
}

# Treatment clicks per wiki and day: keep only the joined rows flagged as a
# language switch on/after the button click, then count rows per
# (wiki, switch_date). switch_date is coerced to Date so it can be merged with
# the Date-typed calendar grid built later.
new_lang_link_clicks_by_date <- new_language_link_clicks %>%
  filter(language_switch == 1) %>%
  group_by(wiki, switch_date) %>%
  summarize(n_events_new_language_change = n(), .groups = "drop") %>%
  mutate(switch_date = as.Date(switch_date))
Old feature -- language link clicks on sidebar
# Hive SQL for control clicks on the sidebar language list, logged-in users only.
# Selects 'language-change' events in the 'languages-list' context with
# skinVersion 'latest', and returns Count(*) AS n_events grouped by date, wiki,
# session id, usereditbucket, timetochangelanguage, interfacelanguage,
# contentlanguage and selectedinterfacelanguage.
#
# Window: 2021-06-22 to 2021-07-20 (inclusive) over the 13 listed wikis.
# NOTE(review): the report text says the fawiki test started on June 28, but
# the same June 22 lower bound is applied to every wiki here - confirm intent.
query_old_language_link <-
"
SELECT
TO_DATE(dt) AS `date`,
wiki,
event.web_session_id,
event.usereditbucket,
event.timetochangelanguage,
event.interfacelanguage,
event.contentlanguage,
event.selectedinterfacelanguage,
Count(*) AS n_events
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720')
AND event.context = 'languages-list' and event.action = 'language-change'
AND event.skinVersion = 'latest'
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- useragent is sanitized
-- AND useragent.is_bot = false
-- only logged-in users
AND event.isanon =false
GROUP BY
TO_DATE(dt),
wiki,
event.web_session_id,
event.usereditbucket,
event.timetochangelanguage,
event.interfacelanguage,
event.contentlanguage,
event.selectedinterfacelanguage
"
# To (re)build the on-disk cache of the query result, run these two lines once:
# old_lang_link_clicks <- wmfdata::query_hive(query_old_language_link)
# write_csv(old_lang_link_clicks, file = 'Data_login/old_lang_link_clicks.csv')

# Load the cached query result unless the data frame is already present in the
# session. Previously every loader line was commented out, so a fresh
# top-to-bottom run stopped with "object 'old_lang_link_clicks' not found".
if (!exists("old_lang_link_clicks")) {
  old_lang_link_clicks <- read_csv("Data_login/old_lang_link_clicks.csv")
}

# Sidebar (control) clicks per wiki and day: sum the per-group event counts
# returned by the query. `date` is coerced to Date so it can be merged with the
# Date-typed calendar grid built later.
old_lang_link_clicks_by_date <- old_lang_link_clicks %>%
  group_by(wiki, date) %>%
  summarize(n_events_language_link = sum(n_events), .groups = "drop") %>%
  mutate(date = as.Date(date))
Old feature -- language link clicks after N-more button clicks on sidebar
# Hive SQL for the control "N-more" click funnel, logged-in users only.
#
#   button        - one row per (session, context, wiki) that has a
#                   'compact-language-links-open' event in the 'other' context
#                   with skinVersion 'latest'; button_date is the earliest date
#                   of such an event.
#   lang_switches - one row per 'language-change' event in the
#                   'content-language-switcher' context with skinVersion
#                   'latest'. It also selects event.isanon, which the final
#                   SELECT does not use.
#
# The final SELECT left-joins lang_switches onto button by session id and wiki,
# so button sessions without any switch are kept (language_switch = 0).
# language_switch is 1 when a matching switch exists and its date is on or
# after button_date; both sides are TO_DATE(dt), so the comparison is by day.
#
# Window: 2021-06-22 to 2021-07-20 (inclusive) over the 13 listed wikis.
# NOTE(review): lang_switches filters on the same 'content-language-switcher'
# context as the treatment query; only the button context differs ('other' vs
# 'header') - confirm this is the intended way to separate the two funnels.
query_n_more_lang_switch <-
"
-- sessions where N-more lang button was selected
WITH button AS (
SELECT
MIN(TO_DATE(dt)) as button_date,
event.web_session_id as session_id,
event.context as open_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- useragent is sanitized
-- AND useragent.is_bot = false
-- only logged-in users
AND event.isanon =false
AND event.action = 'compact-language-links-open'
AND event.context = 'other'
AND event.skinVersion = 'latest'
GROUP BY
event.web_session_id,
event.context,
wiki
),
lang_switches AS (
SELECT
TO_DATE(dt) as switch_date,
event.web_session_id as session_id,
event.isanon,
event.context as switch_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210720')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- useragent is sanitized
-- AND useragent.is_bot = false
-- only logged-in users
AND event.isanon =false
AND event.action = 'language-change'
AND event.context='content-language-switcher'
AND event.skinVersion = 'latest'
)
SELECT
button.button_date,
lang_switches.switch_date,
button.session_id,
button.wiki,
button.open_context,
-- sessions with lang switch that occured after button clicks
IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date , 1, 0) AS language_switch,
lang_switches.switch_context
FROM button
LEFT JOIN lang_switches ON
button.session_id = lang_switches.session_id AND
button.wiki = lang_switches.wiki
"
# To (re)build the on-disk cache of the query result, run these two lines once:
# n_more_lang_clicks <- wmfdata::query_hive(query_n_more_lang_switch)
# write_csv(n_more_lang_clicks, file = 'Data_login/n_more_switch_lang_clicks.csv')

# Load the cached query result unless the data frame is already present in the
# session. Previously every loader line was commented out, so a fresh
# top-to-bottom run stopped with "object 'n_more_lang_clicks' not found".
if (!exists("n_more_lang_clicks")) {
  n_more_lang_clicks <- read_csv("Data_login/n_more_switch_lang_clicks.csv")
}

# N-more (control) clicks per wiki and day: keep only the joined rows flagged
# as a language switch on/after the button click and total the flag per
# (wiki, switch_date). switch_date is coerced to Date so it can be merged with
# the Date-typed calendar grid built later.
n_more_lang_clicks_by_date <- n_more_lang_clicks %>%
  filter(language_switch == 1) %>%
  group_by(wiki, switch_date) %>%
  summarize(
    n_events_n_more_language_switch = sum(language_switch),
    .groups = "drop"
  ) %>%
  mutate(switch_date = as.Date(switch_date))
# Wiki database code -> human-readable project name for every wiki in the test.
# This is the single source for both the list of wikis and the facet labels,
# so the two cannot drift apart. ("Wiktionary" was previously misspelled
# "Wikitionary" in the label that is shown on the chart.)
wiki_labels <- c(
  frwiktionary  = "French Wiktionary",
  hewiki        = "Hebrew Wikipedia",
  ptwikiversity = "Portuguese Wikiversity",
  frwiki        = "French Wikipedia",
  euwiki        = "Basque Wikipedia",
  fawiki        = "Persian Wikipedia",
  ptwiki        = "Portuguese Wikipedia",
  kowiki        = "Korean Wikipedia",
  trwiki        = "Turkish Wikipedia",
  srwiki        = "Serbian Wikipedia",
  bnwiki        = "Bengali Wikipedia",
  dewikivoyage  = "German Wikivoyage",
  vecwiki       = "Venetian Wikipedia"
)

date_seq <- seq(as.Date("2021-06-22"), as.Date("2021-07-20"), by = "days")
wiki_seq <- names(wiki_labels)

# Full (date x wiki) grid, so that days with no events still get a row.
df_lang_clicks <- expand.grid(date = date_seq, wiki = wiki_seq)

# Attach the three daily click counts; all.x = TRUE keeps every grid row.
df_lang_clicks <- merge(
  df_lang_clicks, new_lang_link_clicks_by_date,
  by.x = c("wiki", "date"), by.y = c("wiki", "switch_date"), all.x = TRUE
)
df_lang_clicks <- merge(
  df_lang_clicks, old_lang_link_clicks_by_date,
  by.x = c("wiki", "date"), by.y = c("wiki", "date"), all.x = TRUE
)
df_lang_clicks <- merge(
  df_lang_clicks, n_more_lang_clicks_by_date,
  by.x = c("wiki", "date"), by.y = c("wiki", "switch_date"), all.x = TRUE
)

# A missing count means "no events that day": fill with 0. Only the count
# columns are touched, so the key columns are never rewritten.
count_cols <- c(
  "n_events_new_language_change",
  "n_events_language_link",
  "n_events_n_more_language_switch"
)
df_lang_clicks[count_cols][is.na(df_lang_clicks[count_cols])] <- 0

df_lang_clicks <- df_lang_clicks %>%
  mutate(
    # Control = sidebar language-link clicks + clicks made via the N-more list.
    n_events_control = n_events_language_link + n_events_n_more_language_switch,
    # as.character() because expand.grid() turns `wiki` into a factor and a
    # factor index would look labels up by integer code instead of by name.
    wiki_name = unname(wiki_labels[as.character(wiki)])
  )
# Daily language-link clicks per wiki, control vs. treatment, one facet per
# wiki with an independent y axis. The plot is saved as a PNG and then shown.
df_lang_clicks_g <- ggplot(data = df_lang_clicks, mapping = aes(x = date)) +
  geom_line(aes(y = n_events_new_language_change, color = "c2"), size = 1.5) +
  geom_line(aes(y = n_events_control, color = "c1"), size = 1.5) +
  # `scales` spelled out in full; the previous `scale =` only worked through
  # partial argument matching.
  facet_wrap(~wiki_name, nrow = 4, scales = "free_y") +
  # `breaks` pins which key gets which label instead of relying on the
  # default ordering of the colour keys.
  scale_color_manual(
    name = "group",
    values = c("c1" = "#666666", "c2" = "#000099"),
    breaks = c("c1", "c2"),
    labels = c("Control", "Treatment")
  ) +
  labs(
    title = 'Language Link Clicks - logged-in user',
    x = 'Date',
    y = 'Language Link Clicks',
    caption = "User type: logged-in User
Control: clicks on language links on both sidebar and N-more suggestion window
Treatment: clicks on language links on new language suggestion window"
  ) +
  theme_light(base_size = 18) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.text = element_text(size = 14, colour = 'black'),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )

# Make sure the output directory exists; ggsave() fails when it is missing.
dir.create("Graphs_login", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "Graphs_login/lang_link_clicks.png",
  plot = df_lang_clicks_g, width = 60, height = 30, units = "cm", dpi = "screen"
)
df_lang_clicks_g