# Evaluate `expr` quietly: messages, warnings and package start-up messages
# signalled while it runs are suppressed, and the value of `expr` is returned.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
shhh({
library(tidyverse);
library(lubridate);
library(scales);
library(magrittr);
library(dplyr);
})
library(IRdisplay)
# HTML/JS snippet that adds a button for showing or hiding the notebook's code
# cells (`div.input`). `code_toggle` runs once when the document is ready, so
# the code cells start out hidden.
code_toggle_html <- '<script>
code_show=true;
function code_toggle() {
if (code_show){
$(\'div.input\').hide();
} else {
$(\'div.input\').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code.">
</form>'
display_html(code_toggle_html)

# Plot dimensions used when figures are rendered in the notebook.
options(repr.plot.width = 15, repr.plot.height = 10)
As part of the Desktop Improvements project, the Wikimedia Foundation's Web team is introducing a new way to switch to a different language wiki. The original language list appears on the sidebar. The new solution replaces the original one by providing 1) a language button at the top of the page, and 2) a list of languages, opened by selecting that button, that contains the suggested languages for each user as well as a full list of all available languages for that article. You can find more information on this change and other feature deployments on the Language switching project page.
For the logged-in users, an AB test was performed on the early adopter wikis except fawiki between June 22, 2021 and July 20, 2021. On fawiki, the AB test was performed between June 28 and July 20, 2021. We will share the analysis for the AB test in another report.
For the logged-out users, the new feature was originally planned to be enabled after the AB test, on July 20, 2021. However, it was accidentally enabled for logged-out users together with the AB test, as discussed in T289200. This report details the analysis and results for the deployment of the language switcher to logged-out users.
The goal of the pre-post analysis is to identify any changes in user behavior that might have resulted from the deployment of the new language feature. Our hypothesis is that there will be more clicks on the new language button and links because it’s easier to discover.
We reviewed the total number of clicks on language links and input language links separately. We did not review the number of clicks on interface language links because they are available only to logged-in users, not to logged-out users.
We selected the 7 days before the deployment as the pre-deployment period. Due to caching, not all users saw the new version immediately after deployment. Therefore, we defined the first 15 days after deployment as the catch-up period and excluded them from the analysis. The post-period analysis reviewed the total number of clicks from the 16th day to the 23rd day after the deployment.
The clicks on input language links within 7 days are too few to show a consistent trend. We therefore also reviewed the average daily clicks over a longer period of time. The pre period was extended back to the earliest date for which data is available (June 11, 2021). The post period was extended to August 31, 2021.
New feature -- language link clicks after new button clicks
# HiveQL: for logged-out, non-bot sessions on the listed wikis between
# 2021-06-22 and 2021-08-31 (skinVersion = 'latest'):
#   * new_button    - earliest date on which each session opened the language
#                     list from the header button;
#   * lang_switches - every 'language-change' event made through the
#                     content language switcher.
# The final SELECT left-joins the two on (session, wiki) and sets
# language_switch = 1 when a switch happened on or after the button date.
query_language_clicks_new <- "
-- sessions where new lang button was selected
WITH new_button AS (
SELECT
MIN(TO_DATE(dt)) as button_date,
event.web_session_id as session_id,
event.context as open_context,
wiki as wiki
FROM event.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210831')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
AND useragent.is_bot = false
-- only anonymous users
AND event.isanon = true
AND event.action = 'compact-language-links-open'
AND event.context = 'header'
AND event.skinVersion = 'latest'
GROUP BY
event.web_session_id,
event.context,
event.isanon,
wiki
),
lang_switches AS (
SELECT
TO_DATE(dt) as switch_date,
event.web_session_id as session_id,
event.context as switch_context,
wiki as wiki
FROM event.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20210831')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
AND useragent.is_bot = false
-- only anonymous users
AND event.isanon =true
AND event.action = 'language-change'
AND event.context='content-language-switcher'
AND event.skinVersion = 'latest'
)
SELECT
new_button.button_date,
lang_switches.switch_date,
new_button.session_id,
new_button.wiki,
new_button.open_context,
-- sessions with lang switch that occured after button clicks
IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch,
lang_switches.switch_context
FROM new_button
LEFT JOIN lang_switches ON
new_button.session_id = lang_switches.session_id AND
new_button.wiki = lang_switches.wiki
"

# Load the extract: reuse the cached CSV when it exists, otherwise run the
# query on Hive and cache the result. Previously the query and read lines were
# both commented out while write_csv() was live, so running this in a fresh
# session failed because `new_language_link_clicks` was undefined.
new_clicks_path <- "Data_logout/new_language_link_clicks.csv"
if (file.exists(new_clicks_path)) {
  new_language_link_clicks <- read_csv(new_clicks_path)
} else {
  new_language_link_clicks <- wmfdata::query_hive(query_language_clicks_new)
  # write_csv() does not create missing directories.
  dir.create(dirname(new_clicks_path), showWarnings = FALSE, recursive = TRUE)
  write_csv(new_language_link_clicks, file = new_clicks_path)
}

# Daily number of language switches made after opening the new button,
# per wiki. Rows without a qualifying switch (language_switch == 0) are dropped.
new_lang_link_clicks_by_date <- new_language_link_clicks %>%
  filter(language_switch == 1) %>%
  count(wiki, switch_date, name = "n_events_new_language_change") %>%
  mutate(switch_date = as.Date(switch_date))
Old feature -- language link clicks on sidebar
# HiveQL: 'language-change' events with context 'languages-list' (the sidebar
# list) for logged-out, non-bot sessions on the listed wikis, counted per
# date / wiki / session and the other selected event fields.
# NOTE(review): the date filter ends at 2021-06-28, while the downstream daily
# grid runs to 2021-08-31 and fills missing days with 0 - confirm that sidebar
# clicks after 2021-06-28 are meant to be reported as zero.
query_old_language_link <- "
SELECT
TO_DATE(dt) AS `date`,
wiki,
event.web_session_id,
event.usereditbucket,
event.timetochangelanguage,
event.interfacelanguage,
event.contentlanguage,
event.selectedinterfacelanguage,
Count(*) AS n_events
FROM event.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210601' and '20210628')
AND event.context = 'languages-list' and event.action = 'language-change'
AND event.skinVersion = 'latest'
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
AND useragent.is_bot = false
-- only anonymous users
AND event.isanon =true
GROUP BY
TO_DATE(dt),
wiki,
event.web_session_id,
event.usereditbucket,
event.timetochangelanguage,
event.interfacelanguage,
event.contentlanguage,
event.selectedinterfacelanguage
"

# Load the extract: reuse the cached CSV when it exists, otherwise run the
# query on Hive and cache the result. Previously the query and read lines were
# both commented out while write_csv() was live, so running this in a fresh
# session failed because `old_lang_link_clicks` was undefined.
old_clicks_path <- "Data_logout/old_lang_link_clicks.csv"
if (file.exists(old_clicks_path)) {
  old_lang_link_clicks <- read_csv(old_clicks_path)
} else {
  old_lang_link_clicks <- wmfdata::query_hive(query_old_language_link)
  # write_csv() does not create missing directories.
  dir.create(dirname(old_clicks_path), showWarnings = FALSE, recursive = TRUE)
  write_csv(old_lang_link_clicks, file = old_clicks_path)
}

# Daily number of sidebar language-link clicks per wiki.
old_lang_link_clicks_by_date <- old_lang_link_clicks %>%
  group_by(wiki, date) %>%
  summarize(n_events_language_link = sum(n_events), .groups = "drop") %>%
  mutate(date = as.Date(date))
Old feature -- language link clicks after N-more button clicks on sidebar
# HiveQL: same shape as the new-button query, but for the old "N more" button
# (action 'compact-language-links-open' with context 'other') and the window
# 2021-06-01 to 2021-06-28:
#   * button        - earliest date each session opened the N-more list;
#   * lang_switches - 'language-change' events via the content language
#                     switcher.
# language_switch = 1 when a switch happened on or after the button date.
# NOTE(review): like the sidebar query, this extract stops at 2021-06-28 while
# the downstream daily grid runs to 2021-08-31 - confirm this is intended.
query_n_more_lang_switch <- "
-- sessions where N-more lang button was selected
WITH button AS (
SELECT
MIN(TO_DATE(dt)) as button_date,
event.web_session_id as session_id,
event.context as open_context,
wiki as wiki
FROM event.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210601' and '20210628')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
AND useragent.is_bot = false
-- only anonymous users
AND event.isanon =true
AND event.action = 'compact-language-links-open'
AND event.context = 'other'
AND event.skinVersion = 'latest'
GROUP BY
event.web_session_id,
event.context,
wiki
),
lang_switches AS (
SELECT
TO_DATE(dt) as switch_date,
event.web_session_id as session_id,
event.isanon,
event.context as switch_context,
wiki as wiki
FROM event.universallanguageselector
WHERE
year = 2021 and month in (6,7,8)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210601' and '20210628')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
AND useragent.is_bot = false
-- only anonymous users
AND event.isanon =true
AND event.action = 'language-change'
AND event.context='content-language-switcher'
AND event.skinVersion = 'latest'
)
SELECT
button.button_date,
lang_switches.switch_date,
button.session_id,
button.wiki,
button.open_context,
-- sessions with lang switch that occured after button clicks
IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date , 1, 0) AS language_switch,
lang_switches.switch_context
FROM button
LEFT JOIN lang_switches ON
button.session_id = lang_switches.session_id AND
button.wiki = lang_switches.wiki
"

# Load the extract: reuse the cached CSV when it exists, otherwise run the
# query on Hive and cache the result. Previously the query and read lines were
# both commented out while write_csv() was live, so running this in a fresh
# session failed because `n_more_lang_clicks` was undefined.
n_more_clicks_path <- "Data_logout/n_more_switch_lang_clicks.csv"
if (file.exists(n_more_clicks_path)) {
  n_more_lang_clicks <- read_csv(n_more_clicks_path)
} else {
  n_more_lang_clicks <- wmfdata::query_hive(query_n_more_lang_switch)
  # write_csv() does not create missing directories.
  dir.create(dirname(n_more_clicks_path), showWarnings = FALSE, recursive = TRUE)
  write_csv(n_more_lang_clicks, file = n_more_clicks_path)
}

# Daily number of language switches made after opening the N-more list,
# per wiki. Rows without a qualifying switch (language_switch == 0) are dropped.
n_more_lang_clicks_by_date <- n_more_lang_clicks %>%
  filter(language_switch == 1) %>%
  group_by(wiki, switch_date) %>%
  summarize(n_events_n_more_language_switch = sum(language_switch), .groups = "drop") %>%
  mutate(switch_date = as.Date(switch_date))
# Build one row per (date, wiki) so that days with no recorded events are
# present in the series and can be shown as zero.
date_seq <- seq(as.Date("2021-06-01"), as.Date("2021-08-31"), by = "days")
wiki_seq <- c(
  "frwiktionary", "hewiki", "ptwikiversity", "frwiki",
  "euwiki", "fawiki", "ptwiki", "kowiki", "trwiki", "srwiki", "bnwiki",
  "dewikivoyage", "vecwiki"
)

# stringsAsFactors = FALSE keeps `wiki` as character, matching the key type of
# the per-day tables joined below.
df_lang_clicks <- expand.grid(
  date = date_seq,
  wiki = wiki_seq,
  stringsAsFactors = FALSE
) %>%
  left_join(new_lang_link_clicks_by_date, by = c("wiki", "date" = "switch_date")) %>%
  left_join(old_lang_link_clicks_by_date, by = c("wiki", "date")) %>%
  left_join(n_more_lang_clicks_by_date, by = c("wiki", "date" = "switch_date")) %>%
  # Only the count columns can be missing after the joins; fill exactly those
  # instead of overwriting NA across the whole data frame.
  replace_na(list(
    n_events_new_language_change = 0,
    n_events_language_link = 0,
    n_events_n_more_language_switch = 0
  )) %>%
  mutate(
    # "Old" feature total = sidebar clicks + switches made via the N-more list.
    n_events_old = n_events_language_link + n_events_n_more_language_switch,
    # Human-readable project names used as facet labels.
    wiki_name = case_when(
      wiki == "frwiktionary" ~ "French Wiktionary",
      wiki == "hewiki" ~ "Hebrew Wikipedia",
      wiki == "ptwikiversity" ~ "Portuguese Wikiversity",
      wiki == "frwiki" ~ "French Wikipedia",
      wiki == "euwiki" ~ "Basque Wikipedia",
      wiki == "fawiki" ~ "Persian Wikipedia",
      wiki == "ptwiki" ~ "Portuguese Wikipedia",
      wiki == "kowiki" ~ "Korean Wikipedia",
      wiki == "trwiki" ~ "Turkish Wikipedia",
      wiki == "srwiki" ~ "Serbian Wikipedia",
      wiki == "bnwiki" ~ "Bengali Wikipedia",
      wiki == "dewikivoyage" ~ "German Wikivoyage",
      wiki == "vecwiki" ~ "Venetian Wikipedia"
    )
  )
# Daily language-link clicks per wiki, old vs. new feature, for dates strictly
# between 2021-06-10 and 2021-08-22. One facet per wiki with its own y scale.
df_lang_clicks_g <- df_lang_clicks %>%
  filter(date > as.Date("2021-06-10"), date < as.Date("2021-08-22")) %>%
  ggplot(mapping = aes(x = date)) +
  # `size` controls the line width here; ggplot2 >= 3.4.0 prefers `linewidth`
  # for lines, but `size` is kept so older installations still work.
  geom_line(aes(y = n_events_new_language_change, color = "c2"), size = 1.5) +
  geom_line(aes(y = n_events_old, color = "c1"), size = 1.5) +
  # `scales` spelled out in full (was the partial match `scale`).
  facet_wrap(~wiki_name, nrow = 4, scales = "free_y") +
  # Explicit breaks pin which label belongs to which series.
  scale_color_manual(
    values = c(c1 = "#666666", c2 = "#000099"),
    breaks = c("c1", "c2"),
    labels = c("Old", "New"),
    name = "group"
  ) +
  labs(
    title = "Language Link Clicks - logged-out user",
    x = "Date",
    y = "Language Link Clicks",
    caption = paste(
      "User type: logged-out User",
      "Old: clicks on language links on both sidebar and N-more suggestion window",
      "New: clicks on language links on new language suggestion window",
      sep = "\n"
    )
  ) +
  theme_light(base_size = 18) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )

# Make sure the output directory exists before saving.
dir.create("Graphs_logout", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "Graphs_logout/lang_link_clicks.png",
  plot = df_lang_clicks_g, width = 60, height = 30, units = "cm", dpi = "screen"
)
df_lang_clicks_g
# Classify each day as pre- or post-deployment and total the clicks per wiki
# and period. Windows are 7 calendar days each: the week ending the day before
# deployment, and the week starting 14 days after it. fawiki uses later
# windows (pre from 2021-06-21, post from 2021-07-12) than the other wikis.
# NOTE(review): the write-up describes the post period as the 16th to 23rd day
# after deployment, which does not exactly match these dates - confirm.
df_lang_clicks_prepost <- df_lang_clicks %>%
  mutate(
    deployment = case_when(
      wiki != "fawiki" & date >= as.Date("2021-06-15") & date <= as.Date("2021-06-21") ~ "pre",
      wiki != "fawiki" & date >= as.Date("2021-07-06") & date <= as.Date("2021-07-12") ~ "post",
      wiki == "fawiki" & date >= as.Date("2021-06-21") & date <= as.Date("2021-06-27") ~ "pre",
      wiki == "fawiki" & date >= as.Date("2021-07-12") & date <= as.Date("2021-07-18") ~ "post",
      # A real missing value instead of the string "NA" as a sentinel.
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(deployment)) %>%
  group_by(wiki_name, deployment) %>%
  summarize(
    old_lang_link_clicks = sum(n_events_old),
    new_lang_link_clicks = sum(n_events_new_language_change),
    .groups = "drop"
  )

# Long format for plotting: one row per wiki / period / feature.
# The factor levels put "pre" before "post" on the x axis. The previous
# order(..., rev(deployment)) call reversed element positions rather than
# sorting in descending order, so rows are now arranged by the factor itself.
df_lang_clicks_prepost_long <- df_lang_clicks_prepost %>%
  mutate(deployment = factor(deployment, levels = c("pre", "post"))) %>%
  arrange(wiki_name, deployment) %>%
  pivot_longer(
    cols = c("old_lang_link_clicks", "new_lang_link_clicks"),
    names_to = "group",
    values_to = "lang_link_clicks"
  )

# Dodged bars of total clicks before vs. after deployment, one facet per wiki.
lang_clicks_barchart <- df_lang_clicks_prepost_long %>%
  ggplot(aes(x = deployment, y = lang_link_clicks, fill = group)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = lang_link_clicks),
    color = "white", position = position_dodge(0.9), vjust = 1.5, size = 3
  ) +
  # `scales` spelled out in full (was the partial match `scale`).
  facet_wrap(~wiki_name, scales = "free_y") +
  labs(
    y = "Number of language link clicks",
    title = "Number of language link clicks before and after deployment",
    caption = paste(
      "User type: logged-out User",
      "Old: clicks on language links on both sidebar and N-more suggestion window",
      "New: clicks on language links on new language suggestion window",
      sep = "\n"
    )
  ) +
  # Colours are bound to the series by name rather than by sort order, and the
  # legend shows "New"/"Old" (as in the caption) instead of raw column names.
  scale_fill_manual(
    values = c(new_lang_link_clicks = "#000099", old_lang_link_clicks = "#666666"),
    breaks = c("new_lang_link_clicks", "old_lang_link_clicks"),
    labels = c("New", "Old"),
    name = "Feature"
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
lang_clicks_barchart

# Make sure the output directory exists before saving.
dir.create("Graphs_logout", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "Graphs_logout/lang_link_clicks_bar.png",
  plot = lang_clicks_barchart, width = 60, height = 30, units = "cm", dpi = "screen"
)