#' Evaluate an expression quietly.
#'
#' Runs `expr` inside `suppressMessages()`, `suppressWarnings()` and
#' `suppressPackageStartupMessages()`, so none of those conditions are shown
#' while it is evaluated.
#'
#' @param expr Expression to evaluate; it is only forced inside the wrappers.
#' @return The value of `expr`.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
shhh({
library(tidyverse);
library(lubridate);
library(scales);
library(magrittr);
library(dplyr);
})
library(IRdisplay)
# Inject a small HTML/JavaScript snippet into the notebook output that adds a
# button for showing or hiding the code cells (elements matching `div.input`).
# `code_toggle` is also registered to run once when the document is ready;
# because `code_show` starts as true, that first call hides the code.
# NOTE(review): the script relies on `$`, presumably jQuery supplied by the
# notebook front end -- confirm it is available where this is rendered.
display_html(
'<script>
code_show=true;
function code_toggle() {
if (code_show){
$(\'div.input\').hide();
} else {
$(\'div.input\').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code.">
</form>'
)
# Default width and height for plots rendered through repr
# (presumably in inches -- TODO confirm).
options(repr.plot.width = 15, repr.plot.height = 10)
Jennifer Wang, Staff Data Scientist, Wikimedia Foundation
13 November 2021
As a part of the Desktop Improvements project, the Wikimedia Foundation's Web team is introducing a new way to switch to a different language wiki. The original language list appears on the sidebar. The new solution replaces it with 1) a language button at the top of the page and 2) a list of languages, opened by selecting that button, which contains the suggested languages for each user as well as a full list of all available languages for that article. You can find more information on this change and other feature deployments on the Language switching project page.
Previous analyses
AB test analysis on logged-in users done in September, 2021.
Pre/post analysis on logged-out users done in September, 2021
The goal of this post analysis is:
to confirm the instrumentation issue discovered by engineers in T291285
to monitor the adoption trend 4 months after the new language feature was deployed on pilot wikis. Our hypothesis is that after the feature has been available for several months, more users will adopt it, so clicks on the new button and on language links will grow.
We reviewed the total number of clicks on language links and input language links on pilot wikis separately. We did not review the number of clicks on interface language links because it’s only available for logged-in users not for logged-out users.
We selected the 7 days before the deployment as the pre deployment period. Post period analysis reviewed the total number of clicks from 2021-10-25 to 2021-10-31.
The clicks on input language links within 7 days are too few to show a consistent trend, so we did not analyze them statistically.
# Number of clicks on the new language button, per day and wiki.
# The Hive query counts rows in event_sanitized.universallanguageselector with
# action 'compact-language-links-open', context 'header', skinVersion 'latest'
# and isanon = true, whose year/month/day fall between 20210601 and 20211031.
# The pilot-wiki filter is commented out inside the SQL, so every wiki that
# has matching events is returned.
query_new_button <- "
SELECT
TO_DATE(dt) as button_date,
wiki as wiki,
count(1) AS new_button_clicks
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8,9,10)
AND CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210601' and '20211031'
-- AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
-- 'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- only anonymous users
AND event.isanon = true
AND event.action = 'compact-language-links-open'
AND event.context = 'header'
AND event.skinVersion = 'latest'
GROUP BY
TO_DATE(dt),
wiki
"
# Run the query through wmfdata and keep the result as a data frame.
# NOTE(review): appears to need a valid Kerberos ticket (kinit) -- confirm.
df_new_button_clicks <- wmfdata::query_hive(query_new_button)
Don't forget to authenticate with Kerberos using kinit
Data sanity check, taking French Wikipedia as an example.
# Sanity check: daily new-button clicks on French Wikipedia for dates after
# 2021-09-01, in chronological order.
df_new_button_clicks %>%
  filter(wiki == "frwiki", button_date > "2021-09-01") %>%
  arrange(button_date)
button_date | wiki | new_button_clicks |
---|---|---|
<chr> | <chr> | <int> |
2021-09-02 | frwiki | 77523 |
2021-09-03 | frwiki | 74646 |
2021-09-04 | frwiki | 59191 |
2021-09-05 | frwiki | 66992 |
2021-09-06 | frwiki | 86422 |
2021-09-07 | frwiki | 85912 |
2021-09-08 | frwiki | 87364 |
2021-09-09 | frwiki | 88069 |
2021-09-10 | frwiki | 83201 |
2021-09-11 | frwiki | 65745 |
2021-09-12 | frwiki | 72656 |
2021-09-13 | frwiki | 93442 |
2021-09-14 | frwiki | 93272 |
2021-09-15 | frwiki | 93204 |
2021-09-16 | frwiki | 75324 |
2021-09-17 | frwiki | 1277 |
2021-09-18 | frwiki | 625 |
2021-09-19 | frwiki | 81 |
2021-09-20 | frwiki | 37 |
2021-09-21 | frwiki | 23 |
2021-09-22 | frwiki | 6 |
2021-09-23 | frwiki | 11 |
2021-09-24 | frwiki | 7 |
2021-09-25 | frwiki | 4 |
2021-09-26 | frwiki | 5 |
2021-09-28 | frwiki | 2 |
2021-09-29 | frwiki | 1 |
2021-10-01 | frwiki | 1 |
2021-10-02 | frwiki | 5 |
2021-10-06 | frwiki | 1 |
2021-10-07 | frwiki | 2 |
2021-10-10 | frwiki | 1 |
2021-10-12 | frwiki | 2 |
2021-10-14 | frwiki | 14526 |
2021-10-15 | frwiki | 94008 |
2021-10-16 | frwiki | 74086 |
2021-10-17 | frwiki | 88912 |
2021-10-18 | frwiki | 111869 |
2021-10-19 | frwiki | 110139 |
2021-10-20 | frwiki | 108072 |
2021-10-21 | frwiki | 103959 |
2021-10-22 | frwiki | 94230 |
2021-10-23 | frwiki | 70243 |
2021-10-24 | frwiki | 79227 |
2021-10-25 | frwiki | 104358 |
2021-10-26 | frwiki | 103350 |
2021-10-27 | frwiki | 99758 |
2021-10-28 | frwiki | 95591 |
2021-10-29 | frwiki | 87436 |
2021-10-30 | frwiki | 70705 |
2021-10-31 | frwiki | 77706 |
# Export the French Wikipedia sanity-check rows (dates after 2021-09-01, in
# chronological order) to CSV.
# write_csv() does not create missing directories, so make sure the output
# folder exists first; dir.create() does nothing if it is already there.
dir.create("Data_out", showWarnings = FALSE, recursive = TRUE)
df_new_button_clicks %>%
  filter(wiki == "frwiki", button_date > "2021-09-01") %>%
  arrange(button_date) %>%
  write_csv(file = "Data_out/new_button_clicks_fr.csv")

# The query returns dates as character strings; convert them to Date so they
# can be plotted on a date axis.
df_new_button_clicks <- df_new_button_clicks %>%
  mutate(button_date = as.Date(button_date))
# Attach a human-readable project name to each pilot wiki. Every other wiki
# gets the literal string "NA" (a sentinel, not a missing value), which the
# plotting code filters on.
# Fix: the label for frwiktionary was misspelled "Wikitionary".
df_new_button_clicks <- df_new_button_clicks %>%
  mutate(
    wiki_name = case_when(
      wiki == "frwiktionary" ~ "French Wiktionary",
      wiki == "hewiki" ~ "Hebrew Wikipedia",
      wiki == "ptwikiversity" ~ "Portuguese Wikiversity",
      wiki == "frwiki" ~ "French Wikipedia",
      wiki == "euwiki" ~ "Basque Wikipedia",
      wiki == "fawiki" ~ "Persian Wikipedia",
      wiki == "ptwiki" ~ "Portuguese Wikipedia",
      wiki == "kowiki" ~ "Korean Wikipedia",
      wiki == "trwiki" ~ "Turkish Wikipedia",
      wiki == "srwiki" ~ "Serbian Wikipedia",
      wiki == "bnwiki" ~ "Bengali Wikipedia",
      wiki == "dewikivoyage" ~ "German Wikivoyage",
      wiki == "vecwiki" ~ "Venetian Wikipedia",
      TRUE ~ "NA"
    )
  )
# Daily new-language-button clicks, one facet per pilot wiki with its own
# y-axis. Rows carrying the "NA" sentinel name (non-pilot wikis) are dropped.
df_button_clicks_g <- ggplot(
  data = filter(df_new_button_clicks, wiki_name != "NA"),
  mapping = aes(x = button_date)
) +
  geom_line(aes(y = new_button_clicks), color = "#000099", size = 1.5) +
  # `scales` is spelled out in full; the previous `scale =` only worked
  # through partial argument matching.
  facet_wrap(~wiki_name, nrow = 4, scales = "free_y") +
  labs(
    title = "New Language Button Clicks - logged-out user",
    x = "Date",
    y = "New Language Button Clicks",
    caption = "User type: logged-out User
Clicks on New Language Button"
  ) +
  theme_light(base_size = 18) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
# Print the plot in the notebook output.
df_button_clicks_g
Summary
The new button click events dropped starting on 2021-09-16. The number of events began to recover on 2021-10-14 and had returned to the pre-drop level by 2021-10-31. Engineers tracked down the instrumentation issue in T291285.
New feature -- language link clicks after new button clicks
# Language-link clicks that follow a click on the new language button.
# The query builds two CTEs over event_sanitized.universallanguageselector,
# both limited to the 13 listed wikis, isanon = true, skinVersion 'latest' and
# year/month/day between 20210622 and 20211031:
#   new_button    - one row per session and wiki, holding the earliest date of
#                   a 'compact-language-links-open' event in context 'header'
#   lang_switches - one row per 'language-change' event in context
#                   'content-language-switcher'
# new_button is LEFT JOINed to lang_switches on session id and wiki, so a
# session appears once per matching switch event, or once with NULL switch
# columns when there is none. language_switch is 1 when a switch exists and
# its date is on or after the button date, otherwise 0.
query_language_clicks_new <-
"
-- sessions where new lang button was selected
WITH new_button AS (
SELECT
MIN(TO_DATE(dt)) as button_date,
event.web_session_id as session_id,
event.context as open_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8,9,10)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20211031')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- only anonymous users
AND event.isanon = true
AND event.action = 'compact-language-links-open'
AND event.context = 'header'
AND event.skinVersion = 'latest'
GROUP BY
event.web_session_id,
event.context,
event.isanon,
wiki
),
lang_switches AS (
SELECT
TO_DATE(dt) as switch_date,
event.web_session_id as session_id,
event.context as switch_context,
wiki as wiki
FROM event_sanitized.universallanguageselector
WHERE
year = 2021 and month in (6,7,8,9,10)
AND (CONCAT(year,LPAD(month,2,'0'),LPAD(day,2,'0')) between '20210622' and '20211031')
AND wiki in ('frwiktionary', 'hewiki', 'ptwikiversity', 'frwiki',
'euwiki', 'fawiki', 'ptwiki', 'kowiki', 'trwiki', 'srwiki', 'bnwiki', 'dewikivoyage', 'vecwiki')
-- only anonymous users
AND event.isanon =true
AND event.action = 'language-change'
AND event.context='content-language-switcher'
AND event.skinVersion = 'latest'
)
SELECT
new_button.button_date,
lang_switches.switch_date,
new_button.session_id,
new_button.wiki,
new_button.open_context,
-- sessions with lang switch that occured after button clicks
IF(lang_switches.session_id IS NOT NULL AND switch_date >= button_date, 1, 0) AS language_switch,
lang_switches.switch_context
FROM new_button
LEFT JOIN lang_switches ON
new_button.session_id = lang_switches.session_id AND
new_button.wiki = lang_switches.wiki
"
# Run the query through wmfdata and keep the joined rows as a data frame.
# NOTE(review): appears to need a valid Kerberos ticket (kinit) -- confirm.
new_language_link_clicks <- wmfdata::query_hive(query_language_clicks_new)
Don't forget to authenticate with Kerberos using kinit
#write_csv(new_language_link_clicks, file = 'Data_logout/new_language_link_clicks.csv')
#new_language_link_clicks <- read_csv('Data_logout/new_language_link_clicks.csv')
# Daily number of language-link clicks per wiki. Only joined rows flagged as a
# switch on or after the button click are counted, and the switch date is
# converted from character to Date.
new_lang_link_clicks_by_date <- new_language_link_clicks %>%
  filter(language_switch == 1) %>%
  count(wiki, switch_date, name = "n_events_new_language_change") %>%
  mutate(switch_date = as.Date(switch_date))
# Attach a human-readable project name to each pilot wiki. There is no
# fallback branch, so a wiki outside this list gets a missing wiki_name.
# Fix: the label for frwiktionary was misspelled "Wikitionary".
new_lang_link_clicks_by_date <- new_lang_link_clicks_by_date %>%
  mutate(
    wiki_name = case_when(
      wiki == "frwiktionary" ~ "French Wiktionary",
      wiki == "hewiki" ~ "Hebrew Wikipedia",
      wiki == "ptwikiversity" ~ "Portuguese Wikiversity",
      wiki == "frwiki" ~ "French Wikipedia",
      wiki == "euwiki" ~ "Basque Wikipedia",
      wiki == "fawiki" ~ "Persian Wikipedia",
      wiki == "ptwiki" ~ "Portuguese Wikipedia",
      wiki == "kowiki" ~ "Korean Wikipedia",
      wiki == "trwiki" ~ "Turkish Wikipedia",
      wiki == "srwiki" ~ "Serbian Wikipedia",
      wiki == "bnwiki" ~ "Bengali Wikipedia",
      wiki == "dewikivoyage" ~ "German Wikivoyage",
      wiki == "vecwiki" ~ "Venetian Wikipedia"
    )
  )
# Daily language-link clicks made after opening the new language button, one
# facet per pilot wiki with its own y-axis.
df_lang_clicks_g <- ggplot(
  data = new_lang_link_clicks_by_date,
  mapping = aes(x = switch_date)
) +
  geom_line(
    aes(y = n_events_new_language_change),
    color = "#000099",
    size = 1.5
  ) +
  # `scales` is spelled out in full; the previous `scale =` only worked
  # through partial argument matching.
  facet_wrap(~wiki_name, nrow = 4, scales = "free_y") +
  labs(
    title = "Language Link Clicks - logged-out user",
    x = "Date",
    y = "Language Link Clicks",
    caption = "User type: logged-out User
Clicks on language links on new language suggestion window"
  ) +
  theme_light(base_size = 18) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
# Print the plot in the notebook output.
df_lang_clicks_g