#' Evaluate an expression quietly.
#'
#' Wraps `expr` so that package start-up messages, warnings and ordinary
#' messages raised while it is evaluated are all suppressed.
#'
#' @param expr Expression to evaluate (used below for a block of `library()` calls).
#' @return The value of `expr`.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
shhh({
library(tidyverse);
library(lubridate);
library(scales);
library(magrittr);
library(dplyr);
})
library(IRdisplay)
# Inject a small script plus a button into the rendered notebook. The script
# toggles the visibility of the code cells (`div.input`) and is run once when
# the document is ready, so the code starts out hidden.
display_html(
  data = '<script>
code_show=true;
function code_toggle() {
if (code_show){
$(\'div.input\').hide();
} else {
$(\'div.input\').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code.">
</form>'
)

# Plot dimensions used by the notebook's plot renderer (`repr.plot.*` options).
options(
  repr.plot.width = 15,
  repr.plot.height = 10
)
Jennifer Wang, Staff Data Scientist, Wikimedia Foundation
July 2022
As a part of the Desktop Improvements project, The Wikimedia Foundation's Web team is introducing a new Table of Contents (ToC). The goal is to make it easier for readers and editors to gain context and navigate throughout the page without needing to scroll to the top. Please find more information on this change and other feature deployments on the Table of contents project page.
An AB test was performed on the early adopter wikis between May 26, 2022 and June 15, 2022 for both logged-in users and anonymous users. This report details the analysis and results for the deployment of the new table of contents on both user groups.
The primary goal of the AB test was to test the hypothesis that the new table of contents is used more frequently than the previous table of contents.
As part of this analysis, we were also curious about how the new ToC affects the time readers spent on a page.
The AB test was run on a per wiki basis on both logged-in users and anonymous users. The sessions included in the test were randomly assigned to either the control (old ToC) or treatment (new ToC) based on their landing page id.
We compared the total number of clicks, the total number of scrolls, and the average reading time (seconds per kilobyte) between the control and treatment groups. We also reviewed the percentage difference between the treatment group and the control group. The logged-in user group and the anonymous user group were analyzed separately.
We also ran Hierarchical Generalized Linear Modeling on session based data to determine if the difference is statistically significant.
June 9, 2022
# Daily enrollment-event counts (`pv`) and distinct session counts (`sessions`)
# per wiki and test group for the 'skin-vector-toc-experiment' experiment,
# restricted to the April-June 2022 partitions.
query <- "
select wiki, TO_DATE(meta.dt) AS date_time,`group` AS test_group, count(1) AS pv,
count(distinct web_session_id) AS sessions
FROM event.mediawiki_web_ab_test_enrollment
WHERE year=2022 AND month IN (4, 5,6)
AND experiment_name='skin-vector-toc-experiment'
GROUP BY wiki, TO_DATE(meta.dt),`group`
ORDER BY wiki, date_time, `group`
LIMIT 100000
"
# Run the query through wmfdata; the result is used as a data frame below.
df <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
# Convert the query's date column to Date so it is plotted on a date axis.
df$date_time <- as.Date(df$date_time)

# Daily pageviews per test group, one panel per wiki with an independent y axis.
# `scales` is spelled out in full rather than relying on partial matching of `scale`.
g_pv <- ggplot(
  data = df,
  mapping = aes(x = date_time, y = pv, color = test_group)
) +
  geom_line(size = 1.5) +
  facet_wrap(~wiki, nrow = 6, scales = "free_y") +
  scale_color_manual(values = c("#666666", "#000099"), name = "group") +
  labs(
    title = "Daily pageviews by test group",
    x = "Date",
    y = "Pageviews"
  ) +
  theme_light(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
g_pv
# `df_pv` is only created further down (result of `query_non_duplicate_pv`).
# Guard the preview so running the file top to bottom does not stop with
# "object 'df_pv' not found".
if (exists("df_pv")) {
  head(df_pv)
} else {
  message("df_pv is not available yet; run the query_non_duplicate_pv step first.")
}
wiki | domain | test_group | total_pv | |
---|---|---|---|---|
<chr> | <chr> | <chr> | <int> | |
1 | arywiki | ary.wikipedia.org | control | 1715 |
2 | arywiki | ary.wikipedia.org | treatment | 1101 |
3 | arywiki | ary.wikipedia.shisu.cf | control | 1 |
4 | bnwiki | bn.wikipedia.ahau.cf | control | 3 |
5 | bnwiki | bn.wikipedia.ahau.cf | treatment | 2 |
6 | bnwiki | bn.wikipedia.ahmu.cf | control | 1 |
# Total pageviews per wiki and test group over the queried period,
# drawn as one bar per group in a separate panel per wiki.
barchart_pv_overall <- df %>%
  group_by(wiki, test_group) %>%
  summarize(total_pv = sum(pv), .groups = "drop") %>%
  ggplot(aes(x = test_group, y = total_pv, fill = test_group)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~wiki, scales = "free_y") +
  labs(
    y = "Number of total pageviews",
    title = "Number of total pageviews ",
    caption = "User type: logged-in and anonymous User"
  ) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
barchart_pv_overall
# Daily distinct sessions per test group, one panel per wiki with an
# independent y axis. `scales` is spelled out in full rather than relying on
# partial matching of `scale`.
g_s <- ggplot(
  data = df,
  mapping = aes(x = date_time, y = sessions, color = test_group)
) +
  geom_line(size = 1.5) +
  facet_wrap(~wiki, nrow = 6, scales = "free_y") +
  scale_color_manual(values = c("#666666", "#000099"), name = "group") +
  labs(
    title = "Daily unique sessions by test group",
    x = "Date",
    y = "sessions"
  ) +
  theme_light(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
g_s
# Sessions per wiki and test group, summed over the daily rows of `df`.
# NOTE(review): `sessions` is a per-day distinct count, so a session that spans
# several days is counted once per day in this total.
barchart_sessions_overall <- df %>%
  group_by(wiki, test_group) %>%
  summarize(total_sessions = sum(sessions), .groups = "drop") %>%
  ggplot(aes(x = test_group, y = total_sessions, fill = test_group)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~wiki, scales = "free_y") +
  labs(
    y = "Number of total unique sessions",
    title = "Number of total unique sessions ",
    caption = "User type: logged-in and anonymous User"
  ) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
barchart_sessions_overall
Note:
5 wikis with low traffic: plwikinews, ptwikinews, vecwiki, arywiki, viwikibooks
Foundationwiki has low traffic in the control group.
# Per wiki, count the sessions that were enrolled in more than one test group
# between 2022-05-26 and 2022-06-15.
# The enrollment table is restricted to 'skin-vector-toc-experiment' (as the
# other enrollment queries in this file are) so that a session's group in an
# unrelated experiment cannot make it look like a duplicate assignment.
query_check_duplicate <- "
WITH t_duplicate AS
(SELECT web_session_id, wiki, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') and year=2022 and month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki
HAVING groups>1
)
SELECT wiki, count(web_session_id) AS dup_sessions
FROM t_duplicate
GROUP BY wiki
"
df_duplicate <- wmfdata::query_hive(query_check_duplicate)
Don't forget to authenticate with Kerberos using kinit
# Show the per-wiki counts of sessions that were enrolled in more than one group.
df_duplicate
wiki | dup_sessions |
---|---|
<chr> | <int> |
arywiki | 41 |
bnwiki | 31209 |
dewikivoyage | 3763 |
euwiki | 22606 |
fawiki | 478563 |
foundationwiki | 5851 |
frwiki | 3245573 |
frwikiquote | 428 |
frwiktionary | 89800 |
hewiki | 529285 |
idwiki | 305725 |
incubatorwiki | 591 |
kowiki | 527008 |
mediawikiwiki | 23291 |
plwikinews | 16 |
ptwiki | 751847 |
ptwikinews | 5 |
ptwikiversity | 173 |
srwiki | 90155 |
thwiki | 246259 |
trwiki | 8579 |
vecwiki | 96 |
viwiki | 539556 |
viwikibooks | 30 |
# Total number of doubly-assigned sessions across all wikis.
sum(df_duplicate[["dup_sessions"]])
Note:
All sessions assigned to both the treatment group and the control group need to be excluded from the analysis.
# Enrollment-event counts (`pv`) and distinct session counts (`sessions`) per
# wiki and test group for 'skin-vector-toc-experiment' between 2022-05-26 and
# 2022-06-15.
query_total_sessions <- "
select wiki, `group` AS test_group, count(1) AS pv,
count(distinct web_session_id) AS sessions
FROM event.mediawiki_web_ab_test_enrollment
WHERE year=2022 AND month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
GROUP BY wiki, `group`
ORDER BY wiki, `group`
LIMIT 100000
"
# Run the query through wmfdata; the result is aggregated as a data frame below.
df_total_sessions <- wmfdata::query_hive(query_total_sessions)
Don't forget to authenticate with Kerberos using kinit
# Collapse the per-wiki rows into one row per test group with summed
# session and pageview counts, then display the result.
df_total_sessions <- summarize(
  group_by(df_total_sessions, test_group),
  total_sessions = sum(sessions),
  total_pv = sum(pv)
)
df_total_sessions
test_group | total_sessions | total_pv |
---|---|---|
<chr> | <int> | <int> |
control | 28016822 | 55011276 |
treatment | 28471190 | 55656417 |
# Total enrolled sessions across both groups. A session that was enrolled in
# both groups is counted once per group in this total.
total_enrolled_sessions <- sum(df_total_sessions[["total_sessions"]])
total_enrolled_sessions

# Ratio of doubly-assigned sessions to the remaining sessions. Computed from
# the query results rather than hand-copied totals so it stays correct when
# the data is refreshed.
duplicate_sessions <- sum(df_duplicate[["dup_sessions"]])
duplicate_sessions / (total_enrolled_sessions - duplicate_sessions)
Note: 14% of sessions were assigned to both the control and the treatment group. They will be excluded from the following analysis.
# One row per (session, wiki, domain, test group) for sessions that were
# enrolled in a single test group between 2022-05-26 and 2022-06-15, with the
# timestamp of the session's first enrollment event.
# Both the CTE and the outer query are restricted to
# 'skin-vector-toc-experiment', and the outer query repeats the test date
# range, so that enrollments from other experiments or from outside the test
# window cannot add a second group or an earlier timestamp to a session.
query_non_duplicate <- "
WITH t_ab_no_duplicate AS
(SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') and year=2022 and month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
)
SELECT
t1.web_session_id,
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
min(t1.meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_duplicate AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
WHERE t1.wiki NOT IN ('testwiki','test2wiki') and t1.year=2022 and t1.month IN (5,6)
AND CONCAT(t1.year, '-', LPAD(t1.month,2,'0'),'-', LPAD(t1.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t1.experiment_name='skin-vector-toc-experiment'
GROUP BY t1.web_session_id, t1.wiki,t1.meta.domain, t1.`group`
"
df_ab <- wmfdata::query_hive(query_non_duplicate)
Don't forget to authenticate with Kerberos using kinit
# Distinct sessions per wiki and test group after removing sessions that were
# enrolled in both groups (`df_ab`), one panel per wiki.
barchart_sessions_overall <- df_ab %>%
  group_by(wiki, test_group) %>%
  summarize(sessions = n_distinct(web_session_id), .groups = "drop") %>%
  ggplot(aes(x = test_group, y = sessions, fill = test_group)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~wiki, scales = "free_y") +
  labs(
    y = "Number of total unique sessions",
    title = "Number of total unique sessions ",
    caption = "User type: logged-in and anonymous User"
  ) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
barchart_sessions_overall
Note:
8 wikis have an unbalanced distribution of unique sessions: arywiki, foundationwiki, incubatorwiki, mediawikiwiki, plwikinews, ptwikinews, vecwiki, viwikibooks.
# Enrollment-event counts (`total_pv`) per wiki, domain and test group for
# sessions that were enrolled in a single test group between 2022-05-26 and
# 2022-06-15.
# - Both sides are restricted to 'skin-vector-toc-experiment', consistent with
#   the other enrollment queries in this file.
# - The CTE has one row per (session, wiki, domain), so the join also matches
#   on domain; joining on wiki and session alone would repeat every event once
#   per domain of that session and inflate COUNT(1).
# - Partition columns in the outer query are qualified with t1.
query_non_duplicate_pv <- "
WITH t_ab_no_duplicate AS
(SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') and year=2022 and month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
)
SELECT
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
COUNT(1) AS total_pv
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_duplicate AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
AND t1.meta.domain=t2.domain
WHERE t1.wiki NOT IN ('testwiki','test2wiki') and t1.year=2022 and t1.month IN (5,6)
AND CONCAT(t1.year, '-', LPAD(t1.month,2,'0'),'-', LPAD(t1.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t1.experiment_name='skin-vector-toc-experiment'
GROUP BY t1.wiki,t1.meta.domain, t1.`group`
"
df_pv <- wmfdata::query_hive(query_non_duplicate_pv)
Don't forget to authenticate with Kerberos using kinit
# Total pageviews per wiki and test group after removing doubly-assigned
# sessions, one panel per wiki.
# `df_pv` has one row per (wiki, domain, test group). The rows are summed per
# wiki and group first; otherwise the bars of the different domains would be
# drawn on top of each other instead of being added up.
# The labels say "pageviews" because `total_pv` is what is plotted.
barchart_sessions_overall_2 <- df_pv %>%
  group_by(wiki, test_group) %>%
  summarize(total_pv = sum(total_pv), .groups = "drop") %>%
  ggplot(aes(x = test_group, y = total_pv, fill = test_group)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(position = "dodge") +
  facet_wrap(~wiki, scales = "free_y") +
  labs(
    y = "Number of total pageviews",
    title = "Number of total pageviews",
    caption = "User type: logged-in and anonymous User"
  ) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.text.x = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
barchart_sessions_overall_2
Note:
5 wikis have an unbalanced distribution of total pageviews: arywiki, foundationwiki, incubatorwiki, vecwiki, viwikibooks.
Test question: Is the new table of contents used more frequently than the previous table of contents?
Clicks on the ToC were tracked in the event.DesktopWebUIActionsTracking schema.
Important note
We show the old table of contents in the treatment bucket if the window is small (<1000px) for a better user experience on devices with smaller viewports. Sessions with a small viewport (<1000px) should be excluded from the AB test analysis.
# Daily ToC click counts per wiki, anonymity flag, test group, event name and
# viewport-size bucket, for sessions enrolled in a single test group of
# 'skin-vector-toc-experiment'.
#   t_ab_no_dupli: sessions (per wiki and domain) that appear in one group only.
#   t_ab:          those sessions with their test group and the timestamp of
#                  their first enrollment event inside the test window.
# Only clicks logged at or after that timestamp are counted.
# - t_ab carries an explicit month predicate next to the date-range filter so
#   that only the May/June partitions need to be read; the date range already
#   limits the rows to those months, so the result is unchanged.
# - `wiki` in the final SELECT is qualified as t3.wiki (matching the GROUP BY)
#   because both joined relations expose a `wiki` column.
query_click <- "
WITH t_ab_no_dupli AS (
SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') AND year=2022 AND month IN (5,6)
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
),
t_ab AS(
SELECT
t1.web_session_id,
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
min(t1.meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_dupli AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
WHERE t1.wiki NOT IN ('testwiki','test2wiki')
AND t1.year=2022 AND t1.month IN (5,6)
AND CONCAT(t1.year, '-', LPAD(t1.month,2,'0'),'-', LPAD(t1.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t1.experiment_name='skin-vector-toc-experiment'
AND NOT is_bot
GROUP BY t1.web_session_id, t1.wiki,t1.meta.domain, t1.`group`
)
-- clicks from ab test group
SELECT CONCAT(t3.year, '-', LPAD(t3.month,2,'0'),'-', LPAD(t3.day,2,'0')) AS event_date,
t3.wiki, event.isanon, t4.test_group,
event.name AS event_name,
event.viewportSizeBucket AS view_size,
count(1) AS clicks
FROM event.DesktopWebUIActionsTracking AS t3
INNER JOIN t_ab AS t4
ON t3.wiki=t4.wiki AND t3.event.token = t4.web_session_id
WHERE t3.wiki IN ('bnwiki', 'fawiki', 'foundationwiki',
'hewiki', 'ptwikinews', 'ptwikiversity', 'srwiki',
'thwiki', 'vecwiki', 'viwiki', 'viwikibooks', 'dewikivoyage',
'euwiki', 'kowiki', 'plwikinews', 'trwiki', 'arywiki',
'frwiki', 'frwikiquote', 'frwiktionary', 'incubatorwiki', 'ptwiki'
)
AND t3.year=2022 and t3.month IN (5,6)
AND CONCAT(t3.year, '-', LPAD(t3.month,2,'0'),'-', LPAD(t3.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t4.session_dt <= t3.meta.dt
AND event.name IN ( 'ui.toc', 'ui.sidebar-toc')
AND event.action='click' AND event.skinversion=2
GROUP BY t3.year, t3.month,t3.day, t3.wiki, event.isanon, t4.test_group, event.name , event.viewportSizeBucket
"
df_clicks <- wmfdata::query_hive(query_click)
Don't forget to authenticate with Kerberos using kinit
# Convert the event date column to Date, then display the click counts.
df_clicks <- df_clicks %>%
  mutate(event_date = as.Date(event_date))
df_clicks
event_date | wiki | isanon | test_group | event_name | view_size | clicks |
---|---|---|---|---|---|---|
<date> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> |
2022-05-26 | bnwiki | false | control | ui.toc | 1200px-2000px | 1 |
2022-05-26 | bnwiki | true | control | ui.toc | 1000px-1199px | 1 |
2022-05-26 | bnwiki | true | control | ui.toc | 1200px-2000px | 4 |
2022-05-26 | dewikivoyage | false | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | dewikivoyage | false | treatment | ui.sidebar-toc | 1200px-2000px | 1 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 1000px-1199px | 7 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 1200px-2000px | 46 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 720px-999px | 3 |
2022-05-26 | dewikivoyage | true | control | ui.toc | >2000px | 1 |
2022-05-26 | dewikivoyage | true | treatment | ui.sidebar-toc | 1000px-1199px | 7 |
2022-05-26 | dewikivoyage | true | treatment | ui.sidebar-toc | 1200px-2000px | 32 |
2022-05-26 | euwiki | false | control | ui.toc | 1200px-2000px | 2 |
2022-05-26 | euwiki | true | control | ui.toc | 1000px-1199px | 6 |
2022-05-26 | euwiki | true | control | ui.toc | 1200px-2000px | 7 |
2022-05-26 | fawiki | false | control | ui.toc | 1000px-1199px | 2 |
2022-05-26 | fawiki | false | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | fawiki | false | control | ui.toc | 720px-999px | 3 |
2022-05-26 | fawiki | true | control | ui.toc | 1000px-1199px | 7 |
2022-05-26 | fawiki | true | control | ui.toc | 1200px-2000px | 59 |
2022-05-26 | fawiki | true | control | ui.toc | 320px-719px | 1 |
2022-05-26 | fawiki | true | control | ui.toc | 720px-999px | 6 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 8 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 42 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 720px-999px | 1 |
2022-05-26 | fawiki | true | treatment | ui.toc | 1000px-1199px | 1 |
2022-05-26 | foundationwiki | false | control | ui.toc | 1200px-2000px | 16 |
2022-05-26 | foundationwiki | true | control | ui.toc | 1000px-1199px | 2 |
2022-05-26 | foundationwiki | true | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | foundationwiki | true | control | ui.toc | 320px-719px | 5 |
2022-05-26 | foundationwiki | true | control | ui.toc | 720px-999px | 2 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
2022-06-15 | thwiki | true | control | ui.toc | 1000px-1199px | 134 |
2022-06-15 | thwiki | true | control | ui.toc | 1200px-2000px | 655 |
2022-06-15 | thwiki | true | control | ui.toc | 320px-719px | 22 |
2022-06-15 | thwiki | true | control | ui.toc | 720px-999px | 43 |
2022-06-15 | thwiki | true | control | ui.toc | >2000px | 10 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 50 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 332 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 720px-999px | 5 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | >2000px | 1 |
2022-06-15 | thwiki | true | treatment | ui.toc | 1200px-2000px | 1 |
2022-06-15 | trwiki | false | control | ui.toc | 1000px-1199px | 1 |
2022-06-15 | trwiki | false | control | ui.toc | 1200px-2000px | 26 |
2022-06-15 | trwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 3 |
2022-06-15 | trwiki | true | control | ui.toc | 1200px-2000px | 19 |
2022-06-15 | trwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-15 | trwiki | true | treatment | ui.toc | 1200px-2000px | 6 |
2022-06-15 | trwiki | true | treatment | ui.toc | 720px-999px | 1 |
2022-06-15 | vecwiki | true | control | ui.toc | 720px-999px | 1 |
2022-06-15 | viwiki | false | control | ui.toc | 1000px-1199px | 1 |
2022-06-15 | viwiki | false | control | ui.toc | 1200px-2000px | 25 |
2022-06-15 | viwiki | false | control | ui.toc | 720px-999px | 3 |
2022-06-15 | viwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 22 |
2022-06-15 | viwiki | true | control | ui.toc | 1000px-1199px | 104 |
2022-06-15 | viwiki | true | control | ui.toc | 1200px-2000px | 1231 |
2022-06-15 | viwiki | true | control | ui.toc | 320px-719px | 21 |
2022-06-15 | viwiki | true | control | ui.toc | 720px-999px | 63 |
2022-06-15 | viwiki | true | control | ui.toc | >2000px | 5 |
2022-06-15 | viwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 35 |
2022-06-15 | viwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 417 |
2022-06-15 | viwiki | true | treatment | ui.toc | 1200px-2000px | 1 |
# Treatment-group clicks on the old ToC (`ui.toc`) on the last day of the test.
df_clicks %>%
  filter(
    test_group == "treatment",
    event_name == "ui.toc",
    event_date == "2022-06-15"
  )
event_date | wiki | isanon | test_group | event_name | view_size | clicks |
---|---|---|---|---|---|---|
<date> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> |
2022-06-15 | bnwiki | true | treatment | ui.toc | 1200px-2000px | 1 |
2022-06-15 | fawiki | true | treatment | ui.toc | 1000px-1199px | 2 |
2022-06-15 | fawiki | true | treatment | ui.toc | 1200px-2000px | 4 |
2022-06-15 | fawiki | true | treatment | ui.toc | 320px-719px | 1 |
2022-06-15 | frwiki | false | treatment | ui.toc | 1000px-1199px | 3 |
2022-06-15 | frwiki | false | treatment | ui.toc | 1200px-2000px | 89 |
2022-06-15 | frwiki | false | treatment | ui.toc | 720px-999px | 3 |
2022-06-15 | frwiki | false | treatment | ui.toc | <320px | 1 |
2022-06-15 | frwiki | false | treatment | ui.toc | >2000px | 1 |
2022-06-15 | frwiki | true | treatment | ui.toc | 1000px-1199px | 224 |
2022-06-15 | frwiki | true | treatment | ui.toc | 1200px-2000px | 2061 |
2022-06-15 | frwiki | true | treatment | ui.toc | 320px-719px | 31 |
2022-06-15 | frwiki | true | treatment | ui.toc | 720px-999px | 103 |
2022-06-15 | frwiki | true | treatment | ui.toc | >2000px | 45 |
2022-06-15 | frwiktionary | true | treatment | ui.toc | 1200px-2000px | 1 |
2022-06-15 | ptwiki | false | treatment | ui.toc | 1000px-1199px | 4 |
2022-06-15 | ptwiki | false | treatment | ui.toc | 1200px-2000px | 29 |
2022-06-15 | ptwiki | true | treatment | ui.toc | 1000px-1199px | 28 |
2022-06-15 | ptwiki | true | treatment | ui.toc | 1200px-2000px | 340 |
2022-06-15 | ptwiki | true | treatment | ui.toc | 320px-719px | 9 |
2022-06-15 | ptwiki | true | treatment | ui.toc | 720px-999px | 8 |
2022-06-15 | ptwiki | true | treatment | ui.toc | >2000px | 4 |
2022-06-15 | thwiki | true | treatment | ui.toc | 1200px-2000px | 1 |
2022-06-15 | trwiki | true | treatment | ui.toc | 1200px-2000px | 6 |
2022-06-15 | trwiki | true | treatment | ui.toc | 720px-999px | 1 |
2022-06-15 | viwiki | true | treatment | ui.toc | 1200px-2000px | 1 |
# Control-group clicks on the new ToC (`ui.sidebar-toc`).
df_clicks %>%
  filter(
    test_group == "control",
    event_name == "ui.sidebar-toc"
  )
event_date | wiki | isanon | test_group | event_name | view_size | clicks |
---|---|---|---|---|---|---|
<date> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> |
2022-05-28 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-05-29 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 2 |
2022-06-03 | foundationwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-04 | frwiktionary | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-06 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-07 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-10 | ptwiki | true | control | ui.sidebar-toc | 1000px-1199px | 10 |
2022-06-11 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-12 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-12 | srwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-12 | viwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-14 | foundationwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-14 | frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 1 |
# Keep every row except control clicks on `ui.sidebar-toc` and treatment
# clicks on `ui.toc`.
df_clicks %>%
  filter(
    test_group != "control" | event_name != "ui.sidebar-toc",
    test_group != "treatment" | event_name != "ui.toc"
  )
event_date | wiki | isanon | test_group | event_name | view_size | clicks |
---|---|---|---|---|---|---|
<date> | <chr> | <chr> | <chr> | <chr> | <chr> | <int> |
2022-05-26 | bnwiki | false | control | ui.toc | 1200px-2000px | 1 |
2022-05-26 | bnwiki | true | control | ui.toc | 1000px-1199px | 1 |
2022-05-26 | bnwiki | true | control | ui.toc | 1200px-2000px | 4 |
2022-05-26 | dewikivoyage | false | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | dewikivoyage | false | treatment | ui.sidebar-toc | 1200px-2000px | 1 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 1000px-1199px | 7 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 1200px-2000px | 46 |
2022-05-26 | dewikivoyage | true | control | ui.toc | 720px-999px | 3 |
2022-05-26 | dewikivoyage | true | control | ui.toc | >2000px | 1 |
2022-05-26 | dewikivoyage | true | treatment | ui.sidebar-toc | 1000px-1199px | 7 |
2022-05-26 | dewikivoyage | true | treatment | ui.sidebar-toc | 1200px-2000px | 32 |
2022-05-26 | euwiki | false | control | ui.toc | 1200px-2000px | 2 |
2022-05-26 | euwiki | true | control | ui.toc | 1000px-1199px | 6 |
2022-05-26 | euwiki | true | control | ui.toc | 1200px-2000px | 7 |
2022-05-26 | fawiki | false | control | ui.toc | 1000px-1199px | 2 |
2022-05-26 | fawiki | false | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | fawiki | false | control | ui.toc | 720px-999px | 3 |
2022-05-26 | fawiki | true | control | ui.toc | 1000px-1199px | 7 |
2022-05-26 | fawiki | true | control | ui.toc | 1200px-2000px | 59 |
2022-05-26 | fawiki | true | control | ui.toc | 320px-719px | 1 |
2022-05-26 | fawiki | true | control | ui.toc | 720px-999px | 6 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 8 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 42 |
2022-05-26 | fawiki | true | treatment | ui.sidebar-toc | 720px-999px | 1 |
2022-05-26 | foundationwiki | false | control | ui.toc | 1200px-2000px | 16 |
2022-05-26 | foundationwiki | true | control | ui.toc | 1000px-1199px | 2 |
2022-05-26 | foundationwiki | true | control | ui.toc | 1200px-2000px | 5 |
2022-05-26 | foundationwiki | true | control | ui.toc | 320px-719px | 5 |
2022-05-26 | foundationwiki | true | control | ui.toc | 720px-999px | 2 |
2022-05-26 | foundationwiki | true | control | ui.toc | >2000px | 1 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
2022-06-15 | srwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 3 |
2022-06-15 | srwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 63 |
2022-06-15 | thwiki | false | control | ui.toc | 1200px-2000px | 22 |
2022-06-15 | thwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 6 |
2022-06-15 | thwiki | true | control | ui.toc | 1000px-1199px | 134 |
2022-06-15 | thwiki | true | control | ui.toc | 1200px-2000px | 655 |
2022-06-15 | thwiki | true | control | ui.toc | 320px-719px | 22 |
2022-06-15 | thwiki | true | control | ui.toc | 720px-999px | 43 |
2022-06-15 | thwiki | true | control | ui.toc | >2000px | 10 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 50 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 332 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | 720px-999px | 5 |
2022-06-15 | thwiki | true | treatment | ui.sidebar-toc | >2000px | 1 |
2022-06-15 | trwiki | false | control | ui.toc | 1000px-1199px | 1 |
2022-06-15 | trwiki | false | control | ui.toc | 1200px-2000px | 26 |
2022-06-15 | trwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 3 |
2022-06-15 | trwiki | true | control | ui.toc | 1200px-2000px | 19 |
2022-06-15 | trwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 1 |
2022-06-15 | vecwiki | true | control | ui.toc | 720px-999px | 1 |
2022-06-15 | viwiki | false | control | ui.toc | 1000px-1199px | 1 |
2022-06-15 | viwiki | false | control | ui.toc | 1200px-2000px | 25 |
2022-06-15 | viwiki | false | control | ui.toc | 720px-999px | 3 |
2022-06-15 | viwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 22 |
2022-06-15 | viwiki | true | control | ui.toc | 1000px-1199px | 104 |
2022-06-15 | viwiki | true | control | ui.toc | 1200px-2000px | 1231 |
2022-06-15 | viwiki | true | control | ui.toc | 320px-719px | 21 |
2022-06-15 | viwiki | true | control | ui.toc | 720px-999px | 63 |
2022-06-15 | viwiki | true | control | ui.toc | >2000px | 5 |
2022-06-15 | viwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 35 |
2022-06-15 | viwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 417 |
# Total clicks over the whole period per wiki, anonymity flag, test group,
# event name and viewport-size bucket.
df_test <- summarize(
  group_by(df_clicks, wiki, isanon, test_group, event_name, view_size),
  total_clicks = sum(clicks),
  .groups = "drop"
)

# frwiki rows where `isanon` is 'false'.
df_test %>%
  filter(wiki == "frwiki", isanon == "false")
wiki | isanon | test_group | event_name | view_size | total_clicks |
---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <int> |
frwiki | false | control | ui.toc | >2000px | 243 |
frwiki | false | control | ui.toc | 1000px-1199px | 812 |
frwiki | false | control | ui.toc | 1200px-2000px | 6548 |
frwiki | false | control | ui.toc | 320px-719px | 183 |
frwiki | false | control | ui.toc | 720px-999px | 355 |
frwiki | false | treatment | ui.sidebar-toc | >2000px | 40 |
frwiki | false | treatment | ui.sidebar-toc | 1000px-1199px | 180 |
frwiki | false | treatment | ui.sidebar-toc | 1200px-2000px | 1376 |
frwiki | false | treatment | ui.sidebar-toc | 320px-719px | 1 |
frwiki | false | treatment | ui.toc | <320px | 2 |
frwiki | false | treatment | ui.toc | >2000px | 47 |
frwiki | false | treatment | ui.toc | 1000px-1199px | 184 |
frwiki | false | treatment | ui.toc | 1200px-2000px | 1769 |
frwiki | false | treatment | ui.toc | 320px-719px | 41 |
frwiki | false | treatment | ui.toc | 720px-999px | 93 |
# frwiki rows where `isanon` is 'true'.
df_test %>%
  filter(wiki == "frwiki", isanon == "true")
wiki | isanon | test_group | event_name | view_size | total_clicks |
---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <int> |
frwiki | true | control | ui.sidebar-toc | 1200px-2000px | 8 |
frwiki | true | control | ui.toc | <320px | 87 |
frwiki | true | control | ui.toc | >2000px | 4877 |
frwiki | true | control | ui.toc | 1000px-1199px | 29176 |
frwiki | true | control | ui.toc | 1200-2000 | 1 |
frwiki | true | control | ui.toc | 1200px-2000px | 214918 |
frwiki | true | control | ui.toc | 320px-719px | 3662 |
frwiki | true | control | ui.toc | 720px-999px | 10743 |
frwiki | true | treatment | ui.sidebar-toc | <320px | 4 |
frwiki | true | treatment | ui.sidebar-toc | >2000px | 677 |
frwiki | true | treatment | ui.sidebar-toc | 1000px-1199px | 8394 |
frwiki | true | treatment | ui.sidebar-toc | 1200px-2000px | 41701 |
frwiki | true | treatment | ui.sidebar-toc | 320px-719px | 64 |
frwiki | true | treatment | ui.sidebar-toc | 720px-999px | 138 |
frwiki | true | treatment | ui.toc | <320px | 13 |
frwiki | true | treatment | ui.toc | >2000px | 863 |
frwiki | true | treatment | ui.toc | 1000px-1199px | 4610 |
frwiki | true | treatment | ui.toc | 1200px-2000px | 38449 |
frwiki | true | treatment | ui.toc | 320px-719px | 682 |
frwiki | true | treatment | ui.toc | 720px-999px | 1911 |
Note (2022-07-06): The data shows that, in the treatment group, some sessions saw the old ToC and some sessions saw the new ToC, regardless of viewport size. For example, on frwiki more than half of the treatment-group sessions with a viewport larger than 1000px were shown the old ToC. This needs to be discussed with the engineers. More analysis by viewport size can be found at Clicks on ToC by viewport size.
With such a test group assignment, the AB test analysis of scrolls to the ToC and of reading time is invalid, because the schemas mediawiki_web_ab_test_enrollment, mediawiki_reading_depth and mediawiki_web_ui_scroll do not record events on the old ToC and the new ToC separately. The treatment group and the control group cannot be correctly categorized based purely on the schema mediawiki_web_ab_test_enrollment.
# Daily ToC clicks by test group for rows where `isanon` is 'false', one panel
# per wiki. Control clicks on `ui.sidebar-toc` and treatment clicks on `ui.toc`
# are dropped.
# `df_clicks` has one row per viewport-size bucket, so the clicks are summed
# per date, wiki and group before plotting; otherwise geom_line() would join
# several points per day into a zig-zag instead of showing the daily total.
# NOTE(review): viewport buckets below 1000px are not filtered out here.
g_click_loggedin <- df_clicks %>%
  filter(isanon == "false") %>%
  filter(
    !(test_group == "control" & event_name == "ui.sidebar-toc") &
      !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  group_by(event_date, wiki, test_group) %>%
  summarize(clicks = sum(clicks), .groups = "drop") %>%
  ggplot(mapping = aes(x = event_date, y = clicks, color = test_group)) +
  geom_line(size = 1.5) +
  facet_wrap(~wiki, nrow = 6, scales = "free_y") +
  scale_color_manual(values = c("#666666", "#000099"), name = "group") +
  labs(
    title = "Daily clicks on table of content by test group",
    x = "Date",
    y = "Clicks",
    caption = "User type: logged-in User"
  ) +
  theme_light(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 14, colour = "black"),
    plot.caption = element_text(hjust = 0, face = "italic"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
g_click_loggedin
# Total ToC clicks per wiki and test group for logged-in users.
# The exclusion list previously spelled its last entry 'viwikibook', which
# never matches the 'viwikibooks' project, so that wiki was not dropped
# (presumably unintentionally); the literal is corrected here.
# `!is.na(wiki)` keeps the NA handling of the original chained `!=` tests.
df_c_loggedin <- df_clicks %>%
  filter(
    isanon == "false",
    !is.na(wiki),
    !wiki %in% c(
      "arywiki", "foundationwiki", "incubatorwiki", "vecwiki", "viwikibooks"
    )
  ) %>%
  # Count only clicks on the ToC variant that belongs to the assigned group:
  # 'ui.toc' for control, 'ui.sidebar-toc' for treatment.
  filter(
    !(test_group == "control" & event_name == "ui.sidebar-toc") &
      !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  group_by(wiki, test_group) %>%
  summarize(total_clicks = sum(clicks), .groups = "drop")
head(df_c_loggedin)
wiki | test_group | total_clicks |
---|---|---|
<chr> | <chr> | <int> |
bnwiki | control | 49 |
bnwiki | treatment | 47 |
dewikivoyage | control | 60 |
dewikivoyage | treatment | 44 |
euwiki | control | 67 |
euwiki | treatment | 75 |
# One row per wiki with `control` and `treatment` click totals side by side,
# plus the relative change of treatment versus control in percent.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
# spread()'s alphabetical column order (control before treatment).
df_c_loggedin_W <- df_c_loggedin %>%
  pivot_wider(
    names_from = test_group,
    values_from = total_clicks,
    names_sort = TRUE
  ) %>%
  mutate(
    pct_change = round((treatment - control) / control * 100, 2)
  )
# Bar chart of total ToC clicks per test group, one facet per wiki
# (logged-in users), with the raw total printed on top of each bar.
barchart_clicks_loggedin <- ggplot(
  df_c_loggedin,
  aes(x = test_group, y = total_clicks, fill = test_group)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste(total_clicks)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(vars(wiki), scales = "free_y") +
  scale_fill_manual(name = "feature", values = c("#666666", "#000099")) +
  labs(
    title = "Number of total clicks on table of content",
    y = "Number of total clicks on table of content",
    caption = "User type: Logged-in User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    # The group is shown by fill colour and legend, so x tick labels are hidden.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    legend.position = "bottom"
  )
barchart_clicks_loggedin
df_c_loggedin_W
wiki | control | treatment | pct_change |
---|---|---|---|
<chr> | <int> | <int> | <dbl> |
bnwiki | 49 | 47 | -4.08 |
dewikivoyage | 60 | 44 | -26.67 |
euwiki | 67 | 75 | 11.94 |
fawiki | 678 | 243 | -64.16 |
frwiki | 8141 | 1597 | -80.38 |
frwikiquote | 5 | 2 | -60.00 |
frwiktionary | 64 | 15 | -76.56 |
hewiki | 840 | 234 | -72.14 |
kowiki | 512 | 141 | -72.46 |
plwikinews | 4 | 1 | -75.00 |
ptwiki | 1904 | 425 | -77.68 |
ptwikinews | 1 | 10 | 900.00 |
ptwikiversity | 7 | 20 | 185.71 |
srwiki | 138 | 50 | -63.77 |
thwiki | 262 | 124 | -52.67 |
trwiki | 448 | 259 | -42.19 |
viwiki | 754 | 218 | -71.09 |
# Lollipop chart of the percent change in ToC clicks (treatment vs control)
# per wiki for logged-in users. Only wikis with more than 50 control clicks
# are drawn.
pct_g <- ggplot(filter(df_c_loggedin_W, control > 50)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(
    aes(x = wiki, y = pct_change, ymin = 0, ymax = pct_change)
  ) +
  # Wiki names are written next to the points, nudged away from the zero line.
  geom_text(
    aes(
      x = wiki,
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 300)) +
  labs(
    title = "Percent change in clicks on ToC in the AB test -- logged-in users",
    x = "Wiki Project",
    y = "Percent Change"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
pct_g
Average percentage change
Average of the percent changes observed on each early adopter wiki (%)
# Unweighted mean of the per-wiki percent changes (logged-in users), limited
# to wikis with more than 50 control clicks. is.numeric() returned one TRUE for
# the whole column and filtered nothing; is.finite() is evaluated per row and
# drops NA, NaN and Inf values.
mean(
  filter(df_c_loggedin_W, is.finite(pct_change) & control > 50)$pct_change,
  na.rm = TRUE
)
# Total ToC clicks per wiki and test group for anonymous users.
# The exclusion list previously spelled its last entry 'viwikibook', which
# never matches the 'viwikibooks' project, so that wiki was not dropped
# (presumably unintentionally); the literal is corrected here.
# `!is.na(wiki)` keeps the NA handling of the original chained `!=` tests.
df_c_anon <- df_clicks %>%
  filter(
    isanon == "true",
    !is.na(wiki),
    !wiki %in% c(
      "arywiki", "foundationwiki", "incubatorwiki", "vecwiki", "viwikibooks"
    )
  ) %>%
  # Count only clicks on the ToC variant that belongs to the assigned group:
  # 'ui.toc' for control, 'ui.sidebar-toc' for treatment.
  filter(
    !(test_group == "control" & event_name == "ui.sidebar-toc") &
      !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  group_by(wiki, test_group) %>%
  summarize(total_clicks = sum(clicks), .groups = "drop")
df_c_anon
wiki | test_group | total_clicks |
---|---|---|
<chr> | <chr> | <int> |
bnwiki | control | 1932 |
bnwiki | treatment | 1320 |
dewikivoyage | control | 960 |
dewikivoyage | treatment | 737 |
euwiki | control | 1098 |
euwiki | treatment | 500 |
fawiki | control | 9773 |
fawiki | treatment | 4885 |
frwiki | control | 263464 |
frwiki | treatment | 50978 |
frwikiquote | control | 53 |
frwikiquote | treatment | 20 |
frwiktionary | control | 1720 |
frwiktionary | treatment | 458 |
hewiki | control | 15583 |
hewiki | treatment | 5384 |
kowiki | control | 28721 |
kowiki | treatment | 7762 |
plwikinews | control | 1 |
ptwiki | control | 51066 |
ptwiki | treatment | 11909 |
ptwikiversity | control | 108 |
ptwikiversity | treatment | 35 |
srwiki | control | 3521 |
srwiki | treatment | 1595 |
thwiki | control | 15265 |
thwiki | treatment | 6170 |
trwiki | control | 222 |
trwiki | treatment | 38 |
viwiki | control | 26406 |
viwiki | treatment | 9007 |
viwikibooks | control | 6 |
viwikibooks | treatment | 12 |
# Daily ToC clicks per wiki for anonymous users, one line per test group.
# Rows where the clicked ToC variant does not belong to the assigned group
# (sidebar ToC clicks in control, old ToC clicks in treatment) are dropped
# before plotting.
g_click_anon <- df_clicks %>%
  filter(
    isanon == "true",
    !((test_group == "control" & event_name == "ui.sidebar-toc") |
      (test_group == "treatment" & event_name == "ui.toc"))
  ) %>%
  ggplot(aes(x = event_date, y = clicks, colour = test_group)) +
  geom_line(size = 1.5) +
  # Each wiki gets its own y axis because traffic differs by orders of magnitude.
  facet_wrap(vars(wiki), nrow = 6, scales = "free_y") +
  scale_colour_manual(name = "group", values = c("#666666", "#000099")) +
  labs(
    title = "Daily clicks on table of content by test group",
    x = "Date",
    y = "Clicks",
    caption = "User type: Anonymous User"
  ) +
  theme_light(base_size = 16) +
  theme(
    plot.title = element_text(size = 20),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.text = element_text(size = 14, colour = "black"),
    axis.text.x = element_text(size = 10),
    legend.position = "bottom"
  )
g_click_anon
geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?
# Bar chart of total ToC clicks per test group, one facet per wiki
# (anonymous users), with the raw total printed on top of each bar.
barchart_clicks_anon <- ggplot(
  df_c_anon,
  aes(x = test_group, y = total_clicks, fill = test_group)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste(total_clicks)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(vars(wiki), scales = "free_y") +
  scale_fill_manual(name = "feature", values = c("#666666", "#000099")) +
  labs(
    title = "Number of total clicks on table of content",
    y = "Number of total clicks on table of content",
    caption = "User type: Anonymous User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    # The group is shown by fill colour and legend, so x tick labels are hidden.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    legend.position = "bottom"
  )
barchart_clicks_anon
# One row per wiki with `control` and `treatment` click totals side by side
# (anonymous users). A wiki that has no rows for one group gets 0 clicks for it.
# pivot_wider() replaces the superseded spread() and the whole-frame NA
# overwrite; names_sort = TRUE keeps spread()'s alphabetical column order.
df_c_anon_W <- df_c_anon %>%
  pivot_wider(
    names_from = test_group,
    values_from = total_clicks,
    values_fill = 0,
    names_sort = TRUE
  )
df_c_anon_W
wiki | control | treatment |
---|---|---|
<chr> | <int> | <int> |
bnwiki | 1932 | 1320 |
dewikivoyage | 960 | 737 |
euwiki | 1098 | 500 |
fawiki | 9773 | 4885 |
frwiki | 263464 | 50978 |
frwikiquote | 53 | 20 |
frwiktionary | 1720 | 458 |
hewiki | 15583 | 5384 |
kowiki | 28721 | 7762 |
plwikinews | 1 | 0 |
ptwiki | 51066 | 11909 |
ptwikiversity | 108 | 35 |
srwiki | 3521 | 1595 |
thwiki | 15265 | 6170 |
trwiki | 222 | 38 |
viwiki | 26406 | 9007 |
viwikibooks | 6 | 12 |
# Add the relative change of treatment versus control clicks, in percent,
# rounded to two decimals.
df_c_anon_W <- mutate(
  df_c_anon_W,
  pct_change = round((treatment - control) / control * 100, 2)
)
df_c_anon_W
wiki | control | treatment | pct_change |
---|---|---|---|
<chr> | <int> | <int> | <dbl> |
bnwiki | 1932 | 1320 | -31.68 |
dewikivoyage | 960 | 737 | -23.23 |
euwiki | 1098 | 500 | -54.46 |
fawiki | 9773 | 4885 | -50.02 |
frwiki | 263464 | 50978 | -80.65 |
frwikiquote | 53 | 20 | -62.26 |
frwiktionary | 1720 | 458 | -73.37 |
hewiki | 15583 | 5384 | -65.45 |
kowiki | 28721 | 7762 | -72.97 |
plwikinews | 1 | 0 | -100.00 |
ptwiki | 51066 | 11909 | -76.68 |
ptwikiversity | 108 | 35 | -67.59 |
srwiki | 3521 | 1595 | -54.70 |
thwiki | 15265 | 6170 | -59.58 |
trwiki | 222 | 38 | -82.88 |
viwiki | 26406 | 9007 | -65.89 |
viwikibooks | 6 | 12 | 100.00 |
# Lollipop chart of the percent change in ToC clicks (treatment vs control)
# per wiki for anonymous users. Only wikis with more than 50 control clicks
# are drawn.
pct_g <- ggplot(filter(df_c_anon_W, control > 50)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(
    aes(x = wiki, y = pct_change, ymin = 0, ymax = pct_change)
  ) +
  # Wiki names are written next to the points, nudged away from the zero line.
  geom_text(
    aes(
      x = wiki,
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 300)) +
  labs(
    title = "Percent change in clicks on ToC in the AB test -- anonymous users",
    x = "Wiki Project",
    y = "Percent Change"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
pct_g
Average percentage change
Average of the percent changes observed on each early adopter wiki (%)
# Unweighted mean of the per-wiki percent changes (anonymous users), limited
# to wikis with more than 50 control clicks. is.numeric() returned one TRUE for
# the whole column and filtered nothing; is.finite() is evaluated per row and
# drops NA, NaN and Inf values.
mean(
  filter(df_c_anon_W, is.finite(pct_change) & control > 50)$pct_change,
  na.rm = TRUE
)
Summary
The summary below is based on the data after removing the clicks on the old ToC in the treatment group and the clicks on the new ToC in the control group.
The hypothesis that the new table of contents is used more frequently than the previous table of contents is NOT supported by data. Instead, compared to control group, the clicks on ToC in treatment group decreased 57% for logged-in users and 61% for anonymous users.
Need to discuss with PM.
Test question: Does the new table of contents reduce the need to scroll back to the top of the page.
Note: (2022-07-06) Data shows in treatment group, some of sessions saw the old ToC, and some of sessions saw the new ToC.
With such a test group assignment, the AB test analysis of scrolls to ToC is invalid, because the schemas mediawiki_web_ab_test_enrollment and mediawiki_web_ui_scroll do not record events on the old ToC and the new ToC separately. The treatment group and control group cannot be correctly categorized based purely on the schema mediawiki_web_ab_test_enrollment.
# HiveQL that counts 'scroll-to-toc' events per day, wiki, test group,
# anonymity flag and web session for the 'skin-vector-toc-experiment'.
#   t_ab_no_dupli: enrollment sessions (May/June 2022 partitions) that appear
#                  in only one test group.
#   t_ab:          those sessions restricted to non-bot enrollments between
#                  2022-05-26 and 2022-06-15, with the earliest enrollment
#                  timestamp per session and group.
#   final SELECT:  scroll events joined on domain and session id, kept only
#                  when they occur at or after the enrollment timestamp.
# NOTE(review): the WHERE clause filters on t3 columns, so the LEFT JOIN looks
# like it behaves as an inner join (sessions without scroll events are
# dropped) -- confirm this is intended.
query_scroll <- "
WITH t_ab_no_dupli AS (
SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') AND year=2022 AND month IN (5,6)
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
),
t_ab AS(
SELECT
t1.web_session_id,
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
min(t1.meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_dupli AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
WHERE t1.wiki NOT IN ('testwiki','test2wiki')
AND year=2022
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
AND NOT is_bot
GROUP BY t1.web_session_id, t1.wiki,t1.meta.domain, t1.`group`
)
--scrolls by session
SELECT TO_DATE(t3.meta.dt) AS event_date,t_ab.wiki, t_ab.test_group, t3.is_anon, t3.web_session_id, COUNT(1) AS scrolls
FROM t_ab
LEFT JOIN event.mediawiki_web_ui_scroll AS t3
ON t_ab.domain=t3.meta.domain AND t3.web_session_id = t_ab.web_session_id
AND t3.year=2022 AND month IN (5,6)
AND CONCAT(t3.year, '-', LPAD(t3.month,2,'0'),'-', LPAD(t3.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
WHERE t_ab.session_dt <= t3.meta.dt
AND t3.action='scroll-to-toc'
GROUP BY TO_DATE(t3.meta.dt) , t_ab.wiki, t_ab.test_group, t3.is_anon, t3.web_session_id
"
# Run the query through wmfdata; the result has one row per day, wiki, test
# group, anonymity flag and web session.
df_scrolls <- wmfdata::query_hive(query_scroll)
Don't forget to authenticate with Kerberos using kinit
# Convert the event date to Date and cache the session-level scroll counts on
# disk.
df_scrolls$event_date <- as.Date(df_scrolls$event_date)
# write_csv() fails when the target directory is missing, so create it first
# (a no-op when it already exists).
dir.create("Data_out", showWarnings = FALSE, recursive = TRUE)
write_csv(df_scrolls, "Data_out/scrolls.csv")
# Total scroll-to-ToC events per wiki and test group for logged-in users.
# The exclusion list previously spelled its last entry 'viwikibook', which
# never matches the 'viwikibooks' project, so that wiki was not dropped
# (presumably unintentionally); the literal is corrected here.
# `!is.na(wiki)` keeps the NA handling of the original chained `!=` tests.
df_s_loggedin <- df_scrolls %>%
  filter(
    is_anon == "false",
    !is.na(wiki),
    !wiki %in% c(
      "arywiki", "foundationwiki", "incubatorwiki", "vecwiki", "viwikibooks"
    )
  ) %>%
  group_by(wiki, test_group) %>%
  summarize(total_scrolls = sum(scrolls), .groups = "drop")
df_s_loggedin
wiki | test_group | total_scrolls |
---|---|---|
<chr> | <chr> | <int> |
bnwiki | control | 712 |
bnwiki | treatment | 827 |
dewikivoyage | control | 128 |
dewikivoyage | treatment | 434 |
euwiki | control | 893 |
euwiki | treatment | 848 |
fawiki | control | 5855 |
fawiki | treatment | 5241 |
frwiki | control | 37162 |
frwiki | treatment | 35200 |
frwikiquote | control | 37 |
frwikiquote | treatment | 29 |
frwiktionary | control | 551 |
frwiktionary | treatment | 540 |
hewiki | control | 4462 |
hewiki | treatment | 5295 |
idwiki | control | 3185 |
idwiki | treatment | 3313 |
kowiki | control | 2847 |
kowiki | treatment | 3387 |
mediawikiwiki | control | 1679 |
mediawikiwiki | treatment | 707 |
plwikinews | control | 11 |
plwikinews | treatment | 2 |
ptwiki | control | 12184 |
ptwiki | treatment | 11482 |
ptwikinews | control | 3 |
ptwikinews | treatment | 22 |
ptwikiversity | control | 11 |
ptwikiversity | treatment | 60 |
srwiki | control | 1293 |
srwiki | treatment | 1542 |
thwiki | control | 1812 |
thwiki | treatment | 1619 |
trwiki | control | 4757 |
trwiki | treatment | 4913 |
viwiki | control | 3686 |
viwiki | treatment | 3503 |
viwikibooks | treatment | 2 |
# Daily scroll-to-ToC counts per wiki for logged-in users, one line per test
# group. `df_scrolls` has one row per day, wiki, group and web session, so the
# per-session counts are summed into a single daily total before plotting;
# without this step geom_line() connects every session's count on the same
# date instead of drawing a daily series.
g_scroll_loggedin <- df_scrolls %>%
  filter(is_anon == "false") %>%
  group_by(event_date, wiki, test_group) %>%
  summarize(scrolls = sum(scrolls), .groups = "drop") %>%
  ggplot(mapping = aes(x = event_date, y = scrolls, color = test_group)) +
  geom_line(size = 1.5) +
  # Each wiki gets its own y axis because traffic differs by orders of magnitude.
  facet_wrap(~wiki, nrow = 6, scales = "free_y") +
  scale_color_manual(values = c("#666666", "#000099"), name = "group") +
  labs(
    title = "Daily scrolls to table of content by test group",
    x = "Date",
    y = "Scrolls",
    caption = "User type: logged-in User"
  ) +
  theme_light(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 14, colour = "black"),
    plot.caption = element_text(hjust = 0, face = "italic"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
g_scroll_loggedin
geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?
# Bar chart of total scroll-to-ToC events per test group, one facet per wiki
# (logged-in users), with the raw total printed on top of each bar.
barchart_scrolls_loggedin <- ggplot(
  df_s_loggedin,
  aes(x = test_group, y = total_scrolls, fill = test_group)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste(total_scrolls)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(vars(wiki), scales = "free_y") +
  scale_fill_manual(name = "feature", values = c("#666666", "#000099")) +
  labs(
    title = "Number of scrolls to table of content",
    y = "Number of scrolls to table of content",
    caption = "User type: Logged-in User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    # The group is shown by fill colour and legend, so x tick labels are hidden.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    legend.position = "bottom"
  )
barchart_scrolls_loggedin
head(df_s_loggedin)
wiki | test_group | total_scrolls |
---|---|---|
<chr> | <chr> | <int> |
bnwiki | control | 712 |
bnwiki | treatment | 827 |
dewikivoyage | control | 128 |
dewikivoyage | treatment | 434 |
euwiki | control | 893 |
euwiki | treatment | 848 |
# One row per wiki with `control` and `treatment` scroll totals side by side
# (logged-in users); a group with no rows for a wiki is filled with 0.
df_s_loggedin_W <- pivot_wider(
  df_s_loggedin,
  names_from = test_group,
  values_from = total_scrolls,
  values_fill = 0
)
df_s_loggedin_W
wiki | control | treatment |
---|---|---|
<chr> | <int> | <int> |
bnwiki | 712 | 827 |
dewikivoyage | 128 | 434 |
euwiki | 893 | 848 |
fawiki | 5855 | 5241 |
frwiki | 37162 | 35200 |
frwikiquote | 37 | 29 |
frwiktionary | 551 | 540 |
hewiki | 4462 | 5295 |
idwiki | 3185 | 3313 |
kowiki | 2847 | 3387 |
mediawikiwiki | 1679 | 707 |
plwikinews | 11 | 2 |
ptwiki | 12184 | 11482 |
ptwikinews | 3 | 22 |
ptwikiversity | 11 | 60 |
srwiki | 1293 | 1542 |
thwiki | 1812 | 1619 |
trwiki | 4757 | 4913 |
viwiki | 3686 | 3503 |
viwikibooks | 0 | 2 |
# Add the relative change of treatment versus control scrolls, in percent,
# rounded to two decimals. A wiki with 0 control scrolls yields Inf.
df_s_loggedin_W <- mutate(
  df_s_loggedin_W,
  pct_change = round((treatment - control) / control * 100, 2)
)
df_s_loggedin_W
wiki | control | treatment | pct_change |
---|---|---|---|
<chr> | <int> | <int> | <dbl> |
bnwiki | 712 | 827 | 16.15 |
dewikivoyage | 128 | 434 | 239.06 |
euwiki | 893 | 848 | -5.04 |
fawiki | 5855 | 5241 | -10.49 |
frwiki | 37162 | 35200 | -5.28 |
frwikiquote | 37 | 29 | -21.62 |
frwiktionary | 551 | 540 | -2.00 |
hewiki | 4462 | 5295 | 18.67 |
idwiki | 3185 | 3313 | 4.02 |
kowiki | 2847 | 3387 | 18.97 |
mediawikiwiki | 1679 | 707 | -57.89 |
plwikinews | 11 | 2 | -81.82 |
ptwiki | 12184 | 11482 | -5.76 |
ptwikinews | 3 | 22 | 633.33 |
ptwikiversity | 11 | 60 | 445.45 |
srwiki | 1293 | 1542 | 19.26 |
thwiki | 1812 | 1619 | -10.65 |
trwiki | 4757 | 4913 | 3.28 |
viwiki | 3686 | 3503 | -4.96 |
viwikibooks | 0 | 2 | Inf |
# Lollipop chart of the percent change in scroll-to-ToC events (treatment vs
# control) per wiki for logged-in users. Only wikis with more than 50 control
# scrolls are drawn. The title now names the user type, as the click charts
# do; before, the logged-in and anonymous scroll charts had identical titles.
pct_g <- df_s_loggedin_W %>%
  filter(control > 50) %>%
  ggplot() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(
    aes(x = wiki, ymin = 0, ymax = pct_change, y = pct_change)
  ) +
  # Wiki names are written next to the points, nudged away from the zero line.
  geom_text(
    aes(
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      x = wiki, label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 300)) +
  labs(
    x = "Wiki Project",
    y = "Percent Change",
    title = "Percent change in scroll to ToC in the AB test -- logged-in users"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16)
  )
pct_g
Note:
For logged-in users:
On 7 wikis, the number of scrolls back to table of content is smaller in treatment group than in control group.
On 7 wikis, the number of scrolls back to table of content is larger in treatment group than in control group.
The new table of content did not reduce the need to scroll back to the table of content significantly.
# Total scroll-to-ToC events per wiki and test group for anonymous users.
# The exclusion list previously spelled its last entry 'viwikibook', which
# never matches the 'viwikibooks' project, so that wiki was not dropped
# (presumably unintentionally); the literal is corrected here.
# `!is.na(wiki)` keeps the NA handling of the original chained `!=` tests.
df_s_anon <- df_scrolls %>%
  filter(
    is_anon == "true",
    !is.na(wiki),
    !wiki %in% c(
      "arywiki", "foundationwiki", "incubatorwiki", "vecwiki", "viwikibooks"
    )
  ) %>%
  group_by(wiki, test_group) %>%
  summarize(total_scrolls = sum(scrolls), .groups = "drop")
df_s_anon
wiki | test_group | total_scrolls |
---|---|---|
<chr> | <chr> | <int> |
bnwiki | control | 8522 |
bnwiki | treatment | 8832 |
dewikivoyage | control | 2332 |
dewikivoyage | treatment | 2356 |
euwiki | control | 3172 |
euwiki | treatment | 3455 |
fawiki | control | 63418 |
fawiki | treatment | 67346 |
frwiki | control | 621892 |
frwiki | treatment | 636386 |
frwikiquote | control | 118 |
frwikiquote | treatment | 142 |
frwiktionary | control | 10347 |
frwiktionary | treatment | 9411 |
hewiki | control | 48346 |
hewiki | treatment | 51887 |
idwiki | control | 73765 |
idwiki | treatment | 72159 |
kowiki | control | 83887 |
kowiki | treatment | 82866 |
mediawikiwiki | control | 1968 |
mediawikiwiki | treatment | 1326 |
plwikinews | control | 1 |
ptwiki | control | 222333 |
ptwiki | treatment | 221240 |
ptwikinews | control | 1 |
ptwikinews | treatment | 1 |
ptwikiversity | control | 302 |
ptwikiversity | treatment | 246 |
srwiki | control | 14979 |
srwiki | treatment | 15648 |
thwiki | control | 55747 |
thwiki | treatment | 55115 |
trwiki | control | 1136 |
trwiki | treatment | 1250 |
viwiki | control | 76705 |
viwiki | treatment | 74222 |
viwikibooks | control | 35 |
viwikibooks | treatment | 60 |
# Daily scroll-to-ToC counts per wiki for anonymous users, one line per test
# group. The plot was previously stored in `g_scroll_loggedin`, overwriting
# the logged-in plot; it now has its own name. `df_scrolls` has one row per
# day, wiki, group and web session, so the per-session counts are summed into
# a single daily total before plotting; without this step geom_line() connects
# every session's count on the same date instead of drawing a daily series.
g_scroll_anon <- df_scrolls %>%
  filter(is_anon == "true") %>%
  group_by(event_date, wiki, test_group) %>%
  summarize(scrolls = sum(scrolls), .groups = "drop") %>%
  ggplot(mapping = aes(x = event_date, y = scrolls, color = test_group)) +
  geom_line(size = 1.5) +
  # Each wiki gets its own y axis because traffic differs by orders of magnitude.
  facet_wrap(~wiki, nrow = 6, scales = "free_y") +
  scale_color_manual(values = c("#666666", "#000099"), name = "group") +
  labs(
    title = "Daily scrolls to table of content by test group",
    x = "Date",
    y = "Scrolls",
    caption = "User type: Anonymous User"
  ) +
  theme_light(base_size = 16) +
  theme(
    legend.position = "bottom",
    strip.text = element_text(size = 14, colour = "black"),
    plot.caption = element_text(hjust = 0, face = "italic"),
    axis.text.x = element_text(size = 10),
    plot.title = element_text(size = 20)
  )
g_scroll_anon
geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic? geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?
# Bar chart of total scroll-to-ToC events per test group, one facet per wiki
# (anonymous users), with the raw total printed on top of each bar.
barchart_scrolls_anon <- ggplot(
  df_s_anon,
  aes(x = test_group, y = total_scrolls, fill = test_group)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = paste(total_scrolls)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(vars(wiki), scales = "free_y") +
  scale_fill_manual(name = "feature", values = c("#666666", "#000099")) +
  labs(
    title = "Number of scrolls to table of content",
    y = "Number of scrolls to table of content",
    caption = "User type: Anonymous User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    # The group is shown by fill colour and legend, so x tick labels are hidden.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    legend.position = "bottom"
  )
barchart_scrolls_anon
# One row per wiki with `control` and `treatment` scroll totals side by side
# (anonymous users; missing groups filled with 0), plus the relative change of
# treatment versus control in percent, rounded to two decimals.
df_s_anon_W <- df_s_anon %>%
  pivot_wider(
    names_from = test_group,
    values_from = total_scrolls,
    values_fill = 0
  ) %>%
  mutate(pct_change = round((treatment - control) / control * 100, 2))
df_s_anon_W
wiki | control | treatment | pct_change |
---|---|---|---|
<chr> | <int> | <int> | <dbl> |
bnwiki | 8522 | 8832 | 3.64 |
dewikivoyage | 2332 | 2356 | 1.03 |
euwiki | 3172 | 3455 | 8.92 |
fawiki | 63418 | 67346 | 6.19 |
frwiki | 621892 | 636386 | 2.33 |
frwikiquote | 118 | 142 | 20.34 |
frwiktionary | 10347 | 9411 | -9.05 |
hewiki | 48346 | 51887 | 7.32 |
idwiki | 73765 | 72159 | -2.18 |
kowiki | 83887 | 82866 | -1.22 |
mediawikiwiki | 1968 | 1326 | -32.62 |
plwikinews | 1 | 0 | -100.00 |
ptwiki | 222333 | 221240 | -0.49 |
ptwikinews | 1 | 1 | 0.00 |
ptwikiversity | 302 | 246 | -18.54 |
srwiki | 14979 | 15648 | 4.47 |
thwiki | 55747 | 55115 | -1.13 |
trwiki | 1136 | 1250 | 10.04 |
viwiki | 76705 | 74222 | -3.24 |
viwikibooks | 35 | 60 | 71.43 |
# Lollipop chart of the percent change in scroll-to-ToC events (treatment vs
# control) per wiki for anonymous users. Only wikis with more than 50 control
# scrolls are drawn. The title now names the user type, as the click charts
# do; before, the logged-in and anonymous scroll charts had identical titles.
pct_g <- df_s_anon_W %>%
  filter(control > 50) %>%
  ggplot() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(
    aes(x = wiki, ymin = 0, ymax = pct_change, y = pct_change)
  ) +
  # Wiki names are written next to the points, nudged away from the zero line.
  geom_text(
    aes(
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      x = wiki, label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 300)) +
  labs(
    x = "Wiki Project",
    y = "Percent Change",
    title = "Percent change in scroll to ToC in the AB test -- anonymous users"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16)
  )
pct_g
Note:
For anonymous users:
On 6 wikis, the number of scrolls back to table of content in treatment group is smaller than in control group.
On 9 wikis, the number of scrolls back to table of content in treatment group is larger than in control group.
The new table of content did not reduce the need to scroll back to the table of content significantly.
Data Modeling Summary.
We explored Hierarchical Generalized Linear Models using glmer() function from lme4 package to fit the session based data. We also tried Bayesian Multilevel Models using brms package, but failed to get a converged model.
For logged-in users, the coefficient of the treatment group is -0.008601, with p-value = 0.0847 > 0.05. We cannot reject the null hypothesis that the treatment group and the control group have the same level of scrolls back to ToC. We don't have data to support that the new table of contents reduces the need to scroll back to the top of the page for logged-in users.
For logged-out users, we did not find a converged model. It's an indication that the data do not fit the model well, because there are too many poorly fitting observations.
Summary:
The hypothesis is that the new table of contents reduces the need to scroll back to the table of contents. It is not supported by the data for either logged-in users or anonymous users.
Note
Test question: Does the new table of contents decrease the time people spend scrolling/scrolling quickly (if possible)
Field scroll_speed is not recorded (value is null) in mediawiki_web_ui_scroll schema. No data is collected to answer this question.
Test question: How does the new table of contents affect the time spent on a page
Note: (2022-07-06) Data shows in treatment group, some of sessions saw the old ToC, and some of sessions saw the new ToC.
With such a test group assignment, the AB test analysis of reading time is invalid, because the schemas mediawiki_web_ab_test_enrollment and mediawiki_reading_depth do not record events on the old ToC and the new ToC separately. The treatment group and control group cannot be correctly categorized based purely on the schema mediawiki_web_ab_test_enrollment.
# HiveQL that sums `total_length` and `page_length` from the reading-depth
# events per day, wiki, test group, session token and anonymity flag for the
# 'skin-vector-toc-experiment'.
#   t_ab_no_dupli: enrollment sessions (May/June 2022 partitions) that appear
#                  in only one test group.
#   t_ab:          those sessions restricted to non-bot enrollments between
#                  2022-05-26 and 2022-06-15, with the earliest enrollment
#                  timestamp per session and group.
#   final SELECT:  reading-depth events joined on domain and session token,
#                  kept only when they occur at or after the enrollment
#                  timestamp, have page_length > 0 and total_length <= 5000000.
# NOTE(review): the units of total_length and page_length are not visible
# here -- confirm against the mediawiki_reading_depth schema.
query_reading_length <- "
WITH t_ab_no_dupli AS (
SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') AND year=2022 AND month IN (5,6)
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
),
t_ab AS(
SELECT
t1.web_session_id,
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
min(t1.meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_dupli AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
WHERE t1.wiki NOT IN ('testwiki','test2wiki')
AND year=2022 AND month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
AND NOT is_bot
GROUP BY t1.web_session_id, t1.wiki,t1.meta.domain, t1.`group`
)
--reading time by session
SELECT TO_DATE(t3.meta.dt) AS event_date,t_ab.wiki, t_ab.test_group, t3.session_token, is_anon,
sum(total_length) AS sum_reading_length, sum(page_length) AS sum_page_length
FROM event.mediawiki_reading_depth AS t3
INNER JOIN t_ab
ON t_ab.domain=t3.meta.domain AND t3.session_token = t_ab.web_session_id
WHERE t3.year=2022 AND t3.month IN (5,6)
AND CONCAT(t3.year, '-', LPAD(t3.month,2,'0'),'-', LPAD(t3.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t_ab.session_dt <= t3.meta.dt
AND t3.page_length>0
-- cap the long tail
AND t3.total_length <= 5000000
GROUP BY TO_DATE(t3.meta.dt) , t_ab.wiki, t_ab.test_group, t3.session_token, is_anon
"
# Bar chart of `total_pv` per test group, one facet per wiki, for logged-in
# and anonymous users combined. The caption typo "anaonymous" is corrected.
# NOTE(review): the plotted column is named total_pv while the labels say
# "sessions" -- confirm which unit df_pv actually holds.
barchart_sessions_overall_2 <- df_pv %>%
  ggplot(aes(x = test_group, y = total_pv, fill = test_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Disabled value labels, kept from the original cell:
  # geom_text(aes(label = paste(scrolls_per_pv)), color = "black", position = position_dodge(0.9), vjust =0, size = 3) +
  facet_wrap(~wiki, scales = "free_y") +
  labs(
    y = "Number of total sessions",
    title = "Number of total sessions ",
    caption = "User type: logged-in and anonymous User"
  ) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    # The group is shown by fill colour and legend, so x tick labels are hidden.
    axis.text.x = element_blank(),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 12),
    axis.line = element_line(colour = "black"),
    legend.position = "bottom"
  )
barchart_sessions_overall_2
# Run the reading-length query on Hive through wmfdata.
df_reading_time <- query_reading_length %>%
  wmfdata::query_hive()
Don't forget to authenticate with Kerberos using kinit
# Convert the event date to Date, then summarise the logged-in rows.
df_reading_time[["event_date"]] <- as.Date(df_reading_time[["event_date"]])
df_reading_time %>%
  filter(is_anon == "false") %>%
  summary()
event_date wiki test_group session_token Min. :2022-05-26 Length:1662 Length:1662 Length:1662 1st Qu.:2022-05-31 Class :character Class :character Class :character Median :2022-06-05 Mode :character Mode :character Mode :character Mean :2022-06-05 3rd Qu.:2022-06-10 Max. :2022-06-15 is_anon sum_reading_length sum_page_length Length:1662 Min. : 66 Min. : 200 Class :character 1st Qu.: 30508 1st Qu.: 10000 Mode :character Median : 131794 Median : 31500 Mean : 839656 Mean : 124025 3rd Qu.: 669804 3rd Qu.: 100000 Max. :40756371 Max. :6143700
# Column summaries of the reading-time rows for anonymous sessions.
df_reading_time %>%
  filter(is_anon == "true") %>%
  summary()
event_date wiki test_group session_token Min. :2022-05-26 Length:334599 Length:334599 Length:334599 1st Qu.:2022-05-31 Class :character Class :character Class :character Median :2022-06-06 Mode :character Mode :character Mode :character Mean :2022-06-05 3rd Qu.:2022-06-10 Max. :2022-06-15 is_anon sum_reading_length sum_page_length Length:334599 Min. : 1 Min. : 1 Class :character 1st Qu.: 30662 1st Qu.: 10000 Mode :character Median : 108629 Median : 21000 Mean : 549457 Mean : 61662 3rd Qu.: 464763 3rd Qu.: 60000 Max. :80083588 Max. :9837000
Note:
The majority of readers are in anonymous mode.
# Ratio of summed reading length to summed page length per wiki, test group
# and anonymity flag, rounded to four decimals.
# Disabled filter kept from the original cell:
# filter(sum_reading_length!='NA' & sum_page_length>0)
df_reading_time_over_page_length <- summarize(
  group_by(df_reading_time, wiki, test_group, is_anon),
  read_time_avg = round(sum(sum_reading_length) / sum(sum_page_length), 4),
  .groups = "drop"
)
df_reading_time_over_page_length
wiki | test_group | is_anon | read_time_avg |
---|---|---|---|
<chr> | <chr> | <chr> | <dbl> |
arywiki | control | true | 3.4177 |
arywiki | treatment | true | 8.0131 |
bnwiki | control | false | 1.3724 |
bnwiki | control | true | 10.0074 |
bnwiki | treatment | false | 9.9918 |
bnwiki | treatment | true | 10.1146 |
dewikivoyage | control | false | 7.5321 |
dewikivoyage | control | true | 8.2551 |
dewikivoyage | treatment | false | 5.8670 |
dewikivoyage | treatment | true | 11.6123 |
euwiki | control | false | 11.0437 |
euwiki | control | true | 23.1421 |
euwiki | treatment | false | 40.2365 |
euwiki | treatment | true | 18.8725 |
fawiki | control | false | 6.5700 |
fawiki | control | true | 12.1744 |
fawiki | treatment | false | 6.7215 |
fawiki | treatment | true | 12.6149 |
foundationwiki | control | true | 1.3991 |
foundationwiki | treatment | false | 3.9742 |
foundationwiki | treatment | true | 1.2699 |
frwiki | control | false | 6.9265 |
frwiki | control | true | 7.3273 |
frwiki | treatment | false | 7.1395 |
frwiki | treatment | true | 6.9144 |
frwikiquote | control | false | 16.6457 |
frwikiquote | control | true | 12.3970 |
frwikiquote | treatment | true | 2.4718 |
frwiktionary | control | false | 12.6196 |
frwiktionary | control | true | 69.5489 |
⋮ | ⋮ | ⋮ | ⋮ |
mediawikiwiki | treatment | true | 16.6407 |
plwikinews | treatment | true | 64.5350 |
ptwiki | control | false | 5.0943 |
ptwiki | control | true | 10.2322 |
ptwiki | treatment | false | 5.7516 |
ptwiki | treatment | true | 10.5792 |
ptwikiversity | control | false | 68.4139 |
ptwikiversity | control | true | 47.5621 |
ptwikiversity | treatment | false | 40.0410 |
ptwikiversity | treatment | true | 23.5070 |
srwiki | control | false | 2.5893 |
srwiki | control | true | 9.5096 |
srwiki | treatment | false | 5.8222 |
srwiki | treatment | true | 8.6310 |
thwiki | control | false | 5.3195 |
thwiki | control | true | 6.8824 |
thwiki | treatment | false | 11.2635 |
thwiki | treatment | true | 7.4075 |
trwiki | control | false | 4.0360 |
trwiki | control | true | 5.1505 |
trwiki | treatment | false | 5.9574 |
trwiki | treatment | true | 7.4136 |
vecwiki | control | true | 2.6439 |
vecwiki | treatment | true | 30.1309 |
viwiki | control | false | 6.6593 |
viwiki | control | true | 8.1346 |
viwiki | treatment | false | 8.4084 |
viwiki | treatment | true | 8.4355 |
viwikibooks | control | true | 2.6588 |
viwikibooks | treatment | true | 10.4290 |
# Reading-time ratios for logged-in users (is_anon == "false"), leaving out the
# wikis in the exclusion list below.
# The previous condition tested `wiki != 'viwikibook'` twice; the wiki is
# called "viwikibooks" in the data, so the misspelt name never matched
# anything. The list now uses the real name and each wiki appears once.
df_r_loggedin <- df_reading_time_over_page_length %>%
  filter(
    is_anon == "false",
    !wiki %in% c(
      "arywiki", "foundationwiki", "incubatorwiki", "vecwiki",
      "viwikibooks", "plwikinews", "ptwikinews"
    )
  )
df_r_loggedin
wiki | test_group | is_anon | read_time_avg |
---|---|---|---|
<chr> | <chr> | <chr> | <dbl> |
bnwiki | control | false | 1.3724 |
bnwiki | treatment | false | 9.9918 |
dewikivoyage | control | false | 7.5321 |
dewikivoyage | treatment | false | 5.8670 |
euwiki | control | false | 11.0437 |
euwiki | treatment | false | 40.2365 |
fawiki | control | false | 6.5700 |
fawiki | treatment | false | 6.7215 |
frwiki | control | false | 6.9265 |
frwiki | treatment | false | 7.1395 |
frwikiquote | control | false | 16.6457 |
frwiktionary | control | false | 12.6196 |
frwiktionary | treatment | false | 12.5604 |
hewiki | control | false | 7.8766 |
hewiki | treatment | false | 8.0965 |
idwiki | control | false | 12.6832 |
idwiki | treatment | false | 6.8142 |
kowiki | control | false | 7.0495 |
kowiki | treatment | false | 8.4294 |
mediawikiwiki | control | false | 21.2185 |
mediawikiwiki | treatment | false | 31.3582 |
ptwiki | control | false | 5.0943 |
ptwiki | treatment | false | 5.7516 |
ptwikiversity | control | false | 68.4139 |
ptwikiversity | treatment | false | 40.0410 |
srwiki | control | false | 2.5893 |
srwiki | treatment | false | 5.8222 |
thwiki | control | false | 5.3195 |
thwiki | treatment | false | 11.2635 |
trwiki | control | false | 4.0360 |
trwiki | treatment | false | 5.9574 |
viwiki | control | false | 6.6593 |
viwiki | treatment | false | 8.4084 |
# Control vs treatment bars of reading time per unit of page length for
# logged-in users, one panel per wiki with an independent y axis; each bar is
# annotated with its value.
barchart_read_time_loggedin <- ggplot(
  df_r_loggedin,
  aes(x = test_group, y = read_time_avg, fill = test_group)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste(read_time_avg)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(~wiki, scales = "free_y") +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Reading time / page length (MS/byte) ",
    y = "Reading time/page length (MS/byte)",
    caption = "User type: Logged-in User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # The fill legend already identifies the groups, so x tick labels are off.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
barchart_read_time_loggedin
# One row per wiki with `control` and `treatment` columns, plus the relative
# change of treatment versus control in percent (rounded to 2 decimals).
# NOTE(review): values_fill = 0 replaces a missing group with 0, so a wiki with
# no treatment row (frwikiquote in the data shown here) is reported as -100%,
# and a missing control row would divide by zero -- confirm this is intended.
df_r_loggedin_W <- df_r_loggedin %>%
  pivot_wider(
    names_from = test_group,
    values_from = read_time_avg,
    values_fill = 0
  ) %>%
  mutate(pct_change = round((treatment - control) / control * 100, 2))
df_r_loggedin_W
wiki | is_anon | control | treatment | pct_change |
---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dbl> |
bnwiki | false | 1.3724 | 9.9918 | 628.05 |
dewikivoyage | false | 7.5321 | 5.8670 | -22.11 |
euwiki | false | 11.0437 | 40.2365 | 264.34 |
fawiki | false | 6.5700 | 6.7215 | 2.31 |
frwiki | false | 6.9265 | 7.1395 | 3.08 |
frwikiquote | false | 16.6457 | 0.0000 | -100.00 |
frwiktionary | false | 12.6196 | 12.5604 | -0.47 |
hewiki | false | 7.8766 | 8.0965 | 2.79 |
idwiki | false | 12.6832 | 6.8142 | -46.27 |
kowiki | false | 7.0495 | 8.4294 | 19.57 |
mediawikiwiki | false | 21.2185 | 31.3582 | 47.79 |
ptwiki | false | 5.0943 | 5.7516 | 12.90 |
ptwikiversity | false | 68.4139 | 40.0410 | -41.47 |
srwiki | false | 2.5893 | 5.8222 | 124.86 |
thwiki | false | 5.3195 | 11.2635 | 111.74 |
trwiki | false | 4.0360 | 5.9574 | 47.61 |
viwiki | false | 6.6593 | 8.4084 | 26.27 |
# Lollipop chart of the per-wiki percent change (logged-in users): a segment
# from 0 to the change, labelled with the wiki name just past its end.
# Axes are flipped so the wikis run down the page.
pct_g <- ggplot(df_r_loggedin_W, aes(x = wiki, y = pct_change)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(aes(ymin = 0, ymax = pct_change)) +
  geom_text(
    aes(
      # Nudge the label away from the point, on the side the segment points to.
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  # Wiki names are drawn as text labels, so the discrete axis shows no breaks.
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 700)) +
  labs(
    title = "Percent change in reading time (in second) per kilo-byte in the AB test",
    x = "Wiki Project",
    y = "Percent Change"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
pct_g
Note:
For logged-in readers:
On 5 wikis, reading time in the treatment group is shorter than in the control group.
On 12 wikis, reading time in the treatment group is longer than in the control group.
We do not observe that the new ToC reduced readers' reading time.
# Reading-time ratios for anonymous users (is_anon == "true"), leaving out the
# wikis named in the conditions below.
# NOTE(review): "viwikibook" is not a name that occurs in the data (the wiki is
# "viwikibooks"), so viwikibooks rows pass this filter -- confirm whether that
# wiki was meant to be excluded.
df_r_anon <- df_reading_time_over_page_length %>%
  filter(
    is_anon == "true",
    wiki != "arywiki",
    wiki != "foundationwiki",
    wiki != "incubatorwiki",
    wiki != "vecwiki",
    wiki != "viwikibook",
    wiki != "plwikinews",
    wiki != "ptwikinews"
  )
df_r_anon
wiki | test_group | is_anon | read_time_avg |
---|---|---|---|
<chr> | <chr> | <chr> | <dbl> |
bnwiki | control | true | 10.0074 |
bnwiki | treatment | true | 10.1146 |
dewikivoyage | control | true | 8.2551 |
dewikivoyage | treatment | true | 11.6123 |
euwiki | control | true | 23.1421 |
euwiki | treatment | true | 18.8725 |
fawiki | control | true | 12.1744 |
fawiki | treatment | true | 12.6149 |
frwiki | control | true | 7.3273 |
frwiki | treatment | true | 6.9144 |
frwikiquote | control | true | 12.3970 |
frwikiquote | treatment | true | 2.4718 |
frwiktionary | control | true | 69.5489 |
frwiktionary | treatment | true | 46.3304 |
hewiki | control | true | 11.7172 |
hewiki | treatment | true | 11.4568 |
idwiki | control | true | 19.2986 |
idwiki | treatment | true | 20.1290 |
kowiki | control | true | 16.8761 |
kowiki | treatment | true | 15.5490 |
mediawikiwiki | control | true | 13.1234 |
mediawikiwiki | treatment | true | 16.6407 |
ptwiki | control | true | 10.2322 |
ptwiki | treatment | true | 10.5792 |
ptwikiversity | control | true | 47.5621 |
ptwikiversity | treatment | true | 23.5070 |
srwiki | control | true | 9.5096 |
srwiki | treatment | true | 8.6310 |
thwiki | control | true | 6.8824 |
thwiki | treatment | true | 7.4075 |
trwiki | control | true | 5.1505 |
trwiki | treatment | true | 7.4136 |
viwiki | control | true | 8.1346 |
viwiki | treatment | true | 8.4355 |
viwikibooks | control | true | 2.6588 |
viwikibooks | treatment | true | 10.4290 |
# Control vs treatment bars of reading time per unit of page length for
# anonymous users, one panel per wiki with an independent y axis; each bar is
# annotated with its value.
barchart_read_time_anon <- ggplot(
  df_r_anon,
  aes(x = test_group, y = read_time_avg, fill = test_group)
) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste(read_time_avg)),
    position = position_dodge(0.9),
    colour = "black",
    size = 3,
    vjust = 0
  ) +
  facet_wrap(~wiki, scales = "free_y") +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Reading time / page length (MS/byte) ",
    y = "Reading time/page length (MS/byte)",
    caption = "User type: Anonymous User"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # The fill legend already identifies the groups, so x tick labels are off.
    axis.text.x = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
barchart_read_time_anon
# One row per wiki with `control` and `treatment` columns, plus the relative
# change of treatment versus control in percent (rounded to 2 decimals).
# NOTE(review): values_fill = 0 replaces a missing group with 0; a wiki lacking
# one of the two groups would then show -100% or a division by zero -- confirm
# this is intended.
df_r_anon_W <- df_r_anon %>%
  pivot_wider(
    names_from = test_group,
    values_from = read_time_avg,
    values_fill = 0
  ) %>%
  mutate(pct_change = round((treatment - control) / control * 100, 2))
df_r_anon_W
wiki | is_anon | control | treatment | pct_change |
---|---|---|---|---|
<chr> | <chr> | <dbl> | <dbl> | <dbl> |
bnwiki | true | 10.0074 | 10.1146 | 1.07 |
dewikivoyage | true | 8.2551 | 11.6123 | 40.67 |
euwiki | true | 23.1421 | 18.8725 | -18.45 |
fawiki | true | 12.1744 | 12.6149 | 3.62 |
frwiki | true | 7.3273 | 6.9144 | -5.64 |
frwikiquote | true | 12.3970 | 2.4718 | -80.06 |
frwiktionary | true | 69.5489 | 46.3304 | -33.38 |
hewiki | true | 11.7172 | 11.4568 | -2.22 |
idwiki | true | 19.2986 | 20.1290 | 4.30 |
kowiki | true | 16.8761 | 15.5490 | -7.86 |
mediawikiwiki | true | 13.1234 | 16.6407 | 26.80 |
ptwiki | true | 10.2322 | 10.5792 | 3.39 |
ptwikiversity | true | 47.5621 | 23.5070 | -50.58 |
srwiki | true | 9.5096 | 8.6310 | -9.24 |
thwiki | true | 6.8824 | 7.4075 | 7.63 |
trwiki | true | 5.1505 | 7.4136 | 43.94 |
viwiki | true | 8.1346 | 8.4355 | 3.70 |
viwikibooks | true | 2.6588 | 10.4290 | 292.24 |
# Lollipop chart of the per-wiki percent change (anonymous users): a segment
# from 0 to the change, labelled with the wiki name just past its end.
# Axes are flipped so the wikis run down the page.
pct_g <- ggplot(df_r_anon_W, aes(x = wiki, y = pct_change)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_pointrange(aes(ymin = 0, ymax = pct_change)) +
  geom_text(
    aes(
      # Nudge the label away from the point, on the side the segment points to.
      y = pct_change + ifelse(pct_change < 0, -4, 4),
      label = wiki,
      hjust = ifelse(pct_change < 0, "right", "left")
    ),
    size = 5
  ) +
  # Wiki names are drawn as text labels, so the discrete axis shows no breaks.
  scale_x_discrete(breaks = NULL) +
  coord_flip(ylim = c(-300, 400)) +
  labs(
    title = "Percent change in reading time (in second) per kilo-byte in the AB test",
    x = "Wiki Project",
    y = "Percent Change"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
pct_g
Note:
For anonymous readers: On 8 wikis, reading time in the treatment group is shorter than in the control group. On 10 wikis, reading time in the treatment group is longer than in the control group.
We do not observe that the new ToC reduced readers' reading time.
Summary:
This question was explored out of curiosity. The hypothesis is that the new table of contents would decrease the time people spend scrolling (or scrolling quickly), because a user-friendly interface helps readers quickly locate the content they are interested in, resulting in a shorter reading time.
The hypothesis is NOT supported by the data.
# Hive query: ToC click counts per session for sessions enrolled in the
# 'skin-vector-toc-experiment' AB test.
#   * t_ab_no_dupli: enrollment sessions (May-June 2022, test wikis excluded)
#     that appear in fewer than two distinct groups.
#   * t_ab: for those sessions, the test group and the earliest enrollment
#     timestamp between 2022-05-26 and 2022-06-15, non-bot rows only.
#   * final SELECT: click events on 'ui.toc' / 'ui.sidebar-toc' with
#     skinversion = 2 from DesktopWebUIActionsTracking, joined on wiki and
#     session token, kept only when they occur at or after the enrollment
#     timestamp, counted per session / wiki / isanon / test group / event name /
#     viewport bucket.
# NOTE(review): the wiki list in the final WHERE clause has no 'idwiki' or
# 'mediawikiwiki', although both appear in the reading-time results -- confirm
# whether they were left out on purpose.
query_click_session <- "
WITH t_ab_no_dupli AS (
SELECT web_session_id, wiki, meta.domain AS domain, count(distinct `group` ) AS groups, min(meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki NOT IN ('testwiki','test2wiki') AND year=2022 AND month IN (5,6)
AND experiment_name='skin-vector-toc-experiment'
GROUP BY web_session_id, wiki, meta.domain
-- exclude session ids are in both control and treatment group
HAVING groups < 2
),
t_ab AS(
SELECT
t1.web_session_id,
t1.wiki,t1.meta.domain AS domain,
t1.`group` AS test_group,
min(t1.meta.dt) AS session_dt
FROM event.mediawiki_web_ab_test_enrollment AS t1
INNER JOIN t_ab_no_dupli AS t2 ON t1.wiki=t2.wiki
AND t1.web_session_id=t2.web_session_id
WHERE t1.wiki NOT IN ('testwiki','test2wiki')
AND year=2022
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND experiment_name='skin-vector-toc-experiment'
AND NOT is_bot
GROUP BY t1.web_session_id, t1.wiki,t1.meta.domain, t1.`group`
)
-- clicks from ab test group
SELECT t3.event.token AS session_id,
wiki, event.isanon, t4.test_group,
event.name AS event_name, event.viewportSizeBucket AS view_size,
count(1) AS clicks
FROM event.DesktopWebUIActionsTracking AS t3
INNER JOIN t_ab AS t4
ON t3.wiki=t4.wiki AND t3.event.token = t4.web_session_id
WHERE t3.wiki IN ('bnwiki', 'fawiki', 'foundationwiki',
'hewiki', 'ptwikinews', 'ptwikiversity', 'srwiki',
'thwiki', 'vecwiki', 'viwiki', 'viwikibooks', 'dewikivoyage',
'euwiki', 'kowiki', 'plwikinews', 'trwiki', 'arywiki',
'frwiki', 'frwikiquote', 'frwiktionary', 'incubatorwiki', 'ptwiki'
)
AND t3.year=2022 and t3.month IN (5,6)
AND CONCAT(t3.year, '-', LPAD(t3.month,2,'0'),'-', LPAD(t3.day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND t4.session_dt <= t3.meta.dt
AND event.name IN ( 'ui.toc', 'ui.sidebar-toc')
AND event.action='click' AND event.skinversion=2
GROUP BY t3.event.token, t3.wiki, event.isanon, t4.test_group, event.name, event.viewportSizeBucket
"
# Execute the query on Hive and keep the rows as `df_click_session`.
df_click_session <- wmfdata::query_hive(query_click_session)
Don't forget to authenticate with Kerberos using kinit
# Roll the per-session click rows up to wiki / test group / user type /
# event name / viewport bucket: distinct sessions and total clicks.
df_click_session_aggr <- df_click_session %>%
  group_by(wiki, test_group, isanon, event_name, view_size) %>%
  summarise(
    num_sessions = n_distinct(session_id),
    total_clicks = sum(clicks),
    .groups = "drop"
  )
# Spot check: frwiki, logged-in users.
df_click_session_aggr %>%
  filter(wiki == "frwiki", isanon == "false")
wiki | test_group | isanon | event_name | view_size | num_sessions | total_clicks |
---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> |
frwiki | control | false | ui.toc | >2000px | 156 | 252 |
frwiki | control | false | ui.toc | 1000px-1199px | 489 | 855 |
frwiki | control | false | ui.toc | 1200px-2000px | 4137 | 6761 |
frwiki | control | false | ui.toc | 320px-719px | 126 | 187 |
frwiki | control | false | ui.toc | 720px-999px | 268 | 373 |
frwiki | treatment | false | ui.sidebar-toc | >2000px | 15 | 40 |
frwiki | treatment | false | ui.sidebar-toc | 1000px-1199px | 83 | 192 |
frwiki | treatment | false | ui.sidebar-toc | 1200px-2000px | 673 | 1406 |
frwiki | treatment | false | ui.sidebar-toc | 320px-719px | 1 | 1 |
frwiki | treatment | false | ui.toc | <320px | 2 | 2 |
frwiki | treatment | false | ui.toc | >2000px | 37 | 49 |
frwiki | treatment | false | ui.toc | 1000px-1199px | 140 | 197 |
frwiki | treatment | false | ui.toc | 1200px-2000px | 1250 | 1841 |
frwiki | treatment | false | ui.toc | 320px-719px | 34 | 42 |
frwiki | treatment | false | ui.toc | 720px-999px | 67 | 97 |
# Spot check: frwiki, anonymous users.
df_click_session_aggr %>%
  filter(wiki == "frwiki", isanon == "true")
wiki | test_group | isanon | event_name | view_size | num_sessions | total_clicks |
---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> |
frwiki | control | true | ui.sidebar-toc | 1200px-2000px | 7 | 8 |
frwiki | control | true | ui.toc | <320px | 76 | 89 |
frwiki | control | true | ui.toc | >2000px | 4361 | 4979 |
frwiki | control | true | ui.toc | 1000px-1199px | 24625 | 30025 |
frwiki | control | true | ui.toc | 1200-2000 | 1 | 1 |
frwiki | control | true | ui.toc | 1200px-2000px | 183868 | 220673 |
frwiki | control | true | ui.toc | 320px-719px | 3074 | 3805 |
frwiki | control | true | ui.toc | 720px-999px | 9101 | 11058 |
frwiki | treatment | true | ui.sidebar-toc | <320px | 4 | 4 |
frwiki | treatment | true | ui.sidebar-toc | >2000px | 457 | 698 |
frwiki | treatment | true | ui.sidebar-toc | 1000px-1199px | 5311 | 8663 |
frwiki | treatment | true | ui.sidebar-toc | 1200px-2000px | 25154 | 42990 |
frwiki | treatment | true | ui.sidebar-toc | 320px-719px | 49 | 64 |
frwiki | treatment | true | ui.sidebar-toc | 720px-999px | 117 | 142 |
frwiki | treatment | true | ui.toc | <320px | 13 | 14 |
frwiki | treatment | true | ui.toc | >2000px | 785 | 897 |
frwiki | treatment | true | ui.toc | 1000px-1199px | 4043 | 4821 |
frwiki | treatment | true | ui.toc | 1200px-2000px | 34106 | 39955 |
frwiki | treatment | true | ui.toc | 320px-719px | 610 | 713 |
frwiki | treatment | true | ui.toc | 720px-999px | 1677 | 1982 |
# Add `view_size_lab`: the viewport bucket as a factor ordered from the
# narrowest to the widest bucket, with shortened display labels.
# Bucket strings that are not among the six levels (e.g. the "1200-2000" value
# visible in the frwiki output) become NA in this column.
df_click_session_aggr$view_size_lab <- factor(
  df_click_session_aggr$view_size,
  levels = c(
    "<320px", "320px-719px", "720px-999px",
    "1000px-1199px", "1200px-2000px", ">2000px"
  ),
  labels = c("<320px", "<720px", "<1000px", "<1200px", "<2000px", "above")
)
head(df_click_session_aggr)
wiki | test_group | isanon | event_name | view_size | num_sessions | total_clicks | view_size_lab |
---|---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <int> | <int> | <fct> |
arywiki | control | true | ui.toc | 1000px-1199px | 1 | 1 | <1200px |
arywiki | control | true | ui.toc | 1200px-2000px | 2 | 2 | <2000px |
arywiki | treatment | true | ui.sidebar-toc | 1200px-2000px | 2 | 2 | <2000px |
bnwiki | control | false | ui.toc | 1000px-1199px | 6 | 7 | <1200px |
bnwiki | control | false | ui.toc | 1200px-2000px | 29 | 39 | <2000px |
bnwiki | control | false | ui.toc | 720px-999px | 4 | 5 | <1000px |
# Set the notebook plot canvas to 15 x 15.
options(repr.plot.width = 15, repr.plot.height = 15)
# Sessions with ToC clicks per viewport bucket, logged-in users, one panel per
# wiki. Control rows are limited to 'ui.toc' events and treatment rows to
# 'ui.sidebar-toc' events; foundationwiki is left out.
col_click_sessions_loggedin <- df_click_session_aggr %>%
  filter(
    isanon == "false",
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  ggplot(aes(x = view_size_lab, y = num_sessions, fill = test_group)) +
  # preserve = "single" keeps bar widths equal when one group has no data.
  geom_col(position = position_dodge(preserve = "single")) +
  geom_text(
    aes(label = paste(num_sessions)),
    position = position_dodge(width = 1),
    colour = "black",
    size = 3,
    hjust = 0.5,
    vjust = -0.5
  ) +
  facet_wrap(~wiki, ncol = 4, scales = "free_y") +
  # Extra head-room so the value labels stay inside the panel.
  scale_y_continuous(expand = expansion(mult = 0.1)) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Number of unique sessions which have clicks on table of content",
    x = "Viewport size",
    y = "Number of unique sessions",
    caption = "User type: Logged-in Users"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # Rotate the x tick labels of every panel.
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
col_click_sessions_loggedin
# Sessions with ToC clicks per viewport bucket, anonymous users, one panel per
# wiki. Control rows are limited to 'ui.toc' events and treatment rows to
# 'ui.sidebar-toc' events; foundationwiki is left out.
# The x axis uses the ordered factor `view_size_lab` (as the logged-in chart
# does) rather than the raw `view_size` string, which sorted the buckets
# alphabetically. Bucket strings outside the factor levels show up as NA.
col_click_sessions_anon <- df_click_session_aggr %>%
  filter(
    isanon == "true",
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  ggplot(aes(x = view_size_lab, y = num_sessions, fill = test_group)) +
  # preserve = "single" keeps bar widths equal when one group has no data.
  geom_col(position = position_dodge(preserve = "single")) +
  geom_text(
    aes(label = paste(num_sessions)),
    position = position_dodge(width = 1),
    colour = "black",
    size = 3,
    hjust = 0.5,
    vjust = -0.5
  ) +
  facet_wrap(~wiki, ncol = 4, scales = "free_y") +
  # Extra head-room so the value labels stay inside the panel.
  scale_y_continuous(expand = expansion(mult = 0.1)) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Number of unique sessions which have clicks on table of content",
    x = "Viewport size",
    y = "Number of unique sessions",
    caption = "User type: Anonymous Users"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # Rotate the x tick labels of every panel.
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
col_click_sessions_anon
# Total ToC clicks per viewport bucket, logged-in users, one panel per wiki.
# Control rows are limited to 'ui.toc' events and treatment rows to
# 'ui.sidebar-toc' events; foundationwiki is left out.
col_clicks_loggedin <- df_click_session_aggr %>%
  filter(
    isanon == "false",
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  ggplot(aes(x = view_size_lab, y = total_clicks, fill = test_group)) +
  # preserve = "single" keeps bar widths equal when one group has no data.
  geom_col(position = position_dodge(preserve = "single")) +
  geom_text(
    aes(label = paste(total_clicks)),
    position = position_dodge(width = 1),
    colour = "black",
    size = 3,
    hjust = 0.5,
    vjust = -0.5
  ) +
  facet_wrap(~wiki, ncol = 4, scales = "free_y") +
  # Extra head-room so the value labels stay inside the panel.
  scale_y_continuous(expand = expansion(mult = 0.1)) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Number of clicks on table of content",
    x = "Viewport size",
    y = "Number of clicks",
    caption = "User type: Logged-in Users"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # Rotate the x tick labels of every panel.
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
col_clicks_loggedin
# Total ToC clicks per viewport bucket, anonymous users, one panel per wiki.
# Control rows are limited to 'ui.toc' events and treatment rows to
# 'ui.sidebar-toc' events; foundationwiki is left out.
# The x axis uses the ordered factor `view_size_lab` (as the logged-in chart
# does) rather than the raw `view_size` string, which sorted the buckets
# alphabetically. Bucket strings outside the factor levels show up as NA.
col_clicks_anon <- df_click_session_aggr %>%
  filter(
    isanon == "true",
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc")
  ) %>%
  ggplot(aes(x = view_size_lab, y = total_clicks, fill = test_group)) +
  # preserve = "single" keeps bar widths equal when one group has no data.
  geom_col(position = position_dodge(preserve = "single")) +
  geom_text(
    aes(label = paste(total_clicks)),
    position = position_dodge(width = 1),
    colour = "black",
    size = 3,
    hjust = 0.5,
    vjust = -0.5
  ) +
  facet_wrap(~wiki, ncol = 4, scales = "free_y") +
  # Extra head-room so the value labels stay inside the panel.
  scale_y_continuous(expand = expansion(mult = 0.1)) +
  scale_fill_manual(values = c("#666666", "#000099"), name = "feature") +
  labs(
    title = "Number of clicks on table of content",
    x = "Viewport size",
    y = "Number of clicks",
    caption = "User type: Anonymous Users"
  ) +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0, face = "italic"),
    strip.background = element_rect(fill = "white"),
    # Rotate the x tick labels of every panel.
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(colour = "black"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )
col_clicks_anon
Check whether any session recorded clicks under both ui.sidebar-toc
and ui.toc
# Number of distinct ToC event names seen per session / wiki / user type /
# test group; a value above 1 means the session clicked both ToC variants.
df_test <- df_click_session %>%
  group_by(session_id, wiki, isanon, test_group) %>%
  summarise(
    n_event_name = n_distinct(event_name),
    .groups = "drop"
  )
# Summary of per-session click rows for logged-in treatment sessions, keeping
# only the event name that matches the group ('ui.sidebar-toc' for treatment)
# and leaving out foundationwiki.
df_click_session %>%
  filter(
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc"),
    test_group == "treatment",
    isanon == "false"
  ) %>%
  summary()
session_id wiki isanon test_group Length:1719 Length:1719 Length:1719 Length:1719 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character event_name view_size clicks Length:1719 Length:1719 Min. : 1.000 Class :character Class :character 1st Qu.: 1.000 Mode :character Mode :character Median : 1.000 Mean : 2.111 3rd Qu.: 2.000 Max. :76.000
# Summary of per-session click rows for logged-in control sessions, keeping
# only the event name that matches the group ('ui.toc' for control) and
# leaving out foundationwiki.
df_click_session %>%
  filter(
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc"),
    test_group == "control",
    isanon == "false"
  ) %>%
  summary()
session_id wiki isanon test_group Length:9060 Length:9060 Length:9060 Length:9060 Class :character Class :character Class :character Class :character Mode :character Mode :character Mode :character Mode :character event_name view_size clicks Length:9060 Length:9060 Min. : 1.000 Class :character Class :character 1st Qu.: 1.000 Mode :character Mode :character Median : 1.000 Mean : 1.594 3rd Qu.: 2.000 Max. :63.000
# Overlaid histograms (log10 x axis, 10 bins) of clicks per session row for
# logged-in users, one semi-transparent layer per test group.
p2 <- df_click_session %>%
  filter(
    wiki != "foundationwiki",
    !(test_group == "control" & event_name == "ui.sidebar-toc"),
    !(test_group == "treatment" & event_name == "ui.toc"),
    isanon == "false"
  ) %>%
  ggplot(aes(x = clicks, fill = test_group)) +
  geom_histogram(
    bins = 10,
    position = "identity",
    alpha = 0.4,
    colour = "#e9ecef"
  ) +
  scale_x_log10() +
  scale_fill_manual(values = c("#666666", "#000099")) +
  labs(
    title = "Check distribution of clicks per session",
    fill = ""
  ) +
  theme_light(base_size = 18)
p2
Note:
The data show that in the treatment group some sessions saw the old ToC and some saw the new ToC, regardless of viewport size. For example, on frwiki more than half of the treatment-group sessions with a viewport wider than 1000px were served the old ToC.
In the control group only a few sessions (only 2) have this issue, which can be ignored. This leads to unbalanced bucketing between the control group and the treatment group.
With such a test group assignment, the AB test analysis of scrolls to the ToC and of reading time is invalid, because the schemas mediawiki_web_ab_test_enrollment, mediawiki_reading_depth and mediawiki_web_ui_scroll do not record events on the old ToC and the new ToC separately. The treatment group and control group cannot be correctly categorized purely based on the schema mediawiki_web_ab_test_enrollment.
# Hive query: sessions that clicked BOTH ToC variants. For click events with
# skinversion = 2 on 'ui.toc' / 'ui.sidebar-toc' between 2022-05-26 and
# 2022-06-15 on the listed wikis, count the distinct event names per session
# token / wiki / isanon / viewport bucket and keep groups with more than one.
# This query does not join the AB-test enrollment table.
query_ui_tracking <- "
SELECT event.token AS session_id,
wiki, event.isanon, event.viewportSizeBucket AS view_size,
COUNT(DISTINCT event.name) AS n_event_name
FROM event.DesktopWebUIActionsTracking
WHERE wiki IN ('bnwiki', 'fawiki', 'foundationwiki',
'hewiki', 'ptwikinews', 'ptwikiversity', 'srwiki',
'thwiki', 'vecwiki', 'viwiki', 'viwikibooks', 'dewikivoyage',
'euwiki', 'kowiki', 'plwikinews', 'trwiki', 'arywiki',
'frwiki', 'frwikiquote', 'frwiktionary', 'incubatorwiki', 'ptwiki'
)
AND year=2022 and month IN (5,6)
AND CONCAT(year, '-', LPAD(month,2,'0'),'-', LPAD(day,2,'0')) BETWEEN '2022-05-26' AND '2022-06-15'
AND event.name IN ( 'ui.toc', 'ui.sidebar-toc')
AND event.action='click' AND event.skinversion=2
GROUP BY event.token, wiki, event.isanon, event.viewportSizeBucket
HAVING n_event_name >1
"
# Execute the query on Hive and keep the rows as `df_check_ui_tracking`.
df_check_ui_tracking <- wmfdata::query_hive(query_ui_tracking)
Don't forget to authenticate with Kerberos using kinit
# Hive query: enrollment rows of the 'skin-vector-toc-experiment' for one
# hard-coded frwiki session id (May-June 2022), returning the group, wiki and
# timestamp, ordered by the timestamp truncated to its first 19 characters and
# capped at 100 rows.
query_1 <- "
SELECT `group`, wiki, meta.dt, substr(meta.dt, 1,19) as dt_sub
FROM event.mediawiki_web_ab_test_enrollment
WHERE wiki='frwiki'
AND year = 2022 AND month in (5, 6)
AND web_session_id='002708c8787bcbcce17c'
AND experiment_name='skin-vector-toc-experiment'
ORDER BY dt_sub
LIMIT 100
"
# Execute the query on Hive and keep the rows as `df_1`.
df_1 <- wmfdata::query_hive(query_1)
Don't forget to authenticate with Kerberos using kinit
df_1
# Hive query: 'init' and 'click' UI-tracking events (May-June 2022) for one
# hard-coded frwiki session token, the same id that query_1 looks up, ordered
# by timestamp and capped at 100000 rows.
query_2 <- "
SELECT dt, event.action, event.name,event
FROM event.desktopwebuiactionstracking
WHERE year = 2022 and month in (5, 6 ) AND event.action IN ('init' ,'click')
AND event.token='002708c8787bcbcce17c'
AND wiki='frwiki'
ORDER BY dt LIMIT 100000
"
# Execute the query on Hive and keep the rows as `df_2`.
df_2 <- wmfdata::query_hive(query_2)
Don't forget to authenticate with Kerberos using kinit
Code credit
Mikhail Popov’s wmfdata: R package https://github.com/wikimedia/wmfdata-r
Megan Neisler’s data visualization code in https://nbviewer.org/github/wikimedia-research/Desktop-Improvements-Search-Widget-Analysis-2021/blob/main/Search-widget-ab-test-report.ipynb