# Evaluate `expr` quietly: package startup messages, warnings and ordinary
# messages raised while evaluating it are all suppressed. The value of `expr`
# is returned unchanged.
shhh <- function(expr) {
  suppressPackageStartupMessages(
    suppressWarnings(
      suppressMessages(expr)
    )
  )
}
shhh({
library(tidyverse); library(glue); library(lubridate); library(scales)
})
Dialog close events (dialog-insert, dialog-done, dialog-abort, etc.) were all being logged twice for node events. A node is any component that is not editable within the page; said another way, it is any component that requires a dialog to edit (e.g. reference, image, template).
The fix was deployed on 8 October 2020.
Checks:
# Collect vefu events
#
# Hive query against event.visualeditorfeatureuse for October 2020. It counts
# events per date, editing session, feature and event type, keeping only the
# dialog-done / dialog-insert / dialog-abort / dialog-remove actions and the
# window-open-from-* actions, and excluding user agents flagged as bots.
# Actions matching 'dialog-%' are labelled 'dialog_close_event'; everything
# else that passes the filter is labelled 'window_open_event'.
#
# NOTE(review): the SQL comment below gives the deploy date as 6 October 2020,
# while the notebook's introductory text says 8 October 2020 - confirm which
# is correct. The SQL comment is part of the query string, so it is left as is.
query <-
"SELECT
date_format(dt, 'yyyy-MM-dd') AS date,
event.editingSessionID AS sessionID,
event.feature AS feature,
If(event.action LIKE 'dialog-%', 'dialog_close_event', 'window_open_event') AS event_type,
COUNT(*) as events
FROM
event.visualeditorfeatureuse
WHERE
-- fix deployed on 6 October 2020
year = 2020
AND month = 10
-- review all dialog and window open actions
AND (event.action IN ('dialog-done', 'dialog-insert', 'dialog-abort', 'dialog-remove') OR
event.action LIKE 'window-open-from-%')
-- remove bots
AND useragent.is_bot = false
GROUP BY
date_format(dt, 'yyyy-MM-dd'),
event.editingSessionID,
event.feature,
If(event.action LIKE 'dialog-%', 'dialog_close_event', 'window_open_event')
"
# Run the query through wmfdata; the result is stored as collect_vefu_events.
# The notebook output that follows reminds the user to authenticate with
# Kerberos (kinit) - presumably required before this call; verify.
collect_vefu_events <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
# Parse the 'yyyy-MM-dd' strings returned by the query into Date objects.
collect_vefu_events$date <- as.Date(collect_vefu_events$date, format = "%Y-%m-%d")

# Reshape to one row per (date, session, feature) with the dialog-close and
# window-open counts side by side, keep only node features for which both
# event types were recorded, and flag rows where close events outnumber
# open events as duplicates.
duplicate_events_daily <- collect_vefu_events %>%
  # pivot_wider() replaces the superseded spread(). names_sort = TRUE keeps
  # the alphabetical column order that spread() produced (needs tidyr >= 1.1.0).
  pivot_wider(
    names_from = event_type,
    values_from = events,
    names_sort = TRUE
  ) %>%
  filter(
    # review events with both dialog close and window-open-event
    !is.na(dialog_close_event),
    !is.na(window_open_event),
    # review only node features
    feature %in% c('citoid', 'transclusion', 'media', 'reference', 'cite-book', 'cite-web', 'mwcite')
  ) %>%
  # is_duplicate is kept as the strings 'true' / 'false'.
  mutate(is_duplicate = ifelse(dialog_close_event <= window_open_event, 'false', 'true')) %>%
  # spread() used to sort rows by the id columns; pivot_wider() does not, so
  # sort explicitly to keep the same row order within each date.
  arrange(date, sessionid, feature)

head(duplicate_events_daily, 50)
# Need to investigate mWsave and cite events more - we don't start recording until the 13th or 14th with dialog-close-event and window-open events
date | sessionid | feature | dialog_close_event | window_open_event | is_duplicate | |
---|---|---|---|---|---|---|
<date> | <chr> | <chr> | <int> | <int> | <chr> | |
1 | 2020-10-01 | 0001a213d8b2c397591e | citoid | 2 | 1 | true |
2 | 2020-10-01 | 001db27a38fc96a5a4ad | citoid | 2 | 1 | true |
3 | 2020-10-01 | 003b70fd55795a20d6ab | transclusion | 2 | 1 | true |
4 | 2020-10-01 | 00611273291617e0f1e2 | transclusion | 2 | 1 | true |
5 | 2020-10-01 | 00717b7ab5e1fcd56ac3 | transclusion | 2 | 1 | true |
6 | 2020-10-01 | 007263d77c643caec47b | transclusion | 2 | 1 | true |
7 | 2020-10-01 | 0082a616346bef8d958c | citoid | 2 | 1 | true |
8 | 2020-10-01 | 008344302959c32d8afc | citoid | 2 | 1 | true |
9 | 2020-10-01 | 008879198041930f8cef | citoid | 2 | 1 | true |
10 | 2020-10-01 | 00a97972c83cba8fa9a7 | transclusion | 2 | 1 | true |
11 | 2020-10-01 | 00af6c0be4a5b46f8590 | transclusion | 4 | 3 | true |
12 | 2020-10-01 | 00b226620a0ab6c88663 | citoid | 2 | 1 | true |
13 | 2020-10-01 | 00b26477187d073dc0ea | transclusion | 2 | 2 | false |
14 | 2020-10-01 | 00c78b79be440da89f66 | transclusion | 2 | 1 | true |
15 | 2020-10-01 | 00c84f144789eaa4671c | transclusion | 4 | 2 | true |
16 | 2020-10-01 | 00d8539350c95a15fcc9 | transclusion | 4 | 2 | true |
17 | 2020-10-01 | 00e75f40bf8976a9093f | transclusion | 10 | 5 | true |
18 | 2020-10-01 | 0111834093cdea5b1a69 | transclusion | 2 | 1 | true |
19 | 2020-10-01 | 0111f9b0884b1ef77673 | transclusion | 2 | 1 | true |
20 | 2020-10-01 | 0114802093feacf36b2c | citoid | 2 | 1 | true |
21 | 2020-10-01 | 0115f0e0efa30e31072b | citoid | 2 | 1 | true |
22 | 2020-10-01 | 012abb5076c543fa6474 | transclusion | 6 | 3 | true |
23 | 2020-10-01 | 0135dfd71d231e4da210 | transclusion | 2 | 1 | true |
24 | 2020-10-01 | 0141f400de49e9608cf7 | citoid | 2 | 1 | true |
25 | 2020-10-01 | 01486f68ca71bb676d5f | citoid | 2 | 1 | true |
26 | 2020-10-01 | 014b2d0dbefc4ca6ca40 | transclusion | 2 | 3 | false |
27 | 2020-10-01 | 0152f6e144390ccecb83 | transclusion | 2 | 1 | true |
28 | 2020-10-01 | 0153bba7d535780c0880 | media | 2 | 1 | true |
29 | 2020-10-01 | 015754417a186e0de223 | transclusion | 1 | 1 | false |
30 | 2020-10-01 | 01589699a9a3ccb12b61 | transclusion | 4 | 2 | true |
31 | 2020-10-01 | 015f10017d919561dc98 | transclusion | 2 | 1 | true |
32 | 2020-10-01 | 015f6380c19b3b40165c | transclusion | 2 | 1 | true |
33 | 2020-10-01 | 0168f07f56a34cdec016 | media | 2 | 1 | true |
34 | 2020-10-01 | 0175d2d0cd509d3a1bc3 | citoid | 10 | 5 | true |
35 | 2020-10-01 | 0175d2d0cd509d3a1bc3 | transclusion | 6 | 3 | true |
36 | 2020-10-01 | 017f07d321c092735915 | transclusion | 2 | 2 | false |
37 | 2020-10-01 | 01883dc6f39479039a89 | transclusion | 2 | 1 | true |
38 | 2020-10-01 | 01c45ce282a8a4dbc329 | citoid | 2 | 1 | true |
39 | 2020-10-01 | 01ca2d932be004a17b36 | citoid | 2 | 2 | false |
40 | 2020-10-01 | 01d295e794156fc2eb0b | transclusion | 2 | 4 | false |
41 | 2020-10-01 | 01dbe65f27c9aa4397af | citoid | 4 | 2 | true |
42 | 2020-10-01 | 01e0b31553cd89772ebf | reference | 2 | 1 | true |
43 | 2020-10-01 | 01e2962b8a6b2a9b58d8 | transclusion | 2 | 1 | true |
44 | 2020-10-01 | 020948a1d0b41146a0c7 | citoid | 2 | 1 | true |
45 | 2020-10-01 | 021231b0c863367074b3 | citoid | 6 | 3 | true |
46 | 2020-10-01 | 0221ece048c519cde207 | citoid | 4 | 2 | true |
47 | 2020-10-01 | 0221ece048c519cde207 | transclusion | 2 | 1 | true |
48 | 2020-10-01 | 0223733a8ae318d8ef96 | media | 2 | 1 | true |
49 | 2020-10-01 | 024981033d11a010c9f0 | transclusion | 2 | 1 | true |
50 | 2020-10-01 | 0257378ee716c70954a0 | citoid | 2 | 1 | true |
# Plot chart of duplicate events
# Set the notebook plot size BEFORE printing the plot: the repr.plot.* options
# only affect plots rendered after they are set.
options(repr.plot.width = 15, repr.plot.height = 10)

# Total dialog close events per day and feature, drawn as one line per feature.
p <- duplicate_events_daily %>%
  group_by(date, feature) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "`summarise()` regrouping output" message.
  summarise(num_close_events = sum(dialog_close_event), .groups = "drop") %>%
  ggplot(aes(x = date, y = num_close_events, color = feature)) +
  geom_line(size = 1.5) +
  scale_y_continuous() +
  labs(
    y = "Number of close events per day",
    x = "Date",
    title = "Daily dialog close events by feature"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = 'bottom'
  )
p
`summarise()` regrouping output by 'date' (override with `.groups` argument)
# ggsave() does not create a missing output directory by default, so make sure
# it exists first (a no-op when the directory is already there).
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/daily_dialog_close_events.png", p, width = 16, height = 8, units = "in", dpi = 300)
# plot duplicate events
# Dialog close events per day, split by whether the session/feature row was
# flagged as a duplicate, with one facet per feature.
p <- duplicate_events_daily %>%
  group_by(date, feature, is_duplicate) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "`summarise()` regrouping output" message.
  summarise(num_close_events = sum(dialog_close_event), .groups = "drop") %>%
  ggplot(aes(x = date, y = num_close_events, color = is_duplicate)) +
  facet_wrap(~feature) +
  geom_line(size = 1.5) +
  scale_y_continuous() +
  labs(
    # The y axis carries both the duplicate and the non-duplicate series
    # (distinguished by colour), so it is labelled as close events rather
    # than duplicate events.
    y = "Number of close events per day",
    x = "Date",
    title = "Daily duplicate close events by feature"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = 'bottom'
  )
p
`summarise()` regrouping output by 'date', 'feature' (override with `.groups` argument)
# ggsave() does not create a missing output directory by default, so make sure
# it exists first (a no-op when the directory is already there).
dir.create("figures", showWarnings = FALSE, recursive = TRUE)
ggsave("figures/daily_duplicate_events.png", p, width = 16, height = 8, units = "in", dpi = 300)