From task description:
"Across all languages, Wikipedia articles created with Content Translation are deleted less often than those created from scratch. For example, in 2020, 3% of new translations were deleted, compared to 12% of other new articles. However, this is not the case for all Wikipedias and some specific wikis have a higher deletion rate for translations. For example, for Indonesian (T219851#5914691) and Telugu (T244769) the deletion ratios for Content Translation were higher compared to other articles created in these wikis."
The purpose of this analysis is to identify and list the number of wikis where the deletion rate of atciles created with content translation is higher than the deletion rate for articles created with other tools. Specifically, we want to answer the following questions:
This analysis will be used as a baseline to assess the evolution of deletion rates as improvements are made.
Data comes from the mediawiki_history table and reflects the deletion ratios of main namespace articles that were created using Content Translation compared to the deletion ratio for main namespace articles created without the tool. Bots were excluded.
For the purpose of this analysis, we compared the deletion ratios for the current most recent completed quarter (Q4: April through June 2021) to the prior quarter (Q1: January through March 2019) and two 6 month periods. The reviewed time period can be adjusted as needed.
Wiki size threshold: We removed wikis where 15 or fewer articles were created with content translation during the reviewed period to reduce noise in the data and focus on wikis with more representative data.
shhh <- function(expr) suppressPackageStartupMessages(suppressWarnings(suppressMessages(expr)))
shhh({
library(tidyverse);
# Tables:
library(gt);
library(gtsummary);
})
# Q4: April through June 2021
query <-
"
-- find both cx and non-cx created articles
WITH created_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS created_cx,
COUNT(*) AS created_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-04-01' and '2021-06-30'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
AND event_type = 'create'
GROUP BY
wiki_db
),
--find all deleted articles that were created with cx
deleted_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS deleted_cx,
COUNT(*) AS deleted_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-04-01' and '2021-06-30'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
-- find revisions moved to the archive table
AND event_type = 'create'
AND revision_is_deleted_by_page_deletion = TRUE
-- remove all bots
AND SIZE(event_user_is_bot_by_historical) = 0 -- not a bot
GROUP BY
wiki_db
)
-- main query to aggregate and join sources above
SELECT
created_articles.wiki,
created_cx,
(created_total - created_cx) AS created_non_cx,
deleted_cx,
(deleted_total - deleted_cx) AS deleted_non_cx
FROM created_articles
JOIN deleted_articles ON
created_articles.wiki = deleted_articles.wiki
"
cx_deletion_ratio_q4 <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
cx_deletion_ratio_q4_overall <- cx_deletion_ratio_q4 %>%
summarise(deleted_cx_pct = paste0(round(sum(deleted_cx)/sum(created_cx) * 100, 2), "%"),
deleted_non_cx_pct = paste0(round(sum(deleted_non_cx)/sum(created_non_cx) * 100, 2), "%"),
deletion_pct_diff = paste0(round((sum(deleted_non_cx)/sum(created_non_cx)*100)-((sum(deleted_cx)/sum(created_cx))*100), 2),"%")
)
cx_deletion_ratio_q4_overall
deleted_cx_pct | deleted_non_cx_pct | deletion_pct_diff |
---|---|---|
<chr> | <chr> | <chr> |
3.88% | 5.41% | 1.52% |
# Add columns with calculated deletion ratio
cx_deletion_ratio_q4_bywiki <- cx_deletion_ratio_q4 %>%
#filter(wiki == 'arwiki') %>% # use to find ratios for single wiki
filter(created_cx > 15) %>% # remove wikis with 15 or fewer articles created using cx
mutate(deleted_cx_ratio = deleted_cx/created_cx,
deleted_non_cx_ratio = deleted_non_cx/created_non_cx,
deletion_ratio_diff = ((deleted_non_cx/created_non_cx)-(deleted_cx/created_cx)
))
cx_deletion_higher_q4 <- cx_deletion_ratio_q4_bywiki %>%
filter(deletion_ratio_diff < 0) %>% #find wikis with higher cx deletion ratio
summarise(total_wikis = n())
print(paste0("Across all wikis where more than 15 articles have been created with content translation in Q4, there were ",
cx_deletion_higher_q4[1],
" wikis where articles created with content translation were deleted more than articles created without cx"))
[1] "Across all wikis where more than 15 articles have been created with content translation in Q4, there were 15 wikis where articles created with content translation were deleted more than articles created without cx"
cx_deletion_higher_list_q4 <- cx_deletion_ratio_q4_bywiki %>%
filter(deletion_ratio_diff < 0)%>% # only wikis where cx deletion ratio is higher
arrange(deletion_ratio_diff) #sort by highest deletion ratio difference
# reformat into table
cx_deletion_higher_list_q4_tbl <- cx_deletion_higher_list_q4 %>%
gt() %>%
tab_header(
title = "Wikis with higher deletion ratios for articles created with Content Translation",
subtitle = "Reviewed Time Period: April 2021 through June 2021 (Q4)") %>%
fmt_percent(
columns = 6:8
) %>%
cols_label(wiki = "Wiki project",
created_cx = "Created CX Articles",
created_non_cx = "Created non-CX Articles",
deleted_cx = "Deleted CX Articles",
deleted_non_cx = "Deleted non-CX Articles",
deleted_cx_ratio = "CX Articles Deletion Ratio",
deleted_non_cx_ratio = "Non-CX Articles Deletion Ratio",
deletion_ratio_diff = "Deletion Ratio Difference") %>%
tab_spanner("Created Articles", 2:3) %>%
tab_spanner("Deleted Articles", 4:5) %>%
tab_spanner("Deletion Ratios", 6:8) %>%
tab_footnote(
footnote = "Excludes wikis with 15 or fewer articles created with Content Translation
during the reviewed time period",
locations = cells_column_labels(
columns = 'wiki'
)) %>%
gtsave(
"cx_deletion_higher_wikis_q4.html", inline_css = TRUE)
IRdisplay::display_html(data = cx_deletion_higher_list_q4_tbl, file = "cx_deletion_higher_wikis_q4.html")
Wikis with higher deletion ratios for articles created with Content Translation | |||||||
---|---|---|---|---|---|---|---|
Reviewed Time Period: April 2021 through June 2021 (Q4) | |||||||
1
Excludes wikis with 15 or fewer articles created with Content Translation
during the reviewed time period
|
cx_deletion_ration_highest_q4 <- cx_deletion_ratio_q4_bywiki %>%
arrange(desc(deleted_cx_ratio)) %>% #sort by highest to lowest cx deletion ratio
mutate(deleted_cx_ratio = paste0(round(deleted_cx_ratio *100,2),"%") ,
deleted_non_cx_ratio = paste0(round(deleted_non_cx_ratio *100,2),"%") ,
deletion_ratio_diff = paste0(round(deletion_ratio_diff * 100,2),"%") )
head(cx_deletion_ration_highest_q4, 5)
wiki | created_cx | created_non_cx | deleted_cx | deleted_non_cx | deleted_cx_ratio | deleted_non_cx_ratio | deletion_ratio_diff | |
---|---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <int> | <chr> | <chr> | <chr> | |
1 | arywiki | 25 | 1461 | 9 | 24 | 36% | 1.64% | -34.36% |
2 | guwiki | 16 | 83 | 4 | 41 | 25% | 49.4% | 24.4% |
3 | hiwiki | 385 | 2735 | 93 | 639 | 24.16% | 23.36% | -0.79% |
4 | kawiki | 48 | 4558 | 10 | 519 | 20.83% | 11.39% | -9.45% |
5 | ltwiki | 21 | 1959 | 4 | 888 | 19.05% | 45.33% | 26.28% |
Moroccan Arabic Wikipedia had the deletion ratio for articles created with content translation. 36% of all articles created with content translation rate were deleted comparted to only 1.64% for articles created without cx.
The highest deletion ratio for a larger size wiki (with over 100,000+ total articles) is Hindi Wikipedia with a deletion ratio of 24.2% for cx created articles compared to 23.36% for articles created without cx.
# Deletion ratios from Q1
query <-
"
-- find all created articles
WITH created_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS created_cx,
COUNT(*) AS created_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-01-01' and '2021-03-31'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
AND event_type = 'create'
-- remove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
),
--find all deleted articles
deleted_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS deleted_cx,
COUNT(*) AS deleted_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-01-01' and '2021-03-31'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
-- find revisions moved to the archive table
AND event_type = 'create'
AND revision_is_deleted_by_page_deletion = TRUE
-- remove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
)
-- main query
SELECT
created_articles.wiki,
created_cx,
(created_total - created_cx) AS created_non_cx,
deleted_cx,
(deleted_total - deleted_cx) AS deleted_non_cx
FROM created_articles
JOIN deleted_articles ON
created_articles.wiki = deleted_articles.wiki
"
cx_deletion_ratio_q1 <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
cx_deletion_ratio_q1_bywiki <- cx_deletion_ratio_q1 %>%
#filter(wiki == 'idwiki') %>%
filter(created_cx > 15) %>%
mutate(deleted_cx_ratio = deleted_cx/created_cx,
deleted_non_cx_ratio = deleted_non_cx/created_non_cx,
deletion_ratio_diff = ((deleted_non_cx/created_non_cx)-(deleted_cx/created_cx)
))
cx_deletion_higher_q1 <- cx_deletion_ratio_q1_bywiki %>%
filter(deletion_ratio_diff < 0) %>%
summarise(total_wikis = n())
print(paste0("Across all wikis where more than 15 articles have been created with content translation in Q1, there were ",
cx_deletion_higher_q1[1],
" wikis where articles created with content translation were deleted more than articles created without cx"))
[1] "Across all wikis where more than 15 articles have been created with content translation in Q1, there were 13 wikis where articles created with content translation were deleted more than articles created without cx"
The number of wikis with higher content translation deletion ratios increased by 2 from Q1 to Q4. We next compared the two lists of wikis to confirm if most of the wikis with higher deletion rates were the same across each quarter.
cx_deletion_higher_list_q1 <- cx_deletion_ratio_q1_bywiki %>%
filter(deletion_ratio_diff < 0) %>%
arrange(deletion_ratio_diff)
cx_deletion_higher_list_q1
wiki | created_cx | created_non_cx | deleted_cx | deleted_non_cx | deleted_cx_ratio | deleted_non_cx_ratio | deletion_ratio_diff |
---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> |
hawwiki | 64 | 85 | 25 | 1 | 0.39062500 | 0.011764706 | -0.3788602941 |
kuwiki | 204 | 4011 | 34 | 69 | 0.16666667 | 0.017202693 | -0.1494639741 |
lawiki | 25 | 923 | 3 | 48 | 0.12000000 | 0.052004334 | -0.0679956663 |
ltwiki | 23 | 2272 | 13 | 1135 | 0.56521739 | 0.499559859 | -0.0656575321 |
fiwiki | 83 | 9448 | 12 | 787 | 0.14457831 | 0.083298052 | -0.0612802608 |
fiu_vrowiki | 16 | 131 | 1 | 4 | 0.06250000 | 0.030534351 | -0.0319656489 |
eowiki | 123 | 5221 | 5 | 62 | 0.04065041 | 0.011875120 | -0.0287752868 |
kawiki | 122 | 5434 | 23 | 889 | 0.18852459 | 0.163599558 | -0.0249250318 |
arzwiki | 110 | 37033 | 3 | 335 | 0.02727273 | 0.009045986 | -0.0182267413 |
thwiki | 17 | 4635 | 1 | 208 | 0.05882353 | 0.044875944 | -0.0139475855 |
bewiki | 256 | 5225 | 9 | 115 | 0.03515625 | 0.022009569 | -0.0131466806 |
mrwiki | 164 | 4771 | 3 | 60 | 0.01829268 | 0.012575980 | -0.0057167030 |
bswiki | 52 | 1953 | 5 | 187 | 0.09615385 | 0.095750128 | -0.0004037181 |
intersect(cx_deletion_higher_list_q1[1], cx_deletion_higher_list_q4[1])
wiki |
---|
<chr> |
kawiki |
bewiki |
mrwiki |
There were 3 wikis that had higher deletion ratios for content translated articles both quarters.
# Current 6 Months
# Jan - June 2021
query <-
"
-- find both cx and non-cx created articles
WITH created_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS created_cx,
COUNT(*) AS created_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-01-01' and '2021-06-30'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
AND event_type = 'create'
-- rremove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
),
--find all deleted articles that were created with cx
deleted_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS deleted_cx,
COUNT(*) AS deleted_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2021-01-01' and '2021-06-30'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
AND event_type = 'create'
-- find revisions moved to the archive table
AND revision_is_deleted_by_page_deletion = TRUE
-- remove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
)
-- main query to aggregate and join sources above
SELECT
created_articles.wiki,
created_cx,
(created_total - created_cx) AS created_non_cx,
deleted_cx,
(deleted_total - deleted_cx) AS deleted_non_cx
FROM created_articles
JOIN deleted_articles ON
created_articles.wiki = deleted_articles.wiki
"
cx_deletion_ratio_current_6mo <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
cx_deletion_ratio_6cur_overall <- cx_deletion_ratio_current_6mo %>%
summarise(deleted_cx_pct = paste0(round(sum(deleted_cx)/sum(created_cx) * 100, 2), "%"),
deleted_non_cx_pct = paste0(round(sum(deleted_non_cx)/sum(created_non_cx) * 100, 2), "%"),
deletion_pct_diff = paste0(round((sum(deleted_non_cx)/sum(created_non_cx)*100)-((sum(deleted_cx)/sum(created_cx))*100), 2),"%")
)
cx_deletion_ratio_6cur_overall
deleted_cx_pct | deleted_non_cx_pct | deletion_pct_diff |
---|---|---|
<chr> | <chr> | <chr> |
3.6% | 8.47% | 4.87% |
cx_deletion_ratio_current_bywiki <- cx_deletion_ratio_current_6mo %>%
#filter(wiki == 'idwiki') %>%
filter(created_cx > 15) %>% # only review wikis with more than 15 cx articles
mutate(deleted_cx_ratio = deleted_cx/created_cx,
deleted_non_cx_ratio = deleted_non_cx/created_non_cx,
deletion_ratio_diff = ((deleted_non_cx/created_non_cx)-(deleted_cx/created_cx)
))
cx_deletion_higher_current_6mo <- cx_deletion_ratio_current_bywiki %>%
filter(deletion_ratio_diff < 0) %>%
summarise(total_wikis = n())
cx_deletion_higher_current_6mo
total_wikis |
---|
<int> |
20 |
print(paste0("Across all wikis where more than 15 articles have been created with content translation from Jan 2021 - June 2021, there were ",
cx_deletion_higher_current_6mo[1],
" wikis where articles created with content translation were deleted more than articles created without cx"))
[1] "Across all wikis where more than 15 articles have been created with content translation from Jan 2021 - June 2021, there were 20 wikis where articles created with content translation were deleted more than articles created without cx"
cx_deletion_higher_list_current <- cx_deletion_ratio_current_bywiki %>%
filter(deletion_ratio_diff < 0)%>% #only wikis with higher cx deletion ratios
arrange(deletion_ratio_diff)
# reformat into table
cx_deletion_higher_list_6mo_tbl <- cx_deletion_higher_list_current %>%
gt() %>%
tab_header(
title = "Wikis with higher deletion ratios for articles created with Content Translation",
subtitle = "Reviewed Time Period: January 2021 through June 2021") %>%
fmt_percent(
columns = 6:8
) %>%
cols_label(wiki = "Wiki project",
created_cx = "Created CX Articles",
created_non_cx = "Created non-CX Articles",
deleted_cx = "Deleted CX Articles",
deleted_non_cx = "Deleted non-CX Articles",
deleted_cx_ratio = "CX Articles Deletion Ratio",
deleted_non_cx_ratio = "Non-CX Articles Deletion Ratio",
deletion_ratio_diff = "Deletion Ratio Difference") %>%
tab_spanner("Created Articles", 2:3) %>%
tab_spanner("Deleted Articles", 4:5) %>%
tab_spanner("Deletion Ratios", 6:8) %>%
tab_footnote(
footnote = "Excludes wikis with 15 or fewer articles created with Content Translation
during the reviewed time period",
locations = cells_column_labels(
columns = 'wiki'
)) %>%
gtsave(
"cx_deletion_higher_wikis_6mo.html", inline_css = TRUE)
IRdisplay::display_html(data = cx_deletion_higher_list_6mo_tbl, file = "cx_deletion_higher_wikis_6mo.html")
Wikis with higher deletion ratios for articles created with Content Translation | |||||||
---|---|---|---|---|---|---|---|
Reviewed Time Period: January 2021 through June 2021 | |||||||
1
Excludes wikis with 15 or fewer articles created with Content Translation
during the reviewed time period
|
cx_deletion_ration_highest_current <- cx_deletion_ratio_current_bywiki %>%
arrange(desc(deleted_cx_ratio)) %>%
mutate(deleted_cx_ratio = paste0(round(deleted_cx_ratio *100,2),"%") ,
deleted_non_cx_ratio = paste0(round(deleted_non_cx_ratio *100,2),"%") ,
deletion_ratio_diff = paste0(round(deletion_ratio_diff * 100,2),"%") )
head(cx_deletion_ration_highest_current, 5)
wiki | created_cx | created_non_cx | deleted_cx | deleted_non_cx | deleted_cx_ratio | deleted_non_cx_ratio | deletion_ratio_diff | |
---|---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <int> | <chr> | <chr> | <chr> | |
1 | ltwiki | 45 | 4254 | 17 | 2041 | 37.78% | 47.98% | 10.2% |
2 | hawwiki | 68 | 128 | 25 | 25 | 36.76% | 19.53% | -17.23% |
3 | mnwiki | 30 | 1265 | 10 | 542 | 33.33% | 42.85% | 9.51% |
4 | iswiki | 30 | 2157 | 7 | 140 | 23.33% | 6.49% | -16.84% |
5 | kawiki | 170 | 10010 | 33 | 1415 | 19.41% | 14.14% | -5.28% |
Lithuanian Wikipedia had the highest deletion ratio for articles created with content translation. 37.8% of all articles created with content translation rate were deleted; however, this was still less than the percent of non content translated article deletion ratio (47.9%).
The Wiki that had the highest different in deletion ratios was Hawaiian Wikipedia. 36.8% of all articles created with cx were deleted during the reviewed time period comparted to 19.5% of articles created without content translation.
# Previous 6 Months
# July 2020 - December 2020
query <-
"
-- find both cx and non-cx created articles
WITH created_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS created_cx,
COUNT(*) AS created_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2020-07-01' and '2020-12-31'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
AND event_type = 'create'
-- remove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
),
--find all deleted articles that were created with cx
deleted_articles AS (
SELECT
wiki_db AS wiki,
SUM(CAST(ARRAY_CONTAINS(revision_tags, 'contenttranslation') AS INT)) AS deleted_cx,
COUNT(*) AS deleted_total
FROM wmf.mediawiki_history
WHERE
snapshot = '2021-08'
AND event_timestamp BETWEEN '2020-07-01' and '2020-12-31'
-- interested in main page namespaces
AND page_namespace = 0
-- only look at new page creations
AND revision_parent_id = 0
AND event_entity = 'revision'
-- find revisions moved to the archive table
AND event_type = 'create'
AND revision_is_deleted_by_page_deletion = TRUE
-- remove bots
AND SIZE(event_user_is_bot_by_historical) = 0
GROUP BY
wiki_db
)
-- main query to aggregate and join sources above
SELECT
created_articles.wiki,
created_cx,
(created_total - created_cx) AS created_non_cx,
deleted_cx,
(deleted_total - deleted_cx) AS deleted_non_cx
FROM created_articles
JOIN deleted_articles ON
created_articles.wiki = deleted_articles.wiki
"
cx_deletion_ratio_previous_6mo <- wmfdata::query_hive(query)
Don't forget to authenticate with Kerberos using kinit
cx_deletion_ratio_bywiki_previous <- cx_deletion_ratio_previous_6mo %>%
#filter(wiki == 'idwiki') %>%
filter(created_cx > 15) %>% # only wikis with at leat 15 created articles
mutate(deleted_cx_ratio = deleted_cx/created_cx,
deleted_non_cx_ratio = deleted_non_cx/created_non_cx,
deletion_ratio_diff = ((deleted_non_cx/created_non_cx)-(deleted_cx/created_cx)
))
cx_deletion_higher_previous <- cx_deletion_ratio_bywiki_previous %>%
filter(deletion_ratio_diff < 0) %>%
summarise(total_wikis = n())
cx_deletion_higher_previous
total_wikis |
---|
<int> |
21 |
print(paste0("Across all wikis where more than 15 articles have been created with content translation between July 2020 and December 2020, there were ",
cx_deletion_higher_previous[1],
" wikis where articles created with content translation were deleted more than articles created without cx"))
[1] "Across all wikis where more than 15 articles have been created with content translation between July 2020 and December 2020, there were 21 wikis where articles created with content translation were deleted more than articles created without cx"
The number of wikis with higher content translation deletion ratios decreased by 1 from July 2020 to December 2020 to January 2021 to June 2021.
We next compared the two lists of wikis to confirm if most of the wikis with higher deletion rates were the same across each quarter.
cx_deletion_higher_list_previous <- cx_deletion_ratio_bywiki_previous %>%
filter(deletion_ratio_diff < 0) %>%
arrange(deletion_ratio_diff)
cx_deletion_higher_list_previous
wiki | created_cx | created_non_cx | deleted_cx | deleted_non_cx | deleted_cx_ratio | deleted_non_cx_ratio | deletion_ratio_diff |
---|---|---|---|---|---|---|---|
<chr> | <int> | <int> | <int> | <int> | <dbl> | <dbl> | <dbl> |
fywiki | 17 | 1755 | 14 | 65 | 0.82352941 | 0.037037037 | -0.786492375 |
hawwiki | 42 | 132 | 31 | 24 | 0.73809524 | 0.181818182 | -0.556277056 |
ltwiki | 59 | 3337 | 28 | 644 | 0.47457627 | 0.192987714 | -0.281588558 |
iswiki | 26 | 2000 | 7 | 155 | 0.26923077 | 0.077500000 | -0.191730769 |
lawiki | 48 | 2979 | 9 | 158 | 0.18750000 | 0.053037932 | -0.134462068 |
hywiki | 159 | 33338 | 22 | 1080 | 0.13836478 | 0.032395465 | -0.105969315 |
azwiki | 206 | 29671 | 29 | 1885 | 0.14077670 | 0.063530046 | -0.077246653 |
arywiki | 63 | 2443 | 5 | 50 | 0.07936508 | 0.020466639 | -0.058898440 |
mywiki | 313 | 6698 | 37 | 439 | 0.11821086 | 0.065541953 | -0.052668910 |
cywiki | 122 | 1451 | 13 | 85 | 0.10655738 | 0.058580289 | -0.047977088 |
vecwiki | 20 | 10293 | 1 | 46 | 0.05000000 | 0.004469057 | -0.045530943 |
arzwiki | 133 | 355316 | 4 | 730 | 0.03007519 | 0.002054509 | -0.028020679 |
eowiki | 277 | 10800 | 8 | 90 | 0.02888087 | 0.008333333 | -0.020547533 |
zhwiki | 1512 | 80866 | 137 | 5753 | 0.09060847 | 0.071142384 | -0.019466082 |
dewiki | 505 | 119158 | 87 | 18351 | 0.17227723 | 0.154005606 | -0.018271622 |
zh_yuewiki | 35 | 23696 | 1 | 267 | 0.02857143 | 0.011267725 | -0.017303704 |
ckbwiki | 64 | 2901 | 5 | 183 | 0.07812500 | 0.063081696 | -0.015043304 |
kuwiki | 402 | 5291 | 13 | 133 | 0.03233831 | 0.025137025 | -0.007201283 |
fiwiki | 138 | 20467 | 12 | 1680 | 0.08695652 | 0.082083354 | -0.004873168 |
etwiki | 55 | 8239 | 6 | 865 | 0.10909091 | 0.104988469 | -0.004102440 |
bswiki | 62 | 2677 | 6 | 254 | 0.09677419 | 0.094882331 | -0.001891863 |
intersect(cx_deletion_higher_list_current[1], cx_deletion_higher_list_previous[1])
wiki |
---|
<chr> |
hawwiki |
iswiki |
kuwiki |
arywiki |
arzwiki |
fiwiki |
lawiki |
eowiki |
There were 8 wikis that had higher deletion ratios for content translated articles both quarters.