import pandas as pd
import numpy as np
from datetime import date as dt
import re
## Load the Wikipedia page-view workbook (one row per page, one column per day)
## and preview its first rows
wikipedia_pivoted = pd.read_excel(io='wikipedia_dataset.xlsx')
wikipedia_pivoted.head(n=5)
Page | 2016-01-01 00:00:00 | 2016-01-02 00:00:00 | 2016-01-03 00:00:00 | 2016-01-04 00:00:00 | 2016-01-05 00:00:00 | 2016-01-06 00:00:00 | 2016-01-07 00:00:00 | 2016-01-08 00:00:00 | 2016-01-09 00:00:00 | ... | 2016-12-22 00:00:00 | 2016-12-23 00:00:00 | 2016-12-24 00:00:00 | 2016-12-25 00:00:00 | 2016-12-26 00:00:00 | 2016-12-27 00:00:00 | 2016-12-28 00:00:00 | 2016-12-29 00:00:00 | 2016-12-30 00:00:00 | 2016-12-31 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 7089 | 7592 | 7436 | 7032 | 6498 | 6887 | 7209 | 7022 | 7194 | ... | 4931.0 | 4034.0 | 5204.0 | 4331.0 | 4572.0 | 4509.0 | 5392.0 | 4950.0 | 2931.0 | 4101.0 |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 6182 | 6447 | 6437 | 5912 | 5205 | 5770 | 5998 | 5948 | 6165 | ... | 4617.0 | 3663.0 | 4882.0 | 4028.0 | 4263.0 | 4176.0 | 5068.0 | 4539.0 | 2666.0 | 3754.0 |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 20 | 49 | 33 | 49 | 34 | 26 | 66 | 28 | 18 | ... | 26.0 | 20.0 | 19.0 | 43.0 | 24.0 | 41.0 | 39.0 | 39.0 | 31.0 | 31.0 |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 5363 | 6280 | 6297 | 6288 | 6052 | 6200 | 5935 | 5958 | 6200 | ... | 3356.0 | 3366.0 | 2912.0 | 2850.0 | 3140.0 | 3379.0 | 3497.0 | 3338.0 | 3329.0 | 3550.0 |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 3186 | 3784 | 3832 | 4059 | 4022 | 4213 | 3985 | 3823 | 3794 | ... | 1719.0 | 1593.0 | 1255.0 | 1160.0 | 1311.0 | 1468.0 | 1728.0 | 1494.0 | 1515.0 | 1464.0 |
5 rows × 367 columns
## Unpivot from wide to long format: every date column becomes a
## (Date, Visits) row keyed by Page
wikipedia_unpivot = pd.melt(
    wikipedia_pivoted,
    id_vars=['Page'],
    var_name='Date',
    value_name='Visits',
)
wikipedia_unpivot
Page | Date | Visits | |
---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 |
... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 |
549000 rows × 3 columns
## Count the missing values in each column of the long-format table
wikipedia_unpivot.isna().sum()
Page 0 Date 0 Visits 87 dtype: int64
## Replace every missing value with 0, then re-count the nulls to confirm none remain
wiki = wikipedia_unpivot.fillna(value=0)
wiki.isna().sum()
Page 0 Date 0 Visits 0 dtype: int64
## To add a weekday and month column
## 'Date' was built by melt() from the former column labels, so its dtype depends
## on how pandas inferred those labels. Coerce it to datetime64 first so the .dt
## accessor is always available (this is a no-op when it is already datetime64).
wiki['Date'] = pd.to_datetime(wiki['Date'])
wiki['Weekday'] = wiki['Date'].dt.day_name()
wiki['Month'] = wiki['Date'].dt.month_name()
wiki
Page | Date | Visits | Weekday | Month | |
---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January |
... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December |
549000 rows × 5 columns
## To extract the language codes from the Page strings
## The two-letter code is only accepted when it is immediately followed by
## 'wikipedia.org_', so a '_xx.' fragment inside an article title (e.g. '_vs.')
## is never mistaken for a language. The captured value keeps its '_xx.' form;
## pages that do not match get NaN.
wiki['Language_Codes'] = wiki['Page'].str.extract(
    r"(_[a-z]{2}\.)wikipedia\.org_", expand=False
)
wiki
Page | Date | Visits | Weekday | Month | Language_Codes | |
---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. |
... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. |
549000 rows × 6 columns
## Translate each extracted language code into its language name;
## values that are not listed in the mapping are left as they are
wiki['Language_Names'] = wiki['Language_Codes'].replace({
    '_de.': 'German',
    '_es.': 'Spanish',
    '_en.': 'English',
    '_fr.': 'French',
    '_ru.': 'Russian',
    '_ja.': 'Japanese',
    '_zh.': 'Chinese',
})
wiki
Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | |
---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English |
... | ... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English |
549000 rows × 7 columns
## Creating new columns by splitting the Page column into a Title and a Device column
## The Page column contains the title searched for and the device used
## rsplit() treats '.org_' as a literal and n=1 splits only at its last occurrence,
## so a title that itself contains 'org_' (e.g. 'Cyborg_...') still yields exactly two parts.
wiki[['Title', 'Device']] = wiki['Page'].str.rsplit('.org_', n=1, expand=True)
wiki
Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | |
---|---|---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German | 1._Juli_de.wikipedia | desktop_all-agents |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents |
549000 rows × 9 columns
## Count the missing values per column after the derived columns were added
wiki.isnull().sum()
Page 0 Date 0 Visits 0 Weekday 0 Month 0 Language_Codes 6222 Language_Names 6222 Title 0 Device 0 dtype: int64
## Number of rows for each distinct Device value
wiki.groupby(by='Device').size()
Device all-access_all-agents 340014 all-access_spider 2196 desktop_all-agents 99186 mobile-web_all-agents 107604 dtype: int64
## Map the raw Device strings onto readable device-type labels
## NOTE(review): 'all-access_spider' is folded into 'All Access' together with
## 'all-access_all-agents' -- confirm that merging the two is intended.
wiki['Device_Type'] = wiki['Device'].replace({
    'all-access_all-agents': 'All Access',
    'all-access_spider': 'All Access',
    'mobile-web_all-agents': 'Mobile',
    'desktop_all-agents': 'Desktop',
})
wiki
Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | Device_Type | |
---|---|---|---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents | All Access |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents | Mobile |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German | 1._Juli_de.wikipedia | desktop_all-agents | Desktop |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents | All Access |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents | Desktop |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents | All Access |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents | All Access |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents | All Access |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents | Mobile |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents | All Access |
549000 rows × 10 columns
## Assigning a variable for only wikipedia pages
## Since we're working with only wikipedia pages, we keep just those rows
## regex=False makes this a plain substring test; as a regex the '.' in
## 'wikipedia.org' would match any character.
only_wikipedia = wiki[wiki['Page'].str.contains("wikipedia.org", regex=False)]
only_wikipedia
Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | Device_Type | |
---|---|---|---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents | All Access |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents | Mobile |
2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German | 1._Juli_de.wikipedia | desktop_all-agents | Desktop |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents | All Access |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents | Desktop |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents | All Access |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents | All Access |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents | All Access |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents | Mobile |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents | All Access |
542778 rows × 10 columns
## Give the filtered rows a fresh 0..n-1 index; the previous row labels
## are kept as an 'index' column
wikipedia_all = only_wikipedia.reset_index(drop=False)
wikipedia_all
index | Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | Device_Type | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents | All Access |
1 | 1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents | Mobile |
2 | 2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German | 1._Juli_de.wikipedia | desktop_all-agents | Desktop |
3 | 3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents | All Access |
4 | 4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents | Desktop |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
542773 | 548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents | All Access |
542774 | 548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents | All Access |
542775 | 548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents | All Access |
542776 | 548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents | Mobile |
542777 | 548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents | All Access |
542778 rows × 11 columns
## Label the row index 'Row'
wikipedia_all.index.names = ['Row']
wikipedia_all
index | Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | Device_Type | |
---|---|---|---|---|---|---|---|---|---|---|---|
Row | |||||||||||
0 | 0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents | All Access |
1 | 1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents | Mobile |
2 | 2 | 1._Juli_de.wikipedia.org_desktop_all-agents | 2016-01-01 | 20.0 | Friday | January | _de. | German | 1._Juli_de.wikipedia | desktop_all-agents | Desktop |
3 | 3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents | All Access |
4 | 4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents | Desktop |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
542773 | 548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents | All Access |
542774 | 548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents | All Access |
542775 | 548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents | All Access |
542776 | 548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents | Mobile |
542777 | 548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents | All Access |
542778 rows × 11 columns
## This confirms that the rows whose language code could not be extracted
## are the rows that are not wikipedia pages: the number of rows dropped by the
## wikipedia-only filter should equal the Language_Codes null count shown earlier.
## The counts are computed from the frames so the check stays valid if the data changes.
print(len(wiki) - len(wikipedia_all))
6222
## Total visits for each language represented in the wikipedia pages
Languages = wikipedia_all.groupby('Language_Names')['Visits'].sum()
Languages
Language_Names Chinese 2.600405e+08 English 2.404128e+10 French 1.392899e+09 German 2.593808e+09 Japanese 5.279525e+08 Russian 1.889219e+09 Spanish 1.408561e+09 Name: Visits, dtype: float64
## Total visits per weekday, ordered from the most to the least visited day
Day = (
    wikipedia_all
    .groupby('Weekday')['Visits']
    .sum()
    .sort_values(ascending=False)
)
Day
Weekday Monday 4.870715e+09 Tuesday 4.721588e+09 Wednesday 4.641320e+09 Thursday 4.537628e+09 Sunday 4.522586e+09 Saturday 4.442689e+09 Friday 4.377232e+09 Name: Visits, dtype: float64
## Total visits for each device type used to reach wikipedia
Device = wikipedia_all.groupby('Device_Type')['Visits'].sum()
Device
Device_Type All Access 1.736010e+10 Desktop 1.015998e+10 Mobile 4.593680e+09 Name: Visits, dtype: float64
## Keep only the rows whose language name is English
En_wikipedia = wiki[wiki['Language_Names'] == 'English']
En_wikipedia
Page | Date | Visits | Weekday | Month | Language_Codes | Language_Names | Title | Device | Device_Type | |
---|---|---|---|---|---|---|---|---|---|---|
0 | .xxx_en.wikipedia.org_all-access_all-agents | 2016-01-01 | 7089.0 | Friday | January | _en. | English | .xxx_en.wikipedia | all-access_all-agents | All Access |
1 | .xxx_en.wikipedia.org_mobile-web_all-agents | 2016-01-01 | 6182.0 | Friday | January | _en. | English | .xxx_en.wikipedia | mobile-web_all-agents | Mobile |
3 | 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | 2016-01-01 | 5363.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | all-access_all-agents | All Access |
4 | 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | 2016-01-01 | 3186.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | desktop_all-agents | Desktop |
5 | 2014_FIFA_World_Cup_en.wikipedia.org_mobile-we... | 2016-01-01 | 2136.0 | Friday | January | _en. | English | 2014_FIFA_World_Cup_en.wikipedia | mobile-web_all-agents | Mobile |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
548995 | Zac_Efron_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 13146.0 | Saturday | December | _en. | English | Zac_Efron_en.wikipedia | all-access_all-agents | All Access |
548996 | Zayn_Malik_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 12597.0 | Saturday | December | _en. | English | Zayn_Malik_en.wikipedia | all-access_all-agents | All Access |
548997 | Zendaya_en.wikipedia.org_all-access_all-agents | 2016-12-31 | 6217.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | all-access_all-agents | All Access |
548998 | Zendaya_en.wikipedia.org_mobile-web_all-agents | 2016-12-31 | 4650.0 | Saturday | December | _en. | English | Zendaya_en.wikipedia | mobile-web_all-agents | Mobile |
548999 | Zodiac_Killer_en.wikipedia.org_all-access_all-... | 2016-12-31 | 6659.0 | Saturday | December | _en. | English | Zodiac_Killer_en.wikipedia | all-access_all-agents | All Access |
418338 rows × 10 columns
## Trending search topics on January 1, 2016
## Keep only the New Year's Day rows, total the visits per title,
## and rank the titles from most to least visited
new_year_visits = (
    En_wikipedia
    .query("Date == '2016-01-01'")
    .groupby('Title')['Visits']
    .sum()
    .sort_values(ascending=False)
)
new_year_visits
Title Main_Page_en.wikipedia 32579831.0 Special:Search_en.wikipedia 3588408.0 Special:Book_en.wikipedia 963706.0 Star_Wars:_The_Force_Awakens_en.wikipedia 475826.0 Star_Wars_en.wikipedia 212609.0 ... Eduardo_Vargas_en.wikipedia 807.0 Eugenie_Bouchard_en.wikipedia 728.0 Missy_Franklin_en.wikipedia 431.0 Nick_Kyrgios_en.wikipedia 272.0 Canada_Day_en.wikipedia 169.0 Name: Visits, Length: 757, dtype: float64
## Trending search topics on November 8, 2016
## First keep only the 8th of November rows, then group by title to return the
## sum of visits per title, ranked from most to least visited.
## The date is written in zero-padded ISO form (as in the January 1 query) so the
## comparison does not depend on lenient date-string parsing.
November8 = En_wikipedia.query("Date == '2016-11-08'").groupby(['Title'])['Visits'].sum().sort_values(ascending=False)
November8
Title Main_Page_en.wikipedia 49232448.0 Special:Search_en.wikipedia 4493900.0 United_States_presidential_election,_2016_en.wikipedia 1519180.0 Donald_Trump_en.wikipedia 1057298.0 Special:RecentChangesLinked_en.wikipedia 1024401.0 ... Lycos_en.wikipedia 320.0 Missy_Franklin_en.wikipedia 308.0 Nick_Kyrgios_en.wikipedia 176.0 Elena_Delle_Donne_en.wikipedia 160.0 User:GoogleAnalitycsRoman/google-api_en.wikipedia 0.0 Name: Visits, Length: 757, dtype: float64
## Write each summary table to its own CSV file for visualization
for csv_name, table in (
    ('Day.csv', Day),
    ('Device.csv', Device),
    ('Languages.csv', Languages),
    ('new_year_visits.csv', new_year_visits),
    ('November8.csv', November8),
):
    table.to_csv(csv_name)