import time
from datetime import timedelta
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Load the raw Netflix viewing-activity export and shorten the column
# names that the rest of the analysis refers to.
df = pd.read_csv('./Netflix/CONTENT_INTERACTION/ViewingActivity.csv')
df = df.rename(
    columns={
        'Profile Name': 'Profile',
        'Device Type': 'Device',
        'Start Time': 'Start',
    }
)
# Preview the first five rows (rendered when run as a notebook cell).
df.head(5)
Profile | Start | Duration | Attributes | Title | Supplemental Video Type | Device | Bookmark | Latest Bookmark | Country | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Amy | 2022-01-04 16:14:43 | 00:01:05 | Autoplayed: user action: None; | Resurrection: Ertugrul: Season 1: Episode 73 | NaN | FireTV 4K Stick 2018 | 00:10:30 | 00:10:30 | IN (India) |
1 | Amy | 2022-01-04 15:12:24 | 00:18:35 | NaN | Force 2 | NaN | FireTV 4K Stick 2018 | 00:20:47 | 00:20:47 | IN (India) |
2 | Amy | 2022-01-04 15:11:51 | 00:00:09 | Autoplayed: user action: None; | Force 2_hook_primary_16x9 | HOOK | FireTV 4K Stick 2018 | 00:00:09 | 00:00:09 | IN (India) |
3 | Amy | 2022-01-04 14:59:00 | 00:02:47 | NaN | Resurrection: Ertugrul: Season 1: Episode 73 | NaN | FireTV 4K Stick 2018 | 00:09:26 | Not latest view | IN (India) |
4 | Amy | 2022-01-04 14:43:47 | 00:14:37 | NaN | Black Money Love: Season 1: Episode 22 | NaN | FireTV 4K Stick 2018 | 00:21:50 | 00:21:50 | IN (India) |
# Derive the show name: the part of the title before the first ':' or '_'
# (e.g. "Resurrection: Ertugrul: Season 1: Episode 73" -> "Resurrection").
df['Show'] = df.Title.str.split(":|_", expand=True).iloc[:, 0]

# Replace the account profile names with family-role labels.
# rename_categories returns a new object; its `inplace` argument is
# deprecated and scheduled for removal, so assign the result back instead
# of mutating in place.
df.Profile = df.Profile.astype('category')
df.Profile = df.Profile.cat.rename_categories({
    "Amy": "Mom",
    "Raj": "Me",
    "Daddy": "Dad",
    "Jania": "Sister",
    "Big Mummy": "Grandma",
})
C:\Users\91962\anaconda3\lib\site-packages\pandas\core\arrays\categorical.py:2630: FutureWarning: The `inplace` parameter in pandas.Categorical.rename_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object. res = method(*args, **kwargs)
def makeDelta(t):
    """Convert a time-of-day value into the equivalent elapsed duration.

    Args:
        t: Object exposing ``hour``, ``minute`` and ``second`` attributes,
            e.g. a ``datetime.time``.

    Returns:
        A ``timedelta`` of that many hours, minutes and seconds. Only those
        three fields are read, so any sub-second part is not carried over.
    """
    hours, minutes, seconds = (
        int(part) for part in (t.hour, t.minute, t.second)
    )
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)
# Parse the "HH:MM:SS" duration strings straight into timedeltas.
# Routing them through to_datetime would interpret each value as a clock
# time, which cannot represent a duration of 24 hours or longer.
df.Duration = pd.to_timedelta(df.Duration)
def agg_func(g):
    """Sum the durations in a group and express the total in days.

    Args:
        g: Group of durations supporting ``.sum()`` (a pandas Series of
            timedeltas when used through ``groupby(...).agg``).

    Returns:
        The summed duration divided by one day, i.e. a number of days.
    """
    one_day = timedelta(days=1)
    return g.sum() / one_day
# Viewing time per profile (as produced by agg_func), followed by a
# grand-total row; every value is rounded to one decimal place.
timeSpent = df.groupby(by=['Profile'])[['Duration']].agg(agg_func)
grand_total = timeSpent.Duration.sum()
timeSpent.loc['Total', 'Duration'] = grand_total
# Python's round() is applied value by value on purpose.
timeSpent.Duration = timeSpent.Duration.map(lambda value: round(value, 1))
timeSpent
Duration | |
---|---|
Profile | |
Mom | 236.1 |
Grandma | 83.4 |
Dad | 13.9 |
Sister | 15.2 |
Me | 44.8 |
Total | 393.5 |
# Pie chart of each profile's share of the total viewing time.
# The last row of timeSpent is dropped before plotting so that the
# appended 'Total' row does not become a slice of its own.
profile_share = timeSpent.reset_index().iloc[:-1]

fig = px.pie(
    data_frame=profile_share,
    names='Profile',
    values='Duration',
    color_discrete_sequence=px.colors.sequential.Blues_r,
    hover_data=['Duration'],
    hover_name='Profile',
)
# Custom hover text; <extra></extra> suppresses the secondary hover box.
fig.update_traces(
    hovertemplate="<b>%{label}</b><br>%{value} days <extra></extra>"
)
fig.update_layout(
    title=go.layout.Title(
        # The plotted values are days (matching the hover text above), so
        # the title must say "days" rather than "hours".
        text="<b>Total days of Netflix consumed by Family",
        x=0.5,
        font={"size": 20},
    ),
    legend=dict(
        title='Netflix profiles',
        x=0.8,
        y=0.55,
    ),
)
fig.show()
def agg_func(g):
    """Return the sum of all values in group ``g``.

    Args:
        g: Any object with a ``.sum()`` method (a pandas Series when used
            through ``groupby(...).agg``).

    Returns:
        Whatever ``g.sum()`` produces, unchanged.
    """
    return g.sum()
# Hours watched per (profile, show).
# - The Duration column is summed directly instead of going through
#   `agg_func`: that name is rebound to different aggregations elsewhere in
#   this file, so the result would depend on which definition ran last.
# - observed=True keeps only (profile, show) pairs that actually occur.
#   Profile is categorical, and without it the grouping also emits a row
#   for every unobserved combination.
titleWatched = (
    df.groupby(by=['Profile', 'Show'], observed=True)['Duration']
    .sum()
    .reset_index()
)
# Convert the summed timedeltas to hours, rounded to two decimals.
titleWatched.Duration = (titleWatched.Duration / np.timedelta64(1, 'h')).round(2)
titleWatched
Profile | Show | Duration | |
---|---|---|---|
0 | Mom | NaN | |
1 | Mom | #Alive | 0.02 |
2 | Mom | #Anne Frank - Parallel Stories | 0.00 |
3 | Mom | #AnneFrank - Parallel Stories | 0.31 |
4 | Mom | #realityhigh | 0.23 |
... | ... | ... | ... |
13625 | Me | ¿Quién mató a Sara? | NaN |
13626 | Me | İstanbul Kırmızısı | NaN |
13627 | Me | Şubat | NaN |
13628 | Me | Mayurakshi | NaN |
13629 | Me | 제8일의 밤 | 0.00 |
13630 rows × 3 columns
def _plot_top_shows(profile, viewer_label, top_n=10):
    """Show a bar chart of the shows one profile watched the longest.

    Args:
        profile: Value of ``titleWatched.Profile`` to filter on.
        viewer_label: How the viewer is named in the chart title
            (e.g. ``"my Dad"``).
        top_n: Number of shows to include, taken from the top of the
            descending sort by ``Duration``.

    Returns:
        The plotly figure, after it has been displayed with ``fig.show()``.
    """
    top_shows = (
        titleWatched[titleWatched.Profile == profile]
        .sort_values(by='Duration', ascending=False)
        .head(top_n)
    )
    fig = px.bar(
        data_frame=top_shows,
        x='Show',
        y='Duration',
        color_discrete_sequence=px.colors.sequential.Blues_r,
    )
    fig.update_layout(
        title=go.layout.Title(
            text=f"<b>Top {top_n} Netflix shows watched by {viewer_label} (By Duration)",
            x=0.5,
        )
    )
    fig.update_traces(
        hovertemplate="<b>%{x}</b><br>%{y} hours"
    )
    # The "<b>" tick prefix renders the axis tick labels in bold.
    fig.update_xaxes(
        tickprefix="<b>",
        title="",
    )
    fig.update_yaxes(
        title="<b>Number of hours",
        tickprefix="<b>",
    )
    fig.show()
    return fig


# One chart per family member, in the same order as the original cells.
for profile, viewer_label in (
    ('Me', 'Me'),
    ('Dad', 'my Dad'),
    ('Mom', 'my Mom'),
    ('Sister', 'my Sister'),
):
    fig = _plot_top_shows(profile, viewer_label)
# Hours watched per device.
# The Duration column is summed directly instead of going through
# `agg_func`: that name is rebound to different aggregations elsewhere in
# this file, so the result would depend on which definition ran last.
deviceUsed = df.groupby(by=['Device'])['Duration'].sum().reset_index()
# Convert the summed timedeltas to hours, rounded to two decimals.
deviceUsed.Duration = (deviceUsed.Duration / np.timedelta64(1, 'h')).round(2)
# Ten most-used devices by total hours.
deviceUsed.sort_values(by='Duration', ascending=False)[:10]
Device | Duration | |
---|---|---|
15 | FireTV 4K Stick 2018 | 2775.23 |
18 | Hisense MTK5657 on UHD platform Smart TV | 2035.00 |
16 | FireTV Stick 2016 | 1379.63 |
4 | Apple Apple TV 4 Apple TV | 937.93 |
1 | Android DefaultWidevineL3Phone Android Phone | 895.69 |
14 | Edge OSS - Windows (Cadmium) | 336.33 |
20 | Netflix Windows App - Cadmium Windows Mobile | 299.70 |
8 | Apple iPhone 6 | 244.71 |
9 | Apple iPhone 7 (GSM) | 118.25 |
19 | Netflix Linux Firefox Other | 116.10 |
# Start values carry a time of day as well (e.g. "2022-01-04 16:14:43"),
# so the format has to describe the whole string; a date-only format is
# rejected when the parser matches the format strictly.
df.Start = pd.to_datetime(df.Start, format="%Y-%m-%d %H:%M:%S")
# Vectorised year extraction instead of a per-row Python lambda.
df['Year'] = df.Start.dt.year
def agg_func(g):
    """Sum the durations in a group and return the total in days.

    Args:
        g: Group of durations supporting ``.sum()`` (a pandas Series of
            timedeltas when used through ``groupby(...).agg``).

    Returns:
        The summed duration divided by one day, rounded to one decimal.
    """
    total_days = g.sum() / timedelta(days=1)
    return round(total_days, 1)
# Viewing time per calendar year (as produced by agg_func), followed by a
# 'total' row holding the sum of the per-year values.
yearlySpend = df.groupby(by=['Year'])[['Duration']].agg(agg_func)
overall = yearlySpend.Duration.sum()
yearlySpend.loc['total', 'Duration'] = overall
# Turn the Year index back into an ordinary column.
yearlySpend = yearlySpend.reset_index()
yearlySpend
Year | Duration | |
---|---|---|
0 | 2017 | 45.4 |
1 | 2018 | 82.2 |
2 | 2019 | 78.0 |
3 | 2020 | 86.2 |
4 | 2021 | 100.6 |
5 | 2022 | 1.1 |
6 | total | 393.5 |
# Bar chart of viewing time per year.
# iloc[:-2] drops the last two rows of yearlySpend: the appended 'total'
# row and the row just before it.
# NOTE(review): the row before 'total' is the most recent year, presumably
# left out because it is incomplete -- confirm.
fig = px.bar(
    data_frame=yearlySpend.iloc[:-2],
    x='Year',
    y='Duration',
    color_discrete_sequence=px.colors.sequential.Blues_r,
)
# yearlySpend.Duration is computed as (sum of durations) / (one day), so
# the title, hover text and axis label say "days" rather than "hours".
fig.update_layout(
    title=go.layout.Title(
        text="<b>Total days of Netflix consumed per year (By Duration)",
        x=0.5,
    )
)
fig.update_traces(
    hovertemplate="<b>%{x}</b><br>%{y} days"
)
# The "<b>" tick prefix renders the axis tick labels in bold.
fig.update_xaxes(
    tickprefix="<b>",
    title="<b>Year",
)
fig.update_yaxes(
    title="<b>Number of days",
    tickprefix="<b>",
)
fig.show()