# Python libs
import json
from collections import Counter
import numpy as np
import pandas as pd
from dython.nominal import cramers_v, theils_u, correlation_ratio
from scipy.stats import randint
# Date/time/timezone
import datetime as dt
import pytz
from geopy.geocoders import Nominatim
from timezonefinder import TimezoneFinder
# scikit-learn
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import f1_score, roc_auc_score, balanced_accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# Visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Module settings
# Render every matplotlib figure on a white background at 144 dpi.
mpl.rc("figure", facecolor="white", dpi=144)
pd.set_option('expand_frame_repr', False) # display dataframe without wrapping
!head -n 50 dataset.json
[ { "browser": "Firefox", "os": "Ubuntu", "locale": "ru_RU", "user_id": 105, "gender": "m", "location": "USA/Chicago", "sites": [ { "site": "mail.google.com", "length": 50 }, { "site": "toptal.com", "length": 132 }, { "site": "slack.com", "length": 65 }, { "site": "lenta.ru", "length": 59 }, { "site": "youtube.com", "length": 67 }, { "site": "chitay-knigi.ru", "length": 108 } ], "time": "09:03:00", "date": "2017-01-08" }, { "browser": "Firefox", "os": "Windows 8", "locale": "pl-PL", "user_id": 11, "gender": "m", "location": "USA/Chicago", "sites": [ { "site": "meduza.org", "length": 40 }, {
!head -n 50 test.json
[ { "browser": "Chrome", "os": "Ubuntu", "locale": "ru_RU", "gender": "m", "location": "Canada/Toronto", "sites": [ { "site": "mail.google.com", "length": 383 }, { "site": "vk.com", "length": 108 }, { "site": "toptal.com", "length": 138 }, { "site": "lenta.ru", "length": 47 }, { "site": "slack.com", "length": 93 }, { "site": "gazzettaobjects.it", "length": 92 }, { "site": "youtube.com", "length": 48 } ], "time": "16:59:00", "date": "2017-09-05" }, { "browser": "Chrome", "os": "Ubuntu", "locale": "ru_RU", "gender": "m", "location": "Canada/Toronto", "sites": [ { "site": "mail.google.com", "length": 59
# Load the raw session records. The encoding is given explicitly so the file
# is decoded the same way regardless of the platform's default locale.
with open('dataset.json', 'r', encoding='utf-8') as f:
    data_json_struct = json.load(f)
# One row per session record; the nested "sites" list stays in a single
# object column.
user_sessions = pd.DataFrame(data_json_struct)
# Show the first raw record to illustrate the structure of a session.
print(data_json_struct[0])
{'browser': 'Firefox', 'os': 'Ubuntu', 'locale': 'ru_RU', 'user_id': 105, 'gender': 'm', 'location': 'USA/Chicago', 'sites': [{'site': 'mail.google.com', 'length': 50}, {'site': 'toptal.com', 'length': 132}, {'site': 'slack.com', 'length': 65}, {'site': 'lenta.ru', 'length': 59}, {'site': 'youtube.com', 'length': 67}, {'site': 'chitay-knigi.ru', 'length': 108}], 'time': '09:03:00', 'date': '2017-01-08'}
# Preview the first 20 sessions, then print the frame summary
# (columns, non-null counts, dtypes, memory usage).
user_sessions.head(20)
print('\n')
user_sessions.info()
browser | os | locale | user_id | gender | location | sites | time | date | |
---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 09:03:00 | 2017-01-08 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 13:57:00 | 2016-10-05 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 02:06:00 | 2017-03-28 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 21:49:00 | 2017-06-25 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 00:05:00 | 2016-02-10 |
5 | Internet Explorer | Windows 8 | xh-ZA | 120 | m | France/Paris | [{'site': 'cnn.com', 'length': 65}, {'site': '... | 14:55:00 | 2017-03-28 |
6 | Chrome | Ubuntu | ja-JP | 158 | f | Germany/Berlin | [{'site': 'toptal.com', 'length': 59}, {'site'... | 21:26:00 | 2017-08-14 |
7 | Chrome | Windows 10 | en-NZ | 34 | m | USA/Chicago | [{'site': 'google.com', 'length': 86}, {'site'... | 23:00:00 | 2016-02-19 |
8 | Internet Explorer | Windows 7 | en-CA | 173 | m | USA/San Francisco | [{'site': 'booking.com', 'length': 93}, {'site... | 17:08:00 | 2017-01-04 |
9 | Safari | MacOS | zh-CN | 51 | m | Australia/Sydney | [{'site': 'bing.com', 'length': 166}, {'site':... | 22:31:00 | 2016-01-20 |
10 | Chrome | Windows 8 | pt-BR | 46 | f | China/Shanghai | [{'site': 'slack.com', 'length': 64}, {'site':... | 11:06:00 | 2016-11-26 |
11 | Safari | MacOS | xh-ZA | 182 | f | New Zealand/Auckland | [{'site': 'googleapis.com', 'length': 60}, {'s... | 21:35:00 | 2016-07-15 |
12 | Chrome | Windows 10 | en-CA | 130 | m | New Zealand/Auckland | [{'site': 'booking.com', 'length': 51}, {'site... | 17:44:00 | 2017-05-27 |
13 | Safari | MacOS | ur-PK | 77 | f | Malaysia/Kuala Lumpur | [{'site': 'verisign.com', 'length': 190}, {'si... | 10:33:00 | 2017-01-22 |
14 | Chrome | Windows 10 | nl-NL | 91 | m | USA/New York | [{'site': 'googlevideo.com', 'length': 98}, {'... | 11:32:00 | 2016-04-28 |
15 | Chrome | Windows 10 | bg-BG | 121 | m | Malaysia/Kuala Lumpur | [{'site': 'youtube.com', 'length': 173}, {'sit... | 01:48:00 | 2017-04-17 |
16 | Firefox | Windows 7 | en-SG | 26 | m | Russia/Moscow | [{'site': 'googleapis.com', 'length': 96}, {'s... | 16:17:00 | 2017-01-22 |
17 | Chrome | Windows 10 | ro-RO | 188 | f | Russia/Moscow | [{'site': 'facebook.com', 'length': 392}, {'si... | 08:27:00 | 2017-08-12 |
18 | Firefox | Windows 7 | uk-UA | 176 | f | New Zealand/Auckland | [{'site': 'youtube.com', 'length': 82}, {'site... | 00:00:00 | 2016-06-16 |
19 | Safari | MacOS | fr-FR | 136 | f | Brazil/Rio de Janeiro | [{'site': 'baidu.com', 'length': 240}, {'site'... | 13:13:00 | 2017-06-10 |
<class 'pandas.core.frame.DataFrame'> RangeIndex: 80000 entries, 0 to 79999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 browser 80000 non-null object 1 os 80000 non-null object 2 locale 80000 non-null object 3 user_id 80000 non-null int64 4 gender 80000 non-null object 5 location 80000 non-null object 6 sites 80000 non-null object 7 time 80000 non-null object 8 date 80000 non-null object dtypes: int64(1), object(8) memory usage: 5.5+ MB
# Select the sessions whose "sites" entry has length zero (no visited site recorded).
user_sessions.query("sites.str.len() == 0")
browser | os | locale | user_id | gender | location | sites | time | date | |
---|---|---|---|---|---|---|---|---|---|
312 | Chrome | Windows 7 | uk-UA | 12 | m | Canada/Vancouver | [] | 17:45:00 | 2017-08-12 |
604 | Firefox | Windows 10 | en-GB | 78 | f | Japan/Tokyo | [] | 08:36:00 | 2016-11-01 |
821 | Firefox | Windows 8 | pt-BR | 172 | m | China/Shanghai | [] | 05:48:00 | 2016-10-13 |
945 | Chrome | Windows 8 | en-AU | 108 | m | France/Paris | [] | 11:24:00 | 2016-10-14 |
1073 | Internet Explorer | Windows 10 | nl-NL | 75 | m | Germany/Berlin | [] | 01:57:00 | 2016-06-12 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
78883 | Chrome | Windows 10 | vi-VN | 93 | m | Singapore/Singapore | [] | 12:10:00 | 2016-03-20 |
79149 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [] | 13:03:00 | 2016-11-23 |
79603 | Firefox | Windows 8 | pt-BR | 172 | m | China/Shanghai | [] | 05:28:00 | 2016-08-12 |
79903 | Safari | MacOS | nl-NL | 197 | m | Canada/Toronto | [] | 02:37:00 | 2016-12-09 |
79904 | Chrome | Windows 8 | en-AU | 108 | m | France/Paris | [] | 14:45:00 | 2017-01-27 |
393 rows × 9 columns
# Index labels of the sessions whose "sites" list is empty.
empty_sites_index = user_sessions.index[user_sessions['sites'].str.len() == 0]
# Give each of those sessions a single zero-length placeholder visit so that
# later per-site processing always has at least one entry to work with.
user_sessions.loc[empty_sites_index, 'sites'] = (
    user_sessions.loc[empty_sites_index, 'sites']
    .apply(lambda visited: visited + [{'site': 'NONE.NONE', 'length': 0}])
)
# Display the patched sessions.
user_sessions.loc[empty_sites_index]
browser | os | locale | user_id | gender | location | sites | time | date | |
---|---|---|---|---|---|---|---|---|---|
312 | Chrome | Windows 7 | uk-UA | 12 | m | Canada/Vancouver | [{'site': 'NONE.NONE', 'length': 0}] | 17:45:00 | 2017-08-12 |
604 | Firefox | Windows 10 | en-GB | 78 | f | Japan/Tokyo | [{'site': 'NONE.NONE', 'length': 0}] | 08:36:00 | 2016-11-01 |
821 | Firefox | Windows 8 | pt-BR | 172 | m | China/Shanghai | [{'site': 'NONE.NONE', 'length': 0}] | 05:48:00 | 2016-10-13 |
945 | Chrome | Windows 8 | en-AU | 108 | m | France/Paris | [{'site': 'NONE.NONE', 'length': 0}] | 11:24:00 | 2016-10-14 |
1073 | Internet Explorer | Windows 10 | nl-NL | 75 | m | Germany/Berlin | [{'site': 'NONE.NONE', 'length': 0}] | 01:57:00 | 2016-06-12 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
78883 | Chrome | Windows 10 | vi-VN | 93 | m | Singapore/Singapore | [{'site': 'NONE.NONE', 'length': 0}] | 12:10:00 | 2016-03-20 |
79149 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'NONE.NONE', 'length': 0}] | 13:03:00 | 2016-11-23 |
79603 | Firefox | Windows 8 | pt-BR | 172 | m | China/Shanghai | [{'site': 'NONE.NONE', 'length': 0}] | 05:28:00 | 2016-08-12 |
79903 | Safari | MacOS | nl-NL | 197 | m | Canada/Toronto | [{'site': 'NONE.NONE', 'length': 0}] | 02:37:00 | 2016-12-09 |
79904 | Chrome | Windows 8 | en-AU | 108 | m | France/Paris | [{'site': 'NONE.NONE', 'length': 0}] | 14:45:00 | 2017-01-27 |
393 rows × 9 columns
# Join the separate "date" and "time" strings and parse them into a single
# timestamp column; utc=True makes the parsed values timezone-aware (UTC).
session_start_text = user_sessions['date'].str.cat(user_sessions['time'], sep=' ')
user_sessions['start_dt'] = pd.to_datetime(session_start_text, utc=True)
# The two source columns are now redundant.
user_sessions.drop(columns=['time', 'date'], inplace=True)
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | |
---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 |
80000 rows × 8 columns
class TimezoneByCity:
    """Resolve a city name to its time zone.

    The city is geocoded with Nominatim and the resulting coordinates are
    looked up with TimezoneFinder.
    """

    def __init__(self):
        # Geocoder used to turn a city name into coordinates.
        self.geolocator = Nominatim(user_agent="geoapiExercises")
        # Offline coordinate -> time zone name lookup.
        self.tzfinder = TimezoneFinder()

    def tz_name(self, city: str) -> str:
        """Return the time zone name (e.g. ``'Europe/Paris'``) for *city*.

        Args:
            city: Name of the city to geocode.

        Raises:
            ValueError: If the city cannot be geocoded, or if no time zone
                is found at the geocoded coordinates.
        """
        loc = self.geolocator.geocode(city)
        # geocode() yields None when nothing matches; fail with a clear
        # message instead of an AttributeError on `loc.longitude`.
        if loc is None:
            raise ValueError(f"Could not geocode city {city!r}")
        tz_name = self.tzfinder.timezone_at(lng=loc.longitude, lat=loc.latitude)
        # timezone_at() may find no zone for the coordinates.
        if tz_name is None:
            raise ValueError(
                f"No time zone found for city {city!r} "
                f"at lat={loc.latitude}, lng={loc.longitude}"
            )
        return tz_name

    def tz(self, city: str):
        """Return the pytz time zone object for *city*.

        Raises:
            ValueError: Propagated from :meth:`tz_name` when the city cannot
                be resolved.
        """
        return pytz.timezone(self.tz_name(city))
# Build a lookup table from each distinct "Country/City" location string to
# its time zone name, so that every location is geocoded only once.
tz_by_city = TimezoneByCity()
timezone_tbl = {}
for location in user_sessions['location'].unique():
    # Only the city part (after the slash) is sent to the geocoder.
    city_name = location.split('/')[1]
    timezone_tbl[location] = tz_by_city.tz_name(city_name)
print(timezone_tbl)
{'USA/Chicago': 'America/Chicago', 'Singapore/Singapore': 'Asia/Singapore', 'Australia/Sydney': 'Australia/Sydney', 'France/Paris': 'Europe/Paris', 'Germany/Berlin': 'Europe/Berlin', 'USA/San Francisco': 'America/Los_Angeles', 'China/Shanghai': 'Asia/Shanghai', 'New Zealand/Auckland': 'Pacific/Auckland', 'Malaysia/Kuala Lumpur': 'Asia/Kuala_Lumpur', 'USA/New York': 'America/New_York', 'Russia/Moscow': 'Europe/Moscow', 'Brazil/Rio de Janeiro': 'America/Sao_Paulo', 'Canada/Toronto': 'America/Toronto', 'Spain/Madrid': 'Europe/Madrid', 'USA/Miami': 'America/New_York', 'India/Delhi': 'Asia/Kolkata', 'Netherlands/Amsterdam': 'Europe/Amsterdam', 'UK/London': 'Europe/London', 'Japan/Tokyo': 'Asia/Tokyo', 'Italy/Rome': 'Europe/Rome', 'Canada/Vancouver': 'America/Vancouver'}
# Convert each UTC start time to the wall-clock time of the session's
# location, then drop the tz info so the column holds naive local timestamps.
# Only the two required columns are iterated: a row-wise DataFrame.apply
# would build a full Series object for every single row.
user_sessions['local_time'] = [
    start.tz_convert(timezone_tbl[location]).tz_localize(None)
    for start, location in zip(user_sessions['start_dt'], user_sessions['location'])
]
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | |
---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 |
80000 rows × 9 columns
# Break the local start time into calendar components, one column each.
local_dt = user_sessions["local_time"].dt
user_sessions["year"] = local_dt.year
user_sessions["month"] = local_dt.month
user_sessions["day"] = local_dt.day
# pandas numbers weekdays from Monday=0 to Sunday=6.
user_sessions["weekday"] = local_dt.weekday
user_sessions["start_hour"] = local_dt.hour
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | year | month | day | weekday | start_hour | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 | 2017 | 1 | 8 | 6 | 3 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 | 2016 | 10 | 5 | 2 | 8 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 | 2017 | 3 | 28 | 1 | 10 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 | 2017 | 6 | 26 | 0 | 7 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 | 2016 | 2 | 9 | 1 | 18 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 | 2016 | 12 | 30 | 4 | 19 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 | 2017 | 1 | 27 | 4 | 11 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 | 2017 | 3 | 13 | 0 | 12 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 | 2016 | 12 | 6 | 1 | 15 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 | 2016 | 2 | 18 | 3 | 19 |
80000 rows × 14 columns
# Fraction of the local day that has elapsed at session start:
# 0.0 at midnight, approaching 1.0 just before the next midnight.
local_midnight = user_sessions['local_time'].dt.normalize()
seconds_since_midnight = (user_sessions['local_time'] - local_midnight) / pd.Timedelta('1 second')
start_dt_normalized = seconds_since_midnight / 86400
# Encode the time of day as a point on the unit circle so that times just
# before and just after midnight end up close to each other.
day_angle = 2*np.pi * start_dt_normalized
user_sessions['start_sin'] = np.sin(day_angle)
user_sessions['start_cos'] = np.cos(day_angle)
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | year | month | day | weekday | start_hour | start_sin | start_cos | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 | 2017 | 1 | 8 | 6 | 3 | 0.716302 | 0.697790 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 | 2016 | 10 | 5 | 2 | 8 | 0.716302 | -0.697790 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 | 2017 | 3 | 28 | 1 | 10 | 0.477159 | -0.878817 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 | 2017 | 6 | 26 | 0 | 7 | 0.889017 | -0.457874 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 | 2016 | 2 | 9 | 1 | 18 | -0.999762 | 0.021815 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 | 2016 | 12 | 30 | 4 | 19 | -0.963630 | 0.267238 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 | 2017 | 1 | 27 | 4 | 11 | 0.143493 | -0.989651 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 | 2017 | 3 | 13 | 0 | 12 | -0.237686 | -0.971342 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 | 2016 | 12 | 6 | 1 | 15 | -0.757565 | -0.652760 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 | 2016 | 2 | 18 | 3 | 19 | -0.872496 | 0.488621 |
80000 rows × 16 columns
# Split the "Country/City" location string on the slash into its parts and
# store them as two separate columns.
location_parts = user_sessions['location'].str.split('/', expand=True)
user_sessions[['country', 'city']] = location_parts
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | year | month | day | weekday | start_hour | start_sin | start_cos | country | city | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 | 2017 | 1 | 8 | 6 | 3 | 0.716302 | 0.697790 | USA | Chicago |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 | 2016 | 10 | 5 | 2 | 8 | 0.716302 | -0.697790 | USA | Chicago |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 | 2017 | 3 | 28 | 1 | 10 | 0.477159 | -0.878817 | Singapore | Singapore |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 | 2017 | 6 | 26 | 0 | 7 | 0.889017 | -0.457874 | Australia | Sydney |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 | 2016 | 2 | 9 | 1 | 18 | -0.999762 | 0.021815 | USA | Chicago |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 | 2016 | 12 | 30 | 4 | 19 | -0.963630 | 0.267238 | New Zealand | Auckland |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 | 2017 | 1 | 27 | 4 | 11 | 0.143493 | -0.989651 | Netherlands | Amsterdam |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 | 2017 | 3 | 13 | 0 | 12 | -0.237686 | -0.971342 | USA | Chicago |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 | 2016 | 12 | 6 | 1 | 15 | -0.757565 | -0.652760 | France | Paris |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 | 2016 | 2 | 18 | 3 | 19 | -0.872496 | 0.488621 | New Zealand | Auckland |
80000 rows × 18 columns
# Session length = sum of the "length" values over all visits in the session.
user_sessions['length_session'] = [
    sum(visit['length'] for visit in visited)
    for visited in user_sessions['sites']
]
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | year | month | day | weekday | start_hour | start_sin | start_cos | country | city | length_session | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 | 2017 | 1 | 8 | 6 | 3 | 0.716302 | 0.697790 | USA | Chicago | 481 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 | 2016 | 10 | 5 | 2 | 8 | 0.716302 | -0.697790 | USA | Chicago | 1076 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 | 2017 | 3 | 28 | 1 | 10 | 0.477159 | -0.878817 | Singapore | Singapore | 1280 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 | 2017 | 6 | 26 | 0 | 7 | 0.889017 | -0.457874 | Australia | Sydney | 1323 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 | 2016 | 2 | 9 | 1 | 18 | -0.999762 | 0.021815 | USA | Chicago | 224 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 | 2016 | 12 | 30 | 4 | 19 | -0.963630 | 0.267238 | New Zealand | Auckland | 509 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 | 2017 | 1 | 27 | 4 | 11 | 0.143493 | -0.989651 | Netherlands | Amsterdam | 267 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 | 2017 | 3 | 13 | 0 | 12 | -0.237686 | -0.971342 | USA | Chicago | 698 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 | 2016 | 12 | 6 | 1 | 15 | -0.757565 | -0.652760 | France | Paris | 696 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 | 2016 | 2 | 18 | 3 | 19 | -0.872496 | 0.488621 | New Zealand | Auckland | 1597 |
80000 rows × 19 columns
# Number of highest-ranked sites to keep.
n_top = 100
# Accumulate, per site, the total "length" over all sessions of user_id 0
# (the user this notebook refers to as "joe").
joe_cnt = Counter()
for sites_session in user_sessions.query('user_id == 0')['sites']:
    for site_entry in sites_session:
        joe_cnt[site_entry['site']] += site_entry['length']
# Site names ordered by accumulated length, largest first. Built with a
# generator so an empty counter yields an empty tuple instead of an unpacking
# error, and so the interactive `_` variable is not overwritten.
joe_top_sites = tuple(site for site, _total in joe_cnt.most_common(n_top))
print("Total sites joe visited: ", len(joe_cnt))
# joe_top_sites already holds at most n_top entries, so it is printed as-is
# rather than sliced with a hard-coded 100 that could disagree with n_top.
print(f"Top {n_top} sites joe visited: \n", joe_top_sites)
Total sites joe visited: 1166 Top 100 sites joe visited: ('lenta.ru', 'toptal.com', 'mail.google.com', 'slack.com', 'vk.com', 'youtube.com', 'mairie-gruson.fr', 'tdg.ch', 'smbg.fr', 'multiplayer.com', 'free.fr', 'play3-live.com', '127.107', 'rollingstone.com', 'crous-clermont.fr', 'machine-outil.com', 'starbucks.com', 'arooze.com', 'jeux-mini.com', 'games.la', 'fntp.fr', 'yale.edu', 'ireasoning.com', 'marianne.net', 'doctrine-project.org', 'annonceetudiant.com', 'dico-ecolo.com', 'getadblock.com', 'tecnitude.com', 'alluserpics.com', 'iufm.fr', 'geowiki.fr', 'moonbasa.com', 'loreal-finance.com', 'apogee-systems.com', 'mgc-prevention.fr', 'lacoope.org', 'alexgorbatchev.com', 'bookryanair.com', 'wikio.fr', 'cpubenchmark.net', 'thequestionsnetwork.org', 'seloger.com', 'lektorat.de', 'coza.net', 'linuxplanet.com', 'gagnantduprix.com', 'smart-tribune.com', 'onescreen.net', 'obspm.fr', 'granthweb.com', 'restotel.net', 'citea.info', 'virginmobile.fr', 'imaginetonfutur.com', 'encyclopediadramatica.com', 'joueurdugrenier.fr', 'jobanim.com', 'epresspack-dev.net', 'arrondirmesfinsdemois.com', 'abaenglish.com', 'media-imdb.com', 'copy.com', 'corsematin.com', 'videogamereviewerkid.com', 'eklablog.fr', 'roundcube.net', 'breizh-portal.com', 'wallpaperzet.com', 'lecture-en-ligne.com', 'ruvr.ru', 'covoiturage.fr', 'nordlittoral.fr', 'tu-dresden.de', 'intuitwebsites.com', 'frontierstrategygroup.com', 'instanttimezone.com', 'rive-gauche.fr', 'jobthread.com', '365euros.com', 'cinejaude.fr', 'infolignes.com', 'jminformatique.biz', 'ebmeditions.fr', 'lacoccinelle.net', 'fromquarkstoquasars.com', 'ssbwiki.com', 'likefood.us', 'cicic.ca', 'dalloz.fr', 'edublogawards.com', 'science-et-vie.com', 'chartsinfrance.net', 'yowindow.com', 'itdevspace.com', 'epresse.fr', 'galaxys5.fr', 'biologycorner.com', 'hdslb.com', 'stgbssint.com')
# Number of highest-ranked sites to keep.
n_top = 100
# Accumulate, per site, the total "length" over the sessions of all users.
all_cnt = Counter()
for sites_session in user_sessions['sites']:
    for site_entry in sites_session:
        all_cnt[site_entry['site']] += site_entry['length']
# Site names ordered by accumulated length, largest first. Built with a
# generator so an empty counter yields an empty tuple instead of an unpacking
# error, and so the interactive `_` variable is not overwritten.
all_top_sites = tuple(site for site, _total in all_cnt.most_common(n_top))
print("Total sites all users visited: ", len(all_cnt))
# all_top_sites already holds at most n_top entries, so it is printed as-is
# rather than sliced with a hard-coded 100 that could disagree with n_top.
print(f"Top {n_top} sites all users visited: \n", all_top_sites)
Total sites all users visited: 11132 Top 100 sites all users visited: ('youtube.com', 'toptal.com', 'slack.com', 'lenta.ru', 'vk.com', 'mail.google.com', 'oracle.com', 'wikimedia.org', 'googleapis.com', 'vimeo.com', 'airbnb.com', 'geotrust.com', 'google.com', 'booking.com', 'facebook.com', 'live.com', 'cedexis.com', 'ggpht.com', 'baidu.com', 'ytimg.com', 'facebook.net', 'mangafox.me', 'googlevideo.com', 'verisign.com', 'lijit.com', 'yahoo.com', 'openclassrooms.com', 'twitter.com', 'cloudfront.net', 'meduza.org', 'digicert.com', 'disqus.com', 'jboss.org', 'microsoft.com', 'instagram.com', 'wikipedia.org', 'bing.com', 'bing.net', 'skyscanner.com', 'com.cn', 'cnn.com', 'allpostersimages.com', 'ecns.cn', 'csdn.net', 'food-4tots.com', 'trafiz.net', 'ca-centrefrance.fr', 'designmodo.com', 'stid-france.com', 'technoratimedia.com', 'mlmd.fr', 'irs01.net', 'daxon.fr', 'synten.com', 'ac-mayotte.fr', 'retetedesuflet.ro', 'dmca.com', 'megaportail.eu', 'autotitre.com', 'letudiant.fr', 'bookryanair.com', 'mibdepot.com', 'webartex.ru', 'mathon.fr', 'filedanstachambre.com', 'toutestfacile.com', 'ldd.fr', 'cfasup2000.net', 'back-end.dk', 'sg-autorepondeur.com', 'mal-au-dos.be', 'horaires-mairie.fr', 'wayne.edu', 'cbao.fr', 'edf.com', 'lafistiniere.com', 'geoplay.fr', 'eternia-fr.net', 'kejet.net', 'sosav.fr', 'consulfrance-montreal.org', 'yourdressmaker.com', 'kingsandlegends.com', 'gralon.net', 'activolcans.info', 'fileformat.info', 'bestinlinux.com', 'studyrama.be', 'lonelyplanet.com', 'tisserant.org', 'telechargervideoyoutube.com', 'joomladay.fr', 'crawl-anywhere.com', 'dress-for-less.com', 'clickintext.net', 'dartfish.tv', 'man7.org', 'ipage.com', 'adriagate.com', 'autrement-ussel.fr')
def get_topsites_length(session_sites: list, top_sites=all_top_sites):
    """Return the per-site total length of one session, restricted to ``top_sites``.

    Parameters
    ----------
    session_sites : list
        Entries of a single session; each entry is indexed with the keys
        ``'site'`` and ``'length'``.
    top_sites : iterable
        Sites to report on. The default is the value ``all_top_sites`` had
        when this function was defined.

    Returns
    -------
    list
        One value per distinct site of ``top_sites``, in the order of
        ``top_sites``: the sum of the ``'length'`` values of the matching
        entries, or 0 when the session has none. Entries for sites outside
        ``top_sites`` are ignored.
    """
    totals = {name: 0 for name in top_sites}
    for visit in session_sites:
        name = visit['site']
        if name not in totals:
            continue
        totals[name] += visit['length']
    return [*totals.values()]
# For every row of `user_sessions`, turn its 'sites' entry into the list
# produced by get_topsites_length (called with its default `top_sites`).
topsites_length = user_sessions['sites'].apply(get_topsites_length)
# Bare expression: shows the resulting Series as the notebook cell output.
topsites_length
0 [67, 132, 65, 59, 0, 50, 0, 0, 0, 0, 0, 0, 0, ... 1 [202, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 2 [109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 3 [0, 0, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112,... 4 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... ... 79995 [46, 0, 63, 0, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 79996 [43, 0, 74, 0, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... 79997 [0, 0, 71, 0, 44, 54, 0, 0, 0, 0, 0, 0, 0, 0, ... 79998 [251, 103, 0, 82, 133, 127, 0, 0, 0, 0, 0, 0, ... 79999 [0, 0, 0, 0, 0, 0, 118, 0, 0, 0, 0, 0, 145, 0,... Name: sites, Length: 80000, dtype: object
# Re-weight the per-session top-site length vectors with TF-IDF.
tfidf = TfidfTransformer()
topsites_tfidf = tfidf.fit_transform(topsites_length.values.tolist())
# Preview the first two sessions. The result is a sparse matrix (it needs
# `.toarray()` to be displayed), so slice the rows first: only those two rows
# are densified instead of the whole matrix.
topsites_tfidf[:2].toarray()
array([[0.3356709 , 0.73258813, 0.36172768, 0.35462643, 0. , 0.30664778, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [0.57288012, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.35902355, 0. , 0. , 0. , 0. , 0.18299643, 0. , 0.28371111, 0. , 0. , 0. , 0.60967766, 0. , 0. , 0. , 0.23921522, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]])
# Binary label: 0 for sessions whose user_id is 0, 1 for every other user_id.
user_sessions['target'] = user_sessions['user_id'].ne(0).astype(int)
user_sessions
browser | os | locale | user_id | gender | location | sites | start_dt | local_time | year | month | day | weekday | start_hour | start_sin | start_cos | country | city | length_session | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Firefox | Ubuntu | ru_RU | 105 | m | USA/Chicago | [{'site': 'mail.google.com', 'length': 50}, {'... | 2017-01-08 09:03:00+00:00 | 2017-01-08 03:03:00 | 2017 | 1 | 8 | 6 | 3 | 0.716302 | 0.697790 | USA | Chicago | 481 | 1 |
1 | Firefox | Windows 8 | pl-PL | 11 | m | USA/Chicago | [{'site': 'meduza.org', 'length': 40}, {'site'... | 2016-10-05 13:57:00+00:00 | 2016-10-05 08:57:00 | 2016 | 10 | 5 | 2 | 8 | 0.716302 | -0.697790 | USA | Chicago | 1076 | 1 |
2 | Chrome | Ubuntu | zh-CN | 17 | m | Singapore/Singapore | [{'site': 'facebook.net', 'length': 74}, {'sit... | 2017-03-28 02:06:00+00:00 | 2017-03-28 10:06:00 | 2017 | 3 | 28 | 1 | 10 | 0.477159 | -0.878817 | Singapore | Singapore | 1280 | 1 |
3 | Chrome | Windows 10 | pt-BR | 134 | f | Australia/Sydney | [{'site': 'verisign.com', 'length': 111}, {'si... | 2017-06-25 21:49:00+00:00 | 2017-06-26 07:49:00 | 2017 | 6 | 26 | 0 | 7 | 0.889017 | -0.457874 | Australia | Sydney | 1323 | 1 |
4 | Firefox | Windows 10 | en-SG | 92 | f | USA/Chicago | [{'site': 'live.com', 'length': 79}, {'site': ... | 2016-02-10 00:05:00+00:00 | 2016-02-09 18:05:00 | 2016 | 2 | 9 | 1 | 18 | -0.999762 | 0.021815 | USA | Chicago | 224 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79995 | Chrome | Windows 10 | pt-PT | 178 | m | New Zealand/Auckland | [{'site': 'vk.com', 'length': 126}, {'site': '... | 2016-12-30 06:02:00+00:00 | 2016-12-30 19:02:00 | 2016 | 12 | 30 | 4 | 19 | -0.963630 | 0.267238 | New Zealand | Auckland | 509 | 1 |
79996 | Safari | MacOS | it-IT | 32 | m | Netherlands/Amsterdam | [{'site': 'slack.com', 'length': 74}, {'site':... | 2017-01-27 10:27:00+00:00 | 2017-01-27 11:27:00 | 2017 | 1 | 27 | 4 | 11 | 0.143493 | -0.989651 | Netherlands | Amsterdam | 267 | 1 |
79997 | Firefox | Ubuntu | ru_RU | 0 | m | USA/Chicago | [{'site': 'vk.com', 'length': 44}, {'site': 's... | 2017-03-13 17:55:00+00:00 | 2017-03-13 12:55:00 | 2017 | 3 | 13 | 0 | 12 | -0.237686 | -0.971342 | USA | Chicago | 698 | 0 |
79998 | Firefox | Windows 10 | ru_RU | 56 | m | France/Paris | [{'site': 'lenta.ru', 'length': 82}, {'site': ... | 2016-12-06 14:17:00+00:00 | 2016-12-06 15:17:00 | 2016 | 12 | 6 | 1 | 15 | -0.757565 | -0.652760 | France | Paris | 696 | 1 |
79999 | Firefox | Windows 10 | pt-BR | 113 | f | New Zealand/Auckland | [{'site': 'baidu.com', 'length': 60}, {'site':... | 2016-02-18 06:57:00+00:00 | 2016-02-18 19:57:00 | 2016 | 2 | 18 | 3 | 19 | -0.872496 | 0.488621 | New Zealand | Auckland | 1597 | 1 |
80000 rows × 20 columns
def set_xlabel_rotation(ax, deg=90):
    """Rotate every x-axis tick label of ``ax``.

    Parameters
    ----------
    ax : object
        Axes-like object whose ``get_xticklabels()`` yields labels that
        expose ``set_rotation``.
    deg : int or float, default 90
        Rotation passed to each label's ``set_rotation``.

    Returns
    -------
    None
        The labels are modified in place.
    """
    for label in ax.get_xticklabels():
        label.set_rotation(deg)
# Two histograms side by side: 'user_id' (one bar per discrete value) and
# 'length_session' (200 bins).
# plt.subplots(1, 2) already returns a 1-D array of axes, so it is indexed directly.
fig, ax = plt.subplots(1, 2, figsize=(16, 4))
p = sns.histplot(user_sessions[['user_id']], ax=ax[0], discrete=True)
p = sns.histplot(user_sessions[['length_session']], ax=ax[1], bins=200)
# Row counts per category for 'browser', 'os' and 'locale', one panel each.
# plt.subplots(1, 3) already returns a 1-D array of axes, so it is indexed directly.
fig, ax = plt.subplots(1, 3, figsize=(21, 4))
p = sns.countplot(data=user_sessions, x='browser', ax=ax[0])
p = sns.countplot(data=user_sessions, x='os', ax=ax[1])
p = sns.countplot(data=user_sessions, x='locale', ax=ax[2])
# Turn the 'locale' tick labels vertical with the shared helper
# (default rotation: 90 degrees) instead of repeating the loop inline.
set_xlabel_rotation(ax[2])
# Row counts per category for 'gender', 'city' and 'country', one panel each.
# plt.subplots(1, 3) already returns a 1-D array of axes, so it is indexed directly.
fig, ax = plt.subplots(1, 3, figsize=(21, 4))
p = sns.countplot(data=user_sessions, x='gender', ax=ax[0])
p = sns.countplot(data=user_sessions, x='city', ax=ax[1])
# Turn the tick labels vertical with the shared helper (default rotation:
# 90 degrees), each time after the panel has been drawn.
set_xlabel_rotation(ax[1])
p = sns.countplot(data=user_sessions, x='country', ax=ax[2])
set_xlabel_rotation(ax[2])
# Row counts per value of 'year', 'month' and 'day', one panel each.
# plt.subplots(1, 3) already returns a 1-D array of axes, so it is indexed directly.
fig, ax = plt.subplots(1, 3, figsize=(16, 4))
p = sns.countplot(data=user_sessions, x='year', ax=ax[0])
p = sns.countplot(data=user_sessions, x='month', ax=ax[1])
p = sns.countplot(data=user_sessions, x='day', ax=ax[2])
# Turn the 'day' tick labels vertical with the shared helper
# (default rotation: 90 degrees) instead of repeating the loop inline.
set_xlabel_rotation(ax[2])
# Row counts per value of 'weekday' and 'start_hour', side by side.
# plt.subplots(1, 2) already returns a 1-D array of axes, so it is indexed directly.
fig, ax = plt.subplots(1, 2, figsize=(16, 4))
p = sns.countplot(data=user_sessions, x='weekday', ax=ax[0])
p = sns.countplot(data=user_sessions, x='start_hour', ax=ax[1])