# Firefox profile directory to inspect -- update this for your own system.
firefox_profile_dir = '/home/bird/.mozilla/firefox/old_profiles/iadzfbcv.default/'
# Show every SQLite database file stored directly in the profile directory.
[entry for entry in os.listdir(firefox_profile_dir) if entry.endswith('.sqlite')]
['content-prefs.sqlite', 'places.sqlite', 'kinto.sqlite', 'permissions.sqlite', 'formhistory.sqlite', 'storage-sync.sqlite', 'favicons.sqlite', 'cookies.sqlite', 'storage.sqlite', 'webappsstore.sqlite']
# Path to the profile's cookie database. os.path.join is used instead of
# string formatting so a profile directory that already ends in '/' does not
# produce a doubled separator in the path.
cookies_file = os.path.join(firefox_profile_dir, 'cookies.sqlite')
# Connection used by the queries below.
# NOTE: sqlite3.connect creates an empty database when the file does not
# exist, so a wrong profile path shows up later as a missing-table error.
cookies_db = sqlite3.connect(cookies_file)
def list_tables_in_db(db):
    """Print the names of all tables in an SQLite database.

    Args:
        db: An open ``sqlite3`` connection (anything exposing ``cursor()``).

    The result is printed as the list of one-element tuples returned by
    ``fetchall()``, e.g. ``[('moz_cookies',)]``. Nothing is returned.
    """
    cursor = db.cursor()
    try:
        # sqlite_master is SQLite's schema catalogue; one row per schema object.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        print(cursor.fetchall())
    finally:
        cursor.close()
# Print the table names found in the cookies database.
list_tables_in_db(cookies_db)
[('moz_cookies',)]
# Read the full moz_cookies table, keeping only the cookie's base domain and
# its creation timestamp for the analysis that follows.
df = pd.read_sql('SELECT * FROM moz_cookies', cookies_db)[['baseDomain', 'creationTime']]
# Preview the first rows.
df.head()
baseDomain | creationTime | |
---|---|---|
0 | creativecommons.org | 1519539169476615 |
1 | reddit.com | 1519539181733316 |
2 | reddit.com | 1519539181733577 |
3 | scorecardresearch.com | 1519539184029502 |
4 | scorecardresearch.com | 1519539184029626 |
# Number of cookie rows loaded.
len(df.index)
3954
# Plot the running total of cookies against the time each cookie was created.
#
# df only carries 'baseDomain' and 'creationTime', so the running total is
# derived here instead of being read from a 'count_total' column. The
# creationTime values are integer microseconds since the Unix epoch
# (16-digit values such as 1519539169476615), so they are converted to real
# timestamps before being placed on the datetime axis.
cookie_times = (
    pd.to_datetime(df.creationTime, unit='us')
    .sort_values()
    .reset_index(drop=True)
)
# After sorting by time, the i-th cookie brings the total to i.
cookie_totals = list(range(1, len(cookie_times) + 1))

p = figure(
    x_axis_type='datetime', title='My Firefox Cookies',
    height=300, sizing_mode='scale_width',
    tools='', toolbar_location=None
)
source = ColumnDataSource({
    'x': cookie_times,
    'y': cookie_totals,
})
# No padding, so the filled area touches the plot edges.
p.x_range.range_padding = 0
p.y_range.range_padding = 0
p.patch('x', 'y', alpha=0.6, source=source)
p.circle('x', 'y', source=source)
p.xaxis.axis_label = 'Creation Time'
p.yaxis.axis_label = 'n cookies'
show(p)
# Ten base domains that set the most cookies, most frequent first.
df['baseDomain'].value_counts().head(10)
insightexpressai.com    68
rubiconproject.com      65
pubmatic.com            51
amazon.com              50
google.com              45
stickyadstv.com         40
demdex.net              37
theadvocate.com         30
cnn.com                 28
mozilla.org             28
Name: baseDomain, dtype: int64
# Load the Disconnect tracker list from the working directory and show the
# top-level category names it defines.
with open('disconnect_me.json', 'r') as f:
    # Parse straight from the file object rather than reading it into a
    # string first.
    disconnect_json = json.load(f)
print(disconnect_json['categories'].keys())
dict_keys(['Advertising', 'Content', 'Analytics', 'Social', 'Disconnect'])
# Show the first ten rows of cookie_domains.
# NOTE(review): cookie_domains is not defined in the code visible here;
# presumably it combines the per-domain cookie counts with the categories
# from disconnect_json -- confirm where it is built.
cookie_domains.head(10)
count | category | |
---|---|---|
insightexpressai.com | 68 | Advertising |
rubiconproject.com | 65 | Advertising |
pubmatic.com | 51 | Advertising |
amazon.com | 50 | Content |
google.com | 45 | Uncategorized |
stickyadstv.com | 40 | Uncategorized |
demdex.net | 37 | Advertising |
theadvocate.com | 30 | Uncategorized |
cnn.com | 28 | Uncategorized |
mozilla.org | 28 | Uncategorized |
# Group the rows of cookie_domains by their 'category' value and sum each group.
cookie_domains.groupby('category').sum()
count | |
---|---|
category | |
Advertising | 741 |
Analytics | 103 |
Content | 220 |
Disconnect | 33 |
Social | 52 |
Uncategorized | 2805 |