#!/usr/bin/env python
# coding: utf-8
"""Look for identifiers shared between Firefox cookies and local storage.

The script reads ``cookies.sqlite`` and ``webappsstore.sqlite`` from a Firefox
profile directory, collects cookie values that appear under more than five
distinct base domains, and reports which of those values also occur inside
local-storage entries.

It was exported from a notebook; values the notebook displayed implicitly are
printed explicitly here so that running the file as a script shows them.
"""

import os
import sqlite3
from contextlib import closing

import pandas as pd

firefox_profile_dir = '/home/bird/.mozilla/firefox/old_profiles/iadzfbcv.default/'


def list_tables_in_db(db):
    """Print the names of all tables in the open SQLite connection *db*."""
    print(db.cursor().execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall())


def extract_origin(origin_key):
    """Return the origin encoded in a ``webappsstore2.originKey`` value.

    The key is reversed and split on ``':.'``; the second piece is returned
    (e.g. ``'moc.elpmaxe.:https:443'`` -> ``'example.com'``).

    Returns ``None`` when *origin_key* is not a string or has no ``':.'``
    separator, instead of raising and aborting the whole column conversion.
    """
    if not isinstance(origin_key, str):
        return None
    pieces = origin_key[::-1].split(':.')
    return pieces[1] if len(pieces) > 1 else None


def find_shared_cookie_values(cookies_df, min_domains=5, min_length=10,
                              domain_column='baseDomain'):
    """Return cookie values that look like identifiers shared across domains.

    A value qualifies when it is longer than *min_length* characters, does not
    contain ``'com'``, and occurs as a substring of cookie values belonging to
    more than *min_domains* distinct entries of *domain_column*.

    Args:
        cookies_df: DataFrame with a ``value`` column and *domain_column*.
        min_domains: a value must be seen under more than this many domains.
        min_length: a value must be longer than this many characters.
        domain_column: name of the column holding the cookie's domain.

    Returns:
        List of qualifying values, in order of first appearance.
    """
    # Apply the cheap length/'com' filter first: it does not depend on the
    # substring scan, so the result is the same but far fewer scans are run.
    candidates = [
        value for value in cookies_df['value'].unique()
        if isinstance(value, str) and len(value) > min_length and 'com' not in value
    ]

    shared_values = []
    for value in candidates:
        # Literal (non-regex) match; na=False keeps NULL cookie values from
        # poisoning the boolean mask.
        mask = cookies_df['value'].str.contains(value, regex=False, na=False)
        if len(cookies_df.loc[mask, domain_column].unique()) > min_domains:
            shared_values.append(value)
    return shared_values


def storage_rows_containing(storage_df, needle):
    """Return the ``value`` and ``origin`` columns of rows whose value contains *needle*.

    *needle* is matched literally, not as a regular expression, because
    identifiers taken from cookies may contain regex metacharacters.
    """
    mask = storage_df['value'].str.contains(needle, regex=False, na=False)
    return storage_df.loc[mask, ['value', 'origin']]


def find_ids_in_storage(potential_ids, storage_df):
    """Return the entries of *potential_ids* found inside any local-storage value."""
    return [
        potential_id for potential_id in potential_ids
        if storage_df['value'].str.contains(potential_id, regex=False, na=False).any()
    ]


def main():
    """Run the analysis against ``firefox_profile_dir`` and print the findings."""
    print([x for x in os.listdir(firefox_profile_dir) if x.endswith('sqlite')])

    # Local storage. closing() is used because a sqlite3 connection's own
    # context manager only manages the transaction and does not close it.
    storage_file = os.path.join(firefox_profile_dir, 'webappsstore.sqlite')
    with closing(sqlite3.connect(storage_file)) as storage_db:
        list_tables_in_db(storage_db)
        storage_df = pd.read_sql('SELECT * FROM webappsstore2', storage_db)
    storage_df['origin'] = storage_df.originKey.apply(extract_origin)

    # Cookies.
    cookies_file = os.path.join(firefox_profile_dir, 'cookies.sqlite')
    with closing(sqlite3.connect(cookies_file)) as cookies_db:
        cookied_df = pd.read_sql('SELECT * FROM moz_cookies', cookies_db)

    # From cookie table
    potential_ids = find_shared_cookie_values(cookied_df)
    print('We have', len(potential_ids), 'potential ids. Things like:')
    print(potential_ids[0:5])

    for potential_id in find_ids_in_storage(potential_ids, storage_df):
        print(potential_id, 'found in local storage')

    # We can see these ids being shared across multiple origins in local
    # storage as well.
    for repeated_id in ('67936421072632709762729202117726060613',
                        '7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc'):
        print(storage_rows_containing(storage_df, repeated_id))


if __name__ == '__main__':
    main()