import urllib url = 'http://apics-online.info/static/download/apics-dataset.sqlite.zip' filename, headers = urllib.urlretrieve(url, url.rpartition('/')[2]) import zipfile import os os.listdir('.') print zipfile.ZipFile(filename).namelist() with zipfile.ZipFile(filename) as fp: with open('apics.sqlite', 'w') as fp2: print fp.read('README.txt') with zipfile.ZipFile(filename) as fp: with open('apics.sqlite', 'w') as fp2: fp2.write(fp.read('apics-dataset.sqlite')) import sqlite3 db = sqlite3.connect('apics.sqlite') db.execute("select name from dataset").fetchone() %pylab inline db = sqlite3.connect('apics.sqlite') cu = db.cursor() print cu.execute("select count(*) from language").fetchone() print cu.execute("select count(*) from lect where language_pk is null").fetchone() for row in cu.execute("select lexifier, count(pk) as c from lect where language_pk is null group by lexifier order by c desc"): print row for row in cu.execute("select l.name from language as l, lect as ll where ll.pk = l.pk and ll.lexifier = 'Malay'"): print row import pandas pandas.set_option('max_rows', 10) languages = pandas.read_sql('SELECT * FROM language', db, 'id') languages.latitude.hist(bins=45) print languages.longitude.hist(bins=45) print