record properties of treedb.sqlite3
loaded from Glottolog
master repo commit
and monitor changes by diffing this
%matplotlib inline
import collections
import pandas as pd
import sqlalchemy as sa
import matplotlib as mpl
import matplotlib.pyplot as plt
import treedb
import treedb.raw
treedb.configure_logging(log_sql=False)
treedb.__version__
'1.3.2'
%%time
TARGET = '../glottolog/'
TAG = 'v4.2.1'
treedb.checkout_or_clone(TAG, target=TARGET)
Wall time: 8.52 s
(None, CompletedProcess(args=['git', 'checkout', '-B', 'treedb', 'v4.2.1'], returncode=0))
treedb.set_root(TARGET)
<treedb.proxies.PathProxy path='../glottolog/languoids/tree' inode=28991922601764532>
next(treedb.iterfiles())
(('abin1243',), <DirEntry 'md.ini'>, <treedb.files.ConfigParser at 0x54de130>)
next(treedb.iterlanguoids())
(('abin1243',), {'id': 'abin1243', 'parent_id': None, 'level': 'language', 'name': 'Abinomn', 'hid': 'bsa', 'iso639_3': 'bsa', 'latitude': -2.92281, 'longitude': 138.891, 'macroareas': ['Papunesia'], 'countries': [{'name': 'Indonesia', 'id': 'ID'}], 'links': [{'url': 'http://endangeredlanguages.com/lang/1763', 'title': 'Abinomn', 'scheme': 'http'}, {'url': 'https://www.wikidata.org/entity/Q56648', 'title': None, 'scheme': 'https'}, {'url': 'https://en.wikipedia.org/wiki/Abinomn_language', 'title': None, 'scheme': 'https'}], 'sources': {'glottolog': [{'bibfile': 'hh', 'bibkey': 'e:Lagerberg:Moegip', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'h:SilzerClouse:Index', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'h:SilzerHeikkinen:Irian', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'hv:Foley:Northwest-New-Guinea', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'hvtyp:DonohueMusgrave:Melanesia', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'w:Fiwei:Abinomn', 'pages': None, 'trigger': None}]}, 'altnames': {'multitree': [{'name': '"Baso"', 'lang': None}, {'name': 'Abinomn', 'lang': None}, {'name': 'Avinomen', 'lang': None}, {'name': 'Foja', 'lang': None}, {'name': 'Foya', 'lang': None}], 'lexvo': [{'name': 'Abinomn', 'lang': 'en'}, {'name': 'Abinomn language', 'lang': 'en'}, {'name': 'Abinomneg', 'lang': 'br'}, {'name': 'Lingua abinomn', 'lang': 'gl'}, {'name': 'Llingua Abinomn', 'lang': 'ast'}], 'hhbib_lgcode': [{'name': 'Baso', 'lang': None}], 'elcat': [{'name': '"Baso"', 'lang': None}, {'name': 'Abinomn', 'lang': None}, {'name': 'Avinomen', 'lang': None}, {'name': 'Foja', 'lang': None}, {'name': 'Foya', 'lang': None}]}, 'triggers': {'lgcode': ['macrohistory', 'moegip']}, 'identifier': {'multitree': 'bsa', 'endangeredlanguages': '1763'}, 'classification': {'familyrefs': [{'bibfile': 'hh', 'bibkey': 'h:SilzerClouse:Index', 'pages': None, 'trigger': None}, {'bibfile': 'hh', 'bibkey': 'hvtyp:DonohueMusgrave:Melanesia', 'pages': None, 'trigger': None}]}, 'endangerment': {'status': 'shifting', 'source': {'name': 'ElCat', 'bibfile': None, 'bibkey': None, 'pages': None}, 'date': datetime.datetime(2017, 8, 19, 8, 16, 16), 'comment': 'Abinomn (1763-bsa) = Endangered (20 percent certain, based on the evidence available)'}, 'hh_ethnologue_comment': None, 'iso_retirement': None})
treedb.set_engine('treedb.sqlite3')
<treedb.proxies.SQLiteEngineProxy filename='treedb.sqlite3' size=None>
%%time
engine = treedb.load(rebuild=False, exclude_raw=False)
engine
0:02:03.956089 Wall time: 2min 4s
<treedb.proxies.SQLiteEngineProxy filename='treedb.sqlite3' size=68775936>
engine.file_mtime()
datetime.datetime(2020, 6, 11, 19, 24, 42, 215165)
engine.file_size(as_megabytes=True)
65.58984375
%time engine.file_sha256()
Wall time: 526 ms
'23beb0a953017442928646fcedad304648111496791914014f60c4ebc2db9a0e'
%time treedb.raw.checksum()
Wall time: 407 ms
'strong:sha256:03ae265f46c79a5fd9ae44ada3ed50840dbdb897384b7ac57456ba12b6206a71'
%time treedb.raw.checksum(weak=True)
Wall time: 9.13 s
'weak:sha256:9cf661e51d8cd6d8ef1f5e93dbbf4612a8a7e06712c56747d7a280b2d83f503b'
%time treedb.check()
valid_glottocodeCheck: OK valid_iso639_3Check: OK valid_hidCheck: OK clean_nameCheck: OK family_parentCheck: OK language_parentCheck: OK dialect_parentCheck: OK family_childrenCheck: OK family_languagesCheck: OK bookkeeping_no_childrenCheck: OK no_empty_filesCheck: OK Wall time: 2.51 s
True
%time treedb.checksum()
Wall time: 12 s
'path_json:id:sha256:1b85081a5fc1ddedf91f7099524a12c0d8941d6ccf3ff9feb889aec46b863e43'
%time treedb.checksum(source='raw')
Wall time: 13.4 s
'path_json:id:sha256:1b85081a5fc1ddedf91f7099524a12c0d8941d6ccf3ff9feb889aec46b863e43'
from treedb import count, select, text
treedb.configure_logging(level='INFO', log_sql=True)
select([sa.func.sqlite_version()]).scalar()
[INFO@treedb.logging_] treedb version: 1.3.2 SELECT sqlite_version() AS sqlite_version_1 ()
'3.31.1'
application_id = text('PRAGMA application_id').scalar()
assert application_id == 1122 == 0x462
assert application_id == sum(ord(c) for c in treedb.Dataset.__tablename__)
assert treedb.Dataset.__tablename__ == '__dataset__'
application_id
PRAGMA application_id ()
1122
from treedb import pd_read_sql as read_sql
read_sql(treedb.select_tables_nrows(), index_col='table_name')
SELECT sqlite_master.name FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name NOT LIKE ? ORDER BY sqlite_master.name ('table', 'sqlite_%') [INFO@treedb.shortcuts] pandas version: 1.0.4 SELECT ? AS table_name, (SELECT count(*) AS n FROM __dataset__) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM __producer__) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM _file) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM _option) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM _value) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM altname) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM altnameprovider) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM bibfile) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM bibitem) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM classificationcomment) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM classificationref) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM country) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM endangerment) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM endangerment_source) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM ethnologuecomment) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM identifier) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM identifiersite) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM isoretirement) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM isoretirement_changeto) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM languoid) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM languoid_country) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM languoid_macroarea) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM link) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM macroarea) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM source) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM sourceprovider) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM timespan) AS n_rows UNION ALL SELECT ? AS table_name, (SELECT count(*) AS n FROM "trigger") AS n_rows ('__dataset__', '__producer__', '_file', '_option', '_value', 'altname', 'altnameprovider', 'bibfile', 'bibitem', 'classificationcomment', 'classificationref', 'country', 'endangerment', 'endangerment_source', 'ethnologuecomment', 'identifier', 'identifiersite', 'isoretirement', 'isoretirement_changeto', 'languoid', 'languoid_country', 'languoid_macroarea', 'link', 'macroarea', 'source', 'sourceprovider', 'timespan', 'trigger')
n_rows | |
---|---|
table_name | |
__dataset__ | 1 |
__producer__ | 1 |
_file | 25228 |
_option | 53 |
_value | 600096 |
altname | 149531 |
altnameprovider | 11 |
bibfile | 37 |
bibitem | 143220 |
classificationcomment | 11791 |
classificationref | 16043 |
country | 226 |
endangerment | 8165 |
endangerment_source | 11 |
ethnologuecomment | 620 |
identifier | 20867 |
identifiersite | 4 |
isoretirement | 344 |
isoretirement_changeto | 438 |
languoid | 25228 |
languoid_country | 10988 |
languoid_macroarea | 20700 |
link | 28949 |
macroarea | 6 |
source | 188938 |
sourceprovider | 1 |
timespan | 134 |
trigger | 30276 |
treedb.print_rows(treedb.sqlite_master.select_views(), format_='{name}')
SELECT sqlite_master.name FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name NOT LIKE ? ORDER BY sqlite_master.name ('view', 'sqlite_%')
example path_json stats
from treedb import print_table_sql as print_sql
print_sql('sqlite_master')
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'sqlite_master')
None
SELECT count(*) AS n_rows FROM sqlite_master ()
56
from treedb import Dataset
print_sql(Dataset)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', '__dataset__')
CREATE TABLE __dataset__ ( id INTEGER NOT NULL CHECK (id = 1), title TEXT NOT NULL CHECK (title != ''), git_commit VARCHAR(40) NOT NULL CHECK (length(git_commit) = 40), git_describe TEXT NOT NULL CHECK (git_describe != ''), clean BOOLEAN NOT NULL, exclude_raw BOOLEAN NOT NULL, PRIMARY KEY (id), UNIQUE (git_commit), UNIQUE (git_describe), CHECK (clean IN (0, 1)), CHECK (exclude_raw IN (0, 1)) )
SELECT count(*) AS n_rows FROM __dataset__ ()
1
dataset, = select([Dataset]).execute()
pd.DataFrame.from_dict(dataset, orient='index', columns=['__dataset__'])
SELECT __dataset__.id, __dataset__.title, __dataset__.git_commit, __dataset__.git_describe, __dataset__.clean, __dataset__.exclude_raw FROM __dataset__ ()
__dataset__ | |
---|---|
id | 1 |
title | Glottolog treedb |
git_commit | e49bc948c1195a8a5c5a85e1ea9293ea7412602e |
git_describe | v4.2.1 |
clean | True |
exclude_raw | False |
from treedb import Producer
print_sql(Producer)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', '__producer__')
CREATE TABLE __producer__ ( id INTEGER NOT NULL CHECK (id = 1), name TEXT NOT NULL CHECK (name != ''), version TEXT NOT NULL CHECK (version != ''), PRIMARY KEY (id), UNIQUE (name) )
SELECT count(*) AS n_rows FROM __producer__ ()
1
producer, = select([Producer]).execute()
pd.DataFrame.from_dict(producer, orient='index', columns=['__producer__'])
SELECT __producer__.id, __producer__.name, __producer__.version FROM __producer__ ()
__producer__ | |
---|---|
id | 1 |
name | treedb |
version | 1.3.2 |
treedb.raw
¶from treedb.raw import File, Option, Value
for model in (File, Option, Value):
print_sql(model)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', '_file')
CREATE TABLE _file ( id INTEGER NOT NULL, glottocode VARCHAR(8) NOT NULL CHECK (length(glottocode) = 8), path TEXT NOT NULL CHECK (length(path) >= 8 AND (length(path) + 1) % 9 = 0), size INTEGER NOT NULL CHECK (size > 0), sha256 VARCHAR(64) NOT NULL CHECK (length(sha256) = 64), PRIMARY KEY (id), CHECK (substr(path, -length(glottocode)) = glottocode), UNIQUE (glottocode), UNIQUE (path), UNIQUE (sha256) )
SELECT count(*) AS n_rows FROM _file ()
25228
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', '_option')
CREATE TABLE _option ( id INTEGER NOT NULL, section TEXT NOT NULL CHECK (section != ''), option TEXT NOT NULL CHECK (option != ''), is_lines BOOLEAN, PRIMARY KEY (id), UNIQUE (section, option), CHECK (is_lines IN (0, 1)) )
SELECT count(*) AS n_rows FROM _option ()
53
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', '_value')
CREATE TABLE _value ( file_id INTEGER NOT NULL, option_id INTEGER NOT NULL, line INTEGER NOT NULL CHECK (line > 0), value TEXT NOT NULL CHECK (value != ''), PRIMARY KEY (file_id, option_id, line), UNIQUE (file_id, line), FOREIGN KEY(file_id) REFERENCES _file (id), FOREIGN KEY(option_id) REFERENCES _option (id) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM _value ()
600096
read_sql(select([File]).limit(5), index_col='id')
SELECT _file.id, _file.glottocode, _file.path, _file.size, _file.sha256 FROM _file LIMIT ? OFFSET ? (5, 0)
glottocode | path | size | sha256 | |
---|---|---|---|---|
id | ||||
1 | abin1243 | abin1243 | 1229 | b7c19c6bc411908419022568fa021e39d64b1e9ab2e57c... |
2 | abis1238 | abis1238 | 1708 | b0d8c9aca4d8dcca721e8dac7ff33c4f51f116ad300f35... |
3 | abkh1242 | abkh1242 | 306 | 5dd28ae1265a0967b1a236d06c7fd6e7ac3b9e32d866dd... |
4 | abkh1243 | abkh1242/abkh1243 | 257 | a5900355c69af22d94eaa0a147b95fc291270634bc033c... |
5 | abaz1241 | abkh1242/abkh1243/abaz1241 | 2450 | 1f9258cffd448ed9eb5b243d7252bb183aa89558904f8e... |
read_sql(select([Option]).limit(5), index_col='id')
SELECT _option.id, _option.section, _option.option, _option.is_lines FROM _option LIMIT ? OFFSET ? (5, 0)
section | option | is_lines | |
---|---|---|---|
id | |||
1 | core | name | False |
2 | core | hid | False |
3 | core | level | False |
4 | core | iso639-3 | False |
5 | core | latitude | False |
read_sql(select([Value]).limit(5), index_col=['file_id', 'option_id'])
SELECT _value.file_id, _value.option_id, _value.line, _value.value FROM _value LIMIT ? OFFSET ? (5, 0)
line | value | ||
---|---|---|---|
file_id | option_id | ||
1 | 1 | 1 | Abinomn |
2 | 2 | bsa | |
3 | 3 | language | |
4 | 4 | bsa | |
5 | 5 | -2.92281 |
select_file_values = select([Option.section, Option.option, Value.line, Value.value])\
.select_from(sa.join(File, Value).join(Option))\
.where(File.glottocode == sa.bindparam('glottocode'))
read_sql(select_file_values, params={'glottocode': 'abin1243'},
index_col=['section', 'option', 'line'])
SELECT _option.section, _option.option, _value.line, _value.value FROM _file JOIN _value ON _file.id = _value.file_id JOIN _option ON _option.id = _value.option_id WHERE _file.glottocode = ? ('abin1243',)
value | |||
---|---|---|---|
section | option | line | |
core | name | 1 | Abinomn |
hid | 2 | bsa | |
level | 3 | language | |
iso639-3 | 4 | bsa | |
latitude | 5 | -2.92281 | |
longitude | 6 | 138.891 | |
macroareas | 7 | Papunesia | |
countries | 8 | Indonesia (ID) | |
links | 9 | [Abinomn](http://endangeredlanguages.com/lang/... | |
10 | https://www.wikidata.org/entity/Q56648 | ||
11 | https://en.wikipedia.org/wiki/Abinomn_language | ||
sources | glottolog | 12 | **hh:e:Lagerberg:Moegip** |
13 | **hh:h:SilzerClouse:Index** | ||
14 | **hh:h:SilzerHeikkinen:Irian** | ||
15 | **hh:hv:Foley:Northwest-New-Guinea** | ||
16 | **hh:hvtyp:DonohueMusgrave:Melanesia** | ||
17 | **hh:w:Fiwei:Abinomn** | ||
altnames | multitree | 18 | "Baso" |
19 | Abinomn | ||
20 | Avinomen | ||
21 | Foja | ||
22 | Foya | ||
lexvo | 23 | Abinomn [en] | |
24 | Abinomn language [en] | ||
25 | Abinomneg [br] | ||
26 | Lingua abinomn [gl] | ||
27 | Llingua Abinomn [ast] | ||
hhbib_lgcode | 28 | Baso | |
elcat | 29 | "Baso" | |
30 | Abinomn | ||
31 | Avinomen | ||
32 | Foja | ||
33 | Foya | ||
triggers | lgcode | 34 | macrohistory |
35 | moegip | ||
identifier | multitree | 36 | bsa |
endangeredlanguages | 37 | 1763 | |
classification | familyrefs | 38 | **hh:h:SilzerClouse:Index** |
39 | **hh:hvtyp:DonohueMusgrave:Melanesia** | ||
endangerment | status | 40 | shifting |
source | 41 | ElCat | |
date | 42 | 2017-08-19T08:16:16 | |
comment | 43 | Abinomn (1763-bsa) = Endangered (20 percent ce... |
path_depth = File.path_depth()
select_path_depths = select([path_depth,
treedb.Languoid.level, count().label('n_files')])\
.select_from(sa.join(File, treedb.Languoid,
File.glottocode == treedb.Languoid.id))\
.group_by(path_depth, treedb.Languoid.level)\
.order_by('path_depth', 'level')
_ = read_sql(select_path_depths, index_col=['path_depth', 'level'])\
.unstack(fill_value=0).droplevel(0, axis='columns')[list(treedb.LEVEL)]
_.plot.bar(stacked=True, figsize=(12, 3))
(100 * _.div(_.sum(axis='columns'), axis='rows')).plot.bar(stacked=True, figsize=(12, 3));
SELECT (length(_file.path) + ?) / ? AS path_depth, languoid.level, count(*) AS n_files FROM _file JOIN languoid ON _file.glottocode = languoid.id GROUP BY (length(_file.path) + ?) / ?, languoid.level ORDER BY path_depth, languoid.level (1, 9, 1, 9) [INFO@numexpr.utils] NumExpr defaulting to 4 threads.
file_size = File.size.label('file_size')
select_file_sizes = select([file_size, count().label('n_files')])\
.group_by(file_size)\
.order_by('file_size')
_ = read_sql(select_file_sizes, index_col='file_size')
_.plot.area(figsize=(12, 3), logx=True).xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
_.index.to_series().describe().to_frame().transpose()[['count', 'min', 'max']]
SELECT _file.size AS file_size, count(*) AS n_files FROM _file GROUP BY _file.size ORDER BY file_size ()
count | min | max | |
---|---|---|---|
file_size | 3224.0 | 65.0 | 46101.0 |
file_nvalues = select([File.glottocode,
count(Value.option_id.distinct()).label('n_values')])\
.select_from(sa.join(File, Value))\
.group_by(File.glottocode).c.n_values
select_nvalues = select([file_nvalues, count().label('n_files')])\
.group_by(file_nvalues)\
.order_by(file_nvalues)
_ = read_sql(select_nvalues, index_col='n_values')
_.plot.bar(figsize=(12, 3))
_.index.to_series().describe().to_frame().transpose()[['count', 'min', 'max']]
SELECT n_values, count(*) AS n_files FROM (SELECT _file.glottocode AS glottocode, count(DISTINCT _value.option_id) AS n_values FROM _file JOIN _value ON _file.id = _value.file_id GROUP BY _file.glottocode) GROUP BY n_values ORDER BY n_values ()
count | min | max | |
---|---|---|---|
n_values | 35.0 | 2.0 | 36.0 |
value_length = sa.func.length(Value.value).label('value_length')
select_value_length = select([value_length, count().label('n_values')])\
.group_by(value_length)\
.order_by('value_length')
_ = read_sql(select_value_length, index_col='value_length')
_.plot.area(figsize=(12, 3), logx=True).xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
_.index.to_series().describe().to_frame().transpose()[['count', 'min', 'max']]
SELECT length(_value.value) AS value_length, count(*) AS n_values FROM _value GROUP BY length(_value.value) ORDER BY value_length ()
count | min | max | |
---|---|---|---|
value_length | 632.0 | 1.0 | 6215.0 |
%time treedb.raw.print_stats()
[INFO@treedb.raw.values] fetch statistics SELECT _option.section, _option.option, count(*) AS n FROM _option JOIN _value ON _option.id = _value.option_id GROUP BY _option.section, _option.option ORDER BY _option.section, n DESC ()
altnames multitree 54,614 altnames hhbib_lgcode 29,050 altnames lexvo 26,027 altnames elcat 23,424 altnames aiatsis 6,542 altnames wals 2,441 altnames wals other 2,097 altnames moseley & asher (1994) 2,089 altnames ruhlen (1987) 2,007 altnames glottolog 1,237 altnames ethnologue 3 classification subrefs 15,344 classification sub 11,584 classification familyrefs 699 classification family 207 core links 28,949 core name 25,228 core level 25,228 core macroareas 20,700 core countries 10,988 core hid 8,733 core longitude 8,402 core latitude 8,402 core iso639-3 8,032 core timespan 134 core name_comment 14 core speakers 1 core name_pronunciation 1 core location 1 core comment 1 endangerment status 8,165 endangerment source 8,165 endangerment date 8,165 endangerment comment 8,165 hh_ethnologue_comment isohid 620 hh_ethnologue_comment ethnologue_versions 620 hh_ethnologue_comment comment_type 620 hh_ethnologue_comment comment 620 identifier multitree 14,906 identifier endangeredlanguages 2,999 identifier wals 2,620 identifier languagelandscape 342 iso_retirement change_to 438 iso_retirement reason 344 iso_retirement name 344 iso_retirement effective 344 iso_retirement code 344 iso_retirement change_request 335 iso_retirement remedy 315 iso_retirement comment 232 sources glottolog 188,938 triggers lgcode 30,089 triggers inlg 187 Wall time: 1.53 s
from treedb import LEVEL, Languoid
FAMILY, LANGUAGE, DIALECT = LEVEL
print_sql(Languoid)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'languoid')
CREATE TABLE languoid ( id VARCHAR(8) NOT NULL CHECK (length(id) = 8), name VARCHAR NOT NULL CHECK (name != ''), level VARCHAR(8) NOT NULL, parent_id VARCHAR(8), hid TEXT CHECK (length(hid) >= 3), iso639_3 VARCHAR(3) CHECK (length(iso639_3) = 3), latitude FLOAT CHECK (latitude BETWEEN -90 AND 90), longitude FLOAT CHECK (longitude BETWEEN -180 AND 180), PRIMARY KEY (id), CHECK ((latitude IS NULL) = (longitude IS NULL)), UNIQUE (name), CHECK (level IN ('family', 'language', 'dialect')), FOREIGN KEY(parent_id) REFERENCES languoid (id) DEFERRABLE INITIALLY DEFERRED, UNIQUE (hid), UNIQUE (iso639_3) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM languoid ()
25228
%time treedb.print_languoid_stats()
WITH RECURSIVE tree(parent_id, child_id) AS (SELECT root.id AS parent_id, root.id AS child_id FROM languoid AS root WHERE root.parent_id IS NULL UNION ALL SELECT tree.parent_id AS parent_id, child.id AS child_id FROM tree JOIN languoid AS child ON tree.child_id = child.parent_id) SELECT ? AS kind, count(*) AS n FROM languoid UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.level = ? AND languoid.parent_id IS NULL UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.level = ? AND languoid.parent_id IS NULL UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.parent_id IS NULL UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.level = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.level = ? AND languoid.parent_id IS NOT NULL UNION ALL SELECT ? AS kind, count(*) AS n FROM languoid WHERE languoid.level = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name NOT IN (?, ?, ?, ?, ?, ?, ?, ?) UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name != ? UNION ALL SELECT ? AS kind, count(*) AS n FROM tree JOIN languoid AS root ON tree.parent_id = root.id JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? AND root.name = ? ('languoids', 'families', 'family', 'isolates', 'language', 'roots', 'languages', 'language', 'subfamilies', 'family', 'dialects', 'dialect', 'Spoken L1 Languages', 'language', 'Sign Language', 'Unclassifiable', 'Pidgin', 'Unattested', 'Artificial Language', 'Mixed Language', 'Speech Register', 'Bookkeeping', 'Sign Language', 'language', 'Sign Language', 'Unclassifiable', 'language', 'Unclassifiable', 'Pidgin', 'language', 'Pidgin', 'Unattested', 'language', 'Unattested', 'Artificial Language', 'language', 'Artificial Language', 'Mixed Language', 'language', 'Mixed Language', 'Speech Register', 'language', 'Speech Register', 'All', 'language', 'Bookkeeping', 'Bookkeeping', 'language', 'Bookkeeping')
25,228 languoids 242 families 187 isolates 429 roots 8,515 languages 4,234 subfamilies 12,237 dialects 7,604 Spoken L1 Languages 196 Sign Language 123 Unclassifiable 81 Pidgin 66 Unattested 28 Artificial Language 14 Mixed Language 10 Speech Register 8,122 All 393 Bookkeeping Wall time: 1.62 s
read_sql(select([Languoid]).limit(5), index_col='id')
SELECT languoid.id, languoid.name, languoid.level, languoid.parent_id, languoid.hid, languoid.iso639_3, languoid.latitude, languoid.longitude FROM languoid LIMIT ? OFFSET ? (5, 0)
name | level | parent_id | hid | iso639_3 | latitude | longitude | |
---|---|---|---|---|---|---|---|
id | |||||||
3adt1234 | 3Ad-Tekles | dialect | nort3292 | None | None | NaN | NaN |
aala1237 | Aalawa | dialect | ramo1244 | None | None | NaN | NaN |
aant1238 | Aantantara | dialect | nort2920 | None | None | NaN | NaN |
aari1238 | Aari-Gayil | family | ahkk1235 | aiz | aiz | NaN | NaN |
aari1239 | Aari | language | aari1238 | aiw | aiw | 5.95034 | 36.5721 |
read_sql(select([Languoid]).order_by('id').limit(5), index_col='id')
SELECT languoid.id, languoid.name, languoid.level, languoid.parent_id, languoid.hid, languoid.iso639_3, languoid.latitude, languoid.longitude FROM languoid ORDER BY languoid.id LIMIT ? OFFSET ? (5, 0)
name | level | parent_id | hid | iso639_3 | latitude | longitude | |
---|---|---|---|---|---|---|---|
id | |||||||
3adt1234 | 3Ad-Tekles | dialect | nort3292 | None | None | NaN | NaN |
aala1237 | Aalawa | dialect | ramo1244 | None | None | NaN | NaN |
aant1238 | Aantantara | dialect | nort2920 | None | None | NaN | NaN |
aari1238 | Aari-Gayil | family | ahkk1235 | aiz | aiz | NaN | NaN |
aari1239 | Aari | language | aari1238 | aiw | aiw | 5.95034 | 36.5721 |
read_sql(select([Languoid]).order_by('name').limit(5), index_col='id')
SELECT languoid.id, languoid.name, languoid.level, languoid.parent_id, languoid.hid, languoid.iso639_3, languoid.latitude, languoid.longitude FROM languoid ORDER BY languoid.name LIMIT ? OFFSET ? (5, 0)
name | level | parent_id | hid | iso639_3 | latitude | longitude | |
---|---|---|---|---|---|---|---|
id | |||||||
gang1269 | !Gã!nge | dialect | sero1239 | None | None | NaN | NaN |
kwii1241 | !Kwi | family | tuuu1241 | None | None | NaN | NaN |
oung1238 | !O!ung | language | book1242 | oun | oun | -15.3 | 14.35 |
abda1238 | 'Abd Al-Kuri | dialect | soqo1240 | None | None | NaN | NaN |
aden1242 | 'Aden | dialect | jude1267 | None | None | NaN | NaN |
Child, Parent = (sa.orm.aliased(Languoid, name=n) for n in ('child', 'parent'))
select_parent_levels = select([Child.level.label('child_level'),
Parent.level.label('parent_level'),
count().label('n_languoids')])\
.select_from(sa.outerjoin(Child, Parent,
Child.parent_id == Parent.id))\
.group_by(Child.level, Parent.level)\
.order_by('child_level', 'parent_level')
read_sql(select_parent_levels, index_col=['child_level', 'parent_level'])\
.unstack(fill_value=0).assign(all=lambda x: x.sum(axis='columns'))\
.pipe(lambda x: x.append(x.sum().rename('all')))
SELECT child.level AS child_level, parent.level AS parent_level, count(*) AS n_languoids FROM languoid AS child LEFT OUTER JOIN languoid AS parent ON child.parent_id = parent.id GROUP BY child.level, parent.level ORDER BY child_level, parent_level ()
n_languoids | all | ||||
---|---|---|---|---|---|
parent_level | NaN | dialect | family | language | |
child_level | |||||
dialect | 0 | 1781 | 0 | 10456 | 12237 |
family | 242 | 0 | 4234 | 0 | 4476 |
language | 187 | 0 | 8328 | 0 | 8515 |
all | 429 | 1781 | 12562 | 10456 | 25228 |
select_nisos = select([Languoid.level.label('level'),
count().label('n_languoids'),
count(Languoid.iso639_3).label('n_isos')])\
.group_by(Languoid.level)\
.order_by('level')
read_sql(select_nisos, index_col='level')\
.assign(ratio=lambda x: 100 * x['n_isos'] / x['n_languoids'])
SELECT languoid.level AS level, count(*) AS n_languoids, count(languoid.iso639_3) AS n_isos FROM languoid GROUP BY languoid.level ORDER BY level ()
n_languoids | n_isos | ratio | |
---|---|---|---|
level | |||
dialect | 12237 | 177 | 1.446433 |
family | 4476 | 45 | 1.005362 |
language | 8515 | 7810 | 91.720493 |
select_nlocations = select([Languoid.level.label('level'),
count().label('n_languoids'),
count(Languoid.latitude).label('n_locations')])\
.group_by(Languoid.level)\
.order_by('level')
read_sql(select_nlocations, index_col='level')\
.assign(ratio=lambda x: 100 * x['n_locations'] / x['n_languoids'])
SELECT languoid.level AS level, count(*) AS n_languoids, count(languoid.latitude) AS n_locations FROM languoid GROUP BY languoid.level ORDER BY level ()
n_languoids | n_locations | ratio | |
---|---|---|---|
level | |||
dialect | 12237 | 260 | 2.124704 |
family | 4476 | 17 | 0.379803 |
language | 8515 | 8125 | 95.419847 |
select_latlon = select([Languoid.latitude, Languoid.longitude])\
.where(Languoid.level == LANGUAGE)
latitudes, longitudes = zip(*select_latlon.execute())
plt.figure(figsize=(12, 6))
plt.axis([-180, 180, -90, 90])
plt.xticks(range(-180, 181, 60))
plt.yticks(range(-90, 91, 30))
plt.scatter(longitudes, latitudes, 1, 'black');
SELECT languoid.latitude, languoid.longitude FROM languoid WHERE languoid.level = ? ('language',)
Family, Child, family_child = treedb.Languoid.parent_descendant(parent_root=True,
parent_level=FAMILY)
n_languages = count(Child.id).label('n_languages')
select_family_nlanguages = select([Family.id.label('family_id'), n_languages])\
.select_from(family_child)\
.where(Child.level == LANGUAGE)\
.group_by(Family.id)
select_top_families = select_family_nlanguages\
.having(n_languages >= 100)\
.order_by('n_languages')
read_sql(select_top_families, index_col='family_id').plot.barh();
WITH RECURSIVE tree(parent_id, child_id) AS (SELECT root.id AS parent_id, child.id AS child_id FROM languoid AS root LEFT OUTER JOIN languoid AS child ON root.id = child.parent_id WHERE root.parent_id IS NULL AND root.level = ? UNION ALL SELECT tree.parent_id AS parent_id, child.id AS child_id FROM tree JOIN languoid AS child ON tree.child_id = child.parent_id) SELECT root.id AS family_id, count(child.id) AS n_languages FROM tree JOIN languoid AS root ON tree.parent_id = root.id LEFT OUTER JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? GROUP BY root.id HAVING count(child.id) >= ? ORDER BY n_languages ('family', 'language', 100)
family_size = select_family_nlanguages.c.n_languages.label('family_size')
select_family_sizes = select([family_size, count().label('n_families')])\
.group_by(family_size)\
.order_by('family_size')
_ = read_sql(select_family_sizes, index_col='family_size')
_.plot.area(figsize=(12, 3), logx=True).xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
_.index.to_series().describe().to_frame().transpose()[['count', 'min', 'max']]
WITH RECURSIVE tree(parent_id, child_id) AS (SELECT root.id AS parent_id, child.id AS child_id FROM languoid AS root LEFT OUTER JOIN languoid AS child ON root.id = child.parent_id WHERE root.parent_id IS NULL AND root.level = ? UNION ALL SELECT tree.parent_id AS parent_id, child.id AS child_id FROM tree JOIN languoid AS child ON tree.child_id = child.parent_id) SELECT n_languages AS family_size, count(*) AS n_families FROM (SELECT root.id AS family_id, count(child.id) AS n_languages FROM tree JOIN languoid AS root ON tree.parent_id = root.id LEFT OUTER JOIN languoid AS child ON tree.child_id = child.id WHERE child.level = ? GROUP BY root.id) GROUP BY n_languages ORDER BY family_size ('family', 'language')
count | min | max | |
---|---|---|---|
family_size | 56.0 | 2.0 | 1433.0 |
levels = sa.union_all(*[select([sa.literal(l).label('level')]) for l in LEVEL])\
.cte(name='levels')
select_nchildren = select([Parent.id.label('parent_id'),
Parent.level.label('parent_level'),
levels.c.level.label('child_level'),
count(Child.id).label('n_children')])\
.select_from(sa.join(Parent, levels, sa.true())
.outerjoin(Child, sa.and_(Parent.id == Child.parent_id,
Child.level == levels.c.level)))\
.group_by(Parent.id, Parent.level, levels.c.level)
read_sql(select_nchildren)\
.pivot_table(index='parent_level', columns='child_level', values='n_children',
aggfunc=['sum', 'max', 'mean'], fill_value=0)
WITH levels AS (SELECT ? AS level UNION ALL SELECT ? AS level UNION ALL SELECT ? AS level) SELECT parent.id AS parent_id, parent.level AS parent_level, levels.level AS child_level, count(child.id) AS n_children FROM languoid AS parent JOIN levels ON 1 LEFT OUTER JOIN languoid AS child ON parent.id = child.parent_id AND child.level = levels.level GROUP BY parent.id, parent.level, levels.level ('family', 'language', 'dialect')
sum | max | mean | |||||||
---|---|---|---|---|---|---|---|---|---|
child_level | dialect | family | language | dialect | family | language | dialect | family | language |
parent_level | |||||||||
dialect | 1781 | 0 | 0 | 22 | 0 | 0 | 0.145542 | 0.000000 | 0.00000 |
family | 0 | 4234 | 8328 | 0 | 45 | 393 | 0.000000 | 0.945934 | 1.86059 |
language | 10456 | 0 | 0 | 32 | 0 | 0 | 1.227951 | 0.000000 | 0.00000 |
Parent, Child, parent_child = treedb.Languoid.parent_descendant()
select_ndescendants = select([Parent.id.label('parent_id'),
Parent.level.label('parent_level'),
count(Child.id).label('n_descendants')])\
.select_from(parent_child)\
.group_by(Parent.id, Parent.level)\
.alias()
n_descendants = select_ndescendants.c.n_descendants
d_min = sa.func.min(n_descendants).label('min')
d_max = sa.func.max(n_descendants).label('max')
d_mean = (sa.func.sum(n_descendants).cast(sa.Float) / count(n_descendants)).label('mean')
select_level_mean_descendants = select([select_ndescendants.c.parent_level,
d_min, d_max, d_mean])\
.group_by(select_nchildren.c.parent_level)\
.order_by(select_nchildren.c.parent_level)
select_total_mean_descendants = select([sa.literal('total').label('parent_level'),
d_min, d_max, d_mean])
select_mean_descendants = [select_level_mean_descendants, select_total_mean_descendants]
_ = pd.concat([read_sql(q, index_col='parent_level') for q in select_mean_descendants])
_.columns = pd.MultiIndex.from_product([['n_descendants'], _.columns])
_
WITH RECURSIVE tree(parent_id, child_id) AS (SELECT parent.id AS parent_id, child.id AS child_id FROM languoid AS parent LEFT OUTER JOIN languoid AS child ON parent.id = child.parent_id UNION ALL SELECT tree.parent_id AS parent_id, child.id AS child_id FROM tree JOIN languoid AS child ON tree.child_id = child.parent_id) SELECT anon_1.parent_level, min(anon_1.n_descendants) AS min, max(anon_1.n_descendants) AS max, CAST(sum(anon_1.n_descendants) AS FLOAT) / count(anon_1.n_descendants) AS mean FROM (SELECT parent.id AS parent_id, parent.level AS parent_level, count(child.id) AS n_descendants FROM tree JOIN languoid AS parent ON tree.parent_id = parent.id LEFT OUTER JOIN languoid AS child ON tree.child_id = child.id GROUP BY parent.id, parent.level) AS anon_1 GROUP BY parent_level ORDER BY parent_level () WITH RECURSIVE tree(parent_id, child_id) AS (SELECT parent.id AS parent_id, child.id AS child_id FROM languoid AS parent LEFT OUTER JOIN languoid AS child ON parent.id = child.parent_id UNION ALL SELECT tree.parent_id AS parent_id, child.id AS child_id FROM tree JOIN languoid AS child ON tree.child_id = child.parent_id) SELECT ? AS parent_level, min(anon_1.n_descendants) AS min, max(anon_1.n_descendants) AS max, CAST(sum(anon_1.n_descendants) AS FLOAT) / count(anon_1.n_descendants) AS mean FROM (SELECT parent.id AS parent_id, parent.level AS parent_level, count(child.id) AS n_descendants FROM tree JOIN languoid AS parent ON tree.parent_id = parent.id LEFT OUTER JOIN languoid AS child ON tree.child_id = child.id GROUP BY parent.id, parent.level) AS anon_1 ('total',)
n_descendants | |||
---|---|---|---|
min | max | mean | |
parent_level | |||
dialect | 0 | 57 | 0.204462 |
family | 1 | 4750 | 33.029267 |
language | 0 | 125 | 1.437111 |
total | 0 | 4750 | 6.444348 |
from treedb.models import Macroarea, languoid_macroarea
print_sql(Macroarea)
print_sql(languoid_macroarea)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'macroarea')
CREATE TABLE macroarea ( name VARCHAR(13) NOT NULL, PRIMARY KEY (name), CHECK (name IN ('Africa', 'Australia', 'Eurasia', 'North America', 'Papunesia', 'South America')) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM macroarea ()
6
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'languoid_macroarea')
CREATE TABLE languoid_macroarea ( languoid_id VARCHAR(8) NOT NULL, macroarea_name VARCHAR(13) NOT NULL, PRIMARY KEY (languoid_id, macroarea_name), FOREIGN KEY(languoid_id) REFERENCES languoid (id), FOREIGN KEY(macroarea_name) REFERENCES macroarea (name) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM languoid_macroarea ()
20700
select_macroarea_nlanguages = select([Macroarea.name.label('macroarea'),
count().label('n_languages')])\
.select_from(sa.outerjoin(Macroarea,
sa.join(languoid_macroarea, Languoid)))\
.where(Languoid.level == LANGUAGE)\
.group_by(Macroarea.name)\
.order_by(sa.desc('n_languages'))
_ = read_sql(select_macroarea_nlanguages, index_col='macroarea')
_.plot.pie(y='n_languages')
_
SELECT macroarea.name AS macroarea, count(*) AS n_languages FROM macroarea LEFT OUTER JOIN (languoid_macroarea JOIN languoid ON languoid.id = languoid_macroarea.languoid_id) ON macroarea.name = languoid_macroarea.macroarea_name WHERE languoid.level = ? GROUP BY macroarea.name ORDER BY n_languages DESC ('language',)
n_languages | |
---|---|
macroarea | |
Africa | 2347 |
Papunesia | 2210 |
Eurasia | 1970 |
North America | 787 |
South America | 712 |
Australia | 386 |
macroareas = sa.func.group_concat(Macroarea.name, ', ').label('macroareas')
select_multiarea_languages = select([Languoid.id, Languoid.name, macroareas])\
.select_from(sa.join(Languoid, languoid_macroarea).join(Macroarea))\
.where(Languoid.level == LANGUAGE)\
.group_by(Languoid.id)\
.having(count() > 1)\
.order_by('id')
assert read_sql(select_multiarea_languages).empty
SELECT languoid.id, languoid.name, group_concat(macroarea.name, ?) AS macroareas FROM languoid JOIN languoid_macroarea ON languoid.id = languoid_macroarea.languoid_id JOIN macroarea ON macroarea.name = languoid_macroarea.macroarea_name WHERE languoid.level = ? GROUP BY languoid.id HAVING count(*) > ? ORDER BY languoid.id (', ', 'language', 1)
from treedb.models import Country, languoid_country
print_sql(Country)
print_sql(languoid_country)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'country')
CREATE TABLE country ( id VARCHAR(2) NOT NULL CHECK (length(id) = 2), name TEXT NOT NULL CHECK (name != ''), PRIMARY KEY (id), UNIQUE (name) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM country ()
226
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'languoid_country')
CREATE TABLE languoid_country ( languoid_id VARCHAR(8) NOT NULL, country_id VARCHAR(2) NOT NULL, PRIMARY KEY (languoid_id, country_id), FOREIGN KEY(languoid_id) REFERENCES languoid (id), FOREIGN KEY(country_id) REFERENCES country (id) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM languoid_country ()
10988
select_country_nlanguages = select([Country.name.label('country'),
count().label('n_languages')])\
.select_from(sa.outerjoin(Country,
sa.join(languoid_country, Languoid)))\
.where(Languoid.level == LANGUAGE)\
.group_by(Country.id)\
.order_by(sa.desc('n_languages'))\
.limit(10)
read_sql(select_country_nlanguages, index_col='country')
SELECT country.name AS country, count(*) AS n_languages FROM country LEFT OUTER JOIN (languoid_country JOIN languoid ON languoid.id = languoid_country.languoid_id) ON country.id = languoid_country.country_id WHERE languoid.level = ? GROUP BY country.id ORDER BY n_languages DESC LIMIT ? OFFSET ? ('language', 10, 0)
n_languages | |
---|---|
country | |
Papua New Guinea | 897 |
Indonesia | 744 |
Nigeria | 572 |
India | 498 |
China | 398 |
Australia | 393 |
Mexico | 328 |
Brazil | 324 |
United States | 321 |
Cameroon | 313 |
select_lc = select([Languoid.id, Languoid.name, Country.id.label('country')])\
.select_from(sa.join(Languoid, languoid_country).join(Country))\
.where(Languoid.level == LANGUAGE)\
.order_by(Languoid.id, 'country')
countries = sa.func.group_concat(select_lc.c.country, ', ').label('countries')
select_multicountry_languages = select([select_lc.c.id,
select_lc.c.name,
count().label('n_countries'),
countries])\
.group_by(select_lc.c.id, select_lc.c.name)\
.having(count() > 1)\
.order_by(sa.desc('n_countries'), select_lc.c.id)\
.limit(10)
read_sql(select_multicountry_languages, index_col='id')
SELECT id, name, count(*) AS n_countries, group_concat(country, ?) AS countries FROM (SELECT languoid.id AS id, languoid.name AS name, country.id AS country FROM languoid JOIN languoid_country ON languoid.id = languoid_country.languoid_id JOIN country ON country.id = languoid_country.country_id WHERE languoid.level = ? ORDER BY languoid.id, country) GROUP BY id, name HAVING count(*) > ? ORDER BY n_countries DESC, id LIMIT ? OFFSET ? (', ', 'language', 1, 10, 0)
name | n_countries | countries | |
---|---|---|---|
id | |||
stan1288 | Spanish | 30 | AD, AR, BO, BR, BZ, CL, CO, CR, CU, DO, EC, ES... |
stan1293 | English | 26 | AU, BM, BR, BZ, CA, CK, CU, DO, FK, GB, GI, GP... |
stan1318 | Standard Arabic | 25 | AE, BH, DJ, DZ, EG, ER, IL, IQ, JO, KM, KW, LB... |
amer1248 | American Sign Language | 22 | BB, BF, BJ, BO, CA, CD, CF, CI, CN, GA, GH, JM... |
east2295 | Eastern Yiddish | 22 | AR, AU, BE, BY, CA, CR, EE, HU, IL, LT, LV, MD... |
russ1263 | Russian | 19 | BY, CN, EE, FI, GE, KG, KP, KZ, LT, LV, MD, MN... |
stan1295 | German | 16 | AT, BE, CH, CZ, DE, DK, FR, HU, IT, LI, LU, NL... |
port1283 | Portuguese | 15 | AD, AR, BO, BR, CO, ES, FR, GF, GY, PE, PT, PY... |
sint1235 | Sinte Romani | 13 | AT, CH, CZ, DE, FR, HR, IT, KZ, ME, NL, PL, RS... |
nucl1301 | Turkish | 12 | AL, AM, AZ, BG, CY, GE, GR, IQ, IR, MK, SY, TR |
select_lc = select([Languoid.id, count().label('n_countries')])\
.select_from(sa.outerjoin(Languoid, languoid_country))\
.where(Languoid.level == LANGUAGE)\
.group_by(Languoid.id)
select_lc_dist = select([select_lc.c.n_countries, count().label('n_languages')])\
.group_by(select_lc.c.n_countries)\
.order_by('n_countries')
_ = read_sql(select_lc_dist, index_col='n_countries')
_.plot.bar(figsize=(12, 3))
read_sql(select([sa.literal('n_countries').label('value'),
count().label('count'),
sa.func.sum(select_lc.c.n_countries).label('sum'),
sa.func.min(select_lc.c.n_countries).label('min'),
sa.func.max(select_lc.c.n_countries).label('max'),
sa.func.avg(select_lc.c.n_countries).label('mean')]),
index_col='value').rename_axis(None)
SELECT n_countries, count(*) AS n_languages FROM (SELECT languoid.id AS id, count(*) AS n_countries FROM languoid LEFT OUTER JOIN languoid_country ON languoid.id = languoid_country.languoid_id WHERE languoid.level = ? GROUP BY languoid.id) GROUP BY n_countries ORDER BY n_countries ('language',) SELECT ? AS value, count(*) AS count, sum(n_countries) AS sum, min(n_countries) AS min, max(n_countries) AS max, avg(n_countries) AS mean FROM (SELECT languoid.id AS id, count(*) AS n_countries FROM languoid LEFT OUTER JOIN languoid_country ON languoid.id = languoid_country.languoid_id WHERE languoid.level = ? GROUP BY languoid.id) ('n_countries', 'language')
count | sum | min | max | mean | |
---|---|---|---|---|---|
n_countries | 8515 | 10937 | 1 | 30 | 1.284439 |
from treedb.models import Altname, AltnameProvider
select_provider_nlanguoids = select([
AltnameProvider.name.label('altname_provider'),
count(sa.distinct(Altname.languoid_id)).label('n_languoids')
]).select_from(sa.join(AltnameProvider, Altname))\
.group_by(AltnameProvider.name)\
.order_by('n_languoids', 'altname_provider')
read_sql(select_provider_nlanguoids, index_col='altname_provider').plot.barh();
SELECT altnameprovider.name AS altname_provider, count(DISTINCT altname.languoid_id) AS n_languoids FROM altnameprovider JOIN altname ON altnameprovider.id = altname.provider_id GROUP BY altnameprovider.name ORDER BY n_languoids, altname_provider ()
select_la = select([Languoid.id, count().label('n_altnames')])\
.select_from(sa.outerjoin(Languoid, Altname))\
.group_by(Languoid.id)
select_la_dist = select([select_la.c.n_altnames, count().label('n_languoids')])\
.group_by(select_la.c.n_altnames)\
.order_by(select_la.c.n_altnames)
_ = read_sql(select_la_dist, index_col='n_altnames')
_.plot.area(figsize=(12, 3))
read_sql(select([sa.literal('n_altnames').label('value'),
count().label('count'),
sa.func.sum(select_la.c.n_altnames).label('sum'),
sa.func.min(select_la.c.n_altnames).label('min'),
sa.func.max(select_la.c.n_altnames).label('max'),
sa.func.avg(select_la.c.n_altnames).label('mean')]),
index_col='value').rename_axis(None)
SELECT n_altnames, count(*) AS n_languoids FROM (SELECT languoid.id AS id, count(*) AS n_altnames FROM languoid LEFT OUTER JOIN altname ON languoid.id = altname.languoid_id GROUP BY languoid.id) GROUP BY n_altnames ORDER BY n_altnames () SELECT ? AS value, count(*) AS count, sum(n_altnames) AS sum, min(n_altnames) AS min, max(n_altnames) AS max, avg(n_altnames) AS mean FROM (SELECT languoid.id AS id, count(*) AS n_altnames FROM languoid LEFT OUTER JOIN altname ON languoid.id = altname.languoid_id GROUP BY languoid.id) ('n_altnames',)
count | sum | min | max | mean | |
---|---|---|---|---|---|
n_altnames | 25228 | 158497 | 1 | 246 | 6.282583 |
from treedb.models import Source, SourceProvider
select_provider_nsources = select([SourceProvider.name.label('provider'),
Languoid.level,
count().label('n_sources')])\
.select_from(sa.join(SourceProvider, Source).join(Languoid))\
.group_by(SourceProvider.name, Languoid.level)\
.order_by('provider', sa.desc('n_sources'))
read_sql(select_provider_nsources, index_col=['provider', 'level'])
SELECT sourceprovider.name AS provider, languoid.level, count(*) AS n_sources FROM sourceprovider JOIN source ON sourceprovider.id = source.provider_id JOIN languoid ON languoid.id = source.languoid_id GROUP BY sourceprovider.name, languoid.level ORDER BY provider, n_sources DESC ()
n_sources | ||
---|---|---|
provider | level | |
glottolog | language | 184938 |
family | 2387 | |
dialect | 1613 |
select_lang_nsources = select([Languoid.id,
count(Source.languoid_id).label('n_sources')])\
.select_from(sa.outerjoin(Languoid, Source))\
.group_by(Languoid.id)
select_nsources_nlangs = select([select_lang_nsources.c.n_sources,
count().label('n_languoids')])\
.group_by(select_lang_nsources.c.n_sources)\
.order_by('n_languoids')
_ = read_sql(select_nsources_nlangs, index_col='n_sources')
_.groupby(_.index != 0).sum().rename_axis('n_sources')\
.rename(index={False: '= 0', True: '> 0'}).plot.bar(figsize=(4, 3))
_.drop(0).plot.area(figsize=(8, 3), logx=True)\
.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
read_sql(select([sa.literal('n_sources').label('value'),
count().label('count'),
sa.func.sum(select_lang_nsources.c.n_sources).label('sum'),
sa.func.min(select_lang_nsources.c.n_sources).label('min'),
sa.func.max(select_lang_nsources.c.n_sources).label('max'),
sa.func.avg(select_lang_nsources.c.n_sources).label('mean')]),
index_col='value').rename_axis(None)
SELECT n_sources, count(*) AS n_languoids FROM (SELECT languoid.id AS id, count(source.languoid_id) AS n_sources FROM languoid LEFT OUTER JOIN source ON languoid.id = source.languoid_id GROUP BY languoid.id) GROUP BY n_sources ORDER BY n_languoids () SELECT ? AS value, count(*) AS count, sum(n_sources) AS sum, min(n_sources) AS min, max(n_sources) AS max, avg(n_sources) AS mean FROM (SELECT languoid.id AS id, count(source.languoid_id) AS n_sources FROM languoid LEFT OUTER JOIN source ON languoid.id = source.languoid_id GROUP BY languoid.id) ('n_sources',)
count | sum | min | max | mean | |
---|---|---|---|---|---|
n_sources | 25228 | 188938 | 0 | 1548 | 7.489218 |
select_ll_nsources = select([Languoid.id, Languoid.level,
count(Source.languoid_id).label('n_sources')])\
.select_from(sa.outerjoin(Languoid, Source))\
.group_by(Languoid.id, Languoid.level)
select_ll_nlangs = select([select_ll_nsources.c.level,
select_ll_nsources.c.n_sources,
sa.func.count().label('n_languoids')])\
.group_by(select_ll_nsources.c.level, select_ll_nsources.c.n_sources)\
.order_by('n_languoids')
_ = read_sql(select_ll_nlangs).pivot(index='n_sources', columns='level', values='n_languoids')
_.groupby(_.index != 0).sum()[list(treedb.LEVEL)].rename_axis('n_sources')\
.rename(index={False: '= 0', True: '> 0'}).plot.bar(figsize=(4, 3))
_.drop(0)[list(treedb.LEVEL)].plot.area(figsize=(8, 3), logx=True)\
.xaxis.set_major_formatter(mpl.ticker.ScalarFormatter())
SELECT level, n_sources, count(*) AS n_languoids FROM (SELECT languoid.id AS id, languoid.level AS level, count(source.languoid_id) AS n_sources FROM languoid LEFT OUTER JOIN source ON languoid.id = source.languoid_id GROUP BY languoid.id, languoid.level) GROUP BY level, n_sources ORDER BY n_languoids ()
from treedb.models import Bibfile, Bibitem
select_bibfile_nused = select([Bibfile.name.label('bibfile'), count().label('n_used')])\
.select_from(sa.join(Bibfile, Bibitem))\
.group_by(Bibfile.name)\
.order_by(sa.desc('n_used'))
read_sql(select_bibfile_nused, index_col='bibfile').plot.bar(figsize=(12, 3));
SELECT bibfile.name AS bibfile, count(*) AS n_used FROM bibfile JOIN bibitem ON bibfile.id = bibitem.bibfile_id GROUP BY bibfile.name ORDER BY n_used DESC ()
from treedb.models import ClassificationRef
select_levels = select([ClassificationRef.kind, Languoid.level,
count().label('n_classificationrefs')])\
.select_from(sa.join(ClassificationRef, Languoid))\
.group_by(ClassificationRef.kind, Languoid.level)\
.order_by('kind', 'level')
read_sql(select_levels, index_col=['kind', 'level']).unstack()
SELECT classificationref.kind, languoid.level, count(*) AS n_classificationrefs FROM classificationref JOIN languoid ON languoid.id = classificationref.languoid_id GROUP BY classificationref.kind, languoid.level ORDER BY classificationref.kind, languoid.level ()
n_classificationrefs | |||
---|---|---|---|
level | dialect | family | language |
kind | |||
family | 8 | 435 | 256 |
sub | 64 | 5678 | 9602 |
select_lcr = select([Languoid.id, count().label('n_crefs')])\
.select_from(sa.outerjoin(Languoid, ClassificationRef))\
.group_by(Languoid.id)
select_lcr_dist = select([select_lcr.c.n_crefs, count().label('n_languoids')])\
.group_by(select_lcr.c.n_crefs)\
.order_by(select_lcr.c.n_crefs)
_ = read_sql(select_lcr_dist, index_col='n_crefs')
_.plot.area(figsize=(12, 3))
read_sql(select([sa.literal('n_crefs').label('value'),
count().label('count'),
sa.func.sum(select_lcr.c.n_crefs).label('sum'),
sa.func.min(select_lcr.c.n_crefs).label('min'),
sa.func.max(select_lcr.c.n_crefs).label('max'),
sa.func.avg(select_lcr.c.n_crefs).label('mean')]),
index_col='value').rename_axis(None)
SELECT n_crefs, count(*) AS n_languoids FROM (SELECT languoid.id AS id, count(*) AS n_crefs FROM languoid LEFT OUTER JOIN classificationref ON languoid.id = classificationref.languoid_id GROUP BY languoid.id) GROUP BY n_crefs ORDER BY n_crefs () SELECT ? AS value, count(*) AS count, sum(n_crefs) AS sum, min(n_crefs) AS min, max(n_crefs) AS max, avg(n_crefs) AS mean FROM (SELECT languoid.id AS id, count(*) AS n_crefs FROM languoid LEFT OUTER JOIN classificationref ON languoid.id = classificationref.languoid_id GROUP BY languoid.id) ('n_crefs',)
count | sum | min | max | mean | |
---|---|---|---|---|---|
n_crefs | 25228 | 29417 | 1 | 11 | 1.166046 |
from treedb.models import Link
print_sql(Link)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'link')
CREATE TABLE link ( languoid_id VARCHAR(8) NOT NULL, ord INTEGER NOT NULL CHECK (ord >= 1), url TEXT NOT NULL CHECK (url != ''), title TEXT CHECK (title != ''), scheme TEXT, PRIMARY KEY (languoid_id, ord), UNIQUE (languoid_id, url), CHECK (substr(url, 1, length(scheme) + 3) = scheme || '://'), FOREIGN KEY(languoid_id) REFERENCES languoid (id), CHECK (scheme IN ('http', 'https')) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM link ()
28949
select_scheme_nlinks = select([Link.scheme.label('link_scheme'), count().label('n_links')])\
.group_by(Link.scheme)\
.order_by(sa.desc('n_links'))
read_sql(select_scheme_nlinks, index_col='link_scheme').plot.pie(y='n_links');
SELECT link.scheme AS link_scheme, count(*) AS n_links FROM link GROUP BY link.scheme ORDER BY n_links DESC ()
from urllib.parse import urlparse
hosts = collections.Counter(urlparse(u).hostname for u, in select([Link.url]).execute())
pd.DataFrame.from_dict(hosts, orient='index', columns=['n_links'])\
.sort_values(by='n_links').plot.barh();
SELECT link.url FROM link ()
from treedb.models import Endangerment, EndangermentSource
print_sql(Endangerment)
print_sql(EndangermentSource)
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'endangerment')
CREATE TABLE endangerment ( languoid_id VARCHAR(8) NOT NULL, status VARCHAR(14) NOT NULL, source_id INTEGER NOT NULL, date DATETIME NOT NULL, comment TEXT NOT NULL CHECK (comment != ''), PRIMARY KEY (languoid_id), FOREIGN KEY(languoid_id) REFERENCES languoid (id), CHECK (status IN ('not endangered', 'threatened', 'shifting', 'moribund', 'nearly extinct', 'extinct')), FOREIGN KEY(source_id) REFERENCES endangerment_source (id) ) WITHOUT ROWID
SELECT count(*) AS n_rows FROM endangerment ()
8165
SELECT sqlite_master.sql FROM sqlite_master WHERE sqlite_master.type = ? AND sqlite_master.name = ? ('table', 'endangerment_source')
CREATE TABLE endangerment_source ( id INTEGER NOT NULL, name TEXT NOT NULL CHECK (name != ''), bibitem_id INTEGER, pages TEXT CHECK (pages != ''), PRIMARY KEY (id), UNIQUE (bibitem_id, pages), CHECK ((bibitem_id IS NULL) = (pages IS NULL)), UNIQUE (name), FOREIGN KEY(bibitem_id) REFERENCES bibitem (id) )
SELECT count(*) AS n_rows FROM endangerment_source ()
11
e_source = EndangermentSource.name.label('source')
select_source_nendangerments = select([e_source, count().label('n_endangerments')])\
.select_from(sa.join(Endangerment, EndangermentSource))\
.group_by(e_source)\
.order_by('n_endangerments')
read_sql(select_source_nendangerments, index_col='source').plot.barh();
SELECT endangerment_source.name AS source, count(*) AS n_endangerments FROM endangerment JOIN endangerment_source ON endangerment_source.id = endangerment.source_id GROUP BY endangerment_source.name ORDER BY n_endangerments ()
%time treedb.hash_csv(treedb.get_query())
WITH RECURSIVE tree(child_id, parent_id, steps, terminal) AS (SELECT child.id AS child_id, child.id AS parent_id, ? AS steps, child.parent_id IS NULL AS terminal FROM languoid AS child UNION ALL SELECT tree.child_id AS child_id, parent.parent_id AS parent_id, tree.steps + ? AS steps, grandparent.parent_id IS NULL AS terminal FROM tree JOIN languoid AS parent ON tree.parent_id = parent.id AND parent.parent_id IS NOT NULL LEFT OUTER JOIN languoid AS grandparent ON parent.parent_id = grandparent.id) SELECT languoid.id, languoid.name, languoid.level, languoid.parent_id, (SELECT group_concat(path_part, ?) AS path FROM (SELECT tree.parent_id AS path_part FROM tree WHERE tree.child_id = languoid.id ORDER BY tree.steps DESC)) AS path, (SELECT tree.parent_id FROM tree WHERE tree.child_id = languoid.id AND tree.steps > ? AND tree.terminal = 1) AS family_id, (SELECT tree.parent_id FROM tree WHERE tree.child_id = languoid.id AND languoid.level = ? AND (EXISTS (SELECT * FROM languoid AS ancestor WHERE tree.parent_id = ancestor.id AND ancestor.level = ?))) AS dialect_language_id, languoid.hid, languoid.iso639_3, languoid.latitude, languoid.longitude, (SELECT group_concat(macroarea_name, ?) AS macroareas FROM (SELECT languoid_macroarea.macroarea_name AS macroarea_name FROM languoid_macroarea WHERE languoid_macroarea.languoid_id = languoid.id ORDER BY languoid_macroarea.macroarea_name)) AS macroareas, (SELECT group_concat(country_id, ?) AS countries FROM (SELECT languoid_country.country_id AS country_id FROM languoid_country WHERE languoid_country.languoid_id = languoid.id ORDER BY languoid_country.country_id)) AS countries, (SELECT group_concat(printf, ?) AS links FROM (SELECT CASE WHEN (link.title IS NOT NULL) THEN printf(?, link.title, link.url) ELSE link.url END AS printf FROM link WHERE link.languoid_id = languoid.id ORDER BY link.ord)) AS links, (SELECT group_concat(printf, ?) AS sources_glottolog FROM (SELECT CASE WHEN (source_glottolog.pages IS NOT NULL AND source_glottolog."trigger" IS NOT NULL) THEN printf(?, source_bibfile.name, source_bibitem.bibkey, source_glottolog.pages, source_glottolog."trigger") WHEN (source_glottolog.pages IS NOT NULL) THEN printf(?, source_bibfile.name, source_bibitem.bibkey, source_glottolog.pages) WHEN (source_glottolog."trigger" IS NOT NULL) THEN printf(?, source_bibfile.name, source_bibitem.bibkey, source_glottolog."trigger") ELSE printf(?, source_bibfile.name, source_bibitem.bibkey) END AS printf FROM source AS source_glottolog, bibfile AS source_bibfile, bibitem AS source_bibitem, sourceprovider AS source_provider WHERE source_glottolog.languoid_id = languoid.id AND source_glottolog.provider_id = source_provider.id AND source_provider.name = ? AND source_glottolog.bibitem_id = source_bibitem.id AND source_bibitem.bibfile_id = source_bibfile.id ORDER BY source_bibfile.name, source_bibitem.bibkey)) AS sources_glottolog, (SELECT group_concat(printf, ?) AS altnames_aiatsis FROM (SELECT CASE WHEN (altname_aiatsis.lang = ?) THEN altname_aiatsis.name ELSE printf(?, altname_aiatsis.name, altname_aiatsis.lang) END AS printf FROM altname AS altname_aiatsis, altnameprovider AS altname_aiatsis_provider WHERE altname_aiatsis.languoid_id = languoid.id AND altname_aiatsis.provider_id = altname_aiatsis_provider.id AND altname_aiatsis_provider.name = ? ORDER BY altname_aiatsis.name, altname_aiatsis.lang)) AS altnames_aiatsis, (SELECT group_concat(printf, ?) AS altnames_elcat FROM (SELECT CASE WHEN (altname_elcat.lang = ?) THEN altname_elcat.name ELSE printf(?, altname_elcat.name, altname_elcat.lang) END AS printf FROM altname AS altname_elcat, altnameprovider AS altname_elcat_provider WHERE altname_elcat.languoid_id = languoid.id AND altname_elcat.provider_id = altname_elcat_provider.id AND altname_elcat_provider.name = ? ORDER BY altname_elcat.name, altname_elcat.lang)) AS altnames_elcat, (SELECT group_concat(printf, ?) AS altnames_ethnologue FROM (SELECT CASE WHEN (altname_ethnologue.lang = ?) THEN altname_ethnologue.name ELSE printf(?, altname_ethnologue.name, altname_ethnologue.lang) END AS printf FROM altname AS altname_ethnologue, altnameprovider AS altname_ethnologue_provider WHERE altname_ethnologue.languoid_id = languoid.id AND altname_ethnologue.provider_id = altname_ethnologue_provider.id AND altname_ethnologue_provider.name = ? ORDER BY altname_ethnologue.name, altname_ethnologue.lang)) AS altnames_ethnologue, (SELECT group_concat(printf, ?) AS altnames_glottolog FROM (SELECT CASE WHEN (altname_glottolog.lang = ?) THEN altname_glottolog.name ELSE printf(?, altname_glottolog.name, altname_glottolog.lang) END AS printf FROM altname AS altname_glottolog, altnameprovider AS altname_glottolog_provider WHERE altname_glottolog.languoid_id = languoid.id AND altname_glottolog.provider_id = altname_glottolog_provider.id AND altname_glottolog_provider.name = ? ORDER BY altname_glottolog.name, altname_glottolog.lang)) AS altnames_glottolog, (SELECT group_concat(printf, ?) AS altnames_hhbib_lgcode FROM (SELECT CASE WHEN (altname_hhbib_lgcode.lang = ?) THEN altname_hhbib_lgcode.name ELSE printf(?, altname_hhbib_lgcode.name, altname_hhbib_lgcode.lang) END AS printf FROM altname AS altname_hhbib_lgcode, altnameprovider AS altname_hhbib_lgcode_provider WHERE altname_hhbib_lgcode.languoid_id = languoid.id AND altname_hhbib_lgcode.provider_id = altname_hhbib_lgcode_provider.id AND altname_hhbib_lgcode_provider.name = ? ORDER BY altname_hhbib_lgcode.name, altname_hhbib_lgcode.lang)) AS altnames_hhbib_lgcode, (SELECT group_concat(printf, ?) AS altnames_lexvo FROM (SELECT CASE WHEN (altname_lexvo.lang = ?) THEN altname_lexvo.name ELSE printf(?, altname_lexvo.name, altname_lexvo.lang) END AS printf FROM altname AS altname_lexvo, altnameprovider AS altname_lexvo_provider WHERE altname_lexvo.languoid_id = languoid.id AND altname_lexvo.provider_id = altname_lexvo_provider.id AND altname_lexvo_provider.name = ? ORDER BY altname_lexvo.name, altname_lexvo.lang)) AS altnames_lexvo, (SELECT group_concat(printf, ?) AS "altnames_moseley & asher (1994)" FROM (SELECT CASE WHEN ("altname_moseley & asher (1994)".lang = ?) THEN "altname_moseley & asher (1994)".name ELSE printf(?, "altname_moseley & asher (1994)".name, "altname_moseley & asher (1994)".lang) END AS printf FROM altname AS "altname_moseley & asher (1994)", altnameprovider AS "altname_moseley & asher (1994)_provider" WHERE "altname_moseley & asher (1994)".languoid_id = languoid.id AND "altname_moseley & asher (1994)".provider_id = "altname_moseley & asher (1994)_provider".id AND "altname_moseley & asher (1994)_provider".name = ? ORDER BY "altname_moseley & asher (1994)".name, "altname_moseley & asher (1994)".lang)) AS "altnames_moseley & asher (1994)", (SELECT group_concat(printf, ?) AS altnames_multitree FROM (SELECT CASE WHEN (altname_multitree.lang = ?) THEN altname_multitree.name ELSE printf(?, altname_multitree.name, altname_multitree.lang) END AS printf FROM altname AS altname_multitree, altnameprovider AS altname_multitree_provider WHERE altname_multitree.languoid_id = languoid.id AND altname_multitree.provider_id = altname_multitree_provider.id AND altname_multitree_provider.name = ? ORDER BY altname_multitree.name, altname_multitree.lang)) AS altnames_multitree, (SELECT group_concat(printf, ?) AS "altnames_ruhlen (1987)" FROM (SELECT CASE WHEN ("altname_ruhlen (1987)".lang = ?) THEN "altname_ruhlen (1987)".name ELSE printf(?, "altname_ruhlen (1987)".name, "altname_ruhlen (1987)".lang) END AS printf FROM altname AS "altname_ruhlen (1987)", altnameprovider AS "altname_ruhlen (1987)_provider" WHERE "altname_ruhlen (1987)".languoid_id = languoid.id AND "altname_ruhlen (1987)".provider_id = "altname_ruhlen (1987)_provider".id AND "altname_ruhlen (1987)_provider".name = ? ORDER BY "altname_ruhlen (1987)".name, "altname_ruhlen (1987)".lang)) AS "altnames_ruhlen (1987)", (SELECT group_concat(printf, ?) AS altnames_wals FROM (SELECT CASE WHEN (altname_wals.lang = ?) THEN altname_wals.name ELSE printf(?, altname_wals.name, altname_wals.lang) END AS printf FROM altname AS altname_wals, altnameprovider AS altname_wals_provider WHERE altname_wals.languoid_id = languoid.id AND altname_wals.provider_id = altname_wals_provider.id AND altname_wals_provider.name = ? ORDER BY altname_wals.name, altname_wals.lang)) AS altnames_wals, (SELECT group_concat(printf, ?) AS "altnames_wals other" FROM (SELECT CASE WHEN ("altname_wals other".lang = ?) THEN "altname_wals other".name ELSE printf(?, "altname_wals other".name, "altname_wals other".lang) END AS printf FROM altname AS "altname_wals other", altnameprovider AS "altname_wals other_provider" WHERE "altname_wals other".languoid_id = languoid.id AND "altname_wals other".provider_id = "altname_wals other_provider".id AND "altname_wals other_provider".name = ? ORDER BY "altname_wals other".name, "altname_wals other".lang)) AS "altnames_wals other", (SELECT group_concat("trigger", ?) AS triggers_lgcode FROM (SELECT trigger_lgcode."trigger" AS "trigger" FROM "trigger" AS trigger_lgcode WHERE trigger_lgcode.field = ? AND trigger_lgcode.languoid_id = languoid.id ORDER BY trigger_lgcode.ord)) AS triggers_lgcode, (SELECT group_concat("trigger", ?) AS triggers_inlg FROM (SELECT trigger_inlg."trigger" AS "trigger" FROM "trigger" AS trigger_inlg WHERE trigger_inlg.field = ? AND trigger_inlg.languoid_id = languoid.id ORDER BY trigger_inlg.ord)) AS triggers_inlg, ident_endangeredlanguages.identifier AS identifier_endangeredlanguages, ident_languagelandscape.identifier AS identifier_languagelandscape, ident_multitree.identifier AS identifier_multitree, ident_wals.identifier AS identifier_wals, cc_sub.comment AS classification_sub, (SELECT group_concat(printf, ?) AS classification_subrefs FROM (SELECT printf(CASE WHEN (cr_sub.pages IS NOT NULL) THEN ? ELSE ? END, bibfile_cr_sub.name, bibitem_cr_sub.bibkey, cr_sub.pages) AS printf FROM classificationref AS cr_sub, bibfile AS bibfile_cr_sub, bibitem AS bibitem_cr_sub WHERE cr_sub.kind = ? AND cr_sub.languoid_id = languoid.id AND cr_sub.bibitem_id = bibitem_cr_sub.id AND bibitem_cr_sub.bibfile_id = bibfile_cr_sub.id ORDER BY cr_sub.ord)) AS classification_subrefs, cc_fam.comment AS classification_family, (SELECT group_concat(printf, ?) AS classification_familyrefs FROM (SELECT printf(CASE WHEN (cr_family.pages IS NOT NULL) THEN ? ELSE ? END, bibfile_cr_family.name, bibitem_cr_family.bibkey, cr_family.pages) AS printf FROM classificationref AS cr_family, bibfile AS bibfile_cr_family, bibitem AS bibitem_cr_family WHERE cr_family.kind = ? AND cr_family.languoid_id = languoid.id AND cr_family.bibitem_id = bibitem_cr_family.id AND bibitem_cr_family.bibfile_id = bibfile_cr_family.id ORDER BY cr_family.ord)) AS classification_familyrefs, endangerment.status AS endangerment_status, endangerment.date AS endangerment_date, endangerment.comment AS endangerment_comment, CASE WHEN (endangerment_source.bibitem_id IS NULL) THEN endangerment_source.name ELSE printf(?, bibfile_e.name, bibitem_e.bibkey, endangerment_source.pages) END AS endangerment_source, ethnologuecomment.isohid AS elcomment_isohid, ethnologuecomment.comment_type AS elcomment_comment_type, ethnologuecomment.ethnologue_versions AS elcomment_ethnologue_versions, ethnologuecomment.comment AS elcomment_comment, isoretirement.code AS iso_retirement_code, isoretirement.name AS iso_retirement_name, isoretirement.change_request AS iso_retirement_change_request, isoretirement.effective AS iso_retirement_effective, isoretirement.reason AS iso_retirement_reason, isoretirement.remedy AS iso_retirement_remedy, isoretirement.comment AS iso_retirement_comment, (SELECT group_concat(code, ?) AS iso_retirement_change_to FROM (SELECT isoretirement_changeto.code AS code FROM isoretirement_changeto WHERE isoretirement_changeto.languoid_id = languoid.id ORDER BY isoretirement_changeto.ord)) AS iso_retirement_change_to FROM languoid LEFT OUTER JOIN (identifier AS ident_endangeredlanguages JOIN identifiersite AS ident_endangeredlanguages_site ON ident_endangeredlanguages.site_id = ident_endangeredlanguages_site.id) ON ident_endangeredlanguages_site.name = ? AND ident_endangeredlanguages.languoid_id = languoid.id LEFT OUTER JOIN (identifier AS ident_languagelandscape JOIN identifiersite AS ident_languagelandscape_site ON ident_languagelandscape.site_id = ident_languagelandscape_site.id) ON ident_languagelandscape_site.name = ? AND ident_languagelandscape.languoid_id = languoid.id LEFT OUTER JOIN (identifier AS ident_multitree JOIN identifiersite AS ident_multitree_site ON ident_multitree.site_id = ident_multitree_site.id) ON ident_multitree_site.name = ? AND ident_multitree.languoid_id = languoid.id LEFT OUTER JOIN (identifier AS ident_wals JOIN identifiersite AS ident_wals_site ON ident_wals.site_id = ident_wals_site.id) ON ident_wals_site.name = ? AND ident_wals.languoid_id = languoid.id LEFT OUTER JOIN classificationcomment AS cc_sub ON cc_sub.kind = ? AND cc_sub.languoid_id = languoid.id LEFT OUTER JOIN classificationcomment AS cc_fam ON cc_fam.kind = ? AND cc_fam.languoid_id = languoid.id LEFT OUTER JOIN (endangerment JOIN endangerment_source ON endangerment_source.id = endangerment.source_id LEFT OUTER JOIN (bibitem AS bibitem_e JOIN bibfile AS bibfile_e ON bibfile_e.id = bibitem_e.bibfile_id) ON bibitem_e.id = endangerment_source.bibitem_id) ON languoid.id = endangerment.languoid_id LEFT OUTER JOIN ethnologuecomment ON languoid.id = ethnologuecomment.languoid_id LEFT OUTER JOIN isoretirement ON languoid.id = isoretirement.languoid_id ORDER BY languoid.id (0, 1, '/', 0, 'dialect', 'language', ', ', ', ', ', ', '[%s](%s)', ', ', '**%s:%s**:%s<trigger "%s">', '**%s:%s**:%s', '**%s:%s**<trigger "%s">', '**%s:%s**', 'glottolog', ', ', '', '%s [%s]', 'aiatsis', ', ', '', '%s [%s]', 'elcat', ', ', '', '%s [%s]', 'ethnologue', ', ', '', '%s [%s]', 'glottolog', ', ', '', '%s [%s]', 'hhbib_lgcode', ', ', '', '%s [%s]', 'lexvo', ', ', '', '%s [%s]', 'moseley & asher (1994)', ', ', '', '%s [%s]', 'multitree', ', ', '', '%s [%s]', 'ruhlen (1987)', ', ', '', '%s [%s]', 'wals', ', ', '', '%s [%s]', 'wals other', ', ', 'lgcode', ', ', 'inlg', ', ', '**%s:%s**:%s', '**%s:%s**', 'sub', ', ', '**%s:%s**:%s', '**%s:%s**', 'family', '**%s:%s**:%s', ', ', 'endangeredlanguages', 'languagelandscape', 'multitree', 'wals', 'sub', 'family') [INFO@treedb.queries] hash rows with 'sha256', csv header: ['id', 'name', 'level', 'parent_id', 'path', 'family_id', 'dialect_language_id', 'hid', 'iso639_3', 'latitude', 'longitude', 'macroareas', 'countries', 'links', 'sources_glottolog', 'altnames_aiatsis', 'altnames_elcat', 'altnames_ethnologue', 'altnames_glottolog', 'altnames_hhbib_lgcode', 'altnames_lexvo', 'altnames_moseley & asher (1994)', 'altnames_multitree', 'altnames_ruhlen (1987)', 'altnames_wals', 'altnames_wals other', 'triggers_lgcode', 'triggers_inlg', 'identifier_endangeredlanguages', 'identifier_languagelandscape', 'identifier_multitree', 'identifier_wals', 'classification_sub', 'classification_subrefs', 'classification_family', 'classification_familyrefs', 'endangerment_status', 'endangerment_date', 'endangerment_comment', 'endangerment_source', 'elcomment_isohid', 'elcomment_comment_type', 'elcomment_ethnologue_versions', 'elcomment_comment', 'iso_retirement_code', 'iso_retirement_name', 'iso_retirement_change_request', 'iso_retirement_effective', 'iso_retirement_reason', 'iso_retirement_remedy', 'iso_retirement_comment', 'iso_retirement_change_to']
Wall time: 10.9 s
'25222b4feb2d89b4edaeecc546280a059ae6ba69da961d56ee4a387ba3b81fc0'