import logging
import pathlib
from IPython.display import display
import pandas as pd
import rdflib
# Database URL of the local PostgreSQL "glottolog3" database; handed to
# pandas.read_sql_query() as the connection.
ENGINE = 'postgresql://postgres@/glottolog3'
# One row per (languoid, Wikidata link) pair found in the languoid's JSON
# ``links`` array:
#   - the CROSS JOIN on the Wikidata links drops languoids that have no
#     Wikidata link at all,
#   - the English Wikipedia link and the ISO 639-3 identifier are optional
#     (LEFT JOIN), so ``title`` and ``iso639_3`` may be NULL,
#   - ``qid`` and ``title`` are the last path segment of the respective URL.
# Rows are ordered by glottocode.
QUERY = '''
SELECT
l.id AS glottocode,
l.name,
ll.level,
ll.category,
substring(wikidata_link->>'url' FROM '/([^/]+)$') AS qid,
substring(wikipedia_link->>'url' FROM '/([^/]+)$') AS title,
i.name AS iso639_3
FROM language AS l
JOIN languoid AS ll USING (pk)
CROSS JOIN jsonb_path_query(l.jsondata::jsonb,
'$.links[*] ? (@.url starts with "https://www.wikidata.org/entity/")') AS wikidata_link
LEFT JOIN jsonb_path_query(l.jsondata::jsonb,
'$.links[*] ? (@.url starts with "https://en.wikipedia.org/wiki/")') AS wikipedia_link ON TRUE
LEFT JOIN (
languageidentifier AS li
JOIN identifier AS i
ON li.identifier_pk = i.pk AND i.type = 'iso639-3'
) ON li.language_pk = l.pk
ORDER BY l.id
'''.strip()
# Route log records to the notebook output and have SQLAlchemy log the
# statements it executes.
logging.basicConfig(level=logging.INFO,
                    format='[%(levelname)s@%(name)s] %(message)s')
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

# Glottolog side of the comparison: indexed by glottocode, every column read
# as pandas string dtype.
gf = pd.read_sql_query(sql=QUERY, con=ENGINE,
                       index_col='glottocode', dtype='string')
gf.info(memory_usage='deep')

# Sanity checks: each glottocode occurs once and the index is sorted.
assert gf.index.is_unique
assert gf.index.is_monotonic_increasing

gf.head()
[INFO@sqlalchemy.engine.Engine] select pg_catalog.version() [INFO@sqlalchemy.engine.Engine] [raw sql] {} [INFO@sqlalchemy.engine.Engine] select current_schema() [INFO@sqlalchemy.engine.Engine] [raw sql] {} [INFO@sqlalchemy.engine.Engine] show standard_conforming_strings [INFO@sqlalchemy.engine.Engine] [raw sql] {} [INFO@sqlalchemy.engine.Engine] SELECT l.id AS glottocode, l.name, ll.level, ll.category, substring(wikidata_link->>'url' FROM '/([^/]+)$') AS qid, substring(wikipedia_link->>'url' FROM '/([^/]+)$') AS title, i.name AS iso639_3 FROM language AS l JOIN languoid AS ll USING (pk) CROSS JOIN jsonb_path_query(l.jsondata::jsonb, '$.links[*] ? (@.url starts with "https://www.wikidata.org/entity/")') AS wikidata_link LEFT JOIN jsonb_path_query(l.jsondata::jsonb, '$.links[*] ? (@.url starts with "https://en.wikipedia.org/wiki/")') AS wikipedia_link ON TRUE LEFT JOIN ( languageidentifier AS li JOIN identifier AS i ON li.identifier_pk = i.pk AND i.type = 'iso639-3' ) ON li.language_pk = l.pk ORDER BY l.id [INFO@sqlalchemy.engine.Engine] [raw sql] {}
<class 'pandas.core.frame.DataFrame'> Index: 10539 entries, aant1238 to zyph1238 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 10539 non-null string 1 level 10539 non-null string 2 category 10539 non-null string 3 qid 10539 non-null string 4 title 8378 non-null string 5 iso639_3 7845 non-null string dtypes: string(6) memory usage: 4.6 MB
name | level | category | qid | title | iso639_3 | |
---|---|---|---|---|---|---|
glottocode | ||||||
aant1238 | Aantantara | dialect | Dialect | Q31312216 | <NA> | <NA> |
aari1238 | Aari-Gayil | family | Family | Q85516014 | <NA> | aiz |
aari1239 | Aari | language | Spoken L1 Language | Q7495 | Aari_language | aiw |
aari1240 | Aariya | language | Bookkeeping | Q4661732 | Aariya_language | aay |
aasa1238 | Aasax | language | Spoken L1 Language | Q56620 | Asa_language | aas |
%%time
# SPARQL endpoint URL used as the default by open_sparql_graph().
ENDPOINT = 'https://query.wikidata.org/sparql'
class SCHEMA(rdflib.SDO):
    """Variant of ``rdflib.SDO`` whose namespace URI uses ``http://``.

    The base URI of ``rdflib.SDO`` is taken over with its ``https://`` scheme
    replaced by ``http://``; presumably this is the form the queried endpoint
    expects for the ``schema:`` prefix (to be confirmed against the endpoint).

    See https://github.com/RDFLib/rdflib/issues/1120
    """
    # Same namespace as rdflib.SDO, only the URI scheme differs.
    _NS = rdflib.Namespace(rdflib.SDO._NS.replace('https://', 'http://'))
# Prefix -> namespace bindings registered on the graph by open_sparql_graph().
PREFIXES = {'schema': SCHEMA}
# Selects every item that has a wdt:P1394 value (bound to ?glottocode) matching
# the glottocode pattern (four lowercase alphanumerics followed by four
# digits), together with:
#   - ?qid: the item IRI with the wd: prefix stripped,
#   - ?name: the English label from the label service,
#   - ?title: the English Wikipedia site link with the site prefix stripped
#     (optional, so it may be unbound).
# Results are ordered by glottocode, then by the numeric part of the QID.
# No OFFSET/LIMIT here: iterrows() appends them to page through the results.
SPARQL_QUERY = '''
SELECT
?glottocode
(strafter(str(?languoid), str(wd:)) AS ?qid)
(?languoidLabel AS ?name)
(strafter(str(?siteLink), "https://en.wikipedia.org/wiki/") AS ?title)
WHERE {
?languoid wdt:P1394 ?glottocode.
FILTER (REGEX(?glottocode, "^[a-z0-9]{4}[0-9]{4}$")).
OPTIONAL {
?siteLink schema:about ?languoid;
schema:inLanguage "en";
schema:isPartOf <https://en.wikipedia.org/>.
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en".
?languoid rdfs:label ?languoidLabel.
}
}
ORDER BY
?glottocode
xsd:integer(strafter(str(?languoid), str(wd:Q)))
'''.strip()
# Local CSV file in which read_glottocodes() caches the query result.
CSV_PATH = pathlib.Path('wikidata.csv')
def open_sparql_graph(endpoint=ENDPOINT, *, prefixes=PREFIXES):
    """Return a graph opened on a SPARQL endpoint with prefixes bound.

    Args:
        endpoint: URL of the SPARQL endpoint to open.
        prefixes: Mapping of prefix name to namespace; every entry is bound
            on the graph's namespace manager with ``replace=True``.

    Returns:
        The opened ``rdflib.ConjunctiveGraph`` using the ``'SPARQLStore'``
        store.
    """
    logging.info('endpoint: %r', endpoint)
    sparql_graph = rdflib.ConjunctiveGraph('SPARQLStore')
    sparql_graph.open(endpoint)
    logging.info('graph: %s', sparql_graph)

    logging.info('prefixes: %r', prefixes)
    for name, uri in prefixes.items():
        sparql_graph.namespace_manager.bind(name, uri, replace=True)

    bound = list(sparql_graph.namespaces())
    logging.debug('namespaces: %r', bound)
    return sparql_graph
def iterrows(query, *, prefixes=PREFIXES,
             limit=None, verbose: bool = False,
             per_request: int = 100_000):
    """Yield the header and then the rows of a paginated SPARQL SELECT query.

    The query is sent repeatedly with ``OFFSET``/``LIMIT`` appended until a
    page comes back with fewer rows than requested or ``limit`` rows have
    been requested in total.

    Args:
        query: SPARQL SELECT query text without ``OFFSET``/``LIMIT``.
        prefixes: Prefix -> namespace mapping bound on the graph that runs
            the query.
        limit: Maximum total number of rows to request; ``None`` means no
            limit.
        verbose: Accepted for interface compatibility; currently unused.
        per_request: Maximum number of rows requested per page; must be at
            least 1.

    Yields:
        First an iterable of column names (variable names without the
        leading ``'?'``), then one iterable of Python values per result row,
        with unbound variables given as ``None``.

    Raises:
        ValueError: If ``per_request`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if per_request < 1:
        # A page size of zero can never report a short page, so the loop
        # below would not terminate.
        raise ValueError(f'per_request must be at least 1: {per_request!r}')

    if limit is None:
        limit = float('inf')

    # Forward the caller's prefixes instead of silently using the defaults.
    graph = open_sparql_graph(prefixes=prefixes)

    offset = 0
    while offset < limit:
        request_limit = min(limit - offset, per_request)
        request_query = (f'{query}\n'
                         f'OFFSET {offset:d}\n'
                         f'LIMIT {request_limit:d}')
        logging.info("graph.query('''\n%s\n''')", request_query)
        result = graph.query(request_query)

        if not offset:
            # Header row: emitted once, from the first page.
            yield (v.toPython().removeprefix('?') for v in result.vars)

        # Reset per page: an empty page must count as zero rows rather than
        # leave ``n`` unbound (first page) or stale (later pages).
        n = 0
        for n, values in enumerate(result, 1):
            yield (v.toPython() if v is not None else None for v in values)

        if n < request_limit:
            # Short (or empty) page: the result set is exhausted.
            return
        offset += n
def read_sparql_query(query, *, limit=None, **kwargs):
    """Run a SPARQL SELECT query and return the result as a DataFrame.

    The first item produced by ``iterrows()`` supplies the column names, the
    remaining items become the records. Additional keyword arguments are
    passed on to ``pandas.DataFrame.from_records()``.
    """
    records = iterrows(query, limit=limit)
    header = [*next(records)]
    return pd.DataFrame.from_records(records, columns=header, **kwargs)
def read_glottocodes(*, path=CSV_PATH, encoding='utf-8'):
    """Return the glottocode table, querying the endpoint only if needed.

    When ``path`` does not exist yet, ``SPARQL_QUERY`` is executed and its
    result is written to ``path`` as CSV. In every case the returned frame
    is read back from that file, indexed by ``glottocode``, with all columns
    converted to pandas string dtype.
    """
    is_cached = path.exists()
    if not is_cached:
        fetched = read_sparql_query(SPARQL_QUERY, index='glottocode')
        fetched = fetched.astype('string')
        fetched.to_csv(path, encoding=encoding)

    frame = pd.read_csv(path, index_col='glottocode', encoding=encoding)
    return frame.astype('string')
# Wikidata side of the comparison (queried on first run, afterwards read from
# the CSV cache).
wf = read_glottocodes()
wf.info(memory_usage='deep')
# Only sortedness is asserted here; uniqueness of the glottocodes is not.
assert wf.index.is_monotonic_increasing
wf.head(10)
[INFO@root] endpoint: 'https://query.wikidata.org/sparql' [INFO@root] graph: [a rdflib:ConjunctiveGraph;rdflib:storage [a rdflib:Store;rdfs:label 'SPARQLStore']] [INFO@root] prefixes: {'schema': Namespace("http://schema.org/")} [INFO@root] graph.query(''' SELECT ?glottocode (strafter(str(?languoid), str(wd:)) AS ?qid) (?languoidLabel AS ?name) (strafter(str(?siteLink), "https://en.wikipedia.org/wiki/") AS ?title) WHERE { ?languoid wdt:P1394 ?glottocode. FILTER (REGEX(?glottocode, "^[a-z0-9]{4}[0-9]{4}$")). OPTIONAL { ?siteLink schema:about ?languoid; schema:inLanguage "en"; schema:isPartOf <https://en.wikipedia.org/>. } SERVICE wikibase:label { bd:serviceParam wikibase:language "en". ?languoid rdfs:label ?languoidLabel. } } ORDER BY ?glottocode xsd:integer(strafter(str(?languoid), str(wd:Q))) OFFSET 0 LIMIT 100000 ''')
<class 'pandas.core.frame.DataFrame'> Index: 10873 entries, aant1238 to zyph1238 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 qid 10873 non-null string 1 name 10873 non-null string 2 title 8682 non-null string dtypes: string(3) memory usage: 2.7 MB CPU times: total: 625 ms Wall time: 7.05 s
qid | name | title | |
---|---|---|---|
glottocode | |||
aant1238 | Q31312216 | Aantantara | <NA> |
aari1238 | Q85516014 | Aari-Gayil | <NA> |
aari1239 | Q7495 | Aari | Aari_language |
aari1240 | Q4661732 | Aariya | Aariya_language |
aasa1238 | Q56620 | Asa | Asa_language |
aata1238 | Q31314288 | Aatasaara | <NA> |
abaa1238 | Q31363054 | Aba dialect | <NA> |
abab1239 | Q17379636 | Ababda | <NA> |
abab1240 | Q4931250 | Boan | Boan_languages |
abad1240 | Q20644975 | Abzakh Adyghe dialect | Abzakh_Adyghe_dialect |
# Passed as ``keep`` to Index.duplicated(): with False, every member of a
# group of repeated labels is flagged, not only the later occurrences.
KEEP_ALL = False
# Wikidata rows whose glottocode is shared with at least one other row.
wf[wf.index.duplicated(keep=KEEP_ALL)]
qid | name | title | |
---|---|---|---|
glottocode | |||
ainu1252 | Q27969 | Ainu | Ainu_languages |
ainu1252 | Q50111972 | Ainu | <NA> |
andr1246 | Q30301408 | Andro | <NA> |
andr1246 | Q55603949 | Andro | Andro_language |
araf1243 | Q4783702 | Arafundi | Arafundi_languages |
... | ... | ... | ... |
yulp1239 | Q106554801 | Yulparirra | Yulparirra_language |
zeme1240 | Q56373 | Zeme | Zeme_language |
zeme1240 | Q21491053 | Zeme Naga | <NA> |
zena1250 | Q2293952 | Zenati | Zenati_languages |
zena1250 | Q2741732 | Northern Berber | Northern_Berber_languages |
163 rows × 3 columns
# Put the Wikidata columns next to the Glottolog columns, matched on
# glottocode; overlapping Wikidata column names get the ``_wd`` suffix.
# Columns not needed for the comparison are dropped.
df = (
    gf.join(wf, on='glottocode', rsuffix='_wd')
    .drop(columns=['category', 'name_wd', 'iso639_3'])
)
df.info(memory_usage='deep')
df.head()
<class 'pandas.core.frame.DataFrame'> Index: 10621 entries, aant1238 to zyph1238 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 10621 non-null string 1 level 10621 non-null string 2 qid 10621 non-null string 3 title 8415 non-null string 4 qid_wd 10610 non-null string 5 title_wd 8482 non-null string dtypes: string(6) memory usage: 4.7 MB
name | level | qid | title | qid_wd | title_wd | |
---|---|---|---|---|---|---|
glottocode | ||||||
aant1238 | Aantantara | dialect | Q31312216 | <NA> | Q31312216 | <NA> |
aari1238 | Aari-Gayil | family | Q85516014 | <NA> | Q85516014 | <NA> |
aari1239 | Aari | language | Q7495 | Aari_language | Q7495 | Aari_language |
aari1240 | Aariya | language | Q4661732 | Aariya_language | Q4661732 | Aariya_language |
aasa1238 | Aasax | language | Q56620 | Asa_language | Q56620 | Asa_language |
# Joined rows whose glottocode occurs more than once in ``df``.
df[df.index.duplicated(keep=KEEP_ALL)]
name | level | qid | title | qid_wd | title_wd | |
---|---|---|---|---|---|---|
glottocode | ||||||
ainu1252 | Ainu | family | Q50111972 | <NA> | Q27969 | Ainu_languages |
ainu1252 | Ainu | family | Q50111972 | <NA> | Q50111972 | <NA> |
andr1246 | Andro | language | Q30301408 | <NA> | Q30301408 | <NA> |
andr1246 | Andro | language | Q30301408 | <NA> | Q55603949 | Andro_language |
araf1243 | Arafundi | family | Q11170629 | <NA> | Q4783702 | Arafundi_languages |
... | ... | ... | ... | ... | ... | ... |
yulp1239 | Yulparija | language | Q17319895 | <NA> | Q106554801 | Yulparirra_language |
zeme1240 | Zeme Naga | language | Q21491053 | <NA> | Q56373 | Zeme_language |
zeme1240 | Zeme Naga | language | Q21491053 | <NA> | Q21491053 | <NA> |
zena1250 | Zenatic | family | Q2741732 | Northern_Berber_languages | Q2293952 | Zenati_languages |
zena1250 | Zenatic | family | Q2741732 | Northern_Berber_languages | Q2741732 | Northern_Berber_languages |
163 rows × 6 columns
# Rows where the QID from the Glottolog query differs from the QID from the
# Wikidata query for the same glottocode.
# NOTE(review): where ``qid_wd`` is <NA> the comparison yields <NA> rather
# than True, so such rows appear to be left out of ``mismatch`` -- confirm
# that this is intended.
mismatch = df.loc[df['qid'] != df['qid_wd'], ['name', 'level', 'title', 'title_wd', 'qid', 'qid_wd']]
mismatch.info(memory_usage='deep')
# Glottocodes that occur more than once among the mismatching rows.
mismatch[mismatch.index.duplicated(keep=KEEP_ALL)]
<class 'pandas.core.frame.DataFrame'> Index: 110 entries, ainu1252 to zena1250 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 110 non-null string 1 level 110 non-null string 2 title 48 non-null string 3 title_wd 98 non-null string 4 qid 110 non-null string 5 qid_wd 110 non-null string dtypes: string(6) memory usage: 49.1 KB
name | level | title | title_wd | qid | qid_wd | |
---|---|---|---|---|---|---|
glottocode | ||||||
kumi1248 | Tipai | language | Ipai_language | Tiipai_language | Q3027474 | Q3027471 |
kumi1248 | Tipai | language | Ipai_language | Kumeyaay_language | Q3027474 | Q4910139 |
mose1249 | Mosetén-Chimané | language | <NA> | Chimane_language | Q15548035 | Q35950 |
mose1249 | Mosetén-Chimané | language | <NA> | <NA> | Q15548035 | Q25395221 |
# Raise the row display limit for this one call so that the table is not
# truncated in the output.
with pd.option_context('display.max_rows', 150):
    display(mismatch)
name | level | title | title_wd | qid | qid_wd | |
---|---|---|---|---|---|---|
glottocode | ||||||
ainu1252 | Ainu | family | <NA> | Ainu_languages | Q50111972 | Q27969 |
akua1239 | Akuapem | dialect | Twi | Akuapem_dialect | Q36850 | Q31150449 |
amii1238 | Ami | language | <NA> | <NA> | Q12626835 | Q10408315 |
andr1246 | Andro | language | <NA> | Andro_language | Q30301408 | Q55603949 |
aoua1234 | A'ou | language | <NA> | A%27ou_language | Q17284871 | Q16109994 |
araf1243 | Arafundi | family | <NA> | Arafundi_languages | Q11170629 | Q4783702 |
arit1239 | Aritinngitigh | language | Arritinngithigh_language | Adithinngithigh_language | Q4796002 | Q4683034 |
assy1241 | Assyrian Neo-Aramaic | language | Suret_language | Ashurian_Aramaic | Q29440 | Q24915992 |
atat1238 | Atatláhuca Mixtec | language | <NA> | Atatl%C3%A1huca%E2%80%93San_Miguel_Mixtec | Q32093046 | Q12953721 |
azte1234 | Aztec | family | Nahuatl | Nahuan_languages | Q13300 | Q11965602 |
bala1242 | Bala (China) | dialect | <NA> | Bala_language_(China) | Q107342080 | Q86730632 |
bari1298 | Barikewa | language | <NA> | Omati_language | Q63214981 | Q7089905 |
bata1301 | Batak | language | <NA> | Batak_language_(Philippines) | Q50934420 | Q3450443 |
bori1243 | Bori-Karko | language | Adi_languages | Bori_language | Q56440 | Q4945106 |
cent2226 | Central Maipuran | family | <NA> | Paresi%E2%80%93Waura_languages | Q97959215 | Q7136862 |
chon1248 | Chono | language | Kakauhua_language | Chono_language | Q3507948 | Q5104704 |
cuoi1242 | Cuoi | family | <NA> | Cuoi_language | Q12629405 | Q3380501 |
damu1236 | Damu | language | Adi_languages | Damu_language | Q56440 | Q17002115 |
dhan1265 | Dewas-Done Danuwar | language | <NA> | Danwar_language | Q62663667 | Q3522797 |
elal1235 | El Alto Zapotec | language | Zoogocho_Zapotec | El_Alto_Zapotec | Q8074100 | Q5350733 |
esto1258 | Estonian | language | <NA> | Estonian_language | Q12361545 | Q9072 |
garr1260 | Garrwan | family | <NA> | Garawan_languages | Q12631364 | Q5521951 |
hava1248 | Havasupai-Walapai-Yavapai | language | Havasupai%E2%80%93Hualapai_language | <NA> | Q3565286 | Q111366384 |
hwar1238 | Qwara | dialect | <NA> | Qwara_dialect | Q53765647 | Q56736 |
indo1316 | Standard Indonesian | language | Indonesian_language | <NA> | Q9240 | Q110620923 |
kala1399 | Kalaallisut | language | Greenlandic_language | West_Greenlandic | Q25355 | Q15665351 |
kalm1243 | Oirad-Kalmyk-Darkhat | language | Kalmyk_Oirat | Oirat_language | Q33634 | Q56959 |
kati1270 | Katë | language | Kata-vari_dialect | Kamkata-vari_language | Q3449784 | Q2605045 |
kaur1271 | Kaure-Narau | language | <NA> | Kaure_language | Q12634336 | Q20526532 |
kawi1241 | Kawi | language | Kawi_language | Old_Javanese | Q49341 | Q49340 |
kend1253 | Kendeje | language | Teribe_language | Kendeje_language | Q36533 | Q56895 |
kulo1237 | Kulon-Pazeh | language | Pazeh_language | Kulon_language | Q36435 | Q11182000 |
kumi1248 | Tipai | language | Ipai_language | Tiipai_language | Q3027474 | Q3027471 |
kumi1248 | Tipai | language | Ipai_language | Kumeyaay_language | Q3027474 | Q4910139 |
lari1253 | Larestani | language | <NA> | Achomi_language | Q33468 | Q4699526 |
long1252 | Longdu | dialect | Zhongshan_Min | Longdu_dialect | Q8070958 | Q6673704 |
loup1243 | Loup A | language | <NA> | Loup_language | Q27921265 | Q6689698 |
maha1308 | Mahakam Kenyah | language | <NA> | <NA> | Q12953633 | Q12953631 |
mala1480 | Malayic Dayak | language | Bamayo_language | <NA> | Q3514892 | Q110162108 |
mans1258 | Northern Mansi | language | <NA> | Mansi_language | Q30304537 | Q33759 |
mark1255 | Markweeta | language | Markwet_language | Nandi%E2%80%93Markweta_languages | Q56874 | Q11028135 |
masa1299 | Masaaba | language | <NA> | Masaba_language | Q12952814 | Q3740241 |
masb1237 | Masbate Sorsogon | language | <NA> | Sorsogon_language | Q16113356 | Q7563749 |
mato1250 | Mator-Taigi-Karagas | language | <NA> | Mator_language | Q20669419 | Q36453 |
mixe1286 | Mixe | family | Mixean_languages | Mixe_languages | Q36225 | Q3833010 |
mose1249 | Mosetén-Chimané | language | <NA> | Chimane_language | Q15548035 | Q35950 |
mose1249 | Mosetén-Chimané | language | <NA> | <NA> | Q15548035 | Q25395221 |
mosi1247 | Akie | language | Nandi%E2%80%93Markweta_languages | Mosiro_language | Q11028135 | Q6916288 |
ncan1245 | Ncane-Mungong | language | <NA> | Noni_language | Q11297920 | Q36072 |
ndyu1242 | Aukan | language | <NA> | Ndyuka_language | Q2659044 | Q35037 |
nisa1239 | Nisa-Anasi | language | Nisa_language | Nisa-Anasi_language | Q13593518 | Q4751795 |
noir1238 | Noiri | language | <NA> | Bhilori_language | Q12953774 | Q4901734 |
noma1263 | Nomatsiguenga | language | <NA> | Nomatsiguenga_language | Q1995859 | Q3342992 |
nort2930 | Northeast Kiwai | language | <NA> | Kiwai_language | Q11732324 | Q6418846 |
nort2937 | Northern Hill/Valley Yokuts | dialect | Northern_Valley_Yokuts | Kings_River_Yokuts | Q85789777 | Q6413014 |
ocot1243 | Ocotepec Mixtec | language | <NA> | %C3%91um%C3%AD_Mixtec | Q25559575 | Q8078669 |
otom1276 | Otomaco-Taparita | family | Otomaco_language | Otom%C3%A1koan_languages | Q16879234 | Q3217503 |
pale1264 | Palembang | dialect | <NA> | Palembang_language | Q25559510 | Q12497929 |
panj1256 | Eastern Panjabi | language | Punjabi_language | <NA> | Q58635 | Q28164079 |
peno1244 | Peñoles Mixtec | language | <NA> | Estetla_Mixtec | Q42411307 | Q5401071 |
poch1244 | Pochutec | language | <NA> | Pochutec_language | Q42968898 | Q2427341 |
puwa1234 | Puwa Yi | language | <NA> | Phowa_language | Q25559431 | Q7187959 |
rian1260 | Riang | family | <NA> | Riang_language | Q42353409 | Q2741615 |
sana1295 | Sanaani Arabic | language | Yemeni_Arabic | San%CA%BDani_Arabic | Q1686766 | Q56578 |
sanf1262 | San Francisco Matlatzinca | language | <NA> | Matlatzinca_language | Q12953704 | Q3832945 |
sanj1285 | San Juan Atzingo Popoloca | language | <NA> | Southern_Popoloca_language | Q12953819 | Q7570327 |
sanl1248 | San Luís Temalacayuca Popoloca | language | <NA> | Northern_Popoloca_language | Q25559602 | Q7058861 |
sant1454 | Santa Inés Ahuatempan Popoloca | language | <NA> | Western_Popoloca_language | Q42365276 | Q7988174 |
sate1242 | Ems-Weser Frisian | language | Saterland_Frisian_language | East_Frisian_language | Q27154 | Q494355 |
shap1240 | Shapsug | dialect | Shapsug_Adyghe_dialect | Kfar_Kama_Adyghe_dialect | Q12813044 | Q6398657 |
sini1245 | Sinitic | family | Sinitic_languages | Chinese_language | Q33857 | Q7850 |
sira1267 | Sirayaic | language | Sirayaic_languages | Siraya_language | Q55630686 | Q716604 |
sota1242 | Sota Kanum | language | <NA> | Nggarna_language | Q12952568 | Q85788907 |
soul1243 | Souletin Basque | language | <NA> | Souletin_dialect | Q12953385 | Q2746856 |
sout2668 | Southern Hindko | language | Hindko | <NA> | Q382273 | Q111326242 |
sout2679 | South Estonian | language | V%C3%B5ro_language | South_Estonian | Q32762 | Q13295 |
sout2687 | Southern Vietnamese | dialect | <NA> | <NA> | Q55856412 | Q10806348 |
sout2965 | Southern Puget Sound Salish | language | <NA> | Whulshootseed_dialect | Q12642471 | Q7997684 |
sout2978 | Southern East Cree | language | <NA> | East_Cree | Q12953464 | Q282011 |
sout2990 | Southern Pastaza Quechua | language | <NA> | Lowland_Peruvian_Quechua | Q25559692 | Q6694075 |
sout3212 | Southeastern Ngwi | family | Nisoish_languages | Southeastern_Loloish_languages | Q56990 | Q16111894 |
taid1248 | Tai Do-Mene-Yo | language | Tai_Yo_language | Tai_Do_language | Q7675790 | Q7675746 |
taih1245 | Tai Pao (Retired) | language | Tai_Pao_language | Tai_Hang_Tong_language | Q7675795 | Q7675753 |
tall1235 | Tallán | language | Tall%C3%A1n_language | Catacaoan_languages | Q16910468 | Q5051139 |
talu1238 | Lavu-Yongsheng-Talu | language | Talu_language | Lavu_language | Q48769531 | Q16999095 |
tata1257 | Tatana | language | <NA> | Sabah_Bisaya_language | Q18643518 | Q7395820 |
temb1272 | Motembo-Kunda | language | <NA> | Budza_language | Q11013108 | Q3046889 |
temb1276 | Tenetehara | language | <NA> | Tenetehara_language | Q10322157 | Q7699720 |
temn1245 | Northern Mel | family | <NA> | Baga_language | Q16114535 | Q35005 |
tibe1272 | Tibetan | language | Lhasa_Tibetan | Central_Tibetan | Q34271 | Q5061915 |
timo1237 | Timote-Cuica | language | Timote_language | Timotean_languages | Q7806995 | Q3217540 |
tsis1238 | Salka-Tsishingini | language | <NA> | Shingini_language | Q13123571 | Q35199 |
tund1255 | Eastern Tundra Nenets | dialect | Yurats_language | Tundra_Nenets_language | Q34252 | Q1564258 |
uain1239 | Uainuma-Mariate | language | Wainum%C3%A1-Mariat%C3%A9_language | Mariat%C3%A9_language | Q16910017 | Q6762506 |
vase1234 | Northern Ju | language | Sekele_language | !O!ung_language | Q56528 | Q3832974 |
waga1262 | Wagawaga (Retired) | language | <NA> | Wagawaga_language_(New_Guinea) | Q16112427 | Q7959485 |
west2340 | Western Aragonese | dialect | Ans%C3%B3_Aragonese | Western_Aragonese | Q3574358 | Q3574028 |
west2488 | Western Krahn | language | <NA> | Western_Krahn_language | Q35809 | Q10975611 |
wudi1238 | Wuding-Luquan Yi | language | <NA> | Nasu_language | Q25559456 | Q56403 |
wyan1247 | Huron-Wyandot | language | <NA> | Wyandot_language | Q3567223 | Q1185119 |
xian1249 | Xiandao | dialect | <NA> | <NA> | Q12953305 | Q10884275 |
xinc1246 | Xinca-Guazacapan | language | <NA> | Guazacap%C3%A1n_language | Q53428794 | Q19572028 |
yare1249 | Yareni Zapotec | language | <NA> | Ixtl%C3%A1n_Zapotec | Q12645368 | Q6101185 |
yela1238 | Yela-Kela | language | Yela-Kela_language | Yela_language | Q32151338 | Q8051428 |
yout1234 | Yout Wam | language | <NA> | <NA> | Q63341264 | Q31819036 |
yuga1244 | Yugambal | language | Yugambeh_language | Yugambal_language | Q16334334 | Q3446663 |
yulp1239 | Yulparija | language | <NA> | Yulparirra_language | Q17319895 | Q106554801 |
zaca1241 | Zacatlán-Ahuacatlán-Tepetzintla Nahuatl | language | <NA> | <NA> | Q2514044 | Q2204061 |
zeme1240 | Zeme Naga | language | <NA> | Zeme_language | Q21491053 | Q56373 |
zena1250 | Zenatic | family | Northern_Berber_languages | Zenati_languages | Q2741732 | Q2293952 |