# Notebook-style walkthrough of the corus loaders.
#
# The first cell formats METAS and passes the result to patch_readme for
# README.md, then formats METAS again for show_html. Every following cell
# imports one loader (or a metas/records loader pair), points it at a file or
# directory under data/, and pulls the first record with next() or every
# record with list(). Bare expressions such as `next(records)` are kept as-is:
# presumably they are notebook cell outputs -- confirm before removing.
#
# Path variables are uniformly named `path` so the builtin `dir` is never
# shadowed.

from corus.sources.meta import METAS
from corus.readme import format_metas, show_html, patch_readme

html = format_metas(
    METAS,
    'https://nbviewer.jupyter.org/github/natasha/corus/blob/master/docs.ipynb'
)
patch_readme(html, 'README.md')

html = format_metas(METAS)
show_html(html)


from corus import load_lenta

path = 'data/lenta-ru-news.csv.gz'
records = load_lenta(path)
next(records)


from corus import load_lenta2

path = 'data/lenta-ru-news.csv.bz2'
records = load_lenta2(path)
next(records)


from corus import load_factru

path = 'data/factRuEval-2016-master/'
records = load_factru(path)
next(records)


from corus import load_ne5

path = 'data/Collection5/'
records = load_ne5(path)
next(records)


from corus import load_persons

path = 'data/Persons-1000.zip'
records = load_persons(path)
next(records)


from corus import load_gareev

path = 'data/rus-ner-news-corpus.iob/'
records = load_gareev(path)
next(records)


from corus import load_wikiner

path = 'data/aij-wikiner-ru-wp3.bz2'
records = load_wikiner(path)
next(records)


from corus import load_librusec

path = 'data/librusec_fb2.plain.gz'
records = load_librusec(path)
next(records)


# Taiga cells: metas are loaded first and then passed to the records loader;
# offset/count are given to both calls.

from corus import load_taiga_arzamas_metas, load_taiga_arzamas

path = 'data/taiga/Arzamas.tar.gz'
metas = load_taiga_arzamas_metas(path, offset=0, count=1)
records = load_taiga_arzamas(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_fontanka_metas, load_taiga_fontanka

path = 'data/taiga/Fontanka.tar.gz'
metas = load_taiga_fontanka_metas(path, offset=0, count=1)
records = load_taiga_fontanka(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_interfax_metas, load_taiga_interfax

path = 'data/taiga/Interfax.tar.gz'
metas = load_taiga_interfax_metas(path, offset=0, count=1)
records = load_taiga_interfax(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_kp_metas, load_taiga_kp

path = 'data/taiga/KP.tar.gz'
metas = load_taiga_kp_metas(path, offset=0, count=1)
records = load_taiga_kp(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_lenta_metas, load_taiga_lenta

path = 'data/taiga/Lenta.tar.gz'
metas = load_taiga_lenta_metas(path, offset=0, count=1)
records = load_taiga_lenta(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_magazines_metas, load_taiga_magazines

path = 'data/taiga/Magazines.tar.gz'
metas = load_taiga_magazines_metas(path, offset=0, count=1)
records = load_taiga_magazines(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_nplus1_metas, load_taiga_nplus1

path = 'data/taiga/NPlus1.tar.gz'
metas = load_taiga_nplus1_metas(path, offset=0, count=1)
records = load_taiga_nplus1(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_subtitles_metas, load_taiga_subtitles

path = 'data/taiga/Subtitles.tar.gz'
metas = load_taiga_subtitles_metas(path, offset=0, count=1)
records = load_taiga_subtitles(path, metas, offset=0, count=1)
next(records)


# Unlike the other Taiga cells, this one takes no metas argument.
from corus import load_taiga_social

path = 'data/taiga/social.tar.gz'
records = load_taiga_social(path, offset=0, count=4)
list(records)


from corus import load_taiga_proza_metas, load_taiga_proza

path = 'data/taiga/proza_ru.zip'
metas = load_taiga_proza_metas(path, offset=0, count=1)
records = load_taiga_proza(path, metas, offset=0, count=1)
next(records)


from corus import load_taiga_stihi_metas, load_taiga_stihi

path = 'data/taiga/stihi_ru.zip'
metas = load_taiga_stihi_metas(path, offset=0, count=1)
records = load_taiga_stihi(path, metas, offset=0, count=1)
next(records)


from corus import load_buriy_news

paths = [
    'data/buriy/news-articles-2014.tar.bz2',
    'data/buriy/news-articles-2015-part1.tar.bz2',
    'data/buriy/news-articles-2015-part2.tar.bz2'
]
# Lazily chain the archives: load_buriy_news is only called for an archive
# once iteration reaches it.
records = (
    record
    for path in paths
    for record in load_buriy_news(path)
)
next(records)


from corus import load_buriy_webhose

path = 'data/buriy/webhose-2016.tar.bz2'
records = load_buriy_webhose(path)
next(records)


from corus import load_mokoron

path = 'data/mokoron/db.sql'
records = load_mokoron(path)
list(records)


from corus import load_wiki

path = 'data/ruwiki-latest-pages-articles.xml.bz2'
records = load_wiki(path)
# NOTE(review): list() consumes every record the loader yields; if this dump
# is large, next(records) may be the intended preview -- confirm.
list(records)


from corus import load_bsnlp

path = 'data/bsnlp'
records = load_bsnlp(path)
record = next(records)
record


from corus import load_ods_interfax

path = 'data/ods/interfax.csv.gz'
records = load_ods_interfax(path)
next(records)


from corus import load_ods_gazeta

path = 'data/ods/gazeta.csv.gz'
records = load_ods_gazeta(path)
next(records)


from corus import load_ods_izvestia

path = 'data/ods/iz.csv.gz'
records = load_ods_izvestia(path)
next(records)


from corus import load_ods_meduza

path = 'data/ods/meduza.csv.gz'
records = load_ods_meduza(path)
next(records)


from corus import load_ods_ria

path = 'data/ods/ria.csv.gz'
records = load_ods_ria(path)
next(records)


from corus import load_ods_rt

path = 'data/ods/rt.csv.gz'
records = load_ods_rt(path)
next(records)


from corus import load_ods_tass

path = 'data/ods/tass-001.csv.gz'
records = load_ods_tass(path)
next(records)


# load_ria_raw and load_ria are both pointed at the same file.
from corus import load_ria_raw

path = 'data/ria.json.gz'
records = load_ria_raw(path)
next(records)


from corus import load_ria

path = 'data/ria.json.gz'
records = load_ria(path)
next(records)


from corus import load_ud_gsd

path = 'data/ud/ru_gsd-ud-dev.conllu'
records = load_ud_gsd(path)
next(records)


from corus import load_ud_taiga

path = 'data/ud/ru_taiga-ud-dev.conllu'
records = load_ud_taiga(path)
next(records)


from corus import load_ud_pud

path = 'data/ud/ru_pud-ud-test.conllu'
records = load_ud_pud(path)
next(records)


from corus import load_ud_syntag

path = 'data/ud/ru_syntagrus-ud-dev.conllu'
records = load_ud_syntag(path)
next(records)


from corus import load_morphoru_gicrya

path = 'data/morphoru/gikrya_new_test.out'
records = load_morphoru_gicrya(path)
next(records)


from corus import load_morphoru_rnc

path = 'data/morphoru/RNCgoldInUD_Morpho.conll'
records = load_morphoru_rnc(path)
next(records)


from corus import load_morphoru_corpora

path = 'data/morphoru/unamb_sent_14_6.conllu'
records = load_morphoru_corpora(path)
next(records)


from corus import load_gramru

path = 'data/gramru/GramEval_private_test.conllu'
records = load_gramru(path)
next(records)


from corus import load_corpora

path = 'data/annot.opcorpora.xml.byfile.zip'
records = load_corpora(path)
next(records)


from corus import load_russe_hj

path = 'data/russe/sem/hj.csv'
records = load_russe_hj(path)
list(records)


from corus import load_russe_rt

path = 'data/russe/sem/rt.csv'
records = load_russe_rt(path)
list(records)


from corus import load_russe_ae

path = 'data/russe/sem/ae2.csv'
records = load_russe_ae(path)
list(records)


from corus import load_toloka_lrwc

path = 'data/toloka/lrwc-1.1-aggregated.tsv'
records = load_toloka_lrwc(path)
list(records)


from corus import load_simlex

path = 'data/simlex/ru_simlex965_tagged.tsv'
records = load_simlex(path)
list(records)


from corus import load_omnia

path = 'data/ru_om1000a.x1_.xz'
docs = load_omnia(path)
next(docs)


from corus import load_ruadrect

path = 'data/toloka/ruadrect/task2_ru_test.tsv'
records = load_ruadrect(path)
next(records)


from corus import load_rudrec

path = 'data/rudrec/rudrec_annotated.json'
records = load_rudrec(path)
next(records)