%load_ext autoreload
%autoreload 2
# First, I have to load the different modules that I use for analyzing the data and for plotting:
import sys, os, collections
import pandas as pd
import numpy as np
import re
import csv
import seaborn as sns
import matplotlib.pyplot as plt; plt.rcdefaults()
from matplotlib.pyplot import figure
from collections import Counter
# Second, I have to load the Text Fabric app
from tf.fabric import Fabric
from tf.app import use
SBLGNTv1 = use('SBLGNT/tf/6.12_v1', hoist=globals())
This is Text-Fabric 9.1.11 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 31 features found and 0 ignored
This is Text-Fabric 9.1.11 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 31 features found and 0 ignored | 0.10s T otype from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.73s T oslots from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.35s T text from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.00s T chapter from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.07s T verse from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.00s T book from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | | 0.01s C __levels__ from otype, oslots, otext | | 1.09s C __order__ from otype, oslots, __levels__ | | 0.11s C __rank__ from otype, __order__ | | 2.96s C __levUp__ from otype, oslots, __rank__ | | 0.50s C __levDown__ from otype, __levUp__, __rank__ | | 8.94s C __boundary__ from otype, oslots, __rank__ | | 0.08s C __sections__ from otype, oslots, otext, __levUp__, __levels__, book, chapter, verse | 0.00s T book_code from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.06s T case from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.00s T clause from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.93s T degree from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.10s T dict_abc_order from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.53s T gn from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.44s T lemma from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.33s T lemma_dictform from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.91s T lemma_freq from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.98s T 
lemma_gloss from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 2.15s T lemma_strongs from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.97s T lemma_translit from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.75s T mood from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.86s T morph from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.62s T morphology from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 2.35s T normalized_word from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.98s T nu from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 2.14s T orig_order from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.68s T ps from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.00s T sentence from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.84s T tense from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.97s T voice from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.93s T vrsnum from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 2.68s T word from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2
SBLGNTv2 = use('SBLGNT/tf/6.12_v2', hoist=globals())
This is Text-Fabric 9.1.11 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 31 features found and 0 ignored | 0.20s T otype from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.47s T oslots from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.26s T text from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.06s T chapter from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.07s T verse from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.08s T book from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | | 0.04s C __levels__ from otype, oslots, otext | | 1.44s C __order__ from otype, oslots, __levels__ | | 0.10s C __rank__ from otype, __order__ | | 3.95s C __levUp__ from otype, oslots, __rank__ | | 0.93s C __levDown__ from otype, __levUp__, __rank__ | | 7.71s C __boundary__ from otype, oslots, __rank__ | | 0.10s C __sections__ from otype, oslots, otext, __levUp__, __levels__, book, chapter, verse | 0.00s T book_code from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.04s T case from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.14s T clause from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.84s T degree from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.82s T dict_abc_order from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.19s T gn from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.90s T lemma from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.33s T lemma_dictform from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.61s T lemma_freq from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.38s T 
lemma_gloss from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.67s T lemma_strongs from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.54s T lemma_translit from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.67s T mood from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.90s T morph from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.78s T morphology from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.72s T normalized_word from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.99s T nu from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.85s T orig_order from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.37s T ps from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 0.12s T sentence from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.73s T tense from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.33s T voice from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 1.47s T vrsnum from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2 | 2.34s T word from D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/SBLGNT/tf/6.12_v2
Searchv1_0 = '''
book book=Matthew book_code*
chapter chapter=1
sentence sentence=1
verse
'''
Searchv1_0 = SBLGNTv1.search(Searchv1_0)
SBLGNTv1.show(Searchv1_0, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'verse','vrsnum','orig_order'})
0.01s 1 result
verse 1
Searchv2_0 = '''
book book=Matthew
chapter chapter=1
verse verse=21
'''
Searchv2_0 = SBLGNTv2.search(Searchv2_0)
SBLGNTv2.show(Searchv2_0, start=1, end=5, condensed=True, colorMap={3:'pink'}, extraFeatures={'verse','vrsnum', 'orig_order'})
0.01s 1 result
verse 1
# Search template: a verse node restricted to Matthew 1:1 with the word nodes it contains.
# NOTE(review): `SBLGNT` is not assigned in the visible cells (only SBLGNTv1 and
# SBLGNTv2 are) — confirm which app object this cell is meant to query.
# The name Search1 is reused: first it holds the template, then the result list.
Search1 = '''
verse book=Matthew chapter=1 verse=1
word
'''
Search1 = SBLGNT.search(Search1)
SBLGNT.show(Search1, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'verse','vrsnum'})
0.04s 0 results
# Search template: book Revelation, chapter 1, down through sentence and clause
# to the words, with a long list of word features requested via `feature*`.
# NOTE(review): `lemma_freq*` appears twice in the word line — presumably a
# copy/paste leftover; confirm and drop one.
# NOTE(review): `SBLGNT` is not assigned in the visible cells (only SBLGNTv1 and
# SBLGNTv2 are) — confirm which app object this cell is meant to query.
Search2 = '''
book book=Revelation book_code*
chapter chapter=1
sentence
clause
word lemma* morph* normalized_word* case* degree* gn* mood* nu* orig_order* ps* voice* tense* lemma_translit* dict_abc_order* lemma_freq* lemma_dictform* lemma_freq* lemma_gloss* lemma_strongs*
'''
Search2 = SBLGNT.search(Search2)
SBLGNT.show(Search2, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'verse','vrsnum'})
2.91s 0 results
featureprep=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/morph_copy.tf',delimiter='\t',encoding='utf-8')
pd.set_option('display.max_columns', 50)
featureprep.head(5)
morphology | |
---|---|
0 | N- ----NSF- |
1 | N- ----GSF- |
2 | N- ----GSM- |
3 | N- ----GSM- |
4 | N- ----GSM- |
featureprep['orig_order'] = featureprep.index +1
featureprep.head(5)
morphology | orig_order | |
---|---|---|
0 | N- ----NSF- | 1 |
1 | N- ----GSF- | 2 |
2 | N- ----GSM- | 3 |
3 | N- ----GSM- | 4 |
4 | N- ----GSM- | 5 |
sp
def spconditions(row):
    """Map a morphology code to its part-of-speech label.

    The argument is converted with ``str()`` and tried against each prefix
    pattern in the order listed below; the label belonging to the first
    pattern that matches is returned, or '' when none matches.
    """
    text = str(row)
    prefix_table = (
        ('^A.*', 'adj'),
        ('^C.*', 'conj'),
        ('^D.*', 'adv'),
        ('^I.*', 'interj'),
        ('^N-.*', 'noun'),
        ('^P.*', 'prep'),
        ('^RA.*', 'art-def'),
        ('^RD.*', 'pron-dem'),
        ('^RI.*', 'pron-inter'),
        ('^RP.*', 'pron-prs'),
        ('^RR.*', 'pron-rela'),
        ('^V-.*', 'verb'),
        ('^X.*', 'partcl'),
    )
    for pattern, label in prefix_table:
        if re.search(pattern, text):
            return label
    return ''
featureprep['sp']=featureprep['morphology'].apply(spconditions)
featureprep.head(20)
morphology | orig_order | sp | |
---|---|---|---|
0 | N- ----NSF- | 1 | noun |
1 | N- ----GSF- | 2 | noun |
2 | N- ----GSM- | 3 | noun |
3 | N- ----GSM- | 4 | noun |
4 | N- ----GSM- | 5 | noun |
5 | N- ----GSM- | 6 | noun |
6 | N- ----GSM- | 7 | noun |
7 | N- ----GSM- | 8 | noun |
8 | N- ----NSM- | 9 | noun |
9 | V- 3AAI-S-- | 10 | verb |
10 | RA ----ASM- | 11 | art-def |
11 | N- ----ASM- | 12 | noun |
12 | N- ----NSM- | 13 | noun |
13 | C- -------- | 14 | conj |
14 | V- 3AAI-S-- | 15 | verb |
15 | RA ----ASM- | 16 | art-def |
16 | N- ----ASM- | 17 | noun |
17 | N- ----NSM- | 18 | noun |
18 | C- -------- | 19 | conj |
19 | V- 3AAI-S-- | 20 | verb |
gn
def gender(row):
    """Read the grammatical gender out of a morphology code.

    The patterns look at the character directly before the final one:
    ``F`` gives 'f', ``M`` gives 'm', ``N`` gives 'n'. Anything else
    (including codes that are too short) gives ''.
    """
    text = str(row)
    candidates = (('.*F.$', 'f'), ('.*M.$', 'm'), ('.*N.$', 'n'))
    return next(
        (label for pattern, label in candidates if re.search(pattern, text)),
        '',
    )
featureprep['gn']=featureprep['morphology'].apply(gender)
featureprep.head(20)
morphology | orig_order | sp | gn | |
---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f |
1 | N- ----GSF- | 2 | noun | f |
2 | N- ----GSM- | 3 | noun | m |
3 | N- ----GSM- | 4 | noun | m |
4 | N- ----GSM- | 5 | noun | m |
5 | N- ----GSM- | 6 | noun | m |
6 | N- ----GSM- | 7 | noun | m |
7 | N- ----GSM- | 8 | noun | m |
8 | N- ----NSM- | 9 | noun | m |
9 | V- 3AAI-S-- | 10 | verb | |
10 | RA ----ASM- | 11 | art-def | m |
11 | N- ----ASM- | 12 | noun | m |
12 | N- ----NSM- | 13 | noun | m |
13 | C- -------- | 14 | conj | |
14 | V- 3AAI-S-- | 15 | verb | |
15 | RA ----ASM- | 16 | art-def | m |
16 | N- ----ASM- | 17 | noun | m |
17 | N- ----NSM- | 18 | noun | m |
18 | C- -------- | 19 | conj | |
19 | V- 3AAI-S-- | 20 | verb |
nu
def number(row):
    """Return the grammatical number encoded in a morphology code.

    The number flag is the third character from the end of the code
    (the ``S`` in ``N- ----NSF-``): ``S`` -> 'sg', ``P`` -> 'pl',
    anything else -> ''.

    The earlier version additionally tested ``S[MFN].`` and ``P[MFN].``
    after the generic ``S..`` / ``P..`` patterns. Those branches could never
    be reached because the generic pattern already matches every string
    they match, so they are removed; results are unchanged.
    """
    found = re.search(r'([SP])..$', str(row))
    if found is None:
        return ''
    return 'sg' if found.group(1) == 'S' else 'pl'
featureprep['nu']=featureprep['morphology'].apply(number)
featureprep.head(50)
morphology | orig_order | sp | gn | nu | |
---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg |
1 | N- ----GSF- | 2 | noun | f | sg |
2 | N- ----GSM- | 3 | noun | m | sg |
3 | N- ----GSM- | 4 | noun | m | sg |
4 | N- ----GSM- | 5 | noun | m | sg |
5 | N- ----GSM- | 6 | noun | m | sg |
6 | N- ----GSM- | 7 | noun | m | sg |
7 | N- ----GSM- | 8 | noun | m | sg |
8 | N- ----NSM- | 9 | noun | m | sg |
9 | V- 3AAI-S-- | 10 | verb | sg | |
10 | RA ----ASM- | 11 | art-def | m | sg |
11 | N- ----ASM- | 12 | noun | m | sg |
12 | N- ----NSM- | 13 | noun | m | sg |
13 | C- -------- | 14 | conj | ||
14 | V- 3AAI-S-- | 15 | verb | sg | |
15 | RA ----ASM- | 16 | art-def | m | sg |
16 | N- ----ASM- | 17 | noun | m | sg |
17 | N- ----NSM- | 18 | noun | m | sg |
18 | C- -------- | 19 | conj | ||
19 | V- 3AAI-S-- | 20 | verb | sg | |
20 | RA ----ASM- | 21 | art-def | m | sg |
21 | N- ----ASM- | 22 | noun | m | sg |
22 | C- -------- | 23 | conj | ||
23 | RA ----APM- | 24 | art-def | m | pl |
24 | N- ----APM- | 25 | noun | m | pl |
25 | RP ----GSM- | 26 | pron-prs | m | sg |
26 | N- ----NSM- | 27 | noun | m | sg |
27 | C- -------- | 28 | conj | ||
28 | V- 3AAI-S-- | 29 | verb | sg | |
29 | RA ----ASM- | 30 | art-def | m | sg |
30 | N- ----ASM- | 31 | noun | m | sg |
31 | C- -------- | 32 | conj | ||
32 | RA ----ASM- | 33 | art-def | m | sg |
33 | N- ----ASM- | 34 | noun | m | sg |
34 | P- -------- | 35 | prep | ||
35 | RA ----GSF- | 36 | art-def | f | sg |
36 | N- ----GSF- | 37 | noun | f | sg |
37 | N- ----NSM- | 38 | noun | m | sg |
38 | C- -------- | 39 | conj | ||
39 | V- 3AAI-S-- | 40 | verb | sg | |
40 | RA ----ASM- | 41 | art-def | m | sg |
41 | N- ----ASM- | 42 | noun | m | sg |
42 | N- ----NSM- | 43 | noun | m | sg |
43 | C- -------- | 44 | conj | ||
44 | V- 3AAI-S-- | 45 | verb | sg | |
45 | RA ----ASM- | 46 | art-def | m | sg |
46 | N- ----ASM- | 47 | noun | m | sg |
47 | N- ----NSM- | 48 | noun | m | sg |
48 | C- -------- | 49 | conj | ||
49 | V- 3AAI-S-- | 50 | verb | sg |
ps
def person(row):
    """Return 'p1', 'p2' or 'p3' depending on which digit the code contains.

    The stringified value is searched for the digits 1, 2 and 3 in that
    order; the first digit found decides the result. Returns '' when the
    code holds none of them.
    """
    text = str(row)
    digit_patterns = {'.*1.*': 'p1', '.*2.*': 'p2', '.*3.*': 'p3'}
    for pattern, label in digit_patterns.items():
        if re.search(pattern, text) is not None:
            return label
    return ''
featureprep['ps']=featureprep['morphology'].apply(person)
featureprep.head(20)
morphology | orig_order | sp | gn | nu | ps | |
---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | |
1 | N- ----GSF- | 2 | noun | f | sg | |
2 | N- ----GSM- | 3 | noun | m | sg | |
3 | N- ----GSM- | 4 | noun | m | sg | |
4 | N- ----GSM- | 5 | noun | m | sg | |
5 | N- ----GSM- | 6 | noun | m | sg | |
6 | N- ----GSM- | 7 | noun | m | sg | |
7 | N- ----GSM- | 8 | noun | m | sg | |
8 | N- ----NSM- | 9 | noun | m | sg | |
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | |
10 | RA ----ASM- | 11 | art-def | m | sg | |
11 | N- ----ASM- | 12 | noun | m | sg | |
12 | N- ----NSM- | 13 | noun | m | sg | |
13 | C- -------- | 14 | conj | |||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | |
15 | RA ----ASM- | 16 | art-def | m | sg | |
16 | N- ----ASM- | 17 | noun | m | sg | |
17 | N- ----NSM- | 18 | noun | m | sg | |
18 | C- -------- | 19 | conj | |||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 |
case
def case(row):
    """Return the grammatical case named by a morphology code.

    Each pattern is anchored at the end of the string and inspects the
    fourth character from the end (with at least six characters before it):
    N -> 'nominative', G -> 'genitive', D -> 'dative', A -> 'accusative'.
    Returns '' for every other value.
    """
    text = str(row)
    case_patterns = (
        ('......N...$', 'nominative'),
        ('......G...$', 'genitive'),
        ('......D...$', 'dative'),
        ('......A...$', 'accusative'),
    )
    return next(
        (name for pattern, name in case_patterns if re.search(pattern, text)),
        '',
    )
featureprep['case']=featureprep['morphology'].apply(case)
featureprep.head(20)
morphology | orig_order | sp | gn | nu | ps | case | |
---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | |
1 | N- ----GSF- | 2 | noun | f | sg | genitive | |
2 | N- ----GSM- | 3 | noun | m | sg | genitive | |
3 | N- ----GSM- | 4 | noun | m | sg | genitive | |
4 | N- ----GSM- | 5 | noun | m | sg | genitive | |
5 | N- ----GSM- | 6 | noun | m | sg | genitive | |
6 | N- ----GSM- | 7 | noun | m | sg | genitive | |
7 | N- ----GSM- | 8 | noun | m | sg | genitive | |
8 | N- ----NSM- | 9 | noun | m | sg | nominative | |
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | ||
10 | RA ----ASM- | 11 | art-def | m | sg | accusative | |
11 | N- ----ASM- | 12 | noun | m | sg | accusative | |
12 | N- ----NSM- | 13 | noun | m | sg | nominative | |
13 | C- -------- | 14 | conj | ||||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | ||
15 | RA ----ASM- | 16 | art-def | m | sg | accusative | |
16 | N- ----ASM- | 17 | noun | m | sg | accusative | |
17 | N- ----NSM- | 18 | noun | m | sg | nominative | |
18 | C- -------- | 19 | conj | ||||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 |
vt
def tense(row):
    """Return the verb tense encoded in a morphology code, or ''.

    In codes such as ``V- 3AAI-S--`` the tense flag is the 7th character
    from the end, directly before the voice flag. Only that position is
    inspected:

        A -> 'aorist', P -> 'present', F -> 'future', I -> 'imperfect',
        Y -> 'plusquamperfect', X -> 'perfect'

    The earlier patterns (``'...A.*'`` etc.) were not anchored, so any
    matching letter from the fourth character onwards triggered a hit.
    Case and gender flags of nouns and articles were therefore read as
    tenses (``RA ----ASM-`` became 'aorist', ``N- ----NSF-`` became
    'future'). Anchoring at the end of the string, as the voice, mood and
    case helpers do, restricts the test to the tense slot.
    """
    tense_labels = {
        'A': 'aorist',
        'P': 'present',
        'F': 'future',
        'I': 'imperfect',
        'Y': 'plusquamperfect',
        'X': 'perfect',
    }
    # Three leading characters, the tense flag, then the six remaining flags.
    found = re.search(r'...(.)......$', str(row))
    if found is None:
        return ''
    return tense_labels.get(found.group(1), '')
featureprep['vt']=featureprep['morphology'].apply(tense)
featureprep.head(20)
morphology | orig_order | sp | gn | nu | ps | case | vt | |
---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | |
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | |
2 | N- ----GSM- | 3 | noun | m | sg | genitive | ||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | ||
4 | N- ----GSM- | 5 | noun | m | sg | genitive | ||
5 | N- ----GSM- | 6 | noun | m | sg | genitive | ||
6 | N- ----GSM- | 7 | noun | m | sg | genitive | ||
7 | N- ----GSM- | 8 | noun | m | sg | genitive | ||
8 | N- ----NSM- | 9 | noun | m | sg | nominative | ||
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | aorist | ||
10 | RA ----ASM- | 11 | art-def | m | sg | accusative | aorist | |
11 | N- ----ASM- | 12 | noun | m | sg | accusative | aorist | |
12 | N- ----NSM- | 13 | noun | m | sg | nominative | ||
13 | C- -------- | 14 | conj | |||||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | aorist | ||
15 | RA ----ASM- | 16 | art-def | m | sg | accusative | aorist | |
16 | N- ----ASM- | 17 | noun | m | sg | accusative | aorist | |
17 | N- ----NSM- | 18 | noun | m | sg | nominative | ||
18 | C- -------- | 19 | conj | |||||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 | aorist |
voice
def voice(row):
    """Return the verb voice named by a morphology code.

    The patterns are anchored at the end of the string and test the sixth
    character from the end (with at least four characters before it):
    A -> 'active', M -> 'middle', P -> 'passive'. Returns '' otherwise.
    """
    text = str(row)
    voice_patterns = (
        ('....A.....$', 'active'),
        ('....M.....$', 'middle'),
        ('....P.....$', 'passive'),
    )
    for pattern, name in voice_patterns:
        if re.search(pattern, text):
            return name
    return ''
featureprep['voice']=featureprep['morphology'].apply(voice)
featureprep.head(20)
morphology | orig_order | sp | gn | nu | ps | case | vt | voice | |
---|---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | ||
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | ||
2 | N- ----GSM- | 3 | noun | m | sg | genitive | |||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | |||
4 | N- ----GSM- | 5 | noun | m | sg | genitive | |||
5 | N- ----GSM- | 6 | noun | m | sg | genitive | |||
6 | N- ----GSM- | 7 | noun | m | sg | genitive | |||
7 | N- ----GSM- | 8 | noun | m | sg | genitive | |||
8 | N- ----NSM- | 9 | noun | m | sg | nominative | |||
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | aorist | active | ||
10 | RA ----ASM- | 11 | art-def | m | sg | accusative | aorist | ||
11 | N- ----ASM- | 12 | noun | m | sg | accusative | aorist | ||
12 | N- ----NSM- | 13 | noun | m | sg | nominative | |||
13 | C- -------- | 14 | conj | ||||||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | aorist | active | ||
15 | RA ----ASM- | 16 | art-def | m | sg | accusative | aorist | ||
16 | N- ----ASM- | 17 | noun | m | sg | accusative | aorist | ||
17 | N- ----NSM- | 18 | noun | m | sg | nominative | |||
18 | C- -------- | 19 | conj | ||||||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 | aorist | active |
mood
def mood(row):
    """Return the verb mood named by a morphology code.

    The patterns are anchored at the end of the string and test the fifth
    character from the end (with at least five characters before it):
    I -> 'indicative', D -> 'imperative', N -> 'infinitive',
    O -> 'optative', P -> 'participle', S -> 'subjunctive'.
    Returns '' for any other value.
    """
    text = str(row)
    mood_patterns = {
        '.....I....$': 'indicative',
        '.....D....$': 'imperative',
        '.....N....$': 'infinitive',
        '.....O....$': 'optative',
        '.....P....$': 'participle',
        '.....S....$': 'subjunctive',
    }
    return next(
        (name for pattern, name in mood_patterns.items()
         if re.search(pattern, text)),
        '',
    )
featureprep['mood']=featureprep['morphology'].apply(mood)
featureprep.head(50)
morphology | orig_order | sp | gn | nu | ps | case | vt | voice | mood | |
---|---|---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | |||
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | |||
2 | N- ----GSM- | 3 | noun | m | sg | genitive | ||||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | ||||
4 | N- ----GSM- | 5 | noun | m | sg | genitive | ||||
5 | N- ----GSM- | 6 | noun | m | sg | genitive | ||||
6 | N- ----GSM- | 7 | noun | m | sg | genitive | ||||
7 | N- ----GSM- | 8 | noun | m | sg | genitive | ||||
8 | N- ----NSM- | 9 | noun | m | sg | nominative | ||||
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | aorist | active | indicative | ||
10 | RA ----ASM- | 11 | art-def | m | sg | accusative | aorist | |||
11 | N- ----ASM- | 12 | noun | m | sg | accusative | aorist | |||
12 | N- ----NSM- | 13 | noun | m | sg | nominative | ||||
13 | C- -------- | 14 | conj | |||||||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | aorist | active | indicative | ||
15 | RA ----ASM- | 16 | art-def | m | sg | accusative | aorist | |||
16 | N- ----ASM- | 17 | noun | m | sg | accusative | aorist | |||
17 | N- ----NSM- | 18 | noun | m | sg | nominative | ||||
18 | C- -------- | 19 | conj | |||||||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 | aorist | active | indicative | ||
20 | RA ----ASM- | 21 | art-def | m | sg | accusative | aorist | |||
21 | N- ----ASM- | 22 | noun | m | sg | accusative | aorist | |||
22 | C- -------- | 23 | conj | |||||||
23 | RA ----APM- | 24 | art-def | m | pl | accusative | aorist | |||
24 | N- ----APM- | 25 | noun | m | pl | accusative | aorist | |||
25 | RP ----GSM- | 26 | pron-prs | m | sg | genitive | ||||
26 | N- ----NSM- | 27 | noun | m | sg | nominative | ||||
27 | C- -------- | 28 | conj | |||||||
28 | V- 3AAI-S-- | 29 | verb | sg | p3 | aorist | active | indicative | ||
29 | RA ----ASM- | 30 | art-def | m | sg | accusative | aorist | |||
30 | N- ----ASM- | 31 | noun | m | sg | accusative | aorist | |||
31 | C- -------- | 32 | conj | |||||||
32 | RA ----ASM- | 33 | art-def | m | sg | accusative | aorist | |||
33 | N- ----ASM- | 34 | noun | m | sg | accusative | aorist | |||
34 | P- -------- | 35 | prep | |||||||
35 | RA ----GSF- | 36 | art-def | f | sg | genitive | future | |||
36 | N- ----GSF- | 37 | noun | f | sg | genitive | future | |||
37 | N- ----NSM- | 38 | noun | m | sg | nominative | ||||
38 | C- -------- | 39 | conj | |||||||
39 | V- 3AAI-S-- | 40 | verb | sg | p3 | aorist | active | indicative | ||
40 | RA ----ASM- | 41 | art-def | m | sg | accusative | aorist | |||
41 | N- ----ASM- | 42 | noun | m | sg | accusative | aorist | |||
42 | N- ----NSM- | 43 | noun | m | sg | nominative | ||||
43 | C- -------- | 44 | conj | |||||||
44 | V- 3AAI-S-- | 45 | verb | sg | p3 | aorist | active | indicative | ||
45 | RA ----ASM- | 46 | art-def | m | sg | accusative | aorist | |||
46 | N- ----ASM- | 47 | noun | m | sg | accusative | aorist | |||
47 | N- ----NSM- | 48 | noun | m | sg | nominative | ||||
48 | C- -------- | 49 | conj | |||||||
49 | V- 3AAI-S-- | 50 | verb | sg | p3 | aorist | active | indicative |
degree
def degree(row):
    """Return the degree of comparison flagged by the last character.

    A stringified code ending in ``C`` yields 'comparative', one ending in
    ``S`` yields 'superlative'; everything else yields ''.
    """
    text = str(row)
    if re.search('C$', text) is not None:
        return 'comparative'
    return 'superlative' if re.search('S$', text) else ''
# Fill the degree column with the degree() helper written for it. The earlier
# call passed `propernoun`, which is not defined in the visible cells and is
# unrelated to degree of comparison.
featureprep['degree'] = featureprep['morphology'].apply(degree)
featureprep.head(50)
morphology | orig_order | sp | gn | nu | ps | case | vt | voice | mood | degree | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | ||||
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | ||||
2 | N- ----GSM- | 3 | noun | m | sg | genitive | |||||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | |||||
4 | N- ----GSM- | 5 | noun | m | sg | genitive | |||||
5 | N- ----GSM- | 6 | noun | m | sg | genitive | |||||
6 | N- ----GSM- | 7 | noun | m | sg | genitive | |||||
7 | N- ----GSM- | 8 | noun | m | sg | genitive | |||||
8 | N- ----NSM- | 9 | noun | m | sg | nominative | |||||
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | aorist | active | indicative | |||
10 | RA ----ASM- | 11 | art-def | m | sg | accusative | aorist | ||||
11 | N- ----ASM- | 12 | noun | m | sg | accusative | aorist | ||||
12 | N- ----NSM- | 13 | noun | m | sg | nominative | |||||
13 | C- -------- | 14 | conj | ||||||||
14 | V- 3AAI-S-- | 15 | verb | sg | p3 | aorist | active | indicative | |||
15 | RA ----ASM- | 16 | art-def | m | sg | accusative | aorist | ||||
16 | N- ----ASM- | 17 | noun | m | sg | accusative | aorist | ||||
17 | N- ----NSM- | 18 | noun | m | sg | nominative | |||||
18 | C- -------- | 19 | conj | ||||||||
19 | V- 3AAI-S-- | 20 | verb | sg | p3 | aorist | active | indicative | |||
20 | RA ----ASM- | 21 | art-def | m | sg | accusative | aorist | ||||
21 | N- ----ASM- | 22 | noun | m | sg | accusative | aorist | ||||
22 | C- -------- | 23 | conj | ||||||||
23 | RA ----APM- | 24 | art-def | m | pl | accusative | aorist | ||||
24 | N- ----APM- | 25 | noun | m | pl | accusative | aorist | ||||
25 | RP ----GSM- | 26 | pron-prs | m | sg | genitive | |||||
26 | N- ----NSM- | 27 | noun | m | sg | nominative | |||||
27 | C- -------- | 28 | conj | ||||||||
28 | V- 3AAI-S-- | 29 | verb | sg | p3 | aorist | active | indicative | |||
29 | RA ----ASM- | 30 | art-def | m | sg | accusative | aorist | ||||
30 | N- ----ASM- | 31 | noun | m | sg | accusative | aorist | ||||
31 | C- -------- | 32 | conj | ||||||||
32 | RA ----ASM- | 33 | art-def | m | sg | accusative | aorist | ||||
33 | N- ----ASM- | 34 | noun | m | sg | accusative | aorist | ||||
34 | P- -------- | 35 | prep | ||||||||
35 | RA ----GSF- | 36 | art-def | f | sg | genitive | future | ||||
36 | N- ----GSF- | 37 | noun | f | sg | genitive | future | ||||
37 | N- ----NSM- | 38 | noun | m | sg | nominative | |||||
38 | C- -------- | 39 | conj | ||||||||
39 | V- 3AAI-S-- | 40 | verb | sg | p3 | aorist | active | indicative | |||
40 | RA ----ASM- | 41 | art-def | m | sg | accusative | aorist | ||||
41 | N- ----ASM- | 42 | noun | m | sg | accusative | aorist | ||||
42 | N- ----NSM- | 43 | noun | m | sg | nominative | |||||
43 | C- -------- | 44 | conj | ||||||||
44 | V- 3AAI-S-- | 45 | verb | sg | p3 | aorist | active | indicative | |||
45 | RA ----ASM- | 46 | art-def | m | sg | accusative | aorist | ||||
46 | N- ----ASM- | 47 | noun | m | sg | accusative | aorist | ||||
47 | N- ----NSM- | 48 | noun | m | sg | nominative | |||||
48 | C- -------- | 49 | conj | ||||||||
49 | V- 3AAI-S-- | 50 | verb | sg | p3 | aorist | active | indicative |
featureprep.head()
morphology | orig_order | sp | gn | nu | ps | case | vt | voice | mood | degree | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | ||||
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | ||||
2 | N- ----GSM- | 3 | noun | m | sg | genitive | |||||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | |||||
4 | N- ----GSM- | 5 | noun | m | sg | genitive |
sorting first...
featureprep.sort_values(['orig_order'], ascending=True).head(10)
morphology | orig_order | sp | gn | nu | ps | case | vt | voice | mood | degree | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | N- ----NSF- | 1 | noun | f | sg | nominative | future | ||||
1 | N- ----GSF- | 2 | noun | f | sg | genitive | future | ||||
2 | N- ----GSM- | 3 | noun | m | sg | genitive | |||||
3 | N- ----GSM- | 4 | noun | m | sg | genitive | |||||
4 | N- ----GSM- | 5 | noun | m | sg | genitive | |||||
5 | N- ----GSM- | 6 | noun | m | sg | genitive | |||||
6 | N- ----GSM- | 7 | noun | m | sg | genitive | |||||
7 | N- ----GSM- | 8 | noun | m | sg | genitive | |||||
8 | N- ----NSM- | 9 | noun | m | sg | nominative | |||||
9 | V- 3AAI-S-- | 10 | verb | sg | p3 | aorist | active | indicative |
# Save the complete feature table (morphology plus every derived column) as an
# Excel sheet; index=None leaves the row index out of the file.
featureprep.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/morphology.xlsx', index=None)
# Export each derived feature column to its own .tf file, one value per row.
# NOTE(review): to_csv is called without header=False, so presumably the column
# name ends up as the first line of each file — confirm this is intended.
# NOTE(review): three targets carry a "2" suffix (sp2, gn2, ps2) while the
# others do not — confirm the file names.
featureprep['sp'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/sp2.tf', index=None)
featureprep['gn'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/gn2.tf', index=None)
featureprep['nu'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/nu.tf', index=None)
featureprep['ps'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ps2.tf', index=None)
featureprep['case'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/case.tf', index=None)
featureprep['vt'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/vt.tf', index=None)
featureprep['voice'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/voice.tf', index=None)
featureprep['mood'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/mood.tf', index=None)
featureprep['degree'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/degree.tf', index=None)
translitadd=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\t',encoding='utf-8')
pd.set_option('display.max_columns', 50)
translitadd.head(10)
0 | |
---|---|
0 | βίβλος |
1 | γένεσις |
2 | Ἰησοῦς |
3 | Χριστός |
4 | υἱός |
5 | Δαυίδ |
6 | υἱός |
7 | Ἀβραάμ |
8 | Ἀβραάμ |
9 | γεννάω |
translitadd['lemma']=translitadd[0]
translitadd.head(5)
0 | lemma | |
---|---|---|
0 | βίβλος | βίβλος |
1 | γένεσις | γένεσις |
2 | Ἰησοῦς | Ἰησοῦς |
3 | Χριστός | Χριστός |
4 | υἱός | υἱός |
# Keep only the lemma column. The explicit .copy() makes the result an
# independent frame, so assigning new columns to it afterwards does not
# trigger pandas' SettingWithCopyWarning.
translitadd = translitadd[['lemma']].copy()
translitadd.head(5)
lemma | |
---|---|
0 | βίβλος |
1 | γένεσις |
2 | Ἰησοῦς |
3 | Χριστός |
4 | υἱός |
translitadd['orig_order'] = translitadd.index +1
translitadd.head(5)
lemma | orig_order | |
---|---|---|
0 | βίβλος | 1 |
1 | γένεσις | 2 |
2 | Ἰησοῦς | 3 |
3 | Χριστός | 4 |
4 | υἱός | 5 |
from unidecode import unidecode
s = "βίβλος"
s = unidecode(s)
print(s)
biblos
translitadd['translit'] = translitadd['lemma'].apply(unidecode)
translitadd.head(5)
lemma | orig_order | translit | |
---|---|---|---|
0 | βίβλος | 1 | biblos |
1 | γένεσις | 2 | genesis |
2 | Ἰησοῦς | 3 | Iesous |
3 | Χριστός | 4 | Khristos |
4 | υἱός | 5 | uios |
translitadd['translit'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_translit.tf', index=None)
ABC1=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\t',encoding='utf-8')
pd.set_option('display.max_columns', 50)
ABC1.head(10)
0 | |
---|---|
0 | βίβλος |
1 | γένεσις |
2 | Ἰησοῦς |
3 | Χριστός |
4 | υἱός |
5 | Δαυίδ |
6 | υἱός |
7 | Ἀβραάμ |
8 | Ἀβραάμ |
9 | γεννάω |
# Copy the raw lemma strings (the unnamed column 0 of ABC1) into a named
# 'lemma' column. The source is ABC1 itself, so the cell does not depend on
# a separate global called `lemma` still being alive in the kernel; this
# mirrors how the other frames built from lemma_copy.tf name their column.
ABC1['lemma'] = ABC1[0]
ABC1.head(5)
0 | lemma | |
---|---|---|
0 | βίβλος | βίβλος |
1 | γένεσις | γένεσις |
2 | Ἰησοῦς | Ἰησοῦς |
3 | Χριστός | Χριστός |
4 | υἱός | υἱός |
ABC1['orig_order'] = ABC1.index +1
ABC1.head(5)
0 | lemma | orig_order | |
---|---|---|---|
0 | βίβλος | βίβλος | 1 |
1 | γένεσις | γένεσις | 2 |
2 | Ἰησοῦς | Ἰησοῦς | 3 |
3 | Χριστός | Χριστός | 4 |
4 | υἱός | υἱός | 5 |
ABC1=ABC1[['orig_order','lemma']]
ABC1.head(5)
orig_order | lemma | |
---|---|---|
0 | 1 | βίβλος |
1 | 2 | γένεσις |
2 | 3 | Ἰησοῦς |
3 | 4 | Χριστός |
4 | 5 | υἱός |
ABC1.describe()
orig_order | |
---|---|
count | 137554.000000 |
mean | 68777.500000 |
std | 39708.563801 |
min | 1.000000 |
25% | 34389.250000 |
50% | 68777.500000 |
75% | 103165.750000 |
max | 137554.000000 |
# Reduce the word list to one row per lemma (the first occurrence is kept),
# then order those unique lemmas with a plain ascending string sort.
unique_lemmas = ABC1.drop_duplicates(subset='lemma')
ABCdict = unique_lemmas.sort_values('lemma', ascending=True)
ABCdict.head(10)
orig_order | lemma | |
---|---|---|
68479 | 68480 | Αἰγύπτιος |
69633 | 69634 | Αἰθίοψ |
70464 | 70465 | Αἰνέας |
50739 | 50740 | Αἰνών |
679 | 680 | Αἴγυπτος |
30811 | 30812 | Αὐγοῦστος |
87589 | 87590 | Βάαλ |
75520 | 75521 | Βέροια |
171 | 172 | Βαβυλών |
128506 | 128507 | Βαλάκ |
ABCdict.describe()
orig_order | |
---|---|
count | 5461.000000 |
mean | 55051.110969 |
std | 42441.209940 |
min | 1.000000 |
25% | 12643.000000 |
50% | 48785.000000 |
75% | 90141.000000 |
max | 137334.000000 |
# Export the lemma table so it can be sorted alphabetically outside pandas.
# No `encoding` keyword: DataFrame.to_excel deprecated it in pandas 1.5 and
# removed it in pandas 2.0, where passing it raises a TypeError.
# NOTE(review): the sorted file that is read back afterwards appears to hold one
# row per lemma -- confirm whether the de-duplication is done in the spreadsheet
# or whether ABCdict was meant to be exported here.
ABC1.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC1order.xlsx')
Now I am ordering the words alphabetically with LibreOffice Writer, since I cannot do that in pandas (yet?).
ABC2=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC2order.xlsx')
pd.set_option('display.max_columns', 50)
ABC2.head(10)
Unnamed: 0 | orig_order | lemma | ABC order | |
---|---|---|---|---|
0 | 29678 | 29679 | Ἀαρών | 1 |
1 | 131340 | 131341 | Ἀβαδδών | 2 |
2 | 100253 | 100254 | ἀβαρής | 3 |
3 | 28001 | 28002 | αββα | 4 |
4 | 14094 | 14095 | Ἅβελ | 5 |
5 | 108 | 109 | Ἀβιά | 6 |
6 | 19523 | 19524 | Ἀβιαθάρ | 7 |
7 | 31682 | 31683 | Ἀβιληνή | 8 |
8 | 190 | 191 | Ἀβιούδ | 9 |
9 | 7 | 8 | Ἀβραάμ | 10 |
Now we merge the ABCorder dataframe with the original lemma DF.
# Attach the alphabetical rank from ABC2 to every word occurrence in ABC1 by
# joining the two frames on the lemma string.
lemma_ABC = ABC1.merge(ABC2, how='outer', on='lemma')
lemma_ABC.head(5)
orig_order_x | lemma | Unnamed: 0 | orig_order_y | ABC order | |
---|---|---|---|---|---|
0 | 1 | βίβλος | 0 | 1 | 970 |
1 | 26440 | βίβλος | 0 | 1 | 970 |
2 | 31717 | βίβλος | 0 | 1 | 970 |
3 | 45660 | βίβλος | 0 | 1 | 970 |
4 | 64886 | βίβλος | 0 | 1 | 970 |
lemma_ABC.describe()
orig_order_x | Unnamed: 0 | orig_order_y | ABC order | |
---|---|---|---|---|
count | 137554.000000 | 137554.000000 | 137554.000000 | 137554.00000 |
mean | 68777.500000 | 7050.531566 | 7051.531566 | 2676.19798 |
std | 39708.563801 | 20152.248998 | 20152.248998 | 1339.74175 |
min | 1.000000 | 0.000000 | 1.000000 | 1.00000 |
25% | 34389.250000 | 25.000000 | 26.000000 | 1501.00000 |
50% | 68777.500000 | 400.000000 | 401.000000 | 2727.00000 |
75% | 103165.750000 | 2097.250000 | 2098.250000 | 3598.00000 |
max | 137554.000000 | 137333.000000 | 137334.000000 | 5461.00000 |
lemma_ABC.sort_values(['orig_order_x'], ascending=True).head(10)
orig_order_x | lemma | Unnamed: 0 | orig_order_y | ABC order | |
---|---|---|---|---|---|
0 | 1 | βίβλος | 0 | 1 | 970 |
10 | 2 | γένεσις | 1 | 2 | 1074 |
15 | 3 | Ἰησοῦς | 2 | 3 | 2406 |
921 | 4 | Χριστός | 3 | 4 | 5385 |
1449 | 5 | υἱός | 4 | 5 | 5053 |
1824 | 6 | Δαυίδ | 5 | 6 | 1156 |
1450 | 7 | υἱός | 4 | 5 | 5053 |
1883 | 8 | Ἀβραάμ | 7 | 8 | 10 |
1884 | 9 | Ἀβραάμ | 7 | 8 | 10 |
1956 | 10 | γεννάω | 9 | 10 | 1077 |
lemma_ABC.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_abc.xlsx')
frequencyadd=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\t',encoding='utf-8')
pd.set_option('display.max_columns', 50)
frequencyadd.head(20)
0 | |
---|---|
0 | βίβλος |
1 | γένεσις |
2 | Ἰησοῦς |
3 | Χριστός |
4 | υἱός |
5 | Δαυίδ |
6 | υἱός |
7 | Ἀβραάμ |
8 | Ἀβραάμ |
9 | γεννάω |
10 | ὁ |
11 | Ἰσαάκ |
12 | Ἰσαάκ |
13 | δέ |
14 | γεννάω |
15 | ὁ |
16 | Ἰακώβ |
17 | Ἰακώβ |
18 | δέ |
19 | γεννάω |
# Keep only the lemma strings (unnamed column 0) under a proper column name ...
frequencyadd = frequencyadd[[0]].rename(columns={0: 'lemma'})
# ... and put the 1-based row position in front of it as 'orig_order'.
frequencyadd.insert(0, 'orig_order', frequencyadd.index + 1)
frequencyadd.head(5)
orig_order | lemma | |
---|---|---|
0 | 1 | βίβλος |
1 | 2 | γένεσις |
2 | 3 | Ἰησοῦς |
3 | 4 | Χριστός |
4 | 5 | υἱός |
# Count the rows of each lemma and write that count back onto every row of
# the lemma, so the frame keeps one row per word.
lemma_groups = frequencyadd.groupby("lemma")["lemma"]
# "count" selects the group-wise count function by name
frequencyadd["freq_lemma"] = lemma_groups.transform("count")
frequencyadd.head(20)
orig_order | lemma | freq_lemma | |
---|---|---|---|
0 | 1 | βίβλος | 10 |
1 | 2 | γένεσις | 5 |
2 | 3 | Ἰησοῦς | 906 |
3 | 4 | Χριστός | 528 |
4 | 5 | υἱός | 375 |
5 | 6 | Δαυίδ | 59 |
6 | 7 | υἱός | 375 |
7 | 8 | Ἀβραάμ | 73 |
8 | 9 | Ἀβραάμ | 73 |
9 | 10 | γεννάω | 97 |
10 | 11 | ὁ | 19769 |
11 | 12 | Ἰσαάκ | 20 |
12 | 13 | Ἰσαάκ | 20 |
13 | 14 | δέ | 2766 |
14 | 15 | γεννάω | 97 |
15 | 16 | ὁ | 19769 |
16 | 17 | Ἰακώβ | 27 |
17 | 18 | Ἰακώβ | 27 |
18 | 19 | δέ | 2766 |
19 | 20 | γεννάω | 97 |
frequencyadd.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_freq.xlsx')
Let's first load the NA1904 BibleOL dictionary:
BOLgreekDICT=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/NA1904_dictionary_v1.0.xlsx')
pd.set_option('display.max_columns', 50)
BOLgreekDICT.head(20)
orig abc order | Occurrences | Lexeme | Lexeme_dict | Strong's number | Strong's unreliable? | gloss | |
---|---|---|---|---|---|---|---|
0 | 1 | 5 | Ἀαρών | Ἀαρών, ὁ | 2 | no | Aaron |
1 | 2 | 1 | Ἀβαδδών | Ἀβαδδών, ὁ | 3 | no | Abaddon |
2 | 3 | 1 | ἀβαρής | ἀβαρής, -ές | 4 | no | not burdensome |
3 | 4 | 3 | ἀββά | ἀββά, ὁ | 5 | no | Father |
4 | 5 | 4 | Ἅβελ | Ἅβελ, ὁ | 6 | no | Abel |
5 | 6 | 3 | Ἀβιά | Ἀβιά, ὁ | 7 | no | Abijah |
6 | 7 | 1 | Ἀβιαθάρ | Ἀβιαθάρ, ὁ | 8 | no | Abiathar |
7 | 8 | 1 | Ἀβιληνή | Ἀβιληνή, -ῆς, ἡ | 9 | no | Abilene |
8 | 9 | 2 | Ἀβιούδ | Ἀβιούδ, ὁ | 10 | no | Abiud |
9 | 10 | 73 | Ἀβραάμ | Ἀβραάμ, ὁ | 11 | no | Abraham |
10 | 11 | 9 | ἄβυσσος | ἄβυσσος, -ου, ἡ | 12 | no | abyss, unfathomable depth |
11 | 12 | 2 | Ἅγαβος | Ἅγαβος, -ου, ὁ | 13 | no | Agabus |
12 | 13 | 2 | ἀγαθοεργέω | ἀγαθοεργέω | 14 | no | perform good deeds |
13 | 14 | 9 | ἀγαθοποιέω | ἀγαθοποιέω | 15 | no | do that which is good |
14 | 15 | 1 | ἀγαθοποιΐα | ἀγαθοποιΐα, -ας, ἡ | 16 | no | doing of that which is good |
15 | 16 | 1 | ἀγαθοποιός | ἀγαθοποιός, -οῦ, ὁ | 17 | no | a doer of that which is good |
16 | 17 | 102 | ἀγαθός | ἀγαθός, -ή, -όν | 18 | no | good |
17 | 18 | 4 | ἀγαθωσύνη | ἀγαθωσύνη, -ης, ἡ | 19 | no | goodness |
18 | 19 | 5 | ἀγαλλίασις | ἀγαλλίασις, -εως, ἡ | 20 | no | exultation, exhilaration |
19 | 20 | 11 | ἀγαλλιάω | ἀγαλλιάω | 21 | no | exult, am full of joy |
# Narrow the dictionary to the columns needed for the gloss features:
# the lexeme, its dictionary form, the Strong's number and the gloss.
wanted_columns = ['Lexeme', 'Lexeme_dict', "Strong's number", 'gloss']
BOLgreekDICT = BOLgreekDICT.loc[:, wanted_columns]
BOLgreekDICT.head(10)
Lexeme | Lexeme_dict | Strong's number | gloss | |
---|---|---|---|---|
0 | Ἀαρών | Ἀαρών, ὁ | 2 | Aaron |
1 | Ἀβαδδών | Ἀβαδδών, ὁ | 3 | Abaddon |
2 | ἀβαρής | ἀβαρής, -ές | 4 | not burdensome |
3 | ἀββά | ἀββά, ὁ | 5 | Father |
4 | Ἅβελ | Ἅβελ, ὁ | 6 | Abel |
5 | Ἀβιά | Ἀβιά, ὁ | 7 | Abijah |
6 | Ἀβιαθάρ | Ἀβιαθάρ, ὁ | 8 | Abiathar |
7 | Ἀβιληνή | Ἀβιληνή, -ῆς, ἡ | 9 | Abilene |
8 | Ἀβιούδ | Ἀβιούδ, ὁ | 10 | Abiud |
9 | Ἀβραάμ | Ἀβραάμ, ὁ | 11 | Abraham |
BOLgreekDICT.describe()
Strong's number | |
---|---|
count | 5433.000000 |
mean | 2798.407878 |
std | 1638.197697 |
min | 1.000000 |
25% | 1370.000000 |
50% | 2754.000000 |
75% | 4237.000000 |
max | 5624.000000 |
Let's load the SBLGNT lemmas:
SBLGNTlemmas=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\t',encoding='utf-8')
pd.set_option('display.max_columns', 50)
SBLGNTlemmas.head(2)
0 | |
---|---|
0 | βίβλος |
1 | γένεσις |
# Keep only the lemma strings (unnamed column 0) and call the column 'Lexeme',
# the same name the dictionary frame uses for its lexeme column ...
SBLGNTlemmas = SBLGNTlemmas[[0]].rename(columns={0: 'Lexeme'})
# ... and put the 1-based row position in front of it as 'orig_order'.
SBLGNTlemmas.insert(0, 'orig_order', SBLGNTlemmas.index + 1)
SBLGNTlemmas.head(5)
orig_order | Lexeme | |
---|---|---|
0 | 1 | βίβλος |
1 | 2 | γένεσις |
2 | 3 | Ἰησοῦς |
3 | 4 | Χριστός |
4 | 5 | υἱός |
SBLGNTlemmas.describe()
orig_order | |
---|---|
count | 137554.000000 |
mean | 68777.500000 |
std | 39708.563801 |
min | 1.000000 |
25% | 34389.250000 |
50% | 68777.500000 |
75% | 103165.750000 |
max | 137554.000000 |
Now let's try a merge of the two files:
import unicodedata


def _nfc(value):
    """Return *value* in Unicode NFC form; non-strings (e.g. NaN) pass through."""
    return unicodedata.normalize('NFC', value) if isinstance(value, str) else value


# Join the dictionary data (dictionary form, Strong's number, gloss) onto the
# SBLGNT lemmas through the shared 'Lexeme' column.
#
# Both key columns are first brought into the same Unicode normalization form.
# Merging the raw strings left lemmas such as βίβλος and γένεσις without any
# dictionary data while Ἰησοῦς matched -- presumably the two sources encode the
# acute accent with different code points (tonos vs. oxia); TODO confirm.
# NFC maps canonically equivalent spellings onto one form, so such lemmas
# compare equal again.
#
# NOTE(review): the join is kept as an outer join. The result can hold more
# rows than there are words whenever one Lexeme has several dictionary
# entries, so it is not row-aligned with the word list as it stands.
SBLGNTglosses = pd.merge(
    SBLGNTlemmas.assign(Lexeme=SBLGNTlemmas['Lexeme'].map(_nfc)),
    BOLgreekDICT.assign(Lexeme=BOLgreekDICT['Lexeme'].map(_nfc)),
    on='Lexeme',
    how='outer',
)
SBLGNTglosses.head(5)
orig_order | Lexeme | Lexeme_dict | Strong's number | gloss | |
---|---|---|---|---|---|
0 | 1.0 | βίβλος | NaN | NaN | NaN |
1 | 26440.0 | βίβλος | NaN | NaN | NaN |
2 | 31717.0 | βίβλος | NaN | NaN | NaN |
3 | 45660.0 | βίβλος | NaN | NaN | NaN |
4 | 64886.0 | βίβλος | NaN | NaN | NaN |
SBLGNTglosses.describe()
orig_order | Strong's number | |
---|---|---|
count | 138318.000000 | 54761.000000 |
mean | 68756.590379 | 3008.488377 |
std | 39712.532678 | 1209.032138 |
min | 1.000000 | 1.000000 |
25% | 34366.250000 | 2041.000000 |
50% | 68753.500000 | 3588.000000 |
75% | 103162.750000 | 3706.000000 |
max | 137554.000000 | 5624.000000 |
SBLGNTglosses.head(20)
orig_order | Lexeme | Lexeme_dict | Strong's number | gloss | |
---|---|---|---|---|---|
0 | 1.0 | βίβλος | NaN | NaN | NaN |
1 | 26440.0 | βίβλος | NaN | NaN | NaN |
2 | 31717.0 | βίβλος | NaN | NaN | NaN |
3 | 45660.0 | βίβλος | NaN | NaN | NaN |
4 | 64886.0 | βίβλος | NaN | NaN | NaN |
5 | 68873.0 | βίβλος | NaN | NaN | NaN |
6 | 76865.0 | βίβλος | NaN | NaN | NaN |
7 | 107214.0 | βίβλος | NaN | NaN | NaN |
8 | 128928.0 | βίβλος | NaN | NaN | NaN |
9 | 136490.0 | βίβλος | NaN | NaN | NaN |
10 | 2.0 | γένεσις | NaN | NaN | NaN |
11 | 281.0 | γένεσις | NaN | NaN | NaN |
12 | 29821.0 | γένεσις | NaN | NaN | NaN |
13 | 120472.0 | γένεσις | NaN | NaN | NaN |
14 | 121080.0 | γένεσις | NaN | NaN | NaN |
15 | 3.0 | Ἰησοῦς | Ἰησοῦς | 2424.0 | Jesus |
16 | 243.0 | Ἰησοῦς | Ἰησοῦς | 2424.0 | Jesus |
17 | 278.0 | Ἰησοῦς | Ἰησοῦς | 2424.0 | Jesus |
18 | 357.0 | Ἰησοῦς | Ἰησοῦς | 2424.0 | Jesus |
19 | 436.0 | Ἰησοῦς | Ἰησοῦς | 2424.0 | Jesus |
SBLGNTglosses.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/SBLGNTglosses.xlsx')
TISCH = use('tisch', hoist=globals())
This is Text-Fabric 9.1.7 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 31 features found and 0 ignored
# All words of Matthew 1 in the Tischendorf corpus.
# Text-Fabric search templates express containment by indentation, so the
# chapter is nested in the book and the words in the chapter. Without the
# indentation the three lines are unrelated objects and the query returns
# every combination of them.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
Tisch1 = '''
book book=Matthew book_code*
  chapter chapter=1
    word lex_og*
'''
Tisch1 = TISCH.search(Tisch1)
TISCH.show(Tisch1, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'verse','vrsnum'})
0.22s 435 results
verse 1
SBLGNT = use('SBLGNT/tf/6.12', hoist=globals())
This is Text-Fabric 9.1.7 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 29 features found and 0 ignored
# All words of Revelation 1 together with the word features to be displayed
# (a trailing * matches any value and only serves to show the feature).
# Text-Fabric search templates express containment by indentation; without it
# the five lines are unrelated objects and the query returns every combination
# of them.
# NOTE(review): the book > chapter > sentence > clause > word nesting is
# reconstructed from the recorded result count -- confirm.
Search1 = '''
book book=Revelation book_code*
  chapter chapter=1
    sentence
      clause
        word lemma* morph* normalized_word* case* degree* gn* mood* nu* orig_order* ps* voice* vt* lemma_translit* dict_abc_order* lemma_freq* lemma_dictform* lemma_freq* lemma_gloss* lemma_strongs*
'''
Search1 = SBLGNT.search(Search1)
SBLGNT.show(Search1, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'verse','vrsnum'})
2.64s 469 results
verse 1
# Clauses in which μαρτυρία is immediately followed (<:) by genitive Ἰησοῦς.
# Text-Fabric search templates express containment by indentation, so both
# words are indented under the clause they must belong to; the relation line
# then refers to them by name.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
TestimonyOfJesus = '''
clause
  w1:word lemma=μαρτυρία
  w2:word lemma=Ἰησοῦς case=genitive
w1 <: w2
'''
TestimonyOfJesus=SBLGNT.search(TestimonyOfJesus)
SBLGNT.show(TestimonyOfJesus, start=1, end=10, condensed=True, colorMap={2: 'red', 3: 'orange'})
0.28s 6 results
verse 1
verse 2
verse 3
verse 4
verse 5
In Col 2:16-17 we find a syntactical construction that involves a series of conjunctions (καί and ἤ). The critical edition of the NA28 presents the text in the following way:
Scholars analyze the syntactical structure differently. Below you see a comparison between the Cascadia and the OpenText analysis. One needs to keep in mind that the NA edition renders the text slightly differently (see ἤ => καί) than the Mehrheitstext that is followed by the SBL edition (see καί => ἤ):
Let's look first at the conjunction sequence that we have in the NA28. Does it appear elsewhere in the SBLGNT text?
# Verses that contain, in this order, some word, a καί and two ἤ.
# Text-Fabric search templates express containment by indentation, so the four
# words are indented under the verse they must belong to; the relation lines
# (a < b: a comes before b) then refer to them by name.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
KaiHeHe ='''
v1:verse
  w1:word
  w2:word lemma=καί
  w3:word lemma=ἤ
  w4:word lemma=ἤ
w1 < w2
w2 < w3
w3 < w4
'''
KaiHeHe=SBLGNT.search(KaiHeHe)
SBLGNT.show(KaiHeHe, start=1, end=20, condensed=True, colorMap={2: 'grey', 3: 'red', 4: 'magenta', 5: 'magenta'})
0.50s 114 results
verse 1
verse 2
verse 3
verse 4
verse 5
verse 6
verse 7
# Same word sequence as the previous query (some word, καί, ἤ, ἤ within one
# verse), but with additional distance limits between the verse start and the
# words and between the consecutive words (the numbered operators =20:, <10:
# and <6:).
# Text-Fabric search templates express containment by indentation, so the four
# words are indented under the verse they must belong to; the relation lines
# then refer to them by name.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
KaiHeHe2 ='''
v1:verse
  w1:word
  w2:word lemma=καί
  w3:word lemma=ἤ
  w4:word lemma=ἤ
v1 =20: w1
w1 < w2
w1 <10: w2
w2 < w3
w2 <6: w3
w3 < w4
w3 <6: w4
'''
KaiHeHe2=SBLGNT.search(KaiHeHe2)
SBLGNT.show(KaiHeHe2, start=1, end=200, condensed=True, colorMap={2: 'grey', 3: 'red', 4: 'magenta', 5: 'magenta'})
0.49s 21 results
verse 1
verse 2
No "either ... or" construction in:
The only exception is Mk 13:35. But the construction does not trigger the "either ... or" function (see also modern translations). It rather hints at a text-critical issue, which is also well documented in the text-critical apparatus:
Consequently, the proper translation is "X or Y or Z...".
Conclusion: it is highly unlikely that Col 2:16 resembles an "either ... or" construction. By default, if three ἢ conjunctions appear in a sequence, they trigger the meaning "or ... or".
The following query does not exclude a preceding καὶ:
# Verses that contain three ἤ in sequence (each `<` line requires its word to
# come after the word on the line before).
# Text-Fabric search templates express containment by indentation, so the
# three words are indented, at the same level, under the verse they must
# belong to.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
HeHeHe ='''
verse
  word lemma=ἤ
  < word lemma=ἤ
  < word lemma=ἤ
'''
HeHeHe=SBLGNT.search(HeHeHe)
SBLGNT.show(HeHeHe, start=1, end=10, condensed=True, colorMap={2: 'magenta', 3: 'magenta', 4: 'magenta'})
0.44s 117 results
verse 1
verse 2
verse 3
verse 4
verse 5
verse 6
verse 7
verse 8
verse 9
verse 10
# Every nominative Ἰησοῦς together with its clause and its book.
# Text-Fabric search templates express containment by indentation, so the
# clause is nested in the book and the word in the clause. Without the
# indentation the three lines are unrelated objects and the query returns
# every combination of them.
# NOTE(review): nesting reconstructed from the recorded result count -- confirm.
JesusInNT = '''
book
  clause
    word lemma=Ἰησοῦς case=nominative
'''
JesusInNT=SBLGNT.search(JesusInNT)
SBLGNT.show(JesusInNT, start=1, end=3, condensed=True, colorMap={2: 'gold'})
0.21s 456 results
verse 1
verse 2
verse 3
# Write the search results to a TSV file and read that file straight back as
# a DataFrame; from here on the name JesusInNT refers to the DataFrame.
export_dir = 'D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/'
export_file = 'JesusInNT.tsv'
SBLGNT.export(JesusInNT, toDir=export_dir, toFile=export_file)
# the exported file is read as tab-separated UTF-16
JesusInNT = pd.read_csv(export_dir + export_file, sep='\t', encoding='utf-16')
JesusInNT.head()
R | S1 | S2 | S3 | NODE1 | TYPE1 | NODE2 | TYPE2 | TEXT2 | NODE3 | TYPE3 | TEXT3 | case3 | lemma3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Matthew | 1 | 16 | 137555 | book | 137882 | clause | ἐκ ἧς ἐγεννήθη Ἰησοῦς ὁ λεγόμενος Χριστός | 243 | word | Ἰησοῦς | nominative | Ἰησοῦς |
1 | 2 | Matthew | 3 | 13 | 137555 | book | 137984 | clause | τότε παραγίνεται ὁ Ἰησοῦς ἀπό τῆς Γαλιλαίας ἐπ... | 1133 | word | Ἰησοῦς | nominative | Ἰησοῦς |
2 | 3 | Matthew | 3 | 15 | 137555 | book | 137988 | clause | ἀποκριθείς δέ ὁ Ἰησοῦς εἶπε(ν) πρός αὐτόν | 1167 | word | Ἰησοῦς | nominative | Ἰησοῦς |
3 | 4 | Matthew | 3 | 16 | 137555 | book | 137992 | clause | βαπτισθείς δέ ὁ Ἰησοῦς εὐθύς ἀνέβη ἀπό τοῦ ὕδα... | 1187 | word | Ἰησοῦς | nominative | Ἰησοῦς |
4 | 5 | Matthew | 4 | 1 | 137555 | book | 137998 | clause | τότε ὁ Ἰησοῦς ἀνήχθη εἰς τήν ἔρημον ὑπό τοῦ πν... | 1227 | word | Ἰησοῦς | nominative | Ἰησοῦς |
# Horizontal bar chart: number of result rows per book (column S1), with the
# smallest count plotted first.
plt.figure(num=None, figsize=(5, 5), dpi=80, facecolor='w', edgecolor='k')
rows_per_book = JesusInNT.groupby("S1").size().sort_values(ascending=True)
rows_per_book.plot.barh()
plt.xlabel('occurence of Jesus as subject')
plt.ylabel('NT books')
plt.title('Jesus as subject (case=nominative)')
plt.show()
# Pie chart of the same per-book counts, left unsorted, labelled with
# rounded percentages.
book_shares = JesusInNT.S1.value_counts(sort=False)
book_shares.plot.pie(autopct='%1.0f%%', shadow=True, startangle=90)
plt.show()