from google.colab import files
# Upload files from the local machine into the Colab session; the key.json
# uploaded here is read further down when openai.api_key is set.
uploaded = files.upload()
print("done")
Saving key.json to key.json done
I'll install the API
get wic dataset
!pip install openai
import openai, json, pandas as pd, numpy as np, random
Collecting openai Downloading https://files.pythonhosted.org/packages/a8/65/c7461f4c87984534683f480ea5742777bc39bbf5721123194c2d0347dc1f/openai-0.2.4.tar.gz (157kB) |████████████████████████████████| 163kB 2.5MB/s Requirement already satisfied: requests>=2.20 in /usr/local/lib/python3.6/dist-packages (from openai) (2.23.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->openai) (2020.6.20) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->openai) (3.0.4) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->openai) (1.24.3) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.20->openai) (2.10) Building wheels for collected packages: openai Building wheel for openai (setup.py) ... done Created wheel for openai: filename=openai-0.2.4-cp36-none-any.whl size=170709 sha256=92cb1c67856702ddb35769c878ef0c3382cb45eb9af852fd3e93c31956798a12 Stored in directory: /root/.cache/pip/wheels/74/96/c8/c6e170929c276b836613e1b9985343b501fe455e53d85e7d48 Successfully built openai Installing collected packages: openai Successfully installed openai-0.2.4
Arguments to pass to the API: one set that stops after a single line, and one that stops after a blank line (two newlines).
# Keyword arguments forwarded to the completion API.
# kwargs: generation stops at the first newline (single-line answers).
kwargs = dict(engine="davinci", temperature=0, max_tokens=200, stop="\n")

# kwargs2: generation stops at a blank line (two consecutive newlines).
kwargs2 = dict(engine="davinci", temperature=0, max_tokens=200, stop="\n\n")

# kwargs2Short: same stop sequence as kwargs2, capped at 20 tokens.
kwargs2Short = dict(engine="davinci", temperature=0, max_tokens=20, stop="\n\n")
# Read the API key from the uploaded key.json (expects a top-level "key"
# entry). The context manager closes the file handle, which the bare open()
# call previously left open.
with open("key.json", "r") as keyFile:
    openai.api_key = json.load(keyFile)["key"]
def queryTwoLine(prompt, myKwargs = kwargs2):
    """
    Send ``prompt`` to the completion API and return the first choice's text.

    ``myKwargs`` is forwarded as keyword arguments to
    ``openai.Completion.create``; the default (``kwargs2``) stops generation
    at a blank line. Surrounding whitespace is stripped from the result.
    """
    response = openai.Completion.create(prompt=prompt, **myKwargs)
    firstChoice = response["choices"][0]
    return firstChoice["text"].strip()
def queryOneLine(prompt, myKwargs = kwargs):
    """
    Send ``prompt`` to the completion API and return the first choice's text.

    ``myKwargs`` is forwarded as keyword arguments to
    ``openai.Completion.create``; the default (``kwargs``) stops generation
    at the first newline. Surrounding whitespace is stripped from the result.
    """
    response = openai.Completion.create(prompt=prompt, **myKwargs)
    firstChoice = response["choices"][0]
    return firstChoice["text"].strip()
Test to make sure my query works
# Smoke-test the wrapper with an explicit copy of the one-line settings.
# The copy was previously built but never handed to the call; pass it so the
# override path is actually exercised (the values match the default, so the
# request sent is unchanged).
newKwargs = kwargs.copy()
newKwargs["stop"] = "\n"
queryOneLine("q: what is 1+1?\na:", myKwargs=newKwargs)
'2'
Get the WiC dataset
# Shell escape: download the WiC dataset archive into the working directory.
!wget https://pilehvar.github.io/wic/package/WiC_dataset.zip
--2020-08-01 01:40:25-- https://pilehvar.github.io/wic/package/WiC_dataset.zip Resolving pilehvar.github.io (pilehvar.github.io)... 185.199.108.153, 185.199.111.153, 185.199.110.153, ... Connecting to pilehvar.github.io (pilehvar.github.io)|185.199.108.153|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 275513 (269K) [application/zip] Saving to: ‘WiC_dataset.zip’ WiC_dataset.zip 100%[===================>] 269.06K --.-KB/s in 0.05s 2020-08-01 01:40:26 (5.00 MB/s) - ‘WiC_dataset.zip’ saved [275513/275513]
Unzip the dataset
import zipfile
# Extract every member of the downloaded archive into the working directory.
with zipfile.ZipFile("WiC_dataset.zip", mode="r") as wicArchive:
    wicArchive.extractall(path=".")
Read in train and add the T/F label for if they're the same
# Load the tab-separated training examples (the files have no header row) and
# name the columns while reading.
train = pd.read_csv(
    "train/train.data.txt",
    sep="\t",
    header=None,
    names=["target", "pos", "position", "context-1", "context-2"],
)
train_gold = pd.read_csv(
    "train/train.gold.txt",
    sep="\t",
    header=None,
    names=["label"],
)
# Put the gold label in front of the example columns, row for row.
train = pd.concat([train_gold, train], axis=1)
train.head()
label | target | pos | position | context-1 | context-2 | |
---|---|---|---|---|---|---|
0 | F | carry | V | 2-1 | You must carry your camping gear . | Sound carries well over water . |
1 | F | go | V | 2-6 | Messages must go through diplomatic channels . | Do you think the sofa will go through the door ? |
2 | F | break | V | 0-2 | Break an alibi . | The wholesaler broke the container loads into ... |
3 | T | cup | N | 8-4 | He wore a jock strap with a metal cup . | Bees filled the waxen cups with honey . |
4 | F | academy | N | 1-2 | The Academy of Music . | The French Academy . |
Def to bootstrap meanings; this prompt asks it what a term means in context, then returns the question + response
def getContextualNounExample(content, term):
    """
    Ask the model what the noun ``term`` means in the sentence ``content``.

    Returns the prompt followed by a space and the model's one-line
    completion, so the whole string can be reused as a few-shot example.
    """
    # Only the sentence is interpolated here. ``term`` used to be passed as a
    # second, unused argument, which str.format silently ignored.
    prompt = "Tom said '{}'\n".format(content)
    prompt += "I asked Tom what '{}' specifically means in this context, he clarified it is another word for".format(term)
    return prompt + " " + queryOneLine(prompt)
Get responses for the last rows of the training set to use as my few-shot examples, and print them to make sure they make sense.
# Bootstrap the noun few-shot examples from the last ten noun rows of train.
fewShotsNounDefinitions = []
for _, nounRow in train[train.pos == "N"].tail(10).iterrows():
    r = getContextualNounExample(nounRow["context-1"], nounRow["target"])
    print(r)
    # Flatten the two-line example onto a single line before storing it.
    fewShotsNounDefinitions.append(r.replace("\n", " "))
Tom said 'The cinema relies on apparent motion .' I asked Tom what 'motion' specifically means in this context, he clarified it is another word for 'movement'. Tom said 'It vanished into the night .' I asked Tom what 'night' specifically means in this context, he clarified it is another word for 'darkness'. Tom said 'He threw the ball into the air .' I asked Tom what 'air' specifically means in this context, he clarified it is another word for 'sky'. Tom said 'Those clouds show little sign of raining soon .' I asked Tom what 'sign' specifically means in this context, he clarified it is another word for 'signs' or 'indications'. Tom said 'We added a new rosebush to our rose bed .' I asked Tom what 'bed' specifically means in this context, he clarified it is another word for 'garden' or 'yard' Tom said 'His state of health .' I asked Tom what 'state' specifically means in this context, he clarified it is another word for 'condition'. Tom said 'Likes a drink before dinner .' I asked Tom what 'drink' specifically means in this context, he clarified it is another word for 'alcoholic drink'. Tom said 'Piecas kronas — five krona .' I asked Tom what 'krona' specifically means in this context, he clarified it is another word for 'coin'. Tom said 'The harder the conflict the more glorious the triumph "-- Thomas Paine .' I asked Tom what 'conflict' specifically means in this context, he clarified it is another word for war. Tom said 'An invasion of bees .' I asked Tom what 'invasion' specifically means in this context, he clarified it is another word for 'attack' or 'assault'.
# List the examples with their positions so individual ones can be picked
# out by index.
for i, definition in enumerate(fewShotsNounDefinitions):
    print("{}: {}".format(i, definition))
0: Tom said 'The cinema relies on apparent motion .' I asked Tom what 'motion' specifically means in this context, he clarified it is another word for 'movement'. 1: Tom said 'It vanished into the night .' I asked Tom what 'night' specifically means in this context, he clarified it is another word for 'darkness'. 2: Tom said 'He threw the ball into the air .' I asked Tom what 'air' specifically means in this context, he clarified it is another word for 'sky'. 3: Tom said 'Those clouds show little sign of raining soon .' I asked Tom what 'sign' specifically means in this context, he clarified it is another word for 'signs' or 'indications'. 4: Tom said 'We added a new rosebush to our rose bed .' I asked Tom what 'bed' specifically means in this context, he clarified it is another word for 'garden' or 'yard' 5: Tom said 'His state of health .' I asked Tom what 'state' specifically means in this context, he clarified it is another word for 'condition'. 6: Tom said 'Likes a drink before dinner .' I asked Tom what 'drink' specifically means in this context, he clarified it is another word for 'alcoholic drink'. 7: Tom said 'Piecas kronas — five krona .' I asked Tom what 'krona' specifically means in this context, he clarified it is another word for 'coin'. 8: Tom said 'The harder the conflict the more glorious the triumph "-- Thomas Paine .' I asked Tom what 'conflict' specifically means in this context, he clarified it is another word for war. 9: Tom said 'An invasion of bees .' I asked Tom what 'invasion' specifically means in this context, he clarified it is another word for 'attack' or 'assault'.
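Examples 3 and 8 look bad and can be discarded manually; in this run the exclusion list is left empty, so all ten examples are kept.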
# Indices of few-shot examples to leave out of the prompt. The list is empty,
# so every example is kept (alternative kept for reference: bad = [3, 8]).
bad = []
fewShotsNounPrompt = "\n".join(
    definition
    for i, definition in enumerate(fewShotsNounDefinitions)
    if i not in bad
)
def getContextualVerbExample(content, term):
    """
    Ask the model what the verb ``term`` means in the sentence ``content``.

    The prompt ends with an opening ``'to`` so the completion continues the
    infinitive. Returns the prompt followed by a space and the model's
    one-line completion, for reuse as a few-shot example.
    """
    # Only the sentence is interpolated here. ``term`` used to be passed as a
    # second, unused argument, which str.format silently ignored.
    prompt = "Tom said '{}'\n".format(content)
    prompt += "I asked Tom what 'to {}' specifically means in this context, he explained it was another word for 'to".format(term)
    return prompt + " " + queryOneLine(prompt)
# Bootstrap the verb few-shot examples from the last ten verb rows of train.
fewShotsVerbDefinitions = []
for _, verbRow in train[train.pos == "V"][-10:].iterrows():
    r = getContextualVerbExample(verbRow["context-1"], verbRow["target"])
    print(r)
    # Flatten the two-line example onto a single line before storing it.
    fewShotsVerbDefinitions.append(r.replace("\n", " "))
Tom said 'Sanitize the language in a book .' I asked Tom what 'to sanitize' specifically means in this context, he explained it was another word for 'to clean up' or 'to edit'. Tom said 'You are trying my patience .' I asked Tom what 'to try' specifically means in this context, he explained it was another word for 'to test' or 'to attempt'. Tom said 'How would you classify these pottery shards -- are they prehistoric ?' I asked Tom what 'to classify' specifically means in this context, he explained it was another word for 'to classify' Tom said 'My daughter 's fancy wedding is going to break me !' I asked Tom what 'to break' specifically means in this context, he explained it was another word for 'to bankrupt' or 'to ruin' Tom said 'Render thanks .' I asked Tom what 'to render' specifically means in this context, he explained it was another word for 'to give' or 'to offer' and that it was a very old word. Tom said 'I drive to work every day .' I asked Tom what 'to drive' specifically means in this context, he explained it was another word for 'to go'. Tom said 'Keep open the possibility of a merger .' I asked Tom what 'to keep' specifically means in this context, he explained it was another word for 'to consider'. Tom said 'Cats stretch with equal ease and agility beyond the point that breaks a man on the rack .' I asked Tom what 'to stretch' specifically means in this context, he explained it was another word for 'to reach' or 'to extend' and that it was a common word in the English language. Tom said 'Answer the riddle .' I asked Tom what 'to answer' specifically means in this context, he explained it was another word for 'to solve' or 'to work out'. Tom said 'Play the casinos in Trouville .' I asked Tom what 'to play' specifically means in this context, he explained it was another word for 'to cheat' or 'to swindle'.
# List the examples with their positions so individual ones can be picked
# out by index.
for i, definition in enumerate(fewShotsVerbDefinitions):
    print("{}: {}".format(i, definition))
0: Tom said 'Sanitize the language in a book .' I asked Tom what 'to sanitize' specifically means in this context, he explained it was another word for 'to clean up' or 'to edit'. 1: Tom said 'You are trying my patience .' I asked Tom what 'to try' specifically means in this context, he explained it was another word for 'to test' or 'to attempt'. 2: Tom said 'How would you classify these pottery shards -- are they prehistoric ?' I asked Tom what 'to classify' specifically means in this context, he explained it was another word for 'to classify' 3: Tom said 'My daughter 's fancy wedding is going to break me !' I asked Tom what 'to break' specifically means in this context, he explained it was another word for 'to bankrupt' or 'to ruin' 4: Tom said 'Render thanks .' I asked Tom what 'to render' specifically means in this context, he explained it was another word for 'to give' or 'to offer' and that it was a very old word. 5: Tom said 'I drive to work every day .' I asked Tom what 'to drive' specifically means in this context, he explained it was another word for 'to go'. 6: Tom said 'Keep open the possibility of a merger .' I asked Tom what 'to keep' specifically means in this context, he explained it was another word for 'to consider'. 7: Tom said 'Cats stretch with equal ease and agility beyond the point that breaks a man on the rack .' I asked Tom what 'to stretch' specifically means in this context, he explained it was another word for 'to reach' or 'to extend' and that it was a common word in the English language. 8: Tom said 'Answer the riddle .' I asked Tom what 'to answer' specifically means in this context, he explained it was another word for 'to solve' or 'to work out'. 9: Tom said 'Play the casinos in Trouville .' I asked Tom what 'to play' specifically means in this context, he explained it was another word for 'to cheat' or 'to swindle'.
# Indices of few-shot examples to leave out of the prompt. The list is empty,
# so every example is kept (alternative kept for reference: bad = [2, 7]).
bad = []
fewShotsVerbPrompt = "\n".join(
    definition
    for i, definition in enumerate(fewShotsVerbDefinitions)
    if i not in bad
)
Now use the few-shot examples for my actual method, and allow the previous answer to be stuffed into the context: if the meaning is the same the model will likely choose it again, and if the meanings differ it will hopefully pick up on the difference and not choose it again. I use presence_penalty to increase the odds of choosing a new token.
The value of that penalty is potentially a parameter to tune.
def getContextualNounMeaning(content, term, contexts=None):
    """
    Return the model's gloss for the noun ``term`` as used in ``content``.

    The prompt starts with ``fewShotsNounPrompt``. Each entry of ``contexts``
    (a dict with "content", "term" and "meaning" keys) is appended as an
    already-answered example before the real question, so a previous answer
    can be shown to the model. The reply is limited to one line and 20
    tokens, and is returned with a leading single quote guaranteed.
    """
    # None replaces the former mutable ``[]`` default, which was one list
    # object shared across every call.
    if contexts is None:
        contexts = []
    prompt = fewShotsNounPrompt
    for context in contexts:
        prompt += "\nTom said '{}' ".format(context["content"])
        prompt += " I asked Tom what '{}' specifically means in this context, he clarified it is another word for {}".format(context["term"], context["meaning"])
    # Only the sentence is interpolated; the unused extra ``term`` argument
    # that str.format ignored has been dropped.
    prompt += "\nTom said '{}' ".format(content)
    prompt += " I asked Tom what '{}' specifically means in this context, he clarified it is another word for".format(term)
    r = queryOneLine(prompt, myKwargs={'engine': 'davinci', 'max_tokens': 20, 'stop': '\n', 'temperature': 0, "presence_penalty": .2})
    # Normalise the answer so it always opens with a quote.
    if not r.startswith("'"):
        r = "'" + r
    return r
def getContextualVerbMeaning(content, term, contexts=None):
    """
    Return the model's gloss for the verb ``term`` as used in ``content``.

    The prompt starts with ``fewShotsVerbPrompt``. Each entry of ``contexts``
    (a dict with "content", "term" and "meaning" keys) is appended as an
    already-answered example before the real question. The reply is limited
    to one line and 20 tokens, and is returned with a leading single quote
    guaranteed.
    """
    # None replaces the former mutable ``[]`` default, which was one list
    # object shared across every call.
    if contexts is None:
        contexts = []
    prompt = fewShotsVerbPrompt
    for context in contexts:
        prompt += "\nTom said '{}'".format(context["content"])
        prompt += " I asked Tom what 'to {}' specifically means in this context, he explained it was another word for {}".format(context["term"], context["meaning"])
    prompt += "\nTom said '{}'".format(content)
    prompt += " I asked Tom what 'to {}' specifically means in this context, he explained it was another word for 'to".format(term)
    r = queryOneLine(prompt, myKwargs={'engine': 'davinci', 'max_tokens': 20, 'stop': '\n', 'temperature': 0, "presence_penalty": .2})
    # NOTE(review): the prompt already ends with "'to", so the completion
    # continues after it and only the quote is restored here. The first gloss
    # therefore comes back without its "to" (e.g. 'pay' or 'to cover'.).
    # Left as-is so existing results stay reproducible; confirm whether
    # "'to " should be prepended instead.
    if not r.startswith("'"):
        r = "'" + r
    return r
Now I make a method to build comparison examples; takes two sentences and a label (which is the actual answer if they're true/false); it generates an example
def generateComparisonExample(s1, s2, label):
    """
    Build one answered comparison example for the few-shot prompt.

    ``s1`` and ``s2`` are the two texts attributed to Tom and Jerry. The
    answer is " Yes" when ``label`` is "T" and " No" for any other label.

    NOTE(review): this statement has no ';' between the two clauses, while the
    query prompts built in generateNounComparison / generateVerbComparison do
    ("...{}; Jerry says..."). Confirm whether the mismatch is intended.
    """
    answer = " Yes" if label == "T" else " No"
    statement = "Tom says that this means {} Jerry says this means {}\n".format(s1, s2)
    question = "Q: Are Tom and Jerry basically saying the same thing here?\nA:"
    return statement + question + answer
Now generate a list of examples from the builder method. I'll be able to select them from the list, but I'll end up just joining all of them together for the actual prompt (possibly something to use to improve)
# Build answered comparison examples from the verb rows at positions
# [-20:-12] of train, which do not overlap the last ten rows used for the
# definition few-shots.
comparisonFewShotVerbExamples = []
for _, verbRow in train[train.pos == "V"][-20:-12].iterrows():
    s1 = verbRow["context-1"]
    s2 = verbRow["context-2"]
    label = verbRow["label"]
    target = verbRow["target"]
    r1 = getContextualVerbMeaning(s1, target)
    # Show the model its answer for the first sentence when asking about the
    # second one.
    firstAnswer = {"content": s1, "term": target, "meaning": r1}
    r2 = getContextualVerbMeaning(s2, target, contexts=[firstAnswer])
    # Print the first two pairs as a sanity check.
    if len(comparisonFewShotVerbExamples) < 2:
        for shown in (s1, s2, r1, r2):
            print(shown)
    comparisonFewShotVerbExamples.append(generateComparisonExample(r1, r2, label))
The company agrees to meet the cost of any repairs . Does this paper meet the requirements for the degree ? 'pay' or 'to cover'. 'comply' or 'to satisfy'. You anger too easily . He angers easily . 'annoy' or 'to irritate'. 'annoy' or 'to irritate'.
# Build answered comparison examples from the noun rows at positions
# [-20:-12] of train, which do not overlap the last ten rows used for the
# definition few-shots.
comparisonFewShotNounExamples = []
for _, nounRow in train[train.pos == "N"][-20:-12].iterrows():
    s1 = nounRow["context-1"]
    s2 = nounRow["context-2"]
    label = nounRow["label"]
    target = nounRow["target"]
    r1 = getContextualNounMeaning(s1, target)
    # Show the model its answer for the first sentence when asking about the
    # second one.
    firstAnswer = {"content": s1, "term": target, "meaning": r1}
    r2 = getContextualNounMeaning(s2, target, contexts=[firstAnswer])
    # Print the first three pairs as a sanity check.
    if len(comparisonFewShotNounExamples) < 3:
        for shown in (s1, s2, r1, r2):
            print(shown)
    comparisonFewShotNounExamples.append(generateComparisonExample(r1, r2, label))
I do it for the fun of it . He is fun to have around . 'pleasure'. 'funny'. A history of France . A critical time in the school 's history . 'past'. 'past'. The rate of production at the factory is skyrocketing . He works at a great rate . 'speed'. 'speed'.
Now I have my actual comparison prompt that uses the few shots from above to let it know I just want a Yes/No answer.
def generateNounComparison(s1, s2):
    """
    Ask the model whether the two noun glosses ``s1`` and ``s2`` agree.

    The prompt is the noun comparison few-shot examples separated by blank
    lines, followed by the unanswered question for this pair. Returns the
    model's stripped answer text, queried with the ``kwargs2Short`` settings.
    """
    pieces = [
        "\n\n".join(comparisonFewShotNounExamples),
        "\n\n",
        "Tom says that this means {}; Jerry says this means {}\n".format(s1, s2),
        "Q: Are Tom and Jerry basically saying the same thing here?\nA:",
    ]
    return queryTwoLine("".join(pieces), myKwargs=kwargs2Short)
def generateVerbComparison(s1, s2):
    """
    Ask the model whether the two verb glosses ``s1`` and ``s2`` agree.

    The prompt is the verb comparison few-shot examples separated by blank
    lines, followed by the unanswered question for this pair. Returns the
    model's stripped answer text, queried with the ``kwargs2Short`` settings.
    """
    pieces = [
        "\n\n".join(comparisonFewShotVerbExamples),
        "\n\n",
        "Tom says that this means {}; Jerry says this means {}\n".format(s1, s2),
        "Q: Are Tom and Jerry basically saying the same thing here?\nA:",
    ]
    return queryTwoLine("".join(pieces), myKwargs=kwargs2Short)
Test to make sure it works OK
# Spot-check the full pipeline on five training rows and compare the model's
# answer with the gold label.
for _, sample in train[20:25].iterrows():
    s1 = sample["context-1"]
    s2 = sample["context-2"]
    label = sample["label"]
    target = sample["target"]
    # Nouns and verbs use their own prompts; anything that is not "N" goes
    # down the verb path.
    if sample["pos"] == "N":
        getMeaning, compareMeanings = getContextualNounMeaning, generateNounComparison
    else:
        getMeaning, compareMeanings = getContextualVerbMeaning, generateVerbComparison
    r1 = getMeaning(s1, target)
    r2 = getMeaning(s2, target, contexts=[{"content": s1, "term": target, "meaning": r1}])
    r = compareMeanings(r1, r2)
    print("'{}' v '{}'".format(s1, s2))
    print("{} v {}".format(r1, r2))
    print("returned: {}".format(r))
    print("actual: {}".format(label))
'This speech did n't play well with the American public .' v 'Play football .' 'perform' or 'to act'. v 'compete' or 'to engage in a contest'. returned: No actual: F 'What do you make of his remarks ?' v 'Make a dress .' 'create' or 'to produce'. v 'sew' or 'to stitch'. returned: No actual: F 'The so-called glorious experiment came to an inglorious end .' v 'The end was exciting .' 'conclusion'. v 'conclusion'. returned: Yes actual: F 'There was a break in the action when a player was hurt .' v 'He was up two breaks in the second set .' 'pause'. v 'game'. returned: No actual: F 'It was a heavy play and the actors tried in vain to give life to it .' v 'The life of this milk carton may be thousands of years in this landfill .' 'success'. v 'existence'. returned: No actual: F
k, so now I'll load the dev set
# Load the tab-separated dev examples (the files have no header row) and name
# the columns while reading.
dev = pd.read_csv(
    "dev/dev.data.txt",
    sep="\t",
    header=None,
    names=["target", "pos", "position", "context-1", "context-2"],
)
dev_gold = pd.read_csv(
    "dev/dev.gold.txt",
    sep="\t",
    header=None,
    names=["label"],
)
# Put the gold label in front of the example columns, row for row.
dev = pd.concat([dev_gold, dev], axis=1)
Keep track of right/wrong as I go along
# Per-row results keyed by dev row index, plus running tallies of rows scored
# and rows answered correctly.
devResults = dict()
correct = complete = 0
check entire dev set
# Score every dev row: gloss both sentences, ask whether the glosses agree,
# and tally the answer against the gold label.
for rowIndex, example in dev.iterrows():
    # Rows already present in devResults are skipped.
    if rowIndex in devResults:
        continue
    s1 = example["context-1"]
    s2 = example["context-2"]
    label = example["label"]
    target = example["target"]
    pos = example["pos"]
    # Nouns and verbs use their own prompts; anything that is not "N" goes
    # down the verb path.
    if pos == "N":
        getMeaning, compareMeanings = getContextualNounMeaning, generateNounComparison
    else:
        getMeaning, compareMeanings = getContextualVerbMeaning, generateVerbComparison
    r1 = getMeaning(s1, target)
    r2 = getMeaning(s2, target, contexts=[{"content": s1, "term": target, "meaning": r1}])
    r = compareMeanings(r1, r2)
    devResults[rowIndex] = {
        "s1": s1,
        "s2": s2,
        "pos": pos,
        "target": target,
        "pred": r,
        "actual": label,
    }
    complete += 1
    # "T" is correct only with an exact "Yes", "F" only with an exact "No";
    # any other label never counts as correct.
    expectedAnswer = {"T": "Yes", "F": "No"}.get(label)
    if r.strip() == expectedAnswer:
        correct += 1
    # Progress report on every row whose index is a multiple of ten.
    if rowIndex % 10 == 0:
        print("Complete: {} Correct: {} Wrong: {}".format(complete, correct, complete - correct))
Complete: 1 Correct: 1 Wrong: 0 Complete: 11 Correct: 7 Wrong: 4 Complete: 21 Correct: 15 Wrong: 6 Complete: 31 Correct: 23 Wrong: 8 Complete: 41 Correct: 29 Wrong: 12 Complete: 51 Correct: 37 Wrong: 14 Complete: 61 Correct: 44 Wrong: 17 Complete: 71 Correct: 50 Wrong: 21 Complete: 81 Correct: 58 Wrong: 23 Complete: 91 Correct: 63 Wrong: 28 Complete: 101 Correct: 71 Wrong: 30 Complete: 111 Correct: 81 Wrong: 30 Complete: 121 Correct: 87 Wrong: 34 Complete: 131 Correct: 97 Wrong: 34 Complete: 141 Correct: 105 Wrong: 36 Complete: 151 Correct: 115 Wrong: 36 Complete: 161 Correct: 117 Wrong: 44 Complete: 171 Correct: 123 Wrong: 48 Complete: 181 Correct: 130 Wrong: 51 Complete: 191 Correct: 139 Wrong: 52 Complete: 201 Correct: 145 Wrong: 56 Complete: 211 Correct: 152 Wrong: 59 Complete: 221 Correct: 158 Wrong: 63 Complete: 231 Correct: 166 Wrong: 65 Complete: 241 Correct: 174 Wrong: 67 Complete: 251 Correct: 181 Wrong: 70 Complete: 261 Correct: 187 Wrong: 74 Complete: 271 Correct: 194 Wrong: 77 Complete: 281 Correct: 201 Wrong: 80 Complete: 291 Correct: 210 Wrong: 81 Complete: 301 Correct: 217 Wrong: 84 Complete: 311 Correct: 223 Wrong: 88 Complete: 321 Correct: 230 Wrong: 91 Complete: 331 Correct: 239 Wrong: 92 Complete: 341 Correct: 243 Wrong: 98 Complete: 351 Correct: 251 Wrong: 100 Complete: 361 Correct: 257 Wrong: 104 Complete: 371 Correct: 264 Wrong: 107 Complete: 381 Correct: 268 Wrong: 113 Complete: 391 Correct: 273 Wrong: 118 Complete: 401 Correct: 280 Wrong: 121 Complete: 411 Correct: 286 Wrong: 125 Complete: 421 Correct: 292 Wrong: 129 Complete: 431 Correct: 298 Wrong: 133 Complete: 441 Correct: 304 Wrong: 137 Complete: 451 Correct: 313 Wrong: 138 Complete: 461 Correct: 320 Wrong: 141 Complete: 471 Correct: 328 Wrong: 143 Complete: 481 Correct: 333 Wrong: 148 Complete: 491 Correct: 340 Wrong: 151 Complete: 501 Correct: 346 Wrong: 155 Complete: 511 Correct: 355 Wrong: 156 Complete: 521 Correct: 361 Wrong: 160 Complete: 531 Correct: 368 Wrong: 163 Complete: 541 
Correct: 375 Wrong: 166 Complete: 551 Correct: 383 Wrong: 168 Complete: 561 Correct: 388 Wrong: 173 Complete: 571 Correct: 394 Wrong: 177 Complete: 581 Correct: 400 Wrong: 181 Complete: 591 Correct: 406 Wrong: 185 Complete: 601 Correct: 415 Wrong: 186 Complete: 611 Correct: 425 Wrong: 186 Complete: 621 Correct: 432 Wrong: 189 Complete: 631 Correct: 439 Wrong: 192
save dev set
# devResults maps row index -> result dict, so transpose to get one row per
# dev example, then pickle the frame and download it from the Colab session.
devDf = pd.DataFrame(devResults).T
resultsFile = "newDevResultsSpecifically_all_8examples.pkl"
devDf.to_pickle(resultsFile)
files.download(resultsFile)
devDf.head()
s1 | s2 | pos | target | pred | actual | |
---|---|---|---|---|---|---|
0 | Room and board . | He nailed boards across the windows . | N | board | No | F |
1 | Circulate a rumor . | This letter is being circulated among the facu... | V | circulate | No | F |
2 | Hook a fish . | He hooked a snake accidentally , and was so sc... | V | hook | Yes | T |
3 | For recreation he wrote poetry and solved cros... | Drug abuse is often regarded as a form of recr... | N | recreation | Yes | T |
4 | Making a hobby of domesticity . | A royal family living in unpretentious domesti... | N | domesticity | Yes | F |
Convert labels to the WiC labels
# Map raw answers onto WiC labels: exactly "Yes" (after stripping) becomes
# "T", anything else becomes "F".
# NOTE: not idempotent -- running this again on already-converted labels turns
# every "T" into "F", because "T" is not "Yes".
devDf["pred"] = ["T" if answer.strip() == "Yes" else "F" for answer in devDf["pred"]]
69.28% accuracy on dev overall
# Fraction of all dev rows whose converted prediction equals the gold label.
tmp = devDf.assign(accurate=devDf["actual"] == devDf["pred"])
tmp["accurate"].sum() / len(tmp)
0.6927899686520376
71.65% accuracy on the nouns
# Same accuracy measure, restricted to noun rows.
tmp = devDf[devDf.pos == "N"].assign(accurate=lambda d: d["actual"] == d["pred"])
tmp["accurate"].sum() / len(tmp)
0.7164556962025317
65.43% accuracy on the verbs
# Same accuracy measure, restricted to verb rows.
tmp = devDf[devDf.pos == "V"].assign(accurate=lambda d: d["actual"] == d["pred"])
tmp["accurate"].sum() / len(tmp)
0.654320987654321
71.16% accuracy on the True (matching-sense) examples
# Same accuracy measure, restricted to rows whose gold label is "T".
tmp = devDf[devDf.actual == "T"].assign(accurate=lambda d: d["actual"] == d["pred"])
tmp["accurate"].sum() / len(tmp)
0.7115987460815048
67.40% accuracy on the False (different-sense) examples
# Same accuracy measure, restricted to rows whose gold label is "F".
tmp = devDf[devDf.actual == "F"].assign(accurate=lambda d: d["actual"] == d["pred"])
tmp["accurate"].sum() / len(tmp)
0.6739811912225705