"""Few-shot Word-in-Context (WiC) experiment against the OpenAI completion API.

Pipeline, as a Colab notebook script:

1. Ask the model to paraphrase the target word in a handful of training
   sentences; those completions become the few-shot "definition" prompts.
2. For a sentence pair, paraphrase the target word in each sentence, then ask
   the model whether the two paraphrases say the same thing.
3. Run that over the dev split, save the predictions, and print accuracies.
"""

from google.colab import files

# Colab prompts for a file upload here; key.json (read below) is presumably
# supplied this way -- confirm.
uploaded = files.upload()
print("done")

# Same effect as the notebook shell magic `!pip install openai`, spelled as the
# call IPython turns it into so that this file is parseable as plain Python.
get_ipython().system("pip install openai")

import json
import random
import zipfile

import numpy as np
import openai
import pandas as pd

# Arguments to send the API.
# kwargs: stop at the first newline (single-line completions).
kwargs = {
    "engine": "davinci",
    "temperature": 0,
    "max_tokens": 200,
    "stop": "\n",
}
# kwargs2: stop at the first blank line.
kwargs2 = {
    "engine": "davinci",
    "temperature": 0,
    "max_tokens": 200,
    "stop": "\n\n",
}
# kwargs2Short: as kwargs2, capped at 20 tokens (used for the Yes/No answer).
kwargs2Short = {
    "engine": "davinci",
    "temperature": 0,
    "max_tokens": 20,
    "stop": "\n\n",
}
# Settings used when asking for the contextual meaning of a word.
meaningKwargs = {
    "engine": "davinci",
    "max_tokens": 20,
    "stop": "\n",
    "temperature": 0,
    "presence_penalty": .2,
}

# Close the key file deterministically instead of leaking the handle.
with open("key.json", "r") as keyFile:
    openai.api_key = json.load(keyFile)["key"]


def completeText(prompt, myKwargs):
    """Send *prompt* to the completion endpoint and return the stripped text.

    Args:
        prompt: Full prompt string.
        myKwargs: Keyword arguments forwarded to ``openai.Completion.create``.

    Returns:
        The ``text`` of the first choice with surrounding whitespace removed.
    """
    response = openai.Completion.create(prompt=prompt, **myKwargs)
    return response["choices"][0]["text"].strip()


def queryTwoLine(prompt, myKwargs = kwargs2):
    """Wrapper for the API whose default settings stop at two newlines."""
    return completeText(prompt, myKwargs)


def queryOneLine(prompt, myKwargs = kwargs):
    """Wrapper for the API whose default settings stop at one newline."""
    return completeText(prompt, myKwargs)


# Not referenced by the code below; kept in case other cells rely on it.
newKwargs = kwargs.copy()
newKwargs["stop"] = "\n"

# Smoke test of the API wrapper (the value is shown as notebook cell output).
queryOneLine("q: what is 1+1?\na:")

# Same effect as the notebook shell magic `!wget <url>`.
get_ipython().system("wget https://pilehvar.github.io/wic/package/WiC_dataset.zip")
with zipfile.ZipFile("WiC_dataset.zip", "r") as zip_ref:
    zip_ref.extractall(".")

train = pd.read_csv("train/train.data.txt", sep='\t', header=None)
train.columns = ["target", "pos", "position", "context-1", "context-2"]
train_gold = pd.read_csv("train/train.gold.txt", sep='\t', header=None)
train_gold.columns = ["label"]
train = pd.concat([train_gold, train], axis=1)
train.head()

# Question templates shared by the example builders and the meaning queries.
NOUN_QUESTION = (
    "I asked Tom what '{}' specifically means in this context, "
    "he clarified it is another word for"
)
VERB_QUESTION = (
    "I asked Tom what 'to {}' specifically means in this context, "
    "he explained it was another word for"
)


def getContextualNounExample(content, term):
    """Build one zero-shot noun example: the prompt plus the model's answer.

    Args:
        content: Sentence in which the noun appears.
        term: The target noun.

    Returns:
        The two-line prompt followed by a space and the one-line completion.
    """
    prompt = "Tom said '{}'\n".format(content)
    prompt += NOUN_QUESTION.format(term)
    return prompt + " " + queryOneLine(prompt)


def getContextualVerbExample(content, term):
    """Build one zero-shot verb example: the prompt plus the model's answer.

    Args:
        content: Sentence in which the verb appears.
        term: The target verb (without "to").

    Returns:
        The two-line prompt (ending in an opening ``'to``) followed by a space
        and the one-line completion.
    """
    prompt = "Tom said '{}'\n".format(content)
    prompt += (VERB_QUESTION + " 'to").format(term)
    return prompt + " " + queryOneLine(prompt)


def buildFewShotDefinitions(rows, exampleFn):
    """Generate, print and collect one flattened example per row of *rows*.

    Args:
        rows: DataFrame slice with "context-1" and "target" columns.
        exampleFn: ``getContextualNounExample`` or ``getContextualVerbExample``.

    Returns:
        List of examples, each squeezed onto a single line.
    """
    definitions = []
    for _, row in rows.iterrows():
        example = exampleFn(row["context-1"], row["target"])
        print(example)
        # Store the definition as one line so examples can be newline-joined.
        definitions.append(example.replace("\n", " "))
    return definitions


# Few-shot noun definitions, built from the last 10 noun rows of the training set.
fewShotsNounDefinitions = buildFewShotDefinitions(
    train[train.pos == "N"].tail(10), getContextualNounExample
)
for i, definition in enumerate(fewShotsNounDefinitions):
    print("{}: {}".format(i, definition))

# Indices of generated examples to exclude after eyeballing the list above
# (an earlier run used [3, 8]).
bad = []
fewShotsNounPrompt = "\n".join(
    definition
    for i, definition in enumerate(fewShotsNounDefinitions)
    if i not in bad
)

# Few-shot verb definitions, built from the last 10 verb rows of the training set.
fewShotsVerbDefinitions = buildFewShotDefinitions(
    train[train.pos == "V"][-10:], getContextualVerbExample
)
for i, definition in enumerate(fewShotsVerbDefinitions):
    print("{}: {}".format(i, definition))

# As above (an earlier run used [2, 7]).
bad = []
fewShotsVerbPrompt = "\n".join(
    definition
    for i, definition in enumerate(fewShotsVerbDefinitions)
    if i not in bad
)


def getContextualNounMeaning(content, term, contexts = None):
    """Ask the model what noun *term* means in *content*, few-shot.

    Args:
        content: Sentence containing the noun.
        term: The target noun.
        contexts: Optional list of dicts with "content", "term" and "meaning"
            keys; each is appended to the prompt as an extra solved example.

    Returns:
        The completion, prefixed with ``'`` when it does not already start
        with one.
    """
    prompt = fewShotsNounPrompt
    # NOTE(review): these lines put two spaces between the sentence and the
    # question, while the few-shot examples have one; left untouched because
    # changing the prompt changes the model's output.
    for context in contexts or ():
        prompt += "\nTom said '{}' ".format(context["content"])
        prompt += (" " + NOUN_QUESTION + " {}").format(
            context["term"], context["meaning"]
        )
    prompt += "\nTom said '{}' ".format(content)
    prompt += (" " + NOUN_QUESTION).format(term)
    r = queryOneLine(prompt, myKwargs = meaningKwargs)
    if not r.startswith("'"):
        r = "'" + r
    return r


def getContextualVerbMeaning(content, term, contexts = None):
    """Ask the model what verb *term* means in *content*, few-shot.

    Args:
        content: Sentence containing the verb.
        term: The target verb (without "to").
        contexts: Optional list of dicts with "content", "term" and "meaning"
            keys; each is appended to the prompt as an extra solved example.

    Returns:
        The completion, prefixed with ``'`` when it does not already start
        with one.
    """
    prompt = fewShotsVerbPrompt
    for context in contexts or ():
        prompt += "\nTom said '{}'".format(context["content"])
        prompt += (" " + VERB_QUESTION + " {}").format(
            context["term"], context["meaning"]
        )
    prompt += "\nTom said '{}'".format(content)
    prompt += (" " + VERB_QUESTION + " 'to").format(term)
    r = queryOneLine(prompt, myKwargs = meaningKwargs)
    if not r.startswith("'"):
        r = "'" + r
    return r


def generateComparisonExample(s1, s2, label):
    """Format one solved "same meaning?" example for the comparison prompt.

    Args:
        s1: First meaning.
        s2: Second meaning.
        label: Gold label; "T" yields the answer " Yes", anything else " No".

    Returns:
        The example text ending in the answer.
    """
    # NOTE(review): the query built for the comparison uses "{}; Jerry says"
    # (with a semicolon) while this example omits it. Left as-is because
    # changing prompt text changes model behaviour -- confirm which is intended.
    prompt = "Tom says that this means {} Jerry says this means {}\n".format(s1, s2)
    prompt += "Q: Are Tom and Jerry basically saying the same thing here?\nA:"
    prompt += " Yes" if label == "T" else " No"
    return prompt


def describePair(meaningFn, s1, s2, target):
    """Return the contextual meaning of *target* in *s1* and in *s2*.

    The second query is given the first sentence and its answer as context.
    """
    first = meaningFn(s1, target)
    second = meaningFn(
        s2, target, contexts=[{"content": s1, "term": target, "meaning": first}]
    )
    return first, second


def buildComparisonExamples(rows, meaningFn, previewCount):
    """Create one solved comparison example per row of *rows*.

    Args:
        rows: DataFrame slice with "context-1", "context-2", "label", "target".
        meaningFn: ``getContextualNounMeaning`` or ``getContextualVerbMeaning``.
        previewCount: Sentences and meanings are printed while fewer than this
            many examples have been collected.

    Returns:
        List of example strings from ``generateComparisonExample``.
    """
    examples = []
    for _, row in rows.iterrows():
        first, second = describePair(
            meaningFn, row["context-1"], row["context-2"], row["target"]
        )
        if len(examples) < previewCount:
            print(row["context-1"])
            print(row["context-2"])
            print(first)
            print(second)
        examples.append(generateComparisonExample(first, second, row["label"]))
    return examples


comparisonFewShotVerbExamples = buildComparisonExamples(
    train[train.pos == "V"][-20:-12], getContextualVerbMeaning, 2
)
comparisonFewShotNounExamples = buildComparisonExamples(
    train[train.pos == "N"][-20:-12], getContextualNounMeaning, 3
)


def buildComparisonPrompt(examples, s1, s2):
    """Join the solved *examples* and append the unanswered question for s1/s2."""
    prompt = "\n\n".join(examples)
    prompt += "\n\n"
    prompt += "Tom says that this means {}; Jerry says this means {}\n".format(s1, s2)
    prompt += "Q: Are Tom and Jerry basically saying the same thing here?\nA:"
    return prompt


def generateNounComparison(s1, s2):
    """Ask whether noun meanings *s1* and *s2* agree; returns the raw answer."""
    prompt = buildComparisonPrompt(comparisonFewShotNounExamples, s1, s2)
    return queryTwoLine(prompt, myKwargs = kwargs2Short)


def generateVerbComparison(s1, s2):
    """Ask whether verb meanings *s1* and *s2* agree; returns the raw answer."""
    prompt = buildComparisonPrompt(comparisonFewShotVerbExamples, s1, s2)
    return queryTwoLine(prompt, myKwargs = kwargs2Short)


def classifyPair(pos, s1, s2, target):
    """Run the full two-step prediction for one sentence pair.

    Args:
        pos: "N" selects the noun prompts; any other value the verb prompts.
        s1: First sentence.
        s2: Second sentence.
        target: The target word.

    Returns:
        Tuple ``(meaning1, meaning2, answer)``.
    """
    if pos == "N":
        meaningFn, compareFn = getContextualNounMeaning, generateNounComparison
    else:
        meaningFn, compareFn = getContextualVerbMeaning, generateVerbComparison
    first, second = describePair(meaningFn, s1, s2, target)
    return first, second, compareFn(first, second)


# Quick look at a few training rows before paying for the full dev run.
for _, row in train[20:25].iterrows():
    s1 = row["context-1"]
    s2 = row["context-2"]
    label = row["label"]
    target = row["target"]
    r1, r2, r = classifyPair(row["pos"], s1, s2, target)
    print("'{}' v '{}'".format(s1, s2))
    print("{} v {}".format(r1, r2))
    print("returned: {}".format(r))
    print("actual: {}".format(label))

dev = pd.read_csv("dev/dev.data.txt", sep='\t', header=None)
dev.columns = ["target", "pos", "position", "context-1", "context-2"]
dev_gold = pd.read_csv("dev/dev.gold.txt", sep='\t', header=None)
dev_gold.columns = ["label"]
dev = pd.concat([dev_gold, dev], axis=1)

devResults = {}
correct = 0
complete = 0

# Answer the model is expected to give for each gold label.
expectedAnswer = {"T": "Yes", "F": "No"}

for idx, row in dev.iterrows():
    # Skip rows already scored so the loop can be re-run after an interruption.
    if idx in devResults:
        continue
    s1 = row["context-1"]
    s2 = row["context-2"]
    label = row["label"]
    target = row["target"]
    r1, r2, r = classifyPair(row["pos"], s1, s2, target)
    myResults = {
        "s1": s1,
        "s2": s2,
        "pos": row["pos"],
        "target": target,
        "pred": r,
        "actual": label,
    }
    devResults[idx] = myResults
    complete += 1
    if r.strip() == expectedAnswer.get(label):
        correct += 1
    if idx % 10 == 0:
        print("Complete: {} Correct: {} Wrong: {}".format(complete, correct, complete - correct))

devDf = pd.DataFrame(devResults).T
devDf.to_pickle("newDevResultsSpecifically_all_8examples.pkl")
files.download("newDevResultsSpecifically_all_8examples.pkl")
devDf.head()

# Collapse the raw answers to the gold-label alphabet: exactly "Yes" -> "T",
# everything else -> "F".
devDf["pred"] = devDf["pred"].apply(lambda x: "T" if x.strip() == "Yes" else "F")


def scoreSubset(name, frame):
    """Print the accuracy of *frame* and return a copy with an "accurate" column.

    Args:
        name: Label used in the printed line.
        frame: DataFrame with "actual" and "pred" columns.

    Returns:
        Copy of *frame* with a boolean "accurate" column added.
    """
    scored = frame.copy()
    scored["accurate"] = scored["actual"] == scored["pred"]
    print("{} accuracy: {}".format(name, scored["accurate"].sum() / len(scored)))
    return scored


tmp = scoreSubset("overall", devDf)
tmp = scoreSubset("nouns", devDf[devDf.pos == "N"])
tmp = scoreSubset("verbs", devDf[devDf.pos == "V"])
tmp = scoreSubset("actual T", devDf[devDf.actual == "T"])
tmp = scoreSubset("actual F", devDf[devDf.actual == "F"])