# Import the libraries used by this notebook.
import json

import dspy
import openai

# Open "creds.json" read-only and take the value stored under its
# "openai_key" entry as the API key. The encoding is given explicitly so
# decoding does not depend on the platform's default locale encoding.
with open("creds.json", "r", encoding="utf-8") as creds:
    # json.load parses straight from the file object.
    api_key = json.load(creds)["openai_key"]
# Import the required library.
import dspy

# Create an OpenAI client object: gpt-4 model, the API key loaded earlier,
# model type 'chat', and at most 500 generated tokens.
lm = dspy.OpenAI(
    model='gpt-4',
    api_key=api_key,
    model_type='chat',
    max_tokens=500,
)

# Configure dspy's settings, passing in the client created above.
dspy.settings.configure(lm=lm)
from dspy.datasets import HotPotQA

# Load the dataset.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the input of every example in the training split.
trainset = [sample.with_inputs('question') for sample in dataset.train]

# Mark 'question' as the input of every example in the dev split.
devset = [sample.with_inputs('question') for sample in dataset.dev]
# Pick the first example of the training set.
train_example = trainset[0]

# Print the whole training example.
print(train_example)

# Print its question part.
print(f"Question: {train_example.question}")

# Print its answer part.
print(f"Answer: {train_example.answer}")
Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'}) Question: At My Window was released by which American singer-songwriter? Answer: John Townes Van Zandt
# NOTE: the class docstring and the `desc` of `answer` are runtime text, not
# just documentation. The prompt recorded by inspect_history in this notebook
# starts with the docstring and shows `desc` on the "Answer:" format line.
# Both are kept identical to that recorded prompt so the code matches its
# captured output.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Input field: the question.
    question = dspy.InputField()

    # Output field: the answer, described as usually 1 to 5 words long.
    answer = dspy.OutputField(desc="often between 1 and 5 words")
class BasicQABot(dspy.Module):
    """A basic question-answering bot built on the BasicQA signature."""

    def __init__(self):
        super().__init__()
        # Predictor that uses the BasicQA signature to produce answers.
        self.generate = dspy.Predict(BasicQA)

    def forward(self, question):
        """Run the predictor on `question` and return its answer.

        The predictor's output is re-wrapped so the returned
        dspy.Prediction carries only the `answer` field.
        """
        result = self.generate(question=question)
        return dspy.Prediction(answer=result.answer)
# Create a basic question-answering bot.
qa_bot = BasicQABot()

# Query the bot by calling the module itself rather than its forward()
# method, the same way the compiled program is invoked later in this
# notebook.
pred = qa_bot(
    question="In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?"
)

# Show the predicted answer (the cell's displayed result).
pred.answer
'Alfred the Great'
from dspy.teleprompt import KNNFewShot

# Create a KNNFewShot object with K set to 7, passing in the training set.
knn_teleprompter = KNNFewShot(7, trainset)

# Compile with KNNFewShot, using a fresh BasicQABot as the program and
# passing the training set again.
compiled_knn = knn_teleprompter.compile(BasicQABot(), trainset=trainset)
# Take the first example of the dev set.
example = devset[0]

# Predict an answer to its question with the compiled KNN program.
pred = compiled_knn(question=example.question)

# Print the question, the expected answer and the predicted answer.
print("Question: ", example.question)
print("Expected answer: ", example.answer)
print("Prediction: ", pred.answer)
57%|█████▋ | 4/7 [00:02<00:02, 1.44it/s]
Bootstrapped 4 full traces after 5 examples in round 0. Question: Are both Cangzhou and Qionghai in the Hebei province of China? Expected answer: no Prediction: No
# Call inspect_history on the lm object with argument 1; the prompt and
# completion it printed for this notebook are recorded right after this cell.
lm.inspect_history(1)
Answer questions with short factoid answers.
---
Follow the following format.
Question: ${question}
Answer: often between 1 and 5 words
---
Question: On the coast of what ocean is the birthplace of Diogal Sakho?
Answer: Atlantic Ocean
Question: Which is taller, the Empire State Building or the Bank of America Tower?
Answer: Empire State Building
Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where?
Answer: Space
Question: Which Pakistani cricket umpire who won 3 consecutive ICC umpire of the year awards in 2009, 2010, and 2011 will be in the ICC World Twenty20?
Answer: Aleem Dar
Question: What is the code name for the German offensive that started this Second World War engagement on the Eastern Front (a few hundred kilometers from Moscow) between Soviet and German forces, which included 102nd Infantry Division?
Answer: Operation Citadel
Question: Which of these publications was most recently published, Who Put the Bomp or Self?
Answer: Self
Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times
---
Question: Are both Cangzhou and Qionghai in the Hebei province of China?
Answer: No
from dspy.evaluate.evaluate import Evaluate

# Set up the `evaluate_on_hotpotqa` function. It is used several times below.
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Evaluate the `compiled_knn` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match

# Run the evaluation of `compiled_knn` under that metric.
evaluate_on_hotpotqa(compiled_knn, metric)
57%|█████▋ | 4/7 [00:03<00:02, 1.28it/s]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.06it/s] | 1/50 [00:03<03:01, 3.70s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.57it/s] | 2/50 [00:08<03:14, 4.05s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.16it/s] | 3/50 [00:11<02:58, 3.79s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.30it/s] | 4/50 [00:15<02:56, 3.84s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.52it/s] | 5/50 [00:19<02:49, 3.78s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.10it/s] | 6/50 [00:22<02:43, 3.72s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.51it/s] | 7/50 [00:26<02:46, 3.88s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.02s/it] | 8/50 [00:30<02:34, 3.68s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.18it/s] | 9/50 [00:35<02:54, 4.26s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.31it/s] | 10/50 [00:39<02:46, 4.15s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.26it/s] | 11/50 [00:43<02:33, 3.94s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.16it/s] | 12/50 [00:47<02:35, 4.09s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.32it/s] | 13/50 [00:51<02:29, 4.04s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.62it/s] | 14/50 [00:55<02:23, 3.97s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:02, 1.37it/s] | 15/50 [00:58<02:11, 3.75s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.28it/s] | 16/50 [01:02<02:05, 3.70s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:02, 1.39it/s] | 17/50 [01:05<02:04, 3.78s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:05<00:04, 1.36s/it] | 18/50 [01:09<01:58, 3.69s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.52it/s] | 19/50 [01:15<02:16, 4.39s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.21it/s] | 20/50 [01:19<02:03, 4.13s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.27it/s] | 21/50 [01:22<01:57, 4.06s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.10it/s] | 22/50 [01:26<01:52, 4.01s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.25it/s] | 23/50 [01:31<01:50, 4.08s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.04s/it] | 24/50 [01:34<01:44, 4.04s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.08it/s] | 25/50 [01:39<01:47, 4.32s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.14it/s] | 26/50 [01:44<01:44, 4.36s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.22s/it] | 27/50 [01:49<01:43, 4.50s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.00s/it] | 28/50 [01:54<01:45, 4.77s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.21it/s] | 29/50 [01:59<01:40, 4.78s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.67it/s] | 30/50 [02:03<01:29, 4.45s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.54it/s] | 31/50 [02:06<01:19, 4.17s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.75it/s] | 32/50 [02:09<01:09, 3.84s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.53it/s] | 33/50 [02:12<01:01, 3.65s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.51it/s] | 34/50 [02:16<00:55, 3.50s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.06it/s] | 35/50 [02:20<00:54, 3.65s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.15s/it]▏ | 36/50 [02:25<00:57, 4.09s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:01, 1.57it/s]▍ | 37/50 [02:30<00:57, 4.41s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.08s/it]▌ | 38/50 [02:33<00:47, 4.00s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:02, 1.35it/s]▊ | 39/50 [02:38<00:47, 4.33s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.13it/s]█ | 40/50 [02:42<00:41, 4.20s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.30it/s]█▏ | 41/50 [02:46<00:38, 4.29s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.04s/it]█▍ | 42/50 [02:50<00:32, 4.07s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.27it/s]█▌ | 43/50 [02:56<00:31, 4.53s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.33it/s]█▊ | 44/50 [02:59<00:25, 4.28s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.04s/it]██ | 45/50 [03:03<00:20, 4.20s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.02s/it]██▏| 46/50 [03:09<00:18, 4.61s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:02<00:02, 1.35it/s]██▍| 47/50 [03:14<00:14, 4.74s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:03<00:02, 1.17it/s]██▌| 48/50 [03:17<00:08, 4.38s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
57%|█████▋ | 4/7 [00:04<00:03, 1.02s/it]██▊| 49/50 [03:21<00:04, 4.28s/it]
Bootstrapped 4 full traces after 5 examples in round 0.
Average Metric: 23 / 50 (46.0): 100%|██████████| 50/50 [03:26<00:00, 4.13s/it] /home/jovyan/scdc/project-vaqa-autosuggest/Query-Generation-exploratory/dspy/dspy/evaluate/evaluate.py:126: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead. df = df.applymap(truncate_cell)
Average Metric: 23 / 50 (46.0%)
question | example_answer | gold_titles | pred_answer | answer_exact_match | |
---|---|---|---|---|---|
0 | Are both Cangzhou and Qionghai in the Hebei province of China? | no | {'Cangzhou', 'Qionghai'} | No | ✔️ [True] |
1 | Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season? | National Hockey League | {'2017–18 Pittsburgh Penguins season', '2017 NHL Expansion Draft'} | National Hockey League | ✔️ [True] |
2 | The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay... | Steve Yzerman | {'2006–07 Detroit Red Wings season', 'Steve Yzerman'} | Steve Yzerman | ✔️ [True] |
3 | What river is near the Crichton Collegiate Church? | the River Tyne | {'Crichton Collegiate Church', 'Crichton Castle'} | Tyne River | ❌ [False] |
4 | In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king? | King Alfred the Great | {'Æthelweard (son of Alfred)', 'Ealhswith'} | Alfred the Great | ❌ [False] |
(46.0, question example_answer gold_titles pred_answer answer_exact_match 0 Are both Ca... no {Cangzhou, ... No True 1 Who conduct... National Ho... {2017–18 Pi... National Ho... True 2 The Wings e... Steve Yzerman {2006–07 De... Steve Yzerman True 3 What river ... the River Tyne {Crichton C... Tyne River False 4 In the 10th... King Alfred... {Æthelweard... Alfred the ... False 5 The Newark ... Port Author... {Newark Air... Port Author... True 6 Where did a... Bundesliga {Claudio Pi... Peru False 7 Are both Ch... no {Chico Muni... No True 8 In which Ma... Waldo Count... {Stockton S... Waldo County False 9 Which 90s r... The Afghan ... {Gene (band... The Afghan ... True 10 What year d... 79 AD {Curse of t... 79 AD True 11 Is the 72nd... the oldest {First Unit... Oldest True 12 Was Stanisl... not {Stanisław ... Yes False 13 Which film ... Del Lord {Wang Xiaos... Wang Xiaoshuai False 14 Lord North ... Jonathan Wi... {Jonathan A... Jonathan Ai... False 15 What is the... Marche {Marche, Po... Marche True 16 William Hug... 7,402 at th... {Kosciusko,... Unknown False 17 What do stu... design thei... {Gallatin S... Study Art False 18 What is the... English {Restaurant... British False 19 What Americ... Robert F. Chew {Robert F. ... Wood Harris False 20 What city i... Manchester {Toby Sawye... London False 21 Who was bor... Deepa Mehta {Tony Kaye ... Tony Kaye False 22 What is the... the good ma... {Boise Town... The Bon Marché False 23 Who did Liz... Christine C... {Lizzette R... Christine C... True 24 What was th... William Str... {P. T. Barn... Zerah Colburn False 25 Which battl... Battle of t... {Meuse-Argo... Battle of t... True 26 What cricke... Ian Botham {Ian Botham... Terry Alderman False 27 What is the... defensive a... {1982 NC St... Deceased False 28 Which Scott... Ewan McGregor {Come What ... Ewan McGregor True 29 Where have ... space {Frank De W... Space True 30 The origina... Maria Yermo... {Wild Honey... Maria Yermo... True 31 Are Roswell... 
no {Pago Pago ... No True 32 Untold: The... the voice o... {Marv Alber... Gus Johnson False 33 Are Walt Di... yes {Sacro GRA,... No False 34 What is the... Hamas {Status of ... Hamas True 35 What album ... 1989 {Wildest Dr... 1989 True 36 Which is co... Apera {Apera, Gun... Apera True 37 Do The Drum... no {Pussy Galo... No True 38 What is the... Exon {Banded Bro... UoE False 39 Are both Be... yes {Len Wisema... Yes True 40 Steven Cuit... Bill Melendez {Steven C. ... Bill Melendez True 41 Shark Creek... Clarence River {Clarence R... Clarence River True 42 Who was the... Pixar {Finding Do... Lindsey Col... False 43 Who purchas... Renault {Benetton F... Jean Todt False 44 Fredrick La... Cadwalader ... {Frederick ... Cadwalader ... True 45 Gordon Warn... "Forza Ital... {Franco Zef... Democratic ... False 46 André Zucca... the Wehrmacht {André Zucc... The Propaga... False 47 Both Bill P... cricketer {Bill Ponsf... Cricket False 48 Suzana S. ... Danny Wallace {Yes Man (f... James Ellroy False 49 In what cit... Portland {Election L... New York False )