#!/usr/bin/env python
# coding: utf-8

# Meta-Prompt: a LangChain implementation of Noah Goodman's self-improving
# agent (https://noahgoodman.substack.com/p/meta-prompt-a-simple-self-improving).
#
# The agent is a simple loop that starts with no instructions:
#   1. Converse with a user, who may give requests, instructions, or feedback.
#   2. At the end of each episode, use the fixed meta-prompt to generate a
#      self-critique and a revised set of instructions from the chat history.
#   3. Repeat.
# The agent has no memory between episodes other than the instructions it
# rewrites for itself each time, yet it can improve over time by folding
# useful details into those instructions.
#
# Two chains are defined: an "Assistant" chain that talks to the user, and a
# "meta" chain that critiques the Assistant's transcript and revises its
# instructions.

from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAI


def initialize_chain(instructions, memory=None):
    """Build the Assistant chain for one episode.

    Args:
        instructions: The current (possibly revised) instruction string that
            is embedded verbatim into the prompt.
        memory: Optional conversation memory to reuse; a fresh
            ConversationBufferWindowMemory (with ai_prefix "Assistant") is
            created when None.

    Returns:
        An LLMChain wired with the instruction-bearing prompt and the memory.
    """
    if memory is None:
        memory = ConversationBufferWindowMemory()
        memory.ai_prefix = "Assistant"

    template = f"""
    Instructions: {instructions}
    {{{memory.memory_key}}}
    Human: {{human_input}}
    Assistant:"""

    prompt = PromptTemplate(
        input_variables=["history", "human_input"], template=template
    )

    chain = LLMChain(
        llm=OpenAI(temperature=0),
        prompt=prompt,
        verbose=True,
        # BUG FIX: the original passed a brand-new ConversationBufferWindowMemory()
        # here, discarding both a caller-supplied `memory` and the ai_prefix
        # configured above. Use the memory object built/received above.
        memory=memory,
    )
    return chain


def initialize_meta_chain():
    """Build the meta chain that critiques the Assistant and revises its
    instructions from the episode's chat history."""
    meta_template = """
    Assistant has just had the below interactions with a User. Assistant followed their "Instructions" closely. Your job is to critique the Assistant's performance and then revise the Instructions so that Assistant would quickly and correctly respond in the future.

    ####

    {chat_history}

    ####

    Please reflect on these interactions.

    You should first critique Assistant's performance. What could Assistant have done better? What should the Assistant remember about this user? Are there things this user always wants? Indicate this with "Critique: ...".

    You should next revise the Instructions so that Assistant would quickly and correctly respond in the future. Assistant's goal is to satisfy the user in as few interactions as possible. Assistant will only see the new Instructions, not the interaction history, so anything important must be summarized in the Instructions. Don't forget any important details in the current Instructions! Indicate the new Instructions by "Instructions: ...".
    """

    meta_prompt = PromptTemplate(
        input_variables=["chat_history"], template=meta_template
    )

    meta_chain = LLMChain(
        llm=OpenAI(temperature=0),
        prompt=meta_prompt,
        verbose=True,
    )
    return meta_chain


def get_chat_history(chain_memory):
    """Return the full chat transcript stored in `chain_memory`."""
    memory_key = chain_memory.memory_key
    # load_memory_variables expects a dict of inputs (buffer memories ignore
    # it); the original passed the bare memory_key string, which only worked
    # by accident.
    chat_history = chain_memory.load_memory_variables({})[memory_key]
    return chat_history


def get_new_instructions(meta_output):
    """Extract the text following "Instructions: " from the meta chain output.

    If the delimiter is absent, `str.find` returns -1 and the slice degrades
    to a suffix of `meta_output` — same behavior as the original.
    """
    delimiter = "Instructions: "
    new_instructions = meta_output[meta_output.find(delimiter) + len(delimiter):]
    return new_instructions


def main(task, max_iters=3, max_meta_iters=5):
    """Run the Meta-Prompt loop interactively.

    Args:
        task: The initial user request given to the Assistant each episode.
        max_iters: Maximum user/assistant turns per episode.
        max_meta_iters: Maximum episodes (instruction-revision rounds).
    """
    failed_phrase = "task failed"
    success_phrase = "task succeeded"
    key_phrases = [success_phrase, failed_phrase]

    instructions = "None"
    for i in range(max_meta_iters):
        print(f"[Episode {i+1}/{max_meta_iters}]")
        chain = initialize_chain(instructions, memory=None)
        output = chain.predict(human_input=task)
        for j in range(max_iters):
            print(f"(Step {j+1}/{max_iters})")
            print(f"Assistant: {output}")
            print("Human: ")
            human_input = input()
            # The user can end the episode by declaring success or failure.
            if any(phrase in human_input.lower() for phrase in key_phrases):
                break
            output = chain.predict(human_input=human_input)
        if success_phrase in human_input.lower():
            print("You succeeded! Thanks for playing!")
            return
        # Episode over without success: critique the transcript and revise
        # the instructions for the next episode.
        meta_chain = initialize_meta_chain()
        meta_output = meta_chain.predict(chat_history=get_chat_history(chain.memory))
        print(f"Feedback: {meta_output}")
        instructions = get_new_instructions(meta_output)
        print(f"New Instructions: {instructions}")
        print("\n" + "#" * 80 + "\n")
    print("You failed! Thanks for playing!")


if __name__ == "__main__":
    # Specify a task and interact with the agent.
    task = "Provide a systematic argument for why we should always eat pasta with olives."
    main(task)