#!/usr/bin/env python
# coding: utf-8

# ## Using LangChain to get structured outputs
#
# ### Setup
#

# In[1]:


from langchain.prompts import ChatPromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_ollama import ChatOllama
from langchain_fireworks import ChatFireworks
from langchain.output_parsers import XMLOutputParser, PydanticOutputParser

from typing import Optional
from pydantic import BaseModel, Field
from pydantic_xml import BaseXmlModel, element
from typing_extensions import Annotated, TypedDict


# In[3]:


ANTHROPIC_API_KEY = ""
FIREWORKS_API_KEY = ""


# Let's start by creating an LLM model to run our structured output queries. Use a temperature of 0 to improve structured output generation (but at the cost of "creativity").
#

# In[ ]:


# LLM parameters
temperature = 0


# Define an LLM model below
#

# In[ ]:


llm_model = ChatAnthropic(
    model="claude-3-5-haiku-20241022",
    temperature=temperature,
    api_key=ANTHROPIC_API_KEY,
)

# llm_model = ChatOllama(model="llama3.2", temperature=temperature)

# llm_model = ChatFireworks(
#     model_name="accounts/fireworks/models/llama-v3p1-70b-instruct",
#     temperature=temperature,
#     api_key=FIREWORKS_API_KEY,
# )


# Check that it works
#

# In[4]:


print(llm_model.invoke("Tell me a joke about zebras").content)


# ### Problem setup and prompt
#

# In[5]:


test_science_prompt_txt = """
You are a professional science writer tasked with responding to members of the
general public who write in asking questions about science.
Write an article responding to a writer's question for publication in a science
magazine intended for a general readership with a high-school education.
You should write clearly and compellingly, include all relevant context, and
provide motivating stories where applicable.

Your response must be less than 200 words.

The question given to you is the following:
{question}
"""


# In[6]:


prompt_system_format = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Write an article in the provided format that answers the user question.\n{format_instructions}",
        ),
        ("human", test_science_prompt_txt),
    ]
)


# ### Manual XML schema specification
#

# In[1]:


prompt_xml_format = """
Your answers must strictly adhere to the following XML format:
<article>
    <title>[Title of the article.]</title>
    <problem>[Summary of the writer's question, write concisely.]</problem>
    <answer>[Answer the writer's question.]</answer>
</article>
""" # In[10]: xml_parser = XMLOutputParser() # In[11]: test_chain = ( prompt_system_format.partial(format_instructions=prompt_xml_format) | llm_model | xml_parser ) output = test_chain.invoke(dict(question="What is the oldest recorded fossil?")) # In[12]: output # ### Approach 1: LangChain Output Parsers # # In[7]: xml_parser = XMLOutputParser( name="article", tags=["article", "title", "problem", "answer"] ) # In[8]: test_chain = ( prompt_system_format.partial( format_instructions=xml_parser.get_format_instructions() ) | llm_model | xml_parser ) output = test_chain.invoke(dict(question="What is the oldest recorded fossil?")) # In[9]: output # In[17]: print(xml_parser.get_format_instructions()) # ### Approach 2: Pydantic XML with a prompt instructions helper class # # In[2]: from pydantic_xml import BaseXmlModel, element from experiment_xml import pydantic_to_xml_instructions # Nested types class HistoricalEventXML(BaseXmlModel): year: str = element(description="The year of the historical event") summary: str = element( description="A clear description of what happened in this event" ) class ArticleWithHistoryXML(BaseXmlModel, tag="article"): title: str = element(description="Title of the article") problem: str = element( description="Summary of the writer's question, write concisely" ) historical_event: HistoricalEventXML = element( description="The year and explanation of a historical event" ) # Using a helper function to create the # # In[3]: print(pydantic_to_xml_instructions(ArticleWithHistoryXML)) # In[24]: prompt_format = ChatPromptTemplate.from_messages( [ ( "human", "Write an article in the provided format that answers the user question." "\n{format_instructions}", ), ("human", test_science_prompt_txt), ] ) # In[25]: test_chain = ( prompt_format.partial( format_instructions=pydantic_to_xml_instructions(ArticleWithHistoryXML) ) | llm_model ) output = test_chain.invoke(dict(question="What is the oldest recorded fossil?")) # In[26]: ArticleWithHistoryXML.from_xml(output.content) # But we can't use the `PydanticOutputParser` with a Pydantic XML object as it assumes JSON output. # # In[27]: pydantic_parser = PydanticOutputParser(pydantic_object=ArticleXML) # In[28]: test_chain = ( prompt_format.partial(format_instructions=pydantic_to_xml_instructions(ArticleXML)) | llm_model | pydantic_parser ) output = test_chain.invoke(dict(question="What is the oldest recorded fossil?")) # In[ ]: