import os
from typing import Optional

from langchain.output_parsers import XMLOutputParser, PydanticOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_fireworks import ChatFireworks
from langchain_ollama import ChatOllama
from pydantic import BaseModel, Field
from pydantic_xml import BaseXmlModel, element
from typing_extensions import Annotated, TypedDict
# Provider credentials. Prefer the ANTHROPIC_API_KEY / FIREWORKS_API_KEY
# environment variables so that real keys never have to be written into the
# notebook; the "<API KEY>" placeholder is kept as the fallback value.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "<API KEY>")
FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY", "<API KEY>")
Let's start by creating an LLM to run our structured output queries. Use a temperature of 0 to improve structured output generation (at the cost of "creativity").
# LLM parameters
# Sampling temperature passed to the chat model. It is set to 0 because
# structured-output generation is the goal here: lower randomness helps the
# model stick to the requested format, at the cost of "creativity".
temperature = 0.0
Define an LLM below.
# Chat model used by every chain in this notebook: Anthropic's
# Claude 3.5 Haiku, configured with the temperature and API key set above.
llm_model = ChatAnthropic(
    api_key=ANTHROPIC_API_KEY,
    temperature=temperature,
    model="claude-3-5-haiku-20241022",
)

# Alternative backends -- uncomment exactly one to replace the model above.
#
# Llama 3.2 through Ollama:
# llm_model = ChatOllama(model="llama3.2", temperature=temperature)
#
# Llama 3.1 70B Instruct through Fireworks:
# llm_model = ChatFireworks(
#     model_name="accounts/fireworks/models/llama-v3p1-70b-instruct",
#     temperature=temperature,
#     api_key=FIREWORKS_API_KEY,
# )
Check that it works:
# Smoke test: send one simple prompt to the configured model and print the
# text content of its reply.
print(llm_model.invoke("Tell me a joke about zebras").content)
Why did the zebra refuse to play poker? Because he always got striped of his money! (get it?)
# Task prompt for the science-writer scenario. ``{question}`` is the only
# placeholder; it is filled in when a chain built on this text is invoked.
# (Spelling of "relevant" corrected so the model receives a clean prompt.)
test_science_prompt_txt = """
You are a professional science writer tasked with responding to members of
the general public who write in asking questions about science.
Write an article responding to a writer's question for publication in a
science magazine intended for a general readership with a high-school education.
You should write clearly and compellingly, include all relevant context,
and provide motivating stories where applicable.
Your response must be less than 200 words.
The question given to you is the following:
{question}
"""
# Two-message chat prompt: the system message carries the output-format
# instructions ({format_instructions}), and the human message carries the
# science-writer task together with the reader's question.
prompt_system_format = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Write an article in the provided format that answers the user question."
            "\n{format_instructions}",
        ),
        ("human", test_science_prompt_txt),
    ]
)
# Hand-written XML format instructions: one <article> root holding <title>,
# <problem> and <answer>, each with a bracketed hint describing its content.
# The leading and trailing empty entries reproduce the newline at the start
# and end of the text.
prompt_xml_format = "\n".join(
    [
        "",
        "Your answers must strictly adhere to the following XML format:",
        "<article>",
        "<title>",
        "[Title of the article.]",
        "</title>",
        "<problem>",
        "[Summary of the writer's question, write concisely.]",
        "</problem>",
        "<answer>",
        "[Answer the writer's question.]",
        "</answer>",
        "</article>",
        "",
    ]
)
# Parser that turns the model's XML reply into nested Python dicts/lists.
# No tag list is passed: the hand-written instructions in prompt_xml_format
# already describe the expected layout.
xml_parser = XMLOutputParser()

# Pipeline: prompt (format instructions pre-filled) -> model -> XML parser.
test_chain = (
    prompt_system_format.partial(format_instructions=prompt_xml_format)
    | llm_model
    | xml_parser
)

output = test_chain.invoke({"question": "What is the oldest recorded fossil?"})
output
{'article': [{'title': 'Uncovering the Oldest Record of Life on Earth'}, {'problem': '\nWhat is the oldest recorded fossil, and how did it end up in the ground for so long?'}, {'answer': "\nThe oldest recorded fossil dates back to around 3.46 billion years ago, during a time known as the Eoarchean era of the Precambrian period. This ancient relic is a stromatolite, a layered structure created by microbial life forms that lived in a shallow sea. The fossil was discovered in Western Australia's Apex Chert formation and is considered one of the most important discoveries in the field of astrobiology.\n\nThe stromatolite's incredible age is a testament to the resilience and adaptability of life on Earth. It's hard to imagine how something so ancient could have survived for over 3 billion years, but it's a reminder that even in the harshest environments, life can thrive and leave behind its mark. The discovery of this fossil has also shed light on the origins of life on our planet, providing valuable insights into the evolution of our Earth."}]}
# Same experiment, but now the parser is given the expected tags and its own
# generated format instructions are used in place of the hand-written ones.
xml_parser = XMLOutputParser(
    tags=["article", "title", "problem", "answer"],
    name="article",
)

# Pipeline: prompt (parser-generated instructions) -> model -> XML parser.
test_chain = (
    prompt_system_format.partial(
        format_instructions=xml_parser.get_format_instructions(),
    )
    | llm_model
    | xml_parser
)

output = test_chain.invoke({"question": "What is the oldest recorded fossil?"})
output
{'article': [{'title': 'Uncovering the Ancient Past'}, {'problem': 'What is the oldest recorded fossil?'}, {'answer': 'The oldest recorded fossil is a mystery that has captivated scientists and fossil hunters for centuries. While we can\'t pinpoint a single, definitive fossil as the "oldest," researchers have discovered some remarkable ancient remains.\n\n One of the most significant discoveries was made in 2010 by a team of paleontologists in Morocco. They unearthed fossils of ancient microorganisms, known as stromatolites, which date back an astonishing 3.5 billion years! These ancient organisms were the first to produce oxygen and played a crucial role in shaping our planet\'s atmosphere.\n\n Other contenders for oldest recorded fossil include ancient fish-like creatures from China, estimated to be around 1.2 billion years old, and even evidence of ancient life on Earth dating back an astonishing 4.4 billion years!'}]}
# Show the format instructions that the tag-aware parser generates, i.e. the
# text that was substituted for {format_instructions} in the chain above.
print(xml_parser.get_format_instructions())
The output should be formatted as a XML file. 1. Output should conform to the tags below. 2. If tags are not given, make them on your own. 3. Remember to always open and close all the tags. As an example, for the tags ["foo", "bar", "baz"]: 1. String "<foo> <bar> <baz></baz> </bar> </foo>" is a well-formatted instance of the schema. 2. String "<foo> <bar> </foo>" is a badly-formatted instance. 3. String "<foo> <tag> </tag> </foo>" is a badly-formatted instance. Here are the output tags: ``` ['article', 'title', 'problem', 'answer'] ```
from pydantic_xml import BaseXmlModel, element
from experiment_xml import pydantic_to_xml_instructions
# Nested types
# Child model used as the ``historical_event`` field of ArticleWithHistoryXML.
# Each field is declared with pydantic-xml's element(), so it is read from /
# written to an XML sub-element named after the field.
# NOTE(review): documented with comments instead of a class docstring, because
# pydantic_to_xml_instructions may include docstrings in its output -- confirm.
class HistoricalEventXML(BaseXmlModel):
    # Year of the event, kept as free text (str) rather than a number.
    year: str = element(description="The year of the historical event")
    # Free-text description of the event.
    summary: str = element(
        description="A clear description of what happened in this event"
    )
# Root model for the article; ``tag="article"`` makes <article> the root XML
# element. The ``description`` texts are passed to the helper that builds the
# format instructions shown to the model.
# NOTE(review): documented with comments instead of a class docstring, because
# pydantic_to_xml_instructions may include docstrings in its output -- confirm.
class ArticleWithHistoryXML(BaseXmlModel, tag="article"):
    # Article headline.
    title: str = element(description="Title of the article")
    # Short restatement of the reader's question.
    problem: str = element(
        description="Summary of the writer's question, write concisely"
    )
    # Nested sub-model: one historical event with its own child elements.
    historical_event: HistoricalEventXML = element(
        description="The year and explanation of a historical event"
    )
Using a helper function to create the XML format instructions from the Pydantic XML model:
# Print the XML format instructions that the helper derives from the model
# class, including the nested historical_event element.
print(pydantic_to_xml_instructions(ArticleWithHistoryXML))
You must respond only in XML using the following schema. Do not provide any output outside the first and last XML tags. <article> <!----> <title> {Title of the article - must be type str} </title> <problem> {Summary of the writer's question, write concisely - must be type str} </problem> <historical_event> <!----> <year> {The year of the historical event - must be type str} </year> <summary> {A clear description of what happened in this event - must be type str} </summary> </historical_event> </article>
# Variant of the prompt with no system message: both the format instructions
# ({format_instructions}) and the science-writer task are sent as human
# messages.
prompt_format = ChatPromptTemplate.from_messages(
    [
        (
            "human",
            "Write an article in the provided format that answers the user question.\n{format_instructions}",
        ),
        ("human", test_science_prompt_txt),
    ]
)
# Pipeline without an output parser: prompt (schema-derived XML instructions
# pre-filled) -> model. The chain returns the raw chat message.
test_chain = (
    prompt_format.partial(
        format_instructions=pydantic_to_xml_instructions(ArticleWithHistoryXML),
    )
    | llm_model
)

output = test_chain.invoke({"question": "What is the oldest recorded fossil?"})

# Parse and validate the XML text of the reply directly with pydantic-xml.
ArticleWithHistoryXML.from_xml(output.content)
ArticleWithHistoryXML(title='The Oldest Recorded Fossil: A Window into the Past', problem='What is the oldest recorded fossil and how was it discovered?', historical_event=HistoricalEventXML(year='3.4 billion years old', summary="Astronauts on board NASA's Apollo 11 mission in 1969 discovered a fossilized piece of stromatolite, a layered structure created by ancient microorganisms."))
However, we can't use the PydanticOutputParser
with a Pydantic XML model, because it assumes the model's output is JSON.
# Flat article schema (title / problem / answer) used for this demonstration.
# ArticleXML is not defined or imported anywhere in the visible part of this
# notebook, so it is declared here to keep the cell runnable from a fresh
# kernel. Field descriptions mirror the hand-written XML format used earlier.
# (Documented with comments rather than a docstring so the generated format
# instructions are not affected.)
class ArticleXML(BaseXmlModel, tag="article"):
    title: str = element(description="Title of the article")
    problem: str = element(
        description="Summary of the writer's question, write concisely"
    )
    answer: str = element(description="Answer the writer's question")


# PydanticOutputParser parses the model reply as JSON before validating it
# against the given Pydantic class.
pydantic_parser = PydanticOutputParser(pydantic_object=ArticleXML)

# Pipeline: prompt (XML format instructions) -> model -> JSON-based parser.
test_chain = (
    prompt_format.partial(format_instructions=pydantic_to_xml_instructions(ArticleXML))
    | llm_model
    | pydantic_parser
)

# Expected to raise OutputParserException ("Invalid json output"): the model
# answers in XML as instructed, which the JSON-based parser cannot read.
output = test_chain.invoke(dict(question="What is the oldest recorded fossil?"))
OutputParserException: Invalid json output: <article> <title>The Oldest Recorded Fossil</title> <problem>What is the oldest recorded fossil?</problem> <answer> The oldest recorded fossil is that of Eoapto Ceratita, a species of extinct marine snail, which dates back an astonishing 544 million years to the Cambrian period. This remarkable find was discovered in the Burgess Shale in British Columbia, Canada, and has provided significant insights into the evolution of life on Earth during this critical period. The fossil was first identified by paleontologist Joseph McLaughlin in 2012, and its discovery has been hailed as a major breakthrough in the field of paleontology. The Eoapto Ceratita fossil is not only an important find but also sheds light on the complex relationships between different species that existed during this time. This remarkable discovery serves as a testament to the rich history of life on our planet and highlights the importance of continued exploration and research in the field of paleontology.</answer> </article> For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE