%xmode minimal
Exception reporting mode: Minimal
from langchain_core.prompts import ChatPromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
import streamlit as st
# 1 - Claude
# 2 - Ollama, Llama 3.2
# 3 - Ollama, Llama 3.2 with JSON mode
# 4 - Ollama, Gemma2
# 5 - Ollama, Gemma2 with JSON mode
USE_LLM = 2
claude_api_key = "<API KEY>"
if USE_LLM == 1:
    llm_model = ChatAnthropic(model="claude-3-haiku-20240307", api_key=claude_api_key)
elif USE_LLM == 2:
    llm_model = ChatOllama(model="llama3.2", temperature=1)
elif USE_LLM == 3:
    llm_model = ChatOllama(model="llama3.2", format="json", temperature=1)
elif USE_LLM == 4:
    llm_model = ChatOllama(model="gemma2", temperature=1)
elif USE_LLM == 5:
    llm_model = ChatOllama(model="gemma2", format="json", temperature=1)
We can define a Pydantic model, and the output will be returned as a validated Pydantic object.
from typing import Optional
from pydantic import BaseModel, Field
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: int = Field(description="How funny the joke is, from 1 to 10")
structured_llm = llm_model.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")
Joke(setup='Why did the cat join a band?', punchline='Because it wanted to be the purr-cussionist!', rating=8)
Defining the schema with a TypedDict parses the JSON output into a plain Python dict rather than a Pydantic object, so there is no schema validation.
from typing_extensions import Annotated, TypedDict
class JokeTD(TypedDict):
    """Joke to tell user."""

    setup: Annotated[str, ..., "The setup of the joke"]
    punchline: Annotated[str, ..., "The punchline of the joke"]
    rating: Annotated[Optional[int], ..., "How funny the joke is, from 1 to 10"]
structured_llm = llm_model.with_structured_output(JokeTD)
structured_llm.invoke("Tell me a joke about cats")
{'punchline': 'Why did the cat join a band?', 'rating': '8', 'setup': 'Because it wanted to be the purr-cussionist!'}
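If we still want validation with the TypedDict schema, one option (a sketch, not part of the original run) is to pass the returned dict through the Joke Pydantic model defined above.
# Sketch: re-validate the TypedDict-style dict with the Pydantic model.
# Pydantic coerces compatible values (e.g. the string rating '8' becomes an int)
# and raises a ValidationError if required fields are missing or the wrong type.
result = structured_llm.invoke("Tell me a joke about cats")
validated = Joke.model_validate(result)
validated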
Or we can pass the JSON Schema extracted from the Pydantic model directly.
structured_llm = llm_model.with_structured_output(Joke.model_json_schema())
structured_llm.invoke("Tell me a joke about cats")
{'punchline': 'Why did the cat join a band?', 'rating': 8, 'setup': 'Because it wanted to be a purr-cussionist.'}
Let's try a more complicated structure with nested types
class ArticleResponse(BaseModel):
    """A clear and concise answer to the user's question."""

    title: str = Field(description="Title of the article")
    context: str = Field(
        description="Provide a brief historical context to answer the question."
    )
    historical_timeline: list[str] = Field(
        description="Provide a list of historical events relevant to the question"
    )
structured_llm = llm_model.with_structured_output(ArticleResponse)
structured_llm.invoke("Tell me the history of the state of Texas in America")
ValidationError: 1 validation error for ArticleResponse historical_timeline Input should be a valid list [type=list_type, input_value='["1528: Spanish explorer...he American Civil War"]', input_type=str] For further information visit https://errors.pydantic.dev/2.9/v/list_type
structured_llm = llm_model.with_structured_output(ArticleResponse, include_raw=True)
results = structured_llm.invoke("Tell me the history of the state of Texas in America")
raw_output = results["raw"].response_metadata["message"]["tool_calls"][0]["function"][
    "arguments"
]
try:
    ArticleResponse(**raw_output)
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")
    print(f"\nRaw output:\n{raw_output}")
ValidationError: 1 validation error for ArticleResponse historical_timeline Input should be a valid list [type=list_type, input_value='["1763: Texas becomes pa...ate States of America"]', input_type=str] For further information visit https://errors.pydantic.dev/2.9/v/list_type Raw output: {'context': 'Texas has a rich and diverse history that spans over 300 years, from its early days as a Mexican territory to its current status as the second-largest state in the US.', 'historical_timeline': '["1763: Texas becomes part of Spain after Mexico gains independence from Spain", "1821: The Texas Revolution begins with the Battle of Gonzales and the famous "Come and Take It" declaration", "1836: Texas declares independence from Mexico and establishes the Republic of Texas", "1845: The US purchases Texas from Mexico for $15 million and it becomes a state", "1860s: Texas secedes from the Union during the American Civil War and joins the Confederate States of America"]', 'title': 'The History of Texas'}
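The raw output shows the model returned historical_timeline as a JSON-encoded string rather than a list. One possible mitigation, sketched below and not part of the original notebook, is a "before" validator that decodes string values for that field; it only helps when the embedded string is itself valid JSON, which the nested quotes in the raw output above show is not guaranteed.
import json
from pydantic import field_validator

class ArticleResponseLenient(ArticleResponse):
    """Sketch: tolerate a list field that comes back as a JSON-encoded string."""

    @field_validator("historical_timeline", mode="before")
    @classmethod
    def _decode_stringified_list(cls, value):
        # Hypothetical repair step: decode "[...]" strings into real lists.
        if isinstance(value, str):
            return json.loads(value)
        return value

structured_llm_lenient = llm_model.with_structured_output(ArticleResponseLenient)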
We can generate the JSON schema directly from the Pydantic model; then we get the raw dict output without Pydantic validation.
structured_llm_js = llm_model.with_structured_output(
    ArticleResponse.model_json_schema()
)
structured_llm_js.invoke("Tell me the history of the state of Texas in America")
{'context': 'The state of Texas is located in the south-central region of the United States. It is bordered by Mexico to the south, New Mexico and Oklahoma to the west, Arkansas and Louisiana to the east, and the Gulf of Mexico to the southeast.', 'historical_timeline': '["Texas declared independence from Mexico on March 2, 1836", "The Republic of Texas existed as a separate nation for nearly a decade before being annexed by the United States in 1845", "The Texas Civil War broke out in 1861 over secession from the Union and was fought between pro-Union forces and Confederate states rights advocates", "After the end of the American Civil War, Reconstruction efforts took place in Texas during the late 19th century", "The early 20th century saw significant industrialization and urbanization in Texas", "In 1948, the federal government established several national parks within Texas to preserve its unique natural environment"]', 'title': 'A Brief History of the State of Texas'}
The JSON schema representation is quite straightforward
Joke.model_json_schema()
{'description': 'Joke to tell user.', 'properties': {'setup': {'description': 'The setup of the joke', 'title': 'Setup', 'type': 'string'}, 'punchline': {'description': 'The punchline to the joke', 'title': 'Punchline', 'type': 'string'}, 'rating': {'description': 'How funny the joke is, from 1 to 10', 'title': 'Rating', 'type': 'integer'}}, 'required': ['setup', 'punchline', 'rating'], 'title': 'Joke', 'type': 'object'}
Note the same schema is contained in the format instructions, except for the top-level 'title' and 'type' keys.
from langchain_core.output_parsers import PydanticOutputParser
output_parser = PydanticOutputParser(pydantic_object=Joke)
print(output_parser.get_format_instructions())
The output should be formatted as a JSON instance that conforms to the JSON schema below. As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted. Here is the output schema: ``` {"description": "Joke to tell user.", "properties": {"setup": {"description": "The setup of the joke", "title": "Setup", "type": "string"}, "punchline": {"description": "The punchline to the joke", "title": "Punchline", "type": "string"}, "rating": {"description": "How funny the joke is, from 1 to 10", "title": "Rating", "type": "integer"}}, "required": ["setup", "punchline", "rating"]} ```
Using the PydanticOutputParser allows us to specify JSON outputs for other models that don't support tool calling.
from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
from langchain_core.exceptions import OutputParserException
parser = PydanticOutputParser(pydantic_object=Joke)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Wrap the output in `json` tags\n{format_instructions}",
        ),
        ("human", "{query}"),
    ]
).partial(format_instructions=parser.get_format_instructions())
chain_llm = prompt | llm_model | parser
chain_llm.invoke("Tell me a joke about cats")
OutputParserException: Failed to parse Joke from completion {"message": "Why did the cat join a band? Because it wanted to be the purr-cussionist!"}. Got: 3 validation errors for Joke setup Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing punchline Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing rating Field required [type=missing, input_value={'message': 'Why did the ...e the purr-cussionist!'}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE
prompt_user_format = ChatPromptTemplate.from_template(
    "{input} \n{format_instructions}"
).partial(format_instructions=parser.get_format_instructions())
structured_llm = prompt_user_format | llm_model | StrOutputParser()
print(structured_llm.invoke("Tell me a joke about cats"))
{"properties": {"setup": {"title": "Why did the cat join a band?", "description": "Because it wanted to be the purr-cussionist.", "type": "string"}, "punchline": {"title": "Purr-cussionist joke", "description": "a play on words", "type": "string"}, "rating": {"title": "Funny rating out of 10", "description": "How funny is this cat joke?", "type": "integer"}}, "required": ["setup", "punchline", "rating"]}
llm_model = ChatOllama(model="llama3.2", temperature=1)
chain = prompt_direct | llm_model.with_structured_output(schema=ArticleResponse1)
try:
    output = chain.invoke(dict(question=questions[0]))
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")
llm_model = ChatOllama(model="llama3.2", temperature=1)
chain = prompt_direct | llm_model.with_structured_output(
    schema=ArticleResponse1.model_json_schema()
)
output = chain.invoke(dict(question=questions[0]))
print(type(output))
output
<class 'dict'>
{'answer': 'The oldest recorded fossil dates back to around 3.5 billion years ago, during a time known as the Eoarchean era of the Precambrian period. This ancient relic is called Strelley Pool fossil, found in Western Australia.', 'number': 3400000000, 'title': 'Uncovering the Ancient Past: The Oldest Recorded Fossil'}
Using a system prompt seems much less reliable than just inserting the format instructions into a user prompt. Why is this?
from langchain_core.output_parsers import PydanticOutputParser, JsonOutputParser
from langchain.output_parsers.fix import OutputFixingParser
parser = JsonOutputParser(pydantic_object=ArticleResponse1)
prompt = prompt_user_format.partial(
    format_instructions=parser.get_format_instructions()
)
structured_llm = prompt | llm_model | parser
try:
    output = structured_llm.invoke(dict(question=questions[0]))
    print(output)
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")
OutputParserException: Invalid json output: {"title": "Uncovering the Oldest Record of Life on Earth", "answer": "The oldest recorded fossil is believed to be Strome Canyon chert, which dates back approximately 3.46 billion years. This ancient relic was discovered in Western Australia and provides a glimpse into the earliest life forms on our planet. The fossils were found embedded in a rock formation that has been dated using various geological methods, including uranium-lead dating. This incredible find has sparked significant interest in the scientific community, shedding light on the origins of life on Earth.", "number": 3,430,000,000} For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE
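The parse fails because 3,430,000,000 contains thousands separators, which is not valid JSON. A blunt pre-processing step, sketched here as an assumption rather than something from the original run, strips those commas before the parser sees the text (the regex could also touch digit-comma-digit sequences inside strings).
import re
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

def strip_thousands_separators(text: str) -> str:
    # Remove commas used as thousands separators, e.g. 3,430,000,000 -> 3430000000.
    return re.sub(r"(?<=\d),(?=\d{3})", "", text)

structured_llm = (
    prompt
    | llm_model
    | StrOutputParser()
    | RunnableLambda(strip_thousands_separators)
    | parser
)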
parser = JsonOutputParser(pydantic_object=ArticleResponse1)
prompt = prompt_system_format.partial(
    format_instructions=parser.get_format_instructions()
)
structured_llm = prompt | llm_model | parser
try:
    output = structured_llm.invoke(dict(question=questions[0]))
    print(output)
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")
OutputParserException: Invalid json output: **Uncovering the Oldest Fossil: A Window into Earth's Ancient Past** The search for ancient secrets in the earth's crust has led us to discover one of the most significant fossils in human history. Meet Archeopteryx, a 150-million-year-old bird-like creature that sheds light on the evolution of life on our planet. Discovered in 1861 by German paleontologist Hermann von Meyer, Archeopteryx was initially thought to be a mere hybrid between dinosaurs and birds. However, further analysis revealed its unique characteristics, including feathers, wings, and claws. This remarkable fossil has been extensively studied, providing insights into the transition from non-flying reptiles to birds. Other contenders for the oldest recorded fossil include Tiktaalik, an ancient fish-like creature with limb-like fins, and Hallucigenia, a bizarre worm-like animal with spines on its back. However, Archeopteryx remains one of the most significant discoveries in paleontology, offering a glimpse into the mysteries of life's diversification during the Jurassic period. This ancient bird-like creature serves as a poignant reminder of our shared history with the natural world. For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE
Fixing the output with OutputFixingParser. Could it be better to use another model with a lower temperature instead of the original model?
parser = PydanticOutputParser(pydantic_object=ArticleResponse2)
prompt = prompt_user_format.partial(
    format_instructions=parser.get_format_instructions()
)
llm_model_fix = ChatOllama(model="llama3.2", temperature=0)
parser_fix = OutputFixingParser.from_llm(parser=parser, llm=llm_model_fix)
try:
    structured_llm = prompt | llm_model | parser_fix
    output = structured_llm.invoke(dict(question=questions[0]))
    print(output)
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")
OutputParserException: Failed to parse ArticleResponse2 from completion {"$defs": {"HistoricalEvent": {"description": "The year and explanation of a historical event.", "properties": {"year": {"description": "The year of the historical event", "title": "Year", "type": "integer"}, "description": {"description": "A clear description of what happened in this event", "title": "Description", "type": "string"}}, "required": ["year", "description"], "title": "HistoricalEvent", "type": "object"}}, "description": "Structured article for publication answering a reader's question.", "properties": {"title": {"description": "Title of the article", "title": "Title", "type": "string"}, "historical_event_1": {"$ref": "#/$defs/HistoricalEvent", "description": "The oldest recorded fossil is that of Dickinsonia, a species of ancient animal that lived over 600 million years ago. Discovered in 1909 by Russian paleontologist Raup in what is now present-day Australia, this fossil provides a glimpse into the evolution of life on Earth.", "year": 375000000}, "historical_event_2": {"$ref": "#/$defs/HistoricalEvent", "description": "Although not the oldest recorded fossil, the discovery of Tiktaalik in 2004 has helped scientists better understand the transition from fish to tetrapods during the Devonian period.", "year": 30000000}}, "required": ["title", "historical_event_1", "historical_event_2"]}. Got: 3 validation errors for ArticleResponse2 title Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing historical_event_1 Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing historical_event_2 Field required [type=missing, input_value={'$defs': {'HistoricalEve..., 'historical_event_2']}, input_type=dict] For further information visit https://errors.pydantic.dev/2.9/v/missing For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE
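The fixer model returned the schema itself rather than an instance. An alternative worth trying, sketched here with the caveat that it was not run in this notebook, is RetryWithErrorOutputParser, which re-prompts with the original prompt plus the parsing error (the question variable name follows the surrounding cells).
from langchain.output_parsers.retry import RetryWithErrorOutputParser
from langchain_core.output_parsers import StrOutputParser

retry_parser = RetryWithErrorOutputParser.from_llm(
    parser=parser, llm=llm_model_fix, max_retries=2
)
prompt_value = prompt.format_prompt(question=questions[0])
completion = (prompt | llm_model | StrOutputParser()).invoke(dict(question=questions[0]))
try:
    output = retry_parser.parse_with_prompt(completion, prompt_value)
    print(output)
except Exception as e:
    print(f"{type(e).__name__}: {str(e)}")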
Output without structure
structured_llm = prompt_direct | llm_model
output = structured_llm.invoke(dict(question=questions[0]))
print(output.content)
"The Oldest Recorded Fossil: Uncovering Earth's Ancient Past" For over a century, scientists have been fascinated by fossils – remnants of ancient creatures that once roamed our planet. Among these relics, one fossil stands out for its remarkable age and significance. Meet the fossilized remains of "Hallucigenia," a bizarre creature discovered in 1909 in what is now western Australia. Initially dismissed as a curiosity, recent dating analysis has pushed its estimated age back by millions of years – placing it at an astonishing 500 million years old! To put that in perspective, when Hallucigenia lived, the Earth was still in its Neoproterozoic era, long before dinosaurs roamed the planet. This ancient relic provides a glimpse into the evolution of life on our planet during one of its most formative periods. The discovery of Hallucigenia serves as a reminder that fossils hold secrets to understanding the complex history of our world. Each new find offers insights into Earth's past, shedding light on the mysteries of how life evolved and adapted over millions of years.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: int = Field(description="How funny the joke is, from 1 to 10")
llm_models = {
    # "Anthropic_Haiku": ChatAnthropic(model="claude-3-haiku-20240307", api_key=claude_api_key),
    "Ollama_llama32": ChatOllama(model="llama3.2", temperature=1),
    "Ollama_llama32_json": ChatOllama(model="llama3.2", format="json", temperature=1),
    "Ollama_gemma2": ChatOllama(model="gemma2", temperature=1),
    "Ollama_gemma2_json": ChatOllama(model="gemma2", format="json", temperature=1),
    "Ollama_phi3": ChatOllama(model="phi3", temperature=1),
    "Ollama_phi3_json": ChatOllama(model="phi3", format="json", temperature=1),
}
for llm_model in llm_models.values():
    print(f"Model: {llm_model.__repr__()}")
    test_structured_llm = llm_model.with_structured_output(JokeTD)
    try:
        output = test_structured_llm.invoke("Tell me a joke about cats")
        print(" Tool use support")
    except Exception as e:
        print(" No tool use")
Model: ChatOllama(model='llama3.2', temperature=1.0)
 Tool use support
Model: ChatOllama(model='llama3.2', temperature=1.0, format='json')
 Tool use support
Model: ChatOllama(model='gemma2', temperature=1.0)
 No tool use
Model: ChatOllama(model='gemma2', temperature=1.0, format='json')
 No tool use
Model: ChatOllama(model='phi3', temperature=1.0)
 No tool use
Model: ChatOllama(model='phi3', temperature=1.0, format='json')
 No tool use
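Given those results, a possible fallback pattern (a sketch using a hypothetical helper, not something run above) is to try native tool calling first and drop back to a prompt plus PydanticOutputParser for models without tool support.
def invoke_with_fallback(llm, query: str) -> Joke:
    # Hypothetical helper: prefer with_structured_output (tool calling),
    # otherwise put the format instructions in the user prompt.
    try:
        return llm.with_structured_output(Joke).invoke(query)
    except Exception:
        parser = PydanticOutputParser(pydantic_object=Joke)
        prompt = ChatPromptTemplate.from_template(
            "{input} \n{format_instructions}"
        ).partial(format_instructions=parser.get_format_instructions())
        # This path can still raise OutputParserException if the model
        # ignores the format instructions.
        return (prompt | llm | parser).invoke(dict(input=query))

invoke_with_fallback(llm_models["Ollama_gemma2_json"], "Tell me a joke about cats")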