#!/usr/bin/env python
# coding: utf-8

# # JSON Query Engine
#
# The JSON query engine is used to query JSON documents that conform to a
# JSON schema.
#
# The JSON schema is used in the context of a prompt to convert a natural
# language query into a structured JSON Path query. That JSON Path query is
# then used to retrieve the data that answers the given question.
#
# If you are opening this notebook on colab, you will probably need to
# install LlamaIndex.
#
# NOTE: the get_ipython() calls below come from the notebook export and are
# only available when this script runs under IPython/Jupyter.

# In[ ]:

get_ipython().run_line_magic('pip', 'install llama-index-llms-openai')

# In[ ]:

get_ipython().system('pip install llama-index')

# In[ ]:

# First, install the jsonpath-ng package, which is used by default to parse
# and execute the JSONPath queries. The notebook shell escape
# (`!pip install jsonpath-ng`) is spelled as a get_ipython() call so that it
# is valid Python and matches the install cells above.
get_ipython().system('pip install jsonpath-ng')

# In[ ]:

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# In[ ]:

import os
import openai

# Placeholder value: replace with a real key before running.
os.environ["OPENAI_API_KEY"] = "YOUR_KEY_HERE"

# In[ ]:

from IPython.display import Markdown, display

# ### Let's start with a toy JSON
#
# A very simple JSON object containing user comment data from a blog post
# website.
#
# We also provide a JSON schema (generated by giving ChatGPT a sample of the
# JSON).
#
# #### Advice
# Make sure to provide a helpful `"description"` value for each field in the
# JSON schema.
#
# As you can see in the given example, the description of the `"username"`
# field mentions that usernames are lowercase. You will find that this is
# very helpful for the LLM in producing the correct JSON path query.

# In[ ]:

# Test on some sample data
json_value = {
    "blogPosts": [
        {
            "id": 1,
            "title": "第一篇博客文章",
            "content": "这是我的第一篇博客文章",
        },
        {
            "id": 2,
            "title": "第二篇博客文章",
            "content": "这是我的第二篇博客文章",
        },
    ],
    "comments": [
        {
            "id": 1,
            "content": "好帖子！",
            "username": "jerry",
            "blogPostId": 1,
        },
        {
            "id": 2,
            "content": "有趣的想法",
            "username": "simon",
            "blogPostId": 2,
        },
        {
            "id": 3,
            "content": "喜欢阅读这篇文章！",
            "username": "simon",
            "blogPostId": 2,
        },
    ],
}

# JSON schema object that the above JSON value conforms to
json_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "description": "一个非常简单的博客文章应用程序的模式",
    "type": "object",
    "properties": {
        "blogPosts": {
            "description": "博客文章列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "博客文章的唯一标识符",
                        "type": "integer",
                    },
                    "title": {
                        "description": "博客文章的标题",
                        "type": "string",
                    },
                    "content": {
                        "description": "博客文章的内容",
                        "type": "string",
                    },
                },
                "required": ["id", "title", "content"],
            },
        },
        "comments": {
            "description": "博客文章评论列表",
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {
                        "description": "评论的唯一标识符",
                        "type": "integer",
                    },
                    "content": {
                        "description": "评论的内容",
                        "type": "string",
                    },
                    "username": {
                        "description": "评论者的用户名（小写）",
                        "type": "string",
                    },
                    "blogPostId": {
                        "description": "评论所属的博客文章的标识符",
                        "type": "integer",
                    },
                },
                "required": ["id", "content", "username", "blogPostId"],
            },
        },
    },
    "required": ["blogPosts", "comments"],
}

# In[ ]:

from llama_index.llms.openai import OpenAI
from llama_index.core.indices.struct_store import JSONQueryEngine

llm = OpenAI(model="gpt-4")

# Two engines over the same data and schema: one built with the default
# settings, and one with synthesize_response=False.
nl_query_engine = JSONQueryEngine(
    json_value=json_value,
    json_schema=json_schema,
    llm=llm,
)
raw_query_engine = JSONQueryEngine(
    json_value=json_value,
    json_schema=json_schema,
    llm=llm,
    synthesize_response=False,
)

# In[ ]:

nl_response = nl_query_engine.query(
    "What comments has Jerry been writing?",
)
raw_response = raw_query_engine.query(
    "What comments has Jerry been writing?",
)

# In[ ]:

# The line breaks inside the rendered text are written as explicit "\n"
# escapes; a raw newline inside a single-quoted f-string is a syntax error.
display(
    Markdown(f"\n\nNatural language Response\n\n{nl_response}")
)
display(Markdown(f"\n\nRaw JSON Response\n\n{raw_response}"))

# In[ ]:

# Get the JSON path query string. Works for the raw response as well.
print(nl_response.metadata["json_path_response_str"])