#!/usr/bin/env python
# coding: utf-8

# # Groq
#
# Welcome to Groq! 🚀 At Groq, we have developed the world's first Language
# Processing Unit™, or LPU. The Groq LPU has a deterministic, single-core
# streaming architecture that sets the standard for GenAI inference speed,
# with predictable and repeatable performance for any given workload.
#
# Beyond the architecture, our software is designed to give developers like
# you the tools you need to create innovative and powerful AI applications.
# With Groq as your engine, you can:
#
# * Achieve uncompromised low latency and performance for real-time AI and
#   HPC inference 🔥
# * Know the exact performance and compute time for any given workload 🔮
# * Take advantage of our cutting-edge technology to stay ahead of the
#   competition 💪
#
# Want to learn more about Groq? Visit our [website](https://groq.com) for
# more resources and join our
# [Discord community](https://discord.gg/JvNsBDKeCG) to connect with our
# developers!
#
# ## Setup
#
# If you are opening this notebook on colab, you will probably need to
# install LlamaIndex 🦙.

# NOTE: `get_ipython()` is not defined in this file; it is supplied by the
# IPython/Jupyter runtime, so the two install cells below only work there.

# In[ ]:


# The magic name must be 'pip'; the remainder of the line is its argument.
get_ipython().run_line_magic('pip', 'install llama-index-llms-groq')


# In[ ]:


get_ipython().system('pip install llama-index')


# In[ ]:


# Imported after the install cells so the package exists on a fresh runtime.
from llama_index.llms.groq import Groq


# Create an API key at the [Groq console](https://console.groq.com/keys),
# then set it as the environment variable `GROQ_API_KEY`.
#
# ```bash
# export GROQ_API_KEY=<your api key>
# ```
#
# Alternatively, pass your API key to the LLM when you initialize it:

# In[ ]:


# "your_api_key" is a placeholder; replace it with a real key before running.
llm = Groq(model="mixtral-8x7b-32768", api_key="your_api_key")


# A list of available LLM models can be found
# [here](https://console.groq.com/docs/models).

# In[ ]:


response = llm.complete("Explain the importance of low latency LLMs")


# In[ ]:


print(response)


# #### Call `chat` with a list of messages

# In[ ]:


from llama_index.core.llms import ChatMessage

messages = [
    ChatMessage(
        role="system", content="You are a pirate with a colorful personality"
    ),
    ChatMessage(role="user", content="What is your name"),
]
resp = llm.chat(messages)


# In[ ]:


print(resp)


# ### Streaming
#
# Using the `stream_complete` endpoint

# In[ ]:


response = llm.stream_complete("Explain the importance of low latency LLMs")


# In[ ]:


# Print each incremental piece as it arrives, without inserting newlines.
for r in response:
    print(r.delta, end="")


# Using the `stream_chat` endpoint

# In[ ]:


# Reuses `ChatMessage` and the `messages` list defined in the `chat` cell
# above; the original notebook re-imported and rebuilt identical values here.
resp = llm.stream_chat(messages)


# In[ ]:


for r in resp:
    print(r.delta, end="")