#!/usr/bin/env python
# coding: utf-8

# [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/examples/pinecone-and-scaling.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/examples/pinecone-and-scaling.ipynb)

# # Scaling to Many Routes and Using Pinecone

# Semantic router can be used with many hundreds, thousands, or even more
# routes. At very large scales it can be useful to use a vector database to
# store and search though your route vector space. Although we do not
# demonstrate _very large_ scale in this notebook, we will demonstrate more
# routes than usual and we will also see how to use the `PineconeIndex` for
# potential scalability and route persistence beyond our local machines.

# ## Installing the Library

# In[1]:

get_ipython().system('pip install -qU "semantic-router[local, pinecone]==0.0.22" datasets==2.17.0')

# ## Downloading Routes

# In[2]:

from datasets import load_dataset

data = load_dataset("aurelio-ai/generic-routes", split="train")
data

# Each row in this dataset is a single route:

# In[3]:

data[0]

# We transform these into `Route` objects like so:

# In[4]:

from semantic_router import Route

# Each dataset row is a mapping whose keys line up with the Route
# constructor's keyword arguments, so iterate the dataset directly instead
# of indexing with range(len(data)).
routes = [Route(**row) for row in data]
routes[0]

# Next we initialize an `encoder`. We will use a simple `HuggingFaceEncoder`,
# we can also use popular encoder APIs like `CohereEncoder` and
# `OpenAIEncoder`.

# In[5]:

from semantic_router.encoders import HuggingFaceEncoder

encoder = HuggingFaceEncoder()

# Now we initialize our `PineconeIndex`, all it requires is a
# [Pinecone API key](https://app.pinecone.io) (you do need to be using
# Pinecone Serverless).
# In[6]:

import os
from getpass import getpass

from semantic_router.index.pinecone import PineconeIndex

# Prefer an already-exported key; otherwise prompt for it interactively
# (getpass keeps it out of the notebook output).
os.environ["PINECONE_API_KEY"] = os.environ.get("PINECONE_API_KEY") or getpass(
    "Enter Pinecone API key: "
)

index = PineconeIndex(index_name="index", namespace="namespace")

# In[7]:

from semantic_router import RouteLayer

rl = RouteLayer(encoder=encoder, routes=routes, index=index)

# We run the solely static routes layer:

# In[8]:

rl("how's the weather today?").name

# _If you see a warning about no classification being found, wait a moment
# and run the above cell again._

# ## Loading Index From Previous Initialization

# Because we're using Pinecone our route index can now persist / be access
# from different locations by simply connecting to the pre-existing index,
# by default this index uses the identifier `"semantic-router--index"` —
# this is the index we'll be loading here, but we can change the name via
# the `index_name` parameter if preferred.
#
# First, let's delete our old route layer, `index`, and `routes`.

# In[9]:

del rl, index, routes

# Let's load our index first. As mentioned, `"index"` is the default index
# name and we are passing `"namespace"` as namespace name for the pinecone
# object.

# In[10]:

index = PineconeIndex(index_name="index", namespace="namespace")

# We load the pre-existing routes from this index like so:

# In[11]:

index.get_routes()

# We will transform these into a dictionary format that we can use to
# initialize our `Route` objects.

# In[12]:

from collections import defaultdict

# Group utterances by route name; defaultdict(list) removes the manual
# "create the list on first sight of a key" bookkeeping.
routes_dict = defaultdict(list)
for route, utterance in index.get_routes():
    routes_dict[route].append(utterance)

# In[13]:

routes_dict

# Now we transform these into a list of `Route` objects.
# In[14]:

# Rebuild one Route per grouped entry: the dict key is the route name and
# the value is its list of utterances.
routes = [
    Route(name=route_name, utterances=route_utterances)
    for route_name, route_utterances in routes_dict.items()
]
routes[0]

# Now we reinitialize our `RouteLayer`:

# In[15]:

from semantic_router import RouteLayer

rl = RouteLayer(encoder=encoder, routes=routes, index=index)

# And test it again:

# In[16]:

rl("say something to make me laugh").name

# In[17]:

rl("tell me something amusing").name

# In[18]:

rl("it's raining cats and dogs today").name

# In[19]:

# delete index
index.delete_index()

# Perfect, our routes loaded from our `PineconeIndex` are working as
# expected! As mentioned, we can use the `PineconeIndex` for persistance
# and high scale use-cases, for example where we might have hundreds of
# thousands of utterances, or even millions.

# ---