#!/usr/bin/env python
# coding: utf-8

# # Getting Started with Prompt Flow

# **Prerequisites** - To make the most of this tutorial, you'll need:
#
# - A local clone of the Prompt Flow repository
# - A Python environment with Jupyter Notebook support (such as Jupyter Lab or the Python extension for Visual Studio Code)
# - Basic Python programming skills :)
#
# _A basic understanding of machine learning can be beneficial, but it's not mandatory._
#
#
# **Learning Objectives** - Upon completing this tutorial, you should be able to:
#
# - Run your first prompt flow sample
# - Run your first evaluation
#
#
# The sample used in this tutorial is the [web-classification](../../flows/standard/web-classification/README.md) flow, which categorizes URLs into several predefined classes. Classification is a traditional machine learning task, and this sample illustrates how to perform classification using GPT and prompts.

# ## 0. Install dependent packages

# In[ ]:

get_ipython().run_line_magic('pip', 'install -r ../../requirements.txt')

# ## 1. Create necessary connections
#
# A connection helps securely store and manage the secret keys or other sensitive credentials required to interact with LLMs and other external tools, for example Azure Content Safety.
#
# In this notebook we will use the `web-classification` flow, which relies on a connection named `open_ai_connection`, so we need to set up that connection if it hasn't been added before. Once created, the connection is stored in a local database and can be used in any flow.
#
# If you don't have an Azure OpenAI resource yet, prepare one and get your `api_key` by following this [instruction](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal).

# In[ ]:

import json

from promptflow import PFClient
from promptflow.connections import AzureOpenAIConnection, OpenAIConnection

# The client helps manage your runs and connections.
pf = PFClient()

# In[ ]:

try:
    conn_name = "open_ai_connection"
    conn = pf.connections.get(name=conn_name)
    print("using existing connection")
except Exception:
    # Follow https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal
    # to create an Azure OpenAI resource, then fill in the values below.
    connection = AzureOpenAIConnection(
        name=conn_name,
        api_key="",
        api_base="",
        api_type="azure",
        api_version="",
    )

    # Use this instead if you have an existing OpenAI account:
    # connection = OpenAIConnection(
    #     name=conn_name,
    #     api_key="",
    # )

    conn = pf.connections.create_or_update(connection)
    print("successfully created connection")

print(conn)

# ## 2. Run web-classification flow
#
# `web-classification` is a flow demonstrating multi-class classification with an LLM. Given a URL, it classifies the URL into one web category using just a few shots, a simple summarization prompt, and a classification prompt.
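# Before running the flow, you can optionally confirm that the `open_ai_connection` it depends on is present in the local store. The cell below is a minimal optional sketch: it assumes your promptflow version exposes `pf.connections.list()` on the local client, and the fields available on each connection object may differ between versions.

# In[ ]:

# Optional sanity check: confirm the connection the flow depends on exists in
# the local store before running the flow.
local_connections = [c.name for c in pf.connections.list()]
print("local connections:", local_connections)
if "open_ai_connection" not in local_connections:
    print("open_ai_connection not found - create it in step 1 before continuing.")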
# ### Set flow path

# In[ ]:

flow = "../../flows/standard/web-classification"  # path to the flow directory

# ### Quick test

# In[ ]:

# Test the flow with a single input.
flow_inputs = {
    "url": "https://play.google.com/store/apps/details?id=com.twitter.android",
}
flow_result = pf.test(flow=flow, inputs=flow_inputs)
print(f"Flow result: {flow_result}")

# In[ ]:

# Test a single node in the flow.
node_name = "fetch_text_content_from_url"
node_inputs = {
    "url": "https://play.google.com/store/apps/details?id=com.twitter.android"
}
flow_result = pf.test(flow=flow, inputs=node_inputs, node=node_name)
print(f"Node result: {flow_result}")

# ### Batch run with a data file (with multiple lines of test data)

# In[ ]:

data = "../../flows/standard/web-classification/data.jsonl"  # path to the data file

# Create a run with the default variant.
base_run = pf.run(flow=flow, data=data, stream=True)

# In[ ]:

details = pf.get_details(base_run)
details.head(10)

# ## 3. Evaluate your flow
#
# You can now use an evaluation method to evaluate your flow. Evaluation methods are themselves flows that use Python, an LLM, or other tools to calculate metrics such as accuracy or relevance score.
#
# In this notebook, we use the `classification-accuracy-eval` flow for evaluation. This flow illustrates how to evaluate the performance of a classification system: it compares each prediction to the groundtruth, assigns a "Correct" or "Incorrect" grade, and aggregates the results to produce metrics such as accuracy, which reflects how good the system is at classifying the data.

# ### Run evaluation on the previous batch run
#
# The **base_run** is the batch run we completed in step 2 above, for the web-classification flow with "data.jsonl" as input.

# In[ ]:

eval_flow = "../../flows/evaluation/eval-classification-accuracy"

eval_run = pf.run(
    flow=eval_flow,
    data="../../flows/standard/web-classification/data.jsonl",  # path to the data file
    run=base_run,  # specify base_run as the run you want to evaluate
    column_mapping={
        "groundtruth": "${data.answer}",
        "prediction": "${run.outputs.category}",
    },  # map the answer field from the data and the category output of the run to the evaluation flow's inputs
    stream=True,
)

# In[ ]:

details = pf.get_details(eval_run)
details.head(10)

# In[ ]:

metrics = pf.get_metrics(eval_run)
print(json.dumps(metrics, indent=4))

# In[ ]:

pf.visualize([base_run, eval_run])

# By now you've successfully run your first prompt flow and even evaluated it. That's great!
#
# You can check out the [web-classification](../../flows/standard/web-classification/) flow and the [classification-accuracy](../../flows/evaluation/eval-classification-accuracy/) flow for more details, and start building your own flow.
#
# Or you can move on to a more advanced topic: experimenting with a variant.

# ### Another batch run with a variant
#
# A [variant](../../../docs/concepts/concept-variants.md) in prompt flow lets you experiment with LLMs. You can point a variant of a Prompt/LLM node to a different prompt, or use different LLM parameters such as temperature.
#
# In this example, the `summarize_text_content` node of `web-classification` has two variants: `variant_0` and `variant_1`.
# The difference between them is the input parameters:
#
# variant_0:
#
# - inputs:
#     - deployment_name: gpt-35-turbo
#     - max_tokens: '128'
#     - temperature: '0.2'
#     - text: ${fetch_text_content_from_url.output}
#
# variant_1:
#
# - inputs:
#     - deployment_name: gpt-35-turbo
#     - max_tokens: '256'
#     - temperature: '0.3'
#     - text: ${fetch_text_content_from_url.output}
#
#
# You can check the whole flow definition in [flow.dag.yaml](../../flows/standard/web-classification/flow.dag.yaml).

# In[ ]:

# Use variant_1 of the summarize_text_content node.
variant_run = pf.run(
    flow=flow,
    data=data,
    variant="${summarize_text_content.variant_1}",  # here we specify that the "summarize_text_content" node should use its variant_1 version
    stream=True,
)

# In[ ]:

details = pf.get_details(variant_run)
details.head(10)

# ### Run evaluation on the variant run
#
# Evaluating the variant run lets us compare its metrics with the baseline later and see which works better.

# In[ ]:

eval_flow = "../../flows/evaluation/eval-classification-accuracy"

eval_run_variant = pf.run(
    flow=eval_flow,
    data="../../flows/standard/web-classification/data.jsonl",  # path to the data file
    run=variant_run,  # specify the variant run as the run you want to evaluate
    column_mapping={
        "groundtruth": "${data.answer}",
        "prediction": "${run.outputs.category}",
    },  # map the answer field from the data and the category output of the run to the evaluation flow's inputs
    stream=True,
)

# In[ ]:

details = pf.get_details(eval_run_variant)
details.head(10)

# In[ ]:

metrics = pf.get_metrics(eval_run_variant)
print(json.dumps(metrics, indent=4))

# In[ ]:

pf.visualize([eval_run, eval_run_variant])

# # Next Steps
#
# Learn more on:
#
# - [Manage connections](../../connections/connection.ipynb): how to manage the endpoints/secrets information to access external services including LLMs.
# - [Chat with PDF](../e2e-development/chat-with-pdf.md): go through an end-to-end tutorial on how to develop a chat application with prompt flow.
# - [Deploy http endpoint](../flow-deploy/deploy.md): how to deploy the flow as a local http endpoint.
# - [Prompt flow in Azure AI](./quickstart-azure.ipynb): run and evaluate flows in Azure AI, where you can collaborate with your team better.
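# If you'd like a quick side-by-side view of the two evaluations without opening the visualization, you can also fetch both sets of metrics in one cell before you leave this notebook. This is a minimal optional sketch that simply reuses the `eval_run` and `eval_run_variant` objects created above and assumes both runs completed successfully.

# In[ ]:

# Optional: compare the baseline and variant_1 evaluation metrics directly.
comparison = {
    "baseline": pf.get_metrics(eval_run),
    "variant_1": pf.get_metrics(eval_run_variant),
}
print(json.dumps(comparison, indent=4))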