#!/usr/bin/env python
# coding: utf-8

# # Images extraction and analysis from a PDF file using Azure Open AI GPT4-Vision

# %pip install Spire.Pdf

import base64
import datetime
import json
import os
import shutil
import sys  # was missing: sys.version is used below (previously only worked via the spire star-imports)
import time

import openai
import requests
from dotenv import load_dotenv
from PIL import Image as PILimage
from spire.pdf.common import *
from spire.pdf import *


def check_openai_version():
    """
    Check the installed Azure OpenAI client version.

    Prints the installed ``openai`` package version and warns when it is
    older than 1.0.0 (the version this script's API surface requires).
    Returns None; purely informational.
    """
    installed_version = openai.__version__
    try:
        # Compare on the major version number. The previous approach,
        # float(installed_version[:3]), misparses versions such as
        # "1.35.7" (-> 1.3) and breaks on a two-digit major ("10.")
        major_version = int(installed_version.split(".")[0])
    except ValueError:
        print("Invalid OpenAI version format")
        return

    print(f"Installed OpenAI version: {installed_version}")

    if major_version < 1:
        print("[Warning] You should upgrade OpenAI to have version >= 1.0.0")
        print("To upgrade, run: %pip install openai --upgrade")
    else:
        print(f"[OK] OpenAI version {installed_version} is >= 1.0.0")


check_openai_version()

print(f"Today is {datetime.datetime.today().strftime('%d-%b-%Y %H:%M:%S')}")
print(f"Python version: {sys.version}")
print(f"OpenAI version: {openai.__version__}")

# ## Azure Open AI

# Credentials / endpoints are read from the azure.env dotenv file.
load_dotenv("azure.env")

# Azure Open AI (plain attribute assignments; the previous
# "openai.api_type: str = ..." annotation on an attribute target was a no-op)
openai.api_type = "azure"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")

# Azure AI Vision (aka Azure Computer Vision)
azure_aivision_endpoint = os.getenv("azure_aivision_endpoint")
azure_aivision_key = os.getenv("azure_aivision_key")

model = "GPT4Vision"  # This is the deployed name of your GPT4 Vision model from the Azure Open AI studio

indexname = "pdf-docs"

# ## Directory to save extracted images from the PDF file
# Recreated from scratch on every run so stale images never linger.
images_dir = "images"

if os.path.exists(images_dir):
    shutil.rmtree(images_dir)

os.makedirs(images_dir, exist_ok=True)

# ## Running the extraction of all the images from the PDF file

pdf_file = "ios17.pdf"

get_ipython().system('ls $pdf_file -lh')
# Reading the pdf file and extracting every embedded image
doc = PdfDocument()
doc.LoadFromFile(pdf_file)

images_list = []
nb = 0
print(f"Extracting images from PDF file {pdf_file}\n")

# For each page
for p in range(doc.Pages.Count):
    page = doc.Pages.get_Item(p)
    idx = 1
    # Extract any image from the page and save each image to a png file
    for image in page.ExtractImages():
        # (fixed: the original had a redundant double assignment
        # "image_file = (image_file) = ...")
        image_file = (
            f"{images_dir}/{os.path.splitext(os.path.basename(pdf_file))[0]}"
            f"_page_{p + 1:03}_nb_{idx:02}.png"
        )
        image.Save(image_file, ImageFormat.get_Png())
        print(f"{nb + 1:04} Saving image to {image_file}")
        images_list.append(image_file)
        idx += 1
        nb += 1

doc.Close()
print("\nDone")
print(f"{nb} images were saved.")

print(f"Number of extracted images from the PDF file = {len(images_list)}")

# ## Viewing all the extracted images

get_ipython().system('ls $images_dir -lh')

get_ipython().run_cell_magic('javascript', 'Python', 'OutputArea.auto_scroll_threshold = 9999\n')

for pos, extracted_file in enumerate(images_list, start=1):
    print(f"{pos:02} Extracted image file: {extracted_file}")
    display(PILimage.open(extracted_file))
print("\nEnd")


# ## Analysing these extracted images with GPT4 Vision

def GPT4V_with_AzureAIVision(image_file, prompt, disp=False):
    """
    GPT-4 Turbo with vision and Azure AI enhancements.

    Sends the image plus the prompt to the deployed GPT-4 Vision model with
    the Azure AI Vision (OCR + grounding) enhancements enabled, and prints
    the model answer and token usage.

    Args:
        image_file: Path to the image file to analyse.
        prompt: The user prompt sent alongside the image.
        disp: When True, return the parsed JSON results on success.

    Returns:
        The parsed response dict when disp is True and the call succeeded,
        otherwise None.
    """
    # Testing if image file exists.
    # Fixed: the original only printed the error and fell through,
    # so open() below crashed with FileNotFoundError anyway.
    if not os.path.exists(image_file):
        print(f"[Error] Image file {image_file} does not exist.")
        return None

    # Endpoint
    base_url = f"{openai.api_base}/openai/deployments/{model}"
    gpt4vision_endpoint = (
        f"{base_url}/extensions/chat/completions?api-version=2023-12-01-preview"
    )

    # Header
    headers = {"Content-Type": "application/json", "api-key": openai.api_key}

    # Encoded image — context manager so the file handle is always closed
    # (the original leaked the handle from a bare open()).
    with open(image_file, "rb") as image_fh:
        base_64_encoded_image = base64.b64encode(image_fh.read()).decode("ascii")

    # Context
    context = "You are an AI assistant. You analyse an image and make some answers based on a prompt. Always reply in English."

    # Payload
    json_data = {
        "model": "gpt-4-vision-preview",
        "enhancements": {"ocr": {"enabled": True}, "grounding": {"enabled": True}},
        "dataSources": [
            {
                "type": "AzureComputerVision",
                "endpoint": azure_aivision_endpoint,
                "key": azure_aivision_key,
                "indexName": indexname,
            }
        ],
        "messages": [
            {"role": "system", "content": context},
            {"role": "user", "content": [prompt, {"image": base_64_encoded_image}]},
        ],
        "max_tokens": 4000,
        "temperature": 0.2,
        "top_p": 1,
    }

    # Response
    response = requests.post(
        gpt4vision_endpoint, headers=headers, data=json.dumps(json_data)
    )

    # Testing the status code from the model response
    if response.status_code == 200:
        now = str(datetime.datetime.today().strftime("%d-%b-%Y %H:%M:%S"))
        print(f"Analysis of image: {image_file}")
        results = json.loads(response.text)
        print("\033[1;31;34m")
        print(results["choices"][0]["message"]["content"])
        prompt_tokens = results["usage"]["prompt_tokens"]
        completion_tokens = results["usage"]["completion_tokens"]
        total_tokens = results["usage"]["total_tokens"]
        print("\n\033[1;31;32mDone:", now)
        print(
            f"Prompt tokens = {prompt_tokens} | Completion tokens = {completion_tokens} \
| Total tokens = {total_tokens}"
        )
        print("\n[Note] These results are generated by an AI")
        print("\033[0m")
        if disp:
            return results
    elif response.status_code == 429:
        # Throttled by the service: surface the error payload to the caller.
        print(
            "[429 Error] Too many requests. Please wait a couple of seconds and try again.\n"
        )
        print(json.loads(response.text))
    else:
        print(f"[Error] Error code: {response.status_code}\n")
        print(json.loads(response.text))
    return None


print("Analysing all extracted images with Azure Open AI GPT4 Turbo with Vision\n")

for pos, extracted_file in enumerate(images_list, start=1):
    # Print image filename
    print(f"{pos:02} Extracted image file: {extracted_file}")
    # Display image
    display(PILimage.open(extracted_file))
    # Run the GPT4 Turbo Vision model
    GPT4V_with_AzureAIVision(
        extracted_file,
        "Describe this in one line and generate some hashtags and emojis.",
    )
    # Pause to avoid 429 errors
    time.sleep(30)

print("\nEnd")