Dekel Cohen committed
Commit db347cc · 1 Parent(s): 3050102

Project is working!


1) Added gpt-4o azure support

Files changed (4)
  1. .gitignore +1 -0
  2. app.py +12 -3
  3. azure_openai_gpt4o.py +92 -0
  4. vlms.py +25 -0
.gitignore ADDED
@@ -0,0 +1 @@
+*.env
app.py CHANGED
@@ -3,7 +3,15 @@
 import gradio as gr
 import numpy as np
 from vip_runner import vip_runner
-from vlms import GPT4V
+
+USE_AZURE = True  # Set to False to use GPT-4V (OpenAI API)
+if USE_AZURE:
+  from vlms import GPT4Azure
+  GPT_WRAPPER_CLASSNAME = GPT4Azure
+else:
+  from vlms import GPT4V
+  GPT_WRAPPER_CLASSNAME = GPT4V
+
 
 # Adjust radius of annotations based on size of the image
 radius_per_pixel = 0.05
@@ -20,7 +28,7 @@ def run_vip(
     progress=gr.Progress(track_tqdm=False),
 ):
 
-  if not openai_api_key:
+  if not openai_api_key and not USE_AZURE:
     return [], 'Must provide OpenAI API Key'
   if im is None:
     return [], 'Must specify image'
@@ -52,7 +60,8 @@ def run_vip(
       'robot': None,
   }
 
-  vlm = GPT4V(openai_api_key=openai_api_key)
+  # GPT4Azure or GPT4V, depending on the USE_AZURE flag (defaults to True)
+  vlm = GPT_WRAPPER_CLASSNAME(openai_api_key=openai_api_key)
   vip_gen = vip_runner(
       vlm,
       im,
azure_openai_gpt4o.py ADDED
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# azure_openai_gpt4o.py
+import os
+import json
+import requests
+from dotenv import load_dotenv
+
+# Load environment variables from .env
+load_dotenv()
+
+def call_llm(messages, azure_deployment_model=None, max_tokens=2048, temperature=0.1):
+    """
+    Call Azure OpenAI's chat completions endpoint with the given messages.
+
+    Args:
+        messages (list): List of message objects for the conversation.
+        azure_deployment_model (str): Name of the Azure model deployment (not always 'gpt-4' as in the OpenAI API); defaults to AZURE_DEPLOYMENT_MODEL.
+        max_tokens (int): Maximum tokens for the response.
+        temperature (float): Sampling temperature in [0, 1].
+
+
+    Returns:
+        str: The message content of the first choice in the response.
+    """
+    # Retrieve configuration variables from the environment
+    api_key = os.environ['AZURE_OPENAI_API_KEY']
+    azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
+    api_version = os.environ['AZURE_OPENAI_API_VERSION']
+    if azure_deployment_model is None:
+        azure_deployment_model = os.environ['AZURE_DEPLOYMENT_MODEL']  # default deployment
+
+
+    headers = {
+        "Content-Type": "application/json",
+        "api-key": api_key,
+    }
+
+    # Build the payload
+    payload = {
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+    }
+
+    # Construct the Azure OpenAI endpoint URL
+    GPT_ENDPOINT_URL = (
+        f"{azure_endpoint}/openai/deployments/{azure_deployment_model}"
+        f"/chat/completions?api-version={api_version}"
+    )
+
+    # Make the POST request
+    try:
+        response = requests.post(GPT_ENDPOINT_URL, headers=headers, json=payload)
+        response.raise_for_status()  # Raise an error for non-2xx responses
+    except requests.RequestException as e:
+        raise SystemExit(f"Failed to make the request. Error: {e}")
+
+
+    # Parse the JSON response
+    response_json = response.json()
+
+    # Extract the message content from the first choice
+    message_content = response_json["choices"][0]["message"]["content"]
+
+    # Convert the content string to a JSON object (if necessary)
+    # final_response = json.loads(message_content)
+
+    return message_content
+
+if __name__ == "__main__":
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an expert NLP and Search AI assistant that helps people summarize and search for information"
+                }
+            ]
+        },
+        {
+            "role": "user",
+            "content": "<prompt - instructions + context text + first few shot example>",
+        },
+        {
+            "role": "assistant",
+            "content": "<expected answer for first few shot example>",
+        }]
+    response = call_llm(messages)
+    # call_llm() already returns the content string, so print it directly
+    print(response)
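
For reference, a minimal sketch of how call_llm() can be exercised locally. The four environment-variable names come from the code above; the key, endpoint, API version, and deployment values below are placeholders (in practice they would live in the local .env file that the new *.env entry in .gitignore keeps out of the repo).

    import os

    # Placeholder values -- normally supplied via a local .env file loaded by load_dotenv().
    os.environ.setdefault('AZURE_OPENAI_API_KEY', '<your-azure-openai-key>')
    os.environ.setdefault('AZURE_OPENAI_ENDPOINT', 'https://<your-resource>.openai.azure.com')
    os.environ.setdefault('AZURE_OPENAI_API_VERSION', '<api-version>')
    os.environ.setdefault('AZURE_DEPLOYMENT_MODEL', '<gpt-4o-deployment-name>')

    from azure_openai_gpt4o import call_llm

    # call_llm() returns the content string of the response's first choice.
    reply = call_llm(
        [{'role': 'user', 'content': 'Say hello in one word.'}],
        max_tokens=16,
        temperature=0.0,
    )
    print(reply)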
vlms.py CHANGED
@@ -31,3 +31,28 @@ class GPT4V:
     )
 
     return response.choices[0].message.content
+
+from azure_openai_gpt4o import call_llm
+
+class GPT4Azure:
+  """GPT-4o VLM via the Azure OpenAI API."""
+
+  def __init__(self, openai_api_key):
+    """Dummy interface: the Azure API key is read from the .env file, not passed here."""
+
+  def query(self, prompt_seq, temperature=0, max_tokens=512):
+    """Queries the GPT-4o deployment on Azure."""
+    content = []
+    for elem in prompt_seq:
+      if isinstance(elem, str):
+        content.append({'type': 'text', 'text': elem})
+      elif isinstance(elem, np.ndarray):
+        # elem holds JPEG-encoded image bytes, as in GPT4V above
+        base64_image_str = base64.b64encode(elem).decode('utf-8')
+        image_url = f'data:image/jpeg;base64,{base64_image_str}'
+        content.append({'type': 'image_url', 'image_url': {'url': image_url}})
+
+    messages = [{'role': 'user', 'content': content}]
+
+    response = call_llm(messages, azure_deployment_model=None, max_tokens=max_tokens, temperature=temperature)
+    return response
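
A minimal usage sketch for the new wrapper, assuming Azure credentials are present in .env and a local test image exists. The cv2.imencode step is an assumption about how callers produce the JPEG-encoded np.ndarray that query() expects (mirroring the existing GPT4V wrapper); 'test.jpg' is a hypothetical file name.

    import cv2  # assumed available; used only to produce JPEG-encoded bytes

    from vlms import GPT4Azure

    im = cv2.imread('test.jpg')                # hypothetical local image (BGR)
    ok, jpeg_bytes = cv2.imencode('.jpg', im)  # uint8 np.ndarray of JPEG-encoded bytes
    assert ok

    vlm = GPT4Azure(openai_api_key=None)  # key argument is ignored; Azure creds come from .env
    answer = vlm.query(['Describe the scene in one sentence.', jpeg_bytes], max_tokens=128)
    print(answer)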