Dekel Cohen committed
Commit db347cc · 1 Parent(s): 3050102

Project is working!


1) Added gpt-4o azure support

Files changed (4)
  1. .gitignore +1 -0
  2. app.py +12 -3
  3. azure_openai_gpt4o.py +92 -0
  4. vlms.py +25 -0
.gitignore ADDED
@@ -0,0 +1 @@
+*.env
app.py CHANGED
@@ -3,7 +3,15 @@
 import gradio as gr
 import numpy as np
 from vip_runner import vip_runner
-from vlms import GPT4V
+
+USE_AZURE = True  # Set to False to use GPT-4V (OpenAI API)
+if USE_AZURE:
+  from vlms import GPT4Azure
+  GPT_WRAPPER_CLASSNAME = GPT4Azure
+else:
+  from vlms import GPT4V
+  GPT_WRAPPER_CLASSNAME = GPT4V
+
 
 # Adjust radius of annotations based on size of the image
 radius_per_pixel = 0.05
@@ -20,7 +28,7 @@ def run_vip(
     progress=gr.Progress(track_tqdm=False),
 ):
 
-  if not openai_api_key:
+  if not openai_api_key and not USE_AZURE:
     return [], 'Must provide OpenAI API Key'
   if im is None:
     return [], 'Must specify image'
@@ -52,7 +60,8 @@ def run_vip(
       'robot': None,
   }
 
-  vlm = GPT4V(openai_api_key=openai_api_key)
+  # GPT4Azure or GPT4V, depending on the USE_AZURE flag (defaults to True)
+  vlm = GPT_WRAPPER_CLASSNAME(openai_api_key=openai_api_key)
   vip_gen = vip_runner(
       vlm,
       im,
azure_openai_gpt4o.py ADDED
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# azure_openai_gpt4o.py
+import os
+import json
+import requests
+from dotenv import load_dotenv
+
+# Load environment variables from .env
+load_dotenv()
+
+def call_llm(messages, azure_deployment_model=None, max_tokens=2048, temperature=0.1):
+    """
+    Call Azure OpenAI's chat completions endpoint with the given messages.
+
+    Args:
+        messages (list): List of message objects for the conversation.
+        azure_deployment_model (str): Name of the Azure model deployment (not always 'gpt-4' as in the OpenAI API); defaults to AZURE_DEPLOYMENT_MODEL.
+        max_tokens (int): Maximum tokens for the response.
+        temperature (float): Sampling temperature in [0, 1].
+
+
+    Returns:
+        str: The message content of the first choice in the response.
+    """
+    # Retrieve configuration variables from the environment
+    api_key = os.environ['AZURE_OPENAI_API_KEY']
+    azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
+    api_version = os.environ['AZURE_OPENAI_API_VERSION']
+    if azure_deployment_model is None:
+        azure_deployment_model = os.environ['AZURE_DEPLOYMENT_MODEL']  # default deployment
+
+
+    headers = {
+        "Content-Type": "application/json",
+        "api-key": api_key,
+    }
+
+    # Build the payload
+    payload = {
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+    }
+
+    # Construct the Azure OpenAI endpoint URL
+    GPT_ENDPOINT_URL = (
+        f"{azure_endpoint}/openai/deployments/{azure_deployment_model}"
+        f"/chat/completions?api-version={api_version}"
+    )
+
+    # Make the POST request
+    try:
+        response = requests.post(GPT_ENDPOINT_URL, headers=headers, json=payload)
+        response.raise_for_status()  # Raise an error for non-2xx responses
+    except requests.RequestException as e:
+        raise SystemExit(f"Failed to make the request. Error: {e}")
+
+
+    # Parse the JSON response
+    response_json = response.json()
+
+    # Extract the message content from the first choice
+    message_content = response_json["choices"][0]["message"]["content"]
+
+    # Convert the content string to a JSON object (if necessary)
+    # final_response = json.loads(message_content)
+
+    return message_content
+
+if __name__ == "__main__":
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an expert NLP and Search AI assistant that helps people summarize and search for information"
+                }
+            ]
+        },
+        {
+            "role": "user",
+            "content": "<prompt - instructions + context text + first few shot example>",
+        },
+        {
+            "role": "assistant",
+            "content": "<expected answer for first few shot example>",
+        }]
+    response = call_llm(messages)
+    # call_llm() already returns the content string, so print it directly
+    print(response)
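
For reference, a minimal sketch of how call_llm() can be exercised locally. The four environment-variable names come from the code above; the key, endpoint, API version, and deployment values below are placeholders (in practice they would live in the local .env file that the new *.env entry in .gitignore keeps out of the repo).

    import os

    # Placeholder values -- normally supplied via a local .env file loaded by load_dotenv().
    os.environ.setdefault('AZURE_OPENAI_API_KEY', '<your-azure-openai-key>')
    os.environ.setdefault('AZURE_OPENAI_ENDPOINT', 'https://<your-resource>.openai.azure.com')
    os.environ.setdefault('AZURE_OPENAI_API_VERSION', '<api-version>')
    os.environ.setdefault('AZURE_DEPLOYMENT_MODEL', '<gpt-4o-deployment-name>')

    from azure_openai_gpt4o import call_llm

    # call_llm() returns the content string of the response's first choice.
    reply = call_llm(
        [{'role': 'user', 'content': 'Say hello in one word.'}],
        max_tokens=16,
        temperature=0.0,
    )
    print(reply)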
vlms.py CHANGED
@@ -31,3 +31,28 @@ class GPT4V:
     )
 
     return response.choices[0].message.content
+
+from azure_openai_gpt4o import call_llm
+
+class GPT4Azure:
+  """GPT-4o VLM via the Azure OpenAI API."""
+
+  def __init__(self, openai_api_key):
+    """Dummy interface: the Azure API key is read from the .env file, not passed here."""
+
+  def query(self, prompt_seq, temperature=0, max_tokens=512):
+    """Queries the GPT-4o deployment on Azure."""
+    content = []
+    for elem in prompt_seq:
+      if isinstance(elem, str):
+        content.append({'type': 'text', 'text': elem})
+      elif isinstance(elem, np.ndarray):
+        # elem holds JPEG-encoded image bytes, as in GPT4V above
+        base64_image_str = base64.b64encode(elem).decode('utf-8')
+        image_url = f'data:image/jpeg;base64,{base64_image_str}'
+        content.append({'type': 'image_url', 'image_url': {'url': image_url}})
+
+    messages = [{'role': 'user', 'content': content}]
+
+    response = call_llm(messages, azure_deployment_model=None, max_tokens=max_tokens, temperature=temperature)
+    return response
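
A minimal usage sketch for the new wrapper, assuming Azure credentials are present in .env and a local test image exists. The cv2.imencode step is an assumption about how callers produce the JPEG-encoded np.ndarray that query() expects (mirroring the existing GPT4V wrapper); 'test.jpg' is a hypothetical file name.

    import cv2  # assumed available; used only to produce JPEG-encoded bytes

    from vlms import GPT4Azure

    im = cv2.imread('test.jpg')                # hypothetical local image (BGR)
    ok, jpeg_bytes = cv2.imencode('.jpg', im)  # uint8 np.ndarray of JPEG-encoded bytes
    assert ok

    vlm = GPT4Azure(openai_api_key=None)  # key argument is ignored; Azure creds come from .env
    answer = vlm.query(['Describe the scene in one sentence.', jpeg_bytes], max_tokens=128)
    print(answer)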