# Dekel Cohen
# Project is working!
# db347cc
"""VLM Helper Functions."""
import base64
import numpy as np
from openai import OpenAI
class GPT4V:
    """Vision-language model client backed by the OpenAI chat completions API.

    Attributes:
        client: ``OpenAI`` client built from the API key given at construction.
        model: Model name sent with every request issued by ``query``.
    """

    # Model requested when the caller does not choose one explicitly.
    DEFAULT_MODEL = 'gpt-4-vision-preview'

    def __init__(self, openai_api_key, model=DEFAULT_MODEL):
        """Creates the underlying OpenAI client.

        Args:
            openai_api_key: API key handed to ``OpenAI``.
            model: Model name to request; defaults to ``DEFAULT_MODEL`` so
                existing callers keep their current behavior.
        """
        self.client = OpenAI(api_key=openai_api_key)
        self.model = model

    @staticmethod
    def _build_content(prompt_seq):
        """Converts a mixed text/image prompt into chat ``content`` parts.

        Strings become ``text`` parts and NumPy arrays become ``image_url``
        parts carrying a base64 data URL. Elements of any other type are
        skipped.
        """
        content = []
        for elem in prompt_seq:
            if isinstance(elem, str):
                content.append({'type': 'text', 'text': elem})
            elif isinstance(elem, np.ndarray):
                # NOTE(review): the array's raw bytes are labelled image/jpeg
                # without re-encoding, so callers presumably pass an already
                # JPEG-encoded buffer -- confirm against callers.
                # tobytes() yields a C-ordered copy, so sliced or transposed
                # (non-contiguous) arrays are accepted; b64encode needs a
                # contiguous buffer when given the array directly.
                encoded = base64.b64encode(elem.tobytes()).decode('utf-8')
                data_url = f'data:image/jpeg;base64,{encoded}'
                content.append(
                    {'type': 'image_url', 'image_url': {'url': data_url}}
                )
        return content

    def query(self, prompt_seq, temperature=0, max_tokens=512):
        """Sends the prompt as a single user message and returns the reply.

        Args:
            prompt_seq: Iterable of ``str`` (text) and ``np.ndarray`` (image)
                elements, kept in order.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The ``message.content`` of the first choice in the response.
        """
        messages = [
            {'role': 'user', 'content': self._build_content(prompt_seq)}
        ]
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
from azure_openai_gpt4o import call_llm
class GPT4Azure:
    """Vision-language model client that sends prompts through ``call_llm``.

    Offers the same ``query`` signature as the OpenAI-backed client in this
    file, but delegates the request to the Azure helper ``call_llm``.
    """

    def __init__(self, openai_api_key=None):
        """Accepts and ignores an API key for interface compatibility.

        Args:
            openai_api_key: Unused. Per the original author's note, the Azure
                API key is read from a ``.env`` file, so nothing needs to be
                passed here; the parameter is optional for that reason.
        """

    @staticmethod
    def _build_content(prompt_seq):
        """Converts a mixed text/image prompt into chat ``content`` parts.

        Strings become ``text`` parts and NumPy arrays become ``image_url``
        parts carrying a base64 data URL. Elements of any other type are
        skipped.
        """
        content = []
        for elem in prompt_seq:
            if isinstance(elem, str):
                content.append({'type': 'text', 'text': elem})
            elif isinstance(elem, np.ndarray):
                # NOTE(review): the array's raw bytes are labelled image/jpeg
                # without re-encoding, so callers presumably pass an already
                # JPEG-encoded buffer -- confirm against callers.
                # tobytes() yields a C-ordered copy, so sliced or transposed
                # (non-contiguous) arrays are accepted; b64encode needs a
                # contiguous buffer when given the array directly.
                encoded = base64.b64encode(elem.tobytes()).decode('utf-8')
                data_url = f'data:image/jpeg;base64,{encoded}'
                content.append(
                    {'type': 'image_url', 'image_url': {'url': data_url}}
                )
        return content

    def query(self, prompt_seq, temperature=0, max_tokens=512):
        """Sends the prompt as a single user message via ``call_llm``.

        Args:
            prompt_seq: Iterable of ``str`` (text) and ``np.ndarray`` (image)
                elements, kept in order.
            temperature: Sampling temperature forwarded to ``call_llm``.
            max_tokens: Completion token limit forwarded to ``call_llm``.

        Returns:
            Whatever ``call_llm`` returns, unmodified.
        """
        messages = [
            {'role': 'user', 'content': self._build_content(prompt_seq)}
        ]
        # azure_deployment_model=None is passed explicitly, as in the original
        # call; how call_llm interprets None is defined in its own module.
        return call_llm(
            messages,
            azure_deployment_model=None,
            max_tokens=max_tokens,
            temperature=temperature,
        )