"""VLM Helper Functions.""" | |
import base64 | |
import numpy as np | |
from openai import OpenAI | |
class GPT4V: | |
"""GPT4V VLM.""" | |
def __init__(self, openai_api_key): | |
self.client = OpenAI(api_key=openai_api_key) | |
def query(self, prompt_seq, temperature=0, max_tokens=512): | |
"""Queries GPT-4V.""" | |
content = [] | |
for elem in prompt_seq: | |
if isinstance(elem, str): | |
content.append({'type': 'text', 'text': elem}) | |
elif isinstance(elem, np.ndarray): | |
base64_image_str = base64.b64encode(elem).decode('utf-8') | |
image_url = f'data:image/jpeg;base64,{base64_image_str}' | |
content.append({'type': 'image_url', 'image_url': {'url': image_url}}) | |
messages = [{'role': 'user', 'content': content}] | |
response = self.client.chat.completions.create( | |
model='gpt-4-vision-preview', | |
messages=messages, | |
temperature=temperature, | |
max_tokens=max_tokens | |
) | |
return response.choices[0].message.content | |
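

# Usage sketch (illustrative, not part of the original module): builds a
# prompt_seq that interleaves text with a JPEG-encoded image buffer. Assumes
# OpenCV (cv2) is installed; the image path and API key below are hypothetical
# placeholders.
def _example_gpt4v_query(image_path='example.jpg', api_key='sk-...'):
    import cv2

    frame = cv2.imread(image_path)             # HxWx3 uint8 BGR image
    _, jpeg_buf = cv2.imencode('.jpg', frame)  # np.ndarray of JPEG bytes
    vlm = GPT4V(openai_api_key=api_key)
    # prompt_seq mixes plain strings and JPEG-encoded np.ndarrays.
    return vlm.query(['Describe this image.', jpeg_buf])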


from azure_openai_gpt4o import call_llm


class GPT4Azure:
    """GPT4V VLM via the Azure API."""

    def __init__(self, openai_api_key):
        """Dummy interface: the Azure API key is read from the .env file,
        so there is no need to pass it here."""

    def query(self, prompt_seq, temperature=0, max_tokens=512):
        """Queries GPT-4V through the Azure deployment."""
        content = []
        for elem in prompt_seq:
            if isinstance(elem, str):
                content.append({'type': 'text', 'text': elem})
            elif isinstance(elem, np.ndarray):
                base64_image_str = base64.b64encode(elem).decode('utf-8')
                image_url = f'data:image/jpeg;base64,{base64_image_str}'
                content.append(
                    {'type': 'image_url', 'image_url': {'url': image_url}})
        messages = [{'role': 'user', 'content': content}]
        response = call_llm(
            messages,
            azure_deployment_model=None,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response
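

# Usage sketch (illustrative, not part of the original module): GPT4Azure
# exposes the same query() interface as GPT4V but routes the request through
# the local azure_openai_gpt4o helper, with credentials read from the .env
# file. Assumes OpenCV (cv2) is installed; the image path is hypothetical.
if __name__ == '__main__':
    import cv2

    frame = cv2.imread('example.jpg')          # hypothetical input image
    _, jpeg_buf = cv2.imencode('.jpg', frame)  # np.ndarray of JPEG bytes
    azure_vlm = GPT4Azure(openai_api_key=None)  # key unused; read from .env
    print(azure_vlm.query(['What objects are visible?', jpeg_buf]))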