import base64
import io
import os
import time

import gradio as gr
from openai import OpenAI
# OpenRouter API key, read from the environment so the secret is never committed
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Available text models
TEXT_MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Claude 3 Haiku": "anthropic/claude-3-haiku",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
}

# Available image (vision) models
IMAGE_MODELS = {
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Qwen Vision": "qwen/qwen2.5-vl-32b-instruct:free",
    "Gemma Vision": "google/gemma-3-4b-it:free",
    "Llama 3 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}
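# Note: "google/gemma-3-4b-it:free" appears in both lists because Gemma 3
# accepts text and image input, so the same model ID serves either mode.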

# Token usage tracking
token_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "model_usage": {},
}
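# Caveat: token_usage is process-global, so on a shared deployment these stats
# aggregate across every user of the app rather than per session.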

# Helper function to convert an image to base64
def image_to_base64(image):
    """Convert an image (file path or PIL Image) to a base64 string."""
    # If image is a file path, read the raw bytes directly
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()
    # Otherwise assume a PIL Image; JPEG cannot store an alpha channel,
    # so flatten RGBA/paletted images to RGB before encoding
    if image.mode in ("RGBA", "P"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
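# The returned string is embedded below as a data URL
# ("data:image/jpeg;base64,<payload>"), the inline-image form expected by the
# OpenAI-style multimodal message schema that OpenRouter implements.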

def chat_with_ai(message, model_name, history, image=None):
    """Chat with the selected OpenRouter model."""
    try:
        # Initialize an OpenAI client pointed at the OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )
        # Format the (user, assistant) history pairs for the API
        messages = []
        for human, assistant in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
        # Use multimodal content when a vision model is selected and an image is provided
        if model_name in IMAGE_MODELS and image is not None:
            img_base64 = image_to_base64(image)
            # Current message: a text part plus the image as a data URL
            messages.append({
                "role": "user",
                "content": [
                    {"type": "text", "text": message},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ],
            })
        else:
            # Add the current message (text only)
            messages.append({"role": "user", "content": message})
        # Resolve the model ID from whichever catalog contains the selected name
        model_id = TEXT_MODELS.get(model_name) or IMAGE_MODELS.get(model_name)
        # Create the completion request, timing it for the usage stats
        start_time = time.time()
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=model_id,
            messages=messages,
        )
        end_time = time.time()
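        # HTTP-Referer and X-Title are optional OpenRouter attribution headers
        # (used for app listings on openrouter.ai); requests work without them.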
        # Update global token usage statistics
        input_tokens = completion.usage.prompt_tokens
        output_tokens = completion.usage.completion_tokens
        token_usage["total_input_tokens"] += input_tokens
        token_usage["total_output_tokens"] += output_tokens
        if model_name not in token_usage["model_usage"]:
            token_usage["model_usage"][model_name] = {
                "input_tokens": 0,
                "output_tokens": 0,
                "requests": 0,
                "avg_response_time": 0,
            }
        # Update model-specific usage
        model_stats = token_usage["model_usage"][model_name]
        model_stats["input_tokens"] += input_tokens
        model_stats["output_tokens"] += output_tokens
        model_stats["requests"] += 1
        # Calculate the running average of response time
        response_time = end_time - start_time
        model_stats["avg_response_time"] = (
            (model_stats["avg_response_time"] * (model_stats["requests"] - 1)) + response_time
        ) / model_stats["requests"]
        # Return the model's response
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"

# Function to generate a Markdown token usage report
def generate_usage_report():
    report = "### Token Usage Statistics\n\n"
    report += f"**Total Input Tokens:** {token_usage['total_input_tokens']}\n"
    report += f"**Total Output Tokens:** {token_usage['total_output_tokens']}\n\n"
    report += "### Model-Specific Usage\n\n"
    report += "| Model | Input Tokens | Output Tokens | Requests | Avg Response Time (s) |\n"
    report += "|-------|--------------|---------------|----------|-----------------------|\n"
    for model, stats in token_usage["model_usage"].items():
        report += f"| {model} | {stats['input_tokens']} | {stats['output_tokens']} | {stats['requests']} | {stats['avg_response_time']:.2f} |\n"
    return report
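
# A populated report renders table rows such as:
#   | Mistral Small | 1234 | 567 | 3 | 1.84 |
# (numbers shown are illustrative only)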

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Multi-Modal Interface", css="style.css") as demo:
    gr.Markdown(
        """
        # 🤖 OpenRouter AI Multi-Modal Interface
        Chat with multiple AI models through the OpenRouter API: both text-only and vision models!
        *Select your model type (Text or Image), choose a specific model, and start interacting!*
        **All responses from image models will be provided in English**
        """
    )
    with gr.Row():
        with gr.Column(scale=4):
            # Chat interface
            chatbot = gr.Chatbot(height=500, label="Conversation")
            # Image upload (hidden until an image model type is selected)
            image_input = gr.Image(type="pil", label="Upload Image for Analysis", visible=False)
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
            # Instructions for image models
            image_instructions = gr.Markdown(
                """
                ### 📷 Image Analysis Instructions
                1. Upload an image using the panel above
                2. Ask a question about the image
                3. The AI will analyze the image and respond in English

                **Example prompts:**
                - "What's in this image?"
                - "Describe this scene in detail"
                - "What objects can you identify?"
                - "What's happening in this picture?"
                """,
                visible=False,
            )
        with gr.Column(scale=1):
            # Model selection
            with gr.Group():
                model_type = gr.Radio(
                    choices=["Text Models", "Image Models"],
                    value="Text Models",
                    label="Model Type",
                    info="Choose between text-only and vision models",
                )
                # Update the model dropdown and image widgets when the type changes
                def update_model_choices(model_type):
                    if model_type == "Text Models":
                        return {
                            model_dropdown: gr.update(choices=list(TEXT_MODELS.keys()), value="Mistral Small"),
                            image_input: gr.update(visible=False),
                            image_instructions: gr.update(visible=False),
                        }
                    else:  # Image Models
                        return {
                            model_dropdown: gr.update(choices=list(IMAGE_MODELS.keys()), value="Kimi Vision"),
                            image_input: gr.update(visible=True),
                            image_instructions: gr.update(visible=True),
                        }
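                # The callback returns a dict keyed by component so one handler
                # can drive several outputs at once; gr.update(...) changes
                # properties (choices, visibility) without recreating widgets.
                # Referencing model_dropdown here is safe because the body only
                # runs after the UI below has been built.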
                model_dropdown = gr.Dropdown(
                    choices=list(TEXT_MODELS.keys()),
                    value="Mistral Small",
                    label="Select AI Model",
                    info="Choose from the available AI models",
                )
                # Connect the radio button to update the dropdown and show/hide image components
                model_type.change(
                    fn=update_model_choices,
                    inputs=model_type,
                    outputs=[model_dropdown, image_input, image_instructions],
                )
            with gr.Tabs():
                with gr.TabItem("Text Models"):
                    gr.Markdown(
                        """
                        ### Available Text Models
                        - **Mistral Small**: Powerful language model from Mistral AI
                        - **Claude 3 Haiku**: Anthropic's fast, lightweight model
                        - **Mistral 3.1**: Earlier version of Mistral's model
                        - **Gemma**: Google's lightweight language model
                        """
                    )
                with gr.TabItem("Image Models"):
                    gr.Markdown(
                        """
                        ### Available Image Models
                        - **Kimi Vision**: Moonshot AI's vision-language model
                        - **Qwen Vision**: Alibaba's vision-language model
                        - **Gemma Vision**: Google's lightweight vision model
                        - **Llama 3 Vision**: Meta's vision-language model

                        *Note: All responses will be provided in English*
                        """
                    )
            # Token usage statistics
            usage_stats = gr.Markdown("### Token Usage Statistics\n\nNo usage data yet.")
            refresh_stats_btn = gr.Button("Refresh Usage Stats")
    # Set up the submit action
    def respond(message, chat_history, model, image, model_type):
        if not message.strip():
            return "", chat_history
        # Decide whether to send the image along with the message
        use_image = model_type == "Image Models" and image is not None
        # Prompt the user if an image model is selected but no image is uploaded
        if model_type == "Image Models" and image is None:
            return "", chat_history + [(message, "Please upload an image first before sending your message.")]
        # Process with or without the image
        if use_image:
            bot_message = chat_with_ai(message, model, chat_history, image)
        else:
            bot_message = chat_with_ai(message, model, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history
    # Connect the components
    submit_btn.click(
        respond,
        [msg, chatbot, model_dropdown, image_input, model_type],
        [msg, chatbot],
    )
    msg.submit(
        respond,
        [msg, chatbot, model_dropdown, image_input, model_type],
        [msg, chatbot],
    )
    # Clear both the chat and the uploaded image
    def clear_all():
        return None, None

    clear_btn.click(clear_all, None, [chatbot, image_input], queue=False)
    # Update usage statistics on demand
    refresh_stats_btn.click(generate_usage_report, None, usage_stats)

# Hugging Face Spaces executes this file as __main__, so launching here covers
# both local runs and Spaces; share=True is unsupported on Spaces, so enable it
# only when testing locally.
if __name__ == "__main__":
    demo.launch()