import base64
import io
import os
import time

import gradio as gr
from openai import OpenAI
# OpenRouter API key: read from the environment rather than hardcoding a secret
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
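# On Hugging Face Spaces the key can be stored as a repository secret named
# OPENROUTER_API_KEY, which the Space exposes as an environment variable;
# locally, e.g.:  export OPENROUTER_API_KEY=sk-or-...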
# Available models categorized by type
TEXT_MODELS = {
"Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
"Claude 3 Haiku": "anthropic/claude-3-haiku",
"Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
"Gemma": "google/gemma-3-4b-it:free",
}
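# Model IDs follow OpenRouter's "vendor/model" naming; the ":free" suffix
# selects the free-tier route where one is offered.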
# Available image models
IMAGE_MODELS = {
"Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
"Qwen Vision": "qwen/qwen2.5-vl-32b-instruct:free",
"Gemma Vision": "google/gemma-3-4b-it:free",
"Llama 3 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}
# Token usage tracking
token_usage = {
"total_input_tokens": 0,
"total_output_tokens": 0,
"model_usage": {}
}
# Helper: convert an image (file path or PIL.Image) to a base64 string
def image_to_base64(image):
"""Convert an image to base64 encoding"""
# If image is a file path
if isinstance(image, str):
with open(image, "rb") as img_file:
return base64.b64encode(img_file.read()).decode()
    # If image is already a PIL Image: JPEG cannot store an alpha channel,
    # so convert RGBA/palette uploads (e.g. PNGs) to RGB before saving
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
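# Example usage (hypothetical file path):
#   b64 = image_to_base64("photo.jpg")
#   data_url = f"data:image/jpeg;base64,{b64}"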
def chat_with_ai(message, model_name, history, image=None):
"""Chat with the selected OpenRouter model"""
try:
# Initialize OpenAI client with OpenRouter base URL
client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=OPENROUTER_API_KEY,
)
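        # OpenRouter exposes an OpenAI-compatible endpoint, so the stock
        # openai client works here with only the base_url swapped out.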
# Format the conversation history for the API
messages = []
for human, assistant in history:
messages.append({"role": "user", "content": human})
messages.append({"role": "assistant", "content": assistant})
# Check if we're using an image model and have an image
if model_name in IMAGE_MODELS and image is not None:
# Convert image to base64
img_base64 = image_to_base64(image)
# Add the current message with image
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": message
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{img_base64}"
}
}
]
})
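            # The list-of-parts content above is the OpenAI-style multimodal
            # message format: one text part plus one image_url part carrying
            # the image inline as a base64 data URL.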
else:
# Add the current message (text only)
messages.append({"role": "user", "content": message})
        # Resolve the model ID from the selected display name
        model_id = TEXT_MODELS.get(model_name) or IMAGE_MODELS.get(model_name)
        if model_id is None:
            return f"Error: unknown model '{model_name}'"
# Create the completion request
start_time = time.time()
completion = client.chat.completions.create(
extra_headers={
"HTTP-Referer": "https://gradio-openrouter-interface.com",
"X-Title": "Gradio OpenRouter Interface",
},
model=model_id,
messages=messages
)
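        # HTTP-Referer and X-Title are optional OpenRouter headers used to
        # attribute traffic on openrouter.ai; requests work without them.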
end_time = time.time()
        # Update token usage statistics (usage metadata can be missing on some responses)
        usage = getattr(completion, "usage", None)
        input_tokens = usage.prompt_tokens if usage else 0
        output_tokens = usage.completion_tokens if usage else 0
token_usage["total_input_tokens"] += input_tokens
token_usage["total_output_tokens"] += output_tokens
if model_name not in token_usage["model_usage"]:
token_usage["model_usage"][model_name] = {
"input_tokens": 0,
"output_tokens": 0,
"requests": 0,
"avg_response_time": 0
}
# Update model-specific usage
model_stats = token_usage["model_usage"][model_name]
model_stats["input_tokens"] += input_tokens
model_stats["output_tokens"] += output_tokens
model_stats["requests"] += 1
# Calculate running average of response time
response_time = end_time - start_time
model_stats["avg_response_time"] = ((model_stats["avg_response_time"] * (model_stats["requests"] - 1)) + response_time) / model_stats["requests"]
# Return the model's response
return completion.choices[0].message.content
except Exception as e:
return f"Error: {str(e)}"
# Function to generate token usage report
def generate_usage_report():
report = f"### Token Usage Statistics\n\n"
report += f"**Total Input Tokens:** {token_usage['total_input_tokens']}\n"
report += f"**Total Output Tokens:** {token_usage['total_output_tokens']}\n\n"
report += "### Model-Specific Usage\n\n"
report += "| Model | Input Tokens | Output Tokens | Requests | Avg Response Time (s) |\n"
report += "|-------|--------------|---------------|----------|----------------------|\n"
for model, stats in token_usage["model_usage"].items():
report += f"| {model} | {stats['input_tokens']} | {stats['output_tokens']} | {stats['requests']} | {stats['avg_response_time']:.2f} |\n"
return report
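# Illustrative rendering of one table row (placeholder numbers, not measurements):
#   | Mistral Small | 1200 | 450 | 3 | 1.87 |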
# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Multi-Modal Interface", css="style.css") as demo:
    gr.Markdown(
        """
        # 🤖 OpenRouter AI Multi-Modal Interface
        Chat with multiple AI models powered by the OpenRouter API, including both text-only and vision models!
        *Select your model type (Text or Image), choose a specific model, and start interacting!*
        **All responses from image models will be provided in English**
        """
    )
with gr.Row():
with gr.Column(scale=4):
# Chat interface
chatbot = gr.Chatbot(height=500, label="Conversation")
# Image upload (initially hidden)
image_input = gr.Image(type="pil", label="Upload Image for Analysis", visible=False)
msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
with gr.Row():
submit_btn = gr.Button("Send", variant="primary")
clear_btn = gr.Button("Clear Chat")
# Instructions for image models
image_instructions = gr.Markdown(
"""
### 📷 Image Analysis Instructions
1. Upload an image using the panel above
2. Ask a question about the image
3. The AI will analyze the image and respond in English
**Example prompts:**
- "What's in this image?"
- "Describe this scene in detail"
- "What objects can you identify?"
- "What's happening in this picture?"
""",
visible=False
)
with gr.Column(scale=1):
# Model selection
with gr.Group():
model_type = gr.Radio(
choices=["Text Models", "Image Models"],
value="Text Models",
label="Model Type",
info="Choose between text-only or vision models"
)
# Function to update model dropdown based on selection
def update_model_choices(model_type):
if model_type == "Text Models":
return {
model_dropdown: gr.update(choices=list(TEXT_MODELS.keys()), value="Mistral Small"),
image_input: gr.update(visible=False),
image_instructions: gr.update(visible=False)
}
else: # Image Models
return {
model_dropdown: gr.update(choices=list(IMAGE_MODELS.keys()), value="Kimi Vision"),
image_input: gr.update(visible=True),
image_instructions: gr.update(visible=True)
}
model_dropdown = gr.Dropdown(
choices=list(TEXT_MODELS.keys()),
value="Mistral Small",
label="Select AI Model",
info="Choose from different AI models"
)
# Connect the radio button to update the dropdown and show/hide image components
model_type.change(fn=update_model_choices, inputs=model_type, outputs=[model_dropdown, image_input, image_instructions])
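            # Returning a dict keyed by component (as update_model_choices does)
            # is valid in Gradio as long as each key is listed in `outputs`.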
with gr.Tabs():
with gr.TabItem("Text Models"):
                    gr.Markdown(
                        """
                        ### Available Text Models
                        - **Mistral Small**: Powerful small language model from Mistral AI
                        - **Claude 3 Haiku**: Anthropic's fast, lightweight model
                        - **Mistral 3.1**: Earlier small-model release from Mistral
                        - **Gemma**: Google's lightweight language model
                        """
                    )
with gr.TabItem("Image Models"):
gr.Markdown(
"""
### Available Image Models
- **Kimi Vision**: Moonshot AI's vision-language model
- **Qwen Vision**: Alibaba's vision-language model
- **Gemma Vision**: Google's lightweight vision model
- **Llama 3 Vision**: Meta's vision-language model
*Note: All responses will be provided in English*
"""
)
# Token usage statistics
usage_stats = gr.Markdown("### Token Usage Statistics\n\nNo usage data yet.")
refresh_stats_btn = gr.Button("Refresh Usage Stats")
# Set up the submit action
def respond(message, chat_history, model, image, model_type):
if not message.strip():
return "", chat_history
# Check if we need to use image
use_image = model_type == "Image Models" and image is not None
# Add a note if using image model but no image uploaded
if model_type == "Image Models" and image is None:
return "", chat_history + [(message, "Please upload an image first before sending your message.")]
# Process with or without image
if use_image:
bot_message = chat_with_ai(message, model, chat_history, image)
else:
bot_message = chat_with_ai(message, model, chat_history)
chat_history.append((message, bot_message))
return "", chat_history
# Connect the components
submit_btn.click(
respond,
[msg, chatbot, model_dropdown, image_input, model_type],
[msg, chatbot]
)
msg.submit(
respond,
[msg, chatbot, model_dropdown, image_input, model_type],
[msg, chatbot]
)
# Clear chat and image
def clear_all():
return None, None
clear_btn.click(clear_all, None, [chatbot, image_input], queue=False)
# Update usage statistics
refresh_stats_btn.click(generate_usage_report, None, usage_stats)
# Hugging Face Spaces runs app.py as the main module, so a single guarded
# launch() is sufficient both locally and on Spaces.
if __name__ == "__main__":
    demo.launch()