|
from io import BytesIO |
|
import json, re |
|
import os |
|
import base64 |
|
import requests |
|
|
|
from fastapi import FastAPI, UploadFile, File |
|
from fastapi.responses import JSONResponse |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from PIL import Image |
|
|
|
# Application instance that the route decorators in this module attach to.
app = FastAPI(title="GLM-4.1V-9B-Thinking")

# CORS is configured fully open: every origin, HTTP method and request header
# is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
|
|
|
|
|
# Chat-completions endpoint that the extraction request is POSTed to.
API_URL = "https://router.huggingface.co/v1/chat/completions"

# Request headers shared by every upstream call. The bearer token is read from
# the `access_token` environment variable when this module is imported, so a
# missing variable raises KeyError at import time.
HEADERS = {
    "Authorization": "Bearer " + os.environ["access_token"],
    "Content-Type": "application/json",
}

# Instruction text sent as the text part of the user message, next to the image.
# It asks the model for a bare JSON array of {"item", "price"} objects.
PROMPT = """
You are an AI assistant. Extract item names and their prices from the following image.

Your task is to extract item names and their corresponding prices from the image provided.

Return ONLY a clean JSON array in this format:
[
{"item": "<item_name>", "price": "<price>"},
...
]

⚠️ Guidelines:
- Do not include any explanation or text before/after the JSON.
- Include only entries that have both item and price.
- Preserve original spellings and formatting from the image.
- If prices are written in ₹, Rs., or INR, keep the symbol as is.
- Handle both packaged labels (like chips or snacks) and printed/handwritten menus.
- If there are duplicates or unclear text, skip them.

Only return the final JSON output, No explanation.

Make sure each entry has both item and price, and preserve the original spelling.
"""
|
|
|
def resize_image(image: Image.Image, max_size=(1024, 1024)) -> Image.Image:
    """Shrink ``image`` to fit within ``max_size`` and hand it back.

    The work is delegated to ``image.thumbnail``, which operates on the
    passed-in object itself, so the returned value is the same object the
    caller supplied (no copy is made).

    Args:
        image: Image to shrink; it is modified in place.
        max_size: ``(width, height)`` bounding box passed to ``thumbnail``.

    Returns:
        The same ``image`` object after ``thumbnail`` has been applied.
    """
    image.thumbnail(size=max_size)
    return image
|
|
|
|
|
async def encode_image_to_data_url(file: UploadFile = File(...)) -> str:
    """Read an uploaded image, downscale it and return it as a base64 data URL.

    The upload is decoded with Pillow, shrunk via ``resize_image``, re-encoded
    in the format Pillow detected, and wrapped in a ``data:<mime>;base64,...``
    URL.

    Args:
        file: The uploaded image.

    Returns:
        A ``data:`` URL containing the re-encoded image.

    Raises:
        Whatever ``Image.open`` / ``Image.save`` raise for undecodable or
        unsupported input; the caller is expected to handle these.
    """
    image = Image.open(BytesIO(await file.read()))

    # Remember the detected format before resizing, so re-encoding and the
    # MIME lookup below both use what the bytes actually are.
    image_format = image.format

    image = resize_image(image)

    buffered = BytesIO()
    image.save(buffered, quality=80, format=image_format)

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek is needed before reading it back.
    base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # The payload is encoded as `image_format`, so the MIME type must describe
    # that format. The client-declared content type can be wrong or missing,
    # so it is used only as a fallback.
    mime_type = (
        Image.MIME.get(image_format)
        or file.content_type
        or "application/octet-stream"
    )

    return f"data:{mime_type};base64,{base64_image}"
|
|
|
@app.get("/")
def root():
    """Return a static message indicating that the service is running."""
    status = {"message": "GLM 4.1V API for menu extraction is running."}
    return status
|
|
|
# (connect, read) timeouts in seconds for the upstream inference call. Without
# an explicit timeout `requests` waits indefinitely on a stalled connection.
_UPSTREAM_TIMEOUT = (10, 120)


@app.post("/extract/")
async def extract(file: UploadFile = File(...)):
    """Extract item/price pairs from an uploaded image via the vision model.

    The upload is converted to a data URL, sent together with ``PROMPT`` as a
    single user message to ``API_URL``, and the first JSON array of objects
    found in the model's reply is parsed and returned.

    Args:
        file: The uploaded image.

    Returns:
        A ``JSONResponse``:
        * 200 ``{"menu_items": [...]}`` on success.
        * 400 ``{"error": ...}`` when the upload cannot be decoded/encoded.
        * 502 ``{"error": ...}`` when the upstream call fails, times out, or
          returns a payload without a textual ``choices[0].message.content``.
        * 404 ``{"error": ..., "model_response": ...}`` when the reply
          contains no JSON array of objects.
        * 500 ``{"error": ..., "raw": ...}`` when the matched text is not
          valid JSON.
    """
    # Local import: only this handler needs the threadpool helper.
    from fastapi.concurrency import run_in_threadpool

    # Step 1: anything that goes wrong while decoding the upload is treated as
    # a client error.
    try:
        image_data_url = await encode_image_to_data_url(file)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=400)

    payload = {
        "model": "zai-org/GLM-4.1V-9B-Thinking:novita",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": PROMPT,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url,
                        },
                    },
                ],
            }
        ],
    }

    # Step 2: call the model. `requests` is blocking, so it runs in a worker
    # thread to keep the event loop free to serve other requests meanwhile.
    try:
        response = await run_in_threadpool(
            requests.post,
            API_URL,
            headers=HEADERS,
            json=payload,
            timeout=_UPSTREAM_TIMEOUT,
        )
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure, timeout, or a non-JSON body: the upstream is at
        # fault, not the client. ValueError covers JSON decode errors.
        return JSONResponse(
            status_code=502,
            content={"error": f"Upstream request failed: {e}"},
        )

    print("result :", result)

    # Step 3: pull the reply text out of the response. An upstream error
    # payload has no `choices`; surface it instead of a bare KeyError message.
    try:
        reply = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        reply = None
    if not isinstance(reply, str):
        return JSONResponse(
            status_code=502,
            content={
                "error": "Unexpected response from model API",
                "upstream_status": response.status_code,
                "model_response": result,
            },
        )

    # Step 4: locate the first `[ {...} ... ]` span in the reply. DOTALL lets
    # the match run across line breaks.
    match = re.search(r"\[\s*{.*?}\s*\]", reply, re.DOTALL)
    if not match:
        return JSONResponse(
            status_code=404,
            content={"error": "No JSON array found in response", "model_response": reply},
        )

    json_str = match.group(0)
    try:
        items = json.loads(json_str)
    except json.JSONDecodeError:
        return JSONResponse(
            status_code=500,
            content={"error": "Failed to parse JSON", "raw": json_str},
        )

    return JSONResponse(content={"menu_items": items})
|
|