use-cpu
- Dockerfile +2 -1
- app.py +131 -1
Dockerfile
CHANGED
@@ -24,4 +24,5 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 USER user
 
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+EXPOSE 7860
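A quick way to confirm the container actually serves on the exposed port is to hit the root endpoint (added for Hugging Face health checks) once the Space is running, or locally after something like `docker run -p 7860:7860 <image>`. The base URL below is an assumption for a local run, not part of this commit:

import requests

# Hypothetical local check: the root health-check endpoint should answer
# on the port named in CMD/EXPOSE (7860 here).
r = requests.get("http://localhost:7860/", timeout=10)
print(r.status_code, r.text[:200])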
app.py
CHANGED
@@ -1,3 +1,11 @@
+
+# --- HF model lists ---
+THINKING_MODELS = ["Qwen/Qwen3-4B-Thinking-2507"]
+INSTRUCT_MODELS = ["Qwen/Qwen2.5-3B-Instruct", "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"]
+
+def _current_models():
+    return THINKING_MODELS if STATE.get("mode") == "thinking" else INSTRUCT_MODELS
+
 # app.py
 import os
 import json
@@ -7,6 +15,28 @@ import threading
 from pathlib import Path
 from typing import List, Dict, Any, Tuple
 
+
+# --- Model mode state (thinking/instruct) with simple persistence ---
+from pathlib import Path
+APP_DIR = Path(__file__).parent
+DATA_DIR = APP_DIR / "data"
+DATA_DIR.mkdir(parents=True, exist_ok=True)
+STATE_PATH = DATA_DIR / "state.json"
+
+def _load_state():
+    if STATE_PATH.exists():
+        try:
+            return json.loads(STATE_PATH.read_text(encoding="utf-8"))
+        except Exception:
+            pass
+    return {"mode": "instruct"}
+
+def _save_state(s: dict):
+    STATE_PATH.write_text(json.dumps(s, ensure_ascii=False, indent=2), encoding="utf-8")
+
+STATE = _load_state()
+
+
 import numpy as np
 import faiss
 import pickle
@@ -400,6 +430,15 @@ def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -
 # =========================
 # FastAPI
 # =========================
+
+# --- Warm-up best effort on startup ---
+try:
+    _ = _hf_call_single(_current_models()[0], "ping", 1, timeout_s=30)
+    logging.info("[HF] Warm-up OK for %s", _current_models()[0])
+except Exception as e:
+    logging.warning("[HF] Warm-up failed: %s", e)
+
+
 app = FastAPI(title="AI Code Analyst")
 
 # --- Root endpoint for Hugging Face health checks and simple UI ---
@@ -639,4 +678,95 @@ def chat(req: ChatRequest):
 
     updated = (history + [[req.message, response_text]])[-8:]
     save_history(req.session_id, updated)
-    return ChatResponse(response=response_text, updated_history=updated)
+    return ChatResponse(response=response_text, updated_history=updated)
+
+
+# --- Robust HF Inference with retries/fallback/warmup ---
+import requests, time
+
+HF_API_URL = "https://api-inference.huggingface.co/models/{model}"
+HF_TOKEN = os.getenv("HF_TOKEN","")
+
+def _hf_headers():
+    hdr = {"Accept":"application/json"}
+    if HF_TOKEN:
+        hdr["Authorization"] = f"Bearer {HF_TOKEN}"
+    return hdr
+
+def _hf_call_single(model: str, prompt: str, max_new_tokens: int = 256, timeout_s: int = 60) -> str:
+    url = HF_API_URL.format(model=model)
+    payload = {
+        "inputs": prompt,
+        "parameters": {"max_new_tokens": max_new_tokens, "temperature": 0.3},
+        "options": {"wait_for_model": True, "use_cache": True}
+    }
+    tries, backoff = 0, 2
+    while True:
+        tries += 1
+        try:
+            r = requests.post(url, headers=_hf_headers(), json=payload, timeout=timeout_s)
+            if r.status_code == 503:
+                try:
+                    eta = r.json().get("estimated_time", 8)
+                except Exception:
+                    eta = 8
+                time.sleep(min(30, max(2, int(eta))))
+                continue
+            r.raise_for_status()
+            data = r.json()
+            if isinstance(data, list):
+                if data and isinstance(data[0], dict) and "generated_text" in data[0]:
+                    return data[0]["generated_text"]
+                if data and isinstance(data[0], dict) and "content" in data[0]:
+                    return data[0]["content"]
+            if isinstance(data, dict) and "generated_text" in data:
+                return data["generated_text"]
+            return json.dumps(data, ensure_ascii=False)
+        except requests.HTTPError as e:
+            status = getattr(e.response, "status_code", None)
+            if status in (502, 503, 504, 429) and tries < 3:
+                time.sleep(backoff); backoff *= 2; continue
+            try:
+                text = e.response.text
+            except Exception:
+                text = ""
+            raise RuntimeError(f"HF error {status} on {model}: {text}") from e
+        except requests.RequestException as e:
+            if tries < 3:
+                time.sleep(backoff); backoff *= 2; continue
+            raise RuntimeError(f"HF request failed on {model}: {e}") from e
+
+def call_hf_inference_robust(prompt: str, max_new_tokens: int = 256) -> str:
+    last_err = None
+    for m in _current_models():
+        try:
+            return _hf_call_single(m, prompt, max_new_tokens)
+        except Exception as e:
+            logging.warning(f"[HF] model {m} failed: {e}")
+            last_err = e
+            continue
+    raise RuntimeError(f"All HF models failed. Last: {last_err}")
+
+
+
+from fastapi import Body
+from pydantic import BaseModel
+
+class _SetModelIn(BaseModel):
+    mode: str  # 'thinking' or 'instruct'
+
+@app.post("/set-model")
+def set_model_endpoint(body: _SetModelIn):
+    mode = (body.mode or "").lower().strip()
+    if mode not in ("thinking","instruct"):
+        raise HTTPException(400, "mode must be 'thinking' or 'instruct'")
+    STATE["mode"] = mode
+    _save_state(STATE)
+    # Try warm-up immediately to inform user about readiness
+    try:
+        _ = _hf_call_single(_current_models()[0], "ping", 1, timeout_s=30)
+        warmed = True
+    except Exception:
+        warmed = False
+    return {"ok": True, "mode": mode, "models": _current_models(), "warmed": warmed}
+
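For reference, a minimal client-side sketch of the new /set-model endpoint added in this commit. The base URL is a placeholder for wherever the Space is reachable; the request field ("mode") and response keys ("ok", "mode", "models", "warmed") come from _SetModelIn and set_model_endpoint above:

import requests

BASE = "http://localhost:7860"  # hypothetical base URL for the running Space

# Switch the backend to the "thinking" model list and report whether warm-up succeeded.
resp = requests.post(f"{BASE}/set-model", json={"mode": "thinking"}, timeout=60)
resp.raise_for_status()
info = resp.json()
print(info["mode"], info["models"], "warmed:", info["warmed"])

If the selected model is still loading on the Inference API, "warmed" comes back false; since _hf_call_single sends wait_for_model and retries on 503, the first real request afterwards may simply take longer rather than fail.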