Spaces:

AhmadA82
/

coder-demo

Running

App Files Files Community

AhmadA82 committed 6 days ago

Commit

62b67dd

verified ·

1 Parent(s): 6842ef0

fix-api

Browse files

Files changed (1) hide show

app.py +641 -612

app.py CHANGED Viewed

@@ -1,613 +1,642 @@
-# app.py
-import os
-import json
-import hashlib
-import logging
-import threading
-from pathlib import Path
-from typing import List, Dict, Any, Tuple
-import numpy as np
-import faiss
-import pickle
-import ast as python_ast
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from sentence_transformers import SentenceTransformer
-from huggingface_hub import hf_hub_download, HfApi
-from monitor import get_current_metrics, start_monitoring_thread
-from memory import get_history, save_history
-# =========================
-# إعداد السجلّات
-# =========================
-logging.basicConfig(
-    level=logging.INFO,
-    format="🪵 [%(asctime)s] [%(levelname)s] %(message)s"
-)
-logger = logging.getLogger("app")
-# =========================
-# ثوابت ومسارات
-# =========================
-DATA_DIR = Path("data")
-CACHE_DIR = DATA_DIR / "cache"
-INDEX_DIR = DATA_DIR / "index"
-FILES_DIR = DATA_DIR / "files"           # تخزين النص الكامل لكل ملف
-REPORT_FILE = DATA_DIR / "analysis_report.md"
-GRAPH_FILE = DATA_DIR / "code_graph.json"
-EMB_FILE = INDEX_DIR / "embeddings.faiss"
-META_FILE = INDEX_DIR / "chunks.pkl"
-HASH_MAP_FILE = INDEX_DIR / "hash_map.json"
-for p in [DATA_DIR, CACHE_DIR, INDEX_DIR, FILES_DIR]:
-    p.mkdir(parents=True, exist_ok=True)
-# Env
-HF_TOKEN = os.getenv("HF_TOKEN", "")
-MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen3-4B-Thinking-2507")
-# بدائل تلقائية عند فشل السيرفرلس (404/403)
-FALLBACK_MODELS = [
-    m.strip() for m in os.getenv(
-        "FALLBACK_MODELS",
-        "Qwen/Qwen2.5-7B-Instruct,Qwen/Qwen2.5-Coder-7B-Instruct"
-    ).split(",") if m.strip()
-]
-# GGUF المحلي (إن توفر)
-LOCAL_GGUF_REPO = os.getenv("LOCAL_GGUF_REPO", "Triangle104/Qwen3-8B-Q4_K_M-GGUF")
-LOCAL_GGUF_FILE = os.getenv("LOCAL_GGUF_FILE", "qwen3-8b-q4_k_m.gguf")
-LOCAL_GGUF_PATH = CACHE_DIR / LOCAL_GGUF_FILE
-# تضمين
-EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-EMBED_DIM = int(os.getenv("EMBED_DIM", "384"))
-# تقسيم الشيفرة
-CHUNK_STEP = int(os.getenv("CHUNK_STEP", "40"))  # ✅ قابل للتهيئة
-MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", str(10 * 1024 * 1024)))  # 10MB احتياطيًا
-SYSTEM_PROMPT = """<|im_start|>system
-You are a senior AI code analyst. Analyze projects with hybrid indexing (code graph + retrieval).
-Return structured, accurate, concise answers. Use Arabic + English labels in the final report.
-<|im_end|>"""
-# =========================
-# الحالة العالمية والقفل
-# =========================
-embed_model: SentenceTransformer | None = None
-faiss_index: faiss.Index | None = None
-all_chunks: List[Tuple[str, str]] = []  # (file_name, chunk_text)
-code_graph: Dict[str, Any] = {"files": {}}
-hash_map: Dict[str, str] = {}
-index_lock = threading.RLock()  # ✅ لتأمين الفهرسة/الاسترجاع
-# =========================
-# LLM (محلي/سحابي)
-# =========================
-try:
-    from llama_cpp import Llama
-except Exception:
-    Llama = None
-llm = None  # كائن النموذج المحلي إن توفر
-logger.info(f"HF_TOKEN length: {len(HF_TOKEN)}")  # تحقق من طول الtoken
-def load_local_model_if_configured():
-    """تحميل GGUF محليًا إن كان مفعّلًا."""
-    global llm
-    if Llama is None:
-        logger.warning("ℹ️ llama_cpp غير متوفر. سيتم الاعتماد على HF Inference عند الحاجة.")
-        return
-    if not LOCAL_GGUF_PATH.exists():
-        try:
-            logger.info(f"⬇️ تنزيل GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
-            hf_hub_download(
-                repo_id=LOCAL_GGUF_REPO,
-                filename=LOCAL_GGUF_FILE,
-                local_dir=str(CACHE_DIR),
-                token=HF_TOKEN or None
-            )
-            logger.info("✅ تم تنزيل GGUF بنجاح.")
-        except Exception as e:
-            logger.error(f"❌ تعذر تنزيل GGUF: {str(e)}. السبب المحتمل: token غير صالح أو repo غير موجود.")
-            return
-    try:
-        llm = Llama(
-            model_path=str(LOCAL_GGUF_PATH),
-            n_ctx=int(os.getenv("N_CTX", "32768")),
-            rope_scaling={"type": "yarn", "factor": 4.0},
-            n_threads=int(os.getenv("N_THREADS", "2")),
-            n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")),
-            n_batch=int(os.getenv("N_BATCH", "64")),
-            use_mlock=False,
-            verbose=False
-        )
-        logger.info("✅ تم تحميل النموذج المحلي (GGUF).")
-    except Exception as e:
-        llm = None
-        logger.error(f"❌ فشل تحميل النموذج المحلي: {str(e)}. السبب المحتمل: مشكلة في الملف أو التوافق.")
-def call_local_llm(prompt: str, max_tokens: int = 800) -> str:
-    if llm is None or Llama is None:
-        logger.warning("❌ النموذج المحلي غير متوفر.")
-        return ""
-    try:
-        res = llm(
-            prompt,
-            max_tokens=max_tokens,
-            temperature=0.4,
-            top_p=0.9,
-            stop=["<|im_end|>", "<|im_start|>"],
-            echo=False
-        )
-        logger.info("✅ رد ناجح من النموذج المحلي.")
-        return res["choices"][0]["text"].strip()
-    except Exception as e:
-        logger.error(f"❌ فشل استدعاء النموذج المحلي: {str(e)}. السبب المحتمل: مشكلة في التنفيذ أو الذاكرة.")
-        return ""
-def _call_hf_single_model(model_repo: str, prompt: str, max_new_tokens: int = 900) -> str:
-    import requests
-    if not HF_TOKEN:
-        logger.error("❌ HF_TOKEN غير معرف.")
-        raise RuntimeError("التوكن HF_TOKEN غير مضبوط ولا يوجد نموذج محلي.")
-    url = f"https://api-inference.huggingface.co/models/{model_repo}"
-    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-    payload = {
-        "inputs": prompt,
-        "parameters": {
-            "max_new_tokens": max_new_tokens,
-            "temperature": 0.4,
-            "top_p": 0.9,
-            "return_full_text": False
-        }
-    }
-    r = requests.post(url, headers=headers, json=payload, timeout=120)
-    if r.status_code == 503:
-        data = {}
-        try: data = r.json()
-        except Exception: pass
-        eta = data.get("estimated_time")
-        raise RuntimeError("النموذج قيد التحميل من HF (503)." + (f" متوقع {eta:.0f}ث" if isinstance(eta, (int, float)) else ""))
-    try:
-        r.raise_for_status()
-    except requests.exceptions.HTTPError as e:
-        status = e.response.status_code
-        if status == 401: raise RuntimeError("التوكن مفقود أو غير صالح (401). تأكد من HF_TOKEN.")
-        if status == 403:
-            msg = ""
-            try: msg = (e.response.json().get("error") or "").lower()
-            except Exception: pass
-            if "gated" in msg or "accept" in msg:
-                raise RuntimeError("النموذج مسيَّج (403). يجب دخول صفحة النموذج والضغط على Accept.")
-            raise RuntimeError("صلاحية الوصول مرفوضة (403).")
-        if status == 404: raise RuntimeError("النموذج غير موجود أو غير متاح عبر السيرفرلس (404).")
-        if status == 429: raise RuntimeError("تم تجاوز الحد المسموح للطلبات (429). جرّب لاحقًا.")
-        try:
-            err = e.response.json()
-        except Exception:
-            err = {"error": e.response.text}
-        raise RuntimeError(f"خطأ HF ({status}): {err.get('error') or err}")
-    data = r.json()
-    if isinstance(data, list) and data and "generated_text" in data[0]:
-        return data[0]["generated_text"]
-    if isinstance(data, dict) and "generated_text" in data:
-        return data["generated_text"]
-    if isinstance(data, dict) and "error" in data:
-        raise RuntimeError(f"HF error: {data['error']}")
-    return json.dumps(data)
-def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str:
-    candidates = [MODEL_REPO] + [m for m in FALLBACK_MODELS if m != MODEL_REPO]
-    errors = []
-    for i, model in enumerate(candidates, start=1):
-        try:
-            if i == 1:
-                logger.info(f"🎯 استخدام الموديل الأساسي: {model}")
-            else:
-                logger.warning(f"↪️ تفعيل Fallback ({i-1}/{len(candidates)-1}): {model}")
-            out = _call_hf_single_model(model, prompt, max_new_tokens)
-            if model != MODEL_REPO:
-                logger.info(f"✅ تم الرد من الموديل الاحتياطي: {model}")
-            return out
-        except RuntimeError as e:
-            msg = str(e)
-            errors.append(f"{model}: {msg}")
-            if "404" in msg or "403" in msg:
-                continue
-            raise
-    raise RuntimeError("تعذّر استخدام الموديل الأساسي وكل البدائل (403/404). التفاصيل:\n- " + "\n- ".join(errors))
-def call_llm(prompt: str, max_tokens: int = 900) -> str:
-    if llm:
-        return call_local_llm(prompt, max_tokens)
-    else:
-        return call_hf_inference(prompt, max_tokens)
-# =========================
-# تهيئة التضمين والفهرس
-# =========================
-def load_embed_model():
-    global embed_model
-    if embed_model is None:
-        logger.info(f"⬇️ تحميل نموذج التضمين: {EMBED_MODEL_NAME}")
-        embed_model = SentenceTransformer(EMBED_MODEL_NAME, device="cpu")
-        logger.info("✅ نموذج التضمين جاهز.")
-def build_faiss_index():
-    global faiss_index
-    if faiss_index is None:
-        faiss_index = faiss.IndexFlatL2(EMBED_DIM)
-        logger.info("✅ فهرس FAISS جاهز.")
-def load_index():
-    with index_lock:
-        if EMB_FILE.exists() and META_FILE.exists():
-            faiss_index = faiss.read_index(str(EMB_FILE))
-            with META_FILE.open("rb") as f:
-                all_chunks = pickle.load(f)
-            if HASH_MAP_FILE.exists():
-                with HASH_MAP_FILE.open("r", encoding="utf-8") as f:
-                    hash_map = json.load(f)
-            logger.info(f"✅ تم تحميل الفهرس: {len(all_chunks)} أجزاء")
-            return True
-        return False
-def save_index():
-    with index_lock:
-        if faiss_index:
-            faiss.write_index(faiss_index, str(EMB_FILE))
-        with META_FILE.open("wb") as f:
-            pickle.dump(all_chunks, f)
-        with HASH_MAP_FILE.open("w", encoding="utf-8") as f:
-            json.dump(hash_map, f, ensure_ascii=False, indent=2)
-        logger.info("✅ تم حفظ الفهرس")
-# =========================
-# معالجة الملفات والأجزاء
-# =========================
-def chunk_code(text: str, step: int = CHUNK_STEP) -> List[str]:
-    lines = text.splitlines(keepends=True)
-    chunks = []
-    for i in range(0, len(lines), step // 2):
-        chunk = "".join(lines[i:i + step])
-        if chunk.strip():
-            chunks.append(chunk)
-    return chunks
-def compute_hash(text: str) -> str:
-    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
-def add_file_to_index(file_path: Path):
-    with index_lock:
-        try:
-            text = file_path.read_text(encoding="utf-8", errors="ignore")
-            if len(text.encode("utf-8")) > MAX_FILE_BYTES:
-                logger.warning(f"⚠️ ملف كبير جدًا: {file_path.name}، سيتم تجاهله")
-                return
-            current_hash = compute_hash(text)
-            if file_path.name in hash_map and hash_map[file_path.name] == current_hash:
-                return  # لا تغيير
-            chunks = chunk_code(text)
-            embeddings = embed_model.encode(chunks, normalize_embeddings=True)
-            for i, emb in enumerate(embeddings):
-                faiss_index.add(np.array([emb], dtype=np.float32))
-                all_chunks.append((file_path.name, chunks[i]))
-            hash_map[file_path.name] = current_hash
-            update_code_graph(file_path.name, text)
-            logger.info(f"✅ أضيف ملف: {file_path.name} ({len(chunks)} أجزاء)")
-        except Exception as e:
-            logger.warning(f"⚠️ فشل إضافة {file_path.name}: {e}")
-def remove_file_from_index(file_name: str):
-    with index_lock:
-        to_remove = [i for i, (fn, _) in enumerate(all_chunks) if fn == file_name]
-        if to_remove:
-            mask = np.ones(len(all_chunks), dtype=bool)
-            mask[to_remove] = False
-            remaining_embs = np.array([embed_model.encode(all_chunks[i][1]) for i in range(len(all_chunks)) if mask[i]])
-            faiss_index.reset()
-            faiss_index.add(remaining_embs)
-            all_chunks[:] = [all_chunks[i] for i in range(len(all_chunks)) if mask[i]]
-            code_graph["files"].pop(file_name, None)
-            hash_map.pop(file_name, None)
-            logger.info(f"🗑️ حذف ملف: {file_name}")
-# =========================
-# رسم الرسم البياني للشيفرة
-# =========================
-def update_code_graph(file_name: str, text: str):
-    try:
-        tree = python_ast.parse(text)
-        imports = [n.names[0].name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.Import)]
-        functions = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.FunctionDef)]
-        classes = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.ClassDef)]
-        code_graph["files"][file_name] = {
-            "imports": imports,
-            "functions": functions,
-            "classes": classes
-        }
-        with GRAPH_FILE.open("w", encoding="utf-8") as f:
-            json.dump(code_graph, f, ensure_ascii=False, indent=2)
-    except Exception as e:
-        logger.warning(f"⚠️ فشل تحليل {file_name}: {e}")
-def render_graph_overview(max_nodes: int = 50) -> str:
-    overview = "رسم بياني للشيفرة:\n"
-    for fname, data in list(code_graph["files"].items())[:max_nodes]:
-        overview += f"- {fname}: {len(data['functions'])} دوال, {len(data['classes'])} فئات, imports: {', '.join(data['imports'][:3])}\n"
-    return overview
-# =========================
-# استرجاع وتحليل
-# =========================
-def retrieve(query: str, k: int = 5) -> List[Tuple[str, str, float]]:
-    load_embed_model()
-    build_faiss_index()
-    if not load_index():
-        rebuild_index_from_files()
-    q_emb = embed_model.encode([query], normalize_embeddings=True)
-    distances, indices = faiss_index.search(q_emb, k)
-    results = []
-    for i, idx in enumerate(indices[0]):
-        if idx != -1:
-            fname, chunk = all_chunks[idx]
-            results.append((fname, chunk, distances[0][i]))
-    return results
-def build_analysis_prompt(query: str, retrieved_docs: List[Tuple[str, str, float]]) -> str:
-    ctx = []
-    for fname, chunk, score in retrieved_docs:
-        ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-    extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview()
-    return (
-        f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-        f"<|im_start|>user\n{query}\n{extra}\n<|im_end|>\n"
-        f"<|im_start|>assistant\n"
-    )
-def analyze_and_report_internal(session_id: str, query: str, k: int = 10) -> str:
-    retrieved_docs = retrieve(query, k=k)
-    prompt = build_analysis_prompt(query, retrieved_docs)
-    try:
-        report = call_llm(prompt, max_tokens=1500)
-        REPORT_FILE.write_text(report, encoding="utf-8")
-        return report
-    except Exception as e:
-        logger.error(f"❌ LLM error in analysis: {e}")
-        raise
-# =========================
-# بناء الـ Prompt للدردشة
-# =========================
-def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -> str:
-    prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-    for user_msg, ai_msg in history:
-        prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
-        prompt += f"<|im_start|>assistant\n{ai_msg}\n<|im_end|>\n"
-    prompt += f"<|im_start|>user\n{message}\n{extra}\n<|im_end|>\n"
-    prompt += f"<|im_start|>assistant\n"
-    return prompt
-# =========================
-# FastAPI
-# =========================
-app = FastAPI(title="AI Code Analyst")
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Endpoint جديد لفحص التوكن وصلاحية الوصول
-@app.get("/hf-check")
-def hf_check():
-    api = HfApi()
-    out = {
-        "token_set": bool(HF_TOKEN),
-        "token_valid": False,
-        "model_repo": MODEL_REPO,
-        "model_access": False,
-        "model_private": None,
-        "gated_hint": False,
-        "message": ""
-    }
-    if not HF_TOKEN:
-        out["message"] = "HF_TOKEN غير مضبوط."
-        return out
-    try:
-        me = api.whoami(token=HF_TOKEN)
-        out["token_valid"] = True
-        out["message"] = f"Token OK for user: {me.get('name')}"
-    except Exception as e:
-        out["message"] = f"Token check failed: {type(e).__name__}: {e}"
-        return out
-    try:
-        info = api.model_info(MODEL_REPO, token=HF_TOKEN)
-        out["model_access"] = True
-        out["model_private"] = getattr(info, "private", None)
-        out["message"] += f" | Model reachable: {info.modelId}"
-    except Exception as e:
-        msg = str(e).lower()
-        out["message"] += f" | Model access failed: {type(e).__name__}: {e}"
-        out["gated_hint"] = ("gated" in msg or "accept" in msg)
-    return out
-class UploadFilesRequest(BaseModel):
-    files: Dict[str, str]  # fname: content
-class DiffFilesRequest(BaseModel):
-    deleted: List[str]
-    modified: Dict[str, str]  # fname: new_content
-class AnalyzeAndReportRequest(BaseModel):
-    session_id: str
-    query: str
-    top_k: int | None = None
-class ChatRequest(BaseModel):
-    session_id: str
-    message: str
-class ChatResponse(BaseModel):
-    response: str
-    updated_history: List[List[str]]
-@app.on_event("startup")
-def startup_event():
-    load_embed_model()
-    build_faiss_index()
-    if not load_index():
-        logger.info("ℹ️ فهرس غير موجود، سيتم بناؤه من الملفات الحالية")
-        rebuild_index_from_files()
-    load_local_model_if_configured()
-    start_monitoring_thread()
-def rebuild_index_from_files():
-    with index_lock:
-        faiss_index.reset()
-        all_chunks.clear()
-        hash_map.clear()
-        code_graph["files"].clear()
-        for file_path in FILES_DIR.glob("**/*"):
-            if file_path.is_file() and not file_path.name.startswith("."):
-                add_file_to_index(file_path)
-        save_index()
-@app.get("/metrics")
-def metrics():
-    return get_current_metrics()
-@app.post("/upload-files")
-def upload_files(req: UploadFilesRequest):
-    added = []
-    for fname, content in req.files.items():
-        file_path = FILES_DIR / fname
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        file_path.write_text(content, encoding="utf-8")
-        add_file_to_index(file_path)
-        added.append(fname)
-    save_index()
-    return {"status": "ok", "added": added}
-@app.post("/diff-files")
-def diff_files(req: DiffFilesRequest):
-    for fname in req.deleted:
-        (FILES_DIR / fname).unlink(missing_ok=True)
-        remove_file_from_index(fname)
-    for fname, new_content in req.modified.items():
-        file_path = FILES_DIR / fname
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        file_path.write_text(new_content, encoding="utf-8")
-        add_file_to_index(file_path)
-    save_index()
-    return {
-        "status": "ok",
-        "deleted": req.deleted,
-        "modified": list(req.modified.keys()),
-        "total_index_vectors": int(faiss_index.ntotal) if faiss_index else 0
-    }
-@app.post("/analyze-and-report")
-def analyze_and_report(req: AnalyzeAndReportRequest):
-    report = analyze_and_report_internal(req.session_id, req.query, k=req.top_k or 10)
-    return {"status": "ok", "report_path": str(REPORT_FILE), "preview": report[:1200]}
-def classify_intent(history: List[List[str]], message: str) -> Dict[str, Any]:
-    inst = (
-        "أعد JSON فقط دون أي نص آخر.\n"
-        "المفاتيح: intent (string), confidence (0-1), action (RETRIEVE_ONLY|ANALYZE_AND_REPORT|TRACE_SUBSET|NONE), "
-        "targets (list of strings), reason (string).\n"
-        "أمثلة:\n"
-        "س: ما عمل الملف X؟ → {\"intent\":\"ASK_FILE_ROLE\",\"confidence\":0.9,\"action\":\"RETRIEVE_ONLY\",\"targets\":[\"X\"],\"reason\":\"...\"}\n"
-        "س: لماذا لا تعمل ميزة الدخول؟ → {\"intent\":\"WHY_FEATURE_NOT_WORKING\",\"confidence\":0.85,\"action\":\"ANALYZE_AND_REPORT\",\"targets\":[],\"reason\":\"...\"}\n"
-        "س: اين يُعرّف المتغير TOKEN وكيف يتغير؟ → {\"intent\":\"CODE_FLOW_TRACE\",\"confidence\":0.8,\"action\":\"TRACE_SUBSET\",\"targets\":[\"TOKEN\"],\"reason\":\"...\"}\n"
-    )
-    p = (
-        f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-        f"<|im_start|>user\n{inst}\nالسؤال: {message}\nأعد JSON فقط.\n<|im_end|>\n"
-        f"<|im_start|>assistant\n"
-    )
-    txt = call_llm(p, max_tokens=200)
-    try:
-        start = txt.find("{")
-        end = txt.rfind("}")
-        obj = json.loads(txt[start:end+1]) if start != -1 and end != -1 else {}
-    except Exception:
-        obj = {}
-    if not isinstance(obj, dict):
-        obj = {}
-    obj.setdefault("intent", "UNKNOWN")
-    obj.setdefault("confidence", 0.0)
-    obj.setdefault("action", "NONE")
-    obj.setdefault("targets", [])
-    obj.setdefault("reason", "")
-    return obj
-@app.post("/chat", response_model=ChatResponse)
-def chat(req: ChatRequest):
-    history = get_history(req.session_id)
-    decision = classify_intent(history, req.message)
-    action = decision.get("action", "NONE")
-    response_text = ""
-    if action == "ANALYZE_AND_REPORT":
-        try:
-            report = analyze_and_report_internal(req.session_id, req.message, k=10)
-            response_text = "تم إنشاء تقرير تحليلي:\n\n" + report
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-    elif action == "RETRIEVE_ONLY":
-        retrieved_docs = retrieve(req.message, k=6)
-        ctx = []
-        for fname, chunk, score in retrieved_docs:
-            ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-        extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview(60)
-        prompt = build_chat_prompt(history, req.message, extra)
-        try:
-            response_text = call_llm(prompt, max_tokens=700)
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-    elif action == "TRACE_SUBSET":
-        targets = decision.get("targets", [])
-        key = " ".join(targets) if targets else req.message
-        retrieved_docs = retrieve(key, k=10)
-        ctx = []
-        for fname, chunk, score in retrieved_docs:
-            ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-        flow_query = req.message + "\nPlease trace variables/functions: " + ", ".join(targets)
-        prompt = build_analysis_prompt(flow_query, retrieved_docs)
-        try:
-            trace_report = call_llm(prompt, max_tokens=1200)
-            REPORT_FILE.write_text(trace_report, encoding="utf-8")
-            response_text = "تقرير التتبع:\n\n" + trace_report
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-    else:
-        prompt = build_chat_prompt(history, req.message, "")
-        try:
-            response_text = call_llm(prompt, max_tokens=600)
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-    updated = (history + [[req.message, response_text]])[-8:]
-    save_history(req.session_id, updated)
     return ChatResponse(response=response_text, updated_history=updated)

+# app.py
+import os
+import json
+import hashlib
+import logging
+import threading
+from pathlib import Path
+from typing import List, Dict, Any, Tuple
+import numpy as np
+import faiss
+import pickle
+import ast as python_ast
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+from huggingface_hub import hf_hub_download, HfApi
+from monitor import get_current_metrics, start_monitoring_thread
+from memory import get_history, save_history
+# =========================
+# Logging setup
+# =========================
+logging.basicConfig(
+    level=logging.INFO,
+    format="🪵 [%(asctime)s] [%(levelname)s] %(message)s"
+)
+logger = logging.getLogger("app")
+# =========================
+# Constants and paths
+# =========================
+DATA_DIR = Path("data")
+CACHE_DIR = DATA_DIR / "cache"
+INDEX_DIR = DATA_DIR / "index"
+FILES_DIR = DATA_DIR / "files"           # stores the full text of each file
+REPORT_FILE = DATA_DIR / "analysis_report.md"
+GRAPH_FILE = DATA_DIR / "code_graph.json"
+EMB_FILE = INDEX_DIR / "embeddings.faiss"
+META_FILE = INDEX_DIR / "chunks.pkl"
+HASH_MAP_FILE = INDEX_DIR / "hash_map.json"
+# Create the working directories at import time so later reads/writes can
+# assume they exist.
+for p in [DATA_DIR, CACHE_DIR, INDEX_DIR, FILES_DIR]:
+    p.mkdir(parents=True, exist_ok=True)
+# Env
+HF_TOKEN = os.getenv("HF_TOKEN", "")
+MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen3-4B-Thinking-2507")
+# Automatic fallbacks when the serverless endpoint fails (404/403).
+# Comma-separated list; blank entries are dropped.
+FALLBACK_MODELS = [
+    m.strip() for m in os.getenv(
+        "FALLBACK_MODELS",
+        "Qwen/Qwen2.5-7B-Instruct,Qwen/Qwen2.5-Coder-7B-Instruct"
+    ).split(",") if m.strip()
+]
+# Local GGUF model (if available)
+LOCAL_GGUF_REPO = os.getenv("LOCAL_GGUF_REPO", "Triangle104/Qwen3-8B-Q4_K_M-GGUF")
+LOCAL_GGUF_FILE = os.getenv("LOCAL_GGUF_FILE", "qwen3-8b-q4_k_m.gguf")
+LOCAL_GGUF_PATH = CACHE_DIR / LOCAL_GGUF_FILE
+# Embedding
+EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+# Dimension used to create the FAISS index.
+# NOTE(review): presumably must match the output size of EMBED_MODEL —
+# confirm whenever EMBED_MODEL is overridden.
+EMBED_DIM = int(os.getenv("EMBED_DIM", "384"))
+# Code chunking
+CHUNK_STEP = int(os.getenv("CHUNK_STEP", "40"))  # ✅ configurable
+MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", str(10 * 1024 * 1024)))  # 10MB as a safeguard
+# The prompt text already carries its own <|im_start|>system ... <|im_end|>
+# wrapper.
+# NOTE(review): the pre-commit prompt builders wrapped this value in a second
+# system block — confirm the current builders do not double-wrap it.
+SYSTEM_PROMPT = """<|im_start|>system
+You are a senior AI code analyst. Analyze projects with hybrid indexing (code graph + retrieval).
+Return structured, accurate, concise answers. Use Arabic + English labels in the final report.
+<|im_end|>"""
+# =========================
+# Global state and lock
+# =========================
+embed_model: SentenceTransformer | None = None
+faiss_index: faiss.Index | None = None
+all_chunks: List[Tuple[str, str]] = []  # (file_name, chunk_text)
+code_graph: Dict[str, Any] = {"files": {}}
+hash_map: Dict[str, str] = {}
+index_lock = threading.RLock()  # ✅ guards indexing/retrieval
+# =========================
+# LLM (local/cloud)
+# =========================
+# llama_cpp is optional: any import failure leaves Llama as None.
+try:
+    from llama_cpp import Llama
+except Exception:
+    Llama = None
+llm = None  # local model object, if available
+logger.info(f"HF_TOKEN length: {len(HF_TOKEN)}")  # logs only the token length, not its value
+def load_local_model_if_configured():
+    """Try to load the local GGUF model into the module-level ``llm``.
+
+    Only warns and returns when ``llama_cpp`` could not be imported. When the
+    GGUF file is not on disk it is first downloaded into ``CACHE_DIR``; a
+    failed download is logged and the function returns without touching
+    ``llm``. A failed model load is logged and resets ``llm`` to ``None``.
+    """
+    global llm
+    if Llama is None:
+        logger.warning("ℹ️ llama_cpp غير متوفر. سيتم الاعتماد على HF Inference عند الحاجة.")
+        return
+
+    if not LOCAL_GGUF_PATH.exists():
+        try:
+            logger.info(f"⬇️ تنزيل GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
+            download_args = {
+                "repo_id": LOCAL_GGUF_REPO,
+                "filename": LOCAL_GGUF_FILE,
+                "local_dir": str(CACHE_DIR),
+                # An empty token is passed as None.
+                "token": HF_TOKEN or None,
+            }
+            hf_hub_download(**download_args)
+            logger.info("✅ تم تنزيل GGUF بنجاح.")
+        except Exception as exc:
+            logger.error(f"❌ تعذر تنزيل GGUF: {str(exc)}. السبب المحتمل: token غير صالح أو repo غير موجود.")
+            return
+
+    try:
+        # Env parsing stays inside the try so a malformed integer is reported
+        # as a load failure rather than raised to the caller.
+        # NOTE(review): confirm that llama_cpp.Llama honours a `rope_scaling`
+        # dict; if it is swallowed as an extra kwarg it has no effect.
+        model_settings = dict(
+            model_path=str(LOCAL_GGUF_PATH),
+            n_ctx=int(os.getenv("N_CTX", "32768")),
+            rope_scaling={"type": "yarn", "factor": 4.0},
+            n_threads=int(os.getenv("N_THREADS", "2")),
+            n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")),
+            n_batch=int(os.getenv("N_BATCH", "64")),
+            use_mlock=False,
+            verbose=False,
+        )
+        llm = Llama(**model_settings)
+        logger.info("✅ تم تحميل النموذج المحلي (GGUF).")
+    except Exception as exc:
+        llm = None
+        logger.error(f"❌ فشل تحميل النموذج المحلي: {str(exc)}. السبب المحتمل: مشكلة في الملف أو التوافق.")
+def call_local_llm(prompt: str, max_tokens: int = 800) -> str:
+    """Generate a completion with the local GGUF model.
+
+    Args:
+        prompt: Full prompt text handed to the model.
+        max_tokens: Upper bound on generated tokens.
+
+    Returns:
+        The stripped text of the first choice, or an empty string when the
+        local model is not loaded or the call raises (the error is logged).
+    """
+    model_unavailable = llm is None or Llama is None
+    if model_unavailable:
+        logger.warning("❌ النموذج المحلي غير متوفر.")
+        return ""
+
+    sampling = {
+        "max_tokens": max_tokens,
+        "temperature": 0.4,
+        "top_p": 0.9,
+        # Stop at either chat-template delimiter.
+        "stop": ["<|im_end|>", "<|im_start|>"],
+        "echo": False,
+    }
+    try:
+        completion = llm(prompt, **sampling)
+        logger.info("✅ رد ناجح من النموذج المحلي.")
+        return completion["choices"][0]["text"].strip()
+    except Exception as exc:
+        logger.error(f"❌ فشل استدعاء النموذج المحلي: {str(exc)}. السبب المحتمل: مشكلة في التنفيذ أو الذاكرة.")
+        return ""
+def _call_hf_single_model(model_repo: str, prompt: str, max_new_tokens: int = 900) -> str:
+    """Send one text-generation request to the HF Inference API for a model.
+
+    Args:
+        model_repo: Repository id appended to the inference URL.
+        prompt: Text sent as the ``inputs`` field.
+        max_new_tokens: Generation cap forwarded in ``parameters``.
+
+    Returns:
+        The ``generated_text`` of the response. When the JSON body has none of
+        the recognised shapes, the body re-serialised as a JSON string.
+
+    Raises:
+        RuntimeError: If ``HF_TOKEN`` is empty, the endpoint answers 503,
+            401, 403, 404, 429 or any other HTTP error, or the body is a
+            JSON object with an ``error`` key.
+    """
+    import requests
+    if not HF_TOKEN:
+        logger.error("❌ HF_TOKEN غير معرف.")
+        raise RuntimeError("التوكن HF_TOKEN غير مضبوط ولا يوجد نموذج محلي.")
+    url = f"https://api-inference.huggingface.co/models/{model_repo}"
+    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": max_new_tokens,
+            "temperature": 0.4,
+            "top_p": 0.9,
+            "return_full_text": False
+        }
+    }
+    r = requests.post(url, headers=headers, json=payload, timeout=120)
+    if r.status_code == 503:
+        try:
+            data = r.json()
+        except Exception:
+            data = {}
+        # The body is not guaranteed to be a JSON object; read the ETA only
+        # from a dict so a list/string body cannot raise AttributeError.
+        eta = data.get("estimated_time") if isinstance(data, dict) else None
+        raise RuntimeError("النموذج قيد التحميل من HF (503)." + (f" متوقع {eta:.0f}ث" if isinstance(eta, (int, float)) else ""))
+    try:
+        r.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        status = e.response.status_code
+        if status == 401:
+            raise RuntimeError("التوكن مفقود أو غير صالح (401). تأكد من HF_TOKEN.") from e
+        if status == 403:
+            msg = ""
+            # Best effort: any failure to read the error text leaves msg empty.
+            try:
+                msg = (e.response.json().get("error") or "").lower()
+            except Exception:
+                pass
+            if "gated" in msg or "accept" in msg:
+                raise RuntimeError("النموذج مسيَّج (403). يجب دخول صفحة النموذج والضغط على Accept.") from e
+            raise RuntimeError("صلاحية الوصول مرفوضة (403).") from e
+        if status == 404:
+            raise RuntimeError("النموذج غير موجود أو غير متاح عبر السيرفرلس (404).") from e
+        if status == 429:
+            raise RuntimeError("تم تجاوز الحد المسموح للطلبات (429). جرّب لاحقًا.") from e
+        try:
+            err = e.response.json()
+        except Exception:
+            err = {"error": e.response.text}
+        # A non-dict JSON body has no .get(); report it verbatim instead of
+        # crashing while building the error message.
+        detail = (err.get("error") or err) if isinstance(err, dict) else err
+        raise RuntimeError(f"خطأ HF ({status}): {detail}") from e
+    data = r.json()
+    # Guard the element type: on a string element `in` would be a substring
+    # test and the subscript below would raise TypeError.
+    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
+        return data[0]["generated_text"]
+    if isinstance(data, dict) and "generated_text" in data:
+        return data["generated_text"]
+    if isinstance(data, dict) and "error" in data:
+        raise RuntimeError(f"HF error: {data['error']}")
+    return json.dumps(data)
+def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str:
+    """Query the primary HF model, moving to fallbacks on 403/404 failures.
+
+    Args:
+        prompt: Prompt forwarded unchanged to each candidate model.
+        max_new_tokens: Generation cap forwarded to each candidate model.
+
+    Returns:
+        The text returned by the first candidate that succeeds.
+
+    Raises:
+        RuntimeError: Re-raised immediately when a candidate fails with a
+            message containing neither "404" nor "403"; raised with the
+            collected per-model messages when every candidate failed.
+    """
+    fallbacks = [name for name in FALLBACK_MODELS if name != MODEL_REPO]
+    failures = []
+    for position, repo in enumerate([MODEL_REPO, *fallbacks]):
+        if position == 0:
+            logger.info(f"🎯 استخدام الموديل الأساسي: {repo}")
+        else:
+            logger.warning(f"↪️ تفعيل Fallback ({position}/{len(fallbacks)}): {repo}")
+        try:
+            answer = _call_hf_single_model(repo, prompt, max_new_tokens)
+        except RuntimeError as exc:
+            reason = str(exc)
+            failures.append(f"{repo}: {reason}")
+            # Only "not found" / "forbidden" failures move on to the next model.
+            if "404" not in reason and "403" not in reason:
+                raise
+            continue
+        if repo != MODEL_REPO:
+            logger.info(f"✅ تم الرد من الموديل الاحتياطي: {repo}")
+        return answer
+    raise RuntimeError("تعذّر استخدام الموديل الأساسي وكل البدائل (403/404). التفاصيل:\n- " + "\n- ".join(failures))
+def call_llm(prompt: str, max_tokens: int = 900) -> str:
+    if llm:
+        return call_local_llm(prompt, max_tokens)
+    else:
+        return call_hf_inference(prompt, max_tokens)
+# =========================
+# تهيئة التضمين والفهرس
+# =========================
+def load_embed_model():
+    global embed_model
+    if embed_model is None:
+        logger.info(f"⬇️ تحميل نموذج التضمين: {EMBED_MODEL_NAME}")
+        embed_model = SentenceTransformer(EMBED_MODEL_NAME, device="cpu")
+        logger.info("✅ نموذج التضمين جاهز.")
+def build_faiss_index():
+    global faiss_index
+    if faiss_index is None:
+        faiss_index = faiss.IndexFlatL2(EMBED_DIM)
+        logger.info("✅ فهرس FAISS جاهز.")
+def load_index():
+    with index_lock:
+        if EMB_FILE.exists() and META_FILE.exists():
+            faiss_index = faiss.read_index(str(EMB_FILE))
+            with META_FILE.open("rb") as f:
+                all_chunks = pickle.load(f)
+            if HASH_MAP_FILE.exists():
+                with HASH_MAP_FILE.open("r", encoding="utf-8") as f:
+                    hash_map = json.load(f)
+            logger.info(f"✅ تم تحميل الفهرس: {len(all_chunks)} أجزاء")
+            return True
+        return False
+def save_index():
+    with index_lock:
+        if faiss_index:
+            faiss.write_index(faiss_index, str(EMB_FILE))
+        with META_FILE.open("wb") as f:
+            pickle.dump(all_chunks, f)
+        with HASH_MAP_FILE.open("w", encoding="utf-8") as f:
+            json.dump(hash_map, f, ensure_ascii=False, indent=2)
+        logger.info("✅ تم حفظ الفهرس")
+# =========================
+# معالجة الملفات والأجزاء
+# =========================
+def chunk_code(text: str, step: int = CHUNK_STEP) -> List[str]:
+    lines = text.splitlines(keepends=True)
+    chunks = []
+    for i in range(0, len(lines), step // 2):
+        chunk = "".join(lines[i:i + step])
+        if chunk.strip():
+            chunks.append(chunk)
+    return chunks
+def compute_hash(text: str) -> str:
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
+def add_file_to_index(file_path: Path):
+    with index_lock:
+        try:
+            text = file_path.read_text(encoding="utf-8", errors="ignore")
+            if len(text.encode("utf-8")) > MAX_FILE_BYTES:
+                logger.warning(f"⚠️ ملف كبير جدًا: {file_path.name}، سيتم تجاهله")
+                return
+            current_hash = compute_hash(text)
+            if file_path.name in hash_map and hash_map[file_path.name] == current_hash:
+                return  # لا تغيير
+            chunks = chunk_code(text)
+            embeddings = embed_model.encode(chunks, normalize_embeddings=True)
+            for i, emb in enumerate(embeddings):
+                faiss_index.add(np.array([emb], dtype=np.float32))
+                all_chunks.append((file_path.name, chunks[i]))
+            hash_map[file_path.name] = current_hash
+            update_code_graph(file_path.name, text)
+            logger.info(f"✅ أضيف ملف: {file_path.name} ({len(chunks)} أجزاء)")
+        except Exception as e:
+            logger.warning(f"⚠️ فشل إضافة {file_path.name}: {e}")
+def remove_file_from_index(file_name: str):
+    with index_lock:
+        to_remove = [i for i, (fn, _) in enumerate(all_chunks) if fn == file_name]
+        if to_remove:
+            mask = np.ones(len(all_chunks), dtype=bool)
+            mask[to_remove] = False
+            remaining_embs = np.array([embed_model.encode(all_chunks[i][1]) for i in range(len(all_chunks)) if mask[i]])
+            faiss_index.reset()
+            faiss_index.add(remaining_embs)
+            all_chunks[:] = [all_chunks[i] for i in range(len(all_chunks)) if mask[i]]
+            code_graph["files"].pop(file_name, None)
+            hash_map.pop(file_name, None)
+            logger.info(f"🗑️ حذف ملف: {file_name}")
+# =========================
+# رسم الرسم البياني للشيفرة
+# =========================
+def update_code_graph(file_name: str, text: str):
+    try:
+        tree = python_ast.parse(text)
+        imports = [n.names[0].name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.Import)]
+        functions = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.FunctionDef)]
+        classes = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.ClassDef)]
+        code_graph["files"][file_name] = {
+            "imports": imports,
+            "functions": functions,
+            "classes": classes
+        }
+        with GRAPH_FILE.open("w", encoding="utf-8") as f:
+            json.dump(code_graph, f, ensure_ascii=False, indent=2)
+    except Exception as e:
+        logger.warning(f"⚠️ فشل تحليل {file_name}: {e}")
+def render_graph_overview(max_nodes: int = 50) -> str:
+    overview = "رسم بياني للشيفرة:\n"
+    for fname, data in list(code_graph["files"].items())[:max_nodes]:
+        overview += f"- {fname}: {len(data['functions'])} دوال, {len(data['classes'])} فئات, imports: {', '.join(data['imports'][:3])}\n"
+    return overview
+# =========================
+# استرجاع وتحليل
+# =========================
+def retrieve(query: str, k: int = 5) -> List[Tuple[str, str, float]]:
+    load_embed_model()
+    build_faiss_index()
+    if not load_index():
+        rebuild_index_from_files()
+    q_emb = embed_model.encode([query], normalize_embeddings=True)
+    distances, indices = faiss_index.search(q_emb, k)
+    results = []
+    for i, idx in enumerate(indices[0]):
+        if idx != -1:
+            fname, chunk = all_chunks[idx]
+            results.append((fname, chunk, distances[0][i]))
+    return results
+def build_analysis_prompt(query: str, retrieved_docs: List[Tuple[str, str, float]]) -> str:
+    ctx = []
+    for fname, chunk, score in retrieved_docs:
+        ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+    extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview()
+    return (
+        f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+        f"<|im_start|>user\n{query}\n{extra}\n<|im_end|>\n"
+        f"<|im_start|>assistant\n"
+    )
+def analyze_and_report_internal(session_id: str, query: str, k: int = 10) -> str:
+    retrieved_docs = retrieve(query, k=k)
+    prompt = build_analysis_prompt(query, retrieved_docs)
+    try:
+        report = call_llm(prompt, max_tokens=1500)
+        REPORT_FILE.write_text(report, encoding="utf-8")
+        return report
+    except Exception as e:
+        logger.error(f"❌ LLM error in analysis: {e}")
+        raise
+# =========================
+# بناء الـ Prompt للدردشة
+# =========================
+def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -> str:
+    prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+    for user_msg, ai_msg in history:
+        prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
+        prompt += f"<|im_start|>assistant\n{ai_msg}\n<|im_end|>\n"
+    prompt += f"<|im_start|>user\n{message}\n{extra}\n<|im_end|>\n"
+    prompt += f"<|im_start|>assistant\n"
+    return prompt
+# =========================
+# FastAPI
+# =========================
+# ASGI application object; every route, middleware and startup hook below is registered on it.
+app = FastAPI(title="AI Code Analyst")
+# --- Root endpoint for Hugging Face health checks and simple UI ---
+from fastapi.responses import PlainTextResponse, HTMLResponse, JSONResponse
+@app.get("/", response_class=HTMLResponse)
+def root(logs: str | None = None):
+    """
+    Minimal root endpoint so HF / healthcheck returns 200 OK.
+    Use `/?logs=container` to tail last lines from data/app.log.
+    """
+    if logs == "container":
+        log_file = Path(DATA_DIR) / "app.log"
+        if log_file.exists():
+            tail = "".join(log_file.read_text(encoding="utf-8", errors="ignore").splitlines(True)[-200:])
+            return PlainTextResponse(tail)
+        return PlainTextResponse("No logs yet.", status_code=200)
+    # Small HTML with links
+    html = """
+    <html>
+      <head><meta charset="utf-8"><title>AI Code Analyst</title></head>
+      <body style="font-family: ui-sans-serif, system-ui; padding:20px">
+        <h1>✅ AI Code Analyst is running</h1>
+        <p>Try <a href="/docs">/docs</a>, <a href="/hf-check">/hf-check</a>, or <a href="/metrics">/metrics</a>.</p>
+        <p>Logs: <a href="/?logs=container">tail</a></p>
+      </body>
+    </html>
+    """
+    return HTMLResponse(html)
+# Allow cross-origin calls from any origin, with any method and any header.
+# NOTE(review): wildcard origins together with allow_credentials=True is the
+# combination the FastAPI CORS documentation warns against; confirm whether
+# clients really send credentials, otherwise list explicit origins or disable it.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Endpoint جديد لفحص التوكن وصلاحية الوصول
+@app.get("/hf-check")
+def hf_check():
+    api = HfApi()
+    out = {
+        "token_set": bool(HF_TOKEN),
+        "token_valid": False,
+        "model_repo": MODEL_REPO,
+        "model_access": False,
+        "model_private": None,
+        "gated_hint": False,
+        "message": ""
+    }
+    if not HF_TOKEN:
+        out["message"] = "HF_TOKEN غير مضبوط."
+        return out
+    try:
+        me = api.whoami(token=HF_TOKEN)
+        out["token_valid"] = True
+        out["message"] = f"Token OK for user: {me.get('name')}"
+    except Exception as e:
+        out["message"] = f"Token check failed: {type(e).__name__}: {e}"
+        return out
+    try:
+        info = api.model_info(MODEL_REPO, token=HF_TOKEN)
+        out["model_access"] = True
+        out["model_private"] = getattr(info, "private", None)
+        out["message"] += f" | Model reachable: {info.modelId}"
+    except Exception as e:
+        msg = str(e).lower()
+        out["message"] += f" | Model access failed: {type(e).__name__}: {e}"
+        out["gated_hint"] = ("gated" in msg or "accept" in msg)
+    return out
+# Request body of POST /upload-files.
+class UploadFilesRequest(BaseModel):
+    files: Dict[str, str]  # file name -> full text content to store and index
+# Request body of POST /diff-files: an incremental update of the stored files.
+class DiffFilesRequest(BaseModel):
+    deleted: List[str]  # names of files to delete and drop from the index
+    modified: Dict[str, str]  # file name -> new full text content
+# Request body of POST /analyze-and-report.
+class AnalyzeAndReportRequest(BaseModel):
+    session_id: str
+    query: str  # question used both for retrieval and as the LLM query
+    top_k: int | None = None  # number of chunks to retrieve; the endpoint uses 10 when unset
+# Request body of POST /chat.
+class ChatRequest(BaseModel):
+    session_id: str  # key under which the conversation history is loaded and saved
+    message: str  # the new user message
+# Response body of POST /chat.
+class ChatResponse(BaseModel):
+    response: str  # text returned to the user
+    updated_history: List[List[str]]  # [user message, reply] pairs kept after this turn
+@app.on_event("startup")
+def startup_event():
+    load_embed_model()
+    build_faiss_index()
+    if not load_index():
+        logger.info("ℹ️ فهرس غير موجود، سيتم بناؤه من الملفات الحالية")
+        rebuild_index_from_files()
+    load_local_model_if_configured()
+    start_monitoring_thread()
+def rebuild_index_from_files():
+    with index_lock:
+        faiss_index.reset()
+        all_chunks.clear()
+        hash_map.clear()
+        code_graph["files"].clear()
+        for file_path in FILES_DIR.glob("**/*"):
+            if file_path.is_file() and not file_path.name.startswith("."):
+                add_file_to_index(file_path)
+        save_index()
+@app.get("/metrics")
+def metrics():
+    return get_current_metrics()
+@app.post("/upload-files")
+def upload_files(req: UploadFilesRequest):
+    added = []
+    for fname, content in req.files.items():
+        file_path = FILES_DIR / fname
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_text(content, encoding="utf-8")
+        add_file_to_index(file_path)
+        added.append(fname)
+    save_index()
+    return {"status": "ok", "added": added}
+@app.post("/diff-files")
+def diff_files(req: DiffFilesRequest):
+    for fname in req.deleted:
+        (FILES_DIR / fname).unlink(missing_ok=True)
+        remove_file_from_index(fname)
+    for fname, new_content in req.modified.items():
+        file_path = FILES_DIR / fname
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_text(new_content, encoding="utf-8")
+        add_file_to_index(file_path)
+    save_index()
+    return {
+        "status": "ok",
+        "deleted": req.deleted,
+        "modified": list(req.modified.keys()),
+        "total_index_vectors": int(faiss_index.ntotal) if faiss_index else 0
+    }
+@app.post("/analyze-and-report")
+def analyze_and_report(req: AnalyzeAndReportRequest):
+    report = analyze_and_report_internal(req.session_id, req.query, k=req.top_k or 10)
+    return {"status": "ok", "report_path": str(REPORT_FILE), "preview": report[:1200]}
+def classify_intent(history: List[List[str]], message: str) -> Dict[str, Any]:
+    inst = (
+        "أعد JSON فقط دون أي نص آخر.\n"
+        "المفاتيح: intent (string), confidence (0-1), action (RETRIEVE_ONLY|ANALYZE_AND_REPORT|TRACE_SUBSET|NONE), "
+        "targets (list of strings), reason (string).\n"
+        "أمثلة:\n"
+        "س: ما عمل الملف X؟ → {\"intent\":\"ASK_FILE_ROLE\",\"confidence\":0.9,\"action\":\"RETRIEVE_ONLY\",\"targets\":[\"X\"],\"reason\":\"...\"}\n"
+        "س: لماذا لا تعمل ميزة الدخول؟ → {\"intent\":\"WHY_FEATURE_NOT_WORKING\",\"confidence\":0.85,\"action\":\"ANALYZE_AND_REPORT\",\"targets\":[],\"reason\":\"...\"}\n"
+        "س: اين يُعرّف المتغير TOKEN وكيف يتغير؟ → {\"intent\":\"CODE_FLOW_TRACE\",\"confidence\":0.8,\"action\":\"TRACE_SUBSET\",\"targets\":[\"TOKEN\"],\"reason\":\"...\"}\n"
+    )
+    p = (
+        f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+        f"<|im_start|>user\n{inst}\nالسؤال: {message}\nأعد JSON فقط.\n<|im_end|>\n"
+        f"<|im_start|>assistant\n"
+    )
+    txt = call_llm(p, max_tokens=200)
+    try:
+        start = txt.find("{")
+        end = txt.rfind("}")
+        obj = json.loads(txt[start:end+1]) if start != -1 and end != -1 else {}
+    except Exception:
+        obj = {}
+    if not isinstance(obj, dict):
+        obj = {}
+    obj.setdefault("intent", "UNKNOWN")
+    obj.setdefault("confidence", 0.0)
+    obj.setdefault("action", "NONE")
+    obj.setdefault("targets", [])
+    obj.setdefault("reason", "")
+    return obj
+@app.post("/chat", response_model=ChatResponse)
+def chat(req: ChatRequest):
+    history = get_history(req.session_id)
+    decision = classify_intent(history, req.message)
+    action = decision.get("action", "NONE")
+    response_text = ""
+    if action == "ANALYZE_AND_REPORT":
+        try:
+            report = analyze_and_report_internal(req.session_id, req.message, k=10)
+            response_text = "تم إنشاء تقرير تحليلي:\n\n" + report
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+    elif action == "RETRIEVE_ONLY":
+        retrieved_docs = retrieve(req.message, k=6)
+        ctx = []
+        for fname, chunk, score in retrieved_docs:
+            ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+        extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview(60)
+        prompt = build_chat_prompt(history, req.message, extra)
+        try:
+            response_text = call_llm(prompt, max_tokens=700)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+    elif action == "TRACE_SUBSET":
+        targets = decision.get("targets", [])
+        key = " ".join(targets) if targets else req.message
+        retrieved_docs = retrieve(key, k=10)
+        ctx = []
+        for fname, chunk, score in retrieved_docs:
+            ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+        flow_query = req.message + "\nPlease trace variables/functions: " + ", ".join(targets)
+        prompt = build_analysis_prompt(flow_query, retrieved_docs)
+        try:
+            trace_report = call_llm(prompt, max_tokens=1200)
+            REPORT_FILE.write_text(trace_report, encoding="utf-8")
+            response_text = "تقرير التتبع:\n\n" + trace_report
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+    else:
+        prompt = build_chat_prompt(history, req.message, "")
+        try:
+            response_text = call_llm(prompt, max_tokens=600)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+    updated = (history + [[req.message, response_text]])[-8:]
+    save_history(req.session_id, updated)
     return ChatResponse(response=response_text, updated_history=updated)