AhmadA82 committed on
Commit 62b67dd · verified · 1 Parent(s): 6842ef0
Files changed (1)
  1. app.py +641 -612
app.py CHANGED
@@ -1,613 +1,642 @@
- # app.py
- import os
- import json
- import hashlib
- import logging
- import threading
- from pathlib import Path
- from typing import List, Dict, Any, Tuple
-
- import numpy as np
- import faiss
- import pickle
- import ast as python_ast
-
- from fastapi import FastAPI, HTTPException
- from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
-
- from sentence_transformers import SentenceTransformer
- from huggingface_hub import hf_hub_download, HfApi
-
- from monitor import get_current_metrics, start_monitoring_thread
- from memory import get_history, save_history
-
- # =========================
- # Logging setup
- # =========================
- logging.basicConfig(
-     level=logging.INFO,
-     format="🪵 [%(asctime)s] [%(levelname)s] %(message)s"
- )
- logger = logging.getLogger("app")
-
- # =========================
- # Constants and paths
- # =========================
- DATA_DIR = Path("data")
- CACHE_DIR = DATA_DIR / "cache"
- INDEX_DIR = DATA_DIR / "index"
- FILES_DIR = DATA_DIR / "files"  # stores the full text of every file
- REPORT_FILE = DATA_DIR / "analysis_report.md"
- GRAPH_FILE = DATA_DIR / "code_graph.json"
- EMB_FILE = INDEX_DIR / "embeddings.faiss"
- META_FILE = INDEX_DIR / "chunks.pkl"
- HASH_MAP_FILE = INDEX_DIR / "hash_map.json"
-
- for p in [DATA_DIR, CACHE_DIR, INDEX_DIR, FILES_DIR]:
-     p.mkdir(parents=True, exist_ok=True)
-
- # Env
- HF_TOKEN = os.getenv("HF_TOKEN", "")
- MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen3-4B-Thinking-2507")
- # Automatic fallbacks when serverless inference fails (404/403)
- FALLBACK_MODELS = [
-     m.strip() for m in os.getenv(
-         "FALLBACK_MODELS",
-         "Qwen/Qwen2.5-7B-Instruct,Qwen/Qwen2.5-Coder-7B-Instruct"
-     ).split(",") if m.strip()
- ]
- # Local GGUF (if available)
- LOCAL_GGUF_REPO = os.getenv("LOCAL_GGUF_REPO", "Triangle104/Qwen3-8B-Q4_K_M-GGUF")
- LOCAL_GGUF_FILE = os.getenv("LOCAL_GGUF_FILE", "qwen3-8b-q4_k_m.gguf")
- LOCAL_GGUF_PATH = CACHE_DIR / LOCAL_GGUF_FILE
-
- # Embeddings
- EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
- EMBED_DIM = int(os.getenv("EMBED_DIM", "384"))
-
- # Code chunking
- CHUNK_STEP = int(os.getenv("CHUNK_STEP", "40"))  # ✅ configurable
- MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", str(10 * 1024 * 1024)))  # 10MB safety limit
-
- SYSTEM_PROMPT = """<|im_start|>system
- You are a senior AI code analyst. Analyze projects with hybrid indexing (code graph + retrieval).
- Return structured, accurate, concise answers. Use Arabic + English labels in the final report.
- <|im_end|>"""
-
- # =========================
- # Global state and lock
- # =========================
- embed_model: SentenceTransformer | None = None
- faiss_index: faiss.Index | None = None
- all_chunks: List[Tuple[str, str]] = []  # (file_name, chunk_text)
- code_graph: Dict[str, Any] = {"files": {}}
- hash_map: Dict[str, str] = {}
-
- index_lock = threading.RLock()  # ✅ guards indexing/retrieval
-
- # =========================
- # LLM (local/cloud)
- # =========================
- try:
-     from llama_cpp import Llama
- except Exception:
-     Llama = None
-
- llm = None  # local model object, if available
-
- logger.info(f"HF_TOKEN length: {len(HF_TOKEN)}")  # sanity-check the token length
-
- def load_local_model_if_configured():
-     """Download and load the local GGUF model if enabled."""
-     global llm
-     if Llama is None:
-         logger.warning("ℹ️ llama_cpp is not available. HF Inference will be used when needed.")
-         return
-     if not LOCAL_GGUF_PATH.exists():
-         try:
-             logger.info(f"⬇️ Downloading GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
-             hf_hub_download(
-                 repo_id=LOCAL_GGUF_REPO,
-                 filename=LOCAL_GGUF_FILE,
-                 local_dir=str(CACHE_DIR),
-                 token=HF_TOKEN or None
-             )
-             logger.info("✅ GGUF downloaded successfully.")
-         except Exception as e:
-             logger.error(f"❌ Could not download GGUF: {str(e)}. Likely cause: invalid token or missing repo.")
-             return
-     try:
-         llm = Llama(
-             model_path=str(LOCAL_GGUF_PATH),
-             n_ctx=int(os.getenv("N_CTX", "32768")),
-             rope_scaling={"type": "yarn", "factor": 4.0},
-             n_threads=int(os.getenv("N_THREADS", "2")),
-             n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")),
-             n_batch=int(os.getenv("N_BATCH", "64")),
-             use_mlock=False,
-             verbose=False
-         )
-         logger.info("✅ Local model (GGUF) loaded.")
-     except Exception as e:
-         llm = None
-         logger.error(f"❌ Failed to load the local model: {str(e)}. Likely cause: file or compatibility issue.")
-
- def call_local_llm(prompt: str, max_tokens: int = 800) -> str:
-     if llm is None or Llama is None:
-         logger.warning("❌ Local model is not available.")
-         return ""
-     try:
-         res = llm(
-             prompt,
-             max_tokens=max_tokens,
-             temperature=0.4,
-             top_p=0.9,
-             stop=["<|im_end|>", "<|im_start|>"],
-             echo=False
-         )
-         logger.info("✅ Successful response from the local model.")
-         return res["choices"][0]["text"].strip()
-     except Exception as e:
-         logger.error(f"❌ Local model call failed: {str(e)}. Likely cause: execution or memory issue.")
-         return ""
-
- def _call_hf_single_model(model_repo: str, prompt: str, max_new_tokens: int = 900) -> str:
-     import requests
-     if not HF_TOKEN:
-         logger.error("❌ HF_TOKEN is not set.")
-         raise RuntimeError("HF_TOKEN is not set and no local model is available.")
-     url = f"https://api-inference.huggingface.co/models/{model_repo}"
-     headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-     payload = {
-         "inputs": prompt,
-         "parameters": {
-             "max_new_tokens": max_new_tokens,
-             "temperature": 0.4,
-             "top_p": 0.9,
-             "return_full_text": False
-         }
-     }
-     r = requests.post(url, headers=headers, json=payload, timeout=120)
-     if r.status_code == 503:
-         data = {}
-         try: data = r.json()
-         except Exception: pass
-         eta = data.get("estimated_time")
-         raise RuntimeError("Model is still loading on HF (503)." + (f" Expected in {eta:.0f}s" if isinstance(eta, (int, float)) else ""))
-     try:
-         r.raise_for_status()
-     except requests.exceptions.HTTPError as e:
-         status = e.response.status_code
-         if status == 401: raise RuntimeError("Token missing or invalid (401). Check HF_TOKEN.")
-         if status == 403:
-             msg = ""
-             try: msg = (e.response.json().get("error") or "").lower()
-             except Exception: pass
-             if "gated" in msg or "accept" in msg:
-                 raise RuntimeError("The model is gated (403). Open the model page and click Accept.")
-             raise RuntimeError("Access denied (403).")
-         if status == 404: raise RuntimeError("Model not found or not available via serverless (404).")
-         if status == 429: raise RuntimeError("Rate limit exceeded (429). Try again later.")
-         try:
-             err = e.response.json()
-         except Exception:
-             err = {"error": e.response.text}
-         raise RuntimeError(f"HF error ({status}): {err.get('error') or err}")
-     data = r.json()
-     if isinstance(data, list) and data and "generated_text" in data[0]:
-         return data[0]["generated_text"]
-     if isinstance(data, dict) and "generated_text" in data:
-         return data["generated_text"]
-     if isinstance(data, dict) and "error" in data:
-         raise RuntimeError(f"HF error: {data['error']}")
-     return json.dumps(data)
-
- def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str:
-     candidates = [MODEL_REPO] + [m for m in FALLBACK_MODELS if m != MODEL_REPO]
-     errors = []
-     for i, model in enumerate(candidates, start=1):
-         try:
-             if i == 1:
-                 logger.info(f"🎯 Using primary model: {model}")
-             else:
-                 logger.warning(f"↪️ Activating fallback ({i-1}/{len(candidates)-1}): {model}")
-             out = _call_hf_single_model(model, prompt, max_new_tokens)
-             if model != MODEL_REPO:
-                 logger.info(f"✅ Response received from fallback model: {model}")
-             return out
-         except RuntimeError as e:
-             msg = str(e)
-             errors.append(f"{model}: {msg}")
-             if "404" in msg or "403" in msg:
-                 continue
-             raise
-     raise RuntimeError("The primary model and all fallbacks failed (403/404). Details:\n- " + "\n- ".join(errors))
-
- def call_llm(prompt: str, max_tokens: int = 900) -> str:
-     if llm:
-         return call_local_llm(prompt, max_tokens)
-     else:
-         return call_hf_inference(prompt, max_tokens)
-
- # =========================
- # Embedding and index initialization
- # =========================
- def load_embed_model():
-     global embed_model
-     if embed_model is None:
-         logger.info(f"⬇️ Loading embedding model: {EMBED_MODEL_NAME}")
-         embed_model = SentenceTransformer(EMBED_MODEL_NAME, device="cpu")
-         logger.info("✅ Embedding model ready.")
-
- def build_faiss_index():
-     global faiss_index
-     if faiss_index is None:
-         faiss_index = faiss.IndexFlatL2(EMBED_DIM)
-         logger.info("✅ FAISS index ready.")
-
- def load_index():
-     global faiss_index, all_chunks, hash_map
-     with index_lock:
-         if EMB_FILE.exists() and META_FILE.exists():
-             faiss_index = faiss.read_index(str(EMB_FILE))
-             with META_FILE.open("rb") as f:
-                 all_chunks = pickle.load(f)
-             if HASH_MAP_FILE.exists():
-                 with HASH_MAP_FILE.open("r", encoding="utf-8") as f:
-                     hash_map = json.load(f)
-             logger.info(f"✅ Index loaded: {len(all_chunks)} chunks")
-             return True
-         return False
-
- def save_index():
-     with index_lock:
-         if faiss_index:
-             faiss.write_index(faiss_index, str(EMB_FILE))
-         with META_FILE.open("wb") as f:
-             pickle.dump(all_chunks, f)
-         with HASH_MAP_FILE.open("w", encoding="utf-8") as f:
-             json.dump(hash_map, f, ensure_ascii=False, indent=2)
-         logger.info("✅ Index saved")
-
- # =========================
- # File and chunk processing
- # =========================
- def chunk_code(text: str, step: int = CHUNK_STEP) -> List[str]:
-     lines = text.splitlines(keepends=True)
-     chunks = []
-     for i in range(0, len(lines), step // 2):
-         chunk = "".join(lines[i:i + step])
-         if chunk.strip():
-             chunks.append(chunk)
-     return chunks
-
- def compute_hash(text: str) -> str:
-     return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
-
- def add_file_to_index(file_path: Path):
-     with index_lock:
-         try:
-             text = file_path.read_text(encoding="utf-8", errors="ignore")
-             if len(text.encode("utf-8")) > MAX_FILE_BYTES:
-                 logger.warning(f"⚠️ File too large: {file_path.name}; skipping")
-                 return
-             current_hash = compute_hash(text)
-             if file_path.name in hash_map and hash_map[file_path.name] == current_hash:
-                 return  # no change
-
-             chunks = chunk_code(text)
-             embeddings = embed_model.encode(chunks, normalize_embeddings=True)
-             for i, emb in enumerate(embeddings):
-                 faiss_index.add(np.array([emb], dtype=np.float32))
-                 all_chunks.append((file_path.name, chunks[i]))
-
-             hash_map[file_path.name] = current_hash
-             update_code_graph(file_path.name, text)
-             logger.info(f"✅ File added: {file_path.name} ({len(chunks)} chunks)")
-         except Exception as e:
-             logger.warning(f"⚠️ Failed to add {file_path.name}: {e}")
-
- def remove_file_from_index(file_name: str):
-     with index_lock:
-         to_remove = [i for i, (fn, _) in enumerate(all_chunks) if fn == file_name]
-         if to_remove:
-             mask = np.ones(len(all_chunks), dtype=bool)
-             mask[to_remove] = False
-             remaining_embs = np.array([embed_model.encode(all_chunks[i][1], normalize_embeddings=True) for i in range(len(all_chunks)) if mask[i]])
-             faiss_index.reset()
-             faiss_index.add(remaining_embs)
-             all_chunks[:] = [all_chunks[i] for i in range(len(all_chunks)) if mask[i]]
-             code_graph["files"].pop(file_name, None)
-             hash_map.pop(file_name, None)
-             logger.info(f"🗑️ File removed: {file_name}")
-
- # =========================
- # Code graph construction
- # =========================
- def update_code_graph(file_name: str, text: str):
-     try:
-         tree = python_ast.parse(text)
-         imports = [n.names[0].name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.Import)]
-         functions = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.FunctionDef)]
-         classes = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.ClassDef)]
-         code_graph["files"][file_name] = {
-             "imports": imports,
-             "functions": functions,
-             "classes": classes
-         }
-         with GRAPH_FILE.open("w", encoding="utf-8") as f:
-             json.dump(code_graph, f, ensure_ascii=False, indent=2)
-     except Exception as e:
-         logger.warning(f"⚠️ Failed to parse {file_name}: {e}")
-
- def render_graph_overview(max_nodes: int = 50) -> str:
-     overview = "Code graph:\n"
-     for fname, data in list(code_graph["files"].items())[:max_nodes]:
-         overview += f"- {fname}: {len(data['functions'])} functions, {len(data['classes'])} classes, imports: {', '.join(data['imports'][:3])}\n"
-     return overview
-
- # =========================
- # Retrieval and analysis
- # =========================
- def retrieve(query: str, k: int = 5) -> List[Tuple[str, str, float]]:
-     load_embed_model()
-     build_faiss_index()
-     if not load_index():
-         rebuild_index_from_files()
-     q_emb = embed_model.encode([query], normalize_embeddings=True)
-     distances, indices = faiss_index.search(q_emb, k)
-     results = []
-     for i, idx in enumerate(indices[0]):
-         if idx != -1:
-             fname, chunk = all_chunks[idx]
-             results.append((fname, chunk, distances[0][i]))
-     return results
-
- def build_analysis_prompt(query: str, retrieved_docs: List[Tuple[str, str, float]]) -> str:
-     ctx = []
-     for fname, chunk, score in retrieved_docs:
-         ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-     extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview()
-     return (
-         f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-         f"<|im_start|>user\n{query}\n{extra}\n<|im_end|>\n"
-         f"<|im_start|>assistant\n"
-     )
-
- def analyze_and_report_internal(session_id: str, query: str, k: int = 10) -> str:
-     retrieved_docs = retrieve(query, k=k)
-     prompt = build_analysis_prompt(query, retrieved_docs)
-     try:
-         report = call_llm(prompt, max_tokens=1500)
-         REPORT_FILE.write_text(report, encoding="utf-8")
-         return report
-     except Exception as e:
-         logger.error(f"❌ LLM error in analysis: {e}")
-         raise
-
- # =========================
- # Chat prompt construction
- # =========================
- def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -> str:
-     prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-     for user_msg, ai_msg in history:
-         prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
-         prompt += f"<|im_start|>assistant\n{ai_msg}\n<|im_end|>\n"
-     prompt += f"<|im_start|>user\n{message}\n{extra}\n<|im_end|>\n"
-     prompt += f"<|im_start|>assistant\n"
-     return prompt
-
- # =========================
- # FastAPI
- # =========================
- app = FastAPI(title="AI Code Analyst")
-
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # New endpoint to check the token and model access
- @app.get("/hf-check")
- def hf_check():
-     api = HfApi()
-     out = {
-         "token_set": bool(HF_TOKEN),
-         "token_valid": False,
-         "model_repo": MODEL_REPO,
-         "model_access": False,
-         "model_private": None,
-         "gated_hint": False,
-         "message": ""
-     }
-     if not HF_TOKEN:
-         out["message"] = "HF_TOKEN is not set."
-         return out
-
-     try:
-         me = api.whoami(token=HF_TOKEN)
-         out["token_valid"] = True
-         out["message"] = f"Token OK for user: {me.get('name')}"
-     except Exception as e:
-         out["message"] = f"Token check failed: {type(e).__name__}: {e}"
-         return out
-
-     try:
-         info = api.model_info(MODEL_REPO, token=HF_TOKEN)
-         out["model_access"] = True
-         out["model_private"] = getattr(info, "private", None)
-         out["message"] += f" | Model reachable: {info.modelId}"
-     except Exception as e:
-         msg = str(e).lower()
-         out["message"] += f" | Model access failed: {type(e).__name__}: {e}"
-         out["gated_hint"] = ("gated" in msg or "accept" in msg)
-     return out
-
- class UploadFilesRequest(BaseModel):
-     files: Dict[str, str]  # fname: content
-
- class DiffFilesRequest(BaseModel):
-     deleted: List[str]
-     modified: Dict[str, str]  # fname: new_content
-
- class AnalyzeAndReportRequest(BaseModel):
-     session_id: str
-     query: str
-     top_k: int | None = None
-
- class ChatRequest(BaseModel):
-     session_id: str
-     message: str
-
- class ChatResponse(BaseModel):
-     response: str
-     updated_history: List[List[str]]
-
- @app.on_event("startup")
- def startup_event():
-     load_embed_model()
-     build_faiss_index()
-     if not load_index():
-         logger.info("ℹ️ No index found; it will be built from the current files")
-         rebuild_index_from_files()
-     load_local_model_if_configured()
-     start_monitoring_thread()
-
- def rebuild_index_from_files():
-     with index_lock:
-         faiss_index.reset()
-         all_chunks.clear()
-         hash_map.clear()
-         code_graph["files"].clear()
-         for file_path in FILES_DIR.glob("**/*"):
-             if file_path.is_file() and not file_path.name.startswith("."):
-                 add_file_to_index(file_path)
-         save_index()
-
- @app.get("/metrics")
- def metrics():
-     return get_current_metrics()
-
- @app.post("/upload-files")
- def upload_files(req: UploadFilesRequest):
-     added = []
-     for fname, content in req.files.items():
-         file_path = FILES_DIR / fname
-         file_path.parent.mkdir(parents=True, exist_ok=True)
-         file_path.write_text(content, encoding="utf-8")
-         add_file_to_index(file_path)
-         added.append(fname)
-     save_index()
-     return {"status": "ok", "added": added}
-
- @app.post("/diff-files")
- def diff_files(req: DiffFilesRequest):
-     for fname in req.deleted:
-         (FILES_DIR / fname).unlink(missing_ok=True)
-         remove_file_from_index(fname)
-
-     for fname, new_content in req.modified.items():
-         file_path = FILES_DIR / fname
-         file_path.parent.mkdir(parents=True, exist_ok=True)
-         file_path.write_text(new_content, encoding="utf-8")
-         add_file_to_index(file_path)
-
-     save_index()
-     return {
-         "status": "ok",
-         "deleted": req.deleted,
-         "modified": list(req.modified.keys()),
-         "total_index_vectors": int(faiss_index.ntotal) if faiss_index else 0
-     }
-
- @app.post("/analyze-and-report")
- def analyze_and_report(req: AnalyzeAndReportRequest):
-     report = analyze_and_report_internal(req.session_id, req.query, k=req.top_k or 10)
-     return {"status": "ok", "report_path": str(REPORT_FILE), "preview": report[:1200]}
-
- def classify_intent(history: List[List[str]], message: str) -> Dict[str, Any]:
-     inst = (
-         "Return JSON only, with no other text.\n"
-         "Keys: intent (string), confidence (0-1), action (RETRIEVE_ONLY|ANALYZE_AND_REPORT|TRACE_SUBSET|NONE), "
-         "targets (list of strings), reason (string).\n"
-         "Examples:\n"
-         "Q: What does file X do? → {\"intent\":\"ASK_FILE_ROLE\",\"confidence\":0.9,\"action\":\"RETRIEVE_ONLY\",\"targets\":[\"X\"],\"reason\":\"...\"}\n"
-         "Q: Why is the login feature not working? → {\"intent\":\"WHY_FEATURE_NOT_WORKING\",\"confidence\":0.85,\"action\":\"ANALYZE_AND_REPORT\",\"targets\":[],\"reason\":\"...\"}\n"
-         "Q: Where is the variable TOKEN defined and how does it change? → {\"intent\":\"CODE_FLOW_TRACE\",\"confidence\":0.8,\"action\":\"TRACE_SUBSET\",\"targets\":[\"TOKEN\"],\"reason\":\"...\"}\n"
-     )
-     p = (
-         f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
-         f"<|im_start|>user\n{inst}\nQuestion: {message}\nReturn JSON only.\n<|im_end|>\n"
-         f"<|im_start|>assistant\n"
-     )
-     txt = call_llm(p, max_tokens=200)
-     try:
-         start = txt.find("{")
-         end = txt.rfind("}")
-         obj = json.loads(txt[start:end+1]) if start != -1 and end != -1 else {}
-     except Exception:
-         obj = {}
-     if not isinstance(obj, dict):
-         obj = {}
-     obj.setdefault("intent", "UNKNOWN")
-     obj.setdefault("confidence", 0.0)
-     obj.setdefault("action", "NONE")
-     obj.setdefault("targets", [])
-     obj.setdefault("reason", "")
-     return obj
-
- @app.post("/chat", response_model=ChatResponse)
- def chat(req: ChatRequest):
-     history = get_history(req.session_id)
-     decision = classify_intent(history, req.message)
-     action = decision.get("action", "NONE")
-     response_text = ""
-
-     if action == "ANALYZE_AND_REPORT":
-         try:
-             report = analyze_and_report_internal(req.session_id, req.message, k=10)
-             response_text = "An analysis report was generated:\n\n" + report
-         except Exception as e:
-             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-
-     elif action == "RETRIEVE_ONLY":
-         retrieved_docs = retrieve(req.message, k=6)
-         ctx = []
-         for fname, chunk, score in retrieved_docs:
-             ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-         extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview(60)
-         prompt = build_chat_prompt(history, req.message, extra)
-         try:
-             response_text = call_llm(prompt, max_tokens=700)
-         except Exception as e:
-             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-
-     elif action == "TRACE_SUBSET":
-         targets = decision.get("targets", [])
-         key = " ".join(targets) if targets else req.message
-         retrieved_docs = retrieve(key, k=10)
-         ctx = []
-         for fname, chunk, score in retrieved_docs:
-             ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
-         flow_query = req.message + "\nPlease trace variables/functions: " + ", ".join(targets)
-         prompt = build_analysis_prompt(flow_query, retrieved_docs)
-         try:
-             trace_report = call_llm(prompt, max_tokens=1200)
-             REPORT_FILE.write_text(trace_report, encoding="utf-8")
-             response_text = "Trace report:\n\n" + trace_report
-         except Exception as e:
-             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-
-     else:
-         prompt = build_chat_prompt(history, req.message, "")
-         try:
-             response_text = call_llm(prompt, max_tokens=600)
-         except Exception as e:
-             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
-
-     updated = (history + [[req.message, response_text]])[-8:]
-     save_history(req.session_id, updated)
 
+ # app.py
+ import os
+ import json
+ import hashlib
+ import logging
+ import threading
+ from pathlib import Path
+ from typing import List, Dict, Any, Tuple
+
+ import numpy as np
+ import faiss
+ import pickle
+ import ast as python_ast
+
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+
+ from sentence_transformers import SentenceTransformer
+ from huggingface_hub import hf_hub_download, HfApi
+
+ from monitor import get_current_metrics, start_monitoring_thread
+ from memory import get_history, save_history
+
+ # =========================
+ # Logging setup
+ # =========================
+ logging.basicConfig(
+     level=logging.INFO,
+     format="🪵 [%(asctime)s] [%(levelname)s] %(message)s"
+ )
+ logger = logging.getLogger("app")
+
+ # =========================
+ # Constants and paths
+ # =========================
+ DATA_DIR = Path("data")
+ CACHE_DIR = DATA_DIR / "cache"
+ INDEX_DIR = DATA_DIR / "index"
+ FILES_DIR = DATA_DIR / "files"  # stores the full text of every file
+ REPORT_FILE = DATA_DIR / "analysis_report.md"
+ GRAPH_FILE = DATA_DIR / "code_graph.json"
+ EMB_FILE = INDEX_DIR / "embeddings.faiss"
+ META_FILE = INDEX_DIR / "chunks.pkl"
+ HASH_MAP_FILE = INDEX_DIR / "hash_map.json"
+
+ for p in [DATA_DIR, CACHE_DIR, INDEX_DIR, FILES_DIR]:
+     p.mkdir(parents=True, exist_ok=True)
+
+ # Env
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
+ MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen3-4B-Thinking-2507")
+ # Automatic fallbacks when serverless inference fails (404/403)
+ FALLBACK_MODELS = [
+     m.strip() for m in os.getenv(
+         "FALLBACK_MODELS",
+         "Qwen/Qwen2.5-7B-Instruct,Qwen/Qwen2.5-Coder-7B-Instruct"
+     ).split(",") if m.strip()
+ ]
+ # Local GGUF (if available)
+ LOCAL_GGUF_REPO = os.getenv("LOCAL_GGUF_REPO", "Triangle104/Qwen3-8B-Q4_K_M-GGUF")
+ LOCAL_GGUF_FILE = os.getenv("LOCAL_GGUF_FILE", "qwen3-8b-q4_k_m.gguf")
+ LOCAL_GGUF_PATH = CACHE_DIR / LOCAL_GGUF_FILE
+
+ # Embeddings
+ EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+ EMBED_DIM = int(os.getenv("EMBED_DIM", "384"))
+
+ # Code chunking
+ CHUNK_STEP = int(os.getenv("CHUNK_STEP", "40"))  # ✅ configurable
+ MAX_FILE_BYTES = int(os.getenv("MAX_FILE_BYTES", str(10 * 1024 * 1024)))  # 10MB safety limit
+
+ SYSTEM_PROMPT = """<|im_start|>system
+ You are a senior AI code analyst. Analyze projects with hybrid indexing (code graph + retrieval).
+ Return structured, accurate, concise answers. Use Arabic + English labels in the final report.
+ <|im_end|>"""
+
+ # =========================
+ # Global state and lock
+ # =========================
+ embed_model: SentenceTransformer | None = None
+ faiss_index: faiss.Index | None = None
+ all_chunks: List[Tuple[str, str]] = []  # (file_name, chunk_text)
+ code_graph: Dict[str, Any] = {"files": {}}
+ hash_map: Dict[str, str] = {}
+
+ index_lock = threading.RLock()  # ✅ guards indexing/retrieval
+
+ # =========================
+ # LLM (local/cloud)
+ # =========================
+ try:
+     from llama_cpp import Llama
+ except Exception:
+     Llama = None
+
+ llm = None  # local model object, if available
+
+ logger.info(f"HF_TOKEN length: {len(HF_TOKEN)}")  # sanity-check the token length
+
+ def load_local_model_if_configured():
+     """Download and load the local GGUF model if enabled."""
+     global llm
+     if Llama is None:
+         logger.warning("ℹ️ llama_cpp is not available. HF Inference will be used when needed.")
+         return
+     if not LOCAL_GGUF_PATH.exists():
+         try:
+             logger.info(f"⬇️ Downloading GGUF: {LOCAL_GGUF_REPO}/{LOCAL_GGUF_FILE}")
+             hf_hub_download(
+                 repo_id=LOCAL_GGUF_REPO,
+                 filename=LOCAL_GGUF_FILE,
+                 local_dir=str(CACHE_DIR),
+                 token=HF_TOKEN or None
+             )
+             logger.info("✅ GGUF downloaded successfully.")
+         except Exception as e:
+             logger.error(f"❌ Could not download GGUF: {str(e)}. Likely cause: invalid token or missing repo.")
+             return
+     try:
+         llm = Llama(
+             model_path=str(LOCAL_GGUF_PATH),
+             n_ctx=int(os.getenv("N_CTX", "32768")),
+             rope_scaling={"type": "yarn", "factor": 4.0},
+             n_threads=int(os.getenv("N_THREADS", "2")),
+             n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")),
+             n_batch=int(os.getenv("N_BATCH", "64")),
+             use_mlock=False,
+             verbose=False
+         )
+         logger.info("✅ Local model (GGUF) loaded.")
+     except Exception as e:
+         llm = None
+         logger.error(f"❌ Failed to load the local model: {str(e)}. Likely cause: file or compatibility issue.")
+
+ def call_local_llm(prompt: str, max_tokens: int = 800) -> str:
+     if llm is None or Llama is None:
+         logger.warning("❌ Local model is not available.")
+         return ""
+     try:
+         res = llm(
+             prompt,
+             max_tokens=max_tokens,
+             temperature=0.4,
+             top_p=0.9,
+             stop=["<|im_end|>", "<|im_start|>"],
+             echo=False
+         )
+         logger.info("✅ Successful response from the local model.")
+         return res["choices"][0]["text"].strip()
+     except Exception as e:
+         logger.error(f"❌ Local model call failed: {str(e)}. Likely cause: execution or memory issue.")
+         return ""
+
+ def _call_hf_single_model(model_repo: str, prompt: str, max_new_tokens: int = 900) -> str:
+     import requests
+     if not HF_TOKEN:
+         logger.error("❌ HF_TOKEN is not set.")
+         raise RuntimeError("HF_TOKEN is not set and no local model is available.")
+     url = f"https://api-inference.huggingface.co/models/{model_repo}"
+     headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+     payload = {
+         "inputs": prompt,
+         "parameters": {
+             "max_new_tokens": max_new_tokens,
+             "temperature": 0.4,
+             "top_p": 0.9,
+             "return_full_text": False
+         }
+     }
+     r = requests.post(url, headers=headers, json=payload, timeout=120)
+     if r.status_code == 503:
+         data = {}
+         try: data = r.json()
+         except Exception: pass
+         eta = data.get("estimated_time")
+         raise RuntimeError("Model is still loading on HF (503)." + (f" Expected in {eta:.0f}s" if isinstance(eta, (int, float)) else ""))
+     try:
+         r.raise_for_status()
+     except requests.exceptions.HTTPError as e:
+         status = e.response.status_code
+         if status == 401: raise RuntimeError("Token missing or invalid (401). Check HF_TOKEN.")
+         if status == 403:
+             msg = ""
+             try: msg = (e.response.json().get("error") or "").lower()
+             except Exception: pass
+             if "gated" in msg or "accept" in msg:
+                 raise RuntimeError("The model is gated (403). Open the model page and click Accept.")
+             raise RuntimeError("Access denied (403).")
+         if status == 404: raise RuntimeError("Model not found or not available via serverless (404).")
+         if status == 429: raise RuntimeError("Rate limit exceeded (429). Try again later.")
+         try:
+             err = e.response.json()
+         except Exception:
+             err = {"error": e.response.text}
+         raise RuntimeError(f"HF error ({status}): {err.get('error') or err}")
+     data = r.json()
+     if isinstance(data, list) and data and "generated_text" in data[0]:
+         return data[0]["generated_text"]
+     if isinstance(data, dict) and "generated_text" in data:
+         return data["generated_text"]
+     if isinstance(data, dict) and "error" in data:
+         raise RuntimeError(f"HF error: {data['error']}")
+     return json.dumps(data)
+
+ def call_hf_inference(prompt: str, max_new_tokens: int = 900) -> str:
+     candidates = [MODEL_REPO] + [m for m in FALLBACK_MODELS if m != MODEL_REPO]
+     errors = []
+     for i, model in enumerate(candidates, start=1):
+         try:
+             if i == 1:
+                 logger.info(f"🎯 Using primary model: {model}")
+             else:
+                 logger.warning(f"↪️ Activating fallback ({i-1}/{len(candidates)-1}): {model}")
+             out = _call_hf_single_model(model, prompt, max_new_tokens)
+             if model != MODEL_REPO:
+                 logger.info(f"✅ Response received from fallback model: {model}")
+             return out
+         except RuntimeError as e:
+             msg = str(e)
+             errors.append(f"{model}: {msg}")
+             if "404" in msg or "403" in msg:
+                 continue
+             raise
+     raise RuntimeError("The primary model and all fallbacks failed (403/404). Details:\n- " + "\n- ".join(errors))
+
+ def call_llm(prompt: str, max_tokens: int = 900) -> str:
+     if llm:
+         return call_local_llm(prompt, max_tokens)
+     else:
+         return call_hf_inference(prompt, max_tokens)
+
+ # =========================
+ # Embedding and index initialization
+ # =========================
+ def load_embed_model():
+     global embed_model
+     if embed_model is None:
+         logger.info(f"⬇️ Loading embedding model: {EMBED_MODEL_NAME}")
+         embed_model = SentenceTransformer(EMBED_MODEL_NAME, device="cpu")
+         logger.info("✅ Embedding model ready.")
+
+ def build_faiss_index():
+     global faiss_index
+     if faiss_index is None:
+         faiss_index = faiss.IndexFlatL2(EMBED_DIM)
+         logger.info("✅ FAISS index ready.")
+
+ def load_index():
+     global faiss_index, all_chunks, hash_map
+     with index_lock:
+         if EMB_FILE.exists() and META_FILE.exists():
+             faiss_index = faiss.read_index(str(EMB_FILE))
+             with META_FILE.open("rb") as f:
+                 all_chunks = pickle.load(f)
+             if HASH_MAP_FILE.exists():
+                 with HASH_MAP_FILE.open("r", encoding="utf-8") as f:
+                     hash_map = json.load(f)
+             logger.info(f"✅ Index loaded: {len(all_chunks)} chunks")
+             return True
+         return False
+
+ def save_index():
+     with index_lock:
+         if faiss_index:
+             faiss.write_index(faiss_index, str(EMB_FILE))
+         with META_FILE.open("wb") as f:
+             pickle.dump(all_chunks, f)
+         with HASH_MAP_FILE.open("w", encoding="utf-8") as f:
+             json.dump(hash_map, f, ensure_ascii=False, indent=2)
+         logger.info("✅ Index saved")
+
+ # =========================
+ # File and chunk processing
+ # =========================
+ def chunk_code(text: str, step: int = CHUNK_STEP) -> List[str]:
+     lines = text.splitlines(keepends=True)
+     chunks = []
+     for i in range(0, len(lines), step // 2):
+         chunk = "".join(lines[i:i + step])
+         if chunk.strip():
+             chunks.append(chunk)
+     return chunks
+
+ def compute_hash(text: str) -> str:
+     return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
+
+ def add_file_to_index(file_path: Path):
+     with index_lock:
+         try:
+             text = file_path.read_text(encoding="utf-8", errors="ignore")
+             if len(text.encode("utf-8")) > MAX_FILE_BYTES:
+                 logger.warning(f"⚠️ File too large: {file_path.name}; skipping")
+                 return
+             current_hash = compute_hash(text)
+             if file_path.name in hash_map and hash_map[file_path.name] == current_hash:
+                 return  # no change
+
+             chunks = chunk_code(text)
+             embeddings = embed_model.encode(chunks, normalize_embeddings=True)
+             for i, emb in enumerate(embeddings):
+                 faiss_index.add(np.array([emb], dtype=np.float32))
+                 all_chunks.append((file_path.name, chunks[i]))
+
+             hash_map[file_path.name] = current_hash
+             update_code_graph(file_path.name, text)
+             logger.info(f"✅ File added: {file_path.name} ({len(chunks)} chunks)")
+         except Exception as e:
+             logger.warning(f"⚠️ Failed to add {file_path.name}: {e}")
+
+ def remove_file_from_index(file_name: str):
+     with index_lock:
+         to_remove = [i for i, (fn, _) in enumerate(all_chunks) if fn == file_name]
+         if to_remove:
+             mask = np.ones(len(all_chunks), dtype=bool)
+             mask[to_remove] = False
+             remaining_embs = np.array([embed_model.encode(all_chunks[i][1], normalize_embeddings=True) for i in range(len(all_chunks)) if mask[i]])
+             faiss_index.reset()
+             faiss_index.add(remaining_embs)
+             all_chunks[:] = [all_chunks[i] for i in range(len(all_chunks)) if mask[i]]
+             code_graph["files"].pop(file_name, None)
+             hash_map.pop(file_name, None)
+             logger.info(f"🗑️ File removed: {file_name}")
+
+ # =========================
+ # Code graph construction
+ # =========================
+ def update_code_graph(file_name: str, text: str):
+     try:
+         tree = python_ast.parse(text)
+         imports = [n.names[0].name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.Import)]
+         functions = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.FunctionDef)]
+         classes = [n.name for n in python_ast.iter_child_nodes(tree) if isinstance(n, python_ast.ClassDef)]
+         code_graph["files"][file_name] = {
+             "imports": imports,
+             "functions": functions,
+             "classes": classes
+         }
+         with GRAPH_FILE.open("w", encoding="utf-8") as f:
+             json.dump(code_graph, f, ensure_ascii=False, indent=2)
+     except Exception as e:
+         logger.warning(f"⚠️ Failed to parse {file_name}: {e}")
+
+ def render_graph_overview(max_nodes: int = 50) -> str:
+     overview = "Code graph:\n"
+     for fname, data in list(code_graph["files"].items())[:max_nodes]:
+         overview += f"- {fname}: {len(data['functions'])} functions, {len(data['classes'])} classes, imports: {', '.join(data['imports'][:3])}\n"
+     return overview
+
+ # =========================
+ # Retrieval and analysis
+ # =========================
+ def retrieve(query: str, k: int = 5) -> List[Tuple[str, str, float]]:
+     load_embed_model()
+     build_faiss_index()
+     if not load_index():
+         rebuild_index_from_files()
+     q_emb = embed_model.encode([query], normalize_embeddings=True)
+     distances, indices = faiss_index.search(q_emb, k)
+     results = []
+     for i, idx in enumerate(indices[0]):
+         if idx != -1:
+             fname, chunk = all_chunks[idx]
+             results.append((fname, chunk, distances[0][i]))
+     return results
+
+ def build_analysis_prompt(query: str, retrieved_docs: List[Tuple[str, str, float]]) -> str:
+     ctx = []
+     for fname, chunk, score in retrieved_docs:
+         ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+     extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview()
+     return (
+         f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+         f"<|im_start|>user\n{query}\n{extra}\n<|im_end|>\n"
+         f"<|im_start|>assistant\n"
+     )
+
+ def analyze_and_report_internal(session_id: str, query: str, k: int = 10) -> str:
+     retrieved_docs = retrieve(query, k=k)
+     prompt = build_analysis_prompt(query, retrieved_docs)
+     try:
+         report = call_llm(prompt, max_tokens=1500)
+         REPORT_FILE.write_text(report, encoding="utf-8")
+         return report
+     except Exception as e:
+         logger.error(f"❌ LLM error in analysis: {e}")
+         raise
+
+ # =========================
+ # Chat prompt construction
+ # =========================
+ def build_chat_prompt(history: List[List[str]], message: str, extra: str = "") -> str:
+     prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+     for user_msg, ai_msg in history:
+         prompt += f"<|im_start|>user\n{user_msg}\n<|im_end|>\n"
+         prompt += f"<|im_start|>assistant\n{ai_msg}\n<|im_end|>\n"
+     prompt += f"<|im_start|>user\n{message}\n{extra}\n<|im_end|>\n"
+     prompt += f"<|im_start|>assistant\n"
+     return prompt
+
+ # =========================
+ # FastAPI
+ # =========================
+ app = FastAPI(title="AI Code Analyst")
+
+ # --- Root endpoint for Hugging Face health checks and simple UI ---
+ from fastapi.responses import PlainTextResponse, HTMLResponse, JSONResponse
+
+ @app.get("/", response_class=HTMLResponse)
+ def root(logs: str | None = None):
+     """
+     Minimal root endpoint so HF / healthcheck returns 200 OK.
+     Use `/?logs=container` to tail last lines from data/app.log.
+     """
+     if logs == "container":
+         log_file = Path(DATA_DIR) / "app.log"
+         if log_file.exists():
+             tail = "".join(log_file.read_text(encoding="utf-8", errors="ignore").splitlines(True)[-200:])
+             return PlainTextResponse(tail)
+         return PlainTextResponse("No logs yet.", status_code=200)
+
+     # Small HTML with links
+     html = """
+     <html>
+       <head><meta charset="utf-8"><title>AI Code Analyst</title></head>
+       <body style="font-family: ui-sans-serif, system-ui; padding:20px">
+         <h1>✅ AI Code Analyst is running</h1>
+         <p>Try <a href="/docs">/docs</a>, <a href="/hf-check">/hf-check</a>, or <a href="/metrics">/metrics</a>.</p>
+         <p>Logs: <a href="/?logs=container">tail</a></p>
+       </body>
+     </html>
+     """
+     return HTMLResponse(html)
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # New endpoint to check the token and model access
+ @app.get("/hf-check")
+ def hf_check():
+     api = HfApi()
+     out = {
+         "token_set": bool(HF_TOKEN),
+         "token_valid": False,
+         "model_repo": MODEL_REPO,
+         "model_access": False,
+         "model_private": None,
+         "gated_hint": False,
+         "message": ""
+     }
+     if not HF_TOKEN:
+         out["message"] = "HF_TOKEN is not set."
+         return out
+
+     try:
+         me = api.whoami(token=HF_TOKEN)
+         out["token_valid"] = True
+         out["message"] = f"Token OK for user: {me.get('name')}"
+     except Exception as e:
+         out["message"] = f"Token check failed: {type(e).__name__}: {e}"
+         return out
+
+     try:
+         info = api.model_info(MODEL_REPO, token=HF_TOKEN)
+         out["model_access"] = True
+         out["model_private"] = getattr(info, "private", None)
+         out["message"] += f" | Model reachable: {info.modelId}"
+     except Exception as e:
+         msg = str(e).lower()
+         out["message"] += f" | Model access failed: {type(e).__name__}: {e}"
+         out["gated_hint"] = ("gated" in msg or "accept" in msg)
+     return out
+
+ class UploadFilesRequest(BaseModel):
+     files: Dict[str, str]  # fname: content
+
+ class DiffFilesRequest(BaseModel):
+     deleted: List[str]
+     modified: Dict[str, str]  # fname: new_content
+
+ class AnalyzeAndReportRequest(BaseModel):
+     session_id: str
+     query: str
+     top_k: int | None = None
+
+ class ChatRequest(BaseModel):
+     session_id: str
+     message: str
+
+ class ChatResponse(BaseModel):
+     response: str
+     updated_history: List[List[str]]
+
+ @app.on_event("startup")
+ def startup_event():
+     load_embed_model()
+     build_faiss_index()
+     if not load_index():
+         logger.info("ℹ️ No index found; it will be built from the current files")
+         rebuild_index_from_files()
+     load_local_model_if_configured()
+     start_monitoring_thread()
+
+ def rebuild_index_from_files():
+     with index_lock:
+         faiss_index.reset()
+         all_chunks.clear()
+         hash_map.clear()
+         code_graph["files"].clear()
+         for file_path in FILES_DIR.glob("**/*"):
+             if file_path.is_file() and not file_path.name.startswith("."):
+                 add_file_to_index(file_path)
+         save_index()
+
+ @app.get("/metrics")
+ def metrics():
+     return get_current_metrics()
+
+ @app.post("/upload-files")
+ def upload_files(req: UploadFilesRequest):
+     added = []
+     for fname, content in req.files.items():
+         file_path = FILES_DIR / fname
+         file_path.parent.mkdir(parents=True, exist_ok=True)
+         file_path.write_text(content, encoding="utf-8")
+         add_file_to_index(file_path)
+         added.append(fname)
+     save_index()
+     return {"status": "ok", "added": added}
+
+ @app.post("/diff-files")
+ def diff_files(req: DiffFilesRequest):
+     for fname in req.deleted:
+         (FILES_DIR / fname).unlink(missing_ok=True)
+         remove_file_from_index(fname)
+
+     for fname, new_content in req.modified.items():
+         file_path = FILES_DIR / fname
+         file_path.parent.mkdir(parents=True, exist_ok=True)
+         file_path.write_text(new_content, encoding="utf-8")
+         add_file_to_index(file_path)
+
+     save_index()
+     return {
+         "status": "ok",
+         "deleted": req.deleted,
+         "modified": list(req.modified.keys()),
+         "total_index_vectors": int(faiss_index.ntotal) if faiss_index else 0
+     }
+
+ @app.post("/analyze-and-report")
+ def analyze_and_report(req: AnalyzeAndReportRequest):
+     report = analyze_and_report_internal(req.session_id, req.query, k=req.top_k or 10)
+     return {"status": "ok", "report_path": str(REPORT_FILE), "preview": report[:1200]}
+
+ def classify_intent(history: List[List[str]], message: str) -> Dict[str, Any]:
+     inst = (
+         "Return JSON only, with no other text.\n"
+         "Keys: intent (string), confidence (0-1), action (RETRIEVE_ONLY|ANALYZE_AND_REPORT|TRACE_SUBSET|NONE), "
+         "targets (list of strings), reason (string).\n"
+         "Examples:\n"
+         "Q: What does file X do? → {\"intent\":\"ASK_FILE_ROLE\",\"confidence\":0.9,\"action\":\"RETRIEVE_ONLY\",\"targets\":[\"X\"],\"reason\":\"...\"}\n"
+         "Q: Why is the login feature not working? → {\"intent\":\"WHY_FEATURE_NOT_WORKING\",\"confidence\":0.85,\"action\":\"ANALYZE_AND_REPORT\",\"targets\":[],\"reason\":\"...\"}\n"
+         "Q: Where is the variable TOKEN defined and how does it change? → {\"intent\":\"CODE_FLOW_TRACE\",\"confidence\":0.8,\"action\":\"TRACE_SUBSET\",\"targets\":[\"TOKEN\"],\"reason\":\"...\"}\n"
+     )
+     p = (
+         f"<|im_start|>system\n{SYSTEM_PROMPT}\n<|im_end|>\n"
+         f"<|im_start|>user\n{inst}\nQuestion: {message}\nReturn JSON only.\n<|im_end|>\n"
+         f"<|im_start|>assistant\n"
+     )
+     txt = call_llm(p, max_tokens=200)
+     try:
+         start = txt.find("{")
+         end = txt.rfind("}")
+         obj = json.loads(txt[start:end+1]) if start != -1 and end != -1 else {}
+     except Exception:
+         obj = {}
+     if not isinstance(obj, dict):
+         obj = {}
+     obj.setdefault("intent", "UNKNOWN")
+     obj.setdefault("confidence", 0.0)
+     obj.setdefault("action", "NONE")
+     obj.setdefault("targets", [])
+     obj.setdefault("reason", "")
+     return obj
+
+ @app.post("/chat", response_model=ChatResponse)
+ def chat(req: ChatRequest):
+     history = get_history(req.session_id)
+     decision = classify_intent(history, req.message)
+     action = decision.get("action", "NONE")
+     response_text = ""
+
+     if action == "ANALYZE_AND_REPORT":
+         try:
+             report = analyze_and_report_internal(req.session_id, req.message, k=10)
+             response_text = "An analysis report was generated:\n\n" + report
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+
+     elif action == "RETRIEVE_ONLY":
+         retrieved_docs = retrieve(req.message, k=6)
+         ctx = []
+         for fname, chunk, score in retrieved_docs:
+             ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+         extra = "\n\n[Context]\n" + "\n\n".join(ctx) + "\n\n" + render_graph_overview(60)
+         prompt = build_chat_prompt(history, req.message, extra)
+         try:
+             response_text = call_llm(prompt, max_tokens=700)
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+
+     elif action == "TRACE_SUBSET":
+         targets = decision.get("targets", [])
+         key = " ".join(targets) if targets else req.message
+         retrieved_docs = retrieve(key, k=10)
+         ctx = []
+         for fname, chunk, score in retrieved_docs:
+             ctx.append(f"From {fname} (score={score:.4f}):\n{chunk}")
+         flow_query = req.message + "\nPlease trace variables/functions: " + ", ".join(targets)
+         prompt = build_analysis_prompt(flow_query, retrieved_docs)
+         try:
+             trace_report = call_llm(prompt, max_tokens=1200)
+             REPORT_FILE.write_text(trace_report, encoding="utf-8")
+             response_text = "Trace report:\n\n" + trace_report
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+
+     else:
+         prompt = build_chat_prompt(history, req.message, "")
+         try:
+             response_text = call_llm(prompt, max_tokens=600)
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}")
+
+     updated = (history + [[req.message, response_text]])[-8:]
+     save_history(req.session_id, updated)
      return ChatResponse(response=response_text, updated_history=updated)
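
The only functional change in this commit is the new `/` root endpoint (new lines 405-432): it gives the Hugging Face Space health check a 200 response, serves a small HTML landing page, and exposes a `/?logs=container` log tail. Note that root() tails data/app.log, while logging.basicConfig above only configures stream logging, so the tail will report "No logs yet." unless something else writes that file. A minimal client sketch for exercising the new route follows; the base URL is an assumption (7860 is the default Spaces port), not part of the commit:

# Hypothetical client for the new root endpoint; adjust BASE to your Space URL.
import requests

BASE = "http://localhost:7860"  # assumption: default HF Spaces port

# Health check: "/" should now return 200 with a small HTML page.
r = requests.get(f"{BASE}/")
print(r.status_code, r.headers.get("content-type"))

# Tail the container log through the same route, as wired in root().
print(requests.get(f"{BASE}/", params={"logs": "container"}).text)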