Spaces:
Sleeping
Sleeping
soupstick
committed on
Commit
·
ccb470a
1
Parent(s):
112f78a
refactor: scaffold modular split (agent, tools, modules/*, llm_provider, mcp, validation, security)
Browse files- agent.py +1 -0
- app.py +1 -670
- app_monolith_backup.py +670 -0
- llm_provider.py +1 -0
- mcp.py +1 -0
- modules/__init__.py +0 -0
- modules/credit.py +1 -0
- modules/kyc.py +1 -0
- modules/sanctions.py +1 -0
- modules/transactions.py +1 -0
- threat_intel.py +1 -0
- tools.py +1 -0
- ttp_guard.py +1 -0
- validation.py +1 -0
agent.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement agent.py (split from app_monolith_backup.py)"""
|
app.py
CHANGED
@@ -1,670 +1 @@
|
|
1 |
-
"""
|
2 |
-
Fraud Detector Analyst — LangChain + (optional) MCP
|
3 |
-
Advanced “prototype-first” build:
|
4 |
-
- Chat uses chat-completion models (LangChain ChatHuggingFace).
|
5 |
-
- AI Summary shows a notice when no inference is connected.
|
6 |
-
|
7 |
-
LLM env (serverless friendly):
|
8 |
-
HF_TOKEN (or HF_SPACES)
|
9 |
-
LC_CHAT_MODEL (default: "Qwen/Qwen2.5-0.5B-Instruct")
|
10 |
-
LC_CHAT_MODEL_FALLBACK (default: "mistralai/Mistral-7B-Instruct")
|
11 |
-
|
12 |
-
Summary behavior:
|
13 |
-
If no working inference/token -> summary fields display:
|
14 |
-
"🔌 Please connect to an inference point to generate summary."
|
15 |
-
|
16 |
-
Optional MCP:
|
17 |
-
ENABLE_MCP=1
|
18 |
-
MCP_SANCTIONS_URL, MCP_HIGH_RISK_MCC_URL
|
19 |
-
MCP_AUTH_HEADER="Authorization: Bearer <token>"
|
20 |
-
|
21 |
-
Run:
|
22 |
-
pip install -r requirements.txt
|
23 |
-
python app.py
|
24 |
-
On Spaces:
|
25 |
-
Add secret HF_TOKEN (or HF_SPACES). Launch.
|
26 |
-
"""
|
27 |
-
|
28 |
-
from __future__ import annotations
|
29 |
-
|
30 |
-
import os, io, re, json, math, unicodedata, logging
|
31 |
-
from typing import Optional, Tuple, List, Dict
|
32 |
-
|
33 |
-
import numpy as np
|
34 |
-
import pandas as pd
|
35 |
-
import gradio as gr
|
36 |
-
from dotenv import load_dotenv
|
37 |
-
|
38 |
-
# LangChain
|
39 |
-
from langchain.tools import tool
|
40 |
-
from langchain_core.tools import Tool
|
41 |
-
from langchain.agents import initialize_agent, AgentType
|
42 |
-
from langchain.schema import HumanMessage, SystemMessage
|
43 |
-
|
44 |
-
from pydantic import BaseModel, Field
|
45 |
-
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
|
46 |
-
|
47 |
-
# Phone normalization
|
48 |
-
try:
|
49 |
-
import phonenumbers
|
50 |
-
HAVE_PHONENUM = True
|
51 |
-
except Exception:
|
52 |
-
HAVE_PHONENUM = False
|
53 |
-
|
54 |
-
# ------------------------
|
55 |
-
# Setup
|
56 |
-
# ------------------------
|
57 |
-
load_dotenv()
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("fraud-analyst")

# Access token for the remote inference provider; HF_SPACES is accepted as an
# alternative environment variable name.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HF_SPACES")

# Chat models (chat-completions)
DEFAULT_CHAT_MODEL = os.getenv("LC_CHAT_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
FALLBACK_CHAT_MODEL = os.getenv("LC_CHAT_MODEL_FALLBACK", "mistralai/Mistral-7B-Instruct")

# Texts shown in place of model output when no chat model is usable.
SUMMARY_NOTICE = "🔌 Please connect to an inference point to generate summary."
CHAT_NOTICE = "🔌 Chat model not configured. Set HF_TOKEN and LC_CHAT_MODEL to enable chat."
|
69 |
-
|
70 |
-
# ------------------------
|
71 |
-
# LLM builders
|
72 |
-
# ------------------------
|
73 |
-
def _mk_chat_llm(model_id: str) -> ChatHuggingFace:
    """Build a chat wrapper around a Hugging Face inference endpoint.

    ChatHuggingFace uses HF Inference under the hood. Although the backend
    task is 'text-generation', this wrapper handles chat-style messages.

    Args:
        model_id: Repository id of the model to call.

    Returns:
        A ``ChatHuggingFace`` instance wrapping an endpoint for ``model_id``.
    """
    endpoint = HuggingFaceEndpoint(
        repo_id=model_id,
        task="text-generation",
        huggingfacehub_api_token=HF_TOKEN,
        max_new_tokens=256,
        temperature=0.2,
        repetition_penalty=1.05,
        timeout=60,
    )
    return ChatHuggingFace(llm=endpoint)
|
88 |
-
|
89 |
-
def _heartbeat_chat(model_id: str) -> bool:
    """Return True when a one-message probe of ``model_id`` succeeds.

    Any exception raised while building or invoking the model is logged
    (truncated to 160 characters) and reported as ``False``.
    """
    try:
        chat = _mk_chat_llm(model_id)
        chat.invoke([HumanMessage(content="ok")])
        return True
    except Exception as e:
        # Best-effort probe: callers only need to know the model is unusable.
        log.warning("Heartbeat failed for %s: %s", model_id, str(e)[:160])
        return False
|
97 |
-
|
98 |
-
def build_chat_llm() -> Optional[ChatHuggingFace]:
    """Return a working ChatHuggingFace, or None if no model is usable.

    The default model is probed first and the fallback second; the first one
    whose heartbeat succeeds is returned. Without a token no probe is made.
    """
    # Only log presence of the token, never anything derived from its value.
    log.info("HF token present: %s", bool(HF_TOKEN))
    if HF_TOKEN:
        candidates = (
            (DEFAULT_CHAT_MODEL, "Using chat model: %s"),
            (FALLBACK_CHAT_MODEL, "Using fallback chat model: %s"),
        )
        for model_id, message in candidates:
            if _heartbeat_chat(model_id):
                log.info(message, model_id)
                return _mk_chat_llm(model_id)
    log.warning("No working chat model; chat will show a notice.")
    return None


CHAT_LLM = build_chat_llm()
|
113 |
-
|
114 |
-
# ------------------------
|
115 |
-
# Normalization helpers
|
116 |
-
# ------------------------
|
117 |
-
def _norm_colname(c: str) -> str:
|
118 |
-
c = c.strip().lower()
|
119 |
-
c = re.sub(r"\s+", "_", c)
|
120 |
-
c = re.sub(r"[^\w]+", "_", c)
|
121 |
-
return c.strip("_")
|
122 |
-
|
123 |
-
def _nfkc(s: str) -> str:
|
124 |
-
return unicodedata.normalize("NFKC", s)
|
125 |
-
|
126 |
-
def _collapse_ws(s: str) -> str:
|
127 |
-
return re.sub(r"\s+", " ", s).strip()
|
128 |
-
|
129 |
-
def _clean_str(x):
    """Clean one cell value: NFKC-normalize and collapse whitespace.

    Missing values (as judged by ``pd.isna``) are returned unchanged; every
    other value is converted with ``str`` before cleaning.
    """
    if pd.isna(x):
        return x
    return _collapse_ws(_nfkc(str(x)))
|
132 |
-
|
133 |
-
def _is_email(s: str) -> bool:
|
134 |
-
return bool(re.match(r"^[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}$", s or ""))
|
135 |
-
|
136 |
-
def _clean_phone(s: str, default_region: str = "IN"):
    """Normalize a phone value and report a data-quality problem, if any.

    Args:
        s: Raw cell value (string, number, ``None`` or NaN).
        default_region: Region passed to ``phonenumbers.parse`` for numbers
            without an international prefix.

    Returns:
        ``(value, problem)`` where ``problem`` is ``None``, "missing_phone"
        or "invalid_phone". With ``phonenumbers`` available a valid number is
        returned in E.164 format; otherwise the digits are returned and
        accepted when there are 8 to 15 of them.
    """
    # NaN/NA cells are missing, not invalid; str(nan) would otherwise be
    # stripped down to an empty "invalid" number.
    if s is None or pd.isna(s) or str(s).strip() == "":
        return None, "missing_phone"
    # Columns read as float carry a ".0" suffix whose zero would be kept as a digit.
    if isinstance(s, float) and s.is_integer():
        s = int(s)
    raw = re.sub(r"[^\d+]", "", str(s))
    if HAVE_PHONENUM:
        try:
            pn = phonenumbers.parse(raw, default_region)
            if phonenumbers.is_possible_number(pn) and phonenumbers.is_valid_number(pn):
                return phonenumbers.format_number(pn, phonenumbers.PhoneNumberFormat.E164), None
            return raw, "invalid_phone"
        except Exception:
            # Unparseable input is a data-quality finding, not a crash.
            return raw, "invalid_phone"
    digits = re.sub(r"\D", "", raw)
    if 8 <= len(digits) <= 15:
        return digits, None
    return digits, "invalid_phone"
|
150 |
-
|
151 |
-
def _parse_datetime(s):
|
152 |
-
try:
|
153 |
-
return pd.to_datetime(s, errors="coerce", utc=True)
|
154 |
-
except Exception:
|
155 |
-
return pd.NaT
|
156 |
-
|
157 |
-
def _to_numeric(series: pd.Series):
|
158 |
-
coerced = pd.to_numeric(series, errors="coerce")
|
159 |
-
return coerced, (coerced.isna() & series.notna())
|
160 |
-
|
161 |
-
def _read_csv_any(file_obj) -> pd.DataFrame:
|
162 |
-
if file_obj is None:
|
163 |
-
raise ValueError("No file uploaded.")
|
164 |
-
if hasattr(file_obj, "name"):
|
165 |
-
p = file_obj.name
|
166 |
-
try: return pd.read_csv(p)
|
167 |
-
except Exception: return pd.read_csv(p, encoding="latin-1")
|
168 |
-
try: return pd.read_csv(file_obj)
|
169 |
-
except Exception:
|
170 |
-
file_obj.seek(0)
|
171 |
-
return pd.read_csv(file_obj, encoding="latin-1")
|
172 |
-
|
173 |
-
def _standardize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with normalized headers and cleaned strings.

    Column labels go through ``_norm_colname``; every object-dtype column has
    ``_clean_str`` applied cell by cell. The input frame is not modified.
    """
    out = df.copy()
    out.columns = [_norm_colname(c) for c in out.columns]
    for col in out.select_dtypes(include=["object"]).columns:
        out[col] = out[col].apply(_clean_str)
    return out
|
179 |
-
|
180 |
-
def _prepare_generic(df: pd.DataFrame, expected: Dict[str, List[str]]):
    """Standardize a raw frame and collect data-quality issues.

    Steps: normalize headers/strings, map canonical field names to the
    columns actually present (via synonyms), validate e-mail and phone
    columns, parse date-like columns to UTC datetimes, and coerce common
    numeric columns.

    Args:
        df: Raw input frame.
        expected: Canonical field name -> list of accepted synonyms.

    Returns:
        ``(clean_df, issues_df, quality_summary, colmap)`` where ``issues_df``
        has columns row/field/issue/value and ``colmap`` maps each canonical
        field that was found to its actual column name.
    """
    issues = []
    df0 = _standardize_df(df)

    # Synonym mapping: the first candidate present in the frame wins.
    cols = set(df0.columns)
    colmap = {}
    for canon, syns in expected.items():
        for candidate in [canon] + syns:
            candidate = _norm_colname(candidate)
            if candidate and candidate in cols:
                colmap[canon] = candidate
                break

    # Email/phone quality
    for c in list(df0.columns):
        if "email" in c:
            df0[c] = df0[c].apply(lambda x: str(x).lower().strip() if pd.notna(x) else x)
            for idx, v in df0[c].items():
                if pd.isna(v) or str(v).strip() == "":
                    issues.append({"row": idx, "field": c, "issue": "missing_email", "value": ""})
                elif not _is_email(v):
                    issues.append({"row": idx, "field": c, "issue": "invalid_email", "value": str(v)})
        if "phone" in c or "mobile" in c:
            vals = []
            for idx, v in df0[c].items():
                e164, prob = _clean_phone(v)
                vals.append(e164)
                if prob:
                    issues.append({"row": idx, "field": c, "issue": prob, "value": str(v)})
            df0[c] = vals

    # Datetime parsing. "dob" is matched explicitly: it contains none of the
    # generic hints, yet KYC checks do date arithmetic on it.
    date_hints = ("date", "time", "timestamp", "created_at", "updated_at")
    for c in df0.columns:
        if any(k in c for k in date_hints) or c == "dob" or c.endswith("_dob"):
            parsed = _parse_datetime(df0[c])
            bad = parsed.isna() & df0[c].notna()
            for idx in df0.index[bad]:
                issues.append({"row": int(idx), "field": c, "issue": "unparseable_timestamp", "value": str(df0.loc[idx, c])})
            df0[c] = parsed

    # Numeric coercions for common fields (substring match on the column name)
    for nc in ("amount", "credit_score", "utilization", "dti", "recent_defaults", "income"):
        for c in df0.columns:
            if nc in c:
                coerced, badmask = _to_numeric(df0[c])
                for idx in df0.index[badmask]:
                    issues.append({"row": int(idx), "field": c, "issue": "non_numeric", "value": str(df0.loc[idx, c])})
                df0[c] = coerced

    issues_df = pd.DataFrame(issues, columns=["row", "field", "issue", "value"])
    missing = [k for k in expected.keys() if k not in colmap]
    quality_summary = f"Rows={len(df0)}, Cols={len(df0.columns)}; Missing required fields: {missing if missing else 'None'}"
    return df0, issues_df, quality_summary, colmap
|
234 |
-
|
235 |
-
# ------------------------
|
236 |
-
# Modules & Rules
|
237 |
-
# ------------------------
|
238 |
-
# Canonical transaction fields mapped to the header synonyms accepted for them.
TX_EXPECTED = {
    "transaction_id": ["txn_id", "transactionid", "id", "tx_id"],
    "customer_id": ["cust_id", "user_id", "client_id"],
    "amount": ["amt", "amount_inr", "value"],
    "timestamp": ["date", "event_time", "created_at", "tx_time"],
    "merchant_category": ["mcc", "merchant_cat", "category"],
}


def prepare_transactions(df):
    """Run the generic preparation step with the transaction field map."""
    return _prepare_generic(df, TX_EXPECTED)
|
246 |
-
|
247 |
-
def detect_transactions(clean_df, colmap, high_risk_mcc: Optional[List[str]] = None):
    """Flag transactions that hit any of the rule-based fraud checks.

    Rules: amount above 10,000; negative amount; merchant category in the
    high-risk set; per-customer daily total above 50,000 (only when a
    timestamp column is mapped).

    Args:
        clean_df: Prepared transactions frame.
        colmap: Canonical field name -> actual column name.
        high_risk_mcc: Extra high-risk categories added to the built-in set.

    Returns:
        ``(flagged, stats)``. ``flagged`` holds the rows that hit at least one
        rule plus a ``risk_reason`` column listing the rules, sorted and
        comma-separated. If required columns are missing, an empty frame and
        an explanatory message are returned.
    """
    high_risk = {"HIGH_RISK", "GAMBLING", "CRYPTO_EXCHANGE", "ESCORTS", "CASINO"}
    if high_risk_mcc:
        high_risk.update(
            _nfkc(str(x)).strip().upper().replace(" ", "_") for x in high_risk_mcc
        )
    if not all(k in colmap for k in ("customer_id", "amount")):
        return pd.DataFrame(), "Required columns missing for detection (need at least customer_id, amount)."

    df = clean_df.copy()
    amount = df[colmap["amount"]]

    # (label, mask) pairs kept in alphabetical label order, so the joined
    # risk_reason text is already sorted.
    rule_hits = []
    if "timestamp" in colmap:
        # Re-parse defensively so a non-datetime column does not raise on .dt.
        stamps = pd.to_datetime(df[colmap["timestamp"]], errors="coerce", utc=True)
        daily = amount.groupby([df[colmap["customer_id"]], stamps.dt.date]).transform("sum")
        rule_hits.append(("daily_sum>50k", daily > 50000))
    rule_hits.append(("large_amount", amount > 10000))
    if "merchant_category" in colmap:
        category = (
            df[colmap["merchant_category"]]
            .astype(str)
            .str.upper()
            .str.replace(" ", "_", regex=False)
        )
        rule_hits.append(("mcc_high_risk", category.isin(high_risk)))
    rule_hits.append(("negative_amount", amount < 0))

    # Positional boolean arrays avoid index-alignment surprises.
    hit_arrays = [
        (label, mask.to_numpy(dtype=bool, na_value=False)) for label, mask in rule_hits
    ]
    any_hit = np.zeros(len(df), dtype=bool)
    for _, hits in hit_arrays:
        any_hit |= hits

    flagged = df[any_hit]
    if not flagged.empty:
        reasons = [
            ", ".join(label for label, hits in hit_arrays if hits[pos]) or "rule_hit"
            for pos in np.flatnonzero(any_hit)
        ]
        flagged = flagged.assign(risk_reason=reasons)
    stats = f"Transactions flagged: {len(flagged)} of {len(df)}."
    return flagged, stats
|
292 |
-
|
293 |
-
# Canonical KYC fields mapped to the header synonyms accepted for them.
KYC_EXPECTED = {
    "customer_id": ["cust_id", "user_id", "client_id"],
    "name": ["full_name", "customer_name"],
    "email": ["email_address", "mail"],
    "phone": ["phone_number", "mobile", "contact"],
    "dob": ["date_of_birth", "birthdate"],
}


def prepare_kyc(df):
    """Run the generic preparation step with the KYC field map."""
    return _prepare_generic(df, KYC_EXPECTED)
|
301 |
-
|
302 |
-
def _age_years(dob: pd.Series) -> pd.Series:
|
303 |
-
now = pd.Timestamp.utcnow()
|
304 |
-
return (now - dob).dt.days / 365.25
|
305 |
-
|
306 |
-
def detect_kyc(clean_df, colmap):
    """Flag KYC rows with duplicate contacts, bad birth dates or odd names.

    Rules: e-mail or phone shared by several rows; date of birth missing, in
    the future or implying an age above 120 years; name that is all
    upper-case, contains a digit or is shorter than three characters.

    Args:
        clean_df: Prepared KYC frame.
        colmap: Canonical field name -> actual column name.

    Returns:
        ``(flagged, stats)``; flagged rows carry ``risk_reason`` set to
        "kyc_rule_hit". If ``customer_id`` or ``name`` is not mapped, an empty
        frame and an explanatory message are returned.
    """
    if not all(k in colmap for k in ("customer_id", "name")):
        return pd.DataFrame(), "Required columns missing for KYC (need at least customer_id, name)."

    df = clean_df.copy()
    masks = []

    for key in ("email", "phone"):
        if key in colmap:
            col = colmap[key]
            masks.append(df.duplicated(subset=[col], keep=False) & df[col].notna())

    if "dob" in colmap:
        # Parse here as well: the column may still hold raw strings, on which
        # date arithmetic would raise.
        now = pd.Timestamp.now(tz="UTC")
        dob = pd.to_datetime(df[colmap["dob"]], errors="coerce", utc=True)
        age = (now - dob).dt.days / 365.25
        masks.append(dob.isna() | (dob > now) | (age > 120))

    name = df[colmap["name"]].astype(str)
    masks.append(name.str.isupper() | name.str.contains(r"\d") | (name.str.len() < 3))

    combined = masks[0]
    for extra in masks[1:]:
        combined = combined | extra

    flagged = df[combined]
    if not flagged.empty:
        flagged = flagged.assign(risk_reason="kyc_rule_hit")
    stats = f"KYC flagged: {len(flagged)} of {len(df)}."
    return flagged, stats
|
333 |
-
|
334 |
-
# Canonical sanctions-screening fields mapped to accepted header synonyms.
SAN_EXPECTED = {
    "customer_id": ["cust_id", "user_id", "client_id"],
    "name": ["full_name", "customer_name"],
}


def prepare_sanctions(df):
    """Run the generic preparation step with the sanctions field map."""
    return _prepare_generic(df, SAN_EXPECTED)


# Built-in demo list, used when no sanctions list is supplied.
DEMO_SANCTIONS = pd.DataFrame({"name": ["Ivan Petrov", "Global Terror Org", "Acme Front LLC", "John Doe (PEP)", "Shadow Brokers"]})
|
338 |
-
|
339 |
-
def token_overlap(a: str, b: str) -> int:
    """Count the distinct alphanumeric tokens shared by ``a`` and ``b``.

    Comparison is case-insensitive and Unicode-aware, so names written in
    non-Latin scripts are tokenized too instead of yielding no tokens.
    """
    def _tokens(text: str) -> set:
        # [^\W_]+ matches runs of letters/digits in any script, excluding "_".
        return set(re.findall(r"[^\W_]+", text.casefold()))

    return len(_tokens(a) & _tokens(b))
|
343 |
-
|
344 |
-
def detect_sanctions(clean_df, colmap, sanctions_df: Optional[pd.DataFrame] = None):
    """Match customer names against a sanctions/PEP list.

    A row is an "exact" match when its name equals a listed name ignoring
    case, and a "fuzzy" match when it shares at least two tokens with one.

    Args:
        clean_df: Prepared customers frame.
        colmap: Canonical field name -> actual column name.
        sanctions_df: List to screen against; the built-in demo list is used
            when ``None``. Its name column is the one called "name" after
            standardization, else the first column containing "name".

    Returns:
        ``(flagged, stats)``; matched rows carry a ``match_type`` column. An
        empty frame and a message are returned when the customer frame or the
        sanctions list has no usable name column.
    """
    if "name" not in colmap:
        return pd.DataFrame(), "Required column missing for Sanctions (need name)."

    df = clean_df.copy()
    sanc = sanctions_df if sanctions_df is not None else DEMO_SANCTIONS.copy()
    sanc = _standardize_df(sanc)
    if "name" not in sanc.columns:
        alt = next((c for c in sanc.columns if "name" in c), None)
        if alt is None:
            # Report the problem instead of failing with a KeyError below.
            return pd.DataFrame(), "Sanctions list has no name column."
        sanc = sanc.rename(columns={alt: "name"})

    sanc_names = sanc["name"].dropna().astype(str).tolist()
    exact_names = {s.lower() for s in sanc_names}  # lower-cased once, not per row

    # Positions rather than index labels, so duplicate labels cannot
    # duplicate or drop matches.
    positions = []
    match_types = []
    for pos, raw in enumerate(df[colmap["name"]].tolist()):
        if pd.isna(raw):
            continue  # a missing name must not be screened as the text "nan"
        nm = str(raw).strip()
        if not nm:
            continue
        if nm.lower() in exact_names:
            positions.append(pos)
            match_types.append("exact")
        elif any(token_overlap(nm, s) >= 2 for s in sanc_names):
            positions.append(pos)
            match_types.append("fuzzy")

    flagged = df.iloc[positions].copy() if positions else pd.DataFrame()
    if not flagged.empty:
        flagged = flagged.assign(match_type=match_types)
    stats = f"Sanctions matches: {len(flagged)} of {len(df)}. (Using {'uploaded/MCP' if sanctions_df is not None else 'demo'} list)"
    return flagged, stats
|
368 |
-
|
369 |
-
# Canonical credit-risk fields mapped to the header synonyms accepted for them.
CR_EXPECTED = {
    "customer_id": ["cust_id", "user_id", "client_id"],
    "credit_score": ["creditscore", "score"],
    "utilization": ["util", "credit_utilization", "utilization_ratio"],
    "dti": ["debt_to_income", "debt_to_income_ratio"],
    "recent_defaults": ["defaults", "recentdefaults"],
    "income": ["annual_income", "salary"],
}


def prepare_credit(df):
    """Run the generic preparation step with the credit field map."""
    return _prepare_generic(df, CR_EXPECTED)
|
378 |
-
|
379 |
-
def detect_credit(clean_df, colmap):
    """Score applicants against simple credit-risk rules.

    One point each for: credit score below 600, utilization above 0.8, DTI
    above 0.4, any recent default, income below 30,000. Three or more points
    is "High", two is "Medium", one is "Low". Missing values never score.

    Args:
        clean_df: Prepared credit frame.
        colmap: Canonical field name -> actual column name.

    Returns:
        ``(flagged, stats)``; ``flagged`` holds the rows with at least one
        point plus ``risk_score``, ``risk_level`` and ``risk_reason`` columns.
        An empty frame and a message are returned when none of the scored
        fields is mapped.
    """
    rules = (
        ("credit_score", "credit_score<600", lambda s: s < 600),
        ("utilization", "utilization>0.8", lambda s: s > 0.8),
        ("dti", "DTI>0.4", lambda s: s > 0.4),
        ("recent_defaults", "recent_defaults>0", lambda s: s > 0),
        ("income", "income<30000", lambda s: s < 30000),
    )
    if not any(key in colmap for key, _, _ in rules):
        return pd.DataFrame(), "Required columns missing for Credit Risk."

    df = clean_df.copy()
    n_rows = len(df)
    score = np.zeros(n_rows, dtype=int)
    reasons = [[] for _ in range(n_rows)]
    for key, label, predicate in rules:
        if key not in colmap:
            continue
        col = df[colmap[key]]
        hit = (predicate(col) & col.notna()).to_numpy(dtype=bool)
        score += hit
        for pos in np.flatnonzero(hit):
            reasons[pos].append(label)

    levels = [
        "High" if h >= 3 else "Medium" if h == 2 else "Low" if h == 1 else "None"
        for h in score
    ]
    res = df.assign(
        risk_score=score,
        risk_level=levels,
        risk_reason=[", ".join(r) for r in reasons],
    )
    flagged = res[res["risk_level"] != "None"]
    stats = f"Credit Risk flagged: {len(flagged)} of {len(df)}. Distribution: High={(res['risk_level']=='High').sum()}, Medium={(res['risk_level']=='Medium').sum()}, Low={(res['risk_level']=='Low').sum()}."
    return flagged, stats
|
404 |
-
|
405 |
-
# ------------------------
|
406 |
-
# Summarizer (notice-first)
|
407 |
-
# ------------------------
|
408 |
-
SUMMARY_SYS = "You are a helpful Fraud/Risk analyst. Be concise (<120 words), list key counts, drivers, and data quality caveats."
|
409 |
-
|
410 |
-
def summarize_ai(context: str) -> str:
    """Generate a short AI summary of ``context``, or return the notice text.

    If a chat LLM is available it is invoked with the summary system prompt
    and the first 4000 characters of ``context``. When no model is configured
    or the call fails for any reason, the prototype notice string is
    returned; failures are logged so they stay diagnosable.
    """
    if CHAT_LLM is None:
        return SUMMARY_NOTICE
    try:
        out = CHAT_LLM.invoke(
            [SystemMessage(content=SUMMARY_SYS), HumanMessage(content=context[:4000])]
        )
    except Exception as e:
        # Every failure (auth or otherwise) shows the same notice to the user.
        log.warning("AI summary failed: %s", str(e)[:160])
        return SUMMARY_NOTICE
    if hasattr(out, "content"):
        return out.content
    return str(out)
|
426 |
-
|
427 |
-
# ------------------------
|
428 |
-
# Optional MCP
|
429 |
-
# ------------------------
|
430 |
-
from urllib.request import Request, urlopen
|
431 |
-
def _mcp_get_json(url: str, auth_header: Optional[str]):
    """GET ``url`` and return the decoded JSON body, or None on any failure.

    Args:
        url: Endpoint to fetch.
        auth_header: Optional header in "Name: value" form, split at the
            first colon and sent with the request.

    Returns:
        The parsed JSON value, or ``None`` if the request, the header parsing
        or the decoding fails (the failure is logged as a warning).
    """
    try:
        req = Request(url)
        if auth_header:
            key, value = auth_header.split(":", 1)
            req.add_header(key.strip(), value.strip())
        with urlopen(req, timeout=10) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except Exception as e:
        # Optional integration: degrade to "no data" rather than crash.
        log.warning("MCP fetch failed: %s", e)
        return None
|
442 |
-
|
443 |
-
def mcp_fetch_sanctions() -> Optional[pd.DataFrame]:
    """Fetch the sanctions list from the MCP endpoint, if enabled.

    Requires ``ENABLE_MCP`` to be "1" or "true" (any case, surrounding
    whitespace ignored) and ``MCP_SANCTIONS_URL`` to be set. The payload must
    be a JSON list of strings, or of objects with a "name"/"Name" entry.

    Returns:
        A frame with a single ``name`` column, or ``None`` when MCP is
        disabled, unconfigured, unreachable or returns an unusable payload.
    """
    if os.getenv("ENABLE_MCP", "0").strip().lower() not in ("1", "true"):
        return None
    url = os.getenv("MCP_SANCTIONS_URL")
    if not url:
        return None
    data = _mcp_get_json(url, os.getenv("MCP_AUTH_HEADER"))
    if not data or not isinstance(data, list):
        return None
    if all(isinstance(x, dict) for x in data):
        names = [x.get("name") or x.get("Name") for x in data]
        rows = [{"name": nm} for nm in names if nm]
        return pd.DataFrame(rows) if rows else None
    if all(isinstance(x, str) for x in data):
        return pd.DataFrame({"name": data})
    return None
|
456 |
-
|
457 |
-
def mcp_fetch_high_risk_mcc() -> Optional[List[str]]:
    """Fetch extra high-risk merchant categories from MCP, if enabled.

    Requires ``ENABLE_MCP`` to be "1" or "true" (any case, surrounding
    whitespace ignored) and ``MCP_HIGH_RISK_MCC_URL`` to be set.

    Returns:
        The payload items as strings when the endpoint returns a JSON list,
        otherwise ``None``.
    """
    if os.getenv("ENABLE_MCP", "0").strip().lower() not in ("1", "true"):
        return None
    url = os.getenv("MCP_HIGH_RISK_MCC_URL")
    if not url:
        return None
    data = _mcp_get_json(url, os.getenv("MCP_AUTH_HEADER"))
    if isinstance(data, list):
        return [str(x) for x in data]
    return None
|
463 |
-
|
464 |
-
# ------------------------
|
465 |
-
# Pipelines (per tab)
|
466 |
-
# ------------------------
|
467 |
-
def run_transactions(file):
    """Transactions tab pipeline: read, prepare, detect and summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any failure the
        error text, "Validation failed." and two empty frames are returned,
        and the exception is logged.
    """
    try:
        df = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_transactions(df)
        mcc = mcp_fetch_high_risk_mcc()
        flagged, stats = detect_transactions(clean, colmap, mcc)
        ctx = f"[Transactions]\n{stats}\nQuality: {quality}\nHead:\n{clean.head(5).to_csv(index=False)}\nFlagged:\n{flagged.head(5).to_csv(index=False)}"
        ai = summarize_ai(ctx)
        return ai, stats, flagged, issues
    except Exception as e:
        # UI boundary: surface the error in the widgets, keep the trace in logs.
        log.exception("Transactions pipeline failed")
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
478 |
-
|
479 |
-
def run_kyc(file):
    """KYC tab pipeline: read, prepare, detect and summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any failure the
        error text, "Validation failed." and two empty frames are returned,
        and the exception is logged.
    """
    try:
        df = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_kyc(df)
        flagged, stats = detect_kyc(clean, colmap)
        ctx = f"[KYC]\n{stats}\nQuality: {quality}\nHead:\n{clean.head(5).to_csv(index=False)}\nFlagged:\n{flagged.head(5).to_csv(index=False)}"
        ai = summarize_ai(ctx)
        return ai, stats, flagged, issues
    except Exception as e:
        # UI boundary: surface the error in the widgets, keep the trace in logs.
        log.exception("KYC pipeline failed")
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
489 |
-
|
490 |
-
def run_sanctions(customers_file, sanctions_file):
    """Sanctions/PEP tab pipeline: read, prepare, match and summarize.

    The MCP sanctions list is used when one is available; the uploaded
    sanctions file is consulted only when MCP yields nothing, and the demo
    list applies when neither exists.

    Returns:
        ``(ai_summary, stats, matches_df, issues_df)``. On any failure the
        error text, "Validation failed." and two empty frames are returned,
        and the exception is logged.
    """
    try:
        df = _read_csv_any(customers_file)
        clean, issues, quality, colmap = prepare_sanctions(df)
        sanc_df = mcp_fetch_sanctions()
        if sanc_df is None and sanctions_file is not None:
            sanc_df = _read_csv_any(sanctions_file)
        flagged, stats = detect_sanctions(clean, colmap, sanc_df)
        ctx = f"[Sanctions]\n{stats}\nQuality: {quality}\nHead:\n{clean.head(5).to_csv(index=False)}\nMatches:\n{flagged.head(5).to_csv(index=False)}"
        ai = summarize_ai(ctx)
        return ai, stats, flagged, issues
    except Exception as e:
        # UI boundary: surface the error in the widgets, keep the trace in logs.
        log.exception("Sanctions pipeline failed")
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
503 |
-
|
504 |
-
def run_credit(file):
    """Credit Risk tab pipeline: read, prepare, score and summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any failure the
        error text, "Validation failed." and two empty frames are returned,
        and the exception is logged.
    """
    try:
        df = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_credit(df)
        flagged, stats = detect_credit(clean, colmap)
        ctx = f"[Credit]\n{stats}\nQuality: {quality}\nHead:\n{clean.head(5).to_csv(index=False)}\nFlagged:\n{flagged.head(5).to_csv(index=False)}"
        ai = summarize_ai(ctx)
        return ai, stats, flagged, issues
    except Exception as e:
        # UI boundary: surface the error in the widgets, keep the trace in logs.
        log.exception("Credit pipeline failed")
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
514 |
-
|
515 |
-
# ------------------------
|
516 |
-
# Tools (CSV text in → concise text out)
|
517 |
-
# ------------------------
|
518 |
-
def _csv_text_to_df(csv_text: str) -> pd.DataFrame:
|
519 |
-
return pd.read_csv(io.StringIO(csv_text))
|
520 |
-
|
521 |
-
class TransactionCSVInput(BaseModel):
    """Input schema for the transactions tool."""

    csv_text: str = Field(..., description="Transactions CSV text")


@tool("transactions_fraud_tool", args_schema=TransactionCSVInput)
def transactions_fraud_tool(csv_text: str) -> str:
    """Analyze transactions CSV text with rule-based fraud checks and return flagged rows and stats."""
    # The docstring above is the tool description given to the agent; the
    # decorator needs one because no explicit description is passed.
    df = _csv_text_to_df(csv_text)
    clean, issues, quality, colmap = prepare_transactions(df)
    flagged, stats = detect_transactions(clean, colmap)
    # Reply is capped at 2800 characters to keep the agent context small.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
530 |
-
|
531 |
-
class KYCCSVInput(BaseModel):
    """Input schema for the KYC tool."""

    csv_text: str = Field(..., description="KYC CSV text")


@tool("kyc_fraud_tool", args_schema=KYCCSVInput)
def kyc_fraud_tool(csv_text: str) -> str:
    """Analyze KYC CSV text for duplicate contacts, invalid birth dates and suspicious names."""
    # The docstring above is the tool description given to the agent; the
    # decorator needs one because no explicit description is passed.
    df = _csv_text_to_df(csv_text)
    clean, issues, quality, colmap = prepare_kyc(df)
    flagged, stats = detect_kyc(clean, colmap)
    # Reply is capped at 2800 characters to keep the agent context small.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
540 |
-
|
541 |
-
class SanctionsCSVInput(BaseModel):
    """Input schema for the sanctions/PEP tool."""

    csv_text: str = Field(..., description="Customers CSV text with a 'name' column")


@tool("sanctions_pep_tool", args_schema=SanctionsCSVInput)
def sanctions_pep_tool(csv_text: str) -> str:
    """Screen customer names in CSV text against the demo sanctions/PEP list and return matches."""
    # The docstring above is the tool description given to the agent; the
    # decorator needs one because no explicit description is passed.
    df = _csv_text_to_df(csv_text)
    clean, issues, quality, colmap = prepare_sanctions(df)
    flagged, stats = detect_sanctions(clean, colmap)
    # Reply is capped at 2800 characters to keep the agent context small.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst matches:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
550 |
-
|
551 |
-
class CreditCSVInput(BaseModel):
    """Input schema for the credit-risk tool."""

    csv_text: str = Field(..., description="Credit CSV text")


@tool("credit_risk_tool", args_schema=CreditCSVInput)
def credit_risk_tool(csv_text: str) -> str:
    """Score applicants in credit CSV text against credit-risk rules and return flagged rows and stats."""
    # The docstring above is the tool description given to the agent; the
    # decorator needs one because no explicit description is passed.
    df = _csv_text_to_df(csv_text)
    clean, issues, quality, colmap = prepare_credit(df)
    flagged, stats = detect_credit(clean, colmap)
    # Reply is capped at 2800 characters to keep the agent context small.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
560 |
-
|
561 |
-
# Tools offered to the agent, one per analysis module.
TOOLS: List[Tool] = [
    transactions_fraud_tool,
    kyc_fraud_tool,
    sanctions_pep_tool,
    credit_risk_tool,
]
|
567 |
-
|
568 |
-
# ------------------------
|
569 |
-
# Agent (chat-completions)
|
570 |
-
# ------------------------
|
571 |
-
AGENT_SYSTEM = """You are an AI Consultant for Fraud/Risk.
You have tools for Transactions, KYC, Sanctions/PEP, and Credit Risk.
If the user pastes a small CSV snippet, pick the relevant tool and analyze it.
Be concise and actionable."""


def build_agent():
    """Build the chat agent, or a stub when no chat model is configured.

    Returns:
        An agent created by ``initialize_agent`` over ``TOOLS`` and
        ``CHAT_LLM``; when ``CHAT_LLM`` is ``None``, an object whose
        ``invoke`` always returns ``CHAT_NOTICE``.
    """
    if CHAT_LLM is None:
        class Stub:
            """Stand-in agent that answers every prompt with the chat notice."""

            def invoke(self, prompt):
                return CHAT_NOTICE

        return Stub()
    return initialize_agent(
        TOOLS,
        CHAT_LLM,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=False,
        agent_kwargs={"system_message": AGENT_SYSTEM},
        handle_parsing_errors=True,
    )


AGENT = build_agent()
|
591 |
-
|
592 |
-
def agent_reply(history: List[Dict], user_msg: str):
    """Send one user message to the agent and return its reply text.

    A message with a newline and at least two commas is treated as a CSV
    snippet and wrapped in an instruction to pick a tool.

    Args:
        history: Prior chat messages; accepted for the UI callback signature
            but not forwarded to the agent.
        user_msg: The user's message; ``None`` is treated as empty text.

    Returns:
        The agent's "output" entry when the result is a dict containing one,
        otherwise ``str(result)``; on failure, "Agent error: <message>".
    """
    try:
        user_msg = "" if user_msg is None else str(user_msg)
        # count(",") >= 2 already implies that a comma is present.
        looks_like_csv = ("\n" in user_msg) and (user_msg.count(",") >= 2)
        if looks_like_csv:
            prompt = f"CSV snippet detected. Decide tool and analyze:\n\n{user_msg}"
        else:
            prompt = user_msg
        res = AGENT.invoke(prompt)
        if isinstance(res, dict) and "output" in res:
            return res["output"]
        return str(res)
    except Exception as e:
        return f"Agent error: {e}"
|
601 |
-
|
602 |
-
# ------------------------
|
603 |
-
# UI
|
604 |
-
# ------------------------
|
605 |
-
# Gradio UI: one tab per analysis module plus the agent chat.
# File inputs use type="filepath" so callbacks receive a path that
# pandas can read directly; type="binary" would hand them raw bytes.
with gr.Blocks(title="Fraud Detector Analyst — LangChain + MCP", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Fraud Detector Analyst — LangChain + MCP")
    gr.Markdown(
        "This prototype runs **rules & data checks locally**. "
        "Chat + AI summaries require a remote inference provider (HF Inference)."
    )

    with gr.Tabs():
        with gr.Tab("Transactions"):
            gr.Markdown("Upload a **transactions** CSV.")
            tx_file = gr.File(file_types=[".csv"], label="Transactions CSV", type="filepath")
            tx_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            tx_stats = gr.Textbox(label="Stats", lines=3)
            tx_flagged = gr.Dataframe(label="Flagged Transactions")
            tx_issues = gr.Dataframe(label="Data Quality Issues (row, field, issue, value)")
            tx_file.upload(run_transactions, inputs=[tx_file], outputs=[tx_ai, tx_stats, tx_flagged, tx_issues])

        with gr.Tab("KYC"):
            gr.Markdown("Upload a **KYC** CSV.")
            kyc_file = gr.File(file_types=[".csv"], label="KYC CSV", type="filepath")
            kyc_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            kyc_stats = gr.Textbox(label="Stats", lines=3)
            kyc_flagged = gr.Dataframe(label="Flagged KYC Rows")
            kyc_issues = gr.Dataframe(label="Data Quality Issues")
            kyc_file.upload(run_kyc, inputs=[kyc_file], outputs=[kyc_ai, kyc_stats, kyc_flagged, kyc_issues])

        with gr.Tab("Sanctions/PEP"):
            gr.Markdown("Upload **customers** CSV (+ optional sanctions CSV).")
            san_customers = gr.File(file_types=[".csv"], label="Customers CSV", type="filepath")
            san_list = gr.File(file_types=[".csv"], label="Sanctions/PEP CSV (optional)", type="filepath")
            san_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            san_stats = gr.Textbox(label="Stats", lines=3)
            san_flagged = gr.Dataframe(label="Matches")
            san_issues = gr.Dataframe(label="Data Quality Issues")
            # Either upload re-runs the screening with both current files.
            san_customers.upload(run_sanctions, inputs=[san_customers, san_list], outputs=[san_ai, san_stats, san_flagged, san_issues])
            san_list.upload(run_sanctions, inputs=[san_customers, san_list], outputs=[san_ai, san_stats, san_flagged, san_issues])

        with gr.Tab("Credit Risk"):
            gr.Markdown("Upload a **credit** CSV.")
            cr_file = gr.File(file_types=[".csv"], label="Credit CSV", type="filepath")
            cr_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            cr_stats = gr.Textbox(label="Stats", lines=3)
            cr_flagged = gr.Dataframe(label="Flagged Applicants")
            cr_issues = gr.Dataframe(label="Data Quality Issues")
            cr_file.upload(run_credit, inputs=[cr_file], outputs=[cr_ai, cr_stats, cr_flagged, cr_issues])

        with gr.Tab("AI Consultant (Agent)"):
            gr.Markdown("Paste a small CSV snippet or ask questions. Uses chat-completions when configured.")
            chatbot = gr.Chatbot(type="messages", label="Fraud AI Consultant")
            user_in = gr.Textbox(label="Message or CSV snippet")
            send_btn = gr.Button("Send")

            def _chat_fn(history, msg):
                """Append the user message and agent reply; clear the input box."""
                reply = agent_reply(history, msg)
                history = (history or []) + [{"role": "user", "content": msg}, {"role": "assistant", "content": reply}]
                return history, ""

            send_btn.click(_chat_fn, inputs=[chatbot, user_in], outputs=[chatbot, user_in])

    gr.Markdown(
        "### ⚙️ Enable inference\n"
        "- Set **HF_TOKEN** (or HF_SPACES on Spaces)\n"
        "- Optional: **LC_CHAT_MODEL** (default Qwen 0.5B Instruct), **LC_CHAT_MODEL_FALLBACK** (default Mistral 7B Instruct)\n"
        "- Optional MCP: `ENABLE_MCP=1`, `MCP_SANCTIONS_URL`, `MCP_HIGH_RISK_MCC_URL`, `MCP_AUTH_HEADER`"
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
1 |
+
"""TODO: implement app.py (split from app_monolith_backup.py)"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_monolith_backup.py
ADDED
@@ -0,0 +1,670 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Fraud Detector Analyst — LangChain + (optional) MCP
|
3 |
+
Advanced “prototype-first” build:
|
4 |
+
- Chat uses chat-completion models (LangChain ChatHuggingFace).
|
5 |
+
- AI Summary shows a notice when no inference is connected.
|
6 |
+
|
7 |
+
LLM env (serverless friendly):
|
8 |
+
HF_TOKEN (or HF_SPACES)
|
9 |
+
LC_CHAT_MODEL (default: "Qwen/Qwen2.5-0.5B-Instruct")
|
10 |
+
LC_CHAT_MODEL_FALLBACK (default: "mistralai/Mistral-7B-Instruct")
|
11 |
+
|
12 |
+
Summary behavior:
|
13 |
+
If no working inference/token -> summary fields display:
|
14 |
+
"🔌 Please connect to an inference point to generate summary."
|
15 |
+
|
16 |
+
Optional MCP:
|
17 |
+
ENABLE_MCP=1
|
18 |
+
MCP_SANCTIONS_URL, MCP_HIGH_RISK_MCC_URL
|
19 |
+
MCP_AUTH_HEADER="Authorization: Bearer <token>"
|
20 |
+
|
21 |
+
Run:
|
22 |
+
pip install -r requirements.txt
|
23 |
+
python app.py
|
24 |
+
On Spaces:
|
25 |
+
Add secret HF_TOKEN (or HF_SPACES). Launch.
|
26 |
+
"""
|
27 |
+
|
28 |
+
from __future__ import annotations
|
29 |
+
|
30 |
+
import os, io, re, json, math, unicodedata, logging
|
31 |
+
from typing import Optional, Tuple, List, Dict
|
32 |
+
|
33 |
+
import numpy as np
|
34 |
+
import pandas as pd
|
35 |
+
import gradio as gr
|
36 |
+
from dotenv import load_dotenv
|
37 |
+
|
38 |
+
# LangChain
|
39 |
+
from langchain.tools import tool
|
40 |
+
from langchain_core.tools import Tool
|
41 |
+
from langchain.agents import initialize_agent, AgentType
|
42 |
+
from langchain.schema import HumanMessage, SystemMessage
|
43 |
+
|
44 |
+
from pydantic import BaseModel, Field
|
45 |
+
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
|
46 |
+
|
47 |
+
# Phone normalization
|
48 |
+
try:
|
49 |
+
import phonenumbers
|
50 |
+
HAVE_PHONENUM = True
|
51 |
+
except Exception:
|
52 |
+
HAVE_PHONENUM = False
|
53 |
+
|
54 |
+
# ------------------------
|
55 |
+
# Setup
|
56 |
+
# ------------------------
|
57 |
+
load_dotenv()
|
58 |
+
logging.basicConfig(level=logging.INFO)
|
59 |
+
log = logging.getLogger("fraud-analyst")
|
60 |
+
|
61 |
+
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HF_SPACES")
|
62 |
+
|
63 |
+
# Chat models (chat-completions)
|
64 |
+
DEFAULT_CHAT_MODEL = os.getenv("LC_CHAT_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
|
65 |
+
FALLBACK_CHAT_MODEL = os.getenv("LC_CHAT_MODEL_FALLBACK", "mistralai/Mistral-7B-Instruct")
|
66 |
+
|
67 |
+
SUMMARY_NOTICE = "🔌 Please connect to an inference point to generate summary."
|
68 |
+
CHAT_NOTICE = "🔌 Chat model not configured. Set HF_TOKEN and LC_CHAT_MODEL to enable chat."
|
69 |
+
|
70 |
+
# ------------------------
|
71 |
+
# LLM builders
|
72 |
+
# ------------------------
|
73 |
+
def _mk_chat_llm(model_id: str) -> ChatHuggingFace:
    """Build a chat wrapper around a Hugging Face inference endpoint.

    The endpoint is created for ``model_id`` with the ``text-generation`` task
    and the module-level ``HF_TOKEN``; ``ChatHuggingFace`` is layered on top so
    callers can pass chat-style message lists.
    """
    endpoint_options = dict(
        repo_id=model_id,
        task="text-generation",
        huggingfacehub_api_token=HF_TOKEN,
        max_new_tokens=256,
        temperature=0.2,
        repetition_penalty=1.05,
        timeout=60,
    )
    endpoint = HuggingFaceEndpoint(**endpoint_options)
    return ChatHuggingFace(llm=endpoint)
|
88 |
+
|
89 |
+
def _heartbeat_chat(model_id: str) -> bool:
    """Return True when ``model_id`` answers a minimal one-message chat request.

    Any exception raised while building or invoking the model is logged
    (truncated to 160 characters) and reported as ``False``.
    """
    try:
        _mk_chat_llm(model_id).invoke([HumanMessage(content="ok")])
    except Exception as e:
        log.warning(f"Heartbeat failed for {model_id}: {str(e)[:160]}")
        return False
    return True
|
97 |
+
|
98 |
+
def build_chat_llm() -> Optional[ChatHuggingFace]:
    """Pick the first chat model that passes a heartbeat check.

    The default model is probed first, then the fallback. Nothing is probed
    when no token is configured.

    Returns:
        A ready ``ChatHuggingFace`` instance, or ``None`` when no model responds.
    """
    log.info(f"HF token present: {bool(HF_TOKEN)} len={len(HF_TOKEN) if HF_TOKEN else 0}")
    candidates = (
        (DEFAULT_CHAT_MODEL, f"Using chat model: {DEFAULT_CHAT_MODEL}"),
        (FALLBACK_CHAT_MODEL, f"Using fallback chat model: {FALLBACK_CHAT_MODEL}"),
    )
    if HF_TOKEN:
        for model_id, announcement in candidates:
            if _heartbeat_chat(model_id):
                log.info(announcement)
                return _mk_chat_llm(model_id)
    log.warning("No working chat model; chat will show a notice.")
    return None
|
111 |
+
|
112 |
+
CHAT_LLM = build_chat_llm()
|
113 |
+
|
114 |
+
# ------------------------
|
115 |
+
# Normalization helpers
|
116 |
+
# ------------------------
|
117 |
+
def _norm_colname(c: str) -> str:
|
118 |
+
c = c.strip().lower()
|
119 |
+
c = re.sub(r"\s+", "_", c)
|
120 |
+
c = re.sub(r"[^\w]+", "_", c)
|
121 |
+
return c.strip("_")
|
122 |
+
|
123 |
+
def _nfkc(s: str) -> str:
|
124 |
+
return unicodedata.normalize("NFKC", s)
|
125 |
+
|
126 |
+
def _collapse_ws(s: str) -> str:
|
127 |
+
return re.sub(r"\s+", " ", s).strip()
|
128 |
+
|
129 |
+
def _clean_str(x):
|
130 |
+
if pd.isna(x): return x
|
131 |
+
return _collapse_ws(_nfkc(str(x)))
|
132 |
+
|
133 |
+
def _is_email(s: str) -> bool:
|
134 |
+
return bool(re.match(r"^[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}$", s or ""))
|
135 |
+
|
136 |
+
def _clean_phone(s: str, default_region: str = "IN"):
|
137 |
+
if s is None or str(s).strip() == "":
|
138 |
+
return None, "missing_phone"
|
139 |
+
raw = re.sub(r"[^\d+]", "", str(s))
|
140 |
+
if HAVE_PHONENUM:
|
141 |
+
try:
|
142 |
+
pn = phonenumbers.parse(raw, default_region)
|
143 |
+
if phonenumbers.is_possible_number(pn) and phonenumbers.is_valid_number(pn):
|
144 |
+
return phonenumbers.format_number(pn, phonenumbers.PhoneNumberFormat.E164), None
|
145 |
+
return raw, "invalid_phone"
|
146 |
+
except Exception:
|
147 |
+
return raw, "invalid_phone"
|
148 |
+
digits = re.sub(r"\D", "", raw)
|
149 |
+
return (digits, None) if 8 <= len(digits) <= 15 else (digits, "invalid_phone")
|
150 |
+
|
151 |
+
def _parse_datetime(s):
|
152 |
+
try:
|
153 |
+
return pd.to_datetime(s, errors="coerce", utc=True)
|
154 |
+
except Exception:
|
155 |
+
return pd.NaT
|
156 |
+
|
157 |
+
def _to_numeric(series: pd.Series):
|
158 |
+
coerced = pd.to_numeric(series, errors="coerce")
|
159 |
+
return coerced, (coerced.isna() & series.notna())
|
160 |
+
|
161 |
+
def _read_csv_any(file_obj) -> pd.DataFrame:
|
162 |
+
if file_obj is None:
|
163 |
+
raise ValueError("No file uploaded.")
|
164 |
+
if hasattr(file_obj, "name"):
|
165 |
+
p = file_obj.name
|
166 |
+
try: return pd.read_csv(p)
|
167 |
+
except Exception: return pd.read_csv(p, encoding="latin-1")
|
168 |
+
try: return pd.read_csv(file_obj)
|
169 |
+
except Exception:
|
170 |
+
file_obj.seek(0)
|
171 |
+
return pd.read_csv(file_obj, encoding="latin-1")
|
172 |
+
|
173 |
+
def _standardize_df(df: pd.DataFrame) -> pd.DataFrame:
|
174 |
+
df = df.copy()
|
175 |
+
df.columns = [_norm_colname(c) for c in df.columns]
|
176 |
+
for c in df.select_dtypes(include=["object"]).columns:
|
177 |
+
df[c] = df[c].apply(_clean_str)
|
178 |
+
return df
|
179 |
+
|
180 |
+
def _prepare_generic(df: pd.DataFrame, expected: Dict[str, List[str]]):
    """Standardize an uploaded table and collect data-quality issues.

    Steps: normalize headers and text cells, resolve canonical fields through
    their synonyms, validate e-mail and phone columns, parse datetime columns,
    and coerce well-known numeric columns.

    Args:
        df: Raw uploaded table.
        expected: Canonical field name -> list of accepted synonym headers.

    Returns:
        ``(clean_df, issues_df, quality_summary, colmap)``. ``colmap`` maps each
        canonical field that was found to the actual normalized column name;
        ``issues_df`` has the columns ``row``, ``field``, ``issue``, ``value``.
    """
    issues = []
    df0 = _standardize_df(df)

    # Synonym mapping: the canonical name is tried first, then each synonym.
    cols = set(df0.columns)
    colmap = {}
    for canon, syns in expected.items():
        for candidate in [canon] + syns:
            key = _norm_colname(candidate)
            if key in cols:
                colmap[canon] = key
                break

    # Columns resolved through a synonym ("mail", "contact", "dob", ...) do not
    # contain the keyword the name-based checks below look for. They are added
    # explicitly so they are validated/parsed like their keyword-named peers;
    # an unparsed "dob" would otherwise break date arithmetic downstream.
    email_col = colmap.get("email")
    phone_col = colmap.get("phone")
    mapped_dt_cols = {colmap[k] for k in ("timestamp", "dob") if k in colmap}

    # Email/phone quality
    for c in list(df0.columns):
        if "email" in c or c == email_col:
            df0[c] = df0[c].apply(lambda x: str(x).lower().strip() if pd.notna(x) else x)
            for idx, v in df0[c].items():
                if pd.isna(v) or str(v).strip() == "":
                    issues.append({"row": idx, "field": c, "issue": "missing_email", "value": ""})
                elif not _is_email(v):
                    issues.append({"row": idx, "field": c, "issue": "invalid_email", "value": str(v)})
        if "phone" in c or "mobile" in c or c == phone_col:
            vals = []
            for idx, v in df0[c].items():
                e164, prob = _clean_phone(v)
                vals.append(e164)
                if prob:
                    issues.append({"row": idx, "field": c, "issue": prob, "value": str(v)})
            df0[c] = vals

    # Datetime parsing
    dt_keywords = ["date", "time", "timestamp", "created_at", "updated_at"]
    for c in df0.columns:
        if c in mapped_dt_cols or any(k in c for k in dt_keywords):
            parsed = _parse_datetime(df0[c])
            bad = parsed.isna() & df0[c].notna()
            for idx in df0.index[bad]:
                issues.append({"row": int(idx), "field": c, "issue": "unparseable_timestamp", "value": str(df0.loc[idx, c])})
            df0[c] = parsed

    # Numeric coercions for common fields
    for nc in ["amount", "credit_score", "utilization", "dti", "recent_defaults", "income"]:
        for c in df0.columns:
            if c == nc or c.endswith("_" + nc) or nc in c:
                coerced, badmask = _to_numeric(df0[c])
                for idx in df0.index[badmask]:
                    issues.append({"row": int(idx), "field": c, "issue": "non_numeric", "value": str(df0.loc[idx, c])})
                df0[c] = coerced

    issues_df = pd.DataFrame(issues, columns=["row", "field", "issue", "value"])
    missing = [k for k in expected.keys() if k not in colmap]
    quality_summary = f"Rows={len(df0)}, Cols={len(df0.columns)}; Missing required fields: {missing if missing else 'None'}"
    return df0, issues_df, quality_summary, colmap
|
234 |
+
|
235 |
+
# ------------------------
|
236 |
+
# Modules & Rules
|
237 |
+
# ------------------------
|
238 |
+
# Canonical transaction fields and the header synonyms accepted for each.
TX_EXPECTED = dict(
    transaction_id=["txn_id", "transactionid", "id", "tx_id"],
    customer_id=["cust_id", "user_id", "client_id"],
    amount=["amt", "amount_inr", "value"],
    timestamp=["date", "event_time", "created_at", "tx_time"],
    merchant_category=["mcc", "merchant_cat", "category"],
)


def prepare_transactions(df):
    """Run the generic preparation step with the transaction field map."""
    return _prepare_generic(df, TX_EXPECTED)
|
246 |
+
|
247 |
+
def detect_transactions(clean_df, colmap, high_risk_mcc: Optional[List[str]]=None):
    """Flag transactions with simple rules and explain each hit.

    Rules: amount > 10000, amount < 0, merchant category in the high-risk
    set, and per-customer daily total > 50000. The daily rule only runs when
    the timestamp column holds parsed datetimes.

    Args:
        clean_df: Prepared transactions table.
        colmap: Canonical field -> actual column name.
        high_risk_mcc: Extra high-risk merchant categories merged into the
            built-in set (upper-cased, spaces replaced by underscores).

    Returns:
        ``(flagged, stats)``. ``flagged`` holds the rows that hit at least one
        rule plus a ``risk_reason`` column listing the rules in alphabetical
        order; it is an empty frame when required columns are missing.
    """
    high_risk = {"HIGH_RISK", "GAMBLING", "CRYPTO_EXCHANGE", "ESCORTS", "CASINO"}
    if high_risk_mcc:
        high_risk.update(_nfkc(x).strip().upper().replace(" ", "_") for x in high_risk_mcc)
    if not all(k in colmap for k in ["customer_id", "amount"]):
        return pd.DataFrame(), "Required columns missing for detection (need at least customer_id, amount)."

    df = clean_df.copy()
    cid_col, amt_col = colmap["customer_id"], colmap["amount"]
    amount = df[amt_col]

    checks = [("large_amount", amount > 10000), ("negative_amount", amount < 0)]
    if "merchant_category" in colmap:
        mcc = df[colmap["merchant_category"]].astype(str).str.upper().str.replace(" ", "_", regex=False)
        checks.append(("mcc_high_risk", mcc.isin(high_risk)))
    ts_col = colmap.get("timestamp")
    # `.dt` raises on a column that was never parsed; skip the rule instead.
    if ts_col is not None and pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        daily = df.groupby([cid_col, df[ts_col].dt.date])[amt_col].transform("sum")
        checks.append(("daily_sum>50k", daily > 50000))

    # Keep the masks as positional arrays so each row's reason string is built
    # from the rules already evaluated, rather than re-filtering the whole
    # frame once per flagged row.
    checks.sort(key=lambda item: item[0])
    hit_arrays = [(label, m.fillna(False).to_numpy(dtype=bool)) for label, m in checks]
    combined = np.zeros(len(df), dtype=bool)
    for _, arr in hit_arrays:
        combined |= arr

    flagged = df[combined]
    if not flagged.empty:
        reasons = [
            ", ".join(label for label, arr in hit_arrays if arr[pos])
            for pos in np.flatnonzero(combined)
        ]
        flagged = flagged.assign(risk_reason=reasons)
    stats = f"Transactions flagged: {len(flagged)} of {len(df)}."
    return flagged, stats
|
292 |
+
|
293 |
+
# Canonical KYC fields and the header synonyms accepted for each.
KYC_EXPECTED = dict(
    customer_id=["cust_id", "user_id", "client_id"],
    name=["full_name", "customer_name"],
    email=["email_address", "mail"],
    phone=["phone_number", "mobile", "contact"],
    dob=["date_of_birth", "birthdate"],
)


def prepare_kyc(df):
    """Run the generic preparation step with the KYC field map."""
    return _prepare_generic(df, KYC_EXPECTED)
|
301 |
+
|
302 |
+
def _age_years(dob: pd.Series) -> pd.Series:
|
303 |
+
now = pd.Timestamp.utcnow()
|
304 |
+
return (now - dob).dt.days / 365.25
|
305 |
+
|
306 |
+
def detect_kyc(clean_df, colmap):
    """Flag KYC rows with duplicate contacts, invalid birth dates or odd names.

    Rules (each only when its column is mapped): duplicate e-mail, duplicate
    phone, invalid date of birth (missing, in the future, or age > 120), and
    suspicious name (all upper-case, contains a digit, or shorter than 3).

    Args:
        clean_df: Prepared KYC table.
        colmap: Canonical field -> actual column name.

    Returns:
        ``(flagged, stats)``. ``flagged`` holds the rows that hit at least one
        rule plus a ``risk_reason`` column naming the rules hit, in rule
        order; it is an empty frame when required columns are missing.
    """
    if not all(k in colmap for k in ["customer_id", "name"]):
        return pd.DataFrame(), "Required columns missing for KYC (need at least customer_id, name)."
    df = clean_df.copy()

    checks = []
    for key, label in (("email", "duplicate_email"), ("phone", "duplicate_phone")):
        if key in colmap:
            col = colmap[key]
            checks.append((label, df.duplicated(subset=[col], keep=False) & df[col].notna()))
    if "dob" in colmap:
        now = pd.Timestamp.now(tz="UTC")
        # Coerce here as well: a dob column that reached this point as text
        # would raise on the date arithmetic below.
        dob = pd.to_datetime(df[colmap["dob"]], errors="coerce", utc=True)
        age = (now - dob).dt.days / 365.25
        checks.append(("invalid_dob", dob.isna() | (dob > now) | (age > 120)))
    name = df[colmap["name"]].astype(str)
    suspicious = name.str.isupper() | name.str.contains(r"\d") | (name.str.len() < 3)
    checks.append(("suspicious_name", suspicious))

    hit_arrays = [(label, m.fillna(False).to_numpy(dtype=bool)) for label, m in checks]
    combined = np.zeros(len(df), dtype=bool)
    for _, arr in hit_arrays:
        combined |= arr

    flagged = df[combined]
    if not flagged.empty:
        # Report which rules fired rather than a constant placeholder.
        reasons = [
            ", ".join(label for label, arr in hit_arrays if arr[pos])
            for pos in np.flatnonzero(combined)
        ]
        flagged = flagged.assign(risk_reason=reasons)
    stats = f"KYC flagged: {len(flagged)} of {len(df)}."
    return flagged, stats
|
333 |
+
|
334 |
+
# Canonical fields for sanctions screening and their accepted synonyms.
SAN_EXPECTED = {
    "customer_id": ["cust_id", "user_id", "client_id"],
    "name": ["full_name", "customer_name"],
}


def prepare_sanctions(df):
    """Run the generic preparation step with the sanctions field map."""
    return _prepare_generic(df, SAN_EXPECTED)


# Built-in sample list used when no sanctions list is supplied.
_DEMO_SANCTION_NAMES = [
    "Ivan Petrov",
    "Global Terror Org",
    "Acme Front LLC",
    "John Doe (PEP)",
    "Shadow Brokers",
]
DEMO_SANCTIONS = pd.DataFrame({"name": _DEMO_SANCTION_NAMES})
|
338 |
+
|
339 |
+
def token_overlap(a: str, b: str) -> int:
    """Count the distinct word tokens shared by ``a`` and ``b``.

    Tokens are runs of letters/digits in any script, compared case-insensitively
    with accents removed, so ``"José García"`` overlaps ``"Jose Garcia"`` and
    names in non-Latin scripts are tokenized instead of being dropped. For
    plain ASCII input the result equals an ``[A-Za-z0-9]+`` tokenization.
    """
    def _tokens(text) -> set:
        # NFKD splits accented letters into base letter + combining mark;
        # dropping the marks folds accents away.
        decomposed = unicodedata.normalize("NFKD", str(text))
        folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
        return set(re.findall(r"[^\W_]+", folded.casefold()))

    return len(_tokens(a) & _tokens(b))
|
343 |
+
|
344 |
+
def detect_sanctions(clean_df, colmap, sanctions_df: Optional[pd.DataFrame]=None):
    """Screen customer names against a sanctions/PEP list.

    A row matches ``"exact"`` when its name equals a listed name ignoring
    case, and ``"fuzzy"`` when it shares at least two tokens with a listed
    name. Rows with a missing or blank name are skipped.

    Args:
        clean_df: Prepared customers table.
        colmap: Canonical field -> actual column name; must contain ``name``.
        sanctions_df: List to screen against; the built-in demo list is used
            when ``None``.

    Returns:
        ``(flagged, stats)``; ``flagged`` carries a ``match_type`` column.

    Raises:
        ValueError: If the sanctions list has no column whose header contains
            ``name``.
    """
    if "name" not in colmap:
        return pd.DataFrame(), "Required column missing for Sanctions (need name)."
    df = clean_df.copy()
    sanc = sanctions_df if sanctions_df is not None else DEMO_SANCTIONS.copy()
    sanc = _standardize_df(sanc)
    if "name" not in sanc.columns:
        name_like = next((c for c in sanc.columns if "name" in c), None)
        if name_like is None:
            # Fail with an actionable message instead of a bare KeyError.
            raise ValueError("Sanctions list needs a column whose header contains 'name'.")
        sanc = sanc.rename(columns={name_like: "name"})
    sanc_names = sanc["name"].dropna().astype(str).tolist()
    exact_names = {s.lower() for s in sanc_names}

    # Row position -> match type; positions keep rows distinct even when the
    # frame's index labels are not unique.
    match_types = {}
    for pos, value in enumerate(df[colmap["name"]].tolist()):
        if pd.isna(value):
            continue
        nm = str(value).strip()
        if not nm:
            continue
        if nm.lower() in exact_names:
            match_types[pos] = "exact"
        elif any(token_overlap(nm, s) >= 2 for s in sanc_names):
            match_types[pos] = "fuzzy"

    if match_types:
        flagged = df.iloc[list(match_types)].copy()
        flagged = flagged.assign(match_type=list(match_types.values()))
    else:
        flagged = pd.DataFrame()
    stats = f"Sanctions matches: {len(flagged)} of {len(df)}. (Using {'uploaded/MCP' if sanctions_df is not None else 'demo'} list)"
    return flagged, stats
|
368 |
+
|
369 |
+
# Canonical credit fields and the header synonyms accepted for each.
CR_EXPECTED = dict(
    customer_id=["cust_id", "user_id", "client_id"],
    credit_score=["creditscore", "score"],
    utilization=["util", "credit_utilization", "utilization_ratio"],
    dti=["debt_to_income", "debt_to_income_ratio"],
    recent_defaults=["defaults", "recentdefaults"],
    income=["annual_income", "salary"],
)


def prepare_credit(df):
    """Run the generic preparation step with the credit field map."""
    return _prepare_generic(df, CR_EXPECTED)
|
378 |
+
|
379 |
+
def detect_credit(clean_df, colmap):
    """Score applicants against five credit-risk rules.

    Rules: credit_score < 600, utilization > 0.8, DTI > 0.4,
    recent_defaults > 0, income < 30000. A rule is skipped when its column is
    not mapped. ``risk_score`` is the number of rules hit; the level is
    High (>= 3), Medium (2), Low (1) or None (0).

    Args:
        clean_df: Prepared credit table.
        colmap: Canonical field -> actual column name.

    Returns:
        ``(flagged, stats)`` where ``flagged`` holds rows with at least one hit
        plus ``risk_score``, ``risk_level`` and ``risk_reason`` columns.
    """
    rules = [
        ("credit_score", "credit_score<600", lambda s: s < 600),
        ("utilization", "utilization>0.8", lambda s: s > 0.8),
        ("dti", "DTI>0.4", lambda s: s > 0.4),
        ("recent_defaults", "recent_defaults>0", lambda s: s > 0),
        ("income", "income<30000", lambda s: s < 30000),
    ]
    if not any(key in colmap for key, _, _ in rules):
        return pd.DataFrame(), "Required columns missing for Credit Risk."
    df = clean_df.copy()

    hit_arrays = []
    for key, label, predicate in rules:
        if key in colmap:
            # Columns matched through a synonym ("score", "util", "salary")
            # may still hold text; coerce so stray values become NaN (no hit)
            # rather than raising in the comparison.
            values = pd.to_numeric(df[colmap[key]], errors="coerce")
            hits = predicate(values).to_numpy(dtype=bool)
        else:
            hits = np.zeros(len(df), dtype=bool)
        hit_arrays.append((label, hits))

    risk_score = np.vstack([arr for _, arr in hit_arrays]).sum(axis=0)
    risk_level = [
        "High" if h >= 3 else ("Medium" if h == 2 else ("Low" if h == 1 else "None"))
        for h in risk_score
    ]
    reason = [
        ", ".join(label for label, arr in hit_arrays if arr[pos])
        for pos in range(len(df))
    ]
    res = df.assign(risk_score=risk_score, risk_level=risk_level, risk_reason=reason)
    flagged = res[res["risk_level"] != "None"]
    stats = f"Credit Risk flagged: {len(flagged)} of {len(df)}. Distribution: High={(res['risk_level']=='High').sum()}, Medium={(res['risk_level']=='Medium').sum()}, Low={(res['risk_level']=='Low').sum()}."
    return flagged, stats
|
404 |
+
|
405 |
+
# ------------------------
|
406 |
+
# Summarizer (notice-first)
|
407 |
+
# ------------------------
|
408 |
+
SUMMARY_SYS = "You are a helpful Fraud/Risk analyst. Be concise (<120 words), list key counts, drivers, and data quality caveats."
|
409 |
+
|
410 |
+
def summarize_ai(context: str) -> str:
    """Summarize ``context`` with the chat model, or return the notice string.

    The notice (``SUMMARY_NOTICE``) is returned when no chat model is
    configured or when the call fails for any reason; failures are logged so
    they are not silently indistinguishable from "not configured".

    Args:
        context: Text to summarize; only the first 4000 characters are sent.
    """
    if CHAT_LLM is None:
        return SUMMARY_NOTICE
    try:
        out = CHAT_LLM.invoke([SystemMessage(content=SUMMARY_SYS), HumanMessage(content=context[:4000])])
    except Exception as e:
        # Best-effort by design: the UI shows the notice for every failure
        # (auth errors included), but the cause is kept in the log.
        log.warning(f"AI summary failed: {str(e)[:160]}")
        return SUMMARY_NOTICE
    return out.content if hasattr(out, "content") else str(out)
|
426 |
+
|
427 |
+
# ------------------------
|
428 |
+
# Optional MCP
|
429 |
+
# ------------------------
|
430 |
+
from urllib.request import Request, urlopen
|
431 |
+
def _mcp_get_json(url: str, auth_header: Optional[str]):
    """GET ``url`` and decode the response body as JSON.

    Args:
        url: Endpoint to fetch.
        auth_header: Optional ``"Header-Name: value"`` string added to the
            request.

    Returns:
        The decoded JSON value, or ``None`` when anything fails (the failure is
        logged as a warning).
    """
    try:
        request = Request(url)
        if auth_header:
            header_name, header_value = auth_header.split(":", 1)
            request.add_header(header_name.strip(), header_value.strip())
        with urlopen(request, timeout=10) as response:
            body = response.read().decode("utf-8")
            return json.loads(body)
    except Exception as e:
        log.warning(f"MCP fetch failed: {e}")
        return None
|
442 |
+
|
443 |
+
def mcp_fetch_sanctions() -> Optional[pd.DataFrame]:
    """Fetch a sanctions list from the configured MCP endpoint.

    Active only when ``ENABLE_MCP`` is ``1``/``true``/``TRUE`` and
    ``MCP_SANCTIONS_URL`` is set. The payload must be a non-empty JSON list of
    either objects carrying ``name``/``Name`` or plain strings.

    Returns:
        A one-column (``name``) frame, or ``None`` when disabled, unreachable,
        or the payload has an unsupported shape.
    """
    enabled = os.getenv("ENABLE_MCP", "0") in ("1", "true", "TRUE")
    url = os.getenv("MCP_SANCTIONS_URL") if enabled else None
    if not url:
        return None
    payload = _mcp_get_json(url, os.getenv("MCP_AUTH_HEADER"))
    if not payload or not isinstance(payload, list):
        return None
    if all(isinstance(item, dict) for item in payload):
        names = [item.get("name") or item.get("Name") for item in payload]
        rows = [{"name": n} for n in names if n]
        return pd.DataFrame(rows) if rows else None
    if all(isinstance(item, str) for item in payload):
        return pd.DataFrame({"name": payload})
    return None
|
456 |
+
|
457 |
+
def mcp_fetch_high_risk_mcc() -> Optional[List[str]]:
    """Fetch extra high-risk merchant categories from the MCP endpoint.

    Active only when ``ENABLE_MCP`` is ``1``/``true``/``TRUE`` and
    ``MCP_HIGH_RISK_MCC_URL`` is set.

    Returns:
        The JSON list with every element stringified, or ``None`` when
        disabled, unreachable, or the payload is not a list.
    """
    enabled = os.getenv("ENABLE_MCP", "0") in ("1", "true", "TRUE")
    url = os.getenv("MCP_HIGH_RISK_MCC_URL") if enabled else None
    if not url:
        return None
    payload = _mcp_get_json(url, os.getenv("MCP_AUTH_HEADER"))
    if not isinstance(payload, list):
        return None
    return [str(item) for item in payload]
|
463 |
+
|
464 |
+
# ------------------------
|
465 |
+
# Pipelines (per tab)
|
466 |
+
# ------------------------
|
467 |
+
def run_transactions(file):
    """Transactions tab pipeline: read, prepare, detect, summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any error the first
        element carries the error text and both frames are empty.
    """
    try:
        raw = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_transactions(raw)
        extra_mcc = mcp_fetch_high_risk_mcc()
        flagged, stats = detect_transactions(clean, colmap, extra_mcc)
        head_csv = clean.head(5).to_csv(index=False)
        flagged_csv = flagged.head(5).to_csv(index=False)
        ctx = f"[Transactions]\n{stats}\nQuality: {quality}\nHead:\n{head_csv}\nFlagged:\n{flagged_csv}"
        return summarize_ai(ctx), stats, flagged, issues
    except Exception as e:
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
478 |
+
|
479 |
+
def run_kyc(file):
    """KYC tab pipeline: read, prepare, detect, summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any error the first
        element carries the error text and both frames are empty.
    """
    try:
        raw = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_kyc(raw)
        flagged, stats = detect_kyc(clean, colmap)
        head_csv = clean.head(5).to_csv(index=False)
        flagged_csv = flagged.head(5).to_csv(index=False)
        ctx = f"[KYC]\n{stats}\nQuality: {quality}\nHead:\n{head_csv}\nFlagged:\n{flagged_csv}"
        return summarize_ai(ctx), stats, flagged, issues
    except Exception as e:
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
489 |
+
|
490 |
+
def run_sanctions(customers_file, sanctions_file):
    """Sanctions/PEP tab pipeline: read, prepare, screen, summarize.

    The MCP list is tried first; the uploaded sanctions file is read only when
    MCP yields nothing, and the demo list is used when neither is available.

    Returns:
        ``(ai_summary, stats, matches_df, issues_df)``. On any error the first
        element carries the error text and both frames are empty.
    """
    try:
        raw = _read_csv_any(customers_file)
        clean, issues, quality, colmap = prepare_sanctions(raw)
        watchlist = mcp_fetch_sanctions()
        if watchlist is None and sanctions_file is not None:
            watchlist = _read_csv_any(sanctions_file)
        flagged, stats = detect_sanctions(clean, colmap, watchlist)
        head_csv = clean.head(5).to_csv(index=False)
        matches_csv = flagged.head(5).to_csv(index=False)
        ctx = f"[Sanctions]\n{stats}\nQuality: {quality}\nHead:\n{head_csv}\nMatches:\n{matches_csv}"
        return summarize_ai(ctx), stats, flagged, issues
    except Exception as e:
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
503 |
+
|
504 |
+
def run_credit(file):
    """Credit Risk tab pipeline: read, prepare, score, summarize.

    Returns:
        ``(ai_summary, stats, flagged_df, issues_df)``. On any error the first
        element carries the error text and both frames are empty.
    """
    try:
        raw = _read_csv_any(file)
        clean, issues, quality, colmap = prepare_credit(raw)
        flagged, stats = detect_credit(clean, colmap)
        head_csv = clean.head(5).to_csv(index=False)
        flagged_csv = flagged.head(5).to_csv(index=False)
        ctx = f"[Credit]\n{stats}\nQuality: {quality}\nHead:\n{head_csv}\nFlagged:\n{flagged_csv}"
        return summarize_ai(ctx), stats, flagged, issues
    except Exception as e:
        return f"Error: {e}", "Validation failed.", pd.DataFrame(), pd.DataFrame()
|
514 |
+
|
515 |
+
# ------------------------
|
516 |
+
# Tools (CSV text in → concise text out)
|
517 |
+
# ------------------------
|
518 |
+
def _csv_text_to_df(csv_text: str) -> pd.DataFrame:
|
519 |
+
return pd.read_csv(io.StringIO(csv_text))
|
520 |
+
|
521 |
+
class TransactionCSVInput(BaseModel):
    """Input schema for ``transactions_fraud_tool``."""

    csv_text: str = Field(..., description="Transactions CSV text")


@tool("transactions_fraud_tool", args_schema=TransactionCSVInput)
def transactions_fraud_tool(csv_text: str) -> str:
    """Analyze a transactions CSV snippet (customer, amount, timestamp, merchant category) and report rule-based fraud flags."""
    # The docstring above is what the agent reads as the tool description when
    # choosing a tool, so it describes the expected input and the result.
    df = _csv_text_to_df(csv_text)
    clean, issues, _quality, colmap = prepare_transactions(df)
    flagged, stats = detect_transactions(clean, colmap)
    # Truncated so the observation stays small for the chat model.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
530 |
+
|
531 |
+
class KYCCSVInput(BaseModel):
    """Input schema for ``kyc_fraud_tool``."""

    csv_text: str = Field(..., description="KYC CSV text")


@tool("kyc_fraud_tool", args_schema=KYCCSVInput)
def kyc_fraud_tool(csv_text: str) -> str:
    """Analyze a KYC CSV snippet (customer id, name, email, phone, date of birth) and report duplicate contacts, invalid birth dates and suspicious names."""
    # The docstring above is what the agent reads as the tool description when
    # choosing a tool, so it describes the expected input and the result.
    df = _csv_text_to_df(csv_text)
    clean, issues, _quality, colmap = prepare_kyc(df)
    flagged, stats = detect_kyc(clean, colmap)
    # Truncated so the observation stays small for the chat model.
    return f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"[:2800]
|
540 |
+
|
541 |
+
class SanctionsCSVInput(BaseModel):
    # Argument schema for sanctions_pep_tool: a single required text field.
    csv_text: str = Field(default=..., description="Customers CSV text with a 'name' column")
|
543 |
+
|
544 |
+
@tool("sanctions_pep_tool", args_schema=SanctionsCSVInput)
def sanctions_pep_tool(csv_text: str) -> str:
    """Screen customers CSV text (with a 'name' column) for sanctions/PEP matches.

    Returns the screening stats, the number of data-quality issues and up to
    five matching rows as CSV, truncated to 2800 characters.
    """
    # NOTE: the docstring above doubles as the tool description the agent sees,
    # so keep it short and accurate.
    df = _csv_text_to_df(csv_text)
    # The quality value is not part of the tool reply, hence the underscore.
    clean, issues, _quality, colmap = prepare_sanctions(df)
    flagged, stats = detect_sanctions(clean, colmap)
    report = f"{stats}\nDQ issues: {len(issues)}\nFirst matches:\n{flagged.head(5).to_csv(index=False)}"
    # Hard cap on reply length (presumably to keep the agent's context small).
    return report[:2800]
|
550 |
+
|
551 |
+
class CreditCSVInput(BaseModel):
    # Argument schema for credit_risk_tool: a single required text field.
    csv_text: str = Field(default=..., description="Credit CSV text")
|
553 |
+
|
554 |
+
@tool("credit_risk_tool", args_schema=CreditCSVInput)
def credit_risk_tool(csv_text: str) -> str:
    """Analyze credit CSV text for risky rows.

    Returns the detection stats, the number of data-quality issues and up to
    five flagged rows as CSV, truncated to 2800 characters.
    """
    # NOTE: the docstring above doubles as the tool description the agent sees,
    # so keep it short and accurate.
    df = _csv_text_to_df(csv_text)
    # The quality value is not part of the tool reply, hence the underscore.
    clean, issues, _quality, colmap = prepare_credit(df)
    flagged, stats = detect_credit(clean, colmap)
    report = f"{stats}\nDQ issues: {len(issues)}\nFirst flagged:\n{flagged.head(5).to_csv(index=False)}"
    # Hard cap on reply length (presumably to keep the agent's context small).
    return report[:2800]
|
560 |
+
|
561 |
+
# The four CSV-analysis tools, in the order they are handed to initialize_agent().
TOOLS: List[Tool] = [transactions_fraud_tool, kyc_fraud_tool, sanctions_pep_tool, credit_risk_tool]
|
567 |
+
|
568 |
+
# ------------------------
|
569 |
+
# Agent (chat-completions)
|
570 |
+
# ------------------------
|
571 |
+
# Instruction text supplied to the agent via agent_kwargs in build_agent().
AGENT_SYSTEM = (
    "You are an AI Consultant for Fraud/Risk.\n"
    "You have tools for Transactions, KYC, Sanctions/PEP, and Credit Risk.\n"
    "If the user pastes a small CSV snippet, pick the relevant tool and analyze it.\n"
    "Be concise and actionable."
)
|
575 |
+
|
576 |
+
def build_agent():
    """Create the chat agent, or a stand-in when no chat model is configured.

    Returns:
        The object produced by ``initialize_agent`` when ``CHAT_LLM`` is set;
        otherwise a small stub whose ``invoke`` ignores its prompt and returns
        ``CHAT_NOTICE``.
    """
    if CHAT_LLM is not None:
        # NOTE(review): confirm that the ZERO_SHOT_REACT_DESCRIPTION agent
        # actually consumes `system_message`; if it is ignored, AGENT_SYSTEM
        # never reaches the model.
        return initialize_agent(
            TOOLS,
            CHAT_LLM,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=False,
            agent_kwargs={"system_message": AGENT_SYSTEM},
            handle_parsing_errors=True,
        )

    class _OfflineAgent:
        # Same `invoke` entry point as the real agent, so callers need no special case.
        def invoke(self, prompt):
            return CHAT_NOTICE

    return _OfflineAgent()


# Built once when the module is loaded; agent_reply() reuses this instance.
AGENT = build_agent()
|
591 |
+
|
592 |
+
def agent_reply(history: List[Dict], user_msg: str):
    """Send one user message to the module-level agent and return its reply text.

    Args:
        history: Prior chat turns. Accepted for interface compatibility; it is
            not forwarded to the agent.
        user_msg: Free text or a pasted CSV snippet.

    Returns:
        The agent's answer as a string. A blank (or ``None``) message yields a
        short prompt asking for input instead of invoking the agent, and any
        exception is reported as ``"Agent error: <exc>"`` rather than raised.
    """
    try:
        # Nothing to analyze: answer locally instead of spending an agent call.
        if not user_msg or not user_msg.strip():
            return "Please enter a message or paste a CSV snippet."

        # Heuristic: multi-line text with at least two commas is treated as CSV.
        looks_like_csv = "\n" in user_msg and user_msg.count(",") >= 2
        if looks_like_csv:
            prompt = f"CSV snippet detected. Decide tool and analyze:\n\n{user_msg}"
        else:
            prompt = user_msg

        res = AGENT.invoke(prompt)
        # A dict result carries the answer under "output"; anything else
        # (such as the stub's plain string) is stringified.
        if isinstance(res, dict) and "output" in res:
            return res["output"]
        return str(res)
    except Exception as e:
        return f"Agent error: {e}"
|
601 |
+
|
602 |
+
# ------------------------
|
603 |
+
# UI
|
604 |
+
# ------------------------
|
605 |
+
# Gradio layout: one tab per check plus a chat tab. Each upload event runs the
# matching run_* function and fills that tab's four outputs.
# NOTE(review): nesting was reconstructed from a listing without indentation;
# confirm the closing "Enable inference" Markdown belongs at the Blocks level.
with gr.Blocks(title="Fraud Detector Analyst — LangChain + MCP", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🛡️ Fraud Detector Analyst — LangChain + MCP")
    gr.Markdown(
        "This prototype runs **rules & data checks locally**. "
        "Chat + AI summaries require a remote inference provider (HF Inference)."
    )

    with gr.Tabs():
        with gr.Tab("Transactions"):
            gr.Markdown("Upload a **transactions** CSV.")
            tx_file = gr.File(file_types=[".csv"], label="Transactions CSV", type="binary")
            tx_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            tx_stats = gr.Textbox(label="Stats", lines=3)
            tx_flagged = gr.Dataframe(label="Flagged Transactions")
            tx_issues = gr.Dataframe(label="Data Quality Issues (row, field, issue, value)")
            tx_file.upload(
                run_transactions,
                inputs=[tx_file],
                outputs=[tx_ai, tx_stats, tx_flagged, tx_issues],
            )

        with gr.Tab("KYC"):
            gr.Markdown("Upload a **KYC** CSV.")
            kyc_file = gr.File(file_types=[".csv"], label="KYC CSV", type="binary")
            kyc_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            kyc_stats = gr.Textbox(label="Stats", lines=3)
            kyc_flagged = gr.Dataframe(label="Flagged KYC Rows")
            kyc_issues = gr.Dataframe(label="Data Quality Issues")
            kyc_file.upload(
                run_kyc,
                inputs=[kyc_file],
                outputs=[kyc_ai, kyc_stats, kyc_flagged, kyc_issues],
            )

        with gr.Tab("Sanctions/PEP"):
            gr.Markdown("Upload **customers** CSV (+ optional sanctions CSV).")
            san_customers = gr.File(file_types=[".csv"], label="Customers CSV", type="binary")
            san_list = gr.File(file_types=[".csv"], label="Sanctions/PEP CSV (optional)", type="binary")
            san_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            san_stats = gr.Textbox(label="Stats", lines=3)
            san_flagged = gr.Dataframe(label="Matches")
            san_issues = gr.Dataframe(label="Data Quality Issues")
            # Uploading either file re-runs the screening with both inputs.
            san_inputs = [san_customers, san_list]
            san_outputs = [san_ai, san_stats, san_flagged, san_issues]
            san_customers.upload(run_sanctions, inputs=san_inputs, outputs=san_outputs)
            san_list.upload(run_sanctions, inputs=san_inputs, outputs=san_outputs)

        with gr.Tab("Credit Risk"):
            gr.Markdown("Upload a **credit** CSV.")
            cr_file = gr.File(file_types=[".csv"], label="Credit CSV", type="binary")
            cr_ai = gr.Textbox(label="AI Summary (requires inference)", value=SUMMARY_NOTICE, lines=6)
            cr_stats = gr.Textbox(label="Stats", lines=3)
            cr_flagged = gr.Dataframe(label="Flagged Applicants")
            cr_issues = gr.Dataframe(label="Data Quality Issues")
            cr_file.upload(
                run_credit,
                inputs=[cr_file],
                outputs=[cr_ai, cr_stats, cr_flagged, cr_issues],
            )

        with gr.Tab("AI Consultant (Agent)"):
            gr.Markdown("Paste a small CSV snippet or ask questions. Uses chat-completions when configured.")
            chatbot = gr.Chatbot(type="messages", label="Fraud AI Consultant")
            user_in = gr.Textbox(label="Message or CSV snippet")
            send_btn = gr.Button("Send")

            def _chat_fn(history, msg):
                # Append the user/assistant pair and clear the input box.
                reply = agent_reply(history, msg)
                user_turn = {"role": "user", "content": msg}
                assistant_turn = {"role": "assistant", "content": reply}
                history = (history or []) + [user_turn, assistant_turn]
                return history, ""

            send_btn.click(_chat_fn, inputs=[chatbot, user_in], outputs=[chatbot, user_in])

    gr.Markdown(
        "### ⚙️ Enable inference\n"
        "- Set **HF_TOKEN** (or HF_SPACES on Spaces)\n"
        "- Optional: **LC_CHAT_MODEL** (default Qwen 0.5B Instruct), **LC_CHAT_MODEL_FALLBACK** (default Mistral 7B Instruct)\n"
        "- Optional MCP: `ENABLE_MCP=1`, `MCP_SANCTIONS_URL`, `MCP_HIGH_RISK_MCC_URL`, `MCP_AUTH_HEADER`"
    )
|
668 |
+
|
669 |
+
if __name__ == "__main__":
    # Script entry point: serve the UI on all interfaces, port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|
llm_provider.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement llm_provider.py (split from app_monolith_backup.py)"""
|
mcp.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement mcp.py (split from app_monolith_backup.py)"""
|
modules/__init__.py
ADDED
File without changes
|
modules/credit.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement modules/credit.py (split from app_monolith_backup.py)"""
|
modules/kyc.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement modules/kyc.py (split from app_monolith_backup.py)"""
|
modules/sanctions.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement modules/sanctions.py (split from app_monolith_backup.py)"""
|
modules/transactions.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement modules/transactions.py (split from app_monolith_backup.py)"""
|
threat_intel.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement threat_intel.py (split from app_monolith_backup.py)"""
|
tools.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement tools.py (split from app_monolith_backup.py)"""
|
ttp_guard.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement ttp_guard.py (split from app_monolith_backup.py)"""
|
validation.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"""TODO: implement validation.py (split from app_monolith_backup.py)"""
|