MMADS committed on
Commit
0de653d
·
1 Parent(s): 34fa4d0

Initial commit of CVE decoder application

Browse files
Files changed (3) hide show
  1. .github/workflows/sync-to-hf.yml +26 -0
  2. app.py +615 -0
  3. requirements.txt +5 -0
.github/workflows/sync-to-hf.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mirrors this repository to a Hugging Face Space on every push to main
# (or when triggered manually from the Actions tab).
name: Sync to Hugging Face hub
on:
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history (not a shallow clone) so the push to the Space is accepted.
          fetch-depth: 0
          lfs: true

      - name: Push to hub
        env:
          # Token comes from the repository secret; username and Space name
          # are fixed for this project.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_USERNAME: MMADS
          SPACE_NAME: cve-decoder
        run: |
          # Add HuggingFace Space as remote
          git remote add space https://${HF_USERNAME}:${HF_TOKEN}@huggingface.co/spaces/${HF_USERNAME}/${SPACE_NAME}

          # Force push to space (overwrites whatever the Space currently holds)
          git push --force space main
app.py ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from collections import OrderedDict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, Optional, Tuple
7
+ from threading import Lock
8
+ import time
9
+
10
+ import gradio as gr
11
+ import pandas as pd
12
+ import plotly.express as px
13
+ import requests
14
+ from requests.adapters import HTTPAdapter
15
+ from urllib3.util.retry import Retry
16
+
17
# Configure logging for the application (done once, at import time).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# --- Constants and Global Variables ---

# Evaluated once at import, so a long-running process keeps the year it
# was started in.
CURRENT_YEAR = datetime.now().year
# NVD CVE API v2.0 endpoint.
NVD_API_V2_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
RESULTS_PER_PAGE = 2000  # Max allowed by the API

# Per-year DataFrame cache. Every read and write happens under CACHE_LOCK.
# Maps year -> (parsed DataFrame, time.time() at which it was stored).
# It is an OrderedDict because the fetch code re-orders entries on access
# and evicts the oldest one when CACHE_MAX_SIZE is reached.
CACHE_MAX_SIZE = 5
DATAFRAME_CACHE: Dict[int, Tuple[pd.DataFrame, float]] = OrderedDict()
CACHE_LOCK = Lock()
CACHE_TTL = 3600  # Cache TTL in seconds (1 hour)

# HTTP session with retry strategy, shared by every outgoing request.
SESSION = requests.Session()
retry_strategy = Retry(
    total=5,  # Increased retries for API robustness
    backoff_factor=1,
    # Rate limiting (429) and transient server errors are retried.
    status_forcelist=[429, 500, 502, 503, 504],
)
adapter = HTTPAdapter(max_retries=retry_strategy)
SESSION.mount("http://", adapter)
SESSION.mount("https://", adapter)

# NVD API Key from environment variables.
# NOTE: the key is installed as a session-level header, so it is attached to
# every request sent through SESSION, not only to requests for the NVD host.
NVD_API_KEY = os.environ.get("NVD_API_KEY")
if NVD_API_KEY:
    logger.info("NVD API key found and will be used.")
    SESSION.headers.update({"apiKey": NVD_API_KEY})
else:
    logger.warning("NVD_API_KEY environment variable not set. Using public, rate-limited access.")
55
+
56
# Profiles for tailoring LLM-generated summaries to different audiences.
# Keys are the audience names offered in the UI dropdown. Each profile holds:
#   "focus"      - what the summary should concentrate on,
#   "tone"       - the writing style requested from the model,
#   "priorities" - topics listed in the prompt as key priorities.
AUDIENCE_PROFILES = {
    "Cybersecurity Professional": {
        "focus": "threat assessment, attack vectors, mitigation strategies, and security controls",
        "tone": "technical and precise",
        "priorities": ["exploitation methods", "defensive measures", "risk assessment", "compliance implications"]
    },
    "Data Scientist": {
        "focus": "data exposure risks, model vulnerabilities, and statistical analysis implications",
        "tone": "analytical and research-oriented",
        "priorities": ["data integrity", "model security", "pipeline vulnerabilities", "privacy concerns"]
    },
    "Data Engineer": {
        "focus": "infrastructure vulnerabilities, data pipeline security, and system architecture impacts",
        "tone": "technical with infrastructure emphasis",
        "priorities": ["database security", "ETL vulnerabilities", "infrastructure risks", "data flow security"]
    },
    "Full-Stack Developer": {
        "focus": "code vulnerabilities, dependency risks, and implementation fixes",
        "tone": "practical and code-oriented",
        "priorities": ["code examples", "library updates", "patch implementation", "secure coding practices"]
    },
    "Product Owner": {
        "focus": "business impact, user experience, and prioritization for backlog",
        "tone": "business-oriented with technical context",
        "priorities": ["user impact", "feature implications", "timeline considerations", "resource requirements"]
    },
    "Manager": {
        "focus": "business risk, resource allocation, and strategic implications",
        "tone": "executive summary style",
        "priorities": ["business impact", "cost implications", "team requirements", "timeline urgency"]
    }
}
89
+
90
# Valid year range for NVD feeds (both bounds inclusive).
# MAX_YEAR follows CURRENT_YEAR, which is fixed when the module is imported.
MIN_YEAR = 2002
MAX_YEAR = CURRENT_YEAR
93
+
94
+
95
+ # --- Utility Functions ---
96
+
97
def validate_year(year: int) -> bool:
    """Return True when *year* lies in the inclusive range MIN_YEAR..MAX_YEAR."""
    if year < MIN_YEAR:
        return False
    return year <= MAX_YEAR
100
+
101
+
102
def clean_cache() -> None:
    """Drop every cached year whose entry is older than CACHE_TTL seconds.

    The scan and the deletions both run while holding CACHE_LOCK.
    """
    now = time.time()
    with CACHE_LOCK:
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        stale_years = []
        for cached_year, entry in DATAFRAME_CACHE.items():
            stored_at = entry[1]
            if now - stored_at > CACHE_TTL:
                stale_years.append(cached_year)

        for key in stale_years:
            if DATAFRAME_CACHE.pop(key, None) is not None:
                logger.info(f"Removed expired cache entry for year {key}")
114
+
115
+
116
+ # --- Data Fetching and Parsing (REFACTORED for API v2.0) ---
117
+
118
def _year_date_windows(year: int, max_days: int = 120):
    """Yield consecutive (start, end) datetimes that together cover *year*.

    The NVD CVE API 2.0 rejects publication-date ranges longer than 120
    consecutive days, so a calendar year has to be requested in slices.
    Adjacent windows share their boundary instant; the caller de-duplicates.
    """
    window_start = datetime(year, 1, 1, 0, 0, 0)
    year_end = datetime(year + 1, 1, 1, 0, 0, 0)
    step = timedelta(days=max_days)
    while window_start < year_end:
        window_end = min(window_start + step, year_end)
        yield window_start, window_end
        window_start = window_end


def _fetch_cve_window(start_date: str, end_date: str, pause: float) -> list:
    """Download every result page for one publication-date window."""
    collected = []
    start_index = 0
    while True:
        params = {
            'pubStartDate': start_date,
            'pubEndDate': end_date,
            'resultsPerPage': RESULTS_PER_PAGE,
            'startIndex': start_index
        }

        logger.info(f"Requesting CVEs from index {start_index}...")
        response = SESSION.get(NVD_API_V2_URL, params=params, timeout=60)
        response.raise_for_status()

        data = response.json()
        vulnerabilities = data.get("vulnerabilities", [])
        collected.extend(vulnerabilities)

        total_results = data.get("totalResults", 0)
        start_index += len(vulnerabilities)

        # An empty page cannot advance start_index; stop instead of
        # requesting the same page forever.
        if not vulnerabilities or start_index >= total_results:
            return collected

        # --- Respect NVD rate limits ---
        time.sleep(pause)


def get_cve_dataframe(year: int) -> pd.DataFrame:
    """
    Fetches, parses, and caches CVE data for a specific year from the NVD API 2.0.

    The year is downloaded in publication-date windows of at most 120 days
    (the API's range limit), each window is paged through, and the combined
    records are parsed into a DataFrame that is cached per year.

    Args:
        year: Calendar year to load; must satisfy validate_year().

    Returns:
        A copy of the parsed DataFrame, so callers cannot mutate the cache.

    Raises:
        gr.Error: for an invalid year, when no CVE is found, or when the
            download or parsing fails.
    """
    if not validate_year(year):
        raise gr.Error(f"Invalid year: {year}. Please select a year between {MIN_YEAR} and {MAX_YEAR}.")

    # Clean cache before checking
    clean_cache()

    with CACHE_LOCK:
        if year in DATAFRAME_CACHE:
            logger.info(f"Cache hit for year {year}.")
            DATAFRAME_CACHE.move_to_end(year)  # Mark as recently used
            return DATAFRAME_CACHE[year][0].copy()  # Return a copy to prevent mutations

    logger.info(f"Cache miss. Fetching NVD data for year {year} from API v2.0.")

    # Sleep for 6 seconds with API key, 10 without, to be safe
    pause = 6 if NVD_API_KEY else 10
    # Windows that start more than a day in the future cannot contain
    # published CVEs; skipping them avoids pointless requests and sleeps.
    horizon = datetime.now() + timedelta(days=1)

    all_vulnerabilities = []
    seen_ids = set()

    try:
        for window_number, (window_start, window_end) in enumerate(_year_date_windows(year)):
            if window_start > horizon:
                break
            if window_number:
                time.sleep(pause)

            window_items = _fetch_cve_window(
                window_start.isoformat(), window_end.isoformat(), pause
            )
            for item in window_items:
                # A CVE published exactly on a window boundary can be
                # returned by both neighbouring windows.
                cve_id = (item.get("cve") or {}).get("id")
                if cve_id is not None:
                    if cve_id in seen_ids:
                        continue
                    seen_ids.add(cve_id)
                all_vulnerabilities.append(item)

        if not all_vulnerabilities:
            logger.warning(f"No CVE data found for year {year}")
            raise gr.Error(f"No CVE data available for year {year}.")

        df = parse_cve_items(all_vulnerabilities)

        with CACHE_LOCK:
            # Evict the oldest entry only when a new key is about to be added.
            if year not in DATAFRAME_CACHE and len(DATAFRAME_CACHE) >= CACHE_MAX_SIZE:
                DATAFRAME_CACHE.popitem(last=False)
            DATAFRAME_CACHE[year] = (df, time.time())
            DATAFRAME_CACHE.move_to_end(year)

        return df.copy()

    except gr.Error:
        # Already a user-facing error; do not re-wrap it as "unexpected".
        raise
    except requests.exceptions.Timeout as e:
        logger.error(f"Timeout while fetching data for {year}")
        raise gr.Error("Request timed out. The NVD API might be busy. Please try again.") from e
    except requests.exceptions.HTTPError as e:
        logger.error(f"HTTP Error for {year}: {e}")
        status = e.response.status_code if e.response is not None else "unknown"
        raise gr.Error(f"Failed to fetch data for {year}. HTTP Error: {status}") from e
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON for {year}: {e}")
        raise gr.Error(f"Data for {year} is corrupted or invalid.") from e
    except Exception as e:
        logger.error(f"Unexpected error processing feed for {year}: {e}", exc_info=True)
        raise gr.Error(f"An unexpected error occurred: {str(e)}") from e
196
+
197
+
198
def _extract_cvss(metrics: dict) -> Tuple[Optional[float], str, str]:
    """Return (base score, severity, attack vector) from an NVD ``metrics`` dict.

    Metric versions are tried in the order v3.1 -> v3.0 -> v2 -> v4.0 and the
    first version with at least one entry wins; v4.0 is only a fallback so
    records that already carried an older score keep the same values.
    """
    for key in ("cvssMetricV31", "cvssMetricV30", "cvssMetricV2", "cvssMetricV40"):
        entries = metrics.get(key) or []
        if not entries:
            # Key missing or present with an empty list: try the next version.
            continue
        entry = entries[0] or {}
        cvss = entry.get("cvssData") or {}
        score = cvss.get("baseScore")
        # v3/v4 keep baseSeverity inside cvssData; v2 keeps it on the wrapper.
        severity = cvss.get("baseSeverity") or entry.get("baseSeverity") or "N/A"
        # v3/v4 call it attackVector; v2 calls it accessVector (in cvssData).
        vector = (
            cvss.get("attackVector")
            or cvss.get("accessVector")
            or entry.get("accessVector")
            or "N/A"
        )
        return score, severity, vector
    return None, "N/A", "N/A"


def parse_cve_items(vulnerabilities: list) -> pd.DataFrame:
    """
    Extracts vulnerability details from the NVD API v2.0 JSON data.

    Args:
        vulnerabilities: Items of the API's ``vulnerabilities`` array; each
            is expected to be a dict with a ``cve`` object. Items without
            one are skipped.

    Returns:
        A DataFrame with the columns CVE_ID, Description, Published,
        Base_Score, Severity, Attack_Vector and CWE_IDs, sorted by Published
        (newest first). When nothing could be parsed the frame is empty but
        still has these columns.
    """
    columns = [
        "CVE_ID", "Description", "Published", "Base_Score",
        "Severity", "Attack_Vector", "CWE_IDs",
    ]
    rows = []

    for item in vulnerabilities:
        cve_data = item.get("cve", {})
        if not cve_data:
            continue

        # Get English description
        description = "No description available"
        for desc in cve_data.get("descriptions", []):
            if desc.get("lang") == "en":
                description = desc.get("value", description)
                break

        published = cve_data.get("published", "N/A")
        base_score, severity, attack_vector = _extract_cvss(cve_data.get("metrics") or {})

        # Extract CWE IDs (placeholders such as "NVD-CWE-Other" are ignored)
        cwe_ids = [
            desc.get("value")
            for weakness in cve_data.get("weaknesses", [])
            for desc in weakness.get("description", [])
            if desc.get("lang") == "en" and (desc.get("value") or "").startswith("CWE-")
        ]

        rows.append({
            "CVE_ID": cve_data.get("id", "N/A"),
            "Description": description,
            # Keep only the date part of the ISO timestamp.
            "Published": published[:10] if published else "N/A",
            "Base_Score": base_score,
            "Severity": str(severity).upper() if severity else "N/A",
            "Attack_Vector": str(attack_vector).upper() if attack_vector else "N/A",
            "CWE_IDs": ", ".join(cwe_ids) if cwe_ids else "N/A"
        })

    if not rows:
        logger.warning("No valid CVE items could be parsed")
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows, columns=columns)
    df["Base_Score"] = pd.to_numeric(df["Base_Score"], errors='coerce')
    df = df.sort_values("Published", ascending=False, na_position='last').reset_index(drop=True)

    return df
268
+
269
+
270
+ # --- LLM Integration ---
271
+
272
def generate_tailored_summary(cve_description: str, audience: str, hf_token: str) -> str:
    """
    Generates a tailored CVE summary using the Hugging Face Inference API.

    Args:
        cve_description: Original CVE description to rewrite.
        audience: A key of AUDIENCE_PROFILES.
        hf_token: Hugging Face API token used as the bearer credential.

    Returns:
        Markdown text: the generated summary on success, otherwise a short
        human-readable message describing what went wrong. This function
        reports failures through its return value and does not raise.
    """
    if not hf_token:
        return "⚠️ Hugging Face API token is not configured. Please set the HF_TOKEN environment variable."
    if not cve_description:
        return "Please select a CVE from the table first."
    if audience not in AUDIENCE_PROFILES:
        return "Invalid audience selected."

    api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
    headers = {
        "Authorization": f"Bearer {hf_token}",
        # SESSION may carry the NVD "apiKey" header at session level. A None
        # value removes it from this request so the NVD key is never sent to
        # Hugging Face.
        "apiKey": None,
    }
    profile = AUDIENCE_PROFILES[audience]

    prompt = f"""<s>[INST] You are an expert cybersecurity analyst. Your task is to rewrite the following technical CVE description into a concise, actionable summary for a specific professional audience.

**Target Audience:** {audience}
- **Focus:** {profile.get('focus', 'N/A')}
- **Key Priorities:** {', '.join(profile.get('priorities', []))}

**Original CVE Description:**
---
{cve_description}
---

Provide a clear, concise summary (max 200 words) in a {profile.get('tone', 'professional')} tone, focusing on what matters most to this audience. Include actionable insights and recommendations. [/INST]"""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "top_p": 0.95,
            "return_full_text": False
        }
    }

    try:
        response = SESSION.post(api_url, headers=headers, json=payload, timeout=60)

        if response.status_code == 503:
            return "⏳ The model is currently loading. Please try again in a few moments."
        if response.status_code == 401:
            return "❌ Invalid API token. Please check your Hugging Face token."
        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON objects.
            try:
                error_data = response.json()
            except ValueError:
                error_data = None
            if isinstance(error_data, dict):
                error_message = error_data.get("error", "Unknown error")
            else:
                error_message = f"HTTP {response.status_code}"
            logger.error(f"Inference API Error ({response.status_code}): {error_message}")
            return f"⚠️ API Error: {error_message}"

        result = response.json()
        if isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
            generated_text = (result[0].get('generated_text') or '').strip()
            if generated_text:
                return f"### Tailored Summary for {audience}\n\n{generated_text}"
            return "⚠️ The model returned an empty response. Please try again."
        return "⚠️ Unexpected response format from the API."
    except requests.exceptions.Timeout:
        logger.error("Timeout while calling Inference API")
        return "⏱️ Request timed out. The model might be overloaded. Please try again."
    except Exception as e:
        logger.error(f"Unexpected error in generate_tailored_summary: {e}")
        return f"❌ An unexpected error occurred: {str(e)}"
338
+
339
+
340
+ # --- Analysis and Visualization ---
341
+
342
def analyze_and_visualize(
    df: Optional[pd.DataFrame],
    severity: str,
    vector: str,
    search: str
) -> Tuple[pd.DataFrame, Optional[px.bar], Optional[px.line], str]:
    """
    Filters the main DataFrame and generates all outputs.

    Args:
        df: Full CVE table for the loaded year, or None when nothing is loaded.
        severity: Severity to keep; "All" or an empty value disables the filter.
        vector: Attack vector to keep; "All" or an empty value disables the filter.
        search: Free text matched case-insensitively, as a literal substring,
            against the CVE_ID, Description and CWE_IDs columns.

    Returns:
        (table for display, severity chart or None, timeline chart or None,
        markdown summary). Failures are reported in the markdown summary
        together with an empty table; nothing is raised.
    """
    display_columns = ["CVE_ID", "Severity", "Base_Score", "Description"]

    if df is None or df.empty:
        empty_df = pd.DataFrame(columns=display_columns)
        return empty_df, None, None, "### No Data Loaded\n\nPlease select a year to load CVE data."

    try:
        # Boolean indexing below always builds new frames, so the caller's
        # DataFrame is never modified.
        filtered_df = df

        # Apply filters
        if severity and severity != "All":
            filtered_df = filtered_df[filtered_df["Severity"] == severity]
        if vector and vector != "All":
            filtered_df = filtered_df[filtered_df["Attack_Vector"] == vector]
        if search and search.strip():
            search_term = search.strip()
            # regex=False: the text comes straight from the user, and input
            # such as "C++" or "(" is not a valid regular expression.
            masks = [
                filtered_df[col].str.contains(search_term, case=False, na=False, regex=False)
                for col in ["CVE_ID", "Description", "CWE_IDs"] if col in filtered_df.columns
            ]
            if masks:
                combined_mask = pd.concat(masks, axis=1).any(axis=1)
                filtered_df = filtered_df[combined_mask]

        # Create outputs
        severity_chart = create_severity_chart(filtered_df)
        timeline_chart = create_timeline_chart(filtered_df)
        summary_text = create_summary_text(filtered_df)

        display_df = filtered_df[[col for col in display_columns if col in filtered_df.columns]]

        return display_df, severity_chart, timeline_chart, summary_text
    except Exception as e:
        logger.error(f"Error in analyze_and_visualize: {e}", exc_info=True)
        empty_df = pd.DataFrame(columns=display_columns)
        return empty_df, None, None, f"### Error\n\nAn error occurred while filtering data: {str(e)}"
386
+
387
+
388
def create_severity_chart(df: pd.DataFrame) -> Optional[px.bar]:
    """Build the severity-distribution bar chart.

    Returns None when *df* is empty, has no "Severity" column, or when
    building the figure fails (the failure is logged).
    """
    if "Severity" not in df.columns or df.empty:
        return None

    # Insertion order doubles as the left-to-right order of the bars.
    palette = {
        "CRITICAL": "#8B0000",
        "HIGH": "#FF4500",
        "MEDIUM": "#FFA500",
        "LOW": "#FFD700",
        "N/A": "#D3D3D3",
    }
    levels = list(palette)

    try:
        # Levels that do not occur are kept with a count of 0.
        tally = df["Severity"].value_counts().reindex(levels, fill_value=0)
        chart = px.bar(
            x=tally.index,
            y=tally.values,
            labels={"x": "Severity Level", "y": "Number of CVEs"},
            title="CVE Severity Distribution",
            color=tally.index,
            color_discrete_map=palette,
            text=tally.values,
        )
        chart.update_traces(texttemplate='%{text}', textposition='outside')
        chart.update_layout(
            showlegend=False,
            xaxis={'categoryorder': 'array', 'categoryarray': levels},
        )
    except Exception as e:
        logger.error(f"Error creating severity chart: {e}")
        return None
    return chart
408
+
409
def create_timeline_chart(df: pd.DataFrame) -> Optional[px.line]:
    """Creates a line chart showing CVE publications over time.

    Publications are counted per calendar month; each point is labelled
    with the first day of its month.

    Returns None when *df* is empty, has no "Published" column, contains no
    parseable date, or when building the figure fails (the failure is logged).
    """
    if df.empty or 'Published' not in df.columns:
        return None
    try:
        # Only the dates are needed, so the rest of the frame is not copied.
        # Unparseable values (for example "N/A") become NaT and are dropped.
        published = pd.to_datetime(df["Published"], errors='coerce').dropna()
        if published.empty:
            return None

        # "MS" (month start) is used instead of the "M" offset alias, which
        # recent pandas releases deprecate; monthly counts are identical.
        counts = published.to_frame(name="Date").set_index("Date").resample('MS').size()
        if counts.empty:
            return None

        fig = px.line(
            x=counts.index, y=counts.values,
            labels={"x": "Month", "y": "Number of CVEs"},
            title="CVE Publications Timeline", markers=True
        )
        return fig
    except Exception as e:
        logger.error(f"Error creating timeline chart: {e}")
        return None
431
+
432
+
433
def create_summary_text(df: pd.DataFrame) -> str:
    """Generates a markdown string with key statistics.

    Reports the total number of rows, the count per severity level
    (Critical/High/Medium/Low) and the mean and maximum base score.
    A missing "Severity" column yields zero counts and a missing or
    all-empty "Base_Score" column yields "N/A" scores.
    """
    if df.empty:
        return "### No Results\n\nNo CVEs match your current filter criteria."
    try:
        total_cves = len(df)

        if 'Severity' in df.columns:
            sev_counts = {level: int(n) for level, n in df['Severity'].value_counts().items()}
        else:
            sev_counts = {}

        if 'Base_Score' in df.columns:
            scores = pd.to_numeric(df['Base_Score'], errors='coerce').dropna()
        else:
            scores = pd.Series(dtype="float64")
        avg_score = f"{scores.mean():.2f}" if not scores.empty else "N/A"
        max_score = f"{scores.max():.1f}" if not scores.empty else "N/A"

        return "\n".join([
            "### Summary Statistics",
            f"- **Total CVEs Found:** {total_cves:,}",
            f"- **Critical:** {sev_counts.get('CRITICAL', 0):,}",
            f"- **High:** {sev_counts.get('HIGH', 0):,}",
            f"- **Medium:** {sev_counts.get('MEDIUM', 0):,}",
            f"- **Low:** {sev_counts.get('LOW', 0):,}",
            f"- **Average Base Score:** {avg_score}",
            f"- **Maximum Base Score:** {max_score}"
        ])
    except Exception as e:
        logger.error(f"Error creating summary text: {e}")
        return f"### Error\n\nCould not generate summary: {str(e)}"
457
+
458
+ # --- Gradio UI and Event Logic ---
459
+
460
def create_dashboard():
    """Builds the entire Gradio interface.

    Lays out the controls, the results area (summary, data table, two chart
    tabs) and the AI summary accordion, then wires the event handlers.

    Returns:
        The assembled ``gr.Blocks`` application, not yet launched.
    """

    with gr.Blocks(theme=gr.themes.Soft(), title="CVE Dashboard - NVD API v2.0 Analyzer") as dashboard:

        # Per-session state: the full table for the loaded year, the
        # description of the CVE currently selected in the table, and the
        # Hugging Face token read from the environment when the UI is built.
        df_state = gr.State(value=None)
        selected_cve_description = gr.State(value="")
        hf_token_state = gr.State(value=os.environ.get("HF_TOKEN", ""))

        gr.Markdown(
            """
            # 🛡️ CVE Dashboard: NVD API v2.0 Analyzer
            Explore Common Vulnerabilities and Exposures (CVE) data from the National Vulnerability Database, fetched live using the NVD API 2.0.

            Select a year to load CVE data, apply filters, and leverage AI to generate tailored summaries for different professional audiences.
            """
        )

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🎛️ Controls")
                year_dd = gr.Dropdown(
                    choices=list(range(MIN_YEAR, MAX_YEAR + 1))[::-1], value=CURRENT_YEAR,
                    label="1. Select Year", info="Choose a year to load CVE data"
                )

                gr.Markdown("### 🔍 Filters")
                severity_dd = gr.Dropdown(
                    choices=["All", "CRITICAL", "HIGH", "MEDIUM", "LOW"], value="All",
                    label="2. Severity Level", info="Filter by CVSS severity rating"
                )
                vector_dd = gr.Dropdown(
                    choices=["All", "NETWORK", "ADJACENT_NETWORK", "LOCAL", "PHYSICAL"], value="All",
                    label="3. Attack Vector", info="Filter by attack vector type"
                )
                search_tb = gr.Textbox(
                    label="4. Search", placeholder="e.g., 'Log4j', 'SQL injection', 'CWE-89'...",
                    info="Search in CVE IDs, descriptions, and CWE IDs"
                )
                filter_btn = gr.Button("🔄 Apply Filters", variant="primary", size="lg")

            with gr.Column(scale=3):
                summary_out = gr.Markdown(value="### Loading...")
                with gr.Tabs():
                    with gr.TabItem("📊 Data Table"):
                        # ``max_rows`` is not passed: it is a Gradio 3-only
                        # keyword and the dependency is not pinned.
                        table_out = gr.DataFrame(
                            headers=["CVE_ID", "Severity", "Base_Score", "Description"],
                            wrap=True, interactive=True, label="CVE Data"
                        )
                    with gr.TabItem("📈 Severity Analysis"):
                        plot_severity_out = gr.Plot(label="Severity Distribution")
                    with gr.TabItem("📉 Timeline Analysis"):
                        plot_timeline_out = gr.Plot(label="Publication Timeline")

        with gr.Accordion(
            "🤖 AI-Powered CVE Analysis (Select a CVE from the table)",
            open=False, visible=False
        ) as llm_accordion:
            with gr.Row():
                with gr.Column(scale=2):
                    original_desc_out = gr.Textbox(
                        label="Original CVE Description", lines=6, interactive=False, show_copy_button=True
                    )
                with gr.Column(scale=1):
                    audience_dd = gr.Dropdown(
                        choices=list(AUDIENCE_PROFILES.keys()), value="Cybersecurity Professional",
                        label="Target Audience", info="Select your role for a tailored summary"
                    )
                    generate_btn = gr.Button("✨ Generate Tailored Summary", variant="primary")
            summary_llm_out = gr.Markdown(value="*Select an audience and click 'Generate'...*")

        # --- Event Handlers ---

        def on_year_change(year):
            """Load the selected year and refresh every analysis output."""
            try:
                if year is None:
                    return None, pd.DataFrame(), None, None, "### Please select a year"
                df = get_cve_dataframe(int(year))
                return df, *analyze_and_visualize(df, "All", "All", "")
            except Exception as e:
                logger.error(f"Error in on_year_change: {e}")
                return None, pd.DataFrame(), None, None, f"### Error\n\n{str(e)}"

        def on_select_cve(full_df: pd.DataFrame, evt: gr.SelectData):
            """Show the description of the CVE whose row was clicked."""
            try:
                if full_df is None:
                    return "", "", gr.update(visible=False)

                # Prefer the whole selected row (first column is CVE_ID) so
                # that a click on any cell of the row works. Gradio versions
                # without ``row_value`` fall back to the clicked cell value.
                row_value = getattr(evt, "row_value", None)
                selected_cve_id = row_value[0] if row_value else evt.value
                if selected_cve_id is None:
                    return "", "", gr.update(visible=False)

                # Look up the full description in the master dataframe
                cve_record = full_df[full_df["CVE_ID"] == selected_cve_id]
                if cve_record.empty:
                    return "", "Could not find details for the selected CVE.", gr.update(visible=False)

                full_description = cve_record.iloc[0]["Description"]
                return full_description, full_description, gr.update(visible=True)
            except Exception as e:
                logger.error(f"Error in on_select_cve: {e}", exc_info=True)
                return "", "Error loading CVE details", gr.update(visible=False)

        # Wire up events
        analysis_outputs = [table_out, plot_severity_out, plot_timeline_out, summary_out]
        filter_inputs = [df_state, severity_dd, vector_dd, search_tb]

        # The same loader runs on page load and whenever the year changes.
        year_dd.change(
            fn=on_year_change, inputs=[year_dd],
            outputs=[df_state, *analysis_outputs], show_progress="full"
        )
        dashboard.load(
            fn=on_year_change, inputs=[year_dd],
            outputs=[df_state, *analysis_outputs], show_progress="full"
        )

        filter_btn.click(
            fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
        )
        search_tb.submit(
            fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
        )
        for control in [severity_dd, vector_dd]:
            control.change(
                fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
            )

        # Gradio supplies ``evt`` itself because the handler annotates it as
        # gr.SelectData, so no client-side JavaScript shim is needed.
        table_out.select(
            fn=on_select_cve,
            inputs=[df_state],
            outputs=[selected_cve_description, original_desc_out, llm_accordion],
            show_progress="hidden"
        )

        generate_btn.click(
            fn=generate_tailored_summary,
            inputs=[selected_cve_description, audience_dd, hf_token_state],
            outputs=[summary_llm_out]
        )

    return dashboard
605
+
606
# Script entry point: build the dashboard and serve it.
if __name__ == "__main__":
    try:
        # The app still starts without a token; only the AI summary needs it.
        if not os.environ.get("HF_TOKEN"):
            logger.warning("HF_TOKEN not found. AI features will be limited.")

        cve_dashboard = create_dashboard()
        # Bind to all interfaces and show handler errors in the UI.
        cve_dashboard.launch(server_name="0.0.0.0", show_error=True)
    except Exception as e:
        # Log with traceback, then re-raise so the process exits with an error.
        logger.error(f"Failed to launch application: {e}", exc_info=True)
        raise
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ plotly
4
+ requests
5
+ urllib3