MMADS commited on
Commit
28f96f2
·
1 Parent(s): 295eab6

fell back to deprecated api

Browse files
Files changed (2) hide show
  1. app.py +183 -492
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,56 +1,29 @@
1
  import json
2
  import logging
 
3
  import os
4
  from collections import OrderedDict
5
  from datetime import datetime
6
- from threading import Lock
7
- import time
8
- from typing import Dict, Optional, Tuple
9
 
10
  import gradio as gr
11
  import pandas as pd
12
  import plotly.express as px
13
  import requests
14
- from requests.adapters import HTTPAdapter
15
- from urllib3.util.retry import Retry
16
 
17
  # Configure logging for the application
18
- logging.basicConfig(
19
- level=logging.INFO,
20
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
21
- )
22
  logger = logging.getLogger(__name__)
23
 
24
  # --- Constants and Global Variables ---
25
 
26
  CURRENT_YEAR = datetime.now().year
27
- NVD_API_V2_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
28
- RESULTS_PER_PAGE = 2000 # Max allowed by the API
29
-
30
- # Thread-safe cache with lock
31
- CACHE_MAX_SIZE = 5
32
- DATAFRAME_CACHE: Dict[int, Tuple[pd.DataFrame, float]] = OrderedDict()
33
- CACHE_LOCK = Lock()
34
- CACHE_TTL = 3600 # Cache TTL in seconds (1 hour)
35
-
36
- # HTTP session with retry strategy
37
- SESSION = requests.Session()
38
- retry_strategy = Retry(
39
- total=5,
40
- backoff_factor=1,
41
- status_forcelist=[429, 500, 502, 503, 504],
42
- )
43
- adapter = HTTPAdapter(max_retries=retry_strategy)
44
- SESSION.mount("http://", adapter)
45
- SESSION.mount("https://", adapter)
46
-
47
- # NVD API Key from environment variables
48
- NVD_API_KEY = os.environ.get("NVD_API_KEY")
49
- if NVD_API_KEY:
50
- logger.info("NVD API key found and will be used.")
51
- SESSION.headers.update({"apiKey": NVD_API_KEY})
52
- else:
53
- logger.warning("NVD_API_KEY environment variable not set. Using public, rate-limited access.")
54
 
55
  # Profiles for tailoring LLM-generated summaries to different audiences
56
  AUDIENCE_PROFILES = {
@@ -86,191 +59,83 @@ AUDIENCE_PROFILES = {
86
  }
87
  }
88
 
89
- # Valid year range for NVD feeds
90
- MIN_YEAR = 2002
91
- MAX_YEAR = CURRENT_YEAR
92
-
93
-
94
- # --- Utility Functions ---
95
-
96
- def validate_year(year: int) -> bool:
97
- """Validates if the year is within the acceptable range."""
98
- return MIN_YEAR <= year <= MAX_YEAR
99
-
100
 
101
- def clean_cache() -> None:
102
- """Removes expired entries from the cache."""
103
- current_time = time.time()
104
- with CACHE_LOCK:
105
- expired_keys = [
106
- key for key, (_, timestamp) in DATAFRAME_CACHE.items()
107
- if current_time - timestamp > CACHE_TTL
108
- ]
109
- for key in expired_keys:
110
- if key in DATAFRAME_CACHE:
111
- del DATAFRAME_CACHE[key]
112
- logger.info(f"Removed expired cache entry for year {key}")
113
-
114
-
115
- # --- Data Fetching and Parsing (FIXED for API v2.0) ---
116
 
117
  def get_cve_dataframe(year: int) -> pd.DataFrame:
118
  """
119
- Fetches, parses, and caches CVE data for a specific year from the NVD API 2.0.
120
- Returns a pandas DataFrame with thread-safe caching.
121
  """
122
- if not validate_year(year):
123
- raise gr.Error(f"Invalid year: {year}. Please select a year between {MIN_YEAR} and {MAX_YEAR}.")
124
-
125
- clean_cache()
126
-
127
- with CACHE_LOCK:
128
- if year in DATAFRAME_CACHE:
129
- logger.info(f"Cache hit for year {year}.")
130
- DATAFRAME_CACHE.move_to_end(year)
131
- return DATAFRAME_CACHE[year][0].copy()
132
-
133
- logger.info(f"Cache miss. Fetching NVD data for year {year} from API v2.0.")
134
 
135
- # Format dates correctly for NVD API v2.0
136
- # The API expects dates in the format: YYYY-MM-DDTHH:MM:SS.mmm
137
- start_date = f"{year}-01-01T00:00:00.000"
138
- end_date = f"{year}-12-31T23:59:59.999"
139
-
140
- all_vulnerabilities = []
141
- start_index = 0
142
 
143
  try:
144
- while True:
145
- params = {
146
- 'pubStartDate': start_date,
147
- 'pubEndDate': end_date,
148
- 'resultsPerPage': RESULTS_PER_PAGE,
149
- 'startIndex': start_index
150
- }
151
-
152
- logger.info(f"Requesting CVEs from index {start_index}...")
153
- response = SESSION.get(NVD_API_V2_URL, params=params, timeout=60)
154
-
155
- if response.status_code == 404:
156
- logger.error(f"404 Error: URL requested: {response.url}")
157
- logger.error(f"Response content: {response.text[:500]}")
158
-
159
- response.raise_for_status()
160
-
161
- data = response.json()
162
- vulnerabilities = data.get("vulnerabilities", [])
163
- all_vulnerabilities.extend(vulnerabilities)
164
 
165
- total_results = data.get("totalResults", 0)
166
- logger.info(f"Retrieved {len(vulnerabilities)} CVEs. Total: {total_results}")
167
-
168
- start_index += len(vulnerabilities)
169
-
170
- if start_index >= total_results:
171
- break
172
-
173
- # Rate limiting: 6 seconds with API key, 10 seconds without
174
- time.sleep(6 if NVD_API_KEY else 10)
175
 
176
- if not all_vulnerabilities:
177
- logger.warning(f"No CVE data found for year {year}")
178
- raise gr.Error(f"No CVE data available for year {year}.")
179
 
180
- df = parse_cve_items(all_vulnerabilities)
181
-
182
- with CACHE_LOCK:
183
- if len(DATAFRAME_CACHE) >= CACHE_MAX_SIZE:
184
- DATAFRAME_CACHE.popitem(last=False)
185
- DATAFRAME_CACHE[year] = (df, time.time())
186
-
187
- logger.info(f"Successfully cached {len(df)} CVEs for year {year}")
188
- return df.copy()
189
 
190
- except requests.exceptions.Timeout:
191
- logger.error(f"Timeout while fetching data for {year}")
192
- raise gr.Error("Request timed out. The NVD API might be busy. Please try again.")
193
  except requests.exceptions.HTTPError as e:
194
  logger.error(f"HTTP Error for {year}: {e}")
195
- raise gr.Error(f"Failed to fetch data for {year}. HTTP Error: {e.response.status_code}")
196
- except json.JSONDecodeError as e:
197
- logger.error(f"Failed to parse JSON for {year}: {e}")
198
- raise gr.Error(f"Data for {year} is corrupted or invalid.")
199
  except Exception as e:
200
- logger.error(f"Unexpected error processing feed for {year}: {e}", exc_info=True)
201
  raise gr.Error(f"An unexpected error occurred: {str(e)}")
202
 
203
-
204
- def parse_cve_items(vulnerabilities: list) -> pd.DataFrame:
205
  """
206
- Extracts vulnerability details from the NVD API v2.0 JSON data.
207
  """
208
  rows = []
209
-
210
- for item in vulnerabilities:
211
- cve_data = item.get("cve", {})
212
- if not cve_data:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  continue
214
 
215
- cve_id = cve_data.get("id", "N/A")
216
-
217
- # Get English description
218
- description = "No description available"
219
- for desc in cve_data.get("descriptions", []):
220
- if desc.get("lang") == "en":
221
- description = desc.get("value", description)
222
- break
223
-
224
- published = cve_data.get("published", "N/A")
225
-
226
- # Parse CVSS metrics (prioritize v3.1, then v3.0, then v2)
227
- base_score, severity, attack_vector = None, "N/A", "N/A"
228
- metrics = cve_data.get("metrics", {})
229
-
230
- if "cvssMetricV31" in metrics and metrics["cvssMetricV31"]:
231
- metric_data = metrics["cvssMetricV31"][0].get("cvssData", {})
232
- base_score = metric_data.get("baseScore")
233
- severity = metric_data.get("baseSeverity", "N/A")
234
- attack_vector = metric_data.get("attackVector", "N/A")
235
- elif "cvssMetricV30" in metrics and metrics["cvssMetricV30"]:
236
- metric_data = metrics["cvssMetricV30"][0].get("cvssData", {})
237
- base_score = metric_data.get("baseScore")
238
- severity = metric_data.get("baseSeverity", "N/A")
239
- attack_vector = metric_data.get("attackVector", "N/A")
240
- elif "cvssMetricV2" in metrics and metrics["cvssMetricV2"]:
241
- metric_data = metrics["cvssMetricV2"][0]
242
- cvss_data = metric_data.get("cvssData", {})
243
- base_score = cvss_data.get("baseScore")
244
- severity = metric_data.get("baseSeverity", "N/A")
245
- attack_vector = cvss_data.get("accessVector", "N/A")
246
-
247
- # Extract CWE IDs
248
- cwe_ids = []
249
- for weakness in cve_data.get("weaknesses", []):
250
- for desc in weakness.get("description", []):
251
- if desc.get("lang") == "en":
252
- cwe_id = desc.get("value")
253
- if cwe_id and cwe_id.startswith("CWE-"):
254
- cwe_ids.append(cwe_id)
255
-
256
- rows.append({
257
- "CVE_ID": cve_id,
258
- "Description": description,
259
- "Published": published[:10] if published and published != "N/A" else "N/A",
260
- "Base_Score": base_score,
261
- "Severity": severity.upper() if severity and severity != "N/A" else "N/A",
262
- "Attack_Vector": attack_vector.upper() if attack_vector and attack_vector != "N/A" else "N/A",
263
- "CWE_IDs": ", ".join(cwe_ids) if cwe_ids else "N/A"
264
- })
265
-
266
- if not rows:
267
- logger.warning("No valid CVE items could be parsed")
268
- return pd.DataFrame()
269
-
270
  df = pd.DataFrame(rows)
271
- df["Base_Score"] = pd.to_numeric(df["Base_Score"], errors='coerce')
272
- df = df.sort_values("Published", ascending=False, na_position='last').reset_index(drop=True)
273
-
274
  return df
275
 
276
 
@@ -281,341 +146,167 @@ def generate_tailored_summary(cve_description: str, audience: str, hf_token: str
281
  Generates a tailored CVE summary using the Hugging Face Inference API.
282
  """
283
  if not hf_token:
284
- return "⚠️ Hugging Face API token is not configured. Please set the HF_TOKEN environment variable."
285
- if not cve_description or cve_description == "":
286
- return "Please select a CVE from the table first."
287
- if audience not in AUDIENCE_PROFILES:
288
- return "Invalid audience selected."
289
 
290
  api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
291
  headers = {"Authorization": f"Bearer {hf_token}"}
292
- profile = AUDIENCE_PROFILES[audience]
293
 
294
  prompt = f"""<s>[INST] You are an expert cybersecurity analyst. Your task is to rewrite the following technical CVE description into a concise, actionable summary for a specific professional audience.
295
 
296
- **Target Audience:** {audience}
297
- - **Focus:** {profile.get('focus', 'N/A')}
298
- - **Key Priorities:** {', '.join(profile.get('priorities', []))}
299
-
300
- **Original CVE Description:**
301
- ---
302
- {cve_description}
303
- ---
304
-
305
- Provide a clear, concise summary (max 200 words) in a {profile.get('tone', 'professional')} tone, focusing on what matters most to this audience. Include actionable insights and recommendations. [/INST]"""
306
-
307
- payload = {
308
- "inputs": prompt,
309
- "parameters": {
310
- "max_new_tokens": 256,
311
- "temperature": 0.7,
312
- "top_p": 0.95,
313
- "return_full_text": False
314
- }
315
- }
316
 
317
  try:
318
- response = SESSION.post(api_url, headers=headers, json=payload, timeout=60)
319
-
320
- if response.status_code == 503:
321
- return " The model is currently loading. Please try again in a few moments."
322
- elif response.status_code == 401:
323
- return "❌ Invalid API token. Please check your Hugging Face token."
324
- elif response.status_code != 200:
325
- error_data = response.json()
326
- error_message = error_data.get("error", "Unknown error")
327
- logger.error(f"Inference API Error ({response.status_code}): {error_message}")
328
- return f"⚠️ API Error: {error_message}"
329
 
330
- result = response.json()
331
- if isinstance(result, list) and len(result) > 0:
332
- generated_text = result[0].get('generated_text', '').strip()
333
- if generated_text:
334
- return f"### Tailored Summary for {audience}\n\n{generated_text}"
335
- else:
336
- return "⚠️ The model returned an empty response. Please try again."
337
- else:
338
- return "⚠️ Unexpected response format from the API."
339
- except requests.exceptions.Timeout:
340
- logger.error("Timeout while calling Inference API")
341
- return "⏱️ Request timed out. The model might be overloaded. Please try again."
342
- except Exception as e:
343
- logger.error(f"Unexpected error in generate_tailored_summary: {e}")
344
- return f"❌ An unexpected error occurred: {str(e)}"
345
 
346
 
347
  # --- Analysis and Visualization ---
348
 
349
- def analyze_and_visualize(
350
- df: Optional[pd.DataFrame],
351
- severity: str,
352
- vector: str,
353
- search: str
354
- ) -> Tuple[pd.DataFrame, Optional[px.bar], Optional[px.line], str]:
355
  """
356
- Filters the main DataFrame and generates all outputs.
 
357
  """
358
  if df is None or df.empty:
359
- empty_df = pd.DataFrame(columns=["CVE_ID", "Severity", "Base_Score", "Description"])
360
- return empty_df, None, None, "### No Data Loaded\n\nPlease select a year to load CVE data."
361
-
362
- try:
363
- filtered_df = df.copy()
364
-
365
- if severity and severity != "All":
366
- filtered_df = filtered_df[filtered_df["Severity"] == severity]
367
- if vector and vector != "All":
368
- filtered_df = filtered_df[filtered_df["Attack_Vector"] == vector]
369
- if search and search.strip():
370
- search_term = search.strip()
371
- masks = [
372
- filtered_df[col].str.contains(search_term, case=False, na=False)
373
- for col in ["CVE_ID", "Description", "CWE_IDs"] if col in filtered_df.columns
374
- ]
375
- if masks:
376
- combined_mask = pd.concat(masks, axis=1).any(axis=1)
377
- filtered_df = filtered_df[combined_mask]
378
-
379
- severity_chart = create_severity_chart(filtered_df)
380
- timeline_chart = create_timeline_chart(filtered_df)
381
- summary_text = create_summary_text(filtered_df)
382
-
383
- display_columns = ["CVE_ID", "Severity", "Base_Score", "Description"]
384
- display_df = filtered_df[[col for col in display_columns if col in filtered_df.columns]]
385
-
386
- return display_df, severity_chart, timeline_chart, summary_text
387
- except Exception as e:
388
- logger.error(f"Error in analyze_and_visualize: {e}", exc_info=True)
389
- empty_df = pd.DataFrame(columns=["CVE_ID", "Severity", "Base_Score", "Description"])
390
- return empty_df, None, None, f"### Error\n\nAn error occurred while filtering data: {str(e)}"
391
-
392
-
393
- def create_severity_chart(df: pd.DataFrame) -> Optional[px.bar]:
394
  """Creates a bar chart for CVE severity distribution."""
395
- if df.empty or "Severity" not in df.columns:
396
- return None
397
- try:
398
- order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "N/A"]
399
- counts = df["Severity"].value_counts().reindex(order, fill_value=0)
400
- color_map = {"CRITICAL": "#8B0000", "HIGH": "#FF4500", "MEDIUM": "#FFA500", "LOW": "#FFD700", "N/A": "#D3D3D3"}
401
- fig = px.bar(
402
- x=counts.index, y=counts.values,
403
- labels={"x": "Severity Level", "y": "Number of CVEs"},
404
- title="CVE Severity Distribution",
405
- color=counts.index, color_discrete_map=color_map, text=counts.values
406
- )
407
- fig.update_traces(texttemplate='%{text}', textposition='outside')
408
- fig.update_layout(showlegend=False, xaxis={'categoryorder': 'array', 'categoryarray': order})
409
- return fig
410
- except Exception as e:
411
- logger.error(f"Error creating severity chart: {e}")
412
- return None
413
 
414
- def create_timeline_chart(df: pd.DataFrame) -> Optional[px.line]:
415
  """Creates a line chart showing CVE publications over time."""
416
- if df.empty or 'Published' not in df.columns:
417
- return None
418
- try:
419
- df_copy = df.copy()
420
- df_copy["Date"] = pd.to_datetime(df_copy["Published"], errors='coerce')
421
- df_copy.dropna(subset=["Date"], inplace=True)
422
- if df_copy.empty: return None
423
-
424
- counts = df_copy.set_index("Date").resample('M').size()
425
- if counts.empty: return None
426
-
427
- fig = px.line(
428
- x=counts.index, y=counts.values,
429
- labels={"x": "Month", "y": "Number of CVEs"},
430
- title="CVE Publications Timeline", markers=True
431
- )
432
- return fig
433
- except Exception as e:
434
- logger.error(f"Error creating timeline chart: {e}")
435
- return None
436
 
 
 
 
 
437
 
438
  def create_summary_text(df: pd.DataFrame) -> str:
439
- """Generates a markdown string with key statistics."""
440
- if df.empty:
441
- return "### No Results\n\nNo CVEs match your current filter criteria."
442
- try:
443
- total_cves = len(df)
444
- sev_counts = df['Severity'].value_counts() if 'Severity' in df.columns else {}
445
- scores = df['Base_Score'].dropna()
446
- avg_score = f"{scores.mean():.2f}" if not scores.empty else "N/A"
447
- max_score = f"{scores.max():.1f}" if not scores.empty else "N/A"
448
-
449
- return "\n".join([
450
- f"### Summary Statistics",
451
- f"- **Total CVEs Found:** {total_cves:,}",
452
- f"- **Critical:** {sev_counts.get('CRITICAL', 0):,}",
453
- f"- **High:** {sev_counts.get('HIGH', 0):,}",
454
- f"- **Medium:** {sev_counts.get('MEDIUM', 0):,}",
455
- f"- **Low:** {sev_counts.get('LOW', 0):,}",
456
- f"- **Average Base Score:** {avg_score}",
457
- f"- **Maximum Base Score:** {max_score}"
458
- ])
459
- except Exception as e:
460
- logger.error(f"Error creating summary text: {e}")
461
- return f"### Error\n\nCould not generate summary: {str(e)}"
462
 
463
  # --- Gradio UI and Event Logic ---
464
 
465
  def create_dashboard():
466
- """Builds the entire Gradio interface."""
467
-
468
- with gr.Blocks(theme=gr.themes.Soft(), title="CVE Dashboard - NVD API v2.0 Analyzer") as dashboard:
469
-
470
- df_state = gr.State(value=None)
471
- selected_cve_description = gr.State(value="")
472
- hf_token_state = gr.State(value=os.environ.get("HF_TOKEN", ""))
473
-
474
- gr.Markdown(
475
- """
476
- # 🛡️ CVE Dashboard: NVD API v2.0 Analyzer
477
- Explore Common Vulnerabilities and Exposures (CVE) data from the National Vulnerability Database, fetched live using the NVD API 2.0.
478
-
479
- **Note:** For faster loading times, set an `NVD_API_KEY` in your environment. You can request one from the [NVD website](https://nvd.nist.gov/developers/request-an-api-key).
480
- """
481
- )
482
 
483
  with gr.Row():
484
  with gr.Column(scale=1):
485
- gr.Markdown("### 🎛️ Controls")
486
- year_dd = gr.Dropdown(
487
- choices=list(range(MIN_YEAR, MAX_YEAR + 1))[::-1], value=CURRENT_YEAR - 1,
488
- label="1. Select Year", info="Choose a year to load CVE data"
489
- )
490
-
491
- gr.Markdown("### 🔍 Filters")
492
- severity_dd = gr.Dropdown(
493
- choices=["All", "CRITICAL", "HIGH", "MEDIUM", "LOW"], value="All",
494
- label="2. Severity Level", info="Filter by CVSS severity rating"
495
- )
496
- vector_dd = gr.Dropdown(
497
- choices=["All", "NETWORK", "ADJACENT_NETWORK", "LOCAL", "PHYSICAL"], value="All",
498
- label="3. Attack Vector", info="Filter by attack vector type"
499
- )
500
- search_tb = gr.Textbox(
501
- label="4. Search", placeholder="e.g., 'Log4j', 'SQL injection', 'CWE-89'...",
502
- info="Search in CVE IDs, descriptions, and CWE IDs"
503
- )
504
- filter_btn = gr.Button("🔄 Apply Filters", variant="primary", size="lg")
505
-
506
  with gr.Column(scale=3):
507
- summary_out = gr.Markdown(value="### Loading...")
508
  with gr.Tabs():
509
  with gr.TabItem("📊 Data Table"):
510
- table_out = gr.DataFrame(
511
- headers=["CVE_ID", "Severity", "Base_Score", "Description"],
512
- wrap=True,
513
- row_count=20,
514
- interactive=True,
515
- label="CVE Data"
516
- )
517
- with gr.TabItem("📈 Severity Analysis"):
518
- plot_severity_out = gr.Plot(label="Severity Distribution")
519
- with gr.TabItem("📉 Timeline Analysis"):
520
- plot_timeline_out = gr.Plot(label="Publication Timeline")
521
 
522
- with gr.Accordion(
523
- "🤖 AI-Powered CVE Analysis (Select a CVE from the table)",
524
- open=False, visible=False
525
- ) as llm_accordion:
526
  with gr.Row():
527
  with gr.Column(scale=2):
528
- original_desc_out = gr.Textbox(
529
- label="Original CVE Description", lines=6, interactive=False, show_copy_button=True
530
- )
531
  with gr.Column(scale=1):
532
- audience_dd = gr.Dropdown(
533
- choices=list(AUDIENCE_PROFILES.keys()), value="Cybersecurity Professional",
534
- label="Target Audience", info="Select your role for a tailored summary"
535
- )
536
- generate_btn = gr.Button("✨ Generate Tailored Summary", variant="primary")
537
- summary_llm_out = gr.Markdown(value="*Select an audience and click 'Generate'...*")
538
-
539
  def on_year_change(year):
540
- """Handle year selection change."""
541
- try:
542
- if year is None:
543
- return None, pd.DataFrame(), None, None, "### Please select a year"
544
- df = get_cve_dataframe(int(year))
545
- return df, *analyze_and_visualize(df, "All", "All", "")
546
- except Exception as e:
547
- logger.error(f"Error in on_year_change: {e}")
548
- return None, pd.DataFrame(), None, None, f"### Error\n\n{str(e)}"
549
-
550
- def on_select_cve(full_df: pd.DataFrame, evt: gr.SelectData):
551
- """Handle CVE row selection safely."""
552
- try:
553
- if full_df is None or evt.value is None:
554
- return "", "", gr.update(visible=False)
555
-
556
- # Extract the CVE_ID from the first column of the selected row
557
- if hasattr(evt, 'index') and isinstance(evt.index, list) and len(evt.index) >= 2:
558
- row_idx = evt.index[0]
559
- selected_cve_id = full_df.iloc[row_idx]["CVE_ID"]
560
- else:
561
- # Fallback: try to use the value directly
562
- selected_cve_id = evt.value
563
-
564
- cve_record = full_df[full_df["CVE_ID"] == selected_cve_id]
565
- if cve_record.empty:
566
- return "", "Could not find details for the selected CVE.", gr.update(visible=False)
567
 
568
- full_description = cve_record.iloc[0]["Description"]
569
- return full_description, full_description, gr.update(visible=True)
570
- except Exception as e:
571
- logger.error(f"Error in on_select_cve: {e}", exc_info=True)
572
- return "", "Error loading CVE details", gr.update(visible=False)
573
 
574
- analysis_outputs = [table_out, plot_severity_out, plot_timeline_out, summary_out]
575
  filter_inputs = [df_state, severity_dd, vector_dd, search_tb]
576
-
577
- year_dd.change(
578
- fn=on_year_change, inputs=[year_dd],
579
- outputs=[df_state, *analysis_outputs], show_progress="full"
580
- )
581
- dashboard.load(
582
- fn=on_year_change, inputs=[year_dd],
583
- outputs=[df_state, *analysis_outputs], show_progress="full"
584
- )
585
-
586
- filter_btn.click(
587
- fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
588
- )
589
- search_tb.submit(
590
- fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
591
- )
592
- for control in [severity_dd, vector_dd]:
593
- control.change(
594
- fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs
595
- )
596
-
597
- table_out.select(
598
- fn=on_select_cve,
599
- inputs=[df_state],
600
- outputs=[selected_cve_description, original_desc_out, llm_accordion],
601
- show_progress="hidden"
602
- )
603
-
604
- generate_btn.click(
605
- fn=generate_tailored_summary,
606
- inputs=[selected_cve_description, audience_dd, hf_token_state],
607
- outputs=[summary_llm_out]
608
- )
609
 
 
 
 
 
 
 
 
 
 
 
610
  return dashboard
611
 
612
  if __name__ == "__main__":
613
- try:
614
- if not os.environ.get("HF_TOKEN"):
615
- logger.warning("HF_TOKEN not found. AI features will be limited.")
616
-
617
- cve_dashboard = create_dashboard()
618
- cve_dashboard.launch(server_name="0.0.0.0", show_error=True)
619
- except Exception as e:
620
- logger.error(f"Failed to launch application: {e}", exc_info=True)
621
- raise
 
1
  import json
2
  import logging
3
+ import gzip
4
  import os
5
  from collections import OrderedDict
6
  from datetime import datetime
7
+ from io import BytesIO
8
+ from typing import Dict
 
9
 
10
  import gradio as gr
11
  import pandas as pd
12
  import plotly.express as px
13
  import requests
 
 
14
 
15
  # Configure logging for the application
16
+ logging.basicConfig(level=logging.INFO)
 
 
 
17
  logger = logging.getLogger(__name__)
18
 
19
  # --- Constants and Global Variables ---
20
 
21
  CURRENT_YEAR = datetime.now().year
22
+ NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
23
+
24
+ # In-memory LRU cache (by insertion order) to store DataFrames for recent years.
25
+ CACHE_MAX_SIZE = 3
26
+ DATAFRAME_CACHE: Dict[int, pd.DataFrame] = OrderedDict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Profiles for tailoring LLM-generated summaries to different audiences
29
  AUDIENCE_PROFILES = {
 
59
  }
60
  }
61
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # --- Data Fetching and Parsing ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def get_cve_dataframe(year: int) -> pd.DataFrame:
66
  """
67
+ Downloads, parses, and caches the NVD feed for a specific year.
68
+ It returns a pandas DataFrame. Caching is used to avoid repeated downloads.
69
  """
70
+ if year in DATAFRAME_CACHE:
71
+ logger.info(f"Cache hit for year {year}.")
72
+ DATAFRAME_CACHE.move_to_end(year) # Mark as recently used
73
+ return DATAFRAME_CACHE[year]
 
 
 
 
 
 
 
 
74
 
75
+ logger.info(f"Cache miss. Downloading NVD data for year {year}.")
76
+ url = NVD_BASE_URL.format(year=year)
 
 
 
 
 
77
 
78
  try:
79
+ response = requests.get(url, timeout=30)
80
+ response.raise_for_status()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ with gzip.GzipFile(fileobj=BytesIO(response.content)) as f:
83
+ nvd_data = json.load(f)
 
 
 
 
 
 
 
 
84
 
85
+ df = parse_cve_items(nvd_data)
 
 
86
 
87
+ if len(DATAFRAME_CACHE) >= CACHE_MAX_SIZE:
88
+ DATAFRAME_CACHE.popitem(last=False)
89
+ DATAFRAME_CACHE[year] = df
90
+ return df
 
 
 
 
 
91
 
 
 
 
92
  except requests.exceptions.HTTPError as e:
93
  logger.error(f"HTTP Error for {year}: {e}")
94
+ raise gr.Error(f"Failed to download data for {year}. The feed may be unavailable.")
 
 
 
95
  except Exception as e:
96
+ logger.error(f"Error processing feed for {year}: {e}")
97
  raise gr.Error(f"An unexpected error occurred: {str(e)}")
98
 
99
+ def parse_cve_items(nvd_data: dict) -> pd.DataFrame:
 
100
  """
101
+ Extracts vulnerability details from the raw NVD JSON data into a structured DataFrame.
102
  """
103
  rows = []
104
+ for item in nvd_data.get("CVE_Items", []):
105
+ try:
106
+ cve_id = item.get("cve", {}).get("CVE_data_meta", {}).get("ID", "N/A")
107
+ desc_data = item.get("cve", {}).get("description", {}).get("description_data", [])
108
+ description = desc_data[0].get("value", "No description") if desc_data else "No description"
109
+ published = item.get("publishedDate", "")
110
+ base_score, severity, attack_vector = None, "N/A", "N/A"
111
+
112
+ if "baseMetricV3" in item.get("impact", {}):
113
+ impact_v3 = item["impact"]["baseMetricV3"]["cvssV3"]
114
+ base_score = impact_v3.get("baseScore")
115
+ severity = impact_v3.get("baseSeverity")
116
+ attack_vector = impact_v3.get("attackVector")
117
+ elif "baseMetricV2" in item.get("impact", {}):
118
+ impact_v2 = item["impact"]["baseMetricV2"]
119
+ base_score = impact_v2["cvssV2"].get("baseScore")
120
+ severity = impact_v2.get("severity")
121
+ attack_vector = impact_v2.get("accessVector")
122
+
123
+ problem_types = item.get("cve", {}).get("problemtype", {}).get("problemtype_data", [])
124
+ cwe_ids = [desc["value"] for pt in problem_types for desc in pt.get("description", []) if desc.get("value", "").startswith("CWE-")]
125
+
126
+ rows.append({
127
+ "CVE_ID": cve_id, "Description": description, "Published": published[:10],
128
+ "Base_Score": base_score, "Severity": severity, "Attack_Vector": attack_vector,
129
+ "CWE_IDs": ", ".join(cwe_ids) if cwe_ids else "N/A"
130
+ })
131
+ except Exception as e:
132
+ cve_id_str = cve_id if 'cve_id' in locals() else "Unknown"
133
+ logger.warning(f"Skipping malformed CVE item ({cve_id_str}): {e}")
134
  continue
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  df = pd.DataFrame(rows)
137
+ if "Base_Score" in df.columns:
138
+ df["Base_Score"] = pd.to_numeric(df["Base_Score"], errors='coerce')
 
139
  return df
140
 
141
 
 
146
  Generates a tailored CVE summary using the Hugging Face Inference API.
147
  """
148
  if not hf_token:
149
+ raise gr.Error("Hugging Face API token is not configured as a Space Secret.")
150
+ if not cve_description or not audience:
151
+ return "Please select a CVE and an audience first."
 
 
152
 
153
  api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
154
  headers = {"Authorization": f"Bearer {hf_token}"}
155
+ profile = AUDIENCE_PROFILES.get(audience, {})
156
 
157
  prompt = f"""<s>[INST] You are an expert cybersecurity analyst. Your task is to rewrite the following technical CVE description into a concise, actionable summary for a specific professional audience.
158
 
159
+ **Target Audience:** {audience}
160
+ - **Focus:** {profile.get('focus', 'N/A')}
161
+ - **Key Priorities:** {', '.join(profile.get('priorities', []))}
162
+
163
+ **Original CVE Description:**
164
+ ---
165
+ {cve_description}
166
+ ---
167
+
168
+ Rewrite the description in a {profile.get('tone', 'professional')} tone, focusing on what matters most to this audience. Do not start with "As a [role]...". Directly provide the summary. [/INST]"""
169
+
170
+ payload = {"inputs": prompt, "parameters": {"max_new_tokens": 256, "return_full_text": False}}
 
 
 
 
 
 
 
 
171
 
172
  try:
173
+ response = requests.post(api_url, headers=headers, json=payload, timeout=45)
174
+ if response.status_code != 200:
175
+ error_message = response.json().get("error", "Unknown error")
176
+ logger.error(f"Inference API Error: {error_message}")
177
+ return f"Error from API: {error_message}. The model might be loading, please try again."
 
 
 
 
 
 
178
 
179
+ return response.json()[0]['generated_text'].strip()
180
+
181
+ except requests.exceptions.RequestException as e:
182
+ logger.error(f"Request to Inference API failed: {e}")
183
+ return f"Error: Could not connect to the Hugging Face API. {e}"
 
 
 
 
 
 
 
 
 
 
184
 
185
 
186
  # --- Analysis and Visualization ---
187
 
188
+ def analyze_and_visualize(df: pd.DataFrame, severity: str, vector: str, search: str):
 
 
 
 
 
189
  """
190
+ Filters the main DataFrame and generates all outputs: a filtered table,
191
+ visualizations, and a summary markdown string.
192
  """
193
  if df is None or df.empty:
194
+ return pd.DataFrame(), None, None, "### No Data Loaded"
195
+
196
+ filtered_df = df.copy()
197
+ if severity != "All":
198
+ filtered_df = filtered_df[filtered_df["Severity"] == severity]
199
+ if vector != "All":
200
+ filtered_df = filtered_df[filtered_df["Attack_Vector"] == vector]
201
+ if search:
202
+ mask = (filtered_df["CVE_ID"].str.contains(search, case=False, na=False) |
203
+ filtered_df["Description"].str.contains(search, case=False, na=False) |
204
+ filtered_df["CWE_IDs"].str.contains(search, case=False, na=False))
205
+ filtered_df = filtered_df[mask]
206
+
207
+ return filtered_df, create_severity_chart(filtered_df), create_timeline_chart(filtered_df), create_summary_text(filtered_df)
208
+
209
+ def create_severity_chart(df: pd.DataFrame):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  """Creates a bar chart for CVE severity distribution."""
211
+ if df.empty: return None
212
+ order = ["CRITICAL", "HIGH", "MEDIUM", "LOW", "N/A"]
213
+ counts = df["Severity"].value_counts().reindex(order, fill_value=0)
214
+ color_map = {"CRITICAL": "#8B0000", "HIGH": "#FF4500", "MEDIUM": "#FFA500", "LOW": "#FFD700", "N/A": "#D3D3D3"}
215
+
216
+ fig = px.bar(counts, x=counts.index, y=counts.values, labels={"x": "Severity", "y": "Count"},
217
+ title="CVE Severity Distribution", color=counts.index, color_discrete_map=color_map, text_auto=True)
218
+ fig.update_layout(showlegend=False, xaxis={'categoryorder':'array', 'categoryarray':order})
219
+ return fig
 
 
 
 
 
 
 
 
 
220
 
221
def create_timeline_chart(df: pd.DataFrame):
    """Creates a line chart showing CVE publications over time."""
    if df.empty or "Published" not in df.columns:
        return None

    timeline = df.copy()
    # Unparseable dates become NaT and are dropped rather than raising.
    timeline["Date"] = pd.to_datetime(timeline["Published"], errors="coerce")
    timeline = timeline.dropna(subset=["Date"])
    if timeline.empty:
        return None

    # Aggregate publication counts per calendar month.
    monthly_counts = timeline.set_index("Date").resample("M").size()
    return px.line(
        x=monthly_counts.index,
        y=monthly_counts.values,
        labels={"x": "Month", "y": "Number of CVEs"},
        title="CVE Publications Timeline",
        markers=True,
    )
233
 
234
def create_summary_text(df: pd.DataFrame) -> str:
    """Generates a markdown string with key statistics from the DataFrame."""
    if df.empty:
        return "### No results match your filter criteria."

    # Mean is computed over non-null scores only; show N/A when none exist.
    valid_scores = df["Base_Score"].dropna()
    mean_display = "N/A" if valid_scores.empty else f"{valid_scores.mean():.2f}"

    severity_counts = df["Severity"].value_counts()
    critical_total = int(severity_counts.get("CRITICAL", 0))
    high_total = int(severity_counts.get("HIGH", 0))

    report_lines = [
        "### Summary Statistics",
        f"- **Total CVEs Found:** {len(df):,}",
        f"- **Critical:** {critical_total:,}",
        f"- **High:** {high_total:,}",
        f"- **Average Base Score:** {mean_display}",
    ]
    return "\n".join(report_lines)
244
+
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # --- Gradio UI and Event Logic ---
247
 
248
def create_dashboard():
    """Builds the entire Gradio interface and defines event handling.

    Returns:
        gr.Blocks: the fully wired dashboard, ready for `.launch()`.
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="CVE Dashboard") as dashboard:
        # Session-scoped state: the full (unfiltered) DataFrame for the
        # selected year, the description of the currently selected CVE row,
        # and the HF token picked up once from the environment.
        df_state = gr.State()
        selected_cve_description = gr.State("")
        hf_token_state = gr.State(os.environ.get("HF_TOKEN"))

        gr.Markdown("# CVE Dashboard: NVD Feed Analyzer")
        gr.Markdown("Explore CVE data from the National Vulnerability Database. **Note:** This demo uses deprecated NVD JSON feeds; a production app should use the NVD API 2.0.")

        with gr.Row():
            # Left column: filter controls.
            with gr.Column(scale=1):
                year_dd = gr.Dropdown(choices=list(range(2002, CURRENT_YEAR + 1))[::-1], value=CURRENT_YEAR, label="1. Select Year")
                severity_dd = gr.Dropdown(choices=["All", "CRITICAL", "HIGH", "MEDIUM", "LOW"], value="All", label="2. Filter by Severity")
                vector_dd = gr.Dropdown(choices=["All", "NETWORK", "ADJACENT_NETWORK", "LOCAL", "PHYSICAL"], value="All", label="3. Filter by Attack Vector")
                search_tb = gr.Textbox(label="4. Search Keyword", placeholder="e.g., 'Log4j', 'CWE-79', ...")
                filter_btn = gr.Button("Apply Filters", variant="primary")

            # Right column: summary text plus tabbed table/chart views.
            with gr.Column(scale=3):
                summary_out = gr.Markdown()
                with gr.Tabs():
                    with gr.TabItem("📊 Data Table"):
                        # NOTE(review): `max_rows` was removed in Gradio 4.x
                        # in favor of pagination settings — confirm the pinned
                        # gradio version accepts it.
                        table_out = gr.DataFrame(headers=["CVE_ID", "Severity", "Base_Score", "Description"], wrap=True, max_rows=15, interactive=True)
                    with gr.TabItem("📈 Severity Chart"):
                        plot_severity_out = gr.Plot()
                    with gr.TabItem("📉 Timeline Chart"):
                        plot_timeline_out = gr.Plot()

        # Collapsed by default; opened programmatically when a row is selected.
        with gr.Accordion("Tailored CVE Analysis (Select a row in the table above)", open=False) as llm_accordion:
            with gr.Row():
                with gr.Column(scale=2):
                    original_desc_out = gr.Textbox(label="Full Original CVE Description", lines=8, interactive=False)
                with gr.Column(scale=1):
                    audience_dd = gr.Dropdown(choices=list(AUDIENCE_PROFILES.keys()), label="Select Audience", value="Cybersecurity Professional")
                    generate_btn = gr.Button("Generate Tailored Summary", variant="primary")
                    summary_llm_out = gr.Markdown("*Your tailored summary will appear here...*")

        # --- Event Handling Logic ---

        def on_year_change(year):
            # Reload the year's data and re-run the analysis with all
            # filters reset to their defaults.
            df = get_cve_dataframe(year)
            return df, *analyze_and_visualize(df, "All", "All", "")

        def on_select_cve(df: pd.DataFrame, evt: gr.SelectData):
            # Triggered by a table row click; evt.index[0] is the row index
            # in the currently displayed frame. Returns the description for
            # both the state and the textbox, and opens the LLM accordion.
            if evt.value is None: return "", "", gr.update(visible=False)
            full_description = df.iloc[evt.index[0]]["Description"]
            return full_description, full_description, gr.update(visible=True)

        filter_inputs = [df_state, severity_dd, vector_dd, search_tb]
        analysis_outputs = [table_out, plot_severity_out, plot_timeline_out, summary_out]

        # Initial population on page load, and full reload on year change.
        year_dd.change(fn=on_year_change, inputs=[year_dd], outputs=[df_state] + analysis_outputs)
        dashboard.load(fn=on_year_change, inputs=[year_dd], outputs=[df_state] + analysis_outputs)

        # Wire every filter control to the same analysis callback, picking the
        # event type appropriate for each component (click/submit/change).
        for control in [severity_dd, vector_dd, filter_btn, search_tb]:
            event = control.click if isinstance(control, gr.Button) else (control.submit if isinstance(control, gr.Textbox) else control.change)
            event(fn=analyze_and_visualize, inputs=filter_inputs, outputs=analysis_outputs)

        table_out.select(fn=on_select_cve, inputs=[df_state], outputs=[selected_cve_description, original_desc_out, llm_accordion], show_progress="hidden")
        generate_btn.click(fn=generate_tailored_summary, inputs=[selected_cve_description, audience_dd, hf_token_state], outputs=[summary_llm_out])

    return dashboard
309
 
310
if __name__ == "__main__":
    # Build the UI and start the Gradio server with default settings.
    create_dashboard().launch()
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio
2
  pandas
3
  plotly
4
- requests
5
- urllib3
 
1
  gradio
2
  pandas
3
  plotly
4
+ requests