jesusgj committed on
Commit
b9ee2ea
·
1 Parent(s): 0a282bd

Modified files

Browse files
Files changed (2) hide show
  1. agent.py +21 -9
  2. app.py +55 -111
agent.py CHANGED
@@ -5,10 +5,11 @@ import re
5
  from functools import lru_cache, wraps
6
  from typing import Optional, Dict
7
 
8
- from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
9
- from dotenv import load_dotenv
10
  from requests.exceptions import RequestException
11
  import wikipedia
 
12
  from llama_index.readers.web import BeautifulSoupWebReader
13
 
14
  from smolagents import (
@@ -24,11 +25,17 @@ def configure_logging():
24
  """Sets up detailed logging configuration."""
25
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
26
 
27
- def load_api_keys() -> Dict[str, Optional[str]]:
28
- """Loads API keys from environment variables."""
29
- load_dotenv()
30
- keys = {'together': os.getenv('TOGETHER_API_KEY'), 'serpapi': os.getenv('SERPAPI_API_KEY')}
31
- if not keys['together']: raise ValueError("TOGETHER_API_KEY is required but not found.")
 
 
 
 
 
 
32
  return keys
33
 
34
  # --- Custom Exceptions ---
@@ -104,7 +111,7 @@ def initialize_agent():
104
  logging.info("πŸš€ Starting GAIA agent initialization...")
105
 
106
  try:
107
- api_keys = load_api_keys()
108
  except ValueError as e:
109
  logging.error(f"FATAL: {e}")
110
  return None
@@ -243,7 +250,12 @@ def initialize_agent():
243
  # --- Main Execution Block for Local Testing ---
244
 
245
  def main():
246
- """Tests the agent with sample GAIA-style questions."""
 
 
 
 
 
247
  configure_logging()
248
  logging.info("πŸ§ͺ Starting local agent testing...")
249
 
 
5
  from functools import lru_cache, wraps
6
  from typing import Optional, Dict
7
 
8
+ # The python-dotenv library is no longer needed as keys are read from the environment (HF Secrets)
9
+ # from dotenv import load_dotenv
10
  from requests.exceptions import RequestException
11
  import wikipedia
12
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
13
  from llama_index.readers.web import BeautifulSoupWebReader
14
 
15
  from smolagents import (
 
25
  """Sets up detailed logging configuration."""
26
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
27
 
28
+ def get_api_keys_from_env() -> Dict[str, Optional[str]]:
29
+ """
30
+ Retrieves API keys directly from environment variables (e.g., Hugging Face Secrets).
31
+ """
32
+ # load_dotenv() is removed. We expect the platform to provide the environment variables.
33
+ keys = {
34
+ 'together': os.getenv('TOGETHER_API_KEY'),
35
+ 'serpapi': os.getenv('SERPAPI_API_KEY')
36
+ }
37
+ if not keys['together']:
38
+ raise ValueError("TOGETHER_API_KEY is required but not found in environment variables.")
39
  return keys
40
 
41
  # --- Custom Exceptions ---
 
111
  logging.info("πŸš€ Starting GAIA agent initialization...")
112
 
113
  try:
114
+ api_keys = get_api_keys_from_env()
115
  except ValueError as e:
116
  logging.error(f"FATAL: {e}")
117
  return None
 
250
  # --- Main Execution Block for Local Testing ---
251
 
252
  def main():
253
+ """
254
+ Tests the agent with sample GAIA-style questions.
255
+ For local testing, ensure you have set the required environment variables:
256
+ export TOGETHER_API_KEY="your_key"
257
+ export SERPAPI_API_KEY="your_key"
258
+ """
259
  configure_logging()
260
  logging.info("πŸ§ͺ Starting local agent testing...")
261
 
app.py CHANGED
@@ -9,37 +9,21 @@ from agent import initialize_agent # Import the agent initialization function
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
- # --- Helper Functions ---
 
 
 
 
 
 
 
 
17
 
18
- def extract_final_answer_from_response(response: str) -> str:
19
- """
20
- Extract the final answer from agent response following GAIA format.
21
- The agent should return responses ending with 'FINAL ANSWER: [answer]'
22
- """
23
- if not response:
24
- return ""
25
-
26
- # The agent wrapper should already return just the final answer
27
- # but this is a safety check in case the format isn't perfect
28
- if isinstance(response, str):
29
- # Look for FINAL ANSWER pattern
30
- final_answer_pattern = re.compile(r'FINAL\s+ANSWER\s*:\s*(.+?)(?:\n|$)', re.IGNORECASE | re.DOTALL)
31
- match = final_answer_pattern.search(response)
32
-
33
- if match:
34
- answer = match.group(1).strip()
35
- # Clean up the answer
36
- answer = re.sub(r'\s+', ' ', answer)
37
- answer = answer.rstrip('.')
38
- return answer
39
-
40
- # If no FINAL ANSWER pattern found, return the response as is
41
- # (the agent wrapper should have already cleaned it)
42
- return str(response).strip()
43
 
44
  def _fetch_questions(api_url: str) -> list:
45
  """Fetches evaluation questions from the API."""
@@ -76,15 +60,10 @@ def _run_agent_on_questions(agent, questions_data: list) -> tuple[list, list]:
76
  try:
77
  logger.info(f"Processing task {task_id}: {question_text[:100]}...")
78
 
79
- # The agent is now wrapped to return GAIA-compliant format
80
- raw_response = agent(question_text)
81
-
82
- # Extract the final answer (should already be clean from wrapper)
83
- submitted_answer = extract_final_answer_from_response(raw_response)
84
 
85
- # Log the full interaction for debugging
86
- logger.info(f"Task {task_id} - Raw response: {raw_response}")
87
- logger.info(f"Task {task_id} - Final answer: {submitted_answer}")
88
 
89
  answers_payload.append({
90
  "task_id": task_id,
@@ -94,13 +73,12 @@ def _run_agent_on_questions(agent, questions_data: list) -> tuple[list, list]:
94
  results_log.append({
95
  "Task ID": task_id,
96
  "Question": question_text,
97
- "Raw Response": raw_response,
98
  "Final Answer": submitted_answer
99
  })
100
 
101
  except Exception as e:
102
  error_msg = f"AGENT ERROR: {e}"
103
- logger.error(f"Error running agent on task {task_id}: {e}")
104
 
105
  answers_payload.append({
106
  "task_id": task_id,
@@ -110,7 +88,6 @@ def _run_agent_on_questions(agent, questions_data: list) -> tuple[list, list]:
110
  results_log.append({
111
  "Task ID": task_id,
112
  "Question": question_text,
113
- "Raw Response": error_msg,
114
  "Final Answer": error_msg
115
  })
116
 
@@ -151,44 +128,37 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
151
  """
152
  Orchestrates the fetching of questions, running the agent, and submitting answers.
153
  """
154
- username = None
155
- if profile:
156
- username = profile.username
157
- logger.info(f"User logged in: {username}")
158
- else:
159
- logger.info("User not logged in.")
160
- return "Please Login to Hugging Face with the button.", None
161
 
162
- if not username:
163
- return "Hugging Face username not found. Please ensure you are logged in.", None
164
 
165
  space_id = os.getenv("SPACE_ID")
166
  if not space_id:
167
  logger.error("SPACE_ID environment variable not found. Cannot determine agent_code URL.")
168
- return "Error: SPACE_ID not set. Cannot determine agent_code URL.", None
169
  agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"
170
 
171
  status_message = ""
172
  results_df = pd.DataFrame()
 
173
 
174
  try:
175
- # 1. Instantiate Agent
176
- logger.info("Initializing agent...")
177
- agent = initialize_agent()
178
- if agent is None:
179
- raise RuntimeError("Agent initialization failed. Check agent.py for details.")
180
- logger.info("Agent initialized successfully.")
181
-
182
- # 2. Fetch Questions
183
  questions_data = _fetch_questions(DEFAULT_API_URL)
184
 
185
- # 3. Run Agent on Questions
186
- answers_payload, results_log = _run_agent_on_questions(agent, questions_data)
187
  if not answers_payload:
188
  status_message = "Agent did not produce any answers to submit."
189
  return status_message, pd.DataFrame(results_log)
190
 
191
- # 4. Submit Answers
192
  submission_result = _submit_answers(DEFAULT_API_URL, username, agent_code_url, answers_payload)
193
 
194
  final_status = (
@@ -205,14 +175,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
205
  except RuntimeError as e:
206
  status_message = f"❌ Operation Failed: {e}"
207
  logger.error(status_message)
208
- # If an error occurs during agent run, results_log might be partially filled
209
- if 'results_log' in locals():
210
- results_df = pd.DataFrame(results_log)
211
- else:
212
- results_df = pd.DataFrame([{"Status": "Error", "Details": str(e)}])
213
  except Exception as e:
214
  status_message = f"πŸ’₯ Critical Error: {e}"
215
- logger.error(status_message)
216
  results_df = pd.DataFrame([{"Status": "Critical Error", "Details": str(e)}])
217
 
218
  return status_message, results_df
@@ -222,36 +188,26 @@ with gr.Blocks(title="GAIA Benchmark Agent", theme=gr.themes.Soft()) as demo:
222
  gr.Markdown("""
223
  # 🧠 GAIA Benchmark Evaluation Agent
224
 
225
- **Enhanced AI Agent for General AI Assistant (GAIA) Benchmark**
226
  """)
227
 
228
  gr.Markdown("""
229
  ## 📋 Instructions:
230
 
231
- 1. **Setup**: Clone this Space and ensure your `.env` file contains:
232
- ```
233
- TOGETHER_API_KEY=your_together_api_key
234
- SERPAPI_API_KEY=your_serpapi_key
235
- ```
236
 
237
- 2. **Login**: Use the button below to log in with your Hugging Face account
238
 
239
- 3. **Run**: Click 'Run Evaluation & Submit' to process all GAIA questions
240
 
241
- 4. **Wait**: The process may take several minutes depending on question complexity
242
 
243
  ---
244
 
245
- ### 🎯 GAIA Format Requirements:
246
- - **Numbers**: No commas, no units (unless specified)
247
- - **Strings**: No articles (a, an, the), no abbreviations
248
- - **Lists**: Comma-separated values following above rules
249
-
250
- ### 🔧 Agent Capabilities:
251
- - **Web Research**: Google Search, Wikipedia, webpage analysis
252
- - **Video Analysis**: YouTube transcript processing
253
- - **Mathematical Computing**: Python execution with scientific libraries
254
- - **Multi-step Reasoning**: Complex problem decomposition
255
  """)
256
 
257
  with gr.Row():
@@ -269,7 +225,7 @@ with gr.Blocks(title="GAIA Benchmark Agent", theme=gr.themes.Soft()) as demo:
269
  label="πŸ“ Detailed Question Results",
270
  wrap=True,
271
  interactive=False,
272
- column_widths=["10%", "40%", "25%", "25%"]
273
  )
274
 
275
  run_button.click(
@@ -277,44 +233,32 @@ with gr.Blocks(title="GAIA Benchmark Agent", theme=gr.themes.Soft()) as demo:
277
  outputs=[status_output, results_table]
278
  )
279
 
280
- gr.Markdown("""
281
- ---
282
- ### 💡 Tips for Better Performance:
283
- - Ensure stable internet connection for web searches
284
- - Monitor the status output for real-time progress
285
- - Check the detailed results table for individual question analysis
286
- - The agent automatically formats answers according to GAIA requirements
287
- """)
288
-
289
  if __name__ == "__main__":
290
  print("\n" + "="*70)
291
- print("πŸš€ GAIA BENCHMARK AGENT STARTING")
292
  print("="*70)
293
 
294
- # Check environment variables
295
- space_host = os.getenv("SPACE_HOST")
296
  space_id = os.getenv("SPACE_ID")
297
  together_key = os.getenv("TOGETHER_API_KEY")
298
  serpapi_key = os.getenv("SERPAPI_API_KEY")
299
 
300
- if space_host:
301
- print(f"βœ… SPACE_HOST: {space_host}")
302
- print(f" 🌐 Runtime URL: https://{space_host}.hf.space")
303
- else:
304
- print("ℹ️ SPACE_HOST not found (local development)")
305
-
306
  if space_id:
307
  print(f"βœ… SPACE_ID: {space_id}")
308
- print(f" πŸ“‚ Repo URL: https://huggingface.co/spaces/{space_id}")
309
  else:
310
- print("⚠️ SPACE_ID not found - submissions may fail")
 
 
 
 
311
 
312
- print(f"πŸ”‘ API Keys Status:")
313
- print(f" Together AI: {'βœ… Set' if together_key else '❌ Missing'}")
314
- print(f" SerpAPI: {'βœ… Set' if serpapi_key else '⚠️ Missing (optional)'}")
315
 
316
  print("="*70)
317
- print("🎯 Launching GAIA Benchmark Interface...")
318
  print("="*70 + "\n")
319
 
320
- demo.launch(debug=True, share=False)
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
+ # --- Logging Configuration ---
13
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
14
  logger = logging.getLogger(__name__)
15
 
16
+ # --- Global Agent Initialization ---
17
+ # The agent is initialized once when the Space starts up.
18
+ # This is critical for performance and to avoid reloading the model on every request.
19
+ logger.info("πŸš€ Application starting up! Initializing the GAIA agent...")
20
+ AGENT = initialize_agent()
21
+ if AGENT is None:
22
+ logger.error("πŸ’₯ FATAL: Agent initialization failed. The application will not be able to process questions.")
23
+ else:
24
+ logger.info("βœ… Agent initialized successfully.")
25
 
26
+ # --- Helper Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def _fetch_questions(api_url: str) -> list:
29
  """Fetches evaluation questions from the API."""
 
60
  try:
61
  logger.info(f"Processing task {task_id}: {question_text[:100]}...")
62
 
63
+ # The agent wrapper returns the final, normalized answer directly.
64
+ submitted_answer = agent(question_text)
 
 
 
65
 
66
+ logger.info(f"Task {task_id} - Final answer from agent: {submitted_answer}")
 
 
67
 
68
  answers_payload.append({
69
  "task_id": task_id,
 
73
  results_log.append({
74
  "Task ID": task_id,
75
  "Question": question_text,
 
76
  "Final Answer": submitted_answer
77
  })
78
 
79
  except Exception as e:
80
  error_msg = f"AGENT ERROR: {e}"
81
+ logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
82
 
83
  answers_payload.append({
84
  "task_id": task_id,
 
88
  results_log.append({
89
  "Task ID": task_id,
90
  "Question": question_text,
 
91
  "Final Answer": error_msg
92
  })
93
 
 
128
  """
129
  Orchestrates the fetching of questions, running the agent, and submitting answers.
130
  """
131
+ if not profile:
132
+ logger.warning("Attempted to run evaluation without being logged in.")
133
+ return "Please Login to Hugging Face with the button above.", None
134
+
135
+ username = profile.username
136
+ logger.info(f"User '{username}' initiated evaluation.")
 
137
 
138
+ if AGENT is None:
139
+ return "❌ Error: The agent failed to initialize on startup. Please check the Space logs for details.", None
140
 
141
  space_id = os.getenv("SPACE_ID")
142
  if not space_id:
143
  logger.error("SPACE_ID environment variable not found. Cannot determine agent_code URL.")
144
+ return "❌ Error: SPACE_ID not set. This is required for submission.", None
145
  agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"
146
 
147
  status_message = ""
148
  results_df = pd.DataFrame()
149
+ results_log = []
150
 
151
  try:
152
+ # 1. Fetch Questions
 
 
 
 
 
 
 
153
  questions_data = _fetch_questions(DEFAULT_API_URL)
154
 
155
+ # 2. Run Agent on Questions (using the pre-initialized global agent)
156
+ answers_payload, results_log = _run_agent_on_questions(AGENT, questions_data)
157
  if not answers_payload:
158
  status_message = "Agent did not produce any answers to submit."
159
  return status_message, pd.DataFrame(results_log)
160
 
161
+ # 3. Submit Answers
162
  submission_result = _submit_answers(DEFAULT_API_URL, username, agent_code_url, answers_payload)
163
 
164
  final_status = (
 
175
  except RuntimeError as e:
176
  status_message = f"❌ Operation Failed: {e}"
177
  logger.error(status_message)
178
+ results_df = pd.DataFrame(results_log) if results_log else pd.DataFrame([{"Status": "Error", "Details": str(e)}])
 
 
 
 
179
  except Exception as e:
180
  status_message = f"πŸ’₯ Critical Error: {e}"
181
+ logger.error(status_message, exc_info=True)
182
  results_df = pd.DataFrame([{"Status": "Critical Error", "Details": str(e)}])
183
 
184
  return status_message, results_df
 
188
  gr.Markdown("""
189
  # 🧠 GAIA Benchmark Evaluation Agent
190
 
191
+ **An advanced agent designed to tackle the General AI Assistant (GAIA) benchmark.**
192
  """)
193
 
194
  gr.Markdown("""
195
  ## 📋 Instructions:
196
 
197
+ 1. **Add Secrets**: If you have cloned this Space, go to the **Settings** tab and add your API keys as **Secrets**.
198
+ * `TOGETHER_API_KEY`: Your key from Together AI.
199
+ * `SERPAPI_API_KEY`: Your key from SerpApi for Google Search (optional but recommended).
 
 
200
 
201
+ 2. **Login**: Use the button below to log in with your Hugging Face account. Your username is required for submission.
202
 
203
+ 3. **Run**: Click 'Run Evaluation & Submit' to start the process. The agent will fetch all questions, solve them, and submit the answers automatically.
204
 
205
+ 4. **Wait**: The process can take several minutes. You can monitor the progress in the status box and see detailed results in the table below.
206
 
207
  ---
208
 
209
+ ### 🎯 GAIA Answer Formatting
210
+ The agent is designed to automatically format answers according to GAIA's strict requirements (e.g., no commas in numbers, no articles in strings).
 
 
 
 
 
 
 
 
211
  """)
212
 
213
  with gr.Row():
 
225
  label="πŸ“ Detailed Question Results",
226
  wrap=True,
227
  interactive=False,
228
+ column_widths=["10%", "60%", "30%"]
229
  )
230
 
231
  run_button.click(
 
233
  outputs=[status_output, results_table]
234
  )
235
 
 
 
 
 
 
 
 
 
 
236
  if __name__ == "__main__":
237
  print("\n" + "="*70)
238
+ print("πŸš€ GAIA BENCHMARK AGENT STARTING UP")
239
  print("="*70)
240
 
241
+ # Check environment variables loaded from HF Secrets
 
242
  space_id = os.getenv("SPACE_ID")
243
  together_key = os.getenv("TOGETHER_API_KEY")
244
  serpapi_key = os.getenv("SERPAPI_API_KEY")
245
 
 
 
 
 
 
 
246
  if space_id:
247
  print(f"βœ… SPACE_ID: {space_id}")
248
+ print(f" - Submission URL will be: https://huggingface.co/spaces/{space_id}")
249
  else:
250
+ print("⚠️ SPACE_ID not found - submissions will fail. This is normal for local dev.")
251
+
252
+ print(f"πŸ”‘ API Keys Status (from Secrets):")
253
+ print(f" - Together AI: {'βœ… Set' if together_key else '❌ Missing - Agent will fail to initialize!'}")
254
+ print(f" - SerpAPI: {'βœ… Set' if serpapi_key else '⚠️ Missing - Google Search tool will be disabled.'}")
255
 
256
+ if not together_key:
257
+ print("\n‼️ CRITICAL: TOGETHER_API_KEY is not set in the Space Secrets.")
258
+ print(" Please add it in the 'Settings' tab of your Space.")
259
 
260
  print("="*70)
261
+ print("🎯 Launching Gradio Interface...")
262
  print("="*70 + "\n")
263
 
264
+ demo.launch(debug=False, share=False)