Commit c056e7c · 1 Parent(s): 2512471
chibuikeeugene committed

completed the agent workflow

Files changed (5)
  1. .gitignore +4 -1
  2. agent.py +11 -1
  3. app.py +31 -16
  4. requirements.txt +4 -2
  5. tools.py +47 -2
.gitignore CHANGED
@@ -1 +1,4 @@
- *.env
+ *.env
+ /gaia_env
+ *.bin
+ *.pyc
agent.py CHANGED
@@ -1 +1,11 @@
- # use a multimodal llm
+ # use a multimodal llm
+ from llama_index.core.agent.workflow import AgentWorkflow
+
+
+ def basic_agent(tool, llm_model):
+     """a basic agent with the ability to take decisions, act by calling the right tools and provide answer to the input prompt query string"""
+     agent = AgentWorkflow.from_tools_or_functions(
+         tools_or_functions=tool,
+         llm=llm_model
+     )
+     return agent
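
One thing to keep in mind when wiring this helper up: `AgentWorkflow.run()` is asynchronous and returns an awaitable handler rather than a finished string, so the agent it builds has to be driven from an event loop. A minimal usage sketch, assuming the current llama-index `AgentWorkflow` API (the empty tool list here is a placeholder for illustration):

    import asyncio

    from agent import basic_agent
    from llama_index.llms.ollama import Ollama

    llm = Ollama(model="llama3.1", request_timeout=120.0)
    agent = basic_agent(tool=[], llm_model=llm)  # placeholder tool list for the sketch

    async def main() -> None:
        # Awaiting the handler returned by run() yields the final AgentOutput,
        # which str() renders as the answer text.
        result = await agent.run(user_msg="What is 2 + 2?")
        print(str(result))

    asyncio.run(main())
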
app.py CHANGED
@@ -1,23 +1,37 @@
+ from json import tool
  import os
  import gradio as gr
  import requests
  import inspect
  import pandas as pd
+ from agent import basic_agent
+ from tools import search_tool, image_tool, video_tool
+ from llama_index.llms.ollama import Ollama
+ from llama_index.core.workflow import Context
+
+

  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
+ # # --- Basic Agent Definition ---
+ # # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+ # class BasicAgent:
+ #     def __init__(self):
+ #         print("BasicAgent initialized.")
+ #     def __call__(self, question: str) -> str:
+ #         print(f"Agent received question (first 50 chars): {question[:50]}...")
+ #         fixed_answer = "This is a default answer."
+ #         print(f"Agent returning fixed answer: {fixed_answer}")
+ #         return fixed_answer
+ tools = [search_tool, image_tool, video_tool]
+
+ llm = Ollama(model="llama3.1", request_timeout=120.0)
+
+ agent = basic_agent(tool=tools, llm_model=llm)
+
+ ctx = Context(agent)

  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
@@ -39,11 +53,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      submit_url = f"{api_url}/submit"

      # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = BasicAgent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
+     # try:
+     #     agent = BasicAgent()
+     # except Exception as e:
+     #     print(f"Error instantiating agent: {e}")
+     #     return f"Error initializing agent: {e}", None
+
      # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      print(agent_code)
@@ -80,7 +95,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
+             submitted_answer = agent.run(question_text, ctx=ctx)
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
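
Two caveats in this hunk: `from json import tool` looks like a stray auto-import (`json.tool` is the standard library's JSON pretty-printer CLI, unrelated to agent tools), and because `AgentWorkflow.run()` returns an awaitable handler, `submitted_answer` as written would hold that handler object rather than the answer text. A minimal synchronous wrapper, sketched under the assumption that `agent` and `ctx` are built as above (`answer_question` is a hypothetical helper, not part of this commit):

    import asyncio

    # Hypothetical helper: resolve the awaitable returned by AgentWorkflow.run()
    # so the submission loop stores a plain string answer.
    def answer_question(agent, ctx, question_text: str) -> str:
        async def _run() -> str:
            result = await agent.run(user_msg=question_text, ctx=ctx)
            return str(result)  # the final AgentOutput renders as the answer text
        return asyncio.run(_run())

    # Inside the loop:
    # submitted_answer = answer_question(agent, ctx, question_text)
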
requirements.txt CHANGED
@@ -1,7 +1,9 @@
- gradio
+ # gradio
  requests
  llama-index-tools-brave-search
  huggingface_hub
  python-dotenv
  llama-index
- llama-index-llms-ollama
+ llama-index-llms-ollama
+ llama-index-multi-modal-llms-ollama
+ opencv-python
tools.py CHANGED
@@ -1,8 +1,53 @@
  # Define tools such as: Web search tool, image processing tool, language translation tool, video processing tool
  from llama_index.tools.brave_search import BraveSearchToolSpec
  from llama_index.core.tools import FunctionTool
+ import os
+ from llama_index.multi_modal_llms.ollama import OllamaMultiModal
+ from llama_index.core.schema import ImageNode
+ import cv2
+ from PIL import Image
+ from dotenv import load_dotenv

+ load_dotenv()

+ brave_api_key = os.getenv('BRAVE_API_KEY', 'No key')

- def web_search_tool():
-
+ mm_model = OllamaMultiModal(
+     model='llava',
+     temperature=0.7,
+ )
+
+ search_tool_spec = BraveSearchToolSpec(api_key=brave_api_key)
+
+ search_tool = search_tool_spec.to_tool_list()[0]
+
+ # creating an image handling tool
+ def image_handling_tool(input_data: str) -> str:
+     """this tool takes an image file, processes it based on the user or system prompt and generates a response in a string format"""
+     image = ImageNode(image_url=input_data)
+
+     result = mm_model.complete(
+         prompt='Use the context prompt generated by the agent\'s reasoning to answer the question asked on the image',
+         image_documents=[image]
+     )
+     return str(result)
+
+
+ # creating a video handling tool
+ def video_handling_tool(input: str):
+     """this tool takes a video url link, processes it based on the agent's prompt and or context and generates a response in a string format"""
+     # Load video
+     cap = cv2.VideoCapture(input)
+
+     # Read a frame at the 5-second mark
+     cap.set(cv2.CAP_PROP_POS_MSEC, 5000)
+     success, frame = cap.read()
+
+     if success:
+         image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+         prompt = "What is happening in this frame of the video?"
+         response = mm_model.complete(prompt=prompt, image=image)
+         return str(response)
+
+ image_tool = FunctionTool.from_defaults(fn=image_handling_tool)
+ video_tool = FunctionTool.from_defaults(fn=video_handling_tool)
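
Two caveats on the video tool as committed: `OllamaMultiModal.complete()` takes images through the `image_documents` list (as the image tool above already does), not through an `image=` keyword, and the function returns `None` when the frame read fails while never calling `cap.release()`. A reworked sketch under those assumptions, reusing the `mm_model` defined in this file and writing the frame to a temporary file so it can be wrapped in an `ImageDocument`:

    import os
    import tempfile

    import cv2
    from llama_index.core.schema import ImageDocument

    def video_handling_tool(input: str) -> str:
        """Describe the frame at the 5-second mark of a video with the multimodal model."""
        cap = cv2.VideoCapture(input)
        try:
            cap.set(cv2.CAP_PROP_POS_MSEC, 5000)  # seek to the 5-second mark
            success, frame = cap.read()
            if not success:
                return "Could not read a frame from the video."
            # Persist the frame so it can be referenced as an image document.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                cv2.imwrite(tmp.name, frame)  # OpenCV writes BGR frames directly
                frame_path = tmp.name
        finally:
            cap.release()  # always free the capture handle

        try:
            response = mm_model.complete(
                prompt="What is happening in this frame of the video?",
                image_documents=[ImageDocument(image_path=frame_path)],
            )
            return str(response)
        finally:
            os.remove(frame_path)  # clean up the temporary frame file
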