Commit c056e7c · 1 Parent(s): 2512471
chibuikeeugene committed

completed the agent workflow

Files changed (5)
  1. .gitignore +4 -1
  2. agent.py +11 -1
  3. app.py +31 -16
  4. requirements.txt +4 -2
  5. tools.py +47 -2
.gitignore CHANGED
@@ -1 +1,4 @@
- *.env
+ *.env
+ /gaia_env
+ *.bin
+ *.pyc
agent.py CHANGED
@@ -1 +1,11 @@
- # use a multimodal llm
+ # use a multimodal llm
+ from llama_index.core.agent.workflow import AgentWorkflow
+
+
+ def basic_agent(tool, llm_model):
+     """a basic agent with the ability to take decisions, act by calling the right tools and provide answer to the input prompt query string"""
+     agent = AgentWorkflow.from_tools_or_functions(
+         tools_or_functions=tool,
+         llm=llm_model
+     )
+     return agent
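
One thing to keep in mind when wiring this helper up: `AgentWorkflow.run()` is asynchronous and returns an awaitable handler rather than a finished string, so the agent it builds has to be driven from an event loop. A minimal usage sketch, assuming the current llama-index `AgentWorkflow` API (the empty tool list here is a placeholder for illustration):

    import asyncio

    from agent import basic_agent
    from llama_index.llms.ollama import Ollama

    llm = Ollama(model="llama3.1", request_timeout=120.0)
    agent = basic_agent(tool=[], llm_model=llm)  # placeholder tool list for the sketch

    async def main() -> None:
        # Awaiting the handler returned by run() yields the final AgentOutput,
        # which str() renders as the answer text.
        result = await agent.run(user_msg="What is 2 + 2?")
        print(str(result))

    asyncio.run(main())
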
app.py CHANGED
@@ -1,23 +1,37 @@
+ from json import tool
  import os
  import gradio as gr
  import requests
  import inspect
  import pandas as pd
+ from agent import basic_agent
+ from tools import search_tool, image_tool, video_tool
+ from llama_index.llms.ollama import Ollama
+ from llama_index.core.workflow import Context
+
+

  # (Keep Constants as is)
  # --- Constants ---
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         fixed_answer = "This is a default answer."
-         print(f"Agent returning fixed answer: {fixed_answer}")
-         return fixed_answer
+ # # --- Basic Agent Definition ---
+ # # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
+ # class BasicAgent:
+ #     def __init__(self):
+ #         print("BasicAgent initialized.")
+ #     def __call__(self, question: str) -> str:
+ #         print(f"Agent received question (first 50 chars): {question[:50]}...")
+ #         fixed_answer = "This is a default answer."
+ #         print(f"Agent returning fixed answer: {fixed_answer}")
+ #         return fixed_answer
+ tools = [search_tool, image_tool, video_tool]
+
+ llm = Ollama(model="llama3.1", request_timeout=120.0)
+
+ agent = basic_agent(tool=tools, llm_model=llm)
+
+ ctx = Context(agent)

  def run_and_submit_all( profile: gr.OAuthProfile | None):
      """
@@ -39,11 +53,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
      submit_url = f"{api_url}/submit"

      # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = BasicAgent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
+     # try:
+     #     agent = BasicAgent()
+     # except Exception as e:
+     #     print(f"Error instantiating agent: {e}")
+     #     return f"Error initializing agent: {e}", None
+
      # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
      agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
      print(agent_code)
@@ -80,7 +95,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
              print(f"Skipping item with missing task_id or question: {item}")
              continue
          try:
-             submitted_answer = agent(question_text)
+             submitted_answer = agent.run(question_text, ctx=ctx)
              answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
          except Exception as e:
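
Two caveats in this hunk: `from json import tool` looks like a stray auto-import (`json.tool` is the standard library's JSON pretty-printer CLI, unrelated to agent tools), and because `AgentWorkflow.run()` returns an awaitable handler, `submitted_answer` as written would hold that handler object rather than the answer text. A minimal synchronous wrapper, sketched under the assumption that `agent` and `ctx` are built as above (`answer_question` is a hypothetical helper, not part of this commit):

    import asyncio

    # Hypothetical helper: resolve the awaitable returned by AgentWorkflow.run()
    # so the submission loop stores a plain string answer.
    def answer_question(agent, ctx, question_text: str) -> str:
        async def _run() -> str:
            result = await agent.run(user_msg=question_text, ctx=ctx)
            return str(result)  # the final AgentOutput renders as the answer text
        return asyncio.run(_run())

    # Inside the loop:
    # submitted_answer = answer_question(agent, ctx, question_text)
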
requirements.txt CHANGED
@@ -1,7 +1,9 @@
- gradio
+ # gradio
  requests
  llama-index-tools-brave-search
  huggingface_hub
  python-dotenv
  llama-index
- llama-index-llms-ollama
+ llama-index-llms-ollama
+ llama-index-multi-modal-llms-ollama
+ opencv-python
tools.py CHANGED
@@ -1,8 +1,53 @@
  # Define tools such as: Web search tool, image processing tool, language translation tool, video processing tool
  from llama_index.tools.brave_search import BraveSearchToolSpec
  from llama_index.core.tools import FunctionTool
+ import os
+ from llama_index.multi_modal_llms.ollama import OllamaMultiModal
+ from llama_index.core.schema import ImageNode
+ import cv2
+ from PIL import Image
+ from dotenv import load_dotenv

+ load_dotenv()

+ brave_api_key = os.getenv('BRAVE_API_KEY', 'No key')

- def web_search_tool():
-
+ mm_model = OllamaMultiModal(
+     model='llava',
+     temperature=0.7,
+ )
+
+ search_tool_spec = BraveSearchToolSpec(api_key=brave_api_key)
+
+ search_tool = search_tool_spec.to_tool_list()[0]
+
+ # creating an image handling tool
+ def image_handling_tool(input_data: str) -> str:
+     """this tool takes an image file, processes it based on the user or system prompt and generates a response in a string format"""
+     image = ImageNode(image_url=input_data)
+
+     result = mm_model.complete(
+         prompt='Use the context prompt generated by the agent\'s reasoning to answer the question asked on the image',
+         image_documents=[image]
+     )
+     return str(result)
+
+
+ # creating a video handling tool
+ def video_handling_tool(input: str):
+     """this tool takes a video url link, processes it based on the agent's prompt and or context and generates a response in a string format"""
+     # Load video
+     cap = cv2.VideoCapture(input)
+
+     # Read a frame at the 5-second mark
+     cap.set(cv2.CAP_PROP_POS_MSEC, 5000)
+     success, frame = cap.read()
+
+     if success:
+         image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+         prompt = "What is happening in this frame of the video?"
+         response = mm_model.complete(prompt=prompt, image=image)
+         return str(response)
+
+ image_tool = FunctionTool.from_defaults(fn=image_handling_tool)
+ video_tool = FunctionTool.from_defaults(fn=video_handling_tool)
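
Two caveats on the video tool as committed: `OllamaMultiModal.complete()` takes images through the `image_documents` list (as the image tool above already does), not through an `image=` keyword, and the function returns `None` when the frame read fails while never calling `cap.release()`. A reworked sketch under those assumptions, reusing the `mm_model` defined in this file and writing the frame to a temporary file so it can be wrapped in an `ImageDocument`:

    import os
    import tempfile

    import cv2
    from llama_index.core.schema import ImageDocument

    def video_handling_tool(input: str) -> str:
        """Describe the frame at the 5-second mark of a video with the multimodal model."""
        cap = cv2.VideoCapture(input)
        try:
            cap.set(cv2.CAP_PROP_POS_MSEC, 5000)  # seek to the 5-second mark
            success, frame = cap.read()
            if not success:
                return "Could not read a frame from the video."
            # Persist the frame so it can be referenced as an image document.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                cv2.imwrite(tmp.name, frame)  # OpenCV writes BGR frames directly
                frame_path = tmp.name
        finally:
            cap.release()  # always free the capture handle

        try:
            response = mm_model.complete(
                prompt="What is happening in this frame of the video?",
                image_documents=[ImageDocument(image_path=frame_path)],
            )
            return str(response)
        finally:
            os.remove(frame_path)  # clean up the temporary frame file
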