alisamak commited on
Commit
f6176f9
·
verified ·
1 Parent(s): 76fffd5

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +40 -0
tools.py CHANGED
@@ -37,6 +37,41 @@ def handle_question(question: str) -> str:
37
  return ", ".join(filter_vegetables.run(question.split(", ")))
38
  return web_lookup.run(question)
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  @tool
42
  def web_lookup(query: str) -> str:
@@ -52,6 +87,7 @@ def web_lookup(query: str) -> str:
52
  Returns:
53
  str: A concise factual answer extracted from Tavily or Wikipedia.
54
  """
 
55
  try:
56
  # Step 1: Tavily search
57
  response = client.search(query=query, search_depth="advanced", max_results=5)
@@ -109,6 +145,7 @@ def extract_number_from_snippets(snippets: list[str]) -> Optional[int]:
109
  >>> extract_number_from_snippets(["The Eiffel Tower was built in 1889.", "Height is 324m"])
110
  1889
111
  """
 
112
  for s in snippets:
113
  match = re.search(r"\b\d{1,4}\b", s)
114
  if match:
@@ -121,6 +158,7 @@ def get_article_nominator_from_fac_page(title: str) -> str:
121
  """
122
  Get the nominator of a Featured Article by scanning the main FAC page (not just archives).
123
  """
 
124
  base = "https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates"
125
  url = f"{base}/{title}"
126
  res = requests.get(url)
@@ -155,6 +193,7 @@ def extract_structured_facts_from_url(url: str, selector: Optional[str] = None)
155
  Returns:
156
  str: Cleaned structured data from the page.
157
  """
 
158
  try:
159
  response = requests.get(url, timeout=10)
160
  response.raise_for_status()
@@ -357,6 +396,7 @@ def filter_vegetables(items: list[str]) -> list[str]:
357
 
358
  # List of all tools
359
  all_tools = [
 
360
  web_lookup,
361
  extract_number_from_snippets,
362
  detect_non_commutative_subset,
 
37
  return ", ".join(filter_vegetables.run(question.split(", ")))
38
  return web_lookup.run(question)
39
 
40
@tool
def count_albums_by_year_range(
    artist_name: str, start_year: int, end_year: int
) -> Optional[int]:
    """
    Counts how many albums (typically studio albums) an artist released between
    two years by parsing their Wikipedia discography page.

    Args:
        artist_name (str): Name of the artist (e.g., "Mercedes Sosa").
        start_year (int): Start of the year range (inclusive).
        end_year (int): End of the year range (inclusive).

    Returns:
        Optional[int]: Number of albums released in the year range, or None if
        the page could not be fetched or parsed.
    """
    print("🛠️ count_albums_by_year_range")
    try:
        title = artist_name.strip().replace(" ", "_") + "_discography"
        url = f"https://en.wikipedia.org/wiki/{title}"
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return None

        soup = BeautifulSoup(res.text, "html.parser")
        text = soup.get_text()

        # Match years in parentheses like (2003), (2005).
        # BUGFIX: the original pattern r"\((19|20)\d{2}\)" had a capturing
        # group around the century only, so re.findall returned "19"/"20"
        # instead of the full year — every parsed year became 19 or 20 and
        # the range count was always wrong. Capture the full 4-digit year.
        years = re.findall(r"\(((?:19|20)\d{2})\)", text)
        year_nums = [int(y) for y in years]
        return sum(start_year <= y <= end_year for y in year_nums)

    except Exception:
        # Best-effort tool: any fetch/parse failure maps to "unknown" (None)
        # rather than crashing the agent loop.
        return None
75
 
76
  @tool
77
  def web_lookup(query: str) -> str:
 
87
  Returns:
88
  str: A concise factual answer extracted from Tavily or Wikipedia.
89
  """
90
+ print("🛠️ web_lookup")
91
  try:
92
  # Step 1: Tavily search
93
  response = client.search(query=query, search_depth="advanced", max_results=5)
 
145
  >>> extract_number_from_snippets(["The Eiffel Tower was built in 1889.", "Height is 324m"])
146
  1889
147
  """
148
+ print("🛠️ extract_number_from_snippets")
149
  for s in snippets:
150
  match = re.search(r"\b\d{1,4}\b", s)
151
  if match:
 
158
  """
159
  Get the nominator of a Featured Article by scanning the main FAC page (not just archives).
160
  """
161
+ print("🛠️ get_article_nominator_from_fac_page")
162
  base = "https://en.wikipedia.org/wiki/Wikipedia:Featured_article_candidates"
163
  url = f"{base}/{title}"
164
  res = requests.get(url)
 
193
  Returns:
194
  str: Cleaned structured data from the page.
195
  """
196
+ print("🛠️ extract_structured_facts_from_url")
197
  try:
198
  response = requests.get(url, timeout=10)
199
  response.raise_for_status()
 
396
 
397
  # List of all tools
398
  all_tools = [
399
+ count_albums_by_year_range,
400
  web_lookup,
401
  extract_number_from_snippets,
402
  detect_non_commutative_subset,