first version FunSearch
Browse files- .gitignore +444 -0
- EvaluatorFlowModule/EvaluatorFlow.py +216 -0
- EvaluatorFlowModule/EvaluatorFlow.yaml +18 -0
- EvaluatorFlowModule/__init__.py +13 -0
- EvaluatorFlowModule/demo.yaml +111 -0
- EvaluatorFlowModule/pip_requirements.txt +1 -0
- EvaluatorFlowModule/run.py +62 -0
- FunSearch.py +446 -0
- FunSearch.yaml +37 -0
- Loader.py +55 -0
- ProgramDBFlowModule/Cluster.py +68 -0
- ProgramDBFlowModule/Island.py +168 -0
- ProgramDBFlowModule/Program.py +110 -0
- ProgramDBFlowModule/ProgramDBFlow.py +271 -0
- ProgramDBFlowModule/ProgramDBFlow.yaml +21 -0
- ProgramDBFlowModule/__init__.py +13 -0
- ProgramDBFlowModule/artifacts/__init__.py +2 -0
- ProgramDBFlowModule/artifacts/abstract.py +49 -0
- ProgramDBFlowModule/artifacts/function.py +116 -0
- ProgramDBFlowModule/demo.yaml +24 -0
- ProgramDBFlowModule/pip_requirements.txt +2 -0
- ProgramDBFlowModule/run.py +88 -0
- ProgramDBFlowModule/utils.py +55 -0
- README.md +672 -0
- SamplerFlowModule/SamplerFlow.py +87 -0
- SamplerFlowModule/SamplerFlow.yaml +99 -0
- SamplerFlowModule/__init__.py +13 -0
- SamplerFlowModule/demo.yaml +10 -0
- SamplerFlowModule/pip_requirements.txt +0 -0
- SamplerFlowModule/run.py +181 -0
- __init__.py +4 -0
- cf_functions.py +60 -0
- demo.yaml +34 -0
- pip_requirements.txt +4 -0
- run.py +241 -0
.gitignore
ADDED
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
2 |
+
# Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
3 |
+
|
4 |
+
### C++ ###
|
5 |
+
# Prerequisites
|
6 |
+
*.d
|
7 |
+
|
8 |
+
# Compiled Object files
|
9 |
+
*.slo
|
10 |
+
*.lo
|
11 |
+
*.o
|
12 |
+
*.obj
|
13 |
+
|
14 |
+
# Precompiled Headers
|
15 |
+
*.gch
|
16 |
+
*.pch
|
17 |
+
|
18 |
+
# Compiled Dynamic libraries
|
19 |
+
*.so
|
20 |
+
*.dylib
|
21 |
+
*.dll
|
22 |
+
|
23 |
+
# Fortran module files
|
24 |
+
*.mod
|
25 |
+
*.smod
|
26 |
+
|
27 |
+
# Compiled Static libraries
|
28 |
+
*.lai
|
29 |
+
*.la
|
30 |
+
*.a
|
31 |
+
*.lib
|
32 |
+
|
33 |
+
# Executables
|
34 |
+
*.exe
|
35 |
+
*.out
|
36 |
+
*.app
|
37 |
+
|
38 |
+
### Java ###
|
39 |
+
# Compiled class file
|
40 |
+
*.class
|
41 |
+
|
42 |
+
# Log file
|
43 |
+
*.log
|
44 |
+
|
45 |
+
# BlueJ files
|
46 |
+
*.ctxt
|
47 |
+
|
48 |
+
# Mobile Tools for Java (J2ME)
|
49 |
+
.mtj.tmp/
|
50 |
+
|
51 |
+
# Package Files #
|
52 |
+
*.jar
|
53 |
+
*.war
|
54 |
+
*.nar
|
55 |
+
*.ear
|
56 |
+
*.zip
|
57 |
+
*.tar.gz
|
58 |
+
*.rar
|
59 |
+
|
60 |
+
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
61 |
+
hs_err_pid*
|
62 |
+
replay_pid*
|
63 |
+
|
64 |
+
### Linux ###
|
65 |
+
*~
|
66 |
+
|
67 |
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
68 |
+
.fuse_hidden*
|
69 |
+
|
70 |
+
# KDE directory preferences
|
71 |
+
.directory
|
72 |
+
|
73 |
+
# Linux trash folder which might appear on any partition or disk
|
74 |
+
.Trash-*
|
75 |
+
|
76 |
+
# .nfs files are created when an open file is removed but is still being accessed
|
77 |
+
.nfs*
|
78 |
+
|
79 |
+
### macOS ###
|
80 |
+
# General
|
81 |
+
.DS_Store
|
82 |
+
.AppleDouble
|
83 |
+
.LSOverride
|
84 |
+
|
85 |
+
# Icon must end with two \r
|
86 |
+
Icon
|
87 |
+
|
88 |
+
|
89 |
+
# Thumbnails
|
90 |
+
._*
|
91 |
+
|
92 |
+
# Files that might appear in the root of a volume
|
93 |
+
.DocumentRevisions-V100
|
94 |
+
.fseventsd
|
95 |
+
.Spotlight-V100
|
96 |
+
.TemporaryItems
|
97 |
+
.Trashes
|
98 |
+
.VolumeIcon.icns
|
99 |
+
.com.apple.timemachine.donotpresent
|
100 |
+
|
101 |
+
# Directories potentially created on remote AFP share
|
102 |
+
.AppleDB
|
103 |
+
.AppleDesktop
|
104 |
+
Network Trash Folder
|
105 |
+
Temporary Items
|
106 |
+
.apdisk
|
107 |
+
|
108 |
+
### macOS Patch ###
|
109 |
+
# iCloud generated files
|
110 |
+
*.icloud
|
111 |
+
|
112 |
+
### PyCharm ###
|
113 |
+
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
114 |
+
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
115 |
+
|
116 |
+
# User-specific stuff
|
117 |
+
.idea/**/workspace.xml
|
118 |
+
.idea/**/tasks.xml
|
119 |
+
.idea/**/usage.statistics.xml
|
120 |
+
.idea/**/dictionaries
|
121 |
+
.idea/**/shelf
|
122 |
+
|
123 |
+
# AWS User-specific
|
124 |
+
.idea/**/aws.xml
|
125 |
+
|
126 |
+
# Generated files
|
127 |
+
.idea/**/contentModel.xml
|
128 |
+
|
129 |
+
# Sensitive or high-churn files
|
130 |
+
.idea/**/dataSources/
|
131 |
+
.idea/**/dataSources.ids
|
132 |
+
.idea/**/dataSources.local.xml
|
133 |
+
.idea/**/sqlDataSources.xml
|
134 |
+
.idea/**/dynamic.xml
|
135 |
+
.idea/**/uiDesigner.xml
|
136 |
+
.idea/**/dbnavigator.xml
|
137 |
+
|
138 |
+
# Gradle
|
139 |
+
.idea/**/gradle.xml
|
140 |
+
.idea/**/libraries
|
141 |
+
|
142 |
+
# Gradle and Maven with auto-import
|
143 |
+
# When using Gradle or Maven with auto-import, you should exclude module files,
|
144 |
+
# since they will be recreated, and may cause churn. Uncomment if using
|
145 |
+
# auto-import.
|
146 |
+
# .idea/artifacts
|
147 |
+
# .idea/compiler.xml
|
148 |
+
# .idea/jarRepositories.xml
|
149 |
+
# .idea/modules.xml
|
150 |
+
# .idea/*.iml
|
151 |
+
# .idea/modules
|
152 |
+
# *.iml
|
153 |
+
# *.ipr
|
154 |
+
|
155 |
+
# CMake
|
156 |
+
cmake-build-*/
|
157 |
+
|
158 |
+
# Mongo Explorer plugin
|
159 |
+
.idea/**/mongoSettings.xml
|
160 |
+
|
161 |
+
# File-based project format
|
162 |
+
*.iws
|
163 |
+
|
164 |
+
# IntelliJ
|
165 |
+
out/
|
166 |
+
|
167 |
+
# mpeltonen/sbt-idea plugin
|
168 |
+
.idea_modules/
|
169 |
+
|
170 |
+
# JIRA plugin
|
171 |
+
atlassian-ide-plugin.xml
|
172 |
+
|
173 |
+
# Cursive Clojure plugin
|
174 |
+
.idea/replstate.xml
|
175 |
+
|
176 |
+
# SonarLint plugin
|
177 |
+
.idea/sonarlint/
|
178 |
+
|
179 |
+
# Crashlytics plugin (for Android Studio and IntelliJ)
|
180 |
+
com_crashlytics_export_strings.xml
|
181 |
+
crashlytics.properties
|
182 |
+
crashlytics-build.properties
|
183 |
+
fabric.properties
|
184 |
+
|
185 |
+
# Editor-based Rest Client
|
186 |
+
.idea/httpRequests
|
187 |
+
|
188 |
+
# Android studio 3.1+ serialized cache file
|
189 |
+
.idea/caches/build_file_checksums.ser
|
190 |
+
|
191 |
+
### PyCharm Patch ###
|
192 |
+
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
193 |
+
|
194 |
+
# *.iml
|
195 |
+
# modules.xml
|
196 |
+
# .idea/misc.xml
|
197 |
+
# *.ipr
|
198 |
+
|
199 |
+
# Sonarlint plugin
|
200 |
+
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
201 |
+
.idea/**/sonarlint/
|
202 |
+
|
203 |
+
# SonarQube Plugin
|
204 |
+
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
205 |
+
.idea/**/sonarIssues.xml
|
206 |
+
|
207 |
+
# Markdown Navigator plugin
|
208 |
+
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
209 |
+
.idea/**/markdown-navigator.xml
|
210 |
+
.idea/**/markdown-navigator-enh.xml
|
211 |
+
.idea/**/markdown-navigator/
|
212 |
+
|
213 |
+
# Cache file creation bug
|
214 |
+
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
215 |
+
.idea/$CACHE_FILE$
|
216 |
+
|
217 |
+
# CodeStream plugin
|
218 |
+
# https://plugins.jetbrains.com/plugin/12206-codestream
|
219 |
+
.idea/codestream.xml
|
220 |
+
|
221 |
+
# Azure Toolkit for IntelliJ plugin
|
222 |
+
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
223 |
+
.idea/**/azureSettings.xml
|
224 |
+
|
225 |
+
### Python ###
|
226 |
+
# Byte-compiled / optimized / DLL files
|
227 |
+
__pycache__/
|
228 |
+
*.py[cod]
|
229 |
+
*$py.class
|
230 |
+
|
231 |
+
# C extensions
|
232 |
+
|
233 |
+
# Distribution / packaging
|
234 |
+
.Python
|
235 |
+
build/
|
236 |
+
develop-eggs/
|
237 |
+
dist/
|
238 |
+
downloads/
|
239 |
+
eggs/
|
240 |
+
.eggs/
|
241 |
+
lib/
|
242 |
+
lib64/
|
243 |
+
parts/
|
244 |
+
sdist/
|
245 |
+
var/
|
246 |
+
wheels/
|
247 |
+
share/python-wheels/
|
248 |
+
*.egg-info/
|
249 |
+
.installed.cfg
|
250 |
+
*.egg
|
251 |
+
MANIFEST
|
252 |
+
|
253 |
+
# PyInstaller
|
254 |
+
# Usually these files are written by a python script from a template
|
255 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
256 |
+
*.manifest
|
257 |
+
*.spec
|
258 |
+
|
259 |
+
# Installer logs
|
260 |
+
pip-log.txt
|
261 |
+
pip-delete-this-directory.txt
|
262 |
+
|
263 |
+
# Unit test / coverage reports
|
264 |
+
htmlcov/
|
265 |
+
.tox/
|
266 |
+
.nox/
|
267 |
+
.coverage
|
268 |
+
.coverage.*
|
269 |
+
.cache
|
270 |
+
nosetests.xml
|
271 |
+
coverage.xml
|
272 |
+
*.cover
|
273 |
+
*.py,cover
|
274 |
+
.hypothesis/
|
275 |
+
.pytest_cache/
|
276 |
+
cover/
|
277 |
+
|
278 |
+
# Translations
|
279 |
+
*.mo
|
280 |
+
*.pot
|
281 |
+
|
282 |
+
# Django stuff:
|
283 |
+
local_settings.py
|
284 |
+
db.sqlite3
|
285 |
+
db.sqlite3-journal
|
286 |
+
|
287 |
+
# Flask stuff:
|
288 |
+
instance/
|
289 |
+
.webassets-cache
|
290 |
+
|
291 |
+
# Scrapy stuff:
|
292 |
+
.scrapy
|
293 |
+
|
294 |
+
# Sphinx documentation
|
295 |
+
docs/_build/
|
296 |
+
|
297 |
+
# PyBuilder
|
298 |
+
.pybuilder/
|
299 |
+
target/
|
300 |
+
|
301 |
+
# Jupyter Notebook
|
302 |
+
.ipynb_checkpoints
|
303 |
+
|
304 |
+
# IPython
|
305 |
+
profile_default/
|
306 |
+
ipython_config.py
|
307 |
+
|
308 |
+
# pyenv
|
309 |
+
# For a library or package, you might want to ignore these files since the code is
|
310 |
+
# intended to run in multiple environments; otherwise, check them in:
|
311 |
+
# .python-version
|
312 |
+
|
313 |
+
# pipenv
|
314 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
315 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
316 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
317 |
+
# install all needed dependencies.
|
318 |
+
#Pipfile.lock
|
319 |
+
|
320 |
+
# poetry
|
321 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
322 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
323 |
+
# commonly ignored for libraries.
|
324 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
325 |
+
#poetry.lock
|
326 |
+
|
327 |
+
# pdm
|
328 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
329 |
+
#pdm.lock
|
330 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
331 |
+
# in version control.
|
332 |
+
# https://pdm.fming.dev/#use-with-ide
|
333 |
+
.pdm.toml
|
334 |
+
|
335 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
336 |
+
__pypackages__/
|
337 |
+
|
338 |
+
# Celery stuff
|
339 |
+
celerybeat-schedule
|
340 |
+
celerybeat.pid
|
341 |
+
|
342 |
+
# SageMath parsed files
|
343 |
+
*.sage.py
|
344 |
+
|
345 |
+
# Environments
|
346 |
+
.env
|
347 |
+
.venv
|
348 |
+
env/
|
349 |
+
venv/
|
350 |
+
ENV/
|
351 |
+
env.bak/
|
352 |
+
venv.bak/
|
353 |
+
|
354 |
+
# Spyder project settings
|
355 |
+
.spyderproject
|
356 |
+
.spyproject
|
357 |
+
|
358 |
+
# Rope project settings
|
359 |
+
.ropeproject
|
360 |
+
|
361 |
+
# mkdocs documentation
|
362 |
+
/site
|
363 |
+
|
364 |
+
# mypy
|
365 |
+
.mypy_cache/
|
366 |
+
.dmypy.json
|
367 |
+
dmypy.json
|
368 |
+
|
369 |
+
# Pyre type checker
|
370 |
+
.pyre/
|
371 |
+
|
372 |
+
# pytype static type analyzer
|
373 |
+
.pytype/
|
374 |
+
|
375 |
+
# Cython debug symbols
|
376 |
+
cython_debug/
|
377 |
+
|
378 |
+
# PyCharm
|
379 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
380 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
381 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
382 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
383 |
+
#.idea/
|
384 |
+
|
385 |
+
### Python Patch ###
|
386 |
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
387 |
+
poetry.toml
|
388 |
+
|
389 |
+
# ruff
|
390 |
+
.ruff_cache/
|
391 |
+
|
392 |
+
# LSP config files
|
393 |
+
pyrightconfig.json
|
394 |
+
|
395 |
+
### VisualStudioCode ###
|
396 |
+
.vscode/*
|
397 |
+
!.vscode/settings.json
|
398 |
+
!.vscode/tasks.json
|
399 |
+
!.vscode/launch.json
|
400 |
+
!.vscode/extensions.json
|
401 |
+
!.vscode/*.code-snippets
|
402 |
+
|
403 |
+
# Local History for Visual Studio Code
|
404 |
+
.history/
|
405 |
+
|
406 |
+
# Built Visual Studio Code Extensions
|
407 |
+
*.vsix
|
408 |
+
|
409 |
+
### VisualStudioCode Patch ###
|
410 |
+
# Ignore all local history of files
|
411 |
+
.history
|
412 |
+
.ionide
|
413 |
+
|
414 |
+
### Windows ###
|
415 |
+
# Windows thumbnail cache files
|
416 |
+
Thumbs.db
|
417 |
+
Thumbs.db:encryptable
|
418 |
+
ehthumbs.db
|
419 |
+
ehthumbs_vista.db
|
420 |
+
|
421 |
+
# Dump file
|
422 |
+
*.stackdump
|
423 |
+
|
424 |
+
# Folder config file
|
425 |
+
[Dd]esktop.ini
|
426 |
+
|
427 |
+
# Recycle Bin used on file shares
|
428 |
+
$RECYCLE.BIN/
|
429 |
+
|
430 |
+
# Windows Installer files
|
431 |
+
*.cab
|
432 |
+
*.msi
|
433 |
+
*.msix
|
434 |
+
*.msm
|
435 |
+
*.msp
|
436 |
+
|
437 |
+
# Windows shortcuts
|
438 |
+
*.lnk
|
439 |
+
|
440 |
+
# End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
|
441 |
+
|
442 |
+
.*
|
443 |
+
flow_modules/
|
444 |
+
data/
|
EvaluatorFlowModule/EvaluatorFlow.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
|
2 |
+
|
3 |
+
from aiflows.base_flows import AtomicFlow
|
4 |
+
from typing import Dict, Any
|
5 |
+
import os
|
6 |
+
from aiflows.utils import logging
|
7 |
+
import ast
|
8 |
+
import signal
|
9 |
+
from aiflows.interfaces.key_interface import KeyInterface
|
10 |
+
log = logging.get_logger(f"aiflows.{__name__}")
|
11 |
+
import threading
|
12 |
+
from aiflows.messages import FlowMessage
|
13 |
+
class TimeoutException(Exception):
    """Raised when an evaluation exceeds its allotted time budget."""


def timeout_handler(signum, frame):
    """Signal handler that aborts execution by raising :class:`TimeoutException`.

    :param signum: The number of the signal that was delivered.
    :param frame: The current stack frame at delivery time (unused).
    :raises TimeoutException: always.
    """
    raise TimeoutException("Execution timed out")
19 |
+
class EvaluatorFlow(AtomicFlow):
    """ This class implements an EvaluatorFlow. It is a flow that evaluates a program (python code) using a given evaluator function. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    **Configuration Parameters**:

    - `name` (str): The name of the flow. Default: "EvaluatorFlow"
    - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: "A flow that evaluates code on tests"
    - `py_file` (str): The python code containing the evaluation function. No default value. This MUST be passed as a parameter.
    - `function_to_run_name` (str): The name of the function to run (the evaluation function) in the evaluator file. No default value. This MUST be passed as a parameter.
    - `test_inputs` (Dict[str,Any]): A dictionary of test inputs to evaluate the program. Default: {"test1": None, "test2": None}
    - `timeout_seconds` (int): The maximum number of seconds to run the evaluation function before returning an error. Default: 10
    - `run_error_score` (int): The score to return if the evaluation function fails to run. Default: -100
    - `use_test_input_as_key` (bool): Whether to use the test input parameters as the key in the output dictionary. Default: False

    **Input Interface**:

    - `artifact` (str): The program/artifact to evaluate.

    **Output Interface**:

    - `scores_per_test` (Dict[str, Dict[str, Any]]): A dictionary of scores per test input.

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title   = {Mathematical discoveries from program search with large language models},
    year    = {2023},
    doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Source code (as a string) of the evaluator file to execute.
        self.evaluator_py_file = self.flow_config["py_file"]
        self.run_error_score = self.flow_config["run_error_score"]

        # Local namespace into which the evaluator file is executed.
        self.local_namespace = {}
        self.load_functions()
        self.function_to_run_name = self.flow_config["function_to_run_name"]
        # FIX: the original message referenced the non-existent attribute
        # `self.evaluator_py_file_path`, which raised AttributeError instead of
        # surfacing the assertion message.
        assert self.function_to_run_name in self.local_namespace, \
            f"Function {self.function_to_run_name} not found in {self.evaluator_py_file}"
        self.function_to_run = self.local_namespace.get(self.function_to_run_name)

        self.test_inputs = self.flow_config["test_inputs"]
        self.timeout_seconds = self.flow_config["timeout_seconds"]
        # Drop our reference to the (potentially large) namespace. The extracted
        # `function_to_run` keeps its own globals alive, so this is safe.
        self.local_namespace = {}

        # Default `island_id` to None when absent so downstream flows always see the key.
        select_island_id_with_default = lambda data_dict, **kwargs: {
            **data_dict,
            **{"island_id": data_dict.get("island_id", None)},
        }

        self.output_interface = KeyInterface(
            additional_transformations=[select_island_id_with_default],
            keys_to_select=["scores_per_test"],
        )

    def load_functions(self):
        """ Load the functions from the evaluator py file with ast parsing.

        Import statements are executed first (so import errors surface early),
        then the whole file is executed into ``self.local_namespace``.

        :raises Exception: re-raises any error from parsing or executing the file.
        """
        file_content = self.evaluator_py_file
        try:
            # Parse the AST (Abstract Syntax Tree) of the file content
            parsed_ast = ast.parse(file_content)

            # Execute every top-level import/import-from in the namespace first.
            for node in parsed_ast.body:
                if isinstance(node, (ast.Import, ast.ImportFrom)):
                    exec(compile(ast.Module(body=[node], type_ignores=[]), '<ast>', 'exec'),
                         self.local_namespace)

            # Execute the full content of the file in the namespace.
            exec(file_content, self.local_namespace)
        except Exception as e:
            log.error(f"Error functions: {e}")
            raise e

    def run_function_with_timeout(self, program: str, **kwargs):
        """ Run the evaluation function with a timeout.

        :param program: The program to evaluate
        :type program: str
        :param kwargs: The keyword arguments to pass to the evaluation function
        :type kwargs: Dict[str, Any]
        :return: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
        :rtype: Tuple[bool, Any]
        """
        # Per-call container shared with the worker thread. Unlike instance
        # attributes, this cannot be clobbered by a previously timed-out thread
        # that finishes during a later call.
        outcome = {"result": None, "exception": None}

        def target():
            try:
                outcome["result"] = self.function_to_run(program, **kwargs)
            except Exception as e:
                outcome["exception"] = e

        # daemon=True: Python threads cannot be forcibly killed (there is no
        # Thread.terminate() — the original call raised AttributeError on every
        # timeout), so on timeout we abandon the worker and let it die with the
        # process instead of blocking interpreter shutdown.
        thread = threading.Thread(target=target, daemon=True)
        thread.start()

        # Wait for the specified timeout.
        thread.join(self.timeout_seconds)

        # If the thread is still alive, the timeout has elapsed.
        if thread.is_alive():
            return False, f"Function execution timed out after {self.timeout_seconds} seconds"

        # The thread finished; report any exception it raised.
        if outcome["exception"] is not None:
            return False, str(outcome["exception"])

        # No exception: return the result.
        return True, outcome["result"]

    def evaluate_program(self, program: str, **kwargs):
        """ Evaluate the program using the evaluation function.

        :param program: The program to evaluate
        :type program: str
        :param kwargs: The keyword arguments to pass to the evaluation function
        :type kwargs: Dict[str, Any]
        :return: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
        :rtype: Tuple[bool, Any]
        """
        try:
            runs_ok, test_output = self.run_function_with_timeout(program, **kwargs)
            return runs_ok, test_output
        except Exception as e:
            log.debug(f"Error defining running program: {e} (could be due to syntax error from LLM)")
            return False, e

    def analyse(self, program: str):
        """ Analyse the program on the test inputs.

        :param program: The program to evaluate
        :type program: str
        :return: A dictionary of scores per test input
        :rtype: Dict[str, Dict[str, Any]]
        """
        # LLMs often wrap answers in a markdown code block; strip the fences.
        if program.startswith("```python"):
            program = program[9:]
        if program.endswith("```"):
            program = program[:-3]

        scores_per_test = {}
        for key, test_input in self.test_inputs.items():

            test_input_key = str(test_input) if self.flow_config["use_test_input_as_key"] else key

            if test_input is None:
                runs_ok, test_output = self.evaluate_program(program)
            else:
                runs_ok, test_output = self.evaluate_program(program, **test_input)  # Run the program

            if runs_ok and test_output is not None:  # and not utils.calls_ancestor(program) (TODO: check what they mean by this in the paper)
                scores_per_test[test_input_key] = {"score": test_output, "feedback": "No feedback available."}
                log.debug(f"Program run successfully for test case {test_input_key} with score: {test_output}")
            else:
                log.debug(f"Error running Program for test case {test_input_key}. Error is : {test_output} (could be due to syntax error from LLM)")
                scores_per_test[test_input_key] = {"score": self.run_error_score, "feedback": str(test_output)}

        return scores_per_test

    def run(self, input_message: FlowMessage):
        """ This method runs the flow. It's the main method of the flow.

        :param input_message: The input message
        :type input_message: FlowMessage
        """
        input_data = input_message.data

        # Analyse the program
        scores_per_test = self.analyse(input_data["artifact"])
        # Prepare the response
        response = {"scores_per_test": scores_per_test, "from": "EvaluatorFlow"}

        # Send back the response
        reply = self.package_output_message(
            input_message,
            response
        )
        self.send_message(reply)
|
EvaluatorFlowModule/EvaluatorFlow.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow.instantiate_from_default_config
|
2 |
+
name: EvaluatorFlow
|
3 |
+
description: A flow that evaluates code on tests
|
4 |
+
|
5 |
+
input_interface:
|
6 |
+
- "artifact"
|
7 |
+
output_interface:
|
8 |
+
- "scores_per_test"
|
9 |
+
|
10 |
+
py_file: ???
|
11 |
+
function_to_run_name: ???
|
12 |
+
test_inputs:
|
13 |
+
test_1: null
|
14 |
+
test_2: null
|
15 |
+
timeout_seconds: 10
|
16 |
+
run_error_score: -100
|
17 |
+
use_test_input_as_key: false
|
18 |
+
|
EvaluatorFlowModule/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ~~~ Specify the dependencies ~~~
|
2 |
+
# e.g.,
|
3 |
+
# dependencies = [
|
4 |
+
# {"url": "aiflows/AutoGPTFlowModule", "revision": "main"},
|
5 |
+
# ]
|
6 |
+
# Revision can correspond to a branch, commit hash or an absolute path to a local directory (ideal for development)
|
7 |
+
# from aiflows import flow_verse
|
8 |
+
|
9 |
+
# flow_verse.sync_dependencies(dependencies)
|
10 |
+
|
11 |
+
# ~~~ Import of your flow class (if you have any) ~~~
|
12 |
+
# from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
|
13 |
+
from .EvaluatorFlow import EvaluatorFlow
|
EvaluatorFlowModule/demo.yaml
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow.instantiate_from_default_config
|
2 |
+
|
3 |
+
py_file: |2-
|
4 |
+
"""Problem Description:
|
5 |
+
Serval has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\leq i\leq n.
|
6 |
+
Serval can perform the following operation called Inversion Magic on the string s:
|
7 |
+
Choose an segment [l, r] (1\leq l\leq r\leq n). For l\leq i\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.
|
8 |
+
For example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.
|
9 |
+
Serval wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.
|
10 |
+
A string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.
|
11 |
+
|
12 |
+
Input Description:
|
13 |
+
Input
|
14 |
+
Each test contains multiple test cases. The first line contains the number of test cases t (1\leq t\leq 10^4). The description of the test cases follows.
|
15 |
+
The first line of each test case contains a single integer n (2\leq n\leq 10^5) — the length of string s.
|
16 |
+
The second line of each test case contains a binary string s of length n. Only characters 0 and 1 can appear in s.
|
17 |
+
It's guaranteed that the sum of n over all test cases does not exceed 2\cdot 10^5.
|
18 |
+
|
19 |
+
Output Description:
|
20 |
+
Output
|
21 |
+
For each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.
|
22 |
+
You can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).
|
23 |
+
|
24 |
+
Public Tests:
|
25 |
+
Test 1:
|
26 |
+
Input: ['1', '4', '1001']
|
27 |
+
Output: 'YES'
|
28 |
+
Test 2:
|
29 |
+
Input: ['1', '5', '10010']
|
30 |
+
Output: 'YES'
|
31 |
+
Test 3:
|
32 |
+
Input: ['1', '7', '0111011']
|
33 |
+
Output: 'NO'
|
34 |
+
|
35 |
+
"""
|
36 |
+
|
37 |
+
|
38 |
+
import ast
|
39 |
+
import itertools
|
40 |
+
import numpy as np
|
41 |
+
from typing import List
|
42 |
+
|
43 |
+
def solve(solve_function: str,input: List[str], expected_output: str) -> str:
|
44 |
+
"""function used to run the solve function on input *kwargs and return the the predicted output
|
45 |
+
|
46 |
+
:param solve_function: the function to run (the solve function below as a string)
|
47 |
+
:type solve_function: str
|
48 |
+
:param kwargs: the inputs to the solve function
|
49 |
+
:type kwargs: List[str]
|
50 |
+
"""
|
51 |
+
local_namespace = {}
|
52 |
+
exec(solve_function,local_namespace)
|
53 |
+
found_name, program_name = get_function_name_from_code(solve_function)
|
54 |
+
|
55 |
+
if not found_name:
|
56 |
+
raise ValueError(f"Function name not found in program: {solve_function}")
|
57 |
+
|
58 |
+
solve_fn = local_namespace.get(program_name)
|
59 |
+
|
60 |
+
prediction = solve_fn(input)
|
61 |
+
|
62 |
+
prediction = prediction.split()
|
63 |
+
expected_output = expected_output.split()
|
64 |
+
|
65 |
+
if len(prediction) != len(expected_output):
|
66 |
+
raise ValueError(f"Invalid Format of prediction")
|
67 |
+
|
68 |
+
for i in range(len(prediction)):
|
69 |
+
if prediction[i] != expected_output[i]:
|
70 |
+
return False
|
71 |
+
|
72 |
+
return True
|
73 |
+
|
74 |
+
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
    """Return the score of the solve function we're evolving based on the tests_inputs and expected_outputs.

    Scores are 1.0 (prediction matches) or 0.0 (prediction differs). If the candidate
    program fails to run, :func:`solve` raises and the exception propagates to the
    evaluation harness, which maps it to ``run_error_score`` (-1 in this module's
    configuration) — this function itself never returns -1.

    :param solve_function: source code of the candidate solve function
    :param tests_inputs: inputs forwarded to :func:`solve`
    :param expected_outputs: expected output string forwarded to :func:`solve`
    :rtype: float
    """
    # `solve` returns a bool; avoid the `== True` anti-idiom of the original.
    return 1.0 if solve(solve_function, tests_inputs, expected_outputs) else 0.0
|
81 |
+
|
82 |
+
|
83 |
+
def get_function_name_from_code(code):
    """Locate the first function definition in ``code``.

    :param code: Python source to inspect
    :return: ``(True, name)`` for the first ``def`` encountered while walking the
        AST, otherwise ``(False, None)``
    """
    module = ast.parse(code)
    first_def = next(
        (node for node in ast.walk(module) if isinstance(node, ast.FunctionDef)),
        None,
    )
    if first_def is None:
        # No def statement anywhere in the source: signal failure to the caller.
        return False, None
    return True, first_def.name
|
91 |
+
|
92 |
+
|
93 |
+
function_to_run_name: evaluate
|
94 |
+
test_inputs:
|
95 |
+
|
96 |
+
test_1:
|
97 |
+
tests_inputs: ['1', '4', '1001']
|
98 |
+
expected_outputs: 'YES'
|
99 |
+
test_2:
|
100 |
+
tests_inputs: ['1', '5', '10010']
|
101 |
+
expected_outputs: 'YES'
|
102 |
+
test_3:
|
103 |
+
tests_inputs: ['1', '7', '0111011']
|
104 |
+
expected_outputs: 'NO'
|
105 |
+
test_4:
|
106 |
+
tests_inputs: ['3', '4', '1001', '5', '10010', '7', '0111011', '']
|
107 |
+
expected_outputs: 'YES\nYES\nNO\n'
|
108 |
+
|
109 |
+
timeout_seconds: 10
|
110 |
+
run_error_score: -1
|
111 |
+
use_test_input_as_key: false
|
EvaluatorFlowModule/pip_requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
astunparse==1.6.3
|
EvaluatorFlowModule/run.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import hydra

import aiflows
from aiflows.backends.api_info import ApiInfo
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys

from aiflows import logging
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache

from aiflows.utils import serving
from aiflows.workers import run_dispatch_worker_thread
from aiflows.messages import FlowMessage
from aiflows.interfaces import KeyInterface
from aiflows.utils.colink_utils import start_colink_server
from aiflows import flow_verse

# Sync the FunSearch flow module from the parent directory so its flows can be served.
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        "revision": os.path.abspath("../")
    }
]
flow_verse.sync_dependencies(dependencies)

logging.set_verbosity_debug()


if __name__ == "__main__":

    # Start a local CoLink server hosting the flows.
    cl = start_colink_server()

    # Serve EvaluatorFlow so instances can be spawned on demand.
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow",
        flow_endpoint="EvaluatorFlow",
    )

    # Worker thread that dispatches incoming messages to served flows.
    run_dispatch_worker_thread(cl)

    config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))

    # Proxy to a served EvaluatorFlow instance.
    # Renamed from `funsearch_proxy`: the endpoint exercised by this demo is the
    # evaluator alone, not the full FunSearch flow.
    evaluator_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="EvaluatorFlow",
        config_overrides=config_overrides,
    )
    # A trivial candidate program: always answers 'YES' (passes only some public tests).
    data = {
        'artifact': \
            'def solve_function(input) -> str:\n """Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)"""\n return \'YES\'\n'
    }

    input_message = evaluator_proxy.package_input_message(data=data)

    evaluator_proxy.send_message(input_message)

    # Block until the evaluator replies with the scores for the candidate.
    future = evaluator_proxy.get_reply_future(input_message)
    response = future.get_data()
    print("~~~Response~~~")
    print(response)
|
FunSearch.py
ADDED
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from aiflows.base_flows import CompositeFlow
|
2 |
+
from aiflows.utils import logging
|
3 |
+
from aiflows.interfaces import KeyInterface
|
4 |
+
from aiflows.messages import FlowMessage
|
5 |
+
from typing import Dict, Any
|
6 |
+
log = logging.get_logger(f"aiflows.{__name__}")
|
7 |
+
|
8 |
+
class FunSearch(CompositeFlow):
    """ This class implements FunSearch. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch) . It's a Flow in charge of starting, stopping and managing (passing around messages) the FunSearch process. It passes messages around to the following subflows:

    - ProgramDBFlow: which is in charge of storing and retrieving programs.
    - SamplerFlow: which is in charge of sampling programs.
    - EvaluatorFlow: which is in charge of evaluating programs.

    *Configuration Parameters*:

    - `name` (str): The name of the flow. Default: "FunSearchFlow".
    - `description` (str): The description of the flow. Default: "A flow implementing FunSearch"
    - `subflows_config` (Dict[str,Any]): A dictionary of subflows configurations. Default:
        - `ProgramDBFlow`: By default, it uses the `ProgramDBFlow` class and uses its default parameters.
        - `SamplerFlow`: By default, it uses the `SamplerFlow` class and uses its default parameters.
        - `EvaluatorFlow`: By default, it uses the `EvaluatorFlow` class and uses its default parameters.

    **Input Interface**:

    - `from` (str): The flow from which the message is coming from. It can be one of the following: "FunSearch", "SamplerFlow", "EvaluatorFlow", "ProgramDBFlow".
    - `operation` (str): The operation to perform. It can be one of the following: "start", "stop", "get_prompt", "get_best_programs_per_island", "register_program".
    - `content` (Dict[str,Any]): The content associated to an operation. Here is the expected content for each operation:
        - "start":
            - `num_samplers` (int): The number of samplers to start up. Note that it's still restricted by the number of workers available. Default: 1.
        - "stop":
            - No content. Pass either an empty dictionary or None. Works also with no content.
        - "get_prompt":
            - No content. Pass either an empty dictionary or None. Works also with no content.
        - "get_best_programs_per_island":
            - No content. Pass either an empty dictionary or None. Works also with no content.

    **Output Interface**:

    - `retrieved` (Dict[str,Any]): The retrieved data.

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title   = {Mathematical discoveries from program search with large language models},
    year    = {2023},
    doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Next state per action: maps (operation, sender) -> name of the subflow /
        # internal step that should handle the message next.
        self.next_state_per_action = {
            "get_prompt": {
                "FunSearch": "ProgramDBFlow",
                "ProgramDBFlow": "SamplerFlow",
            },

            "get_best_programs_per_island": {
                "FunSearch": "ProgramDBFlow",
                "ProgramDBFlow": "GenerateReply",
            },
            "register_program": {
                "SamplerFlow": "EvaluatorFlow",
                "EvaluatorFlow": "ProgramDBFlow",
            },
            "start":
                {"FunSearch": "FunSearch"},
            "stop":
                {"FunSearch": "FunSearch"},
        }

        # Key interface that builds the payload of a "get_prompt" request sent to itself.
        self.make_request_for_prompt_data = KeyInterface(
            keys_to_set= {"operation": "get_prompt", "content": {}, "from": "FunSearch"},
            keys_to_select= ["operation", "content", "from"]
        )

    def make_request_for_prompt(self):
        """ This method makes a request for a prompt. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt. """

        # Prepare data to make a request for a prompt
        data = self.make_request_for_prompt_data({})

        # Package message to make a request for a prompt
        msg = self.package_input_message(
            data=data,
            dst_flow="FunSearch"
        )
        # Send the message to itself to start the process of getting a prompt
        self.send_message(
            msg
        )

    def request_samplers(self,input_message: FlowMessage):
        """ This method requests samplers. For each requested sampler it sends a "get_prompt" message to itself, which will trigger the flow to call the `ProgramDBFlow` to get a prompt.

        :param input_message: The input message that triggered the request for samplers.
        :type input_message: FlowMessage
        """

        # Get (and remove) the state associated with the message — the "start" request
        # is fully handled here.
        message_state = self.pop_message_from_state(input_message.input_message_id)
        # Get the number of samplers to request (defaults to 1)
        num_samplers = message_state["content"].get("num_samplers",1)
        for i in range(num_samplers):
            self.make_request_for_prompt()

    def get_next_state(self, input_message: FlowMessage) -> str:
        """ This method determines the next state of the flow based on the input message. It will return the next state based on the current state and the message received.

        :param input_message: The input message that triggered the request for the next state.
        :type input_message: FlowMessage
        :return: The next state of the flow.
        :rtype: str
        """
        # Get the state associated with the message
        message_state = self.get_message_from_state(input_message.input_message_id)
        message_from = message_state["from"]
        operation = message_state["operation"]
        # Look up the next state from the (operation, sender) table built in __init__
        next_state = self.next_state_per_action[operation][message_from]
        return next_state

    def set_up_flow_state(self):
        """ This method sets up the state of the flow. It's called at the beginning of the flow."""
        super().set_up_flow_state()
        # Dictionary containing the state of each message currently being handled by FunSearch.
        # Each message has its own entry, keyed by message id; once a message is fully
        # handled, its entry is removed.
        self.flow_state["msg_requests"] = {}
        # Flag tracking whether the first sample has been saved to the db
        self.flow_state["first_sample_saved_to_db"] = False
        # Flag tracking whether FunSearch is running
        self.flow_state["funsearch_running"] = False

    def save_message_to_state(self,msg_id: str, message: FlowMessage):
        """ This method saves a message to the state of the flow. It's used to keep track of state on a per message basis (i.e., state of the flow depending on the message received and id).

        :param msg_id: The id of the message to save.
        :type msg_id: str
        :param message: The message to save.
        :type message: FlowMessage
        """
        self.flow_state["msg_requests"][msg_id] = {"og_message": message}

    def rename_key_message_in_state(self, old_key: str, new_key: str):
        """ This method renames a key in the "msg_requests" dictionary of the flow state. It's used so a pending request can be found again under the id of the reply that will come back.

        :param old_key: The old key to rename.
        :type old_key: str
        :param new_key: The new key to rename to.
        :type new_key: str
        """
        self.flow_state["msg_requests"][new_key] = self.flow_state["msg_requests"].pop(old_key)

    def message_in_state(self,msg_id: str) -> bool:
        """ This method checks if a message is in the state of the flow (in the "msg_requests" dictionary). It returns True if the message is in the state, otherwise it returns False.

        :param msg_id: The id of the message to check.
        :type msg_id: str
        :return: True if the message is in the state, otherwise False.
        :rtype: bool
        """

        return msg_id in self.flow_state["msg_requests"].keys()

    def get_message_from_state(self, msg_id: str) -> Dict[str,Any]:
        """ This method returns the state associated with a message id (in the "msg_requests" dictionary) without removing it.

        :param msg_id: The id of the message to get the state from.
        :type msg_id: str
        :return: The state associated with the message id.
        :rtype: Dict[str,Any]
        """
        return self.flow_state["msg_requests"][msg_id]

    def pop_message_from_state(self, msg_id: str) -> Dict[str,Any]:
        """ This method pops a message from the state of the flow (in the "msg_requests" dictionary). It returns the state associated with the message and removes it from the state.

        :param msg_id: The id of the message to pop from the state.
        :type msg_id: str
        :return: The state associated with the message id.
        :rtype: Dict[str,Any]
        """
        return self.flow_state["msg_requests"].pop(msg_id)

    def merge_message_request_state(self,id: str, new_states: Dict[str,Any]):
        """ This method merges new entries into the state of a message (in the "msg_requests" dictionary). Existing keys are overwritten by `new_states`.

        :param id: The id of the message to merge new states to.
        :type id: str
        :param new_states: The new states to merge to the message.
        :type new_states: Dict[str,Any]
        """
        self.flow_state["msg_requests"][id] = {**self.flow_state["msg_requests"][id], **new_states}

    def register_data_to_state(self, input_message: FlowMessage):
        """This method registers the input message data to the flow state. It's called every time a new input message is received.

        :param input_message: The input message
        :type input_message: FlowMessage
        """

        # Determine who the message is from (should be either FunSearch, SamplerFlow, EvaluatorFlow, or ProgramDBFlow)
        msg_from = input_message.data.get("from", "FunSearch")
        # Check if this is a first request or part of a message already being handled
        # (it's part of a message being handled if the message id is already in the state)
        msg_id = input_message.input_message_id
        msg_in_state = self.message_in_state(msg_id)

        # If the message is not in the state, save it to the state
        if not msg_in_state:
            self.save_message_to_state(msg_id, input_message)

        # Get the state associated with the message
        message_state = self.get_message_from_state(msg_id)

        # Determine what to do based on who the message is from

        if msg_from == "FunSearch":
            # Calls from FunSearch expect an operation and content
            operation = input_message.data["operation"]
            content = input_message.data.get("content",{})
            to_add_to_state = {
                "content": content,
                "operation": operation
            }
            # Save the operation and content to the state
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "SamplerFlow":
            # Calls from SamplerFlow carry `api_output` (the sampled program);
            # store it as the artifact and queue it for registration.
            to_add_to_state = {
                "content": {
                    **message_state.get("content",{}),
                    **{"artifact": input_message.data["api_output"]}
                },
                "operation": "register_program"
            }
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "EvaluatorFlow":
            # Calls from EvaluatorFlow carry `scores_per_test`; merge them into the content
            message_state = self.get_message_from_state(msg_id)
            to_add_to_state = {
                "content": {
                    **message_state.get("content",{}),
                    **{"scores_per_test": input_message.data["scores_per_test"]}
                }
            }
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "ProgramDBFlow":
            # Calls from ProgramDBFlow carry `retrieved`; merge it into the state
            to_add_to_state = {
                "retrieved": input_message.data["retrieved"],
            }

            # If the message from ProgramDBFlow is associated with a "get_prompt" operation,
            # also save the island_id to the state
            if message_state["operation"] == "get_prompt":
                island_id = input_message.data["retrieved"]["island_id"]
                to_add_to_state["content"] = {
                    **message_state.get("content",{}),
                    **{"island_id": island_id}
                }

            self.merge_message_request_state(msg_id, to_add_to_state)

        # Save the sender to the state
        self.merge_message_request_state(msg_id, {"from": msg_from})

    def call_program_db(self, input_message):
        """ This method calls the ProgramDBFlow. It sends a message to the ProgramDBFlow with the data of the input message.

        :param input_message: The input message to send to the ProgramDBFlow.
        :type input_message: FlowMessage
        """

        # Fetch the state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(input_message.input_message_id)

        # Get the operation and content from the state to send to ProgramDBFlow
        operation = message_state["operation"]
        content = message_state.get("content", {})

        data = {
            "operation": operation,
            "content": content
        }
        # Package the message to send to ProgramDBFlow
        msg = self.package_input_message(
            data = data,
            dst_flow = "ProgramDBFlow"
        )

        # If the operation is "register_program",
        # pop the message from the state (the initial message has been fully handled)
        # and set first_sample_saved_to_db to True.
        # Send a message to register the program without expecting a reply
        # (no need to wait for a reply, just save to db and move on)
        if data["operation"] == "register_program":
            self.pop_message_from_state(msg_id)

            self.flow_state["first_sample_saved_to_db"] = True

            self.subflows["ProgramDBFlow"].send_message(
                msg
            )

        # If the operation is "get_prompt" or "get_best_programs_per_island",
        # rename the key in the state to the new message id (so the request can be
        # tracked in the state when the reply arrives)
        elif data["operation"] in ["get_prompt","get_best_programs_per_island"]:
            self.rename_key_message_in_state(msg_id, msg.message_id)
            # If no sample has been saved to the db yet, send the input message back to
            # itself (to try again; hopefully a sample will have been saved by then).
            # NOTE(review): the state key was just renamed to msg.message_id, so the
            # re-sent message (still carrying the old id) will be registered as a brand
            # new request on arrival — confirm this is intended.
            if not self.flow_state["first_sample_saved_to_db"]:
                # Send the message back to itself (to try again)
                self.send_message(
                    input_message
                )
            # If a sample has been saved to the db, send the message to ProgramDBFlow
            # to fetch a prompt or the best programs per island
            else:
                self.subflows["ProgramDBFlow"].get_reply(
                    msg
                )
        # If the operation is not "register_program", "get_prompt" or "get_best_programs_per_island"
        else:
            log.error("No operation found, input_message received: \n" + str(input_message))

    def call_evaluator(self, input_message):
        """ This method calls the EvaluatorFlow. It sends a message to the EvaluatorFlow with the data of the input message.

        :param input_message: The input message to send to the EvaluatorFlow.
        :type input_message: FlowMessage
        """

        # Fetch the state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(msg_id)

        # Get the data to send to EvaluatorFlow (artifact generated by the Sampler, to be evaluated)
        data = {
            "artifact": message_state["content"]["artifact"]
        }

        msg = self.package_input_message(
            data = data,
            dst_flow = "EvaluatorFlow"
        )
        # Rename the key in the state to the new message id (so the request can be
        # tracked in the state when the reply arrives)
        self.rename_key_message_in_state(msg_id, msg.message_id)
        # Send the message to EvaluatorFlow and expect a reply to be sent back to FunSearch's input message queue
        self.subflows["EvaluatorFlow"].get_reply(
            msg
        )

    def call_sampler(self, input_message):
        """ This method calls the SamplerFlow. It sends a message to the SamplerFlow with the data of the input message.

        :param input_message: The input message to send to the SamplerFlow.
        :type input_message: FlowMessage
        """

        # Fetch the state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(msg_id)

        # Get the data to send to SamplerFlow (prompt to generate a program)
        data = {
            **message_state["retrieved"],
        }
        msg = self.package_input_message(
            data = data,
            dst_flow = "SamplerFlow"
        )
        # Rename the key in the state to the new message id (so the request can be
        # tracked in the state when the reply arrives)
        self.rename_key_message_in_state(msg_id, msg.message_id)

        # Send the message to SamplerFlow and expect a reply to be sent back to FunSearch's input message queue
        self.subflows["SamplerFlow"].get_reply(
            msg
        )
        # If FunSearch is running, make a new request for a prompt (to keep the process going)
        if self.flow_state["funsearch_running"]:
            self.make_request_for_prompt()


    def generate_reply(self, input_message: FlowMessage):
        """ This method generates a reply to a message sent to the user. It packages the output message and sends it.

        :param input_message: The input message to generate a reply to.
        :type input_message: FlowMessage
        """

        # Fetch (and remove) the state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.pop_message_from_state(msg_id)
        # Prepare the response to send to the user (due to a call to get_best_programs_per_island)
        response = {
            "retrieved": message_state["retrieved"]
        }
        reply = self.package_output_message(
            message_state["og_message"],
            response
        )

        self.send_message(
            reply
        )

    def run(self,input_message: FlowMessage):
        """ This method runs the flow. It's the main method of the flow. It's called when the flow is executed.

        :param input_message: The input message of the flow
        :type input_message: FlowMessage
        """
        self.register_data_to_state(input_message)

        next_state = self.get_next_state(input_message)

        if next_state == "ProgramDBFlow":
            self.call_program_db(input_message)

        elif next_state == "EvaluatorFlow":
            self.call_evaluator(input_message)

        elif next_state == "SamplerFlow":
            self.call_sampler(input_message)

        elif next_state == "GenerateReply":
            self.generate_reply(input_message)

        elif next_state == "FunSearch":
            # If the operation is "start", set funsearch_running to True and request samplers
            if input_message.data["operation"] == "start":
                self.flow_state["funsearch_running"] = True
                self.request_samplers(input_message)
            # If the operation is "stop", set funsearch_running to False
            # (this will stop the process of generating new samples)
            elif input_message.data["operation"] == "stop":
                self.flow_state["funsearch_running"] = False
        else:
            log.error("No next state found, input_message received: \n" + str(input_message))
|
FunSearch.yaml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: FunSearchFlow
|
2 |
+
description: "A flow implementing FunSearch"
|
3 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.FunSearch.instantiate_from_default_config
|
4 |
+
user_id: local
|
5 |
+
input_interface:
|
6 |
+
- "action"
|
7 |
+
- "content"
|
8 |
+
- "from"
|
9 |
+
|
10 |
+
output_interface:
|
11 |
+
- "retrieved"
|
12 |
+
### Subflows specification
|
13 |
+
subflows_config:
|
14 |
+
SamplerFlow:
|
15 |
+
flow_class_name: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow
|
16 |
+
flow_endpoint: SamplerFlow
|
17 |
+
parallel_dispatch: True
|
18 |
+
singleton: False
|
19 |
+
user_id: local
|
20 |
+
name: "Sampler Flow"
|
21 |
+
description: "A flow that queries an LLM model to generate prompts"
|
22 |
+
|
23 |
+
EvaluatorFlow:
|
24 |
+
flow_class_name: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow
|
25 |
+
flow_endpoint: EvaluatorFlow
|
26 |
+
user_id: local
|
27 |
+
parallel_dispatch: True
|
28 |
+
name: "A flow that evaluates code on tests"
|
29 |
+
description: "A flow that evaluates code on tests"
|
30 |
+
|
31 |
+
ProgramDBFlow:
|
32 |
+
flow_class_name: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow
|
33 |
+
flow_endpoint: ProgramDBFlow
|
34 |
+
singleton: True
|
35 |
+
user_id: local
|
36 |
+
name: "ProgramDB"
|
37 |
+
description: "A flow that registers samples and evaluations in a database"
|
Loader.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ast
|
2 |
+
import os
|
3 |
+
import yaml
|
4 |
+
|
5 |
+
class Loader:
    """Load a named target (function, class, or variable) from a Python file, or a whole YAML file.

    :param file_path: path to the ``.py`` or ``.yaml`` file to read
    :param target_name: name of the function/class/variable to extract (ignored for YAML files)
    :raises ValueError: if ``file_path`` does not exist
    """

    def __init__(self, file_path, target_name):
        self.py_file_path = file_path
        self.target_name = target_name

        if not os.path.exists(file_path):
            raise ValueError(f"File {file_path} does not exist")

        with open(file_path, 'r') as file:
            self.source_code = file.read()

    def load_target(self):
        """Return the parsed YAML content for ``.yaml`` files, otherwise the target's source code."""
        if self.py_file_path.endswith('.yaml'):
            return self.load_yaml()
        else:
            return self.load_code()

    def load_full_file(self):
        """Return the raw text of the whole file."""
        return self.source_code

    def load_code(self):
        """Return the source code of the first definition or assignment named ``target_name``.

        :return: the unparsed source of the matching node
        :raises ValueError: if no matching target exists in the module
        """
        # Parse the source code into an abstract syntax tree (AST)
        tree = ast.parse(self.source_code)

        # Find the target node (FunctionDef, ClassDef, or variable assignment).
        # BUG FIX: the original only broke out of the inner loop over assignment
        # targets, so for variables the walk continued and a LATER assignment to the
        # same name silently replaced an earlier match — inconsistent with the
        # first-match behavior for functions/classes. Both now return the first match.
        target_node = None
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)) and node.name == self.target_name:
                target_node = node
                break
            elif isinstance(node, ast.Assign):
                if any(isinstance(target, ast.Name) and target.id == self.target_name
                       for target in node.targets):
                    target_node = node
                    break

        if target_node is not None:
            # Extract the source code of the target
            target_source_code = ast.unparse(target_node)
            return target_source_code
        else:
            raise ValueError(f"Target '{self.target_name}' not found in the module.")

    def load_yaml(self):
        """Parse the file as YAML and return its content.

        :raises ValueError: if the YAML is malformed
        """
        try:
            with open(self.py_file_path, 'r') as yaml_file:
                yaml_content = yaml.safe_load(yaml_file)
                return yaml_content
        except yaml.YAMLError as e:
            raise ValueError(f"Error loading YAML file: {e}")
|
55 |
+
|
ProgramDBFlowModule/Cluster.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from .artifacts import AbstractArtifact
|
3 |
+
from .utils import _softmax
|
4 |
+
class Cluster:
    """ An implementation of a Cluster of an island. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    A cluster groups programs that achieved the same scores-per-test
    signature; sampling within a cluster favours shorter programs.

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title = {Mathematical discoveries from program search with large language models},
    year = {2023},
    doi = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, score: float, first_program: AbstractArtifact, epsilon=1e-6, sample_with_replacement=False, default_program_temperature=0.1):
        # Score shared by every program in this cluster (same test signature).
        self.score: float = score
        # Programs registered so far; starts with the founding program.
        self.programs: list[AbstractArtifact] = [first_program]
        # Source length of each program, kept in lock-step with `programs`.
        self.lengths = np.array([len(str(first_program))], dtype=np.float32)
        # Small constant guarding the length normalisation against /0.
        self.epsilon = epsilon
        self.sample_with_replacement = sample_with_replacement
        self.default_program_temperature = default_program_temperature

    def compute_length_probs(self, program_temperature: float):
        """ Compute the probability of each program given the length of the program. The probability is computed as the softmax of the negative length of the program. The temperature of the softmax is controlled by the program_temperature parameter.

        :param program_temperature: The temperature of the softmax
        :type program_temperature: float
        :return: The probability of each program given the length of the program
        :rtype: np.array
        """
        shortest = np.min(self.lengths)
        longest = np.max(self.lengths)

        # Shift by the minimum and scale by the maximum (plus epsilon) so
        # shorter programs get larger probabilities after negation.
        normalized_lengths = (self.lengths - shortest) / (longest + self.epsilon)

        return _softmax(-normalized_lengths, program_temperature)

    def register_program(self, program: AbstractArtifact):
        """ Register a program on the cluster.

        :param program: The program to register
        :type program: AbstractArtifact
        """
        self.programs.append(program)
        self.lengths = np.append(self.lengths, len(str(program)))

    def sample_program(self, program_temperature=None):
        """ Sample a program from the cluster given the program temperature.

        :param program_temperature: The temperature of the program (defaults to ``self.default_program_temperature``)
        :type program_temperature: float, optional
        :return: The sampled program
        :rtype: AbstractArtifact
        """
        temperature = (
            self.default_program_temperature
            if program_temperature is None
            else program_temperature
        )

        weights = self.compute_length_probs(temperature)
        # Draw one index according to the length-based distribution.
        chosen = np.random.choice(len(weights), p=weights, replace=self.sample_with_replacement)
        return self.programs[chosen]
|
ProgramDBFlowModule/Island.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#Heavily Inpired by https://github.com/google-deepmind/funsearch/tree/main
|
2 |
+
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
from typing import Callable
|
6 |
+
from .Cluster import Cluster
|
7 |
+
from .Program import Program
|
8 |
+
import ast
|
9 |
+
import astunparse
|
10 |
+
from typing import Optional,Dict,Any
|
11 |
+
from .artifacts import AbstractArtifact
|
12 |
+
from collections.abc import Mapping, Sequence
|
13 |
+
from copy import deepcopy
|
14 |
+
from .Program import ProgramVisitor,Program,text_to_artifact
|
15 |
+
import dataclasses
|
16 |
+
import scipy
|
17 |
+
from .utils import _softmax
|
18 |
+
ScoresPerTest = Mapping
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
class Island:
    """ An implementation of an Island of the ProgramDB. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    An island holds one Cluster per distinct scores-per-test signature.
    Prompts are built by sampling a few clusters (higher-scoring clusters are
    favoured via a softmax) and concatenating one sampled program from each,
    renamed to versioned variants of the artifact being evolved.

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title = {Mathematical discoveries from program search with large language models},
    year = {2023},
    doi = {10.1038/s41586-023-06924-6}
    }
    """
    def __init__(self,
                 artifact_to_evolve_name: str,
                 artifacts_per_prompt: int,
                 temperature: float,
                 temperature_period: int,
                 template: Program,
                 reduce_score_method: Optional[Callable] = np.mean,
                 sample_with_replacement: Optional[bool] = False):

        # Name of the artifact being evolved (e.g. "solve_function").
        self.artifact_to_evolve_name:str = artifact_to_evolve_name
        # How many previous implementations to show in each prompt.
        self.artifacts_per_prompt: int = artifacts_per_prompt

        # Softmax temperature for cluster selection; decays over each period
        # of registered programs (see pick_clusters).
        self.temperature: float = temperature
        self.temperature_period: int = temperature_period

        # One Cluster per distinct scores-per-test signature string.
        self.clusters: Dict[str,Cluster] = {}
        # Program skeleton (preface + artifacts) used to render prompts.
        self.template: Program = template

        self.total_programs_on_island: int = 0
        # Reduces the per-test score vector to a single scalar (default: mean).
        self.reduce_score_method: Callable = reduce_score_method
        self.sample_with_replacement: bool = sample_with_replacement

    def register_program(self, program: AbstractArtifact ,scores_per_test: ScoresPerTest):
        """ Register a program on the island.

        :param program: The program to register
        :type program: AbstractArtifact
        :param scores_per_test: The scores per test of the program
        :type scores_per_test: Dict[str,Any]
        """
        # Cluster key: the full scores-per-test signature, rendered as
        # "key1:value1 key2:value2 ...".
        scores_per_test_key = " ".join([str(key) + ":" + str(score) for key,score in scores_per_test.items()])

        # Scalar score extracted from each test entry — assumes each value is
        # a mapping with a "score" field (TODO confirm against the evaluator).
        scores_per_test_values = np.array([ score_per_test["score"] for score_per_test in scores_per_test.values()])

        if scores_per_test_key not in self.clusters:
            # First program with this signature: open a new cluster whose
            # score is the reduced (e.g. mean) per-test score.
            score = self.reduce_score_method(scores_per_test_values)
            self.clusters[scores_per_test_key] = Cluster(score = score,first_program=program,sample_with_replacement=self.sample_with_replacement)

        else:
            self.clusters[scores_per_test_key].register_program(program)

        self.total_programs_on_island += 1

    def pick_clusters(self):
        """ Pick the clusters to generate the prompt

        :return: The clusters and their names
        :rtype: Tuple[List[Cluster],List[str]]
        """
        cluster_keys = list(self.clusters.keys())
        clusters = [self.clusters[key] for key in cluster_keys]

        cluster_scores = np.array([cluster.score for cluster in clusters])
        # Temperature decays linearly within each period of registered
        # programs, sharpening selection toward high-scoring clusters.
        cluster_temperature = self.temperature * (1 - (self.total_programs_on_island % self.temperature_period) / self.temperature_period)
        probs = _softmax(cluster_scores, cluster_temperature)

        # Can occur at the beginning, when there are fewer clusters than
        # artifacts_per_prompt.
        functions_per_prompt = min(self.artifacts_per_prompt,len(self.clusters))
        select_cluster_ids = np.random.choice(len(cluster_scores),size=functions_per_prompt,p = probs,replace=self.sample_with_replacement)

        return [clusters[cluster_id] for cluster_id in select_cluster_ids], [cluster_keys[cluster_id] for cluster_id in select_cluster_ids]

    def _get_versioned_artifact_name(self,i):
        """ Get the versioned artifact name

        :param i: The version of the artifact
        :type i: int
        :return: The versioned artifact name
        :rtype: str
        """
        return self.artifact_to_evolve_name + "_v" +str(i)

    def _generate_prompt(self,implementations: Sequence[AbstractArtifact], chosen_cluster_names: Sequence[str]):
        """ Generate the prompt

        :param implementations: The implementations
        :type implementations: Sequence[AbstractArtifact]
        :param chosen_cluster_names: The chosen cluster names
        :type chosen_cluster_names: Sequence[str]
        :return: The prompt
        :rtype: str
        """
        # Deep-copied so the renames/docstring edits below do not mutate the
        # programs stored in the clusters.
        implementations = deepcopy(implementations)

        versioned_artifacts: list[AbstractArtifact] = []

        for i,implementation in enumerate(implementations):
            new_artifact_name = self._get_versioned_artifact_name(i)
            implementation.name = new_artifact_name
            # NOTE(review): split-then-join on " " reproduces the cluster
            # name unchanged — looks like a leftover normalisation step.
            score_per_test = " ".join(chosen_cluster_names[i].split(" "))
            implementation.docstring = f'Scores per test: {score_per_test}'
            if i >= 1:
                implementation.docstring += f'\nImproved version of {self._get_versioned_artifact_name(i-1)}'
            # Rename recursive/self calls so each version calls itself, then
            # re-parse the edited source back into an artifact object.
            implementation = implementation.rename_artifact_calls(source_name = self.artifact_to_evolve_name, target_name = new_artifact_name)
            versioned_artifacts.append(text_to_artifact(implementation))

        # Create the header of the function to be generated by the LLM
        # (empty body: the model is asked to complete version v{next_version}).
        next_version = len(implementations)
        new_artifact_name = self._get_versioned_artifact_name(next_version)

        docstring = f'Improved version of {self._get_versioned_artifact_name(next_version-1)}'

        header = dataclasses.replace(
            implementations[-1],
            name=new_artifact_name,
            body='',
            docstring=docstring
        )

        versioned_artifacts.append(header)
        # Render the template (preface + all versioned artifacts) as text.
        prompt = dataclasses.replace(self.template, artifacts=versioned_artifacts)
        return str(prompt)

    def get_prompt(self):
        """ Get the prompt

        :return: The prompt and the version number the LLM is asked to generate
        :rtype: Tuple[str,int]
        """
        chosen_clusters, chosen_cluster_names = self.pick_clusters()

        scores = [cluster.score for cluster in chosen_clusters]

        # Present implementations in ascending score order so the best one is
        # last (immediately before the header to complete).
        indices = np.argsort(scores)

        sorted_implementations = [chosen_clusters[i].sample_program() for i in indices]
        sorted_cluster_names = [chosen_cluster_names[i] for i in indices]

        version_generated = len(sorted_implementations)
        return self._generate_prompt(sorted_implementations,sorted_cluster_names),version_generated
|
166 |
+
|
167 |
+
|
168 |
+
|
ProgramDBFlowModule/Program.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dataclasses
|
2 |
+
from typing import Optional
|
3 |
+
from .artifacts import AbstractArtifact
|
4 |
+
import ast
|
5 |
+
from .artifacts import FunctionArtifact
|
6 |
+
|
7 |
+
@dataclasses.dataclass(frozen=True)
class Program:
    """A parsed Python program.

    Rendered with ``str()`` as the preface followed by each artifact's
    source, one per line group.
    """

    # `preface` is everything from the beginning of the code till the first
    # function is found (imports, constants, helper classes, ...).
    preface: str
    # The parsed top-level definitions, in source order.
    artifacts: list[AbstractArtifact]

    def __str__(self) -> str:
        header = f'{self.preface}\n' if self.preface else ''
        return header + '\n'.join(str(artifact) for artifact in self.artifacts)

    def find_artifact_index(self, artifact_name: str) -> int:
        """Return the index of the artifact called ``artifact_name``.

        :raises ValueError: if the name is absent, or appears more than once.
        """
        matches = [
            index
            for index, artifact in enumerate(self.artifacts)
            if artifact.name == artifact_name
        ]
        if not matches:
            raise ValueError(
                f'artifact {artifact_name} does not exist in program:\n{str(self)}'
            )
        if len(matches) > 1:
            raise ValueError(
                f'artifact {artifact_name} exists more than once in program:\n'
                f'{str(self)}'
            )
        return matches[0]

    def get_artifact(self, artifact_name: str) -> AbstractArtifact:
        """Return the unique artifact called ``artifact_name``."""
        return self.artifacts[self.find_artifact_index(artifact_name)]
|
41 |
+
|
42 |
+
# TODO: Do this for various types of artifacts (only for functions rn)
|
43 |
+
class ProgramVisitor(ast.NodeVisitor):
    """Parses code to collect all required information to produce a `Program`.

    Only top-level (column-0) function definitions are collected; everything
    before the first one becomes the program's preface.

    Note that we do not store function decorators.
    """

    def __init__(self, sourcecode: str):
        self._codelines: list[str] = sourcecode.splitlines()

        # Source lines before the first top-level function definition.
        self._preface: str = ''
        # Collected top-level artifacts, in source order.
        self._artifacts: list[AbstractArtifact] = []
        # Name of the artifact currently being visited (None before the first).
        self._current_artifact: Optional[str] = None

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Collects all information about the function being parsed."""
        if node.col_offset == 0:  # We only care about first level functions.
            # Bug fix: the original assigned `self._current_function`, an
            # attribute never declared; `_current_artifact` is the one
            # initialised in `__init__`.
            self._current_artifact = node.name
            if not self._artifacts:
                self._preface = '\n'.join(self._codelines[:node.lineno - 1])

            function_end_line = node.end_lineno
            body_start_line = node.body[0].lineno - 1
            # Extract the docstring: present when the first statement is a
            # plain string constant. (`ast.Constant` replaces the
            # `ast.Str` node type deprecated since Python 3.8.)
            docstring = None
            first_stmt = node.body[0]
            if (isinstance(first_stmt, ast.Expr)
                    and isinstance(first_stmt.value, ast.Constant)
                    and isinstance(first_stmt.value.value, str)):
                docstring = f' """{ast.literal_eval(ast.unparse(first_stmt))}"""'
                if len(node.body) > 1:
                    body_start_line = node.body[1].lineno - 1
                else:
                    # Docstring-only function: the body is empty.
                    body_start_line = function_end_line

            self._artifacts.append(FunctionArtifact(
                name=node.name,
                args=ast.unparse(node.args),
                return_type=ast.unparse(node.returns) if node.returns else None,
                docstring=docstring,
                body='\n'.join(self._codelines[body_start_line:function_end_line]),
            ))
        self.generic_visit(node)

    def return_program(self) -> Program:
        """Return the `Program` assembled from the visited source."""
        return Program(preface=self._preface, artifacts=self._artifacts)
|
86 |
+
|
87 |
+
|
88 |
+
def text_to_program(text: str) -> Program:
    """Returns Program object by parsing input text using Python AST.

    We assume that the program is composed of some preface (e.g. imports,
    classes, assignments, ...) followed by a sequence of functions.

    :raises SyntaxError: if the (unfenced) text is not valid Python.
    """
    # LLM replies often wrap the code in a markdown fence — strip it.
    # Robustness fix: tolerate surrounding whitespace/newlines (the original
    # `endswith("```")` check missed the very common trailing "```\n").
    text = text.strip()
    if text.startswith("```python"):
        text = text[len("```python"):]
    if text.endswith("```"):
        text = text[:-3]

    tree = ast.parse(text)
    visitor = ProgramVisitor(text)
    visitor.visit(tree)
    return visitor.return_program()
|
103 |
+
|
104 |
+
def text_to_artifact(text: str) -> AbstractArtifact:
    """Returns Function object by parsing input text using Python AST.

    :raises ValueError: if ``text`` parses to anything other than exactly
        one artifact.
    """
    program = text_to_program(text)
    if len(program.artifacts) != 1:
        # Bug fix: the original formatted `program.functions`, an attribute
        # that does not exist on `Program` (the field is `artifacts`), so
        # this error path raised AttributeError instead of ValueError.
        raise ValueError(f'Only one artifact expected, got {len(program.artifacts)}'
                         f':\n{program.artifacts}')
    return program.artifacts[0]
|
ProgramDBFlowModule/ProgramDBFlow.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
|
2 |
+
|
3 |
+
|
4 |
+
from aiflows.base_flows import AtomicFlow
|
5 |
+
from .Island import Island,ScoresPerTest
|
6 |
+
import numpy as np
|
7 |
+
from typing import Callable,Dict,Union, Any,Optional, List
|
8 |
+
import time
|
9 |
+
from aiflows.utils import logging
|
10 |
+
from .artifacts import AbstractArtifact
|
11 |
+
from .Program import Program,text_to_artifact
|
12 |
+
import ast
|
13 |
+
import os
|
14 |
+
from aiflows.messages import FlowMessage
|
15 |
+
log = logging.get_logger(f"aiflows.{__name__}")
|
16 |
+
|
17 |
+
class ProgramDBFlow(AtomicFlow):
    """ This class implements a ProgramDBFlow. It's a flow that stores programs and their scores in a database. It can also query the database for the best programs or generate a prompt containing stored programs in order to evolve them with a SamplerFlow. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    **Configuration Parameters**:

    - `name` (str): The name of the flow. Default: "ProgramDBFlow"
    - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: " A flow that saves programs in a database of islands"
    - `artifact_to_evolve_name` (str): The name of the artifact/program to evolve. Default: "solve_function"
    - `evaluate_function` (str): The function used to evaluate the program. No Default value. This MUST be passed as a parameter.
    - `evaluate_file_full_content` (str): The full content of the file containing the evaluation function. No Default value. This MUST be passed as a parameter.
    - `num_islands`: The number of islands to use. Default: 3
    - `reset_period`: The period in seconds to reset the islands. Default: 3600
    - `artifacts_per_prompt`: The number of previous artifacts/programs to include in a prompt. Default: 2
    - `temperature`: The temperature of the island. Default: 0.1
    - `temperature_period`: The period in seconds to change the temperature. Default: 30000
    - `sample_with_replacement`: Whether to sample with replacement. Default: False
    - `portion_of_islands_to_reset`: The portion of islands to reset. Default: 0.5
    - `template` (dict): The template to use for a program. Default: {"preface": ""}

    **Input Interface**:

    - `operation` (str): The operation to perform. It can be one of the following: ["register_program","get_prompt","get_best_programs_per_island"]

    **Output Interface**:

    - `retrieved` (Any): The retrieved data. It can be one of the following:
        - If the operation is "get_prompt", it can be a dictionary with the following keys
            - `code` (str): The code of the prompt
            - `version_generated` (int): The version of the prompt generated
            - `island_id` (int): The id of the island that generated the prompt
            - `header` (str): The header of the prompt
        - If the operation is "register_program", it can be a string with the message "Program registered" or "Program failed to register"
        - If the operation is "get_best_programs_per_island", it can be a dictionary with the following keys:
            - `best_island_programs` (List[Dict[str,Any]]): A list of dictionaries with the following keys:
                - `rank` (int): The rank of the program (1 is the best)
                - `score` (float): The score of the program
                - `program` (str): The program
                - `island_id` (int): The id of the island that generated the program

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title = {Mathematical discoveries from program search with large language models},
    year = {2023},
    doi = {10.1038/s41586-023-06924-6}
    }
    """
    def __init__(self,
                 **kwargs
                 ):
        super().__init__(**kwargs)

        # Unpack config (for clarity)
        self.artifact_to_evolve_name: str = self.flow_config["artifact_to_evolve_name"]

        self.artifacts_per_prompt: int = self.flow_config["artifacts_per_prompt"]
        self.temperature: float = self.flow_config["temperature"]
        self.temperature_period: int = self.flow_config["temperature_period"]
        self.reduce_score_method: Callable = np.mean
        self.sample_with_replacement: bool = self.flow_config["sample_with_replacement"]
        self.num_islands: int = self.flow_config["num_islands"]
        self.portion_of_islands_to_reset: float = self.flow_config["portion_of_islands_to_reset"]
        self.reset_period: float = self.flow_config["reset_period"]

        self.evaluate_function = self.flow_config["evaluate_function"]
        self.evaluate_file_full_content = self.flow_config["evaluate_file_full_content"]

        # Validate with a real exception instead of `assert`, which is
        # stripped when Python runs with -O.
        if not 0.0 <= self.portion_of_islands_to_reset <= 1.0:
            raise ValueError("portion_of_islands_to_reset must be between 0 and 1")

        # Round to the nearest integer number of islands.
        self.islands_to_reset = int(round(self.portion_of_islands_to_reset * self.num_islands))

    def set_up_flow_state(self):
        """ This method sets up the state of the flow and clears the previous messages."""
        super().set_up_flow_state()

        # The preface shown at the top of every prompt: the configured
        # preface plus the evaluation function (for context).
        preface = \
            self.flow_config["template"]["preface"] + "\n\n" "#function used to evaluate the program:\n" + self.flow_config["evaluate_function"] + "\n\n"

        self.template: Program = Program(preface=preface, artifacts=[])

        # ~~~ instantiate islands ~~~
        self.flow_state["islands"] = [
            Island(
                artifact_to_evolve_name=self.flow_config["artifact_to_evolve_name"],
                artifacts_per_prompt=self.flow_config["artifacts_per_prompt"],
                temperature=self.flow_config["temperature"],
                temperature_period=self.flow_config["temperature_period"],
                reduce_score_method=np.mean,
                sample_with_replacement=self.flow_config["sample_with_replacement"],
                template=self.template
            )
            for _ in range(self.flow_config["num_islands"])
        ]

        self.flow_state["last_reset_time"] = time.time()
        self.flow_state["best_score_per_island"] = [float("-inf") for _ in range(self.flow_config["num_islands"])]
        self.flow_state["best_program_per_island"] = [None for _ in range(self.flow_config["num_islands"])]
        self.flow_state["best_scores_per_test_per_island"] = [None for _ in range(self.flow_config["num_islands"])]
        self.flow_state["first_program_registered"] = False

    def get_prompt(self):
        """ This method gets a prompt from an island. It returns the code, the version generated and the island id."""
        # Islands are picked uniformly at random.
        island_id = np.random.choice(len(self.flow_state["islands"]))
        code, version_generated = self.flow_state["islands"][island_id].get_prompt()
        return code, version_generated, island_id

    def reset_islands(self):
        """ This method resets the islands. It resets the worst islands and copies the best programs to the worst islands."""
        # Gaussian noise to break ties between equal scores.
        sorted_island_ids = np.argsort(
            np.array(self.flow_state["best_score_per_island"]) +
            (np.random.randn(len(self.flow_state["best_score_per_island"])) * 1e-6)
        )

        # Worst `islands_to_reset` islands get re-seeded from a surviving one.
        reset_island_ids = sorted_island_ids[:self.islands_to_reset]
        keep_island_ids = sorted_island_ids[self.islands_to_reset:]

        for island_id in reset_island_ids:
            self.flow_state["islands"][island_id] = Island(
                artifact_to_evolve_name=self.artifact_to_evolve_name,
                artifacts_per_prompt=self.artifacts_per_prompt,
                temperature=self.temperature,
                temperature_period=self.temperature_period,
                reduce_score_method=np.mean,
                sample_with_replacement=self.sample_with_replacement,
                template=self.template
            )

            self.flow_state["best_score_per_island"][island_id] = float("-inf")
            founder_island_id = np.random.choice(keep_island_ids)
            # Bug fix: the original seeded the reset island with
            # `best_score_per_island[founder_island_id]` (a float) instead of
            # the founder *program*.
            founder = self.flow_state["best_program_per_island"][founder_island_id]
            founder_scores = self.flow_state["best_scores_per_test_per_island"][founder_island_id]
            self._register_program_in_island(program=founder, island_id=island_id, scores_per_test=founder_scores)

    def register_program(self, program: AbstractArtifact, island_id: int, scores_per_test: ScoresPerTest):
        """ This method registers a program in an island. It also updates the best program if needed.

        :param program: The program to register
        :type program: AbstractArtifact
        :param island_id: The id of the island to register the program (None registers it on every island)
        :type island_id: int
        :param scores_per_test: The scores per test of the program
        :type scores_per_test: ScoresPerTest
        """
        # Programs that still call an ancestor version are discarded.
        if not program.calls_ancestor(artifact_to_evolve=self.artifact_to_evolve_name):
            # Program added at the beginning (no island yet): add to all islands.
            if island_id is None:
                for id in range(self.num_islands):
                    self._register_program_in_island(program=program, island_id=id, scores_per_test=scores_per_test)

            else:
                self._register_program_in_island(program=program, island_id=island_id, scores_per_test=scores_per_test)

        # Reset islands if the reset period has elapsed.
        if time.time() - self.flow_state["last_reset_time"] > self.reset_period:
            self.reset_islands()
            self.flow_state["last_reset_time"] = time.time()

    def _register_program_in_island(self, program: AbstractArtifact, scores_per_test: ScoresPerTest, island_id: Optional[int] = None):
        """ This method registers a program in an island. It also updates the best program if needed.

        :param program: The program to register
        :type program: AbstractArtifact
        :param scores_per_test: The scores per test of the program
        :type scores_per_test: ScoresPerTest
        :param island_id: The id of the island to register the program (must be a valid index; callers always pass one)
        :type island_id: Optional[int]
        """
        self.flow_state["islands"][island_id].register_program(program, scores_per_test)

        # Reduce the per-test scores to a scalar to track the island's best.
        scores_per_test_values = np.array([score_per_test["score"] for score_per_test in scores_per_test.values()])
        score = self.reduce_score_method(scores_per_test_values)

        if score > self.flow_state["best_score_per_island"][island_id]:
            self.flow_state["best_score_per_island"][island_id] = score
            self.flow_state["best_program_per_island"][island_id] = str(program)
            self.flow_state["best_scores_per_test_per_island"][island_id] = scores_per_test

    def get_best_programs(self) -> Dict[str, Any]:
        """ This method returns the best programs per island (ranked, 1 is the best)."""
        sorted_island_ids = np.argsort(np.array(self.flow_state["best_score_per_island"]))
        return {
            "best_island_programs": [
                {
                    "rank": self.num_islands - rank,
                    "score": self.flow_state["best_score_per_island"][island_id],
                    "program": self.flow_state["best_program_per_island"][island_id],
                    "island_id": int(island_id),
                }
                for rank, island_id in enumerate(sorted_island_ids)
            ]
        }

    def run(self, input_message: FlowMessage):
        """ This method runs the flow. It performs the operation requested in the input message."""
        input_data = input_message.data
        operation = input_data["operation"]
        content = input_data["content"]

        possible_operations = [
            "register_program",
            "get_prompt",
            "get_best_programs_per_island",
        ]

        if operation not in possible_operations:
            raise ValueError(f"operation must be one of the following: {possible_operations}")

        response = {}
        if operation == "get_prompt":
            response["retrieved"] = False

            # No prompt can be generated before the first program is stored.
            if not self.flow_state["first_program_registered"]:
                response["retrieved"] = False

            else:
                code, version_generated, island_id = self.get_prompt()

                response["retrieved"] = {
                    "code": code,
                    "version_generated": version_generated,
                    "island_id": island_id,
                    "header": self.evaluate_file_full_content
                }

        elif operation == "register_program":
            try:
                artifact = text_to_artifact(content["artifact"])
                island_id = content.get("island_id", None)
                scores_per_test = content["scores_per_test"]
                if scores_per_test is not None:
                    self.register_program(program=artifact, island_id=island_id, scores_per_test=scores_per_test)
                response["retrieved"] = "Program registered"
                self.flow_state["first_program_registered"] = True
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed; parsing/registration failures are reported.
            except Exception:
                response["retrieved"] = "Program failed to register"
        else:
            response["retrieved"] = self.get_best_programs()

        response["from"] = "ProgramDBFlow"
        reply = self.package_output_message(
            input_message,
            response
        )

        self.send_message(reply)
|
ProgramDBFlowModule/ProgramDBFlow.yaml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#default configuration of your flow (if needed) goes here
|
2 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow.instantiate_from_default_config
|
3 |
+
input_interface:
|
4 |
+
- "operation"
|
5 |
+
output_interface:
|
6 |
+
- "retrieved"
|
7 |
+
|
8 |
+
name: ProgramDBFlow
|
9 |
+
description: A flow that saves programs in a database of islands
|
10 |
+
artifact_to_evolve_name: solve_function
|
11 |
+
evaluate_function: ???
|
12 |
+
evaluate_file_full_content: ???
|
13 |
+
num_islands: 3
|
14 |
+
reset_period: 14400 # 4 hours in seconds
|
15 |
+
artifacts_per_prompt: 2
|
16 |
+
temperature: 0.1
|
17 |
+
temperature_period: 30000
|
18 |
+
sample_with_replacement: False
|
19 |
+
portion_of_islands_to_reset: 0.5
|
20 |
+
template:
|
21 |
+
preface: ""
|
ProgramDBFlowModule/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ~~~ Specify the dependencies ~~~
|
2 |
+
# e.g.,
|
3 |
+
# dependencies = [
|
4 |
+
# {"url": "aiflows/AutoGPTFlowModule", "revision": "main"},
|
5 |
+
# ]
|
6 |
+
# Revision can correspond to a branch, commit hash or an absolute path to a local directory (ideal for development)
|
7 |
+
# from aiflows import flow_verse
|
8 |
+
|
9 |
+
# flow_verse.sync_dependencies(dependencies)
|
10 |
+
|
11 |
+
# ~~~ Import of your flow class (if you have any) ~~~
|
12 |
+
# from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
|
13 |
+
from .ProgramDBFlow import ProgramDBFlow
|
ProgramDBFlowModule/artifacts/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .abstract import AbstractArtifact
|
2 |
+
from .function import FunctionArtifact
|
ProgramDBFlowModule/artifacts/abstract.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
|
3 |
+
|
4 |
+
**Citation**:
|
5 |
+
|
6 |
+
@Article{FunSearch2023,
|
7 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
8 |
+
journal = {Nature},
|
9 |
+
title = {Mathematical discoveries from program search with large language models},
|
10 |
+
year = {2023},
|
11 |
+
doi = {10.1038/s41586-023-06924-6}
|
12 |
+
}
|
13 |
+
"""
|
14 |
+
from typing import Any, Optional
|
15 |
+
import dataclasses
|
16 |
+
|
17 |
+
|
18 |
+
@dataclasses.dataclass
class AbstractArtifact:
    """Base class for evolvable code artifacts (e.g. functions).

    Attributes:
        name: identifier of the artifact.
        args: the artifact's argument list, as source text.
        body: the artifact's implementation, as source text.
        return_type: optional return-type annotation, as source text.
        docstring: optional documentation string.
    """

    name: str
    args: str
    body: str
    return_type: Optional[str] = None
    docstring: Optional[str] = None

    def __str__(self) -> str:
        """Render the artifact as source code. Subclasses must implement this."""
        raise NotImplementedError()

    def __setattr__(self, name: str, value: str) -> None:
        """Normalize `body` and `docstring` on every assignment (incl. __init__)."""
        if name == 'body':
            # Keep the body free of leading/trailing blank lines.
            value = value.strip('\n')
        elif name == 'docstring' and value is not None and '"""' in value:
            # Strip surrounding whitespace and embedded triple quotes so the
            # docstring can safely be re-embedded in generated source.
            value = value.strip().replace('"""', '')
        super().__setattr__(name, value)

    def rename_artifact_calls(self, source_name, target_name) -> str:
        """Rename calls to `source_name` into `target_name`. Subclass responsibility."""
        raise NotImplementedError

    def text_to_artifact(self):
        """Parse source text into an artifact instance. Subclass responsibility."""
        raise NotImplementedError

    def calls_ancestor(self, artifact_to_evolve: str) -> bool:
        """Report whether this artifact calls an earlier version of itself. Subclass responsibility."""
        raise NotImplementedError
|
ProgramDBFlowModule/artifacts/function.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
|
3 |
+
|
4 |
+
**Citation**:
|
5 |
+
|
6 |
+
@Article{FunSearch2023,
|
7 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
8 |
+
journal = {Nature},
|
9 |
+
title = {Mathematical discoveries from program search with large language models},
|
10 |
+
year = {2023},
|
11 |
+
doi = {10.1038/s41586-023-06924-6}
|
12 |
+
}
|
13 |
+
"""
|
14 |
+
|
15 |
+
from . import AbstractArtifact
|
16 |
+
import dataclasses
|
17 |
+
import tokenize
|
18 |
+
import io
|
19 |
+
from collections.abc import Iterator, MutableSet, Sequence
|
20 |
+
|
21 |
+
@dataclasses.dataclass
class FunctionArtifact(AbstractArtifact):
    """A concrete artifact representing a single Python function.

    Rendering (`__str__`), call detection and call renaming are built on the
    stdlib `tokenize` module, so they operate on real Python tokens rather
    than raw text.
    """

    def __str__(self) -> str:
        """Render the function as Python source: signature, optional docstring, body."""

        return_type = f' -> {self.return_type}' if self.return_type else ''
        function = f'def {self.name}({self.args}){return_type}:\n'

        if self.docstring:
            # self.docstring is already indented on every line except the first one.
            # Here, we assume the indentation is always two spaces.
            new_line = '\n' if self.body else ''
            function += f' """{self.docstring}"""{new_line}'

        # self.body is already indented.
        function += self.body + '\n\n'
        return function

    @staticmethod
    def _tokenize(code: str) -> Iterator[tokenize.TokenInfo]:
        """Transforms `code` into Python tokens."""
        code_bytes = code.encode()
        code_io = io.BytesIO(code_bytes)
        return tokenize.tokenize(code_io.readline)

    @staticmethod
    def _untokenize(tokens: Sequence[tokenize.TokenInfo]) -> str:
        """Transforms a list of Python tokens into code."""
        code_bytes = tokenize.untokenize(tokens)
        return code_bytes.decode()

    def _get_artifacts_called(self) -> MutableSet[str]:
        """Returns the set of all functions called in function."""
        code = str(self.body)
        return set(token.string for token, is_call in
                   self._yield_token_and_is_call(code) if is_call)

    def calls_ancestor(self,artifact_to_evolve: str) -> bool:
        """Returns whether the generated function is calling an earlier version."""

        for name in self._get_artifacts_called():
            # In `program` passed into this function the most recently generated
            # function has already been renamed to `function_to_evolve` (without the
            # suffix). Therefore any function call starting with `function_to_evolve_v`
            # is a call to an ancestor function.
            if name.startswith(f'{artifact_to_evolve}_v') and not name.startswith(self.name):
                return True
        return False


    def _yield_token_and_is_call(cls,code: str) -> Iterator[tuple[tokenize.TokenInfo, bool]]:
        """Yields each token with a bool indicating whether it is a function call."""
        # NOTE(review): despite the parameter name `cls`, this is a plain
        # instance method (there is no @classmethod decorator); it is only
        # called through instances, so `cls` is effectively `self`.

        tokens = cls._tokenize(code)
        prev_token = None
        is_attribute_access = False
        for token in tokens:
            if (prev_token and  # If the previous token exists and
                prev_token.type == tokenize.NAME and  # it is a Python identifier
                token.type == tokenize.OP and  # and the current token is a delimiter
                token.string == "("
            ):  # and in particular it is '('.
                # A NAME immediately followed by '(' is a call, unless the NAME
                # was reached via attribute access (e.g. `obj.method(`).
                yield prev_token, not is_attribute_access
                is_attribute_access = False
            else:
                if prev_token:
                    is_attribute_access = (
                        prev_token.type == tokenize.OP and prev_token.string == '.'
                    )
                    yield prev_token, False

            prev_token = token
        if prev_token:
            # Flush the last buffered token; it cannot be a call.
            yield prev_token, False


    def rename_artifact_calls(self, source_name: str, target_name: str) -> str:
        """Return this function's source with every *call* to `source_name` renamed to `target_name`.

        Non-call occurrences (e.g. attribute accesses, comments, strings) are
        left untouched because renaming happens at token level.
        """
        implementation = str(self)

        if source_name not in implementation:
            # Fast path: nothing to rename.
            return implementation

        modified_tokens = []
        for token, is_call in self._yield_token_and_is_call(implementation):
            if is_call and token.string == source_name:
                # Replace the function name token
                modified_token = tokenize.TokenInfo(
                    type=token.type,
                    string=target_name,
                    start=token.start,
                    end=token.end,
                    line=token.line,
                )
                modified_tokens.append(modified_token)
            else:
                modified_tokens.append(token)
        return self._untokenize(modified_tokens)
|
ProgramDBFlowModule/demo.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#default configuration of your flow (if needed) goes here
|
2 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow.instantiate_from_default_config
|
3 |
+
input_interface:
|
4 |
+
- "operation"
|
5 |
+
output_interface:
|
6 |
+
- "retrieved"
|
7 |
+
|
8 |
+
name: ProgramDBFlow
|
9 |
+
description: A flow that saves programs in a database of islands
|
10 |
+
artifact_to_evolve_name: solve_function
|
11 |
+
evaluate_function: |2-
|
12 |
+
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:\n """Returns the score of the solve function we\'re evolving based on the tests_inputs and expected_outputs.\n Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.\n """\n if solve(solve_function, tests_inputs, expected_outputs) == True:\n return 1.0\n return 0.0
|
13 |
+
|
14 |
+
evaluate_file_full_content: |2-
|
15 |
+
"""Problem Description:\nServal has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\\leq i\\leq n.\nServal can perform the following operation called Inversion Magic on the string s:\nChoose an segment [l, r] (1\\leq l\\leq r\\leq n). For l\\leq i\\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.\nFor example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.\nServal wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.\nA string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.\n\nInput Description:\nInput\nEach test contains multiple test cases. The first line contains the number of test cases t (1\\leq t\\leq 10^4). The description of the test cases follows.\nThe first line of each test case contains a single integer n (2\\leq n\\leq 10^5) — the length of string s.\nThe second line of each test case contains a binary string s of length n. 
Only characters 0 and 1 can appear in s.\nIt\'s guaranteed that the sum of n over all test cases does not exceed 2\\cdot 10^5.\n\nOutput Description:\nOutput\nFor each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.\nYou can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).\n\nPublic Tests:\nTest 1:\n Input: [\'1\', \'4\', \'1001\']\n Output: \'YES\'\nTest 2:\n Input: [\'1\', \'5\', \'10010\']\n Output: \'YES\'\nTest 3:\n Input: [\'1\', \'7\', \'0111011\']\n Output: \'NO\'\n\n"""\n\n\nimport ast\nimport itertools\nimport numpy as np\nfrom typing import List\n\ndef solve(solve_function: str,input: List[str], expected_output: str) -> str:\n """function used to run the solve function on input *kwargs and return the the predicted output\n \n :param solve_function: the function to run (the solve function below as a string)\n :type solve_function: str\n :param kwargs: the inputs to the solve function\n :type kwargs: List[str]\n """\n local_namespace = {}\n exec(solve_function,local_namespace)\n found_name, program_name = get_function_name_from_code(solve_function)\n \n if not found_name:\n raise ValueError(f"Function name not found in program: {solve_function}")\n \n solve_fn = local_namespace.get(program_name)\n \n prediction = solve_fn(input)\n \n prediction = prediction.split()\n expected_output = expected_output.split()\n \n if len(prediction) != len(expected_output):\n raise ValueError(f"Invalid Format of prediction")\n \n for i in range(len(prediction)):\n if prediction[i] != expected_output[i]:\n return False\n \n return True\n\ndef evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:\n """Returns the score of the solve function we\'re evolving based on the tests_inputs and expected_outputs.\n Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.\n """\n if 
solve(solve_function,tests_inputs,expected_outputs) == True:\n return 1.0\n return 0.0\n\n\ndef get_function_name_from_code(code):\n tree = ast.parse(code)\n for node in ast.walk(tree):\n if isinstance(node, ast.FunctionDef):\n return True, node.name\n\n # something is wrong\n return False, None\n\n\n\n\n \n
|
16 |
+
num_islands: 3
|
17 |
+
reset_period: 14400 # 4 hours in seconds
|
18 |
+
artifacts_per_prompt: 2
|
19 |
+
temperature: 0.1
|
20 |
+
temperature_period: 30000
|
21 |
+
sample_with_replacement: False
|
22 |
+
portion_of_islands_to_reset: 0.5
|
23 |
+
template:
|
24 |
+
preface: ""
|
ProgramDBFlowModule/pip_requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
numpy==1.26.2
|
2 |
+
SciPy==1.11.4
|
ProgramDBFlowModule/run.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import hydra

import aiflows
from aiflows.backends.api_info import ApiInfo
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys

from aiflows import logging
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache

from aiflows.utils import serving
from aiflows.workers import run_dispatch_worker_thread
from aiflows.messages import FlowMessage
from aiflows.interfaces import KeyInterface
from aiflows.utils.colink_utils import start_colink_server
from aiflows import flow_verse


# Pull the FunSearch flow module from the parent directory (a local
# development checkout) instead of the remote flow-verse registry.
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        "revision": os.path.abspath("../")
    }
]
flow_verse.sync_dependencies(dependencies)

logging.set_verbosity_debug()


if __name__ == "__main__":

    # Start a local CoLink server to host the served flows.
    cl = start_colink_server()

    # Serve ProgramDBFlow (and, recursively, any subflows it declares).
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow",
        flow_endpoint="ProgramDBFlow",
    )

    # Background worker that dispatches incoming messages to served flows.
    run_dispatch_worker_thread(cl)

    config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))

    # Proxy used to talk to the served ProgramDBFlow instance.
    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="ProgramDBFlow",
        config_overrides=config_overrides,
    )

    # 1) Register a first (trivial) program together with its per-test scores.
    data = {
        'operation': 'register_program',
        'content': {
            'artifact': 'def solve_function(input) -> str:\n """Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)"""\n return \'YES\'\n',
            'scores_per_test':
                {
                    'test_1':
                        {'score': 1.0, 'feedback': 'No feedback available.'},
                    'test_2':
                        {'score': 1.0, 'feedback': 'No feedback available.'},
                    'test_3': {'score': 0.0, 'feedback': 'No feedback available.'},
                    'test_4': {'score': -1, 'feedback': 'Invalid Format of prediction'}
                }
        }
    }

    # Fire-and-forget: registration does not need the reply.
    input_message = funsearch_proxy.package_input_message(data = data)
    funsearch_proxy.send_message(input_message)

    # 2) Ask the database for a prompt; .get_data() blocks until the reply arrives.
    data = {'operation': 'get_prompt', 'content': {}}
    input_message = funsearch_proxy.package_input_message(data = data)

    example_of_prompt = funsearch_proxy.get_reply_future(input_message).get_data()

    # 3) Retrieve the best program found on each island.
    data = {
        "operation": "get_best_programs_per_island",
        "content": {}
    }

    input_message = funsearch_proxy.package_input_message(data = data)

    best_pg_per_island = funsearch_proxy.get_reply_future(input_message).get_data()

    print("~~~Example of Prompt~~~")
    print(example_of_prompt)

    print("~~~Best Programs Per Island~~~")
    print(best_pg_per_island)
|
ProgramDBFlowModule/utils.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
|
2 |
+
|
3 |
+
**Citation**:
|
4 |
+
|
5 |
+
@Article{FunSearch2023,
|
6 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
7 |
+
journal = {Nature},
|
8 |
+
title = {Mathematical discoveries from program search with large language models},
|
9 |
+
year = {2023},
|
10 |
+
doi = {10.1038/s41586-023-06924-6}
|
11 |
+
}
|
12 |
+
"""
|
13 |
+
|
14 |
+
import numpy as np
|
15 |
+
import scipy
|
16 |
+
def add_gaussian_noise(array, mean=0, std=1):
    """Return `array` plus element-wise Gaussian noise drawn from N(mean, std)."""
    noise = np.random.normal(mean, std, array.shape)
    return array + noise
|
18 |
+
|
19 |
+
def get_versioned_function_name(function_name, i):
    """Return the versioned function name `<function_name>_v<i>`.

    FunSearch tracks successive generations of the evolved function by
    suffixing its name with `_v<generation>` — the same convention that
    `FunctionArtifact.calls_ancestor` matches on (`f'{name}_v'` prefix).

    :param function_name: base name of the function
    :param i: version/generation number
    :return: the versioned function name
    """
    # Was an unimplemented stub (returned None); implemented per the
    # `_v<i>` naming convention used elsewhere in this module.
    return f"{function_name}_v{i}"
|
21 |
+
|
22 |
+
def rename_function(implementation,function_name):
    """Rename the function defined in `implementation` to `function_name`.

    NOTE(review): unimplemented placeholder — currently returns None.
    See `FunctionArtifact.rename_artifact_calls` for related, implemented
    token-level renaming; confirm intended semantics before use.
    """
    pass
|
24 |
+
|
25 |
+
def string_concatenation(strings, split_char='\n\n'):
    """Concatenate `strings` with `split_char` between consecutive items.

    :param strings: iterable of strings to join
    :param split_char: separator inserted between items (default: blank line)
    :return: the joined string ('' for an empty sequence)
    """
    # Was an unimplemented stub (returned None); the obvious implementation
    # is the stdlib join.
    return split_char.join(strings)
|
27 |
+
|
28 |
+
def make_header_like(implementation,function_name):
    """Presumably rewrites `implementation`'s header/signature to use `function_name`.

    NOTE(review): unimplemented placeholder — currently returns None. The
    intended semantics are not established by this file; confirm before use.
    """
    pass
|
30 |
+
|
31 |
+
def rename_function_calls(implementation,function_name):
    """Presumably renames call sites inside `implementation` to `function_name`.

    NOTE(review): unimplemented placeholder — currently returns None.
    `FunctionArtifact.rename_artifact_calls` implements this behavior at
    token level; confirm whether this stub should delegate to it.
    """
    pass
|
33 |
+
|
34 |
+
def extract_template_from_program(program):
    """Presumably extracts a prompt template/skeleton from `program`.

    NOTE(review): unimplemented placeholder — currently returns None. The
    intended output format is not established by this file; confirm before use.
    """
    pass
|
36 |
+
|
37 |
+
|
38 |
+
def _softmax(logits: np.ndarray, temperature: float, epsilon = 1e-6) -> np.ndarray:
|
39 |
+
"""Returns the tempered softmax of 1D finite `logits`."""
|
40 |
+
if not np.all(np.isfinite(logits)):
|
41 |
+
non_finites = set(logits[~np.isfinite(logits)])
|
42 |
+
raise ValueError(f'`logits` contains non-finite value(s): {non_finites}')
|
43 |
+
if not np.issubdtype(logits.dtype, np.floating):
|
44 |
+
logits = np.array(logits, dtype=np.float32)
|
45 |
+
|
46 |
+
result = scipy.special.softmax(logits / temperature, axis=-1)
|
47 |
+
|
48 |
+
#Non zero mass to prevent zero probabilities
|
49 |
+
result += epsilon # Add epsilon to prevent zeros
|
50 |
+
result /= np.sum(result, axis=-1, keepdims=True) # Normalize
|
51 |
+
|
52 |
+
# Ensure that probabilities sum to 1 to prevent error in `np.random.choice`.
|
53 |
+
index = np.argmax(result)
|
54 |
+
result[index] = 1 - np.sum(result[0:index]) - np.sum(result[index+1:])
|
55 |
+
return result
|
README.md
CHANGED
@@ -1,3 +1,675 @@
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
---
|
4 |
+
# Table of Contents
|
5 |
+
|
6 |
+
* [FunSearch](#FunSearch)
|
7 |
+
* [FunSearch](#FunSearch.FunSearch)
|
8 |
+
* [make\_request\_for\_prompt](#FunSearch.FunSearch.make_request_for_prompt)
|
9 |
+
* [request\_samplers](#FunSearch.FunSearch.request_samplers)
|
10 |
+
* [get\_next\_state](#FunSearch.FunSearch.get_next_state)
|
11 |
+
* [set\_up\_flow\_state](#FunSearch.FunSearch.set_up_flow_state)
|
12 |
+
* [save\_message\_to\_state](#FunSearch.FunSearch.save_message_to_state)
|
13 |
+
* [rename\_key\_message\_in\_state](#FunSearch.FunSearch.rename_key_message_in_state)
|
14 |
+
* [message\_in\_state](#FunSearch.FunSearch.message_in_state)
|
15 |
+
* [get\_message\_from\_state](#FunSearch.FunSearch.get_message_from_state)
|
16 |
+
* [pop\_message\_from\_state](#FunSearch.FunSearch.pop_message_from_state)
|
17 |
+
* [merge\_message\_request\_state](#FunSearch.FunSearch.merge_message_request_state)
|
18 |
+
* [register\_data\_to\_state](#FunSearch.FunSearch.register_data_to_state)
|
19 |
+
* [call\_program\_db](#FunSearch.FunSearch.call_program_db)
|
20 |
+
* [call\_evaluator](#FunSearch.FunSearch.call_evaluator)
|
21 |
+
* [call\_sampler](#FunSearch.FunSearch.call_sampler)
|
22 |
+
* [generate\_reply](#FunSearch.FunSearch.generate_reply)
|
23 |
+
* [run](#FunSearch.FunSearch.run)
|
24 |
+
* [ProgramDBFlowModule](#ProgramDBFlowModule)
|
25 |
+
* [ProgramDBFlowModule.ProgramDBFlow](#ProgramDBFlowModule.ProgramDBFlow)
|
26 |
+
* [ProgramDBFlow](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow)
|
27 |
+
* [set\_up\_flow\_state](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.set_up_flow_state)
|
28 |
+
* [get\_prompt](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_prompt)
|
29 |
+
* [reset\_islands](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.reset_islands)
|
30 |
+
* [register\_program](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.register_program)
|
31 |
+
* [get\_best\_programs](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_best_programs)
|
32 |
+
* [run](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.run)
|
33 |
+
* [SamplerFlowModule](#SamplerFlowModule)
|
34 |
+
* [SamplerFlowModule.SamplerFlow](#SamplerFlowModule.SamplerFlow)
|
35 |
+
* [SamplerFlow](#SamplerFlowModule.SamplerFlow.SamplerFlow)
|
36 |
+
* [run](#SamplerFlowModule.SamplerFlow.SamplerFlow.run)
|
37 |
+
* [EvaluatorFlowModule](#EvaluatorFlowModule)
|
38 |
+
* [EvaluatorFlowModule.EvaluatorFlow](#EvaluatorFlowModule.EvaluatorFlow)
|
39 |
+
* [EvaluatorFlow](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow)
|
40 |
+
* [load\_functions](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.load_functions)
|
41 |
+
* [run\_function\_with\_timeout](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run_function_with_timeout)
|
42 |
+
* [evaluate\_program](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.evaluate_program)
|
43 |
+
* [analyse](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.analyse)
|
44 |
+
* [run](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run)
|
45 |
+
|
46 |
+
<a id="FunSearch"></a>
|
47 |
+
|
48 |
+
# FunSearch
|
49 |
+
|
50 |
+
<a id="FunSearch.FunSearch"></a>
|
51 |
+
|
52 |
+
## FunSearch Objects
|
53 |
+
|
54 |
+
```python
|
55 |
+
class FunSearch(CompositeFlow)
|
56 |
+
```
|
57 |
+
|
58 |
+
This class implements FunSearch. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch) . It's a Flow in charge of starting, stopping and managing (passing around messages) the FunSearch process. It passes messages around to the following subflows:
|
59 |
+
|
60 |
+
- ProgramDBFlow: which is in charge of storing and retrieving programs.
|
61 |
+
- SamplerFlow: which is in charge of sampling programs.
|
62 |
+
- EvaluatorFlow: which is in charge of evaluating programs.
|
63 |
+
|
64 |
+
*Configuration Parameters*:
|
65 |
+
|
66 |
+
- `name` (str): The name of the flow. Default: "FunSearchFlow".
|
67 |
+
- `description` (str): The description of the flow. Default: "A flow implementing FunSearch"
|
68 |
+
- `subflows_config` (Dict[str,Any]): A dictionary of subflows configurations. Default:
|
69 |
+
- `ProgramDBFlow`: By default, it uses the `ProgramDBFlow` class and uses its default parameters.
|
70 |
+
- `SamplerFlow`: By default, it uses the `SamplerFlow` class and uses its default parameters.
|
71 |
+
- `EvaluatorFlow`: By default, it uses the `EvaluatorFlow` class and uses its default parameters.
|
72 |
+
|
73 |
+
**Input Interface**:
|
74 |
+
|
75 |
+
- `from` (str): The flow from which the message is coming from. It can be one of the following: "FunSearch", "SamplerFlow", "EvaluatorFlow", "ProgramDBFlow".
|
76 |
+
- `operation` (str): The operation to perform. It can be one of the following: "start", "stop", "get_prompt", "get_best_programs_per_island", "register_program".
|
77 |
+
- `content` (Dict[str,Any]): The content associated to an operation. Here is the expected content for each operation:
|
78 |
+
- "start":
|
79 |
+
- `num_samplers` (int): The number of samplers to start up. Note that it's still restricted by the number of workers available. Default: 1.
|
80 |
+
- "stop":
|
81 |
+
- No content. Pass either an empty dictionary or None. Works also with no content.
|
82 |
+
- "get_prompt":
|
83 |
+
- No content. Pass either an empty dictionary or None. Works also with no content.
|
84 |
+
- "get_best_programs_per_island":
|
85 |
+
- No content. Pass either an empty dictionary or None. Works also with no content.
|
86 |
+
|
87 |
+
**Output Interface**:
|
88 |
+
|
89 |
+
- `retrieved` (Dict[str,Any]): The retrieved data.
|
90 |
+
|
91 |
+
**Citation**:
|
92 |
+
|
93 |
+
@Article{FunSearch2023,
|
94 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
95 |
+
journal = {Nature},
|
96 |
+
title = {Mathematical discoveries from program search with large language models},
|
97 |
+
year = {2023},
|
98 |
+
doi = {10.1038/s41586-023-06924-6}
|
99 |
+
}
|
100 |
+
|
101 |
+
<a id="FunSearch.FunSearch.make_request_for_prompt"></a>
|
102 |
+
|
103 |
+
#### make\_request\_for\_prompt
|
104 |
+
|
105 |
+
```python
|
106 |
+
def make_request_for_prompt()
|
107 |
+
```
|
108 |
+
|
109 |
+
This method makes a request for a prompt. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt.
|
110 |
+
|
111 |
+
<a id="FunSearch.FunSearch.request_samplers"></a>
|
112 |
+
|
113 |
+
#### request\_samplers
|
114 |
+
|
115 |
+
```python
|
116 |
+
def request_samplers(input_message: FlowMessage)
|
117 |
+
```
|
118 |
+
|
119 |
+
This method requests samplers. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt.
|
120 |
+
|
121 |
+
**Arguments**:
|
122 |
+
|
123 |
+
- `input_message` (`FlowMessage`): The input message that triggered the request for samplers.
|
124 |
+
|
125 |
+
<a id="FunSearch.FunSearch.get_next_state"></a>
|
126 |
+
|
127 |
+
#### get\_next\_state
|
128 |
+
|
129 |
+
```python
|
130 |
+
def get_next_state(input_message: FlowMessage)
|
131 |
+
```
|
132 |
+
|
133 |
+
This method determines the next state of the flow based on the input message. It will return the next state based on the current state and the message received.
|
134 |
+
|
135 |
+
**Arguments**:
|
136 |
+
|
137 |
+
- `input_message` (`FlowMessage`): The input message that triggered the request for the next state.
|
138 |
+
|
139 |
+
**Returns**:
|
140 |
+
|
141 |
+
`str`: The next state of the flow.
|
142 |
+
|
143 |
+
<a id="FunSearch.FunSearch.set_up_flow_state"></a>
|
144 |
+
|
145 |
+
#### set\_up\_flow\_state
|
146 |
+
|
147 |
+
```python
|
148 |
+
def set_up_flow_state()
|
149 |
+
```
|
150 |
+
|
151 |
+
This method sets up the state of the flow. It's called at the beginning of the flow.
|
152 |
+
|
153 |
+
<a id="FunSearch.FunSearch.save_message_to_state"></a>
|
154 |
+
|
155 |
+
#### save\_message\_to\_state
|
156 |
+
|
157 |
+
```python
|
158 |
+
def save_message_to_state(msg_id: str, message: FlowMessage)
|
159 |
+
```
|
160 |
+
|
161 |
+
This method saves a message to the state of the flow. It's used to keep track of state on a per message basis (i.e., state of the flow depending on the message received and id).
|
162 |
+
|
163 |
+
**Arguments**:
|
164 |
+
|
165 |
+
- `msg_id` (`str`): The id of the message to save.
|
166 |
+
- `message` (`FlowMessage`): The message to save.
|
167 |
+
|
168 |
+
<a id="FunSearch.FunSearch.rename_key_message_in_state"></a>
|
169 |
+
|
170 |
+
#### rename\_key\_message\_in\_state
|
171 |
+
|
172 |
+
```python
|
173 |
+
def rename_key_message_in_state(old_key: str, new_key: str)
|
174 |
+
```
|
175 |
+
|
176 |
+
This method renames a key in the state of the flow in the "msg_requests" dictionary. It's used to rename a key in the state of the flow (i.e., rename a message id).
|
177 |
+
|
178 |
+
**Arguments**:
|
179 |
+
|
180 |
+
- `old_key` (`str`): The old key to rename.
|
181 |
+
- `new_key` (`str`): The new key to rename to.
|
182 |
+
|
183 |
+
<a id="FunSearch.FunSearch.message_in_state"></a>
|
184 |
+
|
185 |
+
#### message\_in\_state
|
186 |
+
|
187 |
+
```python
|
188 |
+
def message_in_state(msg_id: str) -> bool
|
189 |
+
```
|
190 |
+
|
191 |
+
This method checks if a message is in the state of the flow (in "msg_requests" dictionary). It returns True if the message is in the state, otherwise it returns False.
|
192 |
+
|
193 |
+
**Arguments**:
|
194 |
+
|
195 |
+
- `msg_id` (`str`): The id of the message to check if it's in the state.
|
196 |
+
|
197 |
+
**Returns**:
|
198 |
+
|
199 |
+
`bool`: True if the message is in the state, otherwise False.
|
200 |
+
|
201 |
+
<a id="FunSearch.FunSearch.get_message_from_state"></a>
|
202 |
+
|
203 |
+
#### get\_message\_from\_state
|
204 |
+
|
205 |
+
```python
|
206 |
+
def get_message_from_state(msg_id: str) -> Dict[str, Any]
|
207 |
+
```
|
208 |
+
|
209 |
+
This method returns the state associated with a message id in the state of the flow (in "msg_requests" dictionary).
|
210 |
+
|
211 |
+
**Arguments**:
|
212 |
+
|
213 |
+
- `msg_id` (`str`): The id of the message to get the state from.
|
214 |
+
|
215 |
+
**Returns**:
|
216 |
+
|
217 |
+
`Dict[str,Any]`: The state associated with the message id.
|
218 |
+
|
219 |
+
<a id="FunSearch.FunSearch.pop_message_from_state"></a>
|
220 |
+
|
221 |
+
#### pop\_message\_from\_state
|
222 |
+
|
223 |
+
```python
|
224 |
+
def pop_message_from_state(msg_id: str) -> Dict[str, Any]
|
225 |
+
```
|
226 |
+
|
227 |
+
This method pops a message from the state of the flow (in the "msg_requests" dictionary). It returns the state associated with a message and removes it from the state.
|
228 |
+
|
229 |
+
**Arguments**:
|
230 |
+
|
231 |
+
- `msg_id` (`str`): The id of the message to pop from the state.
|
232 |
+
|
233 |
+
**Returns**:
|
234 |
+
|
235 |
+
`Dict[str,Any]`: The state associated with the message id.
|
236 |
+
|
237 |
+
<a id="FunSearch.FunSearch.merge_message_request_state"></a>
|
238 |
+
|
239 |
+
#### merge\_message\_request\_state
|
240 |
+
|
241 |
+
```python
|
242 |
+
def merge_message_request_state(id: str, new_states: Dict[str, Any])
|
243 |
+
```
|
244 |
+
|
245 |
+
This method merges new states to a message in the state of the flow (in "msg_requests" dictionary). It merges new states to a message in the state.
|
246 |
+
|
247 |
+
**Arguments**:
|
248 |
+
|
249 |
+
- `id` (`str`): The id of the message to merge new states to.
|
250 |
+
- `new_states` (`Dict[str,Any]`): The new states to merge to the message.
|
251 |
+
|
252 |
+
<a id="FunSearch.FunSearch.register_data_to_state"></a>
|
253 |
+
|
254 |
+
#### register\_data\_to\_state
|
255 |
+
|
256 |
+
```python
|
257 |
+
def register_data_to_state(input_message: FlowMessage)
|
258 |
+
```
|
259 |
+
|
260 |
+
This method registers the input message data to the flow state. It's called everytime a new input message is received.
|
261 |
+
|
262 |
+
**Arguments**:
|
263 |
+
|
264 |
+
- `input_message` (`FlowMessage`): The input message
|
265 |
+
|
266 |
+
<a id="FunSearch.FunSearch.call_program_db"></a>
|
267 |
+
|
268 |
+
#### call\_program\_db
|
269 |
+
|
270 |
+
```python
|
271 |
+
def call_program_db(input_message)
|
272 |
+
```
|
273 |
+
|
274 |
+
This method calls the ProgramDBFlow. It sends a message to the ProgramDBFlow with the data of the input message.
|
275 |
+
|
276 |
+
**Arguments**:
|
277 |
+
|
278 |
+
- `input_message` (`FlowMessage`): The input message to send to the ProgramDBFlow.
|
279 |
+
|
280 |
+
<a id="FunSearch.FunSearch.call_evaluator"></a>
|
281 |
+
|
282 |
+
#### call\_evaluator
|
283 |
+
|
284 |
+
```python
|
285 |
+
def call_evaluator(input_message)
|
286 |
+
```
|
287 |
+
|
288 |
+
This method calls the EvaluatorFlow. It sends a message to the EvaluatorFlow with the data of the input message.
|
289 |
+
|
290 |
+
**Arguments**:
|
291 |
+
|
292 |
+
- `input_message` (`FlowMessage`): The input message to send to the EvaluatorFlow.
|
293 |
+
|
294 |
+
<a id="FunSearch.FunSearch.call_sampler"></a>
|
295 |
+
|
296 |
+
#### call\_sampler
|
297 |
+
|
298 |
+
```python
|
299 |
+
def call_sampler(input_message)
|
300 |
+
```
|
301 |
+
|
302 |
+
This method calls the SamplerFlow. It sends a message to the SamplerFlow with the data of the input message.
|
303 |
+
|
304 |
+
**Arguments**:
|
305 |
+
|
306 |
+
- `input_message` (`FlowMessage`): The input message to send to the SamplerFlow.
|
307 |
+
|
308 |
+
<a id="FunSearch.FunSearch.generate_reply"></a>
|
309 |
+
|
310 |
+
#### generate\_reply
|
311 |
+
|
312 |
+
```python
|
313 |
+
def generate_reply(input_message: FlowMessage)
|
314 |
+
```
|
315 |
+
|
316 |
+
This method generates a reply to a message sent to user. It packages the output message and sends it.
|
317 |
+
|
318 |
+
**Arguments**:
|
319 |
+
|
320 |
+
- `input_message` (`FlowMessage`): The input message to generate a reply to.
|
321 |
+
|
322 |
+
<a id="FunSearch.FunSearch.run"></a>
|
323 |
+
|
324 |
+
#### run
|
325 |
+
|
326 |
+
```python
|
327 |
+
def run(input_message: FlowMessage)
|
328 |
+
```
|
329 |
+
|
330 |
+
This method runs the flow. It's the main method of the flow. It's called when the flow is executed.
|
331 |
+
|
332 |
+
<a id="ProgramDBFlowModule"></a>
|
333 |
+
|
334 |
+
# ProgramDBFlowModule
|
335 |
+
|
336 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow"></a>
|
337 |
+
|
338 |
+
## ProgramDBFlow Objects
|
339 |
+
|
340 |
+
```python
|
341 |
+
class ProgramDBFlow(AtomicFlow)
|
342 |
+
```
|
343 |
+
|
344 |
+
This class implements a ProgramDBFlow. It's a flow that stores programs and their scores in a database. It can also query the database for the best programs or generate a prompt containing stored programs in order to evolve them with a SamplerFlow. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
|
345 |
+
|
346 |
+
**Configuration Parameters**:
|
347 |
+
|
348 |
+
- `name` (str): The name of the flow. Default: "ProgramDBFlow"
|
349 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: " A flow that saves programs in a database of islands"
|
350 |
+
- `artifact_to_evolve_name` (str): The name of the artifact/program to evolve. Default: "solve_function"
|
351 |
+
- `evaluate_function` (str): The function used to evaluate the program. No Default value. This MUST be passed as a parameter.
|
352 |
+
- `evaluate_file_full_content` (str): The full content of the file containing the evaluation function. No Default value. This MUST be passed as a parameter.
|
353 |
+
- `num_islands`: The number of islands to use. Default: 3
|
354 |
+
- `reset_period`: The period in seconds to reset the islands. Default: 3600
|
355 |
+
- `artifacts_per_prompt`: The number of previous artifacts/programs to include in a prompt. Default: 2
|
356 |
+
- `temperature`: The temperature of the island. Default: 0.1
|
357 |
+
- `temperature_period`: The period in seconds to change the temperature. Default: 30000
|
358 |
+
- `sample_with_replacement`: Whether to sample with replacement. Default: False
|
359 |
+
- `portion_of_islands_to_reset`: The portion of islands to reset. Default: 0.5
|
360 |
+
- `template` (dict): The template to use for a program. Default: {"preface": ""}
|
361 |
+
|
362 |
+
**Input Interface**:
|
363 |
+
|
364 |
+
- `operation` (str): The operation to perform. It can be one of the following: ["register_program","get_prompt","get_best_programs_per_island"]
|
365 |
+
|
366 |
+
**Output Interface**:
|
367 |
+
|
368 |
+
- `retrieved` (Any): The retrieved data. It can be one of the following:
|
369 |
+
- If the operation is "get_prompt", it can be a dictionary with the following keys
|
370 |
+
- `code` (str): The code of the prompt
|
371 |
+
- `version_generated` (int): The version of the prompt generated
|
372 |
+
- `island_id` (int): The id of the island that generated the prompt
|
373 |
+
- `header` (str): The header of the prompt
|
374 |
+
- If the operation is "register_program", it can be a string with the message "Program registered" or "Program failed to register"
|
375 |
+
- If the operation is "get_best_programs_per_island", it can be a dictionary with the following keys:
|
376 |
+
- `best_island_programs` (List[Dict[str,Any]]): A list of dictionaries with the following keys:
|
377 |
+
- `rank` (int): The rank of the program (1 is the best)
|
378 |
+
- `score` (float): The score of the program
|
379 |
+
- `program` (str): The program
|
380 |
+
- `island_id` (int): The id of the island that generated the program
|
381 |
+
|
382 |
+
**Citation**:
|
383 |
+
|
384 |
+
@Article{FunSearch2023,
|
385 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
386 |
+
journal = {Nature},
|
387 |
+
title = {Mathematical discoveries from program search with large language models},
|
388 |
+
year = {2023},
|
389 |
+
doi = {10.1038/s41586-023-06924-6}
|
390 |
+
}
|
391 |
+
|
392 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.set_up_flow_state"></a>
|
393 |
+
|
394 |
+
#### set\_up\_flow\_state
|
395 |
+
|
396 |
+
```python
|
397 |
+
def set_up_flow_state()
|
398 |
+
```
|
399 |
+
|
400 |
+
This method sets up the state of the flow and clears the previous messages.
|
401 |
+
|
402 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_prompt"></a>
|
403 |
+
|
404 |
+
#### get\_prompt
|
405 |
+
|
406 |
+
```python
|
407 |
+
def get_prompt()
|
408 |
+
```
|
409 |
+
|
410 |
+
This method gets a prompt from an island. It returns the code, the version generated and the island id.
|
411 |
+
|
412 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.reset_islands"></a>
|
413 |
+
|
414 |
+
#### reset\_islands
|
415 |
+
|
416 |
+
```python
|
417 |
+
def reset_islands()
|
418 |
+
```
|
419 |
+
|
420 |
+
This method resets the islands. It resets the worst islands and copies the best programs to the worst islands.
|
421 |
+
|
422 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.register_program"></a>
|
423 |
+
|
424 |
+
#### register\_program
|
425 |
+
|
426 |
+
```python
|
427 |
+
def register_program(program: AbstractArtifact, island_id: int,
|
428 |
+
scores_per_test: ScoresPerTest)
|
429 |
+
```
|
430 |
+
|
431 |
+
This method registers a program in an island. It also updates the best program if needed.
|
432 |
+
|
433 |
+
**Arguments**:
|
434 |
+
|
435 |
+
- `program` (`AbstractArtifact`): The program to register
|
436 |
+
- `island_id` (`int`): The id of the island to register the program
|
437 |
+
- `scores_per_test` (`ScoresPerTest`): The scores per test of the program
|
438 |
+
|
439 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_best_programs"></a>
|
440 |
+
|
441 |
+
#### get\_best\_programs
|
442 |
+
|
443 |
+
```python
|
444 |
+
def get_best_programs() -> List[Dict[str, Any]]
|
445 |
+
```
|
446 |
+
|
447 |
+
This method returns the best programs per island.
|
448 |
+
|
449 |
+
<a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.run"></a>
|
450 |
+
|
451 |
+
#### run
|
452 |
+
|
453 |
+
```python
|
454 |
+
def run(input_message: FlowMessage)
|
455 |
+
```
|
456 |
+
|
457 |
+
This method runs the flow. It performs the operation requested in the input message.
|
458 |
+
|
459 |
+
<a id="SamplerFlowModule"></a>
|
460 |
+
|
461 |
+
# SamplerFlowModule
|
462 |
+
|
463 |
+
<a id="SamplerFlowModule.SamplerFlow"></a>
|
464 |
+
|
465 |
+
# SamplerFlowModule.SamplerFlow
|
466 |
+
|
467 |
+
<a id="SamplerFlowModule.SamplerFlow.SamplerFlow"></a>
|
468 |
+
|
469 |
+
## SamplerFlow Objects
|
470 |
+
|
471 |
+
```python
|
472 |
+
class SamplerFlow(ChatAtomicFlow)
|
473 |
+
```
|
474 |
+
|
475 |
+
This class implements a SamplerFlow. It is a flow that queries a LLM to generate a response to a given input. This class is a child of ChatAtomicFlow.
|
476 |
+
and expects the same parameters as ChatAtomicFlow (see https://huggingface.co/aiflows/ChatFlowModule).
|
477 |
+
|
478 |
+
**Configuration Parameters**:
|
479 |
+
- `name` (str): The name of the flow. Default: "SamplerFlowModule"
|
480 |
+
- `description` (str): A description of the flow. Default: "A flow that queries an LLM model to generate prompts for the Sampler flow"
|
481 |
+
- `backend` Dict[str,Any]: The backend of the flow. Used to call models via an API.
|
482 |
+
See litellm's supported models and APIs here: https://docs.litellm.ai/docs/providers.
|
483 |
+
The default parameters of the backend are all defined at aiflows.backends.llm_lite.LiteLLMBackend
|
484 |
+
(also see the default parameters of litellm's completion parameters: https://docs.litellm.ai/docs/completion/input#input-params-1).
|
485 |
+
Except for the following parameters, which are overwritten by the ChatAtomicFlow in ChatAtomicFlow.yaml:
|
486 |
+
- `model_name` (Union[Dict[str,str],str]): The name of the model to use. Default: "gpt-4"
|
487 |
+
When using multiple API providers, the model_name can be a dictionary of the form
|
488 |
+
{"provider_name": "model_name"}. E.g. {"openai": "gpt-3.5-turbo", "azure": "azure/gpt-3.5-turbo"}
|
489 |
+
Default: "gpt-3.5-turbo" (the name needs to follow the name of the model in litellm https://docs.litellm.ai/docs/providers).
|
490 |
+
- `n` (int) : The number of answers to generate. Default: 1
|
491 |
+
- `max_tokens` (int): The maximum number of tokens to generate. Default: 2000
|
492 |
+
- `temperature` float: The temperature of the generation. Default: 1.0
|
493 |
+
- `top_p` float: An alternative to sampling with temperature. It instructs the model to consider the results of
|
494 |
+
the tokens with top_p probability. Default: 0.4
|
495 |
+
- `frequency_penalty` (number): It is used to penalize new tokens based on their frequency in the text so far. Default: 0.0
|
496 |
+
- `presence_penalty` (number): It is used to penalize new tokens based on their existence in the text so far. Default: 0.0
|
497 |
+
- `stream` (bool): Whether to stream the response or not. Default: false
|
498 |
+
- `system_message_prompt_template` (Dict[str,Any]): The template of the system message. It is used to generate the system message. Default: See SamplerFlow.yaml for default.
|
499 |
+
- `init_human_message_prompt_template` (Dict[str,Any]): The prompt template of the human/user message used to initialize the conversation
|
500 |
+
(first time in). It is used to generate the human message. It's passed as the user message to the LLM. Default: See SamplerFlow.yaml for default.
|
501 |
+
- `human_message_prompt_template` (Dict[str,Any]): The prompt template of the human/user message (message used every time except the first time in).
|
502 |
+
It's passed as the user message to the LLM. Default: See SamplerFlow.yaml for default.
|
503 |
+
- `previous_messages` (Dict[str,Any]): Defines which previous messages to include in the input of the LLM. Note that if `first_k`and `last_k` are both none,
|
504 |
+
all the messages of the flows's history are added to the input of the LLM. Default:
|
505 |
+
- `first_k` (int): If defined, adds the first_k earliest messages of the flow's chat history to the input of the LLM. Default: 1
|
506 |
+
- `last_k` (int): If defined, adds the last_k latest messages of the flow's chat history to the input of the LLM. Default: 1
|
507 |
+
|
508 |
+
*Input Interface Initialized (Expected input the first time in flow)*:
|
509 |
+
|
510 |
+
- `header` (str): A header message to include in prompt
|
511 |
+
- `code` (str): The "example" samples to generate our new sample from.
|
512 |
+
|
513 |
+
*Input Interface (Expected input after the first time in flow)*:
|
514 |
+
|
515 |
+
- `header` (str): A header message to include in prompt
|
516 |
+
- `code` (str): The "example" samples to generate our new sample from.
|
517 |
+
|
518 |
+
*Output Interface*:
|
519 |
+
|
520 |
+
- `api_output` (str): The output of the API call. It is the response of the LLM to the input.
|
521 |
+
- `from` (str): The name of the flow that generated the output. It's always "SamplerFlow"
|
522 |
+
|
523 |
+
|
524 |
+
**Citation**:
|
525 |
+
|
526 |
+
@Article{FunSearch2023,
|
527 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
528 |
+
journal = {Nature},
|
529 |
+
title = {Mathematical discoveries from program search with large language models},
|
530 |
+
year = {2023},
|
531 |
+
doi = {10.1038/s41586-023-06924-6}
|
532 |
+
}
|
533 |
+
|
534 |
+
<a id="SamplerFlowModule.SamplerFlow.SamplerFlow.run"></a>
|
535 |
+
|
536 |
+
#### run
|
537 |
+
|
538 |
+
```python
|
539 |
+
def run(input_message)
|
540 |
+
```
|
541 |
+
|
542 |
+
This method calls the backend of the flow (so queries the LLM). It calls the backend with the previous messages of the flow.
|
543 |
+
|
544 |
+
**Returns**:
|
545 |
+
|
546 |
+
`Any`: The output of the backend.
|
547 |
+
|
548 |
+
<a id="EvaluatorFlowModule"></a>
|
549 |
+
|
550 |
+
# EvaluatorFlowModule
|
551 |
+
|
552 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow"></a>
|
553 |
+
|
554 |
+
# EvaluatorFlowModule.EvaluatorFlow
|
555 |
+
|
556 |
+
|
557 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow"></a>
|
558 |
+
|
559 |
+
## EvaluatorFlow Objects
|
560 |
+
|
561 |
+
```python
|
562 |
+
class EvaluatorFlow(AtomicFlow)
|
563 |
+
```
|
564 |
+
|
565 |
+
This class implements an EvaluatorFlow. It is a flow that evaluates a program (python code) using a given evaluator function. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
|
566 |
+
|
567 |
+
**Configuration Parameters**:
|
568 |
+
|
569 |
+
- `name` (str): The name of the flow. Default: "EvaluatorFlow"
|
570 |
+
- `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: "A flow that evaluates code on tests"
|
571 |
+
- `py_file` (str): The python code containing the evaluation function. No default value. This MUST be passed as a parameter.
|
572 |
+
- `function_to_run_name` (str): The name of the function to run (the evaluation function) in the evaluator file. No default value. This MUST be passed as a parameter.
|
573 |
+
- `test_inputs` (Dict[str,Any]): A dictionary of test inputs to evaluate the program. Default: {"test1": None, "test2": None}
|
574 |
+
- `timeout_seconds` (int): The maximum number of seconds to run the evaluation function before returning an error. Default: 10
|
575 |
+
- `run_error_score` (int): The score to return if the evaluation function fails to run. Default: -100
|
576 |
+
- `use_test_input_as_key` (bool): Whether to use the test input parameters as the key in the output dictionary. Default: False
|
577 |
+
|
578 |
+
**Input Interface**:
|
579 |
+
|
580 |
+
- `artifact` (str): The program/artifact to evaluate.
|
581 |
+
|
582 |
+
**Output Interface**:
|
583 |
+
|
584 |
+
- `scores_per_test` (Dict[str, Dict[str, Any]]): A dictionary of scores per test input.
|
585 |
+
|
586 |
+
**Citation**:
|
587 |
+
|
588 |
+
@Article{FunSearch2023,
|
589 |
+
author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
|
590 |
+
journal = {Nature},
|
591 |
+
title = {Mathematical discoveries from program search with large language models},
|
592 |
+
year = {2023},
|
593 |
+
doi = {10.1038/s41586-023-06924-6}
|
594 |
+
}
|
595 |
+
|
596 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.load_functions"></a>
|
597 |
+
|
598 |
+
#### load\_functions
|
599 |
+
|
600 |
+
```python
|
601 |
+
def load_functions()
|
602 |
+
```
|
603 |
+
|
604 |
+
Load the functions from the evaluator py file with ast parsing
|
605 |
+
|
606 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run_function_with_timeout"></a>
|
607 |
+
|
608 |
+
#### run\_function\_with\_timeout
|
609 |
+
|
610 |
+
```python
|
611 |
+
def run_function_with_timeout(program: str, **kwargs)
|
612 |
+
```
|
613 |
+
|
614 |
+
Run the evaluation function with a timeout
|
615 |
+
|
616 |
+
**Arguments**:
|
617 |
+
|
618 |
+
- `program` (`str`): The program to evaluate
|
619 |
+
- `kwargs` (`Dict[str, Any]`): The keyword arguments to pass to the evaluation function
|
620 |
+
|
621 |
+
**Returns**:
|
622 |
+
|
623 |
+
`Tuple[bool, Any]`: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
|
624 |
+
|
625 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.evaluate_program"></a>
|
626 |
+
|
627 |
+
#### evaluate\_program
|
628 |
+
|
629 |
+
```python
|
630 |
+
def evaluate_program(program: str, **kwargs)
|
631 |
+
```
|
632 |
+
|
633 |
+
Evaluate the program using the evaluation function
|
634 |
+
|
635 |
+
**Arguments**:
|
636 |
+
|
637 |
+
- `program` (`str`): The program to evaluate
|
638 |
+
- `kwargs` (`Dict[str, Any]`): The keyword arguments to pass to the evaluation function
|
639 |
+
|
640 |
+
**Returns**:
|
641 |
+
|
642 |
+
`Tuple[bool, Any]`: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
|
643 |
+
|
644 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.analyse"></a>
|
645 |
+
|
646 |
+
#### analyse
|
647 |
+
|
648 |
+
```python
|
649 |
+
def analyse(program: str)
|
650 |
+
```
|
651 |
+
|
652 |
+
Analyse the program on the test inputs
|
653 |
+
|
654 |
+
**Arguments**:
|
655 |
+
|
656 |
+
- `program` (`str`): The program to evaluate
|
657 |
+
|
658 |
+
**Returns**:
|
659 |
+
|
660 |
+
`Dict[str, Dict[str, Any]]`: A dictionary of scores per test input
|
661 |
+
|
662 |
+
<a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run"></a>
|
663 |
+
|
664 |
+
#### run
|
665 |
+
|
666 |
+
```python
|
667 |
+
def run(input_message: FlowMessage)
|
668 |
+
```
|
669 |
+
|
670 |
+
This method runs the flow. It's the main method of the flow.
|
671 |
+
|
672 |
+
**Arguments**:
|
673 |
+
|
674 |
+
- `input_message` (`FlowMessage`): The input message
|
675 |
+
|
SamplerFlowModule/SamplerFlow.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
|
2 |
+
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
from aiflows.interfaces import KeyInterface
|
5 |
+
from aiflows.utils import logging
|
6 |
+
from flow_modules.aiflows.ChatFlowModule import ChatAtomicFlow
|
7 |
+
|
8 |
+
log = logging.get_logger(f"aiflows.{__name__}")
|
9 |
+
|
10 |
+
|
11 |
+
class SamplerFlow(ChatAtomicFlow):
    """Queries an LLM to generate a new candidate sample from example programs.

    This flow is a thin child of ChatAtomicFlow and accepts the same configuration
    parameters (see https://huggingface.co/aiflows/ChatFlowModule). The concrete
    defaults — model name, generation parameters (``n``, ``max_tokens``,
    ``temperature``, ``top_p``, penalties, ``stream``), prompt templates, and which
    previous messages are fed back to the model (``previous_messages.first_k`` /
    ``last_k``) — are all defined in SamplerFlow.yaml.

    *Input Interface (both the first time in flow and after)*:

    - `header` (str): A header message to include in the prompt.
    - `code` (str): The "example" samples to generate the new sample from.

    *Output Interface*:

    - `api_output` (str): The output of the API call, i.e. the LLM's response to the input.
    - `from` (str): The name of the flow that generated the output. It's always "SamplerFlow".

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title   = {Mathematical discoveries from program search with large language models},
    year    = {2023},
    doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def run(self, input_message):
        """Query the LLM backend with the flow's previous messages plus the
        incoming data, then send the model's answer back to the caller.

        :param input_message: The message whose data is passed to the LLM.
        :type input_message: FlowMessage
        :return: The output of the backend (delivered via ``send_message``).
        :rtype: Any
        """
        payload = input_message.data

        # Delegate the actual API call to ChatAtomicFlow's backend.
        llm_output = self.query_llm(input_data=payload)

        reply = self.package_output_message(
            input_message,
            response={"api_output": llm_output, "from": "SamplerFlow"},
        )
        self.send_message(reply)
|
SamplerFlowModule/SamplerFlow.yaml
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
input_interface:
|
2 |
+
- "action"
|
3 |
+
- "content"
|
4 |
+
|
5 |
+
|
6 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow.instantiate_from_default_config
|
7 |
+
name: SamplerFlowModule
|
8 |
+
description: A flow that queries an LLM model to generate prompts for the Sampler flow
|
9 |
+
|
10 |
+
input_interface_initialized:
|
11 |
+
- header
|
12 |
+
- code
|
13 |
+
|
14 |
+
input_interface_non_initialized:
|
15 |
+
- header
|
16 |
+
- code
|
17 |
+
|
18 |
+
output_interface:
|
19 |
+
- "api_output"
|
20 |
+
- "from"
|
21 |
+
|
22 |
+
backend:
|
23 |
+
api_infos: ???
|
24 |
+
model_name: gpt-4
|
25 |
+
frequency_penalty: 0
|
26 |
+
max_tokens: 2000
|
27 |
+
n: 1
|
28 |
+
presence_penalty: 0
|
29 |
+
stream: false
|
30 |
+
temperature: 1.0
|
31 |
+
top_p: 0.4
|
32 |
+
|
33 |
+
previous_messages:
|
34 |
+
last_k: 1
|
35 |
+
first_k: 1
|
36 |
+
|
37 |
+
system_message_prompt_template:
|
38 |
+
template: |2-
|
39 |
+
|
40 |
+
Role: You are an expert coder and algorithm designer who provides solutions.
|
41 |
+
|
42 |
+
|
43 |
+
Goal: Write evolve functions that optimize the score of an evaluation function (provided). Aim to achieve higher scores than previous functions. Write a new function that optimizes the evaluation function and scores better than the given functions. Develop non-trivial functions and actual algorithms.
|
44 |
+
|
45 |
+
|
46 |
+
Resources:
|
47 |
+
|
48 |
+
You will be given code related to evolve and functions.
|
49 |
+
You will be given a few functions attempting to optimize the evaluation function.
|
50 |
+
The function you're evolving (the last one passed) is the only one you have to write. Make sure to include only nested functions or recursive calls.
|
51 |
+
|
52 |
+
|
53 |
+
Naming:
|
54 |
+
|
55 |
+
Evaluation function is named {{evaluate_name}}.
|
56 |
+
Evolve functions have a naming structure similar to: {{evolve_name}}_v.
|
57 |
+
|
58 |
+
|
59 |
+
Important Constraints:
|
60 |
+
|
61 |
+
Only write one evolve function solution: Write only the uncompleted function given to you (i.e., only {{evolve_name}}_v{i}, where {i} is the function with the largest number in the prompt, and i is at most {{artifacts_per_prompt}}). Stop writing once you're done with your first solution.
|
62 |
+
Output must be a function that is parsable by the AST library: Write code only within functions. No textual comments, no code blocks (like ```python), or no explanations. Provide only Python code encapsulated in a single function. Your output should be parsable by the AST Python library. Do not include starting or ending comments like "Here's an improved version."
|
63 |
+
Follow the naming structure: Keep the function name as it is in the input.
|
64 |
+
Keep the same function format: Maintain the same input and output format in your solution.
|
65 |
+
Self-contained solution: You can't call other functions (only recursive calls or calls to nested functions).
|
66 |
+
Do not use the evaluate function in the solution: Avoid using the evaluate function in your solution.
|
67 |
+
|
68 |
+
partial_variables:
|
69 |
+
evaluate_name: ???
|
70 |
+
evolve_name: ???
|
71 |
+
artifacts_per_prompt: ???
|
72 |
+
|
73 |
+
|
74 |
+
init_human_message_prompt_template:
|
75 |
+
_target_: aiflows.prompt_template.JinjaPrompt
|
76 |
+
template: |2-
|
77 |
+
functions for evaluation:
|
78 |
+
{{header}}
|
79 |
+
|
80 |
+
function to evolve and to evaluate:
|
81 |
+
{{code}}
|
82 |
+
input_variables: ["header","code"]
|
83 |
+
partial_variables: {}
|
84 |
+
|
85 |
+
human_message_prompt_template:
|
86 |
+
_target_: aiflows.prompt_template.JinjaPrompt
|
87 |
+
template: |2-
|
88 |
+
full code:
|
89 |
+
{{header}}
|
90 |
+
|
91 |
+
function to evolve and to evaluate:
|
92 |
+
{{code}}
|
93 |
+
input_variables: ["header","code"]
|
94 |
+
partial_variables: {}
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
|
SamplerFlowModule/__init__.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ~~~ Specify the dependencies ~~~
|
2 |
+
|
3 |
+
dependencies = [
|
4 |
+
{"url": "aiflows/ChatFlowModule", "revision": "main"}
|
5 |
+
]
|
6 |
+
# Revision can correspond to a branch, commit hash, or an absolute path to a local directory (ideal for development)
|
7 |
+
from aiflows import flow_verse
|
8 |
+
|
9 |
+
flow_verse.sync_dependencies(dependencies)
|
10 |
+
|
11 |
+
# ~~~ Import of your flow class (if you have any) ~~~
|
12 |
+
# from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
|
13 |
+
from .SamplerFlow import SamplerFlow
|
SamplerFlowModule/demo.yaml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow.instantiate_from_default_config
|
2 |
+
|
3 |
+
backend:
|
4 |
+
api_infos: ???
|
5 |
+
|
6 |
+
system_message_prompt_template:
|
7 |
+
partial_variables:
|
8 |
+
evaluate_name: evaluate
|
9 |
+
evolve_name: solve_function
|
10 |
+
artifacts_per_prompt: 2
|
SamplerFlowModule/pip_requirements.txt
ADDED
File without changes
|
SamplerFlowModule/run.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import hydra
|
4 |
+
|
5 |
+
import aiflows
|
6 |
+
from aiflows.backends.api_info import ApiInfo
|
7 |
+
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
|
8 |
+
|
9 |
+
from aiflows import logging
|
10 |
+
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
|
11 |
+
|
12 |
+
from aiflows.utils import serving
|
13 |
+
from aiflows.workers import run_dispatch_worker_thread
|
14 |
+
from aiflows.messages import FlowMessage
|
15 |
+
from aiflows.interfaces import KeyInterface
|
16 |
+
from aiflows.utils.colink_utils import start_colink_server
|
17 |
+
from aiflows import flow_verse
|
18 |
+
|
19 |
+
|
20 |
+
dependencies = [
|
21 |
+
{
|
22 |
+
"url": "aiflows/FunSearchFlowModule",
|
23 |
+
"revision": os.path.abspath("../")
|
24 |
+
}
|
25 |
+
]
|
26 |
+
flow_verse.sync_dependencies(dependencies)
|
27 |
+
|
28 |
+
logging.set_verbosity_debug()
|
29 |
+
|
30 |
+
|
31 |
+
if __name__ == "__main__":
|
32 |
+
|
33 |
+
cl = start_colink_server()
|
34 |
+
|
35 |
+
serving.recursive_serve_flow(
|
36 |
+
cl=cl,
|
37 |
+
flow_class_name="flow_modules.aiflows.FunSearchFlowModule.SamplerFlow",
|
38 |
+
flow_endpoint="SamplerFlow",
|
39 |
+
)
|
40 |
+
|
41 |
+
run_dispatch_worker_thread(cl)
|
42 |
+
|
43 |
+
config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))
|
44 |
+
|
45 |
+
api_information = [ApiInfo(backend_used="openai",
|
46 |
+
api_key = os.getenv("OPENAI_API_KEY"))]
|
47 |
+
|
48 |
+
quick_load_api_keys(config_overrides, api_information, key="api_infos")
|
49 |
+
|
50 |
+
funsearch_proxy = serving.get_flow_instance(
|
51 |
+
cl=cl,
|
52 |
+
flow_endpoint="SamplerFlow",
|
53 |
+
config_overrides=config_overrides,
|
54 |
+
)
|
55 |
+
|
56 |
+
code = \
|
57 |
+
"""
|
58 |
+
#function used to evaluate the program:
|
59 |
+
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
|
60 |
+
\"\"\"Returns the score of the solve function we're evolving based on the tests_inputs and expected_outputs.
|
61 |
+
Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.
|
62 |
+
\"\"\"
|
63 |
+
if solve(solve_function, tests_inputs, expected_outputs) == True:
|
64 |
+
return 1.0
|
65 |
+
return 0.0
|
66 |
+
|
67 |
+
|
68 |
+
def solve_function_v0(input) -> str:
|
69 |
+
\"\"\"Scores per test: test_1:{'score': 1.0, 'feedback': 'No feedback available.'} test_2:{'score': 1.0, 'feedback': 'No feedback available.'} test_3:{'score': 0.0, 'feedback': 'No feedback available.'} test_4:{'score': -1, 'feedback': 'Invalid Format of prediction'}\"\"\"
|
70 |
+
return 'YES'
|
71 |
+
|
72 |
+
|
73 |
+
def solve_function_v1(input) -> str:
|
74 |
+
\"\"\"Improved version of solve_function_v0\"\"\"
|
75 |
+
"""
|
76 |
+
|
77 |
+
header = \
|
78 |
+
"""
|
79 |
+
\"\"\"Problem Description:
|
80 |
+
Serval has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\leq i\leq n.
|
81 |
+
Serval can perform the following operation called Inversion Magic on the string s:
|
82 |
+
Choose an segment [l, r] (1\leq l\leq r\leq n). For l\leq i\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.
|
83 |
+
For example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.
|
84 |
+
Serval wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.
|
85 |
+
A string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.
|
86 |
+
|
87 |
+
Input Description:
|
88 |
+
Input
|
89 |
+
Each test contains multiple test cases. The first line contains the number of test cases t (1\leq t\leq 10^4). The description of the test cases follows.
|
90 |
+
The first line of each test case contains a single integer n (2\leq n\leq 10^5) — the length of string s.
|
91 |
+
The second line of each test case contains a binary string s of length n. Only characters 0 and 1 can appear in s.
|
92 |
+
It's guaranteed that the sum of n over all test cases does not exceed 2\cdot 10^5.
|
93 |
+
|
94 |
+
Output Description:
|
95 |
+
Output
|
96 |
+
For each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.
|
97 |
+
You can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).
|
98 |
+
|
99 |
+
Public Tests:
|
100 |
+
Test 1:
|
101 |
+
Input: ['1', '4', '1001']
|
102 |
+
Output: 'YES'
|
103 |
+
Test 2:
|
104 |
+
Input: ['1', '5', '10010']
|
105 |
+
Output: 'YES'
|
106 |
+
Test 3:
|
107 |
+
Input: ['1', '7', '0111011']
|
108 |
+
Output: 'NO'
|
109 |
+
|
110 |
+
\"\"\"
|
111 |
+
|
112 |
+
|
113 |
+
import ast
|
114 |
+
import itertools
|
115 |
+
import numpy as np
|
116 |
+
from typing import List
|
117 |
+
|
118 |
+
def solve(solve_function: str,input: List[str], expected_output: str) -> str:
|
119 |
+
\"\"\"function used to run the solve function on input *kwargs and return the the predicted output
|
120 |
+
|
121 |
+
:param solve_function: the function to run (the solve function below as a string)
|
122 |
+
:type solve_function: str
|
123 |
+
:param kwargs: the inputs to the solve function
|
124 |
+
:type kwargs: List[str]
|
125 |
+
\"\"\"
|
126 |
+
local_namespace = {}
|
127 |
+
exec(solve_function,local_namespace)
|
128 |
+
found_name, program_name = get_function_name_from_code(solve_function)
|
129 |
+
|
130 |
+
if not found_name:
|
131 |
+
raise ValueError(f"Function name not found in program: {solve_function}")
|
132 |
+
|
133 |
+
solve_fn = local_namespace.get(program_name)
|
134 |
+
|
135 |
+
prediction = solve_fn(input)
|
136 |
+
|
137 |
+
prediction = prediction.split()
|
138 |
+
expected_output = expected_output.split()
|
139 |
+
|
140 |
+
if len(prediction) != len(expected_output):
|
141 |
+
raise ValueError(f"Invalid Format of prediction")
|
142 |
+
|
143 |
+
for i in range(len(prediction)):
|
144 |
+
if prediction[i] != expected_output[i]:
|
145 |
+
return False
|
146 |
+
|
147 |
+
return True
|
148 |
+
|
149 |
+
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
|
150 |
+
\"\"\"Returns the score of the solve function we're evolving based on the tests_inputs and expected_outputs.
|
151 |
+
Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.
|
152 |
+
\"\"\"
|
153 |
+
if solve(solve_function,tests_inputs,expected_outputs) == True:
|
154 |
+
return 1.0
|
155 |
+
return 0.0
|
156 |
+
|
157 |
+
|
158 |
+
def get_function_name_from_code(code):
|
159 |
+
tree = ast.parse(code)
|
160 |
+
for node in ast.walk(tree):
|
161 |
+
if isinstance(node, ast.FunctionDef):
|
162 |
+
return True, node.name
|
163 |
+
|
164 |
+
# something is wrong
|
165 |
+
return False, None
|
166 |
+
|
167 |
+
"""
|
168 |
+
|
169 |
+
data = {
|
170 |
+
'code': code,
|
171 |
+
'header': header
|
172 |
+
}
|
173 |
+
|
174 |
+
input_message = funsearch_proxy.package_input_message(data = data)
|
175 |
+
|
176 |
+
funsearch_proxy.send_message(input_message)
|
177 |
+
|
178 |
+
future = funsearch_proxy.get_reply_future(input_message)
|
179 |
+
response = future.get_data()
|
180 |
+
print("~~~Response~~~")
|
181 |
+
print(response)
|
__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .FunSearch import FunSearch
|
2 |
+
from .SamplerFlowModule.SamplerFlow import SamplerFlow
|
3 |
+
from .EvaluatorFlowModule.EvaluatorFlow import EvaluatorFlow
|
4 |
+
from .ProgramDBFlowModule.ProgramDBFlow import ProgramDBFlow
|
cf_functions.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import ast
|
3 |
+
import itertools
|
4 |
+
import numpy as np
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
def solve(solve_function: str, input: List[str], expected_output: str) -> bool:
    """Run *solve_function* (source code of the evolved solver) on *input* and
    compare its prediction against *expected_output*.

    :param solve_function: source code of the solver function to execute
    :type solve_function: str
    :param input: test-case input lines passed to the solver
    :type input: List[str]
    :param expected_output: expected output for the test case
    :type expected_output: str
    :return: True if every whitespace-separated token of the prediction
        matches the expected output, False otherwise
    :raises ValueError: if no function definition is found in
        *solve_function*, or if the prediction has a different number of
        tokens than the expected output
    """
    # Locate the first function definition in the submitted source.
    program_name = None
    for node in ast.walk(ast.parse(solve_function)):
        if isinstance(node, ast.FunctionDef):
            program_name = node.name
            break

    if program_name is None:
        raise ValueError(f"Function name not found in program: {solve_function}")

    # NOTE(security): exec runs LLM-generated code; this must only be used in
    # a sandboxed evaluator (the EvaluatorFlow config applies a timeout).
    local_namespace = {}
    exec(solve_function, local_namespace)
    solve_fn = local_namespace.get(program_name)

    prediction = solve_fn(input)

    # Compare token-wise so that whitespace differences are ignored.
    prediction = prediction.split()
    expected_tokens = expected_output.split()

    if len(prediction) != len(expected_tokens):
        raise ValueError("Invalid Format of prediction")

    return prediction == expected_tokens
|
37 |
+
|
38 |
+
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
    """Return the score of the evolved solve function on one test case.

    :param solve_function: source code of the solver being evolved
    :param tests_inputs: input lines of the test case
    :param expected_outputs: expected output of the test case
    :return: 1.0 if the solver's output matches the expected output,
        0.0 otherwise.  (The -1 score for programs that fail to run is
        assigned by the evaluator flow via its ``run_error_score`` setting,
        not here.)
    """
    # solve() returns a plain bool, so truthiness is sufficient —
    # no need to compare against True explicitly.
    if solve(solve_function, tests_inputs, expected_outputs):
        return 1.0
    return 0.0
|
45 |
+
|
46 |
+
|
47 |
+
def get_function_name_from_code(code):
    """Return ``(True, name)`` for the first function definition found in
    *code*, or ``(False, None)`` when the source contains no function."""
    module = ast.parse(code)
    names = [node.name for node in ast.walk(module) if isinstance(node, ast.FunctionDef)]
    if names:
        return True, names[0]
    # No function definition present — signal failure to the caller.
    return False, None
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
|
demo.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: FunSearchFlow
|
2 |
+
description: "A flow implementing FunSearch Asynchronous Search"
|
3 |
+
|
4 |
+
### Subflows specification
|
5 |
+
subflows_config:
|
6 |
+
SamplerFlow:
|
7 |
+
# _target_: flow_modules.SamplerFlowModule.SamplerFlow.instantiate_from_default_config
|
8 |
+
name: "Sampler Flow"
|
9 |
+
description: "A flow that queries an LLM model to generate prompts"
|
10 |
+
system_message_prompt_template:
|
11 |
+
partial_variables: ???
|
12 |
+
backend:
|
13 |
+
api_infos: ???
|
14 |
+
EvaluatorFlow:
|
15 |
+
name: "A flow that evaluates code on tests"
|
16 |
+
description: "A flow that evaluates code on tests"
|
17 |
+
singleton: False
|
18 |
+
run_error_score: ???
|
19 |
+
py_file: ???
|
20 |
+
function_to_run_name: ???
|
21 |
+
test_inputs: ???
|
22 |
+
timeout_seconds: 10
|
23 |
+
use_test_input_as_key: ???
|
24 |
+
|
25 |
+
ProgramDBFlow:
|
26 |
+
# _target_: flow_modules.ProgramDBFlowModule.ProgramDBFlow.instantiate_from_default_config
|
27 |
+
name: "ProgramDB"
|
28 |
+
description: "A flow that registers samples and evaluations in a database"
|
29 |
+
evaluate_function: ???
|
30 |
+
evaluate_file_full_content: ???
|
31 |
+
artifact_to_evolve_name: solve_function
|
32 |
+
num_islands: 3
|
33 |
+
template:
|
34 |
+
preface: ""
|
pip_requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
astunparse==1.6.3
|
2 |
+
numpy==1.26.2
|
3 |
+
SciPy==1.11.4
|
4 |
+
pandas
|
run.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import hydra
|
4 |
+
|
5 |
+
import aiflows
|
6 |
+
from aiflows.backends.api_info import ApiInfo
|
7 |
+
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
|
8 |
+
|
9 |
+
from aiflows import logging
|
10 |
+
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
|
11 |
+
|
12 |
+
from aiflows.utils import serving
|
13 |
+
from aiflows.workers import run_dispatch_worker_thread
|
14 |
+
from aiflows.messages import FlowMessage
|
15 |
+
from aiflows.interfaces import KeyInterface
|
16 |
+
from aiflows.utils.colink_utils import start_colink_server
|
17 |
+
from aiflows import flow_verse
|
18 |
+
import pandas as pd
|
19 |
+
import sys
|
20 |
+
from copy import deepcopy
|
21 |
+
import requests
|
22 |
+
import time
|
23 |
+
|
24 |
+
dependencies = [
|
25 |
+
{
|
26 |
+
"url": "aiflows/FunSearchFlowModule",
|
27 |
+
"revision": "../FunSearchFlowModule"
|
28 |
+
}
|
29 |
+
]
|
30 |
+
flow_verse.sync_dependencies(dependencies)
|
31 |
+
from flow_modules.aiflows.FunSearchFlowModule.Loader import Loader
|
32 |
+
|
33 |
+
logging.set_verbosity_debug()
|
34 |
+
|
35 |
+
|
36 |
+
def load_problem(id, ds_location="./data/codeforces.jsonl.gz"):
    """Load one codeforces problem from the gzipped jsonl dataset.

    :param id: problem identifier (e.g. "1789B") matched against the ``id`` column
    :param ds_location: path to the gzipped jsonl dataset file
    :return: tuple ``(tests, problem_description)`` where ``tests`` maps
        "test_<k>" to ``{"tests_inputs": ..., "expected_outputs": ...}``
        (public tests first, then hidden tests) and ``problem_description``
        is a human-readable description string for the prompt
    :raises AssertionError: if the problem is marked as having non-unique output
    :raises IndexError: if no row with the given id exists in the dataset
    """

    def make_problem_descriptions_str(row):
        """Build the full problem-description string for one dataset row."""

        def write_public_tests_individual_io_str(row):
            """Format the row's public tests as a printable string."""
            tests = ""
            for i, test in enumerate(row.public_tests_individual_io):
                test_input, test_output = test[0], test[1]
                tests += f"Test {i+1}:\n Input: {test_input}\n Output: '{test_output}'\n"
            return tests

        problem_description_str = f"Problem Description:\n{row.problem_description}\n\n"
        input_description_str = f"Input Description:\n{row.input_description}\n\n"
        output_description_str = f"Output Description:\n{row.output_description}\n\n"
        public_tests_str = f"Public Tests:\n{write_public_tests_individual_io_str(row)}\n"

        return problem_description_str + input_description_str + output_description_str + public_tests_str

    df = pd.read_json(ds_location, lines=True, compression='gzip')
    row = df[df.id == id].iloc[0]

    assert row.non_unique_output == False, "Problem has non unique output. Not supported yet"

    problem_description = make_problem_descriptions_str(row)

    # Number public tests first, then hidden tests, as "test_1", "test_2", ...
    tests = {}
    test_counter = 1

    for public_test in row.public_tests_individual_io:
        tests["test_" + str(test_counter)] = {"tests_inputs": public_test[0], "expected_outputs": public_test[1]}
        test_counter += 1

    for hidden_test in row.hidden_tests_io:
        tests["test_" + str(test_counter)] = {"tests_inputs": hidden_test[0], "expected_outputs": hidden_test[1]}
        test_counter += 1

    return tests, problem_description
|
79 |
+
|
80 |
+
def download_codeforces_data(data_folder_path, file_name):
    """Download the codeforces dataset archive to *data_folder_path*/*file_name*.

    Creates the destination folder if needed.  On HTTP failure the status is
    printed and no file is written (best-effort; mirrors original behavior
    of not raising).
    """
    print("Downloading data....")
    os.makedirs(data_folder_path, exist_ok=True)
    url = "https://github.com/epfl-dlab/cc_flows/raw/main/data/codeforces/codeforces.jsonl.gz"
    # Stream the download; an explicit timeout prevents a hung connection
    # from blocking the run indefinitely.
    response = requests.get(url, stream=True, timeout=60)

    if response.status_code == 200:
        with open(os.path.join(data_folder_path, file_name), 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
        print("Download complete")
    else:
        print("Failed to download data", response.status_code)
|
93 |
+
|
94 |
+
|
95 |
+
def get_configs(problem_id, ds_location = "./data/codeforces.jsonl.gz"):
|
96 |
+
tests, problem_description = load_problem(problem_id,ds_location)
|
97 |
+
|
98 |
+
path = os.path.join(".", "demo.yaml")
|
99 |
+
funsearch_cfg = read_yaml_file(path)
|
100 |
+
|
101 |
+
evaluate_function_file_path: str = "./cf_functions.py"
|
102 |
+
evaluate_function_name: str = "evaluate"
|
103 |
+
evolve_function_name:str = "solve_function"
|
104 |
+
|
105 |
+
loader = Loader(file_path = evaluate_function_file_path, target_name = evaluate_function_name)
|
106 |
+
evaluate_function: str= loader.load_target()
|
107 |
+
evaluate_file_full_content = loader.load_full_file()
|
108 |
+
|
109 |
+
evaluate_file_full_content = f"\"\"\"{problem_description}\"\"\"\n\n" + evaluate_file_full_content
|
110 |
+
|
111 |
+
#~~~~~ ProgramDBFlow Overrides ~~~~~~~~
|
112 |
+
funsearch_cfg["subflows_config"]["ProgramDBFlow"]["evaluate_function"] = evaluate_function
|
113 |
+
funsearch_cfg["subflows_config"]["ProgramDBFlow"]["evaluate_file_full_content"] = evaluate_file_full_content
|
114 |
+
funsearch_cfg["subflows_config"]["ProgramDBFlow"]["artifact_to_evolve_name"] = evolve_function_name
|
115 |
+
|
116 |
+
if len(tests) > 0:
|
117 |
+
first_test = tests["test_1"]
|
118 |
+
|
119 |
+
dummy_solution = f"def {evolve_function_name}(input) -> str:" +\
|
120 |
+
"\n \"\"\"Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)\"\"\"" +\
|
121 |
+
f"\n return \'{first_test['expected_outputs']}\'\n"
|
122 |
+
|
123 |
+
|
124 |
+
else:
|
125 |
+
dummy_solution = f"def {evolve_function_name}(input) -> str:" +\
|
126 |
+
"\n \"\"\"Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)\"\"\"" +\
|
127 |
+
f"\n return 0.0\"\"\n"
|
128 |
+
|
129 |
+
#~~~~~~~~~~Evaluator overrides~~~~~~~~~~~~
|
130 |
+
funsearch_cfg["subflows_config"]["EvaluatorFlow"]["py_file"] = evaluate_file_full_content
|
131 |
+
funsearch_cfg["subflows_config"]["EvaluatorFlow"]["run_error_score"] = -1
|
132 |
+
funsearch_cfg["subflows_config"]["EvaluatorFlow"]["function_to_run_name"] = evaluate_function_name
|
133 |
+
funsearch_cfg["subflows_config"]["EvaluatorFlow"]["test_inputs"] = tests
|
134 |
+
#Hides test inputs from LLM (necessary for hidden tests. Makes same setup as in a real contest.)
|
135 |
+
funsearch_cfg["subflows_config"]["EvaluatorFlow"]["use_test_input_as_key"] = False
|
136 |
+
|
137 |
+
|
138 |
+
#~~~~~~~~~~Sampler overrides~~~~~~~~~~~~
|
139 |
+
funsearch_cfg["subflows_config"]["SamplerFlow"]["system_message_prompt_template"]["partial_variables"] = \
|
140 |
+
{
|
141 |
+
"evaluate_name": evaluate_function_name,
|
142 |
+
"evolve_name": evolve_function_name,
|
143 |
+
"artifacts_per_prompt": 2
|
144 |
+
}
|
145 |
+
|
146 |
+
|
147 |
+
return funsearch_cfg, dummy_solution
|
148 |
+
|
149 |
+
|
150 |
+
FLOW_MODULES_PATH = "./"


if __name__ == "__main__":
    cl = start_colink_server()

    problem_id = "1789B"  # put the problem id here

    # Fetch the dataset on first use.
    if not os.path.exists("./data/codeforces.jsonl.gz"):
        download_codeforces_data("./data", "codeforces.jsonl.gz")

    funsearch_cfg, dummy_solution = get_configs(problem_id)

    api_information = [ApiInfo(backend_used="openai",
                               api_key=os.getenv("OPENAI_API_KEY"))]

    # Serve the FunSearch flow (its subflows are served recursively).
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.FunSearch",
        flow_endpoint="FunSearch",
    )

    n_workers = 10
    for _ in range(n_workers):
        run_dispatch_worker_thread(cl)

    quick_load_api_keys(funsearch_cfg, api_information, key="api_infos")
    config_overrides = None

    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="FunSearch",
        config_overrides=funsearch_cfg,
    )

    # Register the dummy seed program in the program database.
    input_message = funsearch_proxy.package_input_message(
        data={
            "from": "SamplerFlow",
            "operation": "register_program",
            "api_output": dummy_solution,
        }
    )
    funsearch_proxy.send_message(input_message)

    # Start the asynchronous search with 5 samplers.
    input_message = funsearch_proxy.package_input_message(
        data={
            "from": "FunSearch",
            "operation": "start",
            "content": {"num_samplers": 5},
        }
    )
    funsearch_proxy.send_message(input_message)

    # Queue the stop request.
    input_message = funsearch_proxy.package_input_message(
        data={
            "from": "FunSearch",
            "operation": "stop",
            "content": {},
        }
    )
    funsearch_proxy.send_message(input_message)

    wait_time = 1000
    print(f"Waiting {wait_time} seconds before requesting result...")
    time.sleep(wait_time)

    # Request the best programs found per island.
    input_message = funsearch_proxy.package_input_message(
        data={
            "from": "FunSearch",
            "operation": "get_best_programs_per_island",
            "content": {},
        }
    )
    # BUGFIX: the original never sent this last message before awaiting the
    # reply future, so the request would never be dispatched (the sibling
    # SamplerFlowModule demo sends before awaiting).
    funsearch_proxy.send_message(input_message)

    future = funsearch_proxy.get_reply_future(input_message)
    print("waiting for response....")
    response = future.get_data()
    print(response)
|