nbaldwin commited on
Commit
97e363b
·
1 Parent(s): 9adf3ef

first version FunSearch

Browse files
.gitignore ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
3
+
4
+ ### C++ ###
5
+ # Prerequisites
6
+ *.d
7
+
8
+ # Compiled Object files
9
+ *.slo
10
+ *.lo
11
+ *.o
12
+ *.obj
13
+
14
+ # Precompiled Headers
15
+ *.gch
16
+ *.pch
17
+
18
+ # Compiled Dynamic libraries
19
+ *.so
20
+ *.dylib
21
+ *.dll
22
+
23
+ # Fortran module files
24
+ *.mod
25
+ *.smod
26
+
27
+ # Compiled Static libraries
28
+ *.lai
29
+ *.la
30
+ *.a
31
+ *.lib
32
+
33
+ # Executables
34
+ *.exe
35
+ *.out
36
+ *.app
37
+
38
+ ### Java ###
39
+ # Compiled class file
40
+ *.class
41
+
42
+ # Log file
43
+ *.log
44
+
45
+ # BlueJ files
46
+ *.ctxt
47
+
48
+ # Mobile Tools for Java (J2ME)
49
+ .mtj.tmp/
50
+
51
+ # Package Files #
52
+ *.jar
53
+ *.war
54
+ *.nar
55
+ *.ear
56
+ *.zip
57
+ *.tar.gz
58
+ *.rar
59
+
60
+ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
61
+ hs_err_pid*
62
+ replay_pid*
63
+
64
+ ### Linux ###
65
+ *~
66
+
67
+ # temporary files which can be created if a process still has a handle open of a deleted file
68
+ .fuse_hidden*
69
+
70
+ # KDE directory preferences
71
+ .directory
72
+
73
+ # Linux trash folder which might appear on any partition or disk
74
+ .Trash-*
75
+
76
+ # .nfs files are created when an open file is removed but is still being accessed
77
+ .nfs*
78
+
79
+ ### macOS ###
80
+ # General
81
+ .DS_Store
82
+ .AppleDouble
83
+ .LSOverride
84
+
85
+ # Icon must end with two \r
86
+ Icon
87
+
88
+
89
+ # Thumbnails
90
+ ._*
91
+
92
+ # Files that might appear in the root of a volume
93
+ .DocumentRevisions-V100
94
+ .fseventsd
95
+ .Spotlight-V100
96
+ .TemporaryItems
97
+ .Trashes
98
+ .VolumeIcon.icns
99
+ .com.apple.timemachine.donotpresent
100
+
101
+ # Directories potentially created on remote AFP share
102
+ .AppleDB
103
+ .AppleDesktop
104
+ Network Trash Folder
105
+ Temporary Items
106
+ .apdisk
107
+
108
+ ### macOS Patch ###
109
+ # iCloud generated files
110
+ *.icloud
111
+
112
+ ### PyCharm ###
113
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
114
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
115
+
116
+ # User-specific stuff
117
+ .idea/**/workspace.xml
118
+ .idea/**/tasks.xml
119
+ .idea/**/usage.statistics.xml
120
+ .idea/**/dictionaries
121
+ .idea/**/shelf
122
+
123
+ # AWS User-specific
124
+ .idea/**/aws.xml
125
+
126
+ # Generated files
127
+ .idea/**/contentModel.xml
128
+
129
+ # Sensitive or high-churn files
130
+ .idea/**/dataSources/
131
+ .idea/**/dataSources.ids
132
+ .idea/**/dataSources.local.xml
133
+ .idea/**/sqlDataSources.xml
134
+ .idea/**/dynamic.xml
135
+ .idea/**/uiDesigner.xml
136
+ .idea/**/dbnavigator.xml
137
+
138
+ # Gradle
139
+ .idea/**/gradle.xml
140
+ .idea/**/libraries
141
+
142
+ # Gradle and Maven with auto-import
143
+ # When using Gradle or Maven with auto-import, you should exclude module files,
144
+ # since they will be recreated, and may cause churn. Uncomment if using
145
+ # auto-import.
146
+ # .idea/artifacts
147
+ # .idea/compiler.xml
148
+ # .idea/jarRepositories.xml
149
+ # .idea/modules.xml
150
+ # .idea/*.iml
151
+ # .idea/modules
152
+ # *.iml
153
+ # *.ipr
154
+
155
+ # CMake
156
+ cmake-build-*/
157
+
158
+ # Mongo Explorer plugin
159
+ .idea/**/mongoSettings.xml
160
+
161
+ # File-based project format
162
+ *.iws
163
+
164
+ # IntelliJ
165
+ out/
166
+
167
+ # mpeltonen/sbt-idea plugin
168
+ .idea_modules/
169
+
170
+ # JIRA plugin
171
+ atlassian-ide-plugin.xml
172
+
173
+ # Cursive Clojure plugin
174
+ .idea/replstate.xml
175
+
176
+ # SonarLint plugin
177
+ .idea/sonarlint/
178
+
179
+ # Crashlytics plugin (for Android Studio and IntelliJ)
180
+ com_crashlytics_export_strings.xml
181
+ crashlytics.properties
182
+ crashlytics-build.properties
183
+ fabric.properties
184
+
185
+ # Editor-based Rest Client
186
+ .idea/httpRequests
187
+
188
+ # Android studio 3.1+ serialized cache file
189
+ .idea/caches/build_file_checksums.ser
190
+
191
+ ### PyCharm Patch ###
192
+ # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
193
+
194
+ # *.iml
195
+ # modules.xml
196
+ # .idea/misc.xml
197
+ # *.ipr
198
+
199
+ # Sonarlint plugin
200
+ # https://plugins.jetbrains.com/plugin/7973-sonarlint
201
+ .idea/**/sonarlint/
202
+
203
+ # SonarQube Plugin
204
+ # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
205
+ .idea/**/sonarIssues.xml
206
+
207
+ # Markdown Navigator plugin
208
+ # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
209
+ .idea/**/markdown-navigator.xml
210
+ .idea/**/markdown-navigator-enh.xml
211
+ .idea/**/markdown-navigator/
212
+
213
+ # Cache file creation bug
214
+ # See https://youtrack.jetbrains.com/issue/JBR-2257
215
+ .idea/$CACHE_FILE$
216
+
217
+ # CodeStream plugin
218
+ # https://plugins.jetbrains.com/plugin/12206-codestream
219
+ .idea/codestream.xml
220
+
221
+ # Azure Toolkit for IntelliJ plugin
222
+ # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
223
+ .idea/**/azureSettings.xml
224
+
225
+ ### Python ###
226
+ # Byte-compiled / optimized / DLL files
227
+ __pycache__/
228
+ *.py[cod]
229
+ *$py.class
230
+
231
+ # C extensions
232
+
233
+ # Distribution / packaging
234
+ .Python
235
+ build/
236
+ develop-eggs/
237
+ dist/
238
+ downloads/
239
+ eggs/
240
+ .eggs/
241
+ lib/
242
+ lib64/
243
+ parts/
244
+ sdist/
245
+ var/
246
+ wheels/
247
+ share/python-wheels/
248
+ *.egg-info/
249
+ .installed.cfg
250
+ *.egg
251
+ MANIFEST
252
+
253
+ # PyInstaller
254
+ # Usually these files are written by a python script from a template
255
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
256
+ *.manifest
257
+ *.spec
258
+
259
+ # Installer logs
260
+ pip-log.txt
261
+ pip-delete-this-directory.txt
262
+
263
+ # Unit test / coverage reports
264
+ htmlcov/
265
+ .tox/
266
+ .nox/
267
+ .coverage
268
+ .coverage.*
269
+ .cache
270
+ nosetests.xml
271
+ coverage.xml
272
+ *.cover
273
+ *.py,cover
274
+ .hypothesis/
275
+ .pytest_cache/
276
+ cover/
277
+
278
+ # Translations
279
+ *.mo
280
+ *.pot
281
+
282
+ # Django stuff:
283
+ local_settings.py
284
+ db.sqlite3
285
+ db.sqlite3-journal
286
+
287
+ # Flask stuff:
288
+ instance/
289
+ .webassets-cache
290
+
291
+ # Scrapy stuff:
292
+ .scrapy
293
+
294
+ # Sphinx documentation
295
+ docs/_build/
296
+
297
+ # PyBuilder
298
+ .pybuilder/
299
+ target/
300
+
301
+ # Jupyter Notebook
302
+ .ipynb_checkpoints
303
+
304
+ # IPython
305
+ profile_default/
306
+ ipython_config.py
307
+
308
+ # pyenv
309
+ # For a library or package, you might want to ignore these files since the code is
310
+ # intended to run in multiple environments; otherwise, check them in:
311
+ # .python-version
312
+
313
+ # pipenv
314
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
315
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
316
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
317
+ # install all needed dependencies.
318
+ #Pipfile.lock
319
+
320
+ # poetry
321
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
322
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
323
+ # commonly ignored for libraries.
324
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
325
+ #poetry.lock
326
+
327
+ # pdm
328
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
329
+ #pdm.lock
330
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
331
+ # in version control.
332
+ # https://pdm.fming.dev/#use-with-ide
333
+ .pdm.toml
334
+
335
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
336
+ __pypackages__/
337
+
338
+ # Celery stuff
339
+ celerybeat-schedule
340
+ celerybeat.pid
341
+
342
+ # SageMath parsed files
343
+ *.sage.py
344
+
345
+ # Environments
346
+ .env
347
+ .venv
348
+ env/
349
+ venv/
350
+ ENV/
351
+ env.bak/
352
+ venv.bak/
353
+
354
+ # Spyder project settings
355
+ .spyderproject
356
+ .spyproject
357
+
358
+ # Rope project settings
359
+ .ropeproject
360
+
361
+ # mkdocs documentation
362
+ /site
363
+
364
+ # mypy
365
+ .mypy_cache/
366
+ .dmypy.json
367
+ dmypy.json
368
+
369
+ # Pyre type checker
370
+ .pyre/
371
+
372
+ # pytype static type analyzer
373
+ .pytype/
374
+
375
+ # Cython debug symbols
376
+ cython_debug/
377
+
378
+ # PyCharm
379
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
380
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
381
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
382
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
383
+ #.idea/
384
+
385
+ ### Python Patch ###
386
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
387
+ poetry.toml
388
+
389
+ # ruff
390
+ .ruff_cache/
391
+
392
+ # LSP config files
393
+ pyrightconfig.json
394
+
395
+ ### VisualStudioCode ###
396
+ .vscode/*
397
+ !.vscode/settings.json
398
+ !.vscode/tasks.json
399
+ !.vscode/launch.json
400
+ !.vscode/extensions.json
401
+ !.vscode/*.code-snippets
402
+
403
+ # Local History for Visual Studio Code
404
+ .history/
405
+
406
+ # Built Visual Studio Code Extensions
407
+ *.vsix
408
+
409
+ ### VisualStudioCode Patch ###
410
+ # Ignore all local history of files
411
+ .history
412
+ .ionide
413
+
414
+ ### Windows ###
415
+ # Windows thumbnail cache files
416
+ Thumbs.db
417
+ Thumbs.db:encryptable
418
+ ehthumbs.db
419
+ ehthumbs_vista.db
420
+
421
+ # Dump file
422
+ *.stackdump
423
+
424
+ # Folder config file
425
+ [Dd]esktop.ini
426
+
427
+ # Recycle Bin used on file shares
428
+ $RECYCLE.BIN/
429
+
430
+ # Windows Installer files
431
+ *.cab
432
+ *.msi
433
+ *.msix
434
+ *.msm
435
+ *.msp
436
+
437
+ # Windows shortcuts
438
+ *.lnk
439
+
440
+ # End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
441
+
442
+ .*
443
+ flow_modules/
444
+ data/
EvaluatorFlowModule/EvaluatorFlow.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
2
+
3
+ from aiflows.base_flows import AtomicFlow
4
+ from typing import Dict, Any
5
+ import os
6
+ from aiflows.utils import logging
7
+ import ast
8
+ import signal
9
+ from aiflows.interfaces.key_interface import KeyInterface
10
+ log = logging.get_logger(f"aiflows.{__name__}")
11
+ import threading
12
+ from aiflows.messages import FlowMessage
13
class TimeoutException(Exception):
    """Signals that evaluation of a candidate program ran out of time."""
15
+
16
def timeout_handler(signum, frame):
    """signal-module style handler: convert an alarm into a TimeoutException.

    NOTE(review): `signal` is imported above but this handler is never
    registered in the visible code — timeouts appear to be enforced via a
    worker thread in EvaluatorFlow instead. Confirm before removing.
    """
    raise TimeoutException("Execution timed out")
18
+
19
class EvaluatorFlow(AtomicFlow):
    """ This class implements an EvaluatorFlow. It is a flow that evaluates a program (python code) using a given evaluator function. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    **Configuration Parameters**:

    - `name` (str): The name of the flow. Default: "EvaluatorFlow"
    - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: "A flow that evaluates code on tests"
    - `py_file` (str): The python code containing the evaluation function. No default value. This MUST be passed as a parameter.
    - `function_to_run_name` (str): The name of the function to run (the evaluation function) in the evaluator file. No default value. This MUST be passed as a parameter.
    - `test_inputs` (Dict[str,Any]): A dictionary of test inputs to evaluate the program. Default: {"test1": None, "test2": None}
    - `timeout_seconds` (int): The maximum number of seconds to run the evaluation function before returning an error. Default: 10
    - `run_error_score` (int): The score to return if the evaluation function fails to run. Default: -100
    - `use_test_input_as_key` (bool): Whether to use the test input parameters as the key in the output dictionary. Default: False

    **Input Interface**:

    - `artifact` (str): The program/artifact to evaluate.

    **Output Interface**:

    - `scores_per_test` (Dict[str, Dict[str, Any]]): A dictionary of scores per test input.

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title   = {Mathematical discoveries from program search with large language models},
    year    = {2023},
    doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.evaluator_py_file = self.flow_config["py_file"]
        self.run_error_score = self.flow_config["run_error_score"]

        # Namespace the evaluator source is exec'd into; it only needs to live
        # long enough to extract the evaluation function from it.
        self.local_namespace = {}
        self.load_functions()
        self.function_to_run_name = self.flow_config["function_to_run_name"]
        # BUGFIX: the original assert message referenced the non-existent
        # attribute `self.evaluator_py_file_path`, which raised AttributeError
        # (instead of the intended AssertionError) whenever the check failed.
        assert self.function_to_run_name in self.local_namespace, \
            f"Function {self.function_to_run_name} not found in the provided py_file"
        self.function_to_run = self.local_namespace.get(self.function_to_run_name)

        self.test_inputs = self.flow_config["test_inputs"]
        self.timeout_seconds = self.flow_config["timeout_seconds"]
        # The namespace is no longer needed once the function is bound; drop it
        # so the flow does not keep the exec'd module contents alive.
        self.local_namespace = {}

        # Pass `island_id` through unchanged, defaulting to None when absent
        # (downstream ProgramDBFlow expects the key to be present).
        select_island_id_with_default = lambda data_dict, **kwargs: {
            **data_dict,
            "island_id": data_dict.get("island_id", None),
        }

        self.output_interface = KeyInterface(
            additional_transformations=[select_island_id_with_default],
            keys_to_select=["scores_per_test"],
        )

    def load_functions(self):
        """Exec the evaluator py_file into `self.local_namespace`.

        Import statements are executed first (extracted via AST parsing), then
        the whole file content is exec'd so module-level names and function
        definitions become available in the namespace.

        :raises Exception: re-raises anything raised while parsing or exec'ing
            the evaluator source.
        """
        file_content = self.evaluator_py_file
        try:
            # Parse the AST (Abstract Syntax Tree) of the file content.
            parsed_ast = ast.parse(file_content)

            # Execute the import statements first so that names they bind are
            # available in the namespace. (The identical Import/ImportFrom
            # branches of the original are merged into one isinstance check.)
            for node in parsed_ast.body:
                if isinstance(node, (ast.Import, ast.ImportFrom)):
                    exec(compile(ast.Module(body=[node], type_ignores=[]), '<ast>', 'exec'),
                         self.local_namespace)

            # NOTE: py_file comes from configuration and may embed LLM-related
            # content; exec'ing it is inherent to FunSearch but is unsafe on
            # untrusted input.
            exec(file_content, self.local_namespace)
        except Exception as e:
            log.error(f"Error loading functions: {e}")
            raise

    def run_function_with_timeout(self, program: str, **kwargs):
        """ Run the evaluation function with a timeout

        :param program: The program to evaluate
        :type program: str
        :param kwargs: The keyword arguments to pass to the evaluation function
        :type kwargs: Dict[str, Any]
        :return: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
        :rtype: Tuple[bool, Any]
        """
        self.result = None
        self.exception = None

        # Worker that records either the result or the raised exception.
        def target():
            try:
                self.result = self.function_to_run(program, **kwargs)
            except Exception as e:
                self.exception = e

        # Daemon thread: a runaway evaluation cannot be killed in CPython, but
        # marking it daemon at least prevents it from blocking interpreter exit.
        thread = threading.Thread(target=target, daemon=True)
        thread.start()

        # Wait for the specified timeout.
        thread.join(self.timeout_seconds)

        # If the thread is still alive, the timeout has elapsed.
        if thread.is_alive():
            # BUGFIX: the original called `thread.terminate()`, which does not
            # exist on threading.Thread and raised AttributeError instead of
            # reporting the timeout. The thread is simply abandoned here.
            return False, f"Function execution timed out after {self.timeout_seconds} seconds"

        # The thread finished: surface any exception it recorded.
        if self.exception is not None:
            return False, str(self.exception)

        # No exception: return the recorded result.
        return True, self.result

    def evaluate_program(self, program: str, **kwargs):
        """ Evaluate the program using the evaluation function

        :param program: The program to evaluate
        :type program: str
        :param kwargs: The keyword arguments to pass to the evaluation function
        :type kwargs: Dict[str, Any]
        :return: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
        :rtype: Tuple[bool, Any]
        """
        try:
            runs_ok, test_output = self.run_function_with_timeout(program, **kwargs)
            return runs_ok, test_output
        except Exception as e:
            # Typically a syntax error in the LLM-generated program.
            log.debug(f"Error running program: {e} (could be due to syntax error from LLM)")
            return False, e

    def analyse(self, program: str):
        """ Analyse the program on the test inputs

        :param program: The program to evaluate
        :type program: str
        :return: A dictionary of scores per test input
        :rtype: Dict[str, Dict[str, Any]]
        """
        # LLM outputs often arrive wrapped in a markdown code fence; strip it.
        # Stripping surrounding whitespace first also handles a trailing
        # newline after the closing fence, which the original check missed.
        program = program.strip()
        if program.startswith("```python"):
            program = program[len("```python"):]
        if program.endswith("```"):
            program = program[:-3]

        scores_per_test = {}
        for key, test_input in self.test_inputs.items():

            test_input_key = str(test_input) if self.flow_config["use_test_input_as_key"] else key

            if test_input is None:
                runs_ok, test_output = self.evaluate_program(program)
            else:
                runs_ok, test_output = self.evaluate_program(program, **test_input)  # Run the program

            if runs_ok and test_output is not None:  # and not utils.calls_ancestor(program) (TODO: check what they mean by this in the paper)
                scores_per_test[test_input_key] = {"score": test_output, "feedback": "No feedback available."}
                log.debug(f"Program run successfully for test case {test_input_key} with score: {test_output}")
            else:
                log.debug(f"Error running Program for test case {test_input_key}. Error is : {test_output} (could be due to syntax error from LLM)")
                scores_per_test[test_input_key] = {"score": self.run_error_score, "feedback": str(test_output)}

        return scores_per_test

    def run(self, input_message: FlowMessage):
        """ This method runs the flow. It's the main method of the flow.

        :param input_message: The input message
        :type input_message: FlowMessage
        """
        input_data = input_message.data

        # Analyse the program on every configured test input.
        scores_per_test = self.analyse(input_data["artifact"])
        # Prepare the response.
        response = {"scores_per_test": scores_per_test, "from": "EvaluatorFlow"}

        # Send back the response to the caller.
        reply = self.package_output_message(
            input_message,
            response
        )
        self.send_message(reply)
EvaluatorFlowModule/EvaluatorFlow.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow.instantiate_from_default_config
2
+ name: EvaluatorFlow
3
+ description: A flow that evaluates code on tests
4
+
5
+ input_interface:
6
+ - "artifact"
7
+ output_interface:
8
+ - "scores_per_test"
9
+
10
+ py_file: ???
11
+ function_to_run_name: ???
12
+ test_inputs:
13
+ test_1: null
14
+ test_2: null
15
+ timeout_seconds: 10
16
+ run_error_score: -100
17
+ use_test_input_as_key: false
18
+
EvaluatorFlowModule/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ~~~ Specify the dependencies ~~~
2
+ # e.g.,
3
+ # dependencies = [
4
+ # {"url": "aiflows/AutoGPTFlowModule", "revision": "main"},
5
+ # ]
6
+ # Revision can correspond to a branch, commit hash or an absolute path to a local directory (ideal for development)
7
+ # from aiflows import flow_verse
8
+
9
+ # flow_verse.sync_dependencies(dependencies)
10
+
11
+ # ~~~ Import of your flow class (if you have any) ~~~
12
+ # from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
13
+ from .EvaluatorFlow import EvaluatorFlow
EvaluatorFlowModule/demo.yaml ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow.instantiate_from_default_config
2
+
3
+ py_file: |2-
4
+ """Problem Description:
5
+ Serval has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\leq i\leq n.
6
+ Serval can perform the following operation called Inversion Magic on the string s:
7
+ Choose an segment [l, r] (1\leq l\leq r\leq n). For l\leq i\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.
8
+ For example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.
9
+ Serval wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.
10
+ A string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.
11
+
12
+ Input Description:
13
+ Input
14
+ Each test contains multiple test cases. The first line contains the number of test cases t (1\leq t\leq 10^4). The description of the test cases follows.
15
+ The first line of each test case contains a single integer n (2\leq n\leq 10^5) — the length of string s.
16
+ The second line of each test case contains a binary string s of length n. Only characters 0 and 1 can appear in s.
17
+ It's guaranteed that the sum of n over all test cases does not exceed 2\cdot 10^5.
18
+
19
+ Output Description:
20
+ Output
21
+ For each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.
22
+ You can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).
23
+
24
+ Public Tests:
25
+ Test 1:
26
+ Input: ['1', '4', '1001']
27
+ Output: 'YES'
28
+ Test 2:
29
+ Input: ['1', '5', '10010']
30
+ Output: 'YES'
31
+ Test 3:
32
+ Input: ['1', '7', '0111011']
33
+ Output: 'NO'
34
+
35
+ """
36
+
37
+
38
+ import ast
39
+ import itertools
40
+ import numpy as np
41
+ from typing import List
42
+
43
+ def solve(solve_function: str,input: List[str], expected_output: str) -> str:
44
+ """function used to run the solve function on input *kwargs and return the the predicted output
45
+
46
+ :param solve_function: the function to run (the solve function below as a string)
47
+ :type solve_function: str
48
+ :param kwargs: the inputs to the solve function
49
+ :type kwargs: List[str]
50
+ """
51
+ local_namespace = {}
52
+ exec(solve_function,local_namespace)
53
+ found_name, program_name = get_function_name_from_code(solve_function)
54
+
55
+ if not found_name:
56
+ raise ValueError(f"Function name not found in program: {solve_function}")
57
+
58
+ solve_fn = local_namespace.get(program_name)
59
+
60
+ prediction = solve_fn(input)
61
+
62
+ prediction = prediction.split()
63
+ expected_output = expected_output.split()
64
+
65
+ if len(prediction) != len(expected_output):
66
+ raise ValueError(f"Invalid Format of prediction")
67
+
68
+ for i in range(len(prediction)):
69
+ if prediction[i] != expected_output[i]:
70
+ return False
71
+
72
+ return True
73
+
74
+ def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
75
+ """Returns the score of the solve function we're evolving based on the tests_inputs and expected_outputs.
76
+ Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.
77
+ """
78
+ if solve(solve_function,tests_inputs,expected_outputs) == True:
79
+ return 1.0
80
+ return 0.0
81
+
82
+
83
+ def get_function_name_from_code(code):
84
+ tree = ast.parse(code)
85
+ for node in ast.walk(tree):
86
+ if isinstance(node, ast.FunctionDef):
87
+ return True, node.name
88
+
89
+ # something is wrong
90
+ return False, None
91
+
92
+
93
+ function_to_run_name: evaluate
94
+ test_inputs:
95
+
96
+ test_1:
97
+ tests_inputs: ['1', '4', '1001']
98
+ expected_outputs: 'YES'
99
+ test_2:
100
+ tests_inputs: ['1', '5', '10010']
101
+ expected_outputs: 'YES'
102
+ test_3:
103
+ tests_inputs: ['1', '7', '0111011']
104
+ expected_outputs: 'NO'
105
+ test_4:
106
+ tests_inputs: ['3', '4', '1001', '5', '10010', '7', '0111011', '']
107
+ expected_outputs: 'YES\nYES\nNO\n'
108
+
109
+ timeout_seconds: 10
110
+ run_error_score: -1
111
+ use_test_input_as_key: false
EvaluatorFlowModule/pip_requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ astunparse==1.6.3
EvaluatorFlowModule/run.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import hydra
4
+
5
+ import aiflows
6
+ from aiflows.backends.api_info import ApiInfo
7
+ from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
8
+
9
+ from aiflows import logging
10
+ from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
11
+
12
+ from aiflows.utils import serving
13
+ from aiflows.workers import run_dispatch_worker_thread
14
+ from aiflows.messages import FlowMessage
15
+ from aiflows.interfaces import KeyInterface
16
+ from aiflows.utils.colink_utils import start_colink_server
17
+ from aiflows import flow_verse
18
+
19
+
20
# Sync the FunSearchFlowModule from the local parent directory so that the
# served flow class below is importable under flow_modules.aiflows.*.
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        # Revision is the absolute path of the local checkout (local development
        # mode) rather than a remote branch or commit.
        "revision": os.path.abspath("../")
    }
]
flow_verse.sync_dependencies(dependencies)

logging.set_verbosity_debug()


if __name__ == "__main__":

    # Start a local CoLink server; all serving/worker calls below go through it.
    cl = start_colink_server()

    # Serve the EvaluatorFlow class under the "EvaluatorFlow" endpoint.
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow",
        flow_endpoint="EvaluatorFlow",
    )

    # Worker thread that dispatches incoming messages to the served flow.
    run_dispatch_worker_thread(cl)

    # demo.yaml supplies py_file / function_to_run_name / test_inputs overrides.
    config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))

    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="EvaluatorFlow",
        config_overrides=config_overrides,
    )
    # A trivial candidate program that always answers 'YES'; the evaluator
    # scores it against the test inputs configured in demo.yaml.
    data = {
        'artifact': \
        'def solve_function(input) -> str:\n """Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)"""\n return \'YES\'\n'
    }

    input_message = funsearch_proxy.package_input_message(data = data)

    funsearch_proxy.send_message(input_message)

    # Block until the evaluator replies, then print scores_per_test.
    future = funsearch_proxy.get_reply_future(input_message)
    response = future.get_data()
    print("~~~Response~~~")
    print(response)
FunSearch.py ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from aiflows.base_flows import CompositeFlow
2
+ from aiflows.utils import logging
3
+ from aiflows.interfaces import KeyInterface
4
+ from aiflows.messages import FlowMessage
5
+ from typing import Dict, Any
6
+ log = logging.get_logger(f"aiflows.{__name__}")
7
+
8
class FunSearch(CompositeFlow):
    """ This class implements FunSearch. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch) . It's a Flow in charge of starting, stopping and managing (passing around messages) the FunSearch process. It passes messages around to the following subflows:

    - ProgramDBFlow: which is in charge of storing and retrieving programs.
    - SamplerFlow: which is in charge of sampling programs.
    - EvaluatorFlow: which is in charge of evaluating programs.

    *Configuration Parameters*:

    - `name` (str): The name of the flow. Default: "FunSearchFlow".
    - `description` (str): The description of the flow. Default: "A flow implementing FunSearch"
    - `subflows_config` (Dict[str,Any]): A dictionary of subflows configurations. Default:
        - `ProgramDBFlow`: By default, it uses the `ProgramDBFlow` class and uses its default parameters.
        - `SamplerFlow`: By default, it uses the `SamplerFlow` class and uses its default parameters.
        - `EvaluatorFlow`: By default, it uses the `EvaluatorFlow` class and uses its default parameters.

    **Input Interface**:

    - `from` (str): The flow from which the message is coming from. It can be one of the following: "FunSearch", "SamplerFlow", "EvaluatorFlow", "ProgramDBFlow".
    - `operation` (str): The operation to perform. It can be one of the following: "start", "stop", "get_prompt", "get_best_programs_per_island", "register_program".
    - `content` (Dict[str,Any]): The content associated to an operation. Here is the expected content for each operation:
        - "start":
            - `num_samplers` (int): The number of samplers to start up. Note that it's still restricted by the number of workers available. Default: 1.
        - "stop":
            - No content. Pass either an empty dictionary or None. Works also with no content.
        - "get_prompt":
            - No content. Pass either an empty dictionary or None. Works also with no content.
        - "get_best_programs_per_island":
            - No content. Pass either an empty dictionary or None. Works also with no content.

    **Output Interface**:

    - `retrieved` (Dict[str,Any]): The retrieved data.

    **Citation**:

    @Article{FunSearch2023,
        author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
        journal = {Nature},
        title   = {Mathematical discoveries from program search with large language models},
        year    = {2023},
        doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Next state per action:
        # maps (operation, sender) -> the name of the subflow/state that should
        # handle the message next. "GenerateReply" and "FunSearch" are internal
        # pseudo-states handled by this flow itself (see `run`).
        self.next_state_per_action = {
            "get_prompt": {
                "FunSearch": "ProgramDBFlow",
                "ProgramDBFlow": "SamplerFlow",
            },

            "get_best_programs_per_island": {
                "FunSearch": "ProgramDBFlow",
                "ProgramDBFlow": "GenerateReply",
            },
            "register_program": {
                "SamplerFlow": "EvaluatorFlow",
                "EvaluatorFlow": "ProgramDBFlow",
            },
            "start":
                {"FunSearch": "FunSearch"},
            "stop":
                {"FunSearch": "FunSearch"},
        }

        # Key interface used to build the payload of a "get_prompt" request
        # that the flow sends to itself (see `make_request_for_prompt`).
        self.make_request_for_prompt_data = KeyInterface(
            keys_to_set={"operation": "get_prompt", "content": {}, "from": "FunSearch"},
            keys_to_select=["operation", "content", "from"]
        )

    def make_request_for_prompt(self):
        """ This method makes a request for a prompt. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt. """

        # Prepare data to make request for prompt
        data = self.make_request_for_prompt_data({})

        # Package message to make request for prompt
        msg = self.package_input_message(
            data=data,
            dst_flow="FunSearch"
        )
        # Send message to itself to start the process of getting a prompt
        self.send_message(
            msg
        )

    def request_samplers(self, input_message: FlowMessage):
        """ This method requests samplers. It sends a message to itself with the operation "get_prompt" (once per requested sampler) which will trigger the flow to call the `ProgramDBFlow` to get a prompt.

        :param input_message: The input message that triggered the request for samplers.
        :type input_message: FlowMessage
        """

        # Get state associated with the message (the "start" request is fully
        # handled here, so its state is popped rather than kept around)
        message_state = self.pop_message_from_state(input_message.input_message_id)
        # Get number of samplers to request
        num_samplers = message_state["content"].get("num_samplers", 1)
        for i in range(num_samplers):
            self.make_request_for_prompt()

    def get_next_state(self, input_message: FlowMessage):
        """ This method determines the next state of the flow based on the input message. It returns the next state based on the current state (who the message is from) and the operation associated with the message.

        :param input_message: The input message that triggered the request for the next state.
        :type input_message: FlowMessage
        :return: The next state of the flow.
        :rtype: str
        """
        # Get state associated with the message
        message_state = self.get_message_from_state(input_message.input_message_id)
        message_from = message_state["from"]
        operation = message_state["operation"]
        # Get next state based on the action and the current state
        next_state = self.next_state_per_action[operation][message_from]
        return next_state

    def set_up_flow_state(self):
        """ This method sets up the state of the flow. It's called at the beginning of the flow."""
        super().set_up_flow_state()
        # Dictionary containing the state of every message currently being handled by FunSearch.
        # Each message has its own entry in the flow state.
        # Once a message is done being handled, it's removed from the state.
        self.flow_state["msg_requests"] = {}
        # Flag to keep track of whether the first sample has been saved to the db
        # (get_prompt requests are retried until this is True, see `call_program_db`)
        self.flow_state["first_sample_saved_to_db"] = False
        # Flag to keep track of whether FunSearch is running
        self.flow_state["funsearch_running"] = False

    def save_message_to_state(self, msg_id: str, message: FlowMessage):
        """ This method saves a message to the state of the flow. It's used to keep track of state on a per message basis (i.e., state of the flow depending on the message received and id).

        :param msg_id: The id of the message to save.
        :type msg_id: str
        :param message: The message to save.
        :type message: FlowMessage
        """
        self.flow_state["msg_requests"][msg_id] = {"og_message": message}

    def rename_key_message_in_state(self, old_key: str, new_key: str):
        """ This method renames a key in the "msg_requests" dictionary of the flow state. It's used so that a pending request can be found again when the reply (which carries a new message id) arrives.

        :param old_key: The old key to rename.
        :type old_key: str
        :param new_key: The new key to rename to.
        :type new_key: str
        """
        self.flow_state["msg_requests"][new_key] = self.flow_state["msg_requests"].pop(old_key)

    def message_in_state(self, msg_id: str) -> bool:
        """ This method checks if a message is in the state of the flow (in the "msg_requests" dictionary). It returns True if the message is in the state, otherwise it returns False.

        :param msg_id: The id of the message to check if it's in the state.
        :type msg_id: str
        :return: True if the message is in the state, otherwise False.
        :rtype: bool
        """

        return msg_id in self.flow_state["msg_requests"].keys()

    def get_message_from_state(self, msg_id: str) -> Dict[str, Any]:
        """ This method returns the state associated with a message id in the state of the flow (in the "msg_requests" dictionary).

        :param msg_id: The id of the message to get the state from.
        :type msg_id: str
        :return: The state associated with the message id.
        :rtype: Dict[str,Any]
        """
        return self.flow_state["msg_requests"][msg_id]

    def pop_message_from_state(self, msg_id: str) -> Dict[str, Any]:
        """ This method pops a message from the state of the flow (in the "msg_requests" dictionary). It returns the state associated with a message and removes it from the state.

        :param msg_id: The id of the message to pop from the state.
        :type msg_id: str
        :return: The state associated with the message id.
        :rtype: Dict[str,Any]
        """
        return self.flow_state["msg_requests"].pop(msg_id)

    def merge_message_request_state(self, id: str, new_states: Dict[str, Any]):
        """ This method merges new states into a message's entry in the flow state (in the "msg_requests" dictionary). Keys in `new_states` overwrite existing keys with the same name.

        :param id: The id of the message to merge new states to.
        :type id: str
        :param new_states: The new states to merge to the message.
        :type new_states: Dict[str,Any]
        """
        self.flow_state["msg_requests"][id] = {**self.flow_state["msg_requests"][id], **new_states}

    def register_data_to_state(self, input_message: FlowMessage):
        """This method registers the input message data to the flow state. It's called every time a new input message is received.

        :param input_message: The input message
        :type input_message: FlowMessage
        """

        # Determine who the message is from (should be either FunSearch, SamplerFlow, EvaluatorFlow, or ProgramDBFlow)
        msg_from = input_message.data.get("from", "FunSearch")
        # Check if this is a first request or part of a message that is being handled
        # (it's part of a message being handled if the message id is already in the state)
        msg_id = input_message.input_message_id
        msg_in_state = self.message_in_state(msg_id)

        # If message is not in state, save it to state
        if not msg_in_state:
            self.save_message_to_state(msg_id, input_message)

        # Get the state associated to the message
        message_state = self.get_message_from_state(msg_id)

        # Determine what to do based on who the message is from

        if msg_from == "FunSearch":
            # Calls from FunSearch expect `operation` and `content`
            operation = input_message.data["operation"]
            content = input_message.data.get("content", {})
            to_add_to_state = {
                "content": content,
                "operation": operation
            }
            # save operation and content to state
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "SamplerFlow":
            # Calls from SamplerFlow expect `api_output` (the generated program); merge it into the
            # state and switch the operation to "register_program" for the evaluation/registration leg
            to_add_to_state = {
                "content": {
                    **message_state.get("content", {}),
                    **{"artifact": input_message.data["api_output"]}
                },
                "operation": "register_program"
            }
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "EvaluatorFlow":
            # Calls from EvaluatorFlow expect `scores_per_test`; merge it into the state
            message_state = self.get_message_from_state(msg_id)
            to_add_to_state = {
                "content": {
                    **message_state.get("content", {}),
                    **{"scores_per_test": input_message.data["scores_per_test"]}
                }
            }
            self.merge_message_request_state(msg_id, to_add_to_state)

        elif msg_from == "ProgramDBFlow":
            # Calls from ProgramDBFlow expect `retrieved`; merge it into the state
            to_add_to_state = {
                "retrieved": input_message.data["retrieved"],
            }

            # If the message from ProgramDBFlow is associated with a "get_prompt" operation,
            # save island_id to state (needed when registering the program later)
            if message_state["operation"] == "get_prompt":
                island_id = input_message.data["retrieved"]["island_id"]
                to_add_to_state["content"] = {
                    **message_state.get("content", {}),
                    **{"island_id": island_id}
                }

            self.merge_message_request_state(msg_id, to_add_to_state)

        # save `from` to state
        self.merge_message_request_state(msg_id, {"from": msg_from})

    def call_program_db(self, input_message: FlowMessage):
        """ This method calls the ProgramDBFlow. It sends a message to the ProgramDBFlow with the data of the input message.

        :param input_message: The input message to send to the ProgramDBFlow.
        :type input_message: FlowMessage
        """

        # Fetch state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(input_message.input_message_id)

        # Get operation and content from state to send to ProgramDBFlow
        operation = message_state["operation"]
        content = message_state.get("content", {})

        data = {
            "operation": operation,
            "content": content
        }
        # package message to send to ProgramDBFlow
        msg = self.package_input_message(
            data=data,
            dst_flow="ProgramDBFlow"
        )

        # If operation is "register_program",
        # pop message from state (because the initial message has been fully handled) and set first_sample_saved_to_db to True.
        # Send a message to register the program without expecting a reply (no need to wait for a reply, just save to db and move on)
        if data["operation"] == "register_program":
            self.pop_message_from_state(msg_id)

            self.flow_state["first_sample_saved_to_db"] = True

            self.subflows["ProgramDBFlow"].send_message(
                msg
            )

        # If operation is "get_prompt" or "get_best_programs_per_island",
        # rename key in state to the new message id (in order to be able to find the state when the reply arrives)
        elif data["operation"] in ["get_prompt", "get_best_programs_per_island"]:
            self.rename_key_message_in_state(msg_id, msg.message_id)
            # If no sample has been saved to db, send the input message back to itself
            # (to try again, hopefully this time a sample will have been saved to db)
            # NOTE(review): the state key was just renamed to msg.message_id, so when the
            # resent message is reprocessed it is treated as a fresh request — confirm intended.
            if not self.flow_state["first_sample_saved_to_db"]:
                # send the message back to itself (to try again)
                self.send_message(
                    input_message
                )
            # If a sample has been saved to db, send message to ProgramDBFlow to fetch prompt or best programs per island
            else:
                self.subflows["ProgramDBFlow"].get_reply(
                    msg
                )
        # If operation is not "register_program", "get_prompt" or "get_best_programs_per_island"
        else:
            log.error("No operation found, input_message received: \n" + str(input_message))

    def call_evaluator(self, input_message: FlowMessage):
        """ This method calls the EvaluatorFlow. It sends a message to the EvaluatorFlow with the data of the input message.

        :param input_message: The input message to send to the EvaluatorFlow.
        :type input_message: FlowMessage
        """

        # Fetch state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(msg_id)

        # Get data to send to EvaluatorFlow (artifact generated by the Sampler, to be evaluated)
        data = {
            "artifact": message_state["content"]["artifact"]
        }

        msg = self.package_input_message(
            data=data,
            dst_flow="EvaluatorFlow"
        )
        # rename key in state to the new message id (in order to be able to find the state when the reply arrives)
        self.rename_key_message_in_state(msg_id, msg.message_id)
        # Send message to EvaluatorFlow and expect a reply to be sent back to FunSearch's input message queue
        self.subflows["EvaluatorFlow"].get_reply(
            msg
        )

    def call_sampler(self, input_message: FlowMessage):
        """ This method calls the SamplerFlow. It sends a message to the SamplerFlow with the data of the input message.

        :param input_message: The input message to send to the SamplerFlow.
        :type input_message: FlowMessage
        """

        # Fetch state associated with the message
        msg_id = input_message.input_message_id
        message_state = self.get_message_from_state(msg_id)

        # Get data to send to SamplerFlow (prompt to generate a program)
        data = {
            **message_state["retrieved"],
        }
        msg = self.package_input_message(
            data=data,
            dst_flow="SamplerFlow"
        )
        # rename key in state to the new message id (in order to be able to find the state when the reply arrives)
        self.rename_key_message_in_state(msg_id, msg.message_id)

        # send message to SamplerFlow and expect a reply to be sent back to FunSearch's input message queue
        self.subflows["SamplerFlow"].get_reply(
            msg
        )
        # If FunSearch is running, make a new request for a prompt (to keep the process going)
        if self.flow_state["funsearch_running"]:
            self.make_request_for_prompt()

    def generate_reply(self, input_message: FlowMessage):
        """ This method generates a reply to a message sent by the user. It packages the output message and sends it.

        :param input_message: The input message to generate a reply to.
        :type input_message: FlowMessage
        """

        # Fetch state associated with the message (fully handled after this, so pop it)
        msg_id = input_message.input_message_id
        message_state = self.pop_message_from_state(msg_id)
        # Prepare response to send to user (due to a call to get_best_programs_per_island)
        response = {
            "retrieved": message_state["retrieved"]
        }
        reply = self.package_output_message(
            message_state["og_message"],
            response
        )

        self.send_message(
            reply
        )

    def run(self, input_message: FlowMessage):
        """ This method runs the flow. It's the main method of the flow. It's called when the flow is executed.

        :param input_message: The input message of the flow
        :type input_message: FlowMessage
        """
        self.register_data_to_state(input_message)

        next_state = self.get_next_state(input_message)

        if next_state == "ProgramDBFlow":
            self.call_program_db(input_message)

        elif next_state == "EvaluatorFlow":
            self.call_evaluator(input_message)

        elif next_state == "SamplerFlow":
            self.call_sampler(input_message)

        elif next_state == "GenerateReply":
            self.generate_reply(input_message)

        elif next_state == "FunSearch":
            # If operation is "start", set funsearch_running to True and make a request for a prompt
            if input_message.data["operation"] == "start":
                self.flow_state["funsearch_running"] = True
                self.request_samplers(input_message)
            # If operation is "stop", set funsearch_running to False (will stop the process of generating new samples)
            elif input_message.data["operation"] == "stop":
                self.flow_state["funsearch_running"] = False
        else:
            log.error("No next state found, input_message received: \n" + str(input_message))
FunSearch.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: FunSearchFlow
description: "A flow implementing FunSearch"
_target_: flow_modules.aiflows.FunSearchFlowModule.FunSearch.instantiate_from_default_config
user_id: local
input_interface:
  # FunSearch.run reads `operation` (not `action`) from the message data;
  # listing `action` here would filter the needed key out of incoming messages.
  - "operation"
  - "content"
  - "from"

output_interface:
  - "retrieved"
### Subflows specification
subflows_config:
  SamplerFlow:
    flow_class_name: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow
    flow_endpoint: SamplerFlow
    parallel_dispatch: True
    singleton: False
    user_id: local
    name: "Sampler Flow"
    description: "A flow that queries an LLM model to generate prompts"

  EvaluatorFlow:
    flow_class_name: flow_modules.aiflows.FunSearchFlowModule.EvaluatorFlow
    flow_endpoint: EvaluatorFlow
    user_id: local
    parallel_dispatch: True
    name: "A flow that evaluates code on tests"
    description: "A flow that evaluates code on tests"

  ProgramDBFlow:
    flow_class_name: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow
    flow_endpoint: ProgramDBFlow
    singleton: True
    user_id: local
    name: "ProgramDB"
    description: "A flow that registers samples and evaluations in a database"
Loader.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import os
3
+ import yaml
4
+
5
class Loader:
    """Loads a named target (function, class, or top-level variable) from a
    Python source file, or the parsed content of a YAML file.

    :param file_path: Path to the file to load from.
    :type file_path: str
    :param target_name: Name of the function/class/variable to extract
        (ignored for YAML files, where the whole document is returned).
    :type target_name: str
    :raises ValueError: If ``file_path`` does not exist.
    """

    def __init__(self, file_path, target_name):
        self.py_file_path = file_path
        self.target_name = target_name

        if not os.path.exists(file_path):
            raise ValueError(f"File {file_path} does not exist")

        # Explicit encoding so reads do not depend on the platform locale.
        with open(file_path, 'r', encoding='utf-8') as file:
            self.source_code = file.read()

    def load_target(self):
        """Return the target: parsed YAML for ``.yaml`` files, otherwise the
        source code of ``target_name`` extracted from the Python file."""
        if self.py_file_path.endswith('.yaml'):
            return self.load_yaml()
        else:
            return self.load_code()

    def load_full_file(self):
        """Return the raw text of the whole file."""
        return self.source_code

    def load_code(self):
        """Extract and return the source code of ``target_name``.

        :return: The unparsed source of the matching function, class, or
            assignment statement.
        :rtype: str
        :raises ValueError: If the target is not found in the module.
        """
        # Parse the source code into an abstract syntax tree (AST)
        tree = ast.parse(self.source_code)

        # Find the target node (FunctionDef, ClassDef, or variable assignment).
        target_node = None
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)) and node.name == self.target_name:
                target_node = node
                break
            elif isinstance(node, ast.Assign):
                if any(isinstance(target, ast.Name) and target.id == self.target_name for target in node.targets):
                    target_node = node
                    # Fix: the original `break` only exited the inner targets
                    # loop, so a later node could overwrite the match; stop
                    # the walk at the first matching assignment instead.
                    break

        if target_node is not None:
            # Extract the source code of the target
            return ast.unparse(target_node)
        else:
            raise ValueError(f"Target '{self.target_name}' not found in the module.")

    def load_yaml(self):
        """Parse the file as YAML and return its content.

        :raises ValueError: If the file is not valid YAML.
        """
        try:
            with open(self.py_file_path, 'r', encoding='utf-8') as yaml_file:
                yaml_content = yaml.safe_load(yaml_file)
            return yaml_content
        except yaml.YAMLError as e:
            raise ValueError(f"Error loading YAML file: {e}") from e
55
+
ProgramDBFlowModule/Cluster.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .artifacts import AbstractArtifact
3
+ from .utils import _softmax
4
class Cluster:
    """ A cluster of programs inside an island that all share the same
    scores-per-test signature. This code is an implementation of Funsearch
    (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired
    by the original code (https://github.com/google-deepmind/funsearch).

    **Citation**:

    @Article{FunSearch2023,
        author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
        journal = {Nature},
        title   = {Mathematical discoveries from program search with large language models},
        year    = {2023},
        doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, score: float, first_program: AbstractArtifact, epsilon=1e-6, sample_with_replacement=False, default_program_temperature=0.1):
        # Score shared by every program in this cluster.
        self.score: float = score
        self.programs: list[AbstractArtifact] = [first_program]
        # Text lengths, kept in lockstep with `programs`; shorter code is preferred.
        self.lengths = np.array([len(str(first_program))], dtype=np.float32)
        self.epsilon = epsilon
        self.sample_with_replacement = sample_with_replacement
        self.default_program_temperature = default_program_temperature

    def compute_length_probs(self, program_temperature: float):
        """ Compute a probability for each stored program from its length: the
        softmax of the negated, normalized lengths, so shorter programs are
        more likely. The sharpness is controlled by `program_temperature`.

        :param program_temperature: The temperature of the softmax
        :type program_temperature: float
        :return: The probability of each program given the length of the program
        :rtype: np.array
        """
        shortest = np.min(self.lengths)
        longest = np.max(self.lengths)

        # Normalize lengths relative to the shortest; epsilon guards the division.
        normalized = (self.lengths - shortest) / (longest + self.epsilon)

        # Negate so that shorter programs receive higher probability.
        return _softmax(-normalized, program_temperature)

    def register_program(self, program: str):
        """ Register a program on the cluster.

        :param program: The program to register
        :type program: str
        """
        self.lengths = np.append(self.lengths, len(str(program)))
        self.programs.append(program)

    def sample_program(self, program_temperature=None):
        """ Sample a program from the cluster, favoring shorter programs.

        :param program_temperature: The temperature of the program
            (defaults to `default_program_temperature` when None)
        :type program_temperature: float, optional
        :return: The sampled program
        :rtype: str
        """
        temperature = (
            self.default_program_temperature
            if program_temperature is None
            else program_temperature
        )

        probs = self.compute_length_probs(temperature)
        # Draw one index according to the length-based probabilities.
        chosen = np.random.choice(len(probs), p=probs, replace=self.sample_with_replacement)

        return self.programs[chosen]
ProgramDBFlowModule/Island.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Heavily Inpired by https://github.com/google-deepmind/funsearch/tree/main
2
+
3
+
4
+ import numpy as np
5
+ from typing import Callable
6
+ from .Cluster import Cluster
7
+ from .Program import Program
8
+ import ast
9
+ import astunparse
10
+ from typing import Optional,Dict,Any
11
+ from .artifacts import AbstractArtifact
12
+ from collections.abc import Mapping, Sequence
13
+ from copy import deepcopy
14
+ from .Program import ProgramVisitor,Program,text_to_artifact
15
+ import dataclasses
16
+ import scipy
17
+ from .utils import _softmax
18
+ ScoresPerTest = Mapping
19
+
20
+
21
+
22
class Island:
    """ An implementation of an Island of the ProgramDB. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    **Citation**:

    @Article{FunSearch2023,
        author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
        journal = {Nature},
        title   = {Mathematical discoveries from program search with large language models},
        year    = {2023},
        doi     = {10.1038/s41586-023-06924-6}
    }
    """
    def __init__(self,
                 artifact_to_evolve_name: str,
                 artifacts_per_prompt: int,
                 temperature: float,
                 temperature_period: int,
                 template: Program,
                 reduce_score_method: Optional[Callable] = np.mean,
                 sample_with_replacement: Optional[bool] = False):

        # Name of the artifact (e.g. function) being evolved on this island
        self.artifact_to_evolve_name: str = artifact_to_evolve_name
        # How many previous implementations are embedded in each prompt
        self.artifacts_per_prompt: int = artifacts_per_prompt

        # Base softmax temperature for cluster selection; it decays linearly
        # over `temperature_period` registered programs (see `pick_clusters`)
        self.temperature: float = temperature
        self.temperature_period: int = temperature_period

        # Clusters keyed by their scores-per-test signature string
        self.clusters: Dict[str, Cluster] = {}
        self.template: Program = template

        self.total_programs_on_island: int = 0
        # Reduces the per-test score values to a single cluster score (default: mean)
        self.reduce_score_method: Callable = reduce_score_method
        self.sample_with_replacement: bool = sample_with_replacement

    def register_program(self, program: AbstractArtifact, scores_per_test: ScoresPerTest):
        """ Register a program on the island. Programs with identical scores
        per test are grouped into the same cluster.

        :param program: The program to register
        :type program: AbstractArtifact
        :param scores_per_test: The scores per test of the program; each value
            is a mapping holding a numeric "score" entry
        :type scores_per_test: Dict[str,Any]
        """
        # Stable string signature of scores_per_test, used as the cluster key
        scores_per_test_key = " ".join([str(key) + ":" + str(score) for key, score in scores_per_test.items()])

        scores_per_test_values = np.array([score_per_test["score"] for score_per_test in scores_per_test.values()])

        if scores_per_test_key not in self.clusters:
            # New signature -> new cluster, scored by reducing the per-test scores
            score = self.reduce_score_method(scores_per_test_values)
            self.clusters[scores_per_test_key] = Cluster(score=score, first_program=program, sample_with_replacement=self.sample_with_replacement)

        else:
            self.clusters[scores_per_test_key].register_program(program)

        self.total_programs_on_island += 1

    def pick_clusters(self):
        """ Pick the clusters used to generate the prompt, sampling them with a
        softmax over cluster scores whose temperature decays as more programs
        are registered.

        :return: The clusters and their names
        :rtype: Tuple[List[Cluster],List[str]]
        """
        cluster_keys = list(self.clusters.keys())
        clusters = [self.clusters[key] for key in cluster_keys]

        cluster_scores = np.array([cluster.score for cluster in clusters])
        # Temperature decays linearly within each period of `temperature_period` registrations
        cluster_temperature = self.temperature * (1 - (self.total_programs_on_island % self.temperature_period) / self.temperature_period)
        probs = _softmax(cluster_scores, cluster_temperature)

        # Can occur at the beginning when there are not many clusters yet
        functions_per_prompt = min(self.artifacts_per_prompt, len(self.clusters))
        select_cluster_ids = np.random.choice(len(cluster_scores), size=functions_per_prompt, p=probs, replace=self.sample_with_replacement)

        return [clusters[cluster_id] for cluster_id in select_cluster_ids], [cluster_keys[cluster_id] for cluster_id in select_cluster_ids]

    def _get_versioned_artifact_name(self, i):
        """ Get the versioned artifact name (e.g. "priority_v2").

        :param i: The version of the artifact
        :type i: int
        :return: The versioned artifact name
        :rtype: str
        """
        return self.artifact_to_evolve_name + "_v" + str(i)

    def _generate_prompt(self, implementations: Sequence[AbstractArtifact], chosen_cluster_names: Sequence[str]):
        """ Generate the prompt: previous implementations renamed to versioned
        names (v0, v1, ...) followed by an empty header for the next version
        that the LLM is asked to complete.

        :param implementations: The implementations, sorted by ascending score
        :type implementations: Sequence[AbstractArtifact]
        :param chosen_cluster_names: The chosen cluster names (scores-per-test keys)
        :type chosen_cluster_names: Sequence[str]
        :return: The prompt
        :rtype: str
        """
        # Deep copy so renaming does not mutate the programs stored in the clusters
        implementations = deepcopy(implementations)

        versioned_artifacts: list[AbstractArtifact] = []

        for i, implementation in enumerate(implementations):
            new_artifact_name = self._get_versioned_artifact_name(i)
            implementation.name = new_artifact_name
            # NOTE(review): split(" ") followed by " ".join is an identity
            # transformation — this just copies the cluster key; confirm intended
            score_per_test = " ".join(chosen_cluster_names[i].split(" "))
            implementation.docstring = f'Scores per test: {score_per_test}'
            if i >= 1:
                implementation.docstring += f'\nImproved version of {self._get_versioned_artifact_name(i-1)}'
            # presumably rename_artifact_calls returns the implementation's source
            # text with internal calls renamed — verify against AbstractArtifact
            implementation = implementation.rename_artifact_calls(source_name=self.artifact_to_evolve_name, target_name=new_artifact_name)
            versioned_artifacts.append(text_to_artifact(implementation))

        # Create the header of the function to be generated by the LLM
        next_version = len(implementations)
        new_artifact_name = self._get_versioned_artifact_name(next_version)

        docstring = f'Improved version of {self._get_versioned_artifact_name(next_version-1)}'

        # Empty-bodied copy of the best implementation, serving as the completion target
        header = dataclasses.replace(
            implementations[-1],
            name=new_artifact_name,
            body='',
            docstring=docstring
        )

        versioned_artifacts.append(header)
        # Rebuild the template program with the versioned artifacts substituted in
        prompt = dataclasses.replace(self.template, artifacts=versioned_artifacts)
        return str(prompt)

    def get_prompt(self):
        """ Get the prompt built from sampled implementations (sorted by
        ascending cluster score) and the version number the LLM will generate.

        :return: The prompt and the version of the artifact to be generated
        :rtype: Tuple[str,int]
        """
        chosen_clusters, chosen_cluster_names = self.pick_clusters()

        scores = [cluster.score for cluster in chosen_clusters]

        # Sort ascending so the best-scoring implementation comes last in the prompt
        indices = np.argsort(scores)

        sorted_implementations = [chosen_clusters[i].sample_program() for i in indices]
        sorted_cluster_names = [chosen_cluster_names[i] for i in indices]

        version_generated = len(sorted_implementations)
        return self._generate_prompt(sorted_implementations, sorted_cluster_names), version_generated
166
+
167
+
168
+
ProgramDBFlowModule/Program.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dataclasses
2
+ from typing import Optional
3
+ from .artifacts import AbstractArtifact
4
+ import ast
5
+ from .artifacts import FunctionArtifact
6
+
7
@dataclasses.dataclass(frozen=True)
class Program:
    """A parsed Python program."""

    # `preface` is everything from the beginning of the code till the first
    # function is found.
    preface: str
    artifacts: list[AbstractArtifact]

    def __str__(self) -> str:
        """Render the program: preface (if any) followed by its artifacts."""
        pieces = []
        if self.preface:
            pieces.append(f'{self.preface}\n')
        pieces.append('\n'.join(str(artifact) for artifact in self.artifacts))
        return ''.join(pieces)

    def find_artifact_index(self, artifact_name: str) -> int:
        """Return the index of the artifact named `artifact_name`.

        :raises ValueError: if the name is absent, or appears more than once.
        """
        matches = [
            idx for idx, artifact in enumerate(self.artifacts)
            if artifact.name == artifact_name
        ]
        if not matches:
            raise ValueError(
                f'artifact {artifact_name} does not exist in program:\n{str(self)}'
            )
        if len(matches) > 1:
            raise ValueError(
                f'artifact {artifact_name} exists more than once in program:\n'
                f'{str(self)}'
            )
        return matches[0]

    def get_artifact(self, artifact_name: str) -> AbstractArtifact:
        """Return the unique artifact named `artifact_name`."""
        return self.artifacts[self.find_artifact_index(artifact_name)]
41
+
42
# TODO: Do this for various types of artifacts (only for functions rn)
class ProgramVisitor(ast.NodeVisitor):
    """Parses code to collect all required information to produce a `Program`.

    Note that we do not store function decorators.
    """

    def __init__(self, sourcecode: str):
        # Raw source lines; bodies are sliced out of here so the original
        # formatting is preserved verbatim.
        self._codelines: list[str] = sourcecode.splitlines()

        self._preface: str = ''
        self._artifacts: list[AbstractArtifact] = []
        self._current_artifact: Optional[str] = None

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Collects all information about the function being parsed."""
        if node.col_offset == 0:  # We only care about first level functions.
            # Fix: this previously assigned to `self._current_function`, an
            # attribute that is never declared; `_current_artifact` is the
            # attribute initialized in __init__.
            self._current_artifact = node.name
            if not self._artifacts:
                # Everything before the first top-level function is the preface.
                self._preface = '\n'.join(self._codelines[:node.lineno - 1])

            function_end_line = node.end_lineno
            body_start_line = node.body[0].lineno - 1
            # Extract the docstring. (`ast.Constant` with a `str` value replaces
            # the `ast.Str` node, deprecated since Python 3.8.)
            docstring = None
            if (isinstance(node.body[0], ast.Expr)
                    and isinstance(node.body[0].value, ast.Constant)
                    and isinstance(node.body[0].value.value, str)):
                docstring = f'  """{ast.literal_eval(ast.unparse(node.body[0]))}"""'
                if len(node.body) > 1:
                    body_start_line = node.body[1].lineno - 1
                else:
                    # Docstring-only function: the body slice is empty.
                    body_start_line = function_end_line

            self._artifacts.append(FunctionArtifact(
                name=node.name,
                args=ast.unparse(node.args),
                return_type=ast.unparse(node.returns) if node.returns else None,
                docstring=docstring,
                body='\n'.join(self._codelines[body_start_line:function_end_line]),
            ))
        self.generic_visit(node)

    def return_program(self) -> Program:
        """Return the `Program` assembled from everything visited so far."""
        return Program(preface=self._preface, artifacts=self._artifacts)
86
+
87
+
88
def text_to_program(text: str) -> Program:
    """Returns a Program object by parsing input text using the Python AST.

    We assume that the program is composed of some preface (e.g. imports,
    classes, assignments, ...) followed by a sequence of functions.

    :param text: Python source, possibly wrapped in a Markdown code fence.
    :raises SyntaxError: if the (unfenced) text is not valid Python.
    """
    # LLMs often return the code inside a Markdown code block. The original
    # check missed a trailing newline after the closing fence and a bare
    # "```" opener, so strip surrounding whitespace first.
    text = text.strip()
    if text.startswith("```python"):
        text = text[len("```python"):]
    elif text.startswith("```"):
        text = text[len("```"):]
    if text.endswith("```"):
        text = text[:-3]

    tree = ast.parse(text)
    visitor = ProgramVisitor(text)
    visitor.visit(tree)
    return visitor.return_program()
103
+
104
def text_to_artifact(text: str) -> AbstractArtifact:
    """Returns a single artifact (Function) parsed from `text` via the Python AST.

    :raises ValueError: if `text` contains anything other than exactly one artifact.
    """
    program = text_to_program(text)
    if len(program.artifacts) != 1:
        # Fix: the message previously interpolated `program.functions`, an
        # attribute that does not exist on `Program` — the error path itself
        # raised AttributeError instead of the intended ValueError.
        raise ValueError(f'Only one artifact expected, got {len(program.artifacts)}'
                         f':\n{program.artifacts}')
    return program.artifacts[0]
ProgramDBFlowModule/ProgramDBFlow.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
2
+
3
+
4
+ from aiflows.base_flows import AtomicFlow
5
+ from .Island import Island,ScoresPerTest
6
+ import numpy as np
7
+ from typing import Callable,Dict,Union, Any,Optional, List
8
+ import time
9
+ from aiflows.utils import logging
10
+ from .artifacts import AbstractArtifact
11
+ from .Program import Program,text_to_artifact
12
+ import ast
13
+ import os
14
+ from aiflows.messages import FlowMessage
15
+ log = logging.get_logger(f"aiflows.{__name__}")
16
+
17
class ProgramDBFlow(AtomicFlow):
    """ This class implements a ProgramDBFlow. It's a flow that stores programs and their scores in a database. It can also query the database for the best programs or generate a prompt containing stored programs in order to evolve them with a SamplerFlow. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)

    **Configuration Parameters**:

    - `name` (str): The name of the flow. Default: "ProgramDBFlow"
    - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: " A flow that saves programs in a database of islands"
    - `artifact_to_evolve_name` (str): The name of the artifact/program to evolve. Default: "solve_function"
    - `evaluate_function` (str): The function used to evaluate the program. No Default value. This MUST be passed as a parameter.
    - `evaluate_file_full_content` (str): The full content of the file containing the evaluation function. No Default value. This MUST be passed as a parameter.
    - `num_islands`: The number of islands to use. Default: 3
    - `reset_period`: The period in seconds to reset the islands. Default: 3600
    - `artifacts_per_prompt`: The number of previous artifacts/programs to include in a prompt. Default: 2
    - `temperature`: The temperature of the island. Default: 0.1
    - `temperature_period`: The period in seconds to change the temperature. Default: 30000
    - `sample_with_replacement`: Whether to sample with replacement. Default: False
    - `portion_of_islands_to_reset`: The portion of islands to reset. Default: 0.5
    - `template` (dict): The template to use for a program. Default: {"preface": ""}

    **Input Interface**:

    - `operation` (str): The operation to perform. It can be one of the following: ["register_program","get_prompt","get_best_programs_per_island"]

    **Output Interface**:

    - `retrieved` (Any): The retrieved data. It can be one of the following:
        - If the operation is "get_prompt", it can be a dictionary with the following keys
            - `code` (str): The code of the prompt
            - `version_generated` (int): The version of the prompt generated
            - `island_id` (int): The id of the island that generated the prompt
            - `header` (str): The header of the prompt
        - If the operation is "register_program", it can be a string with the message "Program registered" or "Program failed to register"
        - If the operation is "get_best_programs_per_island", it can be a dictionary with the following keys:
            - `best_island_programs` (List[Dict[str,Any]]): A list of dictionaries with the following keys:
                - `rank` (int): The rank of the program (1 is the best)
                - `score` (float): The score of the program
                - `program` (str): The program
                - `island_id` (int): The id of the island that generated the program

    **Citation**:

    @Article{FunSearch2023,
        author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
        journal = {Nature},
        title   = {Mathematical discoveries from program search with large language models},
        year    = {2023},
        doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Unpack config (for clarity)
        self.artifact_to_evolve_name: str = self.flow_config["artifact_to_evolve_name"]

        self.artifacts_per_prompt: int = self.flow_config["artifacts_per_prompt"]
        self.temperature: float = self.flow_config["temperature"]
        self.temperature_period: int = self.flow_config["temperature_period"]
        self.reduce_score_method: Callable = np.mean
        self.sample_with_replacement: bool = self.flow_config["sample_with_replacement"]
        self.num_islands: int = self.flow_config["num_islands"]
        self.portion_of_islands_to_reset: float = self.flow_config["portion_of_islands_to_reset"]
        self.reset_period: float = self.flow_config["reset_period"]

        self.evaluate_function = self.flow_config["evaluate_function"]
        self.evaluate_file_full_content = self.flow_config["evaluate_file_full_content"]

        # Raise instead of `assert` so the validation survives `python -O`.
        if not 0.0 <= self.portion_of_islands_to_reset <= 1.0:
            raise ValueError("portion_of_islands_to_reset must be between 0 and 1")

        # Round to the nearest integer number of islands to reset each period.
        self.islands_to_reset = int(round(self.portion_of_islands_to_reset * self.num_islands))

    def set_up_flow_state(self):
        """ This method sets up the state of the flow and clears the previous messages."""
        super().set_up_flow_state()

        # The preface is prepended to every generated prompt; it embeds the
        # evaluation function so the sampler sees the scoring criterion.
        preface = \
            self.flow_config["template"]["preface"] + "\n\n" + "#function used to evaluate the program:\n" + self.flow_config["evaluate_function"] + "\n\n"

        self.template: Program = Program(preface=preface, artifacts=[])

        # ~~~ instantiate islands ~~~
        self.flow_state["islands"] = [
            Island(
                artifact_to_evolve_name=self.flow_config["artifact_to_evolve_name"],
                artifacts_per_prompt=self.flow_config["artifacts_per_prompt"],
                temperature=self.flow_config["temperature"],
                temperature_period=self.flow_config["temperature_period"],
                reduce_score_method=np.mean,
                sample_with_replacement=self.flow_config["sample_with_replacement"],
                template=self.template
            )
            for _ in range(self.flow_config["num_islands"])
        ]

        self.flow_state["last_reset_time"] = time.time()
        self.flow_state["best_score_per_island"] = [float("-inf") for _ in range(self.flow_config["num_islands"])]
        self.flow_state["best_program_per_island"] = [None for _ in range(self.flow_config["num_islands"])]
        self.flow_state["best_scores_per_test_per_island"] = [None for _ in range(self.flow_config["num_islands"])]
        self.flow_state["first_program_registered"] = False

    def get_prompt(self):
        """ This method gets a prompt from a randomly chosen island. It returns the code, the version generated and the island id."""
        island_id = np.random.choice(len(self.flow_state["islands"]))
        code, version_generated = self.flow_state["islands"][island_id].get_prompt()
        return code, version_generated, island_id

    def reset_islands(self):
        """ This method resets the worst islands and seeds each reset island with the best program of a randomly chosen surviving island."""
        # Gaussian noise to break ties between islands with equal scores.
        sorted_island_ids = np.argsort(
            np.array(self.flow_state["best_score_per_island"]) +
            (np.random.randn(len(self.flow_state["best_score_per_island"])) * 1e-6)
        )

        reset_island_ids = sorted_island_ids[:self.islands_to_reset]
        keep_island_ids = sorted_island_ids[self.islands_to_reset:]

        for island_id in reset_island_ids:
            self.flow_state["islands"][island_id] = Island(
                artifact_to_evolve_name=self.artifact_to_evolve_name,
                artifacts_per_prompt=self.artifacts_per_prompt,
                temperature=self.temperature,
                temperature_period=self.temperature_period,
                reduce_score_method=np.mean,
                sample_with_replacement=self.sample_with_replacement,
                template=self.template
            )

            self.flow_state["best_score_per_island"][island_id] = float("-inf")
            founder_island_id = np.random.choice(keep_island_ids)
            # Fix: seed with the founder island's best *program*; the previous
            # code indexed `best_score_per_island`, registering a float score
            # as if it were a program.
            # NOTE(review): `best_program_per_island` stores str(program) —
            # confirm Island.register_program accepts the string form.
            founder = self.flow_state["best_program_per_island"][founder_island_id]
            founder_scores = self.flow_state["best_scores_per_test_per_island"][founder_island_id]
            self._register_program_in_island(program=founder, island_id=island_id, scores_per_test=founder_scores)

    def register_program(self, program: AbstractArtifact, island_id: int, scores_per_test: ScoresPerTest):
        """ This method registers a program in an island. It also updates the best program if needed.

        :param program: The program to register
        :type program: AbstractArtifact
        :param island_id: The id of the island to register the program (None registers it in every island)
        :type island_id: int
        :param scores_per_test: The scores per test of the program
        :type scores_per_test: ScoresPerTest
        """
        # Programs that call an earlier version of themselves are discarded.
        if not program.calls_ancestor(artifact_to_evolve=self.artifact_to_evolve_name):
            # Program added at the beginning, so add it to all islands.
            if island_id is None:
                for island in range(self.num_islands):
                    self._register_program_in_island(program=program, island_id=island, scores_per_test=scores_per_test)
            else:
                self._register_program_in_island(program=program, island_id=island_id, scores_per_test=scores_per_test)

        # Reset islands if the reset period has elapsed.
        if time.time() - self.flow_state["last_reset_time"] > self.reset_period:
            self.reset_islands()
            self.flow_state["last_reset_time"] = time.time()

    def _register_program_in_island(self, program: AbstractArtifact, scores_per_test: ScoresPerTest, island_id: Optional[int] = None):
        """ This method registers a program in a single island and updates that island's best-program bookkeeping.

        :param program: The program to register
        :type program: AbstractArtifact
        :param scores_per_test: The scores per test of the program
        :type scores_per_test: ScoresPerTest
        :param island_id: The id of the island to register the program (must not be None despite the default — callers always pass it)
        :type island_id: Optional[int]
        """
        self.flow_state["islands"][island_id].register_program(program, scores_per_test)

        # Reduce the per-test scores (np.mean by default) to a single score.
        scores_per_test_values = np.array([score_per_test["score"] for score_per_test in scores_per_test.values()])
        score = self.reduce_score_method(scores_per_test_values)

        if score > self.flow_state["best_score_per_island"][island_id]:
            self.flow_state["best_score_per_island"][island_id] = score
            self.flow_state["best_program_per_island"][island_id] = str(program)
            self.flow_state["best_scores_per_test_per_island"][island_id] = scores_per_test

    def get_best_programs(self) -> Dict[str, Any]:
        """ This method returns the best programs per island, ranked from worst (rank == num_islands) to best (rank == 1)."""
        # argsort is ascending, so the best island comes last; rank 1 is best.
        sorted_island_ids = np.argsort(np.array(self.flow_state["best_score_per_island"]))
        return {
            "best_island_programs": [
                {
                    "rank": self.num_islands - rank,
                    "score": self.flow_state["best_score_per_island"][island_id],
                    "program": self.flow_state["best_program_per_island"][island_id],
                    "island_id": int(island_id),
                }
                for rank, island_id in enumerate(sorted_island_ids)
            ]
        }

    def run(self, input_message: FlowMessage):
        """ This method runs the flow. It performs the operation requested in the input message and replies with the result."""
        input_data = input_message.data
        operation = input_data["operation"]
        content = input_data["content"]

        possible_operations = [
            "register_program",
            "get_prompt",
            "get_best_programs_per_island",
        ]

        if operation not in possible_operations:
            raise ValueError(f"operation must be one of the following: {possible_operations}")

        response = {}
        if operation == "get_prompt":
            if not self.flow_state["first_program_registered"]:
                # Nothing in the database yet: no prompt can be built.
                response["retrieved"] = False
            else:
                code, version_generated, island_id = self.get_prompt()
                response["retrieved"] = {
                    "code": code,
                    "version_generated": version_generated,
                    "island_id": island_id,
                    "header": self.evaluate_file_full_content
                }

        elif operation == "register_program":
            try:
                artifact = text_to_artifact(content["artifact"])
                island_id = content.get("island_id", None)
                scores_per_test = content["scores_per_test"]
                if scores_per_test is not None:
                    self.register_program(program=artifact, island_id=island_id, scores_per_test=scores_per_test)
                    response["retrieved"] = "Program registered"
                    self.flow_state["first_program_registered"] = True
                else:
                    # Fix: previously `retrieved` could be left unset here.
                    response["retrieved"] = "Program failed to register"
            # Fix: narrowed from a bare `except:` (which also swallowed
            # SystemExit/KeyboardInterrupt) and log the failure for debugging.
            except Exception:
                log.exception("Failed to register program")
                response["retrieved"] = "Program failed to register"
        else:
            response["retrieved"] = self.get_best_programs()

        response["from"] = "ProgramDBFlow"
        reply = self.package_output_message(
            input_message,
            response
        )

        self.send_message(reply)
ProgramDBFlowModule/ProgramDBFlow.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #default configuration of your flow (if needed) goes here
2
+ _target_: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow.instantiate_from_default_config
3
+ input_interface:
4
+ - "operation"
5
+ output_interface:
6
+ - "retrieved"
7
+
8
+ name: ProgramDBFlow
9
+ description: A flow that saves programs in a database of islands
10
+ artifact_to_evolve_name: solve_function
11
+ evaluate_function: ???
12
+ evaluate_file_full_content: ???
13
+ num_islands: 3
14
+ reset_period : 14400 #4 hours in seconds
15
+ artifacts_per_prompt: 2
16
+ temperature: 0.1
17
+ temperature_period: 30000
18
+ sample_with_replacement: False
19
+ portion_of_islands_to_reset: 0.5
20
+ template:
21
+ preface: ""
ProgramDBFlowModule/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ~~~ Specify the dependencies ~~~
2
+ # e.g.,
3
+ # dependencies = [
4
+ # {"url": "aiflows/AutoGPTFlowModule", "revision": "main"},
5
+ # ]
6
+ # Revision can correspond to a branch, commit hash or an absolute path to a local directory (ideal for development)
7
+ # from aiflows import flow_verse
8
+
9
+ # flow_verse.sync_dependencies(dependencies)
10
+
11
+ # ~~~ Import of your flow class (if you have any) ~~~
12
+ # from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
13
+ from .ProgramDBFlow import ProgramDBFlow
ProgramDBFlowModule/artifacts/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .abstract import AbstractArtifact
2
+ from .function import FunctionArtifact
ProgramDBFlowModule/artifacts/abstract.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
3
+
4
+ **Citation**:
5
+
6
+ @Article{FunSearch2023,
7
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
8
+ journal = {Nature},
9
+ title = {Mathematical discoveries from program search with large language models},
10
+ year = {2023},
11
+ doi = {10.1038/s41586-023-06924-6}
12
+ }
13
+ """
14
+ from typing import Any, Optional
15
+ import dataclasses
16
+
17
+
18
@dataclasses.dataclass
class AbstractArtifact:
    """Base class for named code artifacts (e.g. a parsed function).

    Fields:
        name: the artifact's identifier.
        args: the rendered argument list.
        body: the (indented) source of the artifact's body.
        return_type: rendered return annotation, if any.
        docstring: the artifact's docstring text, if any.
    """

    name: str
    args: str
    body: str
    return_type: Optional[str] = None
    docstring: Optional[str] = None

    def __str__(self) -> str:
        raise NotImplementedError()

    def __setattr__(self, name: str, value: str) -> None:
        """Normalize `body` and `docstring` on every assignment."""
        if name == 'body':
            # Keep the body free of leading/trailing blank lines.
            value = value.strip('\n')
        elif name == 'docstring' and value is not None and '"""' in value:
            # Drop surrounding whitespace and embedded triple quotes.
            value = value.strip().replace('"""', '')
        super().__setattr__(name, value)

    def rename_artifact_calls(self, source_name, target_name) -> str:
        raise NotImplementedError

    def text_to_artifact(self):
        raise NotImplementedError

    def calls_ancestor(self, artifact_to_evolve: str) -> bool:
        raise NotImplementedError
ProgramDBFlowModule/artifacts/function.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
3
+
4
+ **Citation**:
5
+
6
+ @Article{FunSearch2023,
7
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
8
+ journal = {Nature},
9
+ title = {Mathematical discoveries from program search with large language models},
10
+ year = {2023},
11
+ doi = {10.1038/s41586-023-06924-6}
12
+ }
13
+ """
14
+
15
+ from . import AbstractArtifact
16
+ import dataclasses
17
+ import tokenize
18
+ import io
19
+ from collections.abc import Iterator, MutableSet, Sequence
20
+
21
@dataclasses.dataclass
class FunctionArtifact(AbstractArtifact):
    """An artifact representing a single top-level Python function."""

    def __str__(self) -> str:
        """Renders the function back to Python source text."""
        return_type = f' -> {self.return_type}' if self.return_type else ''
        function = f'def {self.name}({self.args}){return_type}:\n'

        if self.docstring:
            # self.docstring is already indented on every line except the first one.
            # Here, we assume the indentation is always two spaces.
            new_line = '\n' if self.body else ''
            function += f'  """{self.docstring}"""{new_line}'

        # self.body is already indented.
        function += self.body + '\n\n'
        return function

    @staticmethod
    def _tokenize(code: str) -> Iterator[tokenize.TokenInfo]:
        """Transforms `code` into Python tokens."""
        code_bytes = code.encode()
        code_io = io.BytesIO(code_bytes)
        return tokenize.tokenize(code_io.readline)

    @staticmethod
    def _untokenize(tokens: Sequence[tokenize.TokenInfo]) -> str:
        """Transforms a list of Python tokens into code."""
        code_bytes = tokenize.untokenize(tokens)
        return code_bytes.decode()

    def _get_artifacts_called(self) -> MutableSet[str]:
        """Returns the set of all functions called in this function's body."""
        code = str(self.body)
        return set(token.string for token, is_call in
                   self._yield_token_and_is_call(code) if is_call)

    def calls_ancestor(self, artifact_to_evolve: str) -> bool:
        """Returns whether the generated function is calling an earlier version."""
        for name in self._get_artifacts_called():
            # In `program` passed into this function the most recently generated
            # function has already been renamed to `function_to_evolve` (without the
            # suffix). Therefore any function call starting with `function_to_evolve_v`
            # is a call to an ancestor function.
            if name.startswith(f'{artifact_to_evolve}_v') and not name.startswith(self.name):
                return True
        return False

    def _yield_token_and_is_call(self, code: str) -> Iterator[tuple[tokenize.TokenInfo, bool]]:
        """Yields each token with a bool indicating whether it is a function call.

        Fix: the first parameter was named `cls` without a @classmethod
        decorator — it was always called through `self`, binding the instance
        to `cls`. Renamed to `self`; behavior is unchanged.
        """
        tokens = self._tokenize(code)
        prev_token = None
        is_attribute_access = False
        for token in tokens:
            if (prev_token and                            # If the previous token exists and
                    prev_token.type == tokenize.NAME and  # it is a Python identifier
                    token.type == tokenize.OP and         # and the current token is a delimiter
                    token.string == "("                   # and in particular it is '('.
                    ):
                # A NAME directly followed by '(' is a call — unless the NAME
                # was reached through attribute access (e.g. `obj.method(`).
                yield prev_token, not is_attribute_access
                is_attribute_access = False
            else:
                if prev_token:
                    is_attribute_access = (
                        prev_token.type == tokenize.OP and prev_token.string == '.'
                    )
                    yield prev_token, False

            prev_token = token
        if prev_token:
            yield prev_token, False

    def rename_artifact_calls(self, source_name, target_name) -> str:
        """Returns this function's source with every *call* to `source_name`
        renamed to `target_name` (attribute accesses are left untouched)."""
        implementation = str(self)

        if source_name not in implementation:
            return implementation

        modified_tokens = []
        for token, is_call in self._yield_token_and_is_call(implementation):
            if is_call and token.string == source_name:
                # Replace the function name token
                modified_token = tokenize.TokenInfo(
                    type=token.type,
                    string=target_name,
                    start=token.start,
                    end=token.end,
                    line=token.line,
                )
                modified_tokens.append(modified_token)
            else:
                modified_tokens.append(token)
        return self._untokenize(modified_tokens)
ProgramDBFlowModule/demo.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #default configuration of your flow (if needed) goes here
2
+ _target_: flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow.instantiate_from_default_config
3
+ input_interface:
4
+ - "operation"
5
+ output_interface:
6
+ - "retrieved"
7
+
8
+ name: ProgramDBFlow
9
+ description: A flow that saves programs in a database of islands
10
+ artifact_to_evolve_name: solve_function
11
+ evaluate_function: |2-
12
+ def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:\n """Returns the score of the solve function we\'re evolving based on the tests_inputs and expected_outputs.\n Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.\n """\n if solve(solve_function, tests_inputs, expected_outputs) == True:\n return 1.0\n return 0.0
13
+
14
+ evaluate_file_full_content: |2-
15
+ """Problem Description:\nServal has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\\leq i\\leq n.\nServal can perform the following operation called Inversion Magic on the string s:\nChoose an segment [l, r] (1\\leq l\\leq r\\leq n). For l\\leq i\\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.\nFor example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.\nServal wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.\nA string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.\n\nInput Description:\nInput\nEach test contains multiple test cases. The first line contains the number of test cases t (1\\leq t\\leq 10^4). The description of the test cases follows.\nThe first line of each test case contains a single integer n (2\\leq n\\leq 10^5) — the length of string s.\nThe second line of each test case contains a binary string s of length n. 
Only characters 0 and 1 can appear in s.\nIt\'s guaranteed that the sum of n over all test cases does not exceed 2\\cdot 10^5.\n\nOutput Description:\nOutput\nFor each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.\nYou can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).\n\nPublic Tests:\nTest 1:\n Input: [\'1\', \'4\', \'1001\']\n Output: \'YES\'\nTest 2:\n Input: [\'1\', \'5\', \'10010\']\n Output: \'YES\'\nTest 3:\n Input: [\'1\', \'7\', \'0111011\']\n Output: \'NO\'\n\n"""\n\n\nimport ast\nimport itertools\nimport numpy as np\nfrom typing import List\n\ndef solve(solve_function: str,input: List[str], expected_output: str) -> str:\n """function used to run the solve function on input *kwargs and return the the predicted output\n \n :param solve_function: the function to run (the solve function below as a string)\n :type solve_function: str\n :param kwargs: the inputs to the solve function\n :type kwargs: List[str]\n """\n local_namespace = {}\n exec(solve_function,local_namespace)\n found_name, program_name = get_function_name_from_code(solve_function)\n \n if not found_name:\n raise ValueError(f"Function name not found in program: {solve_function}")\n \n solve_fn = local_namespace.get(program_name)\n \n prediction = solve_fn(input)\n \n prediction = prediction.split()\n expected_output = expected_output.split()\n \n if len(prediction) != len(expected_output):\n raise ValueError(f"Invalid Format of prediction")\n \n for i in range(len(prediction)):\n if prediction[i] != expected_output[i]:\n return False\n \n return True\n\ndef evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:\n """Returns the score of the solve function we\'re evolving based on the tests_inputs and expected_outputs.\n Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.\n """\n if 
solve(solve_function,tests_inputs,expected_outputs) == True:\n return 1.0\n return 0.0\n\n\ndef get_function_name_from_code(code):\n tree = ast.parse(code)\n for node in ast.walk(tree):\n if isinstance(node, ast.FunctionDef):\n return True, node.name\n\n # something is wrong\n return False, None\n\n\n\n\n \n
16
+ num_islands: 3
17
+ reset_period : 14400 #4 hours in seconds
18
+ artifacts_per_prompt: 2
19
+ temperature: 0.1
20
+ temperature_period: 30000
21
+ sample_with_replacement: False
22
+ portion_of_islands_to_reset: 0.5
23
+ template:
24
+ preface: ""
ProgramDBFlowModule/pip_requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ numpy==1.26.2
2
+ SciPy==1.11.4
ProgramDBFlowModule/run.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import hydra
4
+
5
+ import aiflows
6
+ from aiflows.backends.api_info import ApiInfo
7
+ from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
8
+
9
+ from aiflows import logging
10
+ from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
11
+
12
+ from aiflows.utils import serving
13
+ from aiflows.workers import run_dispatch_worker_thread
14
+ from aiflows.messages import FlowMessage
15
+ from aiflows.interfaces import KeyInterface
16
+ from aiflows.utils.colink_utils import start_colink_server
17
+ from aiflows import flow_verse
18
+
19
+
20
# Flow-verse dependency: resolve FunSearchFlowModule from the local checkout
# one directory up. An absolute-path revision is the development workflow
# (a branch name or commit hash would fetch a published revision instead).
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        "revision": os.path.abspath("../")
    }
]
flow_verse.sync_dependencies(dependencies)

# Verbose logging so the demo prints the full flow traffic.
logging.set_verbosity_debug()
29
+
30
+
31
if __name__ == "__main__":

    # Start a local CoLink server and serve the ProgramDBFlow endpoint on it.
    cl = start_colink_server()

    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.ProgramDBFlow",
        flow_endpoint="ProgramDBFlow",
    )

    # Worker thread that dispatches incoming messages to the served flow.
    run_dispatch_worker_thread(cl)

    config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))

    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="ProgramDBFlow",
        config_overrides=config_overrides,
    )

    # Seed the database with one hand-written solve_function and its
    # per-test scores (the payload shape expected by the flow's
    # "register_program" operation).
    data = {
        'operation': 'register_program',
        'content': {
            'artifact': 'def solve_function(input) -> str:\n """Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)"""\n return \'YES\'\n',
            'scores_per_test':
                {
                    'test_1':
                        {'score': 1.0, 'feedback': 'No feedback available.'},
                    'test_2':
                        {'score': 1.0, 'feedback': 'No feedback available.'},
                    'test_3': {'score': 0.0, 'feedback': 'No feedback available.'},
                    'test_4': {'score': -1, 'feedback': 'Invalid Format of prediction'}
                }
        }
    }

    # Fire-and-forget registration: no reply future is awaited here.
    # NOTE(review): the following get_prompt request may race the
    # registration (if it is processed first, `retrieved` is False) — confirm
    # the dispatch worker processes messages in order.
    input_message = funsearch_proxy.package_input_message(data=data)
    funsearch_proxy.send_message(input_message)

    data = {'operation': 'get_prompt', 'content': {}}
    input_message = funsearch_proxy.package_input_message(data=data)

    # Blocks until the flow replies with the generated prompt.
    example_of_prompt = funsearch_proxy.get_reply_future(input_message).get_data()

    data = {
        "operation": "get_best_programs_per_island",
        "content": {}
    }

    input_message = funsearch_proxy.package_input_message(data=data)

    # Blocks until the flow replies with the ranked best programs.
    best_pg_per_island = funsearch_proxy.get_reply_future(input_message).get_data()

    print("~~~Example of Prompt~~~")
    print(example_of_prompt)

    print("~~~Best Programs Per Island~~~")
    print(best_pg_per_island)
ProgramDBFlowModule/utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
2
+
3
+ **Citation**:
4
+
5
+ @Article{FunSearch2023,
6
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
7
+ journal = {Nature},
8
+ title = {Mathematical discoveries from program search with large language models},
9
+ year = {2023},
10
+ doi = {10.1038/s41586-023-06924-6}
11
+ }
12
+ """
13
+
14
+ import numpy as np
15
+ import scipy
16
def add_gaussian_noise(array, mean=0, std=1):
    """Return `array` perturbed by elementwise i.i.d. Gaussian noise.

    :param array: Input numpy array (not modified in place).
    :param mean: Mean of the noise distribution. Default: 0.
    :param std: Standard deviation of the noise distribution. Default: 1.
    :return: A new array equal to ``array + noise`` with noise ~ N(mean, std).
    """
    noise = np.random.normal(mean, std, array.shape)
    return array + noise
18
+
19
def get_versioned_function_name(function_name, i):
    """Return the versioned name for the i-th mutation of an evolved function.

    FunSearch stores successive LLM-generated mutations of the evolved artifact
    under names ``<function_name>_v0``, ``<function_name>_v1``, ...; this helper
    builds that name. Replaces a previously unimplemented stub (returned None).

    :param function_name: Base name of the function/artifact.
    :param i: Version index (any value with a str() representation).
    :return: The versioned function name as a string.
    """
    return f"{function_name}_v{i}"
21
+
22
def rename_function(implementation,function_name):
    # TODO(review): unimplemented stub. Judging by the name it should rename the
    # function defined in the source string `implementation` to `function_name`,
    # but nothing in the visible code confirms that — currently it does nothing
    # and returns None.
    pass
24
+
25
def string_concatenation(strings, split_char='\n\n'):
    """Join `strings` with `split_char` between consecutive elements.

    Replaces a previously unimplemented stub (which returned None). The contract
    is determined by the name and signature: a separator-join of the inputs.

    :param strings: Iterable of strings to concatenate.
    :param split_char: Separator inserted between elements. Default: "\\n\\n".
    :return: The joined string (empty string for an empty iterable).
    """
    return split_char.join(strings)
27
+
28
def make_header_like(implementation,function_name):
    # TODO(review): unimplemented stub. Presumably meant to rewrite the header
    # (signature/docstring preamble) of `implementation` to use `function_name`;
    # intended semantics are not shown by the visible code — currently does
    # nothing and returns None.
    pass
30
+
31
def rename_function_calls(implementation,function_name):
    # TODO(review): unimplemented stub. Presumably meant to rewrite call sites
    # inside the source string `implementation` to call `function_name`; the
    # intended semantics are not shown by the visible code — currently does
    # nothing and returns None.
    pass
33
+
34
def extract_template_from_program(program):
    # TODO(review): unimplemented stub. Presumably meant to extract a reusable
    # program template (e.g. preface/skeleton) from `program`; the intended
    # semantics are not shown by the visible code — currently does nothing and
    # returns None.
    pass
36
+
37
+
38
def _softmax(logits: np.ndarray, temperature: float, epsilon: float = 1e-6) -> np.ndarray:
    """Returns the tempered softmax of 1D finite `logits`.

    :param logits: 1D array of finite scores (non-float dtypes are cast to
        float32 before normalization).
    :param temperature: Softmax temperature; logits are divided by it before
        the softmax, so lower values sharpen the distribution.
    :param epsilon: Small probability mass added to every entry so that no
        probability is exactly zero. Default: 1e-6.
    :return: 1D array of probabilities that sums to exactly 1.
    :raises ValueError: If `logits` contains NaN or +/- infinity.
    """
    if not np.all(np.isfinite(logits)):
        non_finites = set(logits[~np.isfinite(logits)])
        raise ValueError(f'`logits` contains non-finite value(s): {non_finites}')
    if not np.issubdtype(logits.dtype, np.floating):
        logits = np.array(logits, dtype=np.float32)

    result = scipy.special.softmax(logits / temperature, axis=-1)

    # Non-zero mass to prevent zero probabilities.
    result += epsilon  # Add epsilon to prevent zeros
    result /= np.sum(result, axis=-1, keepdims=True)  # Normalize

    # Ensure that probabilities sum to exactly 1 to prevent an error in
    # `np.random.choice` (floating-point rounding could otherwise leave the sum
    # slightly off): the largest entry absorbs the residual.
    index = np.argmax(result)
    result[index] = 1 - np.sum(result[0:index]) - np.sum(result[index+1:])
    return result
README.md CHANGED
@@ -1,3 +1,675 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+ # Table of Contents
5
+
6
+ * [FunSearch](#FunSearch)
7
+ * [FunSearch](#FunSearch.FunSearch)
8
+ * [make\_request\_for\_prompt](#FunSearch.FunSearch.make_request_for_prompt)
9
+ * [request\_samplers](#FunSearch.FunSearch.request_samplers)
10
+ * [get\_next\_state](#FunSearch.FunSearch.get_next_state)
11
+ * [set\_up\_flow\_state](#FunSearch.FunSearch.set_up_flow_state)
12
+ * [save\_message\_to\_state](#FunSearch.FunSearch.save_message_to_state)
13
+ * [rename\_key\_message\_in\_state](#FunSearch.FunSearch.rename_key_message_in_state)
14
+ * [message\_in\_state](#FunSearch.FunSearch.message_in_state)
15
+ * [get\_message\_from\_state](#FunSearch.FunSearch.get_message_from_state)
16
+ * [pop\_message\_from\_state](#FunSearch.FunSearch.pop_message_from_state)
17
+ * [merge\_message\_request\_state](#FunSearch.FunSearch.merge_message_request_state)
18
+ * [register\_data\_to\_state](#FunSearch.FunSearch.register_data_to_state)
19
+ * [call\_program\_db](#FunSearch.FunSearch.call_program_db)
20
+ * [call\_evaluator](#FunSearch.FunSearch.call_evaluator)
21
+ * [call\_sampler](#FunSearch.FunSearch.call_sampler)
22
+ * [generate\_reply](#FunSearch.FunSearch.generate_reply)
23
+ * [run](#FunSearch.FunSearch.run)
24
+ * [ProgramDBFlowModule](#ProgramDBFlowModule)
25
+ * [ProgramDBFlowModule.ProgramDBFlow](#ProgramDBFlowModule.ProgramDBFlow)
26
+ * [ProgramDBFlow](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow)
27
+ * [set\_up\_flow\_state](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.set_up_flow_state)
28
+ * [get\_prompt](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_prompt)
29
+ * [reset\_islands](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.reset_islands)
30
+ * [register\_program](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.register_program)
31
+ * [get\_best\_programs](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_best_programs)
32
+ * [run](#ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.run)
33
+ * [SamplerFlowModule](#SamplerFlowModule)
34
+ * [SamplerFlowModule.SamplerFlow](#SamplerFlowModule.SamplerFlow)
35
+ * [SamplerFlow](#SamplerFlowModule.SamplerFlow.SamplerFlow)
36
+ * [run](#SamplerFlowModule.SamplerFlow.SamplerFlow.run)
37
+ * [EvaluatorFlowModule](#EvaluatorFlowModule)
38
+ * [EvaluatorFlowModule.EvaluatorFlow](#EvaluatorFlowModule.EvaluatorFlow)
39
+ * [EvaluatorFlow](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow)
40
+ * [load\_functions](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.load_functions)
41
+ * [run\_function\_with\_timeout](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run_function_with_timeout)
42
+ * [evaluate\_program](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.evaluate_program)
43
+ * [analyse](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.analyse)
44
+ * [run](#EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run)
45
+
46
+ <a id="FunSearch"></a>
47
+
48
+ # FunSearch
49
+
50
+ <a id="FunSearch.FunSearch"></a>
51
+
52
+ ## FunSearch Objects
53
+
54
+ ```python
55
+ class FunSearch(CompositeFlow)
56
+ ```
57
+
58
+ This class implements FunSearch. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch) . It's a Flow in charge of starting, stopping and managing (passing around messages) the FunSearch process. It passes messages around to the following subflows:
59
+
60
+ - ProgramDBFlow: which is in charge of storing and retrieving programs.
61
+ - SamplerFlow: which is in charge of sampling programs.
62
+ - EvaluatorFlow: which is in charge of evaluating programs.
63
+
64
+ *Configuration Parameters*:
65
+
66
+ - `name` (str): The name of the flow. Default: "FunSearchFlow".
67
+ - `description` (str): The description of the flow. Default: "A flow implementing FunSearch"
68
+ - `subflows_config` (Dict[str,Any]): A dictionary of subflows configurations. Default:
69
+ - `ProgramDBFlow`: By default, it uses the `ProgramDBFlow` class and uses its default parameters.
70
+ - `SamplerFlow`: By default, it uses the `SamplerFlow` class and uses its default parameters.
71
+ - `EvaluatorFlow`: By default, it uses the `EvaluatorFlow` class and uses its default parameters.
72
+
73
+ **Input Interface**:
74
+
75
+ - `from` (str): The flow from which the message is coming from. It can be one of the following: "FunSearch", "SamplerFlow", "EvaluatorFlow", "ProgramDBFlow".
76
+ - `operation` (str): The operation to perform. It can be one of the following: "start", "stop", "get_prompt", "get_best_programs_per_island", "register_program".
77
+ - `content` (Dict[str,Any]): The content associated to an operation. Here is the expected content for each operation:
78
+ - "start":
79
+ - `num_samplers` (int): The number of samplers to start up. Note that it's still restricted by the number of workers available. Default: 1.
80
+ - "stop":
81
+ - No content. Pass either an empty dictionary or None. Works also with no content.
82
+ - "get_prompt":
83
+ - No content. Pass either an empty dictionary or None. Works also with no content.
84
+ - "get_best_programs_per_island":
85
+ - No content. Pass either an empty dictionary or None. Works also with no content.
86
+
87
+ **Output Interface**:
88
+
89
+ - `retrieved` (Dict[str,Any]): The retrieved data.
90
+
91
+ **Citation**:
92
+
93
+ @Article{FunSearch2023,
94
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
95
+ journal = {Nature},
96
+ title = {Mathematical discoveries from program search with large language models},
97
+ year = {2023},
98
+ doi = {10.1038/s41586-023-06924-6}
99
+ }
100
+
101
+ <a id="FunSearch.FunSearch.make_request_for_prompt"></a>
102
+
103
+ #### make\_request\_for\_prompt
104
+
105
+ ```python
106
+ def make_request_for_prompt()
107
+ ```
108
+
109
+ This method makes a request for a prompt. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt.
110
+
111
+ <a id="FunSearch.FunSearch.request_samplers"></a>
112
+
113
+ #### request\_samplers
114
+
115
+ ```python
116
+ def request_samplers(input_message: FlowMessage)
117
+ ```
118
+
119
+ This method requests samplers. It sends a message to itself with the operation "get_prompt" which will trigger the flow to call the `ProgramDBFlow` to get a prompt.
120
+
121
+ **Arguments**:
122
+
123
+ - `input_message` (`FlowMessage`): The input message that triggered the request for samplers.
124
+
125
+ <a id="FunSearch.FunSearch.get_next_state"></a>
126
+
127
+ #### get\_next\_state
128
+
129
+ ```python
130
+ def get_next_state(input_message: FlowMessage)
131
+ ```
132
+
133
+ This method determines the next state of the flow based on the input message. It will return the next state based on the current state and the message received.
134
+
135
+ **Arguments**:
136
+
137
+ - `input_message` (`FlowMessage`): The input message that triggered the request for the next state.
138
+
139
+ **Returns**:
140
+
141
+ `str`: The next state of the flow.
142
+
143
+ <a id="FunSearch.FunSearch.set_up_flow_state"></a>
144
+
145
+ #### set\_up\_flow\_state
146
+
147
+ ```python
148
+ def set_up_flow_state()
149
+ ```
150
+
151
+ This method sets up the state of the flow. It's called at the beginning of the flow.
152
+
153
+ <a id="FunSearch.FunSearch.save_message_to_state"></a>
154
+
155
+ #### save\_message\_to\_state
156
+
157
+ ```python
158
+ def save_message_to_state(msg_id: str, message: FlowMessage)
159
+ ```
160
+
161
+ This method saves a message to the state of the flow. It's used to keep track of state on a per message basis (i.e., state of the flow depending on the message received and id).
162
+
163
+ **Arguments**:
164
+
165
+ - `msg_id` (`str`): The id of the message to save.
166
+ - `message` (`FlowMessage`): The message to save.
167
+
168
+ <a id="FunSearch.FunSearch.rename_key_message_in_state"></a>
169
+
170
+ #### rename\_key\_message\_in\_state
171
+
172
+ ```python
173
+ def rename_key_message_in_state(old_key: str, new_key: str)
174
+ ```
175
+
176
+ This method renames a key in the state of the flow in the "msg_requests" dictionary. It's used to rename a key in the state of the flow (i.e., rename a message id).
177
+
178
+ **Arguments**:
179
+
180
+ - `old_key` (`str`): The old key to rename.
181
+ - `new_key` (`str`): The new key to rename to.
182
+
183
+ <a id="FunSearch.FunSearch.message_in_state"></a>
184
+
185
+ #### message\_in\_state
186
+
187
+ ```python
188
+ def message_in_state(msg_id: str) -> bool
189
+ ```
190
+
191
+ This method checks if a message is in the state of the flow (in "msg_requests" dictionary). It returns True if the message is in the state, otherwise it returns False.
192
+
193
+ **Arguments**:
194
+
195
+ - `msg_id` (`str`): The id of the message to check if it's in the state.
196
+
197
+ **Returns**:
198
+
199
+ `bool`: True if the message is in the state, otherwise False.
200
+
201
+ <a id="FunSearch.FunSearch.get_message_from_state"></a>
202
+
203
+ #### get\_message\_from\_state
204
+
205
+ ```python
206
+ def get_message_from_state(msg_id: str) -> Dict[str, Any]
207
+ ```
208
+
209
+ This method returns the state associated with a message id in the state of the flow (in "msg_requests" dictionary).
210
+
211
+ **Arguments**:
212
+
213
+ - `msg_id` (`str`): The id of the message to get the state from.
214
+
215
+ **Returns**:
216
+
217
+ `Dict[str,Any]`: The state associated with the message id.
218
+
219
+ <a id="FunSearch.FunSearch.pop_message_from_state"></a>
220
+
221
+ #### pop\_message\_from\_state
222
+
223
+ ```python
224
+ def pop_message_from_state(msg_id: str) -> Dict[str, Any]
225
+ ```
226
+
227
+ This method pops a message from the state of the flow (in "msg_requests" dictionary). It returns the state associated with a message and removes it from the state.
228
+
229
+ **Arguments**:
230
+
231
+ - `msg_id` (`str`): The id of the message to pop from the state.
232
+
233
+ **Returns**:
234
+
235
+ `Dict[str,Any]`: The state associated with the message id.
236
+
237
+ <a id="FunSearch.FunSearch.merge_message_request_state"></a>
238
+
239
+ #### merge\_message\_request\_state
240
+
241
+ ```python
242
+ def merge_message_request_state(id: str, new_states: Dict[str, Any])
243
+ ```
244
+
245
+ This method merges new states to a message in the state of the flow (in "msg_requests" dictionary). It merges new states to a message in the state.
246
+
247
+ **Arguments**:
248
+
249
+ - `id` (`str`): The id of the message to merge new states to.
250
+ - `new_states` (`Dict[str,Any]`): The new states to merge to the message.
251
+
252
+ <a id="FunSearch.FunSearch.register_data_to_state"></a>
253
+
254
+ #### register\_data\_to\_state
255
+
256
+ ```python
257
+ def register_data_to_state(input_message: FlowMessage)
258
+ ```
259
+
260
+ This method registers the input message data to the flow state. It's called everytime a new input message is received.
261
+
262
+ **Arguments**:
263
+
264
+ - `input_message` (`FlowMessage`): The input message
265
+
266
+ <a id="FunSearch.FunSearch.call_program_db"></a>
267
+
268
+ #### call\_program\_db
269
+
270
+ ```python
271
+ def call_program_db(input_message)
272
+ ```
273
+
274
+ This method calls the ProgramDBFlow. It sends a message to the ProgramDBFlow with the data of the input message.
275
+
276
+ **Arguments**:
277
+
278
+ - `input_message` (`FlowMessage`): The input message to send to the ProgramDBFlow.
279
+
280
+ <a id="FunSearch.FunSearch.call_evaluator"></a>
281
+
282
+ #### call\_evaluator
283
+
284
+ ```python
285
+ def call_evaluator(input_message)
286
+ ```
287
+
288
+ This method calls the EvaluatorFlow. It sends a message to the EvaluatorFlow with the data of the input message.
289
+
290
+ **Arguments**:
291
+
292
+ - `input_message` (`FlowMessage`): The input message to send to the EvaluatorFlow.
293
+
294
+ <a id="FunSearch.FunSearch.call_sampler"></a>
295
+
296
+ #### call\_sampler
297
+
298
+ ```python
299
+ def call_sampler(input_message)
300
+ ```
301
+
302
+ This method calls the SamplerFlow. It sends a message to the SamplerFlow with the data of the input message.
303
+
304
+ **Arguments**:
305
+
306
+ - `input_message` (`FlowMessage`): The input message to send to the SamplerFlow.
307
+
308
+ <a id="FunSearch.FunSearch.generate_reply"></a>
309
+
310
+ #### generate\_reply
311
+
312
+ ```python
313
+ def generate_reply(input_message: FlowMessage)
314
+ ```
315
+
316
+ This method generates a reply to a message sent to user. It packages the output message and sends it.
317
+
318
+ **Arguments**:
319
+
320
+ - `input_message` (`FlowMessage`): The input message to generate a reply to.
321
+
322
+ <a id="FunSearch.FunSearch.run"></a>
323
+
324
+ #### run
325
+
326
+ ```python
327
+ def run(input_message: FlowMessage)
328
+ ```
329
+
330
+ This method runs the flow. It's the main method of the flow. It's called when the flow is executed.
331
+
332
+ <a id="ProgramDBFlowModule"></a>
333
+
334
+ # ProgramDBFlowModule
335
+
336
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow"></a>
337
+
338
+ ## ProgramDBFlow Objects
339
+
340
+ ```python
341
+ class ProgramDBFlow(AtomicFlow)
342
+ ```
343
+
344
+ This class implements a ProgramDBFlow. It's a flow that stores programs and their scores in a database. It can also query the database for the best programs or generate a prompt containing stored programs in order to evolve them with a SamplerFlow. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
345
+
346
+ **Configuration Parameters**:
347
+
348
+ - `name` (str): The name of the flow. Default: "ProgramDBFlow"
349
+ - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: " A flow that saves programs in a database of islands"
350
+ - `artifact_to_evolve_name` (str): The name of the artifact/program to evolve. Default: "solve_function"
351
+ - `evaluate_function` (str): The function used to evaluate the program. No Default value. This MUST be passed as a parameter.
352
+ - `evaluate_file_full_content` (str): The full content of the file containing the evaluation function. No Default value. This MUST be passed as a parameter.
353
+ - `num_islands`: The number of islands to use. Default: 3
354
+ - `reset_period`: The period in seconds to reset the islands. Default: 3600
355
+ - `artifacts_per_prompt`: The number of previous artifacts/programs to include in a prompt. Default: 2
356
+ - `temperature`: The temperature of the island. Default: 0.1
357
+ - `temperature_period`: The period in seconds to change the temperature. Default: 30000
358
+ - `sample_with_replacement`: Whether to sample with replacement. Default: False
359
+ - `portion_of_islands_to_reset`: The portion of islands to reset. Default: 0.5
360
+ - `template` (dict): The template to use for a program. Default: {"preface": ""}
361
+
362
+ **Input Interface**:
363
+
364
+ - `operation` (str): The operation to perform. It can be one of the following: ["register_program","get_prompt","get_best_programs_per_island"]
365
+
366
+ **Output Interface**:
367
+
368
+ - `retrieved` (Any): The retrieved data. It can be one of the following:
369
+ - If the operation is "get_prompt", it can be a dictionary with the following keys
370
+ - `code` (str): The code of the prompt
371
+ - `version_generated` (int): The version of the prompt generated
372
+ - `island_id` (int): The id of the island that generated the prompt
373
+ - `header` (str): The header of the prompt
374
+ - If the operation is "register_program", it can be a string with the message "Program registered" or "Program failed to register"
375
+ - If the operation is "get_best_programs_per_island", it can be a dictionary with the following keys:
376
+ - `best_island_programs` (List[Dict[str,Any]]): A list of dictionaries with the following keys:
377
+ - `rank` (int): The rank of the program (1 is the best)
378
+ - `score` (float): The score of the program
379
+ - `program` (str): The program
380
+ - `island_id` (int): The id of the island that generated the program
381
+
382
+ **Citation**:
383
+
384
+ @Article{FunSearch2023,
385
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
386
+ journal = {Nature},
387
+ title = {Mathematical discoveries from program search with large language models},
388
+ year = {2023},
389
+ doi = {10.1038/s41586-023-06924-6}
390
+ }
391
+
392
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.set_up_flow_state"></a>
393
+
394
+ #### set\_up\_flow\_state
395
+
396
+ ```python
397
+ def set_up_flow_state()
398
+ ```
399
+
400
+ This method sets up the state of the flow and clears the previous messages.
401
+
402
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_prompt"></a>
403
+
404
+ #### get\_prompt
405
+
406
+ ```python
407
+ def get_prompt()
408
+ ```
409
+
410
+ This method gets a prompt from an island. It returns the code, the version generated and the island id.
411
+
412
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.reset_islands"></a>
413
+
414
+ #### reset\_islands
415
+
416
+ ```python
417
+ def reset_islands()
418
+ ```
419
+
420
+ This method resets the islands. It resets the worst islands and copies the best programs to the worst islands.
421
+
422
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.register_program"></a>
423
+
424
+ #### register\_program
425
+
426
+ ```python
427
+ def register_program(program: AbstractArtifact, island_id: int,
428
+ scores_per_test: ScoresPerTest)
429
+ ```
430
+
431
+ This method registers a program in an island. It also updates the best program if needed.
432
+
433
+ **Arguments**:
434
+
435
+ - `program` (`AbstractArtifact`): The program to register
436
+ - `island_id` (`int`): The id of the island to register the program
437
+ - `scores_per_test` (`ScoresPerTest`): The scores per test of the program
438
+
439
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.get_best_programs"></a>
440
+
441
+ #### get\_best\_programs
442
+
443
+ ```python
444
+ def get_best_programs() -> List[Dict[str, Any]]
445
+ ```
446
+
447
+ This method returns the best programs per island.
448
+
449
+ <a id="ProgramDBFlowModule.ProgramDBFlow.ProgramDBFlow.run"></a>
450
+
451
+ #### run
452
+
453
+ ```python
454
+ def run(input_message: FlowMessage)
455
+ ```
456
+
457
+ This method runs the flow. It performs the operation requested in the input message.
458
+
459
+ <a id="SamplerFlowModule"></a>
460
+
461
+ # SamplerFlowModule
462
+
463
+ <a id="SamplerFlowModule.SamplerFlow"></a>
464
+
465
+ # SamplerFlowModule.SamplerFlow
466
+
467
+ <a id="SamplerFlowModule.SamplerFlow.SamplerFlow"></a>
468
+
469
+ ## SamplerFlow Objects
470
+
471
+ ```python
472
+ class SamplerFlow(ChatAtomicFlow)
473
+ ```
474
+
475
+ This class implements a SamplerFlow. It is a flow that queries a LLM to generate a response to a given input. This class is a child of ChatAtomicFlow.
476
+ and expects the same parameters as ChatAtomicFlow (see https://huggingface.co/aiflows/ChatFlowModule).
477
+
478
+ **Configuration Parameters**:
479
+ - `name` (str): The name of the flow. Default: "SamplerFlowModule"
480
+ - `description` (str): A description of the flow. Default: "A flow that queries an LLM model to generate prompts for the Sampler flow"
481
+ - `backend` Dict[str,Any]: The backend of the flow. Used to call models via an API.
482
+ See litellm's supported models and APIs here: https://docs.litellm.ai/docs/providers.
483
+ The default parameters of the backend are all defined at aiflows.backends.llm_lite.LiteLLMBackend
484
+ (also see the defaul parameters of litellm's completion parameters: https://docs.litellm.ai/docs/completion/input#input-params-1).
485
+ Except for the following parameters who are overwritten by the ChatAtomicFlow in ChatAtomicFlow.yaml:
486
+ - `model_name` (Union[Dict[str,str],str]): The name of the model to use. Default: "gpt-4"
487
+ When using multiple API providers, the model_name can be a dictionary of the form
488
+ {"provider_name": "model_name"}. E.g. {"openai": "gpt-3.5-turbo", "azure": "azure/gpt-3.5-turbo"}
489
+ Default: "gpt-3.5-turbo" (the name needs to follow the name of the model in litellm https://docs.litellm.ai/docs/providers).
490
+ - `n` (int) : The number of answers to generate. Default: 1
491
+ - `max_tokens` (int): The maximum number of tokens to generate. Default: 2000
492
+ - `temperature` float: The temperature of the generation. Default: 1.0
493
+ - `top_p` float: An alternative to sampling with temperature. It instructs the model to consider the results of
494
+ the tokens with top_p probability. Default: 0.4
495
+ - `frequency_penalty` (number): It is used to penalize new tokens based on their frequency in the text so far. Default: 0.0
496
+ - `presence_penalty` (number): It is used to penalize new tokens based on their existence in the text so far. Default: 0.0
497
+ - `stream` (bool): Whether to stream the response or not. Default: false
498
+ - `system_message_prompt_template` (Dict[str,Any]): The template of the system message. It is used to generate the system message. Default: See SamplerFlow.yaml for default.
499
+ - `init_human_message_prompt_template` (Dict[str,Any]): The prompt template of the human/user message used to initialize the conversation
500
+ (first time in). It is used to generate the human message. It's passed as the user message to the LLM. Default: See SamplerFlow.yaml for default.
501
+ - `human_message_prompt_template` (Dict[str,Any]): The prompt template of the human/user message (the message used every time except the first time in).
502
+ It's passed as the user message to the LLM. Default: See SamplerFlow.yaml for default.
503
+ - `previous_messages` (Dict[str,Any]): Defines which previous messages to include in the input of the LLM. Note that if `first_k`and `last_k` are both none,
504
+ all the messages of the flows's history are added to the input of the LLM. Default:
505
+ - `first_k` (int): If defined, adds the first_k earliest messages of the flow's chat history to the input of the LLM. Default: 1
506
+ - `last_k` (int): If defined, adds the last_k latest messages of the flow's chat history to the input of the LLM. Default: 1
507
+
508
+ *Input Interface Initialized (Expected input the first time in flow)*:
509
+
510
+ - `header` (str): A header message to include in prompt
511
+ - `code` (str): The "example" samples to generate our new sample from.
512
+
513
+ *Input Interface (Expected input the after the first time in flow)*:
514
+
515
+ - `header` (str): A header message to include in prompt
516
+ - `code` (str): The "example" samples to generate our new sample from.
517
+
518
+ *Output Interface*:
519
+
520
+ - `api_output` (str): The output of the API call. It is the response of the LLM to the input.
521
+ - `from` (str): The name of the flow that generated the output. It's always "SamplerFlow"
522
+
523
+
524
+ **Citation**:
525
+
526
+ @Article{FunSearch2023,
527
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
528
+ journal = {Nature},
529
+ title = {Mathematical discoveries from program search with large language models},
530
+ year = {2023},
531
+ doi = {10.1038/s41586-023-06924-6}
532
+ }
533
+
534
+ <a id="SamplerFlowModule.SamplerFlow.SamplerFlow.run"></a>
535
+
536
+ #### run
537
+
538
+ ```python
539
+ def run(input_message)
540
+ ```
541
+
542
+ This method calls the backend of the flow (so queries the LLM). It calls the backend with the previous messages of the flow.
543
+
544
+ **Returns**:
545
+
546
+ `Any`: The output of the backend.
547
+
548
+ <a id="EvaluatorFlowModule"></a>
549
+
550
+ # EvaluatorFlowModule
551
+
552
+ <a id="EvaluatorFlowModule.EvaluatorFlow"></a>
553
+
554
+ # EvaluatorFlowModule.EvaluatorFlow
555
+
556
+
557
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow"></a>
558
+
559
+ ## EvaluatorFlow Objects
560
+
561
+ ```python
562
+ class EvaluatorFlow(AtomicFlow)
563
+ ```
564
+
565
+ This class implements an EvaluatorFlow. It is a flow that evaluates a program (python code) using a given evaluator function. This code is an implementation of Funsearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch)
566
+
567
+ **Configuration Parameters**:
568
+
569
+ - `name` (str): The name of the flow. Default: "EvaluatorFlow"
570
+ - `description` (str): A description of the flow. This description is used to generate the help message of the flow. Default: "A flow that evaluates code on tests"
571
+ - `py_file` (str): The python code containing the evaluation function. No default value. This MUST be passed as a parameter.
572
+ - `function_to_run_name` (str): The name of the function to run (the evaluation function) in the evaluator file. No default value. This MUST be passed as a parameter.
573
+ - `test_inputs` (Dict[str,Any]): A dictionary of test inputs to evaluate the program. Default: {"test1": None, "test2": None}
574
+ - `timeout_seconds` (int): The maximum number of seconds to run the evaluation function before returning an error. Default: 10
575
+ - `run_error_score` (int): The score to return if the evaluation function fails to run. Default: -100
576
+ - `use_test_input_as_key` (bool): Whether to use the test input parameters as the key in the output dictionary. Default: False
577
+
578
+ **Input Interface**:
579
+
580
+ - `artifact` (str): The program/artifact to evaluate.
581
+
582
+ **Output Interface**:
583
+
584
+ - `scores_per_test` (Dict[str, Dict[str, Any]]): A dictionary of scores per test input.
585
+
586
+ **Citation**:
587
+
588
+ @Article{FunSearch2023,
589
+ author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
590
+ journal = {Nature},
591
+ title = {Mathematical discoveries from program search with large language models},
592
+ year = {2023},
593
+ doi = {10.1038/s41586-023-06924-6}
594
+ }
595
+
596
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.load_functions"></a>
597
+
598
+ #### load\_functions
599
+
600
+ ```python
601
+ def load_functions()
602
+ ```
603
+
604
+ Load the functions from the evaluator py file with ast parsing
605
+
606
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run_function_with_timeout"></a>
607
+
608
+ #### run\_function\_with\_timeout
609
+
610
+ ```python
611
+ def run_function_with_timeout(program: str, **kwargs)
612
+ ```
613
+
614
+ Run the evaluation function with a timeout
615
+
616
+ **Arguments**:
617
+
618
+ - `program` (`str`): The program to evaluate
619
+ - `kwargs` (`Dict[str, Any]`): The keyword arguments to pass to the evaluation function
620
+
621
+ **Returns**:
622
+
623
+ `Tuple[bool, Any]`: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
624
+
625
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.evaluate_program"></a>
626
+
627
+ #### evaluate\_program
628
+
629
+ ```python
630
+ def evaluate_program(program: str, **kwargs)
631
+ ```
632
+
633
+ Evaluate the program using the evaluation function
634
+
635
+ **Arguments**:
636
+
637
+ - `program` (`str`): The program to evaluate
638
+ - `kwargs` (`Dict[str, Any]`): The keyword arguments to pass to the evaluation function
639
+
640
+ **Returns**:
641
+
642
+ `Tuple[bool, Any]`: A tuple (bool, result) where bool is True if the function ran successfully and result is the output of the function
643
+
644
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.analyse"></a>
645
+
646
+ #### analyse
647
+
648
+ ```python
649
+ def analyse(program: str)
650
+ ```
651
+
652
+ Analyse the program on the test inputs
653
+
654
+ **Arguments**:
655
+
656
+ - `program` (`str`): The program to evaluate
657
+
658
+ **Returns**:
659
+
660
+ `Dict[str, Dict[str, Any]]`: A dictionary of scores per test input
661
+
662
+ <a id="EvaluatorFlowModule.EvaluatorFlow.EvaluatorFlow.run"></a>
663
+
664
+ #### run
665
+
666
+ ```python
667
+ def run(input_message: FlowMessage)
668
+ ```
669
+
670
+ This method runs the flow. It's the main method of the flow.
671
+
672
+ **Arguments**:
673
+
674
+ - `input_message` (`FlowMessage`): The input message
675
+
SamplerFlowModule/SamplerFlow.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ NOTE: THIS IS A BETA VERSION OF FUNSEARCH. NEW VERSION DOCUMENTATION WILL BE RELEASED SOON."""
2
+
3
+ from typing import Dict, Any, List
4
+ from aiflows.interfaces import KeyInterface
5
+ from aiflows.utils import logging
6
+ from flow_modules.aiflows.ChatFlowModule import ChatAtomicFlow
7
+
8
+ log = logging.get_logger(f"aiflows.{__name__}")
9
+
10
+
11
class SamplerFlow(ChatAtomicFlow):
    """A flow that queries an LLM to generate a new sample from a given input.

    This is a child of ChatAtomicFlow and expects the same configuration
    parameters (see https://huggingface.co/aiflows/ChatFlowModule).

    **Configuration Parameters**:
    - `name` (str): The name of the flow. Default: "SamplerFlowModule"
    - `description` (str): A description of the flow. Default: "A flow that queries an LLM model to generate prompts for the Sampler flow"
    - `backend` (Dict[str, Any]): The backend of the flow, used to call models via an API
      (see litellm's supported models and APIs: https://docs.litellm.ai/docs/providers).
      Backend defaults are defined at aiflows.backends.llm_lite.LiteLLMBackend
      (see also litellm's default completion parameters:
      https://docs.litellm.ai/docs/completion/input#input-params-1), except for the
      following, overridden in SamplerFlow.yaml:
        - `model_name` (Union[Dict[str, str], str]): Default "gpt-4". With multiple API
          providers this can be a dict {"provider_name": "model_name"}, e.g.
          {"openai": "gpt-3.5-turbo", "azure": "azure/gpt-3.5-turbo"} (names follow
          litellm's model naming).
        - `n` (int): Number of answers to generate. Default: 1
        - `max_tokens` (int): Maximum number of tokens to generate. Default: 2000
        - `temperature` (float): Generation temperature. Default: 1.0
        - `top_p` (float): Nucleus-sampling alternative to temperature. Default: 0.4
        - `frequency_penalty` (number): Penalizes tokens by frequency so far. Default: 0.0
        - `presence_penalty` (number): Penalizes tokens by presence so far. Default: 0.0
        - `stream` (bool): Whether to stream the response. Default: false
    - `system_message_prompt_template` (Dict[str, Any]): Template of the system message.
      Default: see SamplerFlow.yaml.
    - `init_human_message_prompt_template` (Dict[str, Any]): Prompt template of the
      human/user message used the first time in (passed as the user message to the LLM).
      Default: see SamplerFlow.yaml.
    - `human_message_prompt_template` (Dict[str, Any]): Prompt template of the human/user
      message used every time except the first time in. Default: see SamplerFlow.yaml.
    - `previous_messages` (Dict[str, Any]): Which previous messages to include in the LLM
      input. If both `first_k` and `last_k` are None, the whole chat history is included.
        - `first_k` (int): Adds the first_k earliest messages of the history. Default: 1
        - `last_k` (int): Adds the last_k latest messages of the history. Default: 1

    *Input Interface Initialized (expected input the first time in flow)*:

    - `header` (str): A header message to include in the prompt.
    - `code` (str): The "example" samples to generate our new sample from.

    *Input Interface (expected input after the first time in flow)*:

    - `header` (str): A header message to include in the prompt.
    - `code` (str): The "example" samples to generate our new sample from.

    *Output Interface*:

    - `api_output` (str): The output of the API call — the LLM's response to the input.
    - `from` (str): The name of the flow that generated the output; always "SamplerFlow".

    **Citation**:

    @Article{FunSearch2023,
    author  = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
    journal = {Nature},
    title   = {Mathematical discoveries from program search with large language models},
    year    = {2023},
    doi     = {10.1038/s41586-023-06924-6}
    }
    """

    def run(self, input_message):
        """Query the LLM backend (with the flow's previous messages) and reply
        with its output.

        :param input_message: The incoming message whose data is fed to the LLM.
        :type input_message: FlowMessage
        """
        llm_answer = self.query_llm(input_data=input_message.data)

        # Wrap the raw LLM answer in the flow's output interface and reply
        # to the sender of the input message.
        reply = self.package_output_message(
            input_message,
            response={"api_output": llm_answer, "from": "SamplerFlow"},
        )
        self.send_message(reply)
SamplerFlowModule/SamplerFlow.yaml ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input_interface:
2
+ - "action"
3
+ - "content"
4
+
5
+
6
+ _target_: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow.instantiate_from_default_config
7
+ name: SamplerFlowModule
8
+ description: A flow that queries an LLM model to generate prompts for the Sampler flow
9
+
10
+ input_interface_initialized:
11
+ - header
12
+ - code
13
+
14
+ input_interface_non_initialized:
15
+ - header
16
+ - code
17
+
18
+ output_interface:
19
+ - "api_output"
20
+ - "from"
21
+
22
+ backend:
23
+ api_infos: ???
24
+ model_name: gpt-4
25
+ frequency_penalty: 0
26
+ max_tokens: 2000
27
+ n: 1
28
+ presence_penalty: 0
29
+ stream: false
30
+ temperature: 1.0
31
+ top_p: 0.4
32
+
33
+ previous_messages:
34
+ last_k: 1
35
+ first_k: 1
36
+
37
+ system_message_prompt_template:
38
+ template: |2-
39
+
40
+ Role: You are an expert coder and algorithm designer who provides solutions.
41
+
42
+
43
+ Goal: Write evolve functions that optimize the score of an evaluation function (provided). Aim to achieve higher scores than previous functions. Write a new function that optimizes the evaluation function and scores better than the given functions. Develop non-trivial functions and actual algorithms.
44
+
45
+
46
+ Resources:
47
+
48
+ You will be given code related to evolve and functions.
49
+ You will be given a few functions attempting to optimize the evaluation function.
50
+ The function you're evolving (the last one passed) is the only one you have to write. Make sure to include only nested functions or recursive calls.
51
+
52
+
53
+ Naming:
54
+
55
+ Evaluation function is named {{evaluate_name}}.
56
+ Evolve functions have a naming structure similar to: {{evolve_name}}_v.
57
+
58
+
59
+ Important Constraints:
60
+
61
+ Only write one evolve function solution: Write only the uncompleted function given to you (i.e., only {{evolve_name}}_v{i}, where {i} is the function with the largest number in the prompt, and i is at most {{artifacts_per_prompt}}). Stop writing once you're done with your first solution.
62
+ Output must be a function that is parsable by the AST library: Write code only within functions. No textual comments, no code blocks (like ```python), or no explanations. Provide only Python code encapsulated in a single function. Your output should be parsable by the AST Python library. Do not include starting or ending comments like "Here's an improved version."
63
+ Follow the naming structure: Keep the function name as it is in the input.
64
+ Keep the same function format: Maintain the same input and output format in your solution.
65
+ Self-contained solution: You can't call other functions (only recursive calls or calls to nested functions).
66
+ Do not use the evaluate function in the solution: Avoid using the evaluate function in your solution.
67
+
68
+ partial_variables:
69
+ evaluate_name: ???
70
+ evolve_name: ???
71
+ artifacts_per_prompt: ???
72
+
73
+
74
+ init_human_message_prompt_template:
75
+ _target_: aiflows.prompt_template.JinjaPrompt
76
+ template: |2-
77
+ functions for evaluation:
78
+ {{header}}
79
+
80
+ function to evolve and to evaluate:
81
+ {{code}}
82
+ input_variables: ["header","code"]
83
+ partial_variables: {}
84
+
85
+ human_message_prompt_template:
86
+ _target_: aiflows.prompt_template.JinjaPrompt
87
+ template: |2-
88
+ full code:
89
+ {{header}}
90
+
91
+ function to evolve and to evaluate:
92
+ {{code}}
93
+ input_variables: ["header","code"]
94
+ partial_variables: {}
95
+
96
+
97
+
98
+
99
+
SamplerFlowModule/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ~~~ Specify the dependencies ~~~

dependencies = [
    {"url": "aiflows/ChatFlowModule", "revision": "main"}
]
# Revision can correspond to a branch, a commit hash or an absolute path to a
# local directory (the latter is ideal for development).
from aiflows import flow_verse

flow_verse.sync_dependencies(dependencies)

# ~~~ Import of your flow class (if you have any) ~~~
# from .NAMEOFYOURFLOW import NAMEOFYOURFLOWCLASS
from .SamplerFlow import SamplerFlow
SamplerFlowModule/demo.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: flow_modules.aiflows.FunSearchFlowModule.SamplerFlow.instantiate_from_default_config
2
+
3
+ backend:
4
+ api_infos: ???
5
+
6
+ system_message_prompt_template:
7
+ partial_variables:
8
+ evaluate_name: evaluate
9
+ evolve_name: solve_function
10
+ artifacts_per_prompt: 2
SamplerFlowModule/pip_requirements.txt ADDED
File without changes
SamplerFlowModule/run.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import hydra
4
+
5
+ import aiflows
6
+ from aiflows.backends.api_info import ApiInfo
7
+ from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
8
+
9
+ from aiflows import logging
10
+ from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
11
+
12
+ from aiflows.utils import serving
13
+ from aiflows.workers import run_dispatch_worker_thread
14
+ from aiflows.messages import FlowMessage
15
+ from aiflows.interfaces import KeyInterface
16
+ from aiflows.utils.colink_utils import start_colink_server
17
+ from aiflows import flow_verse
18
+
19
+
20
# Pull the FunSearchFlowModule from the parent directory (an absolute local
# path as revision: ideal for development checkouts).
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        "revision": os.path.abspath("../")
    }
]
flow_verse.sync_dependencies(dependencies)

# Verbose logging to follow the flow orchestration in this demo.
logging.set_verbosity_debug()
29
+
30
+
31
if __name__ == "__main__":

    cl = start_colink_server()

    # Serve the SamplerFlow class under the "SamplerFlow" endpoint.
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.SamplerFlow",
        flow_endpoint="SamplerFlow",
    )

    run_dispatch_worker_thread(cl)

    config_overrides = read_yaml_file(os.path.join(".", "demo.yaml"))

    # NOTE(review): requires OPENAI_API_KEY to be set in the environment.
    api_information = [ApiInfo(backend_used="openai",
                               api_key = os.getenv("OPENAI_API_KEY"))]

    quick_load_api_keys(config_overrides, api_information, key="api_infos")

    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="SamplerFlow",
        config_overrides=config_overrides,
    )

    # Example "code" payload: the evaluation function plus previous versions of
    # the function being evolved; the LLM is expected to complete
    # solve_function_v1.
    code = \
"""
#function used to evaluate the program:
def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
    \"\"\"Returns the score of the solve function we're evolving based on the tests_inputs and expected_outputs.
    Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.
    \"\"\"
    if solve(solve_function, tests_inputs, expected_outputs) == True:
        return 1.0
    return 0.0


def solve_function_v0(input) -> str:
    \"\"\"Scores per test: test_1:{'score': 1.0, 'feedback': 'No feedback available.'} test_2:{'score': 1.0, 'feedback': 'No feedback available.'} test_3:{'score': 0.0, 'feedback': 'No feedback available.'} test_4:{'score': -1, 'feedback': 'Invalid Format of prediction'}\"\"\"
    return 'YES'


def solve_function_v1(input) -> str:
    \"\"\"Improved version of solve_function_v0\"\"\"
"""

    # Example "header" payload: the problem statement plus the full content of
    # the evaluation file (same content as cf_functions.py).
    header = \
"""
\"\"\"Problem Description:
Serval has a string s that only consists of 0 and 1 of length n. The i-th character of s is denoted as s_i, where 1\leq i\leq n.
Serval can perform the following operation called Inversion Magic on the string s:
Choose an segment [l, r] (1\leq l\leq r\leq n). For l\leq i\leq r, change s_i into 1 if s_i is 0, and change s_i into 0 if s_i is 1.
For example, let s be 010100 and the segment [2,5] is chosen. The string s will be 001010 after performing the Inversion Magic.
Serval wants to make s a palindrome after performing Inversion Magic exactly once. Help him to determine whether it is possible.
A string is a palindrome iff it reads the same backwards as forwards. For example, 010010 is a palindrome but 10111 is not.

Input Description:
Input
Each test contains multiple test cases. The first line contains the number of test cases t (1\leq t\leq 10^4). The description of the test cases follows.
The first line of each test case contains a single integer n (2\leq n\leq 10^5) — the length of string s.
The second line of each test case contains a binary string s of length n. Only characters 0 and 1 can appear in s.
It's guaranteed that the sum of n over all test cases does not exceed 2\cdot 10^5.

Output Description:
Output
For each test case, print Yes if s can be a palindrome after performing Inversion Magic exactly once, and print No if not.
You can output Yes and No in any case (for example, strings yEs, yes, Yes and YES will be recognized as a positive response).

Public Tests:
Test 1:
Input: ['1', '4', '1001']
Output: 'YES'
Test 2:
Input: ['1', '5', '10010']
Output: 'YES'
Test 3:
Input: ['1', '7', '0111011']
Output: 'NO'

\"\"\"


import ast
import itertools
import numpy as np
from typing import List

def solve(solve_function: str,input: List[str], expected_output: str) -> str:
    \"\"\"function used to run the solve function on input *kwargs and return the the predicted output

    :param solve_function: the function to run (the solve function below as a string)
    :type solve_function: str
    :param kwargs: the inputs to the solve function
    :type kwargs: List[str]
    \"\"\"
    local_namespace = {}
    exec(solve_function,local_namespace)
    found_name, program_name = get_function_name_from_code(solve_function)

    if not found_name:
        raise ValueError(f"Function name not found in program: {solve_function}")

    solve_fn = local_namespace.get(program_name)

    prediction = solve_fn(input)

    prediction = prediction.split()
    expected_output = expected_output.split()

    if len(prediction) != len(expected_output):
        raise ValueError(f"Invalid Format of prediction")

    for i in range(len(prediction)):
        if prediction[i] != expected_output[i]:
            return False

    return True

def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
    \"\"\"Returns the score of the solve function we're evolving based on the tests_inputs and expected_outputs.
    Scores are between 0 and 1, unless the program fails to run, in which case the score is -1.
    \"\"\"
    if solve(solve_function,tests_inputs,expected_outputs) == True:
        return 1.0
    return 0.0


def get_function_name_from_code(code):
    tree = ast.parse(code)
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            return True, node.name

    # something is wrong
    return False, None

"""

    data = {
        'code': code,
        'header': header
    }

    input_message = funsearch_proxy.package_input_message(data = data)

    funsearch_proxy.send_message(input_message)

    # Block until the sampler replies with the LLM's completion.
    future = funsearch_proxy.get_reply_future(input_message)
    response = future.get_data()
    print("~~~Response~~~")
    print(response)
__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .FunSearch import FunSearch
2
+ from .SamplerFlowModule.SamplerFlow import SamplerFlow
3
+ from .EvaluatorFlowModule.EvaluatorFlow import EvaluatorFlow
4
+ from .ProgramDBFlowModule.ProgramDBFlow import ProgramDBFlow
cf_functions.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
import ast
import itertools
import numpy as np
from typing import List

def solve(solve_function: str, input: List[str], expected_output: str) -> bool:
    """Run *solve_function* (given as source code) on *input* and compare its
    prediction against *expected_output* token by token.

    :param solve_function: source code of the candidate solver; its first
        function definition is extracted and called with *input*
    :type solve_function: str
    :param input: the test-case input lines passed to the solver
    :type input: List[str]
    :param expected_output: the reference output for the test case
    :type expected_output: str
    :return: True when every whitespace-separated token of the prediction
        matches the expected output, False otherwise
    :rtype: bool
    :raises ValueError: when no function definition is found in
        *solve_function*, or when the prediction has a different number of
        tokens than the expected output
    """
    # SECURITY: exec runs arbitrary (LLM-generated) code in-process. This is
    # inherent to the FunSearch evaluation loop — never feed untrusted input
    # outside a sandbox.
    local_namespace = {}
    exec(solve_function, local_namespace)
    found_name, program_name = get_function_name_from_code(solve_function)

    if not found_name:
        raise ValueError(f"Function name not found in program: {solve_function}")

    solve_fn = local_namespace.get(program_name)

    prediction = solve_fn(input)

    # Compare whitespace-separated tokens so trailing blanks/newlines in
    # either side do not cause spurious mismatches.
    prediction = prediction.split()
    expected_output = expected_output.split()

    if len(prediction) != len(expected_output):
        raise ValueError("Invalid Format of prediction")

    return prediction == expected_output

def evaluate(solve_function: str, tests_inputs: List[str], expected_outputs: str) -> float:
    """Score the evolved solve function on one test case.

    Returns 1.0 when the candidate reproduces the expected output and 0.0
    otherwise. When the candidate fails to run, solve() raises and the caller
    applies its configured run_error_score (-1 in the demo configuration)
    instead.
    """
    return 1.0 if solve(solve_function, tests_inputs, expected_outputs) else 0.0


def get_function_name_from_code(code):
    """Return (True, name) for the first function definition found in *code*.

    :param code: Python source code to inspect
    :return: (found, name); (False, None) when no ``def`` is present
    """
    tree = ast.parse(code)
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            return True, node.name

    # No FunctionDef found — signal failure to the caller instead of raising.
    return False, None
55
+
56
+
57
+
58
+
59
+
60
+
demo.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: FunSearchFlow
2
+ description: "A flow implementing FunSearch Asynchronous Search"
3
+
4
+ ### Subflows specification
5
+ subflows_config:
6
+ SamplerFlow:
7
+ # _target_: flow_modules.SamplerFlowModule.SamplerFlow.instantiate_from_default_config
8
+ name: "Sampler Flow"
9
+ description: "A flow that queries an LLM model to generate prompts"
10
+ system_message_prompt_template:
11
+ partial_variables: ???
12
+ backend:
13
+ api_infos: ???
14
+ EvaluatorFlow:
15
+ name: "A flow that evaluates code on tests"
16
+ description: "A flow that evaluates code on tests"
17
+ singleton: False
18
+ run_error_score: ???
19
+ py_file: ???
20
+ function_to_run_name: ???
21
+ test_inputs: ???
22
+ timeout_seconds: 10
23
+ use_test_input_as_key: ???
24
+
25
+ ProgramDBFlow:
26
+ # _target_: flow_modules.ProgramDBFlowModule.ProgramDBFlow.instantiate_from_default_config
27
+ name: "ProgramDB"
28
+ description: "A flow that registers samples and evaluations in a database"
29
+ evaluate_function: ???
30
+ evaluate_file_full_content: ???
31
+ artifact_to_evolve_name: solve_function
32
+ num_islands: 3
33
+ template:
34
+ preface: ""
pip_requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ astunparse==1.6.3
2
+ numpy==1.26.2
3
+ SciPy==1.11.4
4
+ pandas
run.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import hydra
4
+
5
+ import aiflows
6
+ from aiflows.backends.api_info import ApiInfo
7
+ from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys
8
+
9
+ from aiflows import logging
10
+ from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache
11
+
12
+ from aiflows.utils import serving
13
+ from aiflows.workers import run_dispatch_worker_thread
14
+ from aiflows.messages import FlowMessage
15
+ from aiflows.interfaces import KeyInterface
16
+ from aiflows.utils.colink_utils import start_colink_server
17
+ from aiflows import flow_verse
18
+ import pandas as pd
19
+ import sys
20
+ from copy import deepcopy
21
+ import requests
22
+ import time
23
+
24
# Pull the FunSearchFlowModule from a sibling checkout (a relative path as
# revision: ideal for development).
dependencies = [
    {
        "url": "aiflows/FunSearchFlowModule",
        "revision": "../FunSearchFlowModule"
    }
]
flow_verse.sync_dependencies(dependencies)
# Loader extracts a named target (and the full file content) from a .py file —
# used below to pull the evaluate function out of cf_functions.py.
from flow_modules.aiflows.FunSearchFlowModule.Loader import Loader

logging.set_verbosity_debug()
34
+
35
+
36
def load_problem(id, ds_location = "./data/codeforces.jsonl.gz"):
    """Load one codeforces problem and turn it into FunSearch inputs.

    :param id: problem identifier, matched against the dataset's `id` column
    :param ds_location: path to the gzipped jsonl codeforces dataset
    :return: (tests, problem_description) where tests maps "test_<i>" to
        {"tests_inputs": ..., "expected_outputs": ...} — public tests first,
        then hidden tests — and problem_description is the human-readable
        statement assembled from the dataset row.
    :raises AssertionError: when the problem's output is not unique
    """

    def render_public_tests(row):
        # One "Test i: Input/Output" paragraph per public test case.
        rendered = ""
        for i, io_pair in enumerate(row.public_tests_individual_io):
            rendered += f"Test {i+1}:\n Input: {io_pair[0]}\n Output: \'{io_pair[1]}\'\n"
        return rendered

    def make_problem_descriptions_str(row):
        # Assemble the full textual statement shown to the sampler LLM.
        return (
            f"Problem Description:\n{row.problem_description}\n\n"
            + f"Input Description:\n{row.input_description}\n\n"
            + f"Output Description:\n{row.output_description}\n\n"
            + f"Public Tests:\n{render_public_tests(row)}\n"
        )

    df = pd.read_json(ds_location, lines=True, compression='gzip')
    row = df[df.id == id].iloc[0]

    assert row.non_unique_output == False, "Problem has non unique output. Not supported yet"

    problem_description = make_problem_descriptions_str(row)

    # Number public tests first, then hidden tests, as "test_1", "test_2", ...
    tests = {}
    all_io = list(row.public_tests_individual_io) + list(row.hidden_tests_io)
    for test_counter, io_pair in enumerate(all_io, start=1):
        tests["test_" + str(test_counter)] = {
            "tests_inputs": io_pair[0],
            "expected_outputs": io_pair[1],
        }

    return tests, problem_description
79
+
80
def download_codeforces_data(data_folder_path,file_name):
    """Download the gzipped codeforces dataset into data_folder_path/file_name.

    Fetches the archive from the cc_flows GitHub repository. On a non-200
    response it prints a message rather than raising (best-effort download).

    :param data_folder_path: directory to create (if missing) and write into
    :param file_name: name of the output file within data_folder_path
    """
    print("Downloading data....")
    os.makedirs(data_folder_path, exist_ok=True)
    url = "https://github.com/epfl-dlab/cc_flows/raw/main/data/codeforces/codeforces.jsonl.gz"
    # Stream so the archive is written chunk by chunk instead of fully buffered.
    response = requests.get(url, stream=True)

    if response.status_code == 200:
        with open(os.path.join(data_folder_path,file_name), 'wb') as file:
            for chunk in response:
                file.write(chunk)
        print("Download complete")
    else:
        print("Failed to download data", response.status_code)
93
+
94
+
95
def get_configs(problem_id, ds_location = "./data/codeforces.jsonl.gz"):
    """Build the FunSearch configuration for *problem_id* plus a dummy seed solution.

    Reads demo.yaml and overrides the ProgramDBFlow, EvaluatorFlow and
    SamplerFlow sections with the problem's evaluation function, tests and
    prompt variables.

    :param problem_id: id of the codeforces problem to load
    :param ds_location: path to the gzipped jsonl codeforces dataset
    :return: (funsearch_cfg, dummy_solution)
    """
    tests, problem_description = load_problem(problem_id,ds_location)

    path = os.path.join(".", "demo.yaml")
    funsearch_cfg = read_yaml_file(path)

    evaluate_function_file_path: str = "./cf_functions.py"
    evaluate_function_name: str = "evaluate"
    evolve_function_name:str = "solve_function"

    # Extract the evaluate function (and the whole file) from cf_functions.py.
    loader = Loader(file_path = evaluate_function_file_path, target_name = evaluate_function_name)
    evaluate_function: str= loader.load_target()
    evaluate_file_full_content = loader.load_full_file()

    # Prepend the problem statement as a module-level docstring.
    evaluate_file_full_content = f"\"\"\"{problem_description}\"\"\"\n\n" + evaluate_file_full_content

    #~~~~~ ProgramDBFlow Overrides ~~~~~~~~
    funsearch_cfg["subflows_config"]["ProgramDBFlow"]["evaluate_function"] = evaluate_function
    funsearch_cfg["subflows_config"]["ProgramDBFlow"]["evaluate_file_full_content"] = evaluate_file_full_content
    funsearch_cfg["subflows_config"]["ProgramDBFlow"]["artifact_to_evolve_name"] = evolve_function_name

    if len(tests) > 0:
        first_test = tests["test_1"]

        # Seed solution that simply returns the first test's expected output.
        dummy_solution = f"def {evolve_function_name}(input) -> str:" +\
            "\n    \"\"\"Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)\"\"\"" +\
            f"\n    return \'{first_test['expected_outputs']}\'\n"


    else:
        # NOTE(review): this branch generates `return 0.0""` in the seed
        # solution, which is not valid Python — looks like a leftover; confirm
        # the intended fallback body.
        dummy_solution = f"def {evolve_function_name}(input) -> str:" +\
            "\n    \"\"\"Attempt at solving the problem given the input input and returns the predicted output (see the top of the file for problem description)\"\"\"" +\
            f"\n    return 0.0\"\"\n"

    #~~~~~~~~~~Evaluator overrides~~~~~~~~~~~~
    funsearch_cfg["subflows_config"]["EvaluatorFlow"]["py_file"] = evaluate_file_full_content
    funsearch_cfg["subflows_config"]["EvaluatorFlow"]["run_error_score"] = -1
    funsearch_cfg["subflows_config"]["EvaluatorFlow"]["function_to_run_name"] = evaluate_function_name
    funsearch_cfg["subflows_config"]["EvaluatorFlow"]["test_inputs"] = tests
    #Hides test inputs from LLM (necessary for hidden tests. Makes same setup as in a real contest.)
    funsearch_cfg["subflows_config"]["EvaluatorFlow"]["use_test_input_as_key"] = False


    #~~~~~~~~~~Sampler overrides~~~~~~~~~~~~
    funsearch_cfg["subflows_config"]["SamplerFlow"]["system_message_prompt_template"]["partial_variables"] = \
        {
            "evaluate_name": evaluate_function_name,
            "evolve_name": evolve_function_name,
            "artifacts_per_prompt": 2
        }


    return funsearch_cfg, dummy_solution
148
+
149
+
150
FLOW_MODULES_PATH = "./"


if __name__ == "__main__":

    cl = start_colink_server()

    problem_id = "1789B"  # put the problem id here

    # Fetch the codeforces dataset on first run.
    if not os.path.exists("./data/codeforces.jsonl.gz"):
        download_codeforces_data("./data", "codeforces.jsonl.gz")

    funsearch_cfg, dummy_solution = get_configs(problem_id)

    api_information = [ApiInfo(backend_used="openai",
                               api_key=os.getenv("OPENAI_API_KEY"))]

    # Serve the FunSearch flow (recursively serves its subflows).
    serving.recursive_serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.FunSearchFlowModule.FunSearch",
        flow_endpoint="FunSearch",
    )

    # Several workers so sampler/evaluator/database requests run concurrently.
    n_workers = 10
    for i in range(n_workers):
        run_dispatch_worker_thread(cl)

    quick_load_api_keys(funsearch_cfg, api_information, key="api_infos")
    config_overrides = None

    funsearch_proxy = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="FunSearch",
        config_overrides=funsearch_cfg,
    )

    # Register the dummy seed solution in the program database.
    data = {
        "from": "SamplerFlow",
        "operation": "register_program",
        "api_output": dummy_solution
    }
    input_message = funsearch_proxy.package_input_message(data=data)
    funsearch_proxy.send_message(input_message)

    # Kick off the asynchronous search with 5 samplers.
    data = {
        "from": "FunSearch",
        "operation": "start",
        "content": {"num_samplers": 5},
    }
    input_message = funsearch_proxy.package_input_message(data=data)
    funsearch_proxy.send_message(input_message)

    # Request the search to stop.
    data = {
        "from": "FunSearch",
        "operation": "stop",
        "content": {},
    }
    input_message = funsearch_proxy.package_input_message(data=data)
    funsearch_proxy.send_message(input_message)

    wait_time = 1000
    print(f"Waiting {wait_time} seconds before requesting result...")
    time.sleep(wait_time)

    # Ask for the best program found on each island.
    data = {
        "from": "FunSearch",
        "operation": "get_best_programs_per_island",
        "content": {}
    }
    input_message = funsearch_proxy.package_input_message(data=data)
    # FIX: this request was packaged but never sent before waiting on its
    # reply future, so future.get_data() blocked forever. Send it first, as
    # every other request site does.
    funsearch_proxy.send_message(input_message)

    future = funsearch_proxy.get_reply_future(input_message)
    print("waiting for response....")
    response = future.get_data()
    print(response)
+ print(response)