Spaces:

bigcode
/

arena

Running

App Files Community

terryyz committed 11 days ago

Commit

1bb2113

1 Parent(s): 80f59bb

update

Browse files

Files changed (9) hide show

.gitignore +1 -1
app.py +0 -0
completion.py +0 -76
conversation.py +0 -11
requirements.txt +3 -1
sandbox/code_analyzer.py +49 -69
sandbox/code_runner.py +183 -241
sandbox/prompts.py +42 -4
sandbox/sandbox_manager.py +0 -12

.gitignore CHANGED Viewed

@@ -26,7 +26,7 @@ share/python-wheels/
 *.egg
 MANIFEST
 run.sh
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.

 *.egg
 MANIFEST
 run.sh
+logs/
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.

app.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

completion.py CHANGED Viewed

@@ -60,7 +60,6 @@ def load_model_answers(answer_dir: str):
         if not os.path.isdir(os.path.join(answer_dir, folder)):
             continue
         if not os.path.exists(os.path.join(answer_dir, folder, "generation.jsonl")):
-            print(f"WARNING: {folder} does not have generation.jsonl, skip it.")
             continue
         filenames.append(os.path.join(answer_dir, folder, "generation.jsonl"))
@@ -111,7 +110,6 @@ def load_model_answers_and_execution_results(data_dir: str):
         if not os.path.isdir(os.path.join(data_dir, folder)):
             continue
         if not os.path.exists(os.path.join(data_dir, folder, "execution_results.jsonl")):
-            print(f"WARNING: {folder} does not have execution_results.jsonl, skip it.")
             continue
         filenames.append(os.path.join(data_dir, folder, "execution_results.jsonl"))
@@ -205,41 +203,10 @@ def chat_completion_openai(model, messages, temperature, max_tokens, api_dict=No
             }
             break
         except openai.RateLimitError as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
         except openai.BadRequestError as e:
-            print("=== DEBUG: OpenAI BadRequestError ===")
-            print("Error type:", type(e))
-            print("Error message:", str(e))
-            print("=== Analyzing messages for image issues ===")
-            for i, msg in enumerate(messages):
-                print(f"Message {i} role: {msg.get('role', 'unknown')}")
-                if "content" in msg:
-                    content = msg["content"]
-                    if isinstance(content, list):
-                        for j, item in enumerate(content):
-                            if isinstance(item, dict) and item.get("type") == "image_url":
-                                url = item.get("image_url", {}).get("url", "")
-                                if url.startswith("data:image/png;base64,"):
-                                    base64_part = url[22:]  # Remove "data:image/png;base64," prefix
-                                    print(f"  Image {j}: base64 length = {len(base64_part)}")
-                                    if len(base64_part) < 50:
-                                        print(f"  *** ISSUE: Image {j} has very short/empty base64: '{url}'")
-                                elif url.startswith("data:image/"):
-                                    print(f"  Image {j}: Non-PNG data URL: {url[:50]}...")
-                                else:
-                                    print(f"  Image {j}: Unexpected URL format: {url[:50]}...")
-                            elif isinstance(item, dict) and item.get("type") == "text":
-                                text_content = item.get("text", "")
-                                print(f"  Text {j}: {len(text_content)} chars")
-                            else:
-                                print(f"  Content {j}: {type(item)} - {str(item)[:50]}...")
-                    else:
-                        print(f"  Content: {type(content)} - {str(content)[:100]}...")
-            print("=== End debug info ===")
             break
         except KeyError:
-            print(type(e), e)
             break
     return output
@@ -304,41 +271,10 @@ def chat_completion_openai_thinking(model, messages, api_dict=None, **kwargs):
             }
             break
         except openai.RateLimitError as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
         except openai.BadRequestError as e:
-            print("=== DEBUG: OpenAI BadRequestError ===")
-            print("Error type:", type(e))
-            print("Error message:", str(e))
-            print("=== Analyzing messages for image issues ===")
-            for i, msg in enumerate(messages):
-                print(f"Message {i} role: {msg.get('role', 'unknown')}")
-                if "content" in msg:
-                    content = msg["content"]
-                    if isinstance(content, list):
-                        for j, item in enumerate(content):
-                            if isinstance(item, dict) and item.get("type") == "image_url":
-                                url = item.get("image_url", {}).get("url", "")
-                                if url.startswith("data:image/png;base64,"):
-                                    base64_part = url[22:]  # Remove "data:image/png;base64," prefix
-                                    print(f"  Image {j}: base64 length = {len(base64_part)}")
-                                    if len(base64_part) < 50:
-                                        print(f"  *** ISSUE: Image {j} has very short/empty base64: '{url}'")
-                                elif url.startswith("data:image/"):
-                                    print(f"  Image {j}: Non-PNG data URL: {url[:50]}...")
-                                else:
-                                    print(f"  Image {j}: Unexpected URL format: {url[:50]}...")
-                            elif isinstance(item, dict) and item.get("type") == "text":
-                                text_content = item.get("text", "")
-                                print(f"  Text {j}: {len(text_content)} chars")
-                            else:
-                                print(f"  Content {j}: {type(item)} - {str(item)[:50]}...")
-                    else:
-                        print(f"  Content: {type(content)} - {str(content)[:100]}...")
-            print("=== End debug info ===")
             break
         except KeyError:
-            print(type(e), e)
             break
     return output
@@ -382,7 +318,6 @@ def chat_completion_deepseek_reasoner(messages, api_dict, **kwargs):
             }
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -426,7 +361,6 @@ def chat_completion_deepseek(messages, max_tokens, api_dict, **kwargs):
             }
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -463,7 +397,6 @@ def chat_completion_anthropic(model, messages, temperature, max_tokens, api_dict
             }
             break
         except anthropic.APIError as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -494,7 +427,6 @@ def chat_completion_anthropic_thinking(model, messages, max_tokens, budget_token
             }
             break
         except anthropic.APIError as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -525,7 +457,6 @@ def chat_completion_mistral(model, messages, temperature, max_tokens, **kwargs):
             }
             break
         except MistralException as e:
-            print(type(e), e)
             break
     return output
@@ -558,7 +489,6 @@ def chat_completion_xai(model, messages, temperature, max_tokens, api_dict=None,
             }
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -582,7 +512,6 @@ def chat_completion_litellm(model, messages, temperature, max_tokens, api_dict=N
             }
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -729,7 +658,6 @@ def http_completion_gemini(model, messages, **kwargs):
                 "answer": response.json()["candidates"][0]["content"]["parts"][0]["text"],
             }
         except KeyError as e:
-            print(type(e), e)
             print(response.json())
     return output
@@ -839,10 +767,8 @@ def chat_completion_cohere(model, messages, temperature, max_tokens, **kwargs):
             }
             break
         except cohere.core.api_error.ApiError as e:
-            print(type(e), e)
             raise
         except Exception as e:
-            print(type(e), e)
             break
     return output
@@ -1156,7 +1082,6 @@ def chat_completion_aws_bedrock_claude(messages, api_dict=None, aws_region="us-w
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output
@@ -1230,7 +1155,6 @@ def chat_completion_aws_bedrock_mistral(messages, api_dict=None, aws_region="us-
             break
         except Exception as e:
-            print(type(e), e)
             time.sleep(API_RETRY_SLEEP)
     return output

         if not os.path.isdir(os.path.join(answer_dir, folder)):
             continue
         if not os.path.exists(os.path.join(answer_dir, folder, "generation.jsonl")):
             continue
         filenames.append(os.path.join(answer_dir, folder, "generation.jsonl"))
         if not os.path.isdir(os.path.join(data_dir, folder)):
             continue
         if not os.path.exists(os.path.join(data_dir, folder, "execution_results.jsonl")):
             continue
         filenames.append(os.path.join(data_dir, folder, "execution_results.jsonl"))
             }
             break
         except openai.RateLimitError as e:
             time.sleep(API_RETRY_SLEEP)
         except openai.BadRequestError as e:
             break
         except KeyError:
             break
     return output
             }
             break
         except openai.RateLimitError as e:
             time.sleep(API_RETRY_SLEEP)
         except openai.BadRequestError as e:
             break
         except KeyError:
             break
     return output
             }
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             }
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             }
             break
         except anthropic.APIError as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             }
             break
         except anthropic.APIError as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             }
             break
         except MistralException as e:
             break
     return output
             }
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             }
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output
                 "answer": response.json()["candidates"][0]["content"]["parts"][0]["text"],
             }
         except KeyError as e:
             print(response.json())
     return output
             }
             break
         except cohere.core.api_error.ApiError as e:
             raise
         except Exception as e:
             break
     return output
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output
             break
         except Exception as e:
             time.sleep(API_RETRY_SLEEP)
     return output

conversation.py CHANGED Viewed

@@ -2314,41 +2314,30 @@ register_conv_template(
 if __name__ == "__main__":
     from fastchat.conversation import get_conv_template
-    print("-- Vicuna template --")
     conv = get_conv_template("vicuna_v1.1")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
-    print(conv.get_prompt())
-    print("\n")
-    print("-- Llama-2 template --")
     conv = get_conv_template("llama-2")
     conv.set_system_message("You are a helpful, respectful and honest assistant.")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
-    print(conv.get_prompt())
-    print("\n")
-    print("-- ChatGPT template --")
     conv = get_conv_template("chatgpt")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
-    print(conv.to_openai_api_messages())
-    print("\n")
-    print("-- Claude template --")
     conv = get_conv_template("claude")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
-    print(conv.get_prompt())

 if __name__ == "__main__":
     from fastchat.conversation import get_conv_template
     conv = get_conv_template("vicuna_v1.1")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
     conv = get_conv_template("llama-2")
     conv.set_system_message("You are a helpful, respectful and honest assistant.")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
     conv = get_conv_template("chatgpt")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)
     conv = get_conv_template("claude")
     conv.append_message(conv.roles[0], "Hello!")
     conv.append_message(conv.roles[1], "Hi!")
     conv.append_message(conv.roles[0], "How are you?")
     conv.append_message(conv.roles[1], None)

requirements.txt CHANGED Viewed

@@ -21,4 +21,6 @@ tree-sitter-php
 tree-sitter-typescript
 tree-sitter-c
 e2b-code-interpreter==1.5.2
-azure-storage-blob

 tree-sitter-typescript
 tree-sitter-c
 e2b-code-interpreter==1.5.2
+azure-storage-blob
+huggingface_hub
+datasets

sandbox/code_analyzer.py CHANGED Viewed

@@ -84,7 +84,7 @@ def extract_python_imports(code: str) -> list[str]:
                         if len(node.args) > 0 and isinstance(node.args[0], ast.Str):
                             packages.add(node.args[0].s.split('.')[0])
         except Exception as e:
-            print(f"Error processing node {type(node)}: {e}")
             continue
     # Filter out standard library modules using sys.stdlib_module_names
@@ -114,11 +114,11 @@ def extract_js_imports(code: str) -> list[str]:
         try:
             tree = ts_parser.parse(code_bytes)
         except Exception as e:
-            print(f"TypeScript parsing failed: {e}")
             try:
                 tree = js_parser.parse(code_bytes)
             except Exception as e:
-                print(f"JavaScript parsing failed: {e}")
                 tree = None
         if tree is None:
@@ -206,7 +206,7 @@ def extract_js_imports(code: str) -> list[str]:
         return list(packages)
     except Exception as e:
-        print(f"Tree-sitter parsing failed: {e}")
         # Fallback to basic regex parsing if tree-sitter fails
         packages: Set[str] = set()
@@ -240,9 +240,9 @@ def extract_js_imports(code: str) -> list[str]:
         return list(packages)
-def determine_python_environment(code: str, imports: list[str]) -> SandboxEnvironment | None:
     '''
-    Determine Python sandbox environment based on imports and AST analysis.
     '''
     try:
         tree = ast.parse(code)
@@ -255,34 +255,34 @@ def determine_python_environment(code: str, imports: list[str]) -> SandboxEnviro
     except SyntaxError:
         pass
-    # Check imports for framework detection
-    if 'pygame' in imports:
         return SandboxEnvironment.PYGAME
-    elif 'gradio' in imports:
         return SandboxEnvironment.GRADIO
-    elif 'streamlit' in imports:
         return SandboxEnvironment.STREAMLIT
-    # elif 'nicegui' in imports:
     #     return SandboxEnvironment.NICEGUI
     return SandboxEnvironment.PYTHON_RUNNER
-def determine_jsts_environment(code: str, imports: list[str]) -> SandboxEnvironment | None:
     '''
-    Determine JavaScript/TypeScript sandbox environment based on imports and AST analysis.
     '''
     # First check for Vue SFC structure
     if '<template>' in code or '<script setup' in code:
         return SandboxEnvironment.VUE
-    # Check imports for framework detection
-    react_packages = {'react', '@react', 'next', '@next'}
     vue_packages = {'vue', '@vue', 'nuxt', '@nuxt'}
-    if any(pkg in react_packages for pkg in imports):
         return SandboxEnvironment.REACT
-    elif any(pkg in vue_packages for pkg in imports):
         return SandboxEnvironment.VUE
     try:
@@ -345,7 +345,7 @@ def determine_jsts_environment(code: str, imports: list[str]) -> SandboxEnvironm
                 return SandboxEnvironment.VUE
     except Exception as e:
-        print(f"Tree-sitter parsing error: {e}")
     return SandboxEnvironment.JAVASCRIPT_RUNNER
@@ -434,7 +434,7 @@ def detect_js_ts_code_lang(code: str) -> str:
             return 'typescript'
     except Exception as e:
-        print(f"Tree-sitter parsing error: {e}")
         # Fallback to basic checks if parsing fails
         pass
@@ -569,17 +569,17 @@ def extract_js_from_html_script_tags(code: str) -> list[str]:
     return list(packages)
-def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> tuple[str, str, tuple[list[str], list[str]], SandboxEnvironment | None] | None:
     '''
     Extracts code from a markdown message by parsing code blocks directly.
     Determines sandbox environment based on code content and frameworks used.
     Returns:
-        tuple[str, str, tuple[list[str], list[str]], SandboxEnvironment | None]: A tuple:
             1. code - the longest code block found
             2. code language
-            3. sandbox python and npm dependencies (extracted using static analysis)
-            4. sandbox environment determined from code content
     '''
     code_block_regex = r'```(?P<code_lang>[\w\+\#\-\.]*)?[ \t]*\r?\n?(?P<code>.*?)```'
     matches = list(re.finditer(code_block_regex, message, re.DOTALL))
@@ -591,9 +591,19 @@ def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> t
     low_priority_languages = ['bash', 'shell',
                               'sh', 'zsh', 'powershell', 'pwsh', '']
     # Find the main code block by avoiding low-priority languages
-    main_code = None
-    main_code_lang = None
     max_length = 0
     for match in matches:
@@ -604,11 +614,6 @@ def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> t
             main_code_lang = code_lang
             max_length = len(code)
-    # Fallback to the longest code block if no main code was found
-    if not main_code:
-        longest_match = max(matches, key=lambda m: len(m.group('code')))
-        main_code = longest_match.group('code').strip()
-        main_code_lang = (longest_match.group('code_lang') or '').lower()
     # Define language prefixes for each environment
     python_prefixes = ['py', 'ipython', 'pygame', 'gradio', 'streamlit']
@@ -625,45 +630,31 @@ def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> t
     rust_prefixes = ['rust']
     csharp_prefixes = ['cs', 'csharp', 'dotnet']
-    # Extract package dependencies from the main program
-    python_packages: list[str] = []
-    npm_packages: list[str] = []
     # Helper function to check if any prefix matches
     def matches_prefix(lang: str, prefixes: list[str]) -> bool:
         return any(lang.lower().startswith(prefix) for prefix in prefixes)
     if matches_prefix(main_code_lang, python_prefixes):
-        python_packages = extract_python_imports(main_code)
-        extra_python_packages, main_code = extract_inline_pip_install_commands(
-            main_code)
-        python_packages.extend(extra_python_packages)
-        sandbox_env_name = determine_python_environment(
-            main_code, python_packages)
     elif matches_prefix(main_code_lang, vue_prefixes):
-        npm_packages = extract_js_imports(main_code)
         sandbox_env_name = SandboxEnvironment.VUE
         main_code_lang = detect_js_ts_code_lang(main_code)
     elif matches_prefix(main_code_lang, react_prefixes):
-        npm_packages = extract_js_imports(main_code)
         sandbox_env_name = SandboxEnvironment.REACT
         main_code_lang = detect_js_ts_code_lang(main_code)
     elif ('<!DOCTYPE html>' in main_code and ('<head' in main_code or '<body' in main_code)) or (main_code.strip().startswith('<svg')) or (not matches_prefix(main_code_lang, [*react_prefixes, *vue_prefixes, *js_prefixes, *ts_prefixes]) and ('<html' in main_code or '<!DOCTYPE html>' in main_code)):
-        npm_packages = extract_js_from_html_script_tags(main_code)
         sandbox_env_name = SandboxEnvironment.HTML
         main_code_lang = 'html'
     elif matches_prefix(main_code_lang, js_prefixes):
         main_code_lang = 'javascript'
-        npm_packages = extract_js_imports(main_code)
-        sandbox_env_name = determine_jsts_environment(main_code, npm_packages)
     elif matches_prefix(main_code_lang, ts_prefixes):
         main_code_lang = 'typescript'
-        npm_packages = extract_js_imports(main_code)
-        sandbox_env_name = determine_jsts_environment(main_code, npm_packages)
     elif matches_prefix(main_code_lang, html_prefixes):
         main_code_lang = detect_js_ts_code_lang(main_code)
-        npm_packages = extract_js_imports(main_code)
-        sandbox_env_name = determine_jsts_environment(main_code, npm_packages)
     elif matches_prefix(main_code_lang, mermaid_prefixes):
         main_code_lang = 'markdown'
         sandbox_env_name = SandboxEnvironment.MERMAID
@@ -681,25 +672,14 @@ def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> t
         sandbox_env_name = SandboxEnvironment.RUST_RUNNER
     elif main_code_lang == 'c':
         main_code_lang = 'c'
-        sandbox_env_name = sandbox_env_name = SandboxEnvironment.C_RUNNER
     else:
         sandbox_env_name = None
-    all_python_packages: Set[str] = set(python_packages)
-    all_npm_packages: Set[str] = set(npm_packages)
-    for match in matches:
-        code = match.group('code').strip()
-        if code != main_code:
-            install_python_packages, install_npm_packages = extract_installation_commands(
-                code)
-            all_python_packages.update(install_python_packages)
-            all_npm_packages.update(install_npm_packages)
     if not main_code_lang:
         main_code_lang = 'markdown'
-    return main_code, main_code_lang, (list(all_python_packages), list(all_npm_packages)), sandbox_env_name
 def create_placeholder_svg_data_url(width: int, height: int) -> str:
@@ -735,7 +715,7 @@ def create_placeholder_svg_data_url(width: int, height: int) -> str:
         encoded_svg = base64.b64encode(svg.encode('utf-8')).decode('utf-8')
         return f'data:image/svg+xml;base64,{encoded_svg}'
     except Exception as e:
-        print(f'Error encoding SVG: {e}')
         # Fallback to a simple colored div
         return f'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0ie3dpZHRofSIgaGVpZ2h0PSJ7aGVpZ2h0fSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSIjZjNmNGY2Ii8+PC9zdmc+'
@@ -760,17 +740,17 @@ def replace_placeholder_urls(code: str) -> str:
             # Validate dimensions
             if width <= 0 or height <= 0:
-                print(f'Warning: Invalid dimensions {width}x{height}, using default 100x100')
                 width, height = 100, 100
             elif width > 10000 or height > 10000:
-                print(f'Warning: Dimensions {width}x{height} are very large, capping at 1000x1000')
                 width, height = min(width, 1000), min(height, 1000)
-            print(f'Replacing placeholder URL with SVG: {width}x{height}')
             data_url = create_placeholder_svg_data_url(width, height)
             return data_url
         except Exception as e:
-            print(f'Error replacing placeholder URL: {e}')
             # Return a simple fallback
             return 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSIjZjNmNGY2Ii8+PC9zdmc+'
@@ -780,10 +760,10 @@ def replace_placeholder_urls(code: str) -> str:
     try:
         # Replace all occurrences
         result = re.sub(pattern, replacer, code)
-        print(f'Placeholder URL replacement completed successfully')
         return result
     except Exception as e:
-        print(f'Error during placeholder URL replacement: {e}')
         return code  # Return original code if replacement fails

                         if len(node.args) > 0 and isinstance(node.args[0], ast.Str):
                             packages.add(node.args[0].s.split('.')[0])
         except Exception as e:
+            pass
             continue
     # Filter out standard library modules using sys.stdlib_module_names
         try:
             tree = ts_parser.parse(code_bytes)
         except Exception as e:
+            pass
             try:
                 tree = js_parser.parse(code_bytes)
             except Exception as e:
+                pass
                 tree = None
         if tree is None:
         return list(packages)
     except Exception as e:
+        pass
         # Fallback to basic regex parsing if tree-sitter fails
         packages: Set[str] = set()
         return list(packages)
+def determine_python_environment(code: str, install_command: str) -> SandboxEnvironment | None:
     '''
+    Determine Python sandbox environment based on install command and AST analysis.
     '''
     try:
         tree = ast.parse(code)
     except SyntaxError:
         pass
+    # Check install command for framework detection
+    if install_command and 'pygame' in install_command:
         return SandboxEnvironment.PYGAME
+    elif install_command and 'gradio' in install_command:
         return SandboxEnvironment.GRADIO
+    elif install_command and 'streamlit' in install_command:
         return SandboxEnvironment.STREAMLIT
+    # elif install_command and 'nicegui' in install_command:
     #     return SandboxEnvironment.NICEGUI
     return SandboxEnvironment.PYTHON_RUNNER
+def determine_jsts_environment(code: str, install_command: str) -> SandboxEnvironment | None:
     '''
+    Determine JavaScript/TypeScript sandbox environment based on install command and AST analysis.
     '''
     # First check for Vue SFC structure
     if '<template>' in code or '<script setup' in code:
         return SandboxEnvironment.VUE
+    # Check install command for framework detection
+    react_packages = {'react', '@react', 'next', '@next', '@tanstack/react-query', 'react-query'}
     vue_packages = {'vue', '@vue', 'nuxt', '@nuxt'}
+    if install_command and any(pkg in install_command for pkg in react_packages):
         return SandboxEnvironment.REACT
+    elif install_command and any(pkg in install_command for pkg in vue_packages):
         return SandboxEnvironment.VUE
     try:
                 return SandboxEnvironment.VUE
     except Exception as e:
+        pass
     return SandboxEnvironment.JAVASCRIPT_RUNNER
             return 'typescript'
     except Exception as e:
+        pass
         # Fallback to basic checks if parsing fails
         pass
     return list(packages)
+def extract_code_from_markdown(message: str, enable_auto_env: bool = False) -> tuple[str, str, SandboxEnvironment | None, str] | None:
     '''
     Extracts code from a markdown message by parsing code blocks directly.
     Determines sandbox environment based on code content and frameworks used.
     Returns:
+        tuple[str, str, SandboxEnvironment | None, str]: A tuple:
             1. code - the longest code block found
             2. code language
+            3. sandbox environment determined from code content
+            4. install_command - bash command from ```bash code blocks
     '''
     code_block_regex = r'```(?P<code_lang>[\w\+\#\-\.]*)?[ \t]*\r?\n?(?P<code>.*?)```'
     matches = list(re.finditer(code_block_regex, message, re.DOTALL))
     low_priority_languages = ['bash', 'shell',
                               'sh', 'zsh', 'powershell', 'pwsh', '']
+    # Extract bash commands first
+    install_command = ""
+    bash_matches = [match for match in matches if (match.group('code_lang') or '').lower() in ['bash', 'shell', 'sh']]
+    if bash_matches:
+        # Use the first bash command found, or concatenate multiple if needed
+        install_command = bash_matches[0].group('code').strip()
+        if len(bash_matches) > 1:
+            # If multiple bash blocks, join them with && or newlines
+            install_command = ' && '.join([match.group('code').strip() for match in bash_matches])
     # Find the main code block by avoiding low-priority languages
+    main_code = ""
+    main_code_lang = ""
     max_length = 0
     for match in matches:
             main_code_lang = code_lang
             max_length = len(code)
     # Define language prefixes for each environment
     python_prefixes = ['py', 'ipython', 'pygame', 'gradio', 'streamlit']
     rust_prefixes = ['rust']
     csharp_prefixes = ['cs', 'csharp', 'dotnet']
     # Helper function to check if any prefix matches
     def matches_prefix(lang: str, prefixes: list[str]) -> bool:
         return any(lang.lower().startswith(prefix) for prefix in prefixes)
+    # Determine sandbox environment based on language
     if matches_prefix(main_code_lang, python_prefixes):
+        sandbox_env_name =  determine_python_environment(main_code, install_command)
     elif matches_prefix(main_code_lang, vue_prefixes):
         sandbox_env_name = SandboxEnvironment.VUE
         main_code_lang = detect_js_ts_code_lang(main_code)
     elif matches_prefix(main_code_lang, react_prefixes):
         sandbox_env_name = SandboxEnvironment.REACT
         main_code_lang = detect_js_ts_code_lang(main_code)
     elif ('<!DOCTYPE html>' in main_code and ('<head' in main_code or '<body' in main_code)) or (main_code.strip().startswith('<svg')) or (not matches_prefix(main_code_lang, [*react_prefixes, *vue_prefixes, *js_prefixes, *ts_prefixes]) and ('<html' in main_code or '<!DOCTYPE html>' in main_code)):
         sandbox_env_name = SandboxEnvironment.HTML
         main_code_lang = 'html'
     elif matches_prefix(main_code_lang, js_prefixes):
         main_code_lang = 'javascript'
+        sandbox_env_name = determine_jsts_environment(main_code, install_command)
     elif matches_prefix(main_code_lang, ts_prefixes):
         main_code_lang = 'typescript'
+        sandbox_env_name = determine_jsts_environment(main_code, install_command)
     elif matches_prefix(main_code_lang, html_prefixes):
         main_code_lang = detect_js_ts_code_lang(main_code)
+        sandbox_env_name = SandboxEnvironment.HTML
     elif matches_prefix(main_code_lang, mermaid_prefixes):
         main_code_lang = 'markdown'
         sandbox_env_name = SandboxEnvironment.MERMAID
         sandbox_env_name = SandboxEnvironment.RUST_RUNNER
     elif main_code_lang == 'c':
         main_code_lang = 'c'
+        sandbox_env_name = SandboxEnvironment.C_RUNNER
     else:
         sandbox_env_name = None
     if not main_code_lang:
         main_code_lang = 'markdown'
+    return main_code, main_code_lang, sandbox_env_name, install_command
 def create_placeholder_svg_data_url(width: int, height: int) -> str:
         encoded_svg = base64.b64encode(svg.encode('utf-8')).decode('utf-8')
         return f'data:image/svg+xml;base64,{encoded_svg}'
     except Exception as e:
+        pass
         # Fallback to a simple colored div
         return f'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0ie3dpZHRofSIgaGVpZ2h0PSJ7aGVpZ2h0fSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSIjZjNmNGY2Ii8+PC9zdmc+'
             # Validate dimensions
             if width <= 0 or height <= 0:
+                pass
                 width, height = 100, 100
             elif width > 10000 or height > 10000:
+                pass
                 width, height = min(width, 1000), min(height, 1000)
+            pass
             data_url = create_placeholder_svg_data_url(width, height)
             return data_url
         except Exception as e:
+            pass
             # Return a simple fallback
             return 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSIjZjNmNGY2Ii8+PC9zdmc+'
     try:
         # Replace all occurrences
         result = re.sub(pattern, replacer, code)
+        pass
         return result
     except Exception as e:
+        pass
         return code  # Return original code if replacement fails

sandbox/code_runner.py CHANGED Viewed

@@ -6,6 +6,7 @@ Gradio will interact with this module.
 from typing import Any, Generator, Literal, TypeAlias, TypedDict, Set
 import uuid
 import gradio as gr
 import base64
@@ -124,7 +125,7 @@ def create_chatbot_sandbox_state(btn_list_length: int = 5) -> ChatbotSandboxStat
         'sandbox_instruction': DEFAULT_SANDBOX_INSTRUCTIONS[SandboxEnvironment.AUTO],
         'code_to_execute': "",
         'code_language': None,
-        'code_dependencies': ([], []),
         'btn_list_length': btn_list_length,
         'sandbox_id': None,
         'chat_session_id': None,
@@ -162,7 +163,7 @@ def reset_sandbox_state(state: ChatbotSandboxState) -> ChatbotSandboxState:
     state['sandbox_instruction'] = DEFAULT_SANDBOX_INSTRUCTIONS[SandboxEnvironment.AUTO]
     state['code_to_execute'] = ""
     state['code_language'] = None
-    state['code_dependencies'] = ([], [])
     state['sandbox_error'] = None
     state['sandbox_output'] = None
@@ -369,7 +370,7 @@ def render_result(result):
         return str(result)
-def run_code_interpreter(code: str, code_language: str | None, code_dependencies: tuple[list[str], list[str]]) -> tuple[str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
@@ -378,18 +379,19 @@ def run_code_interpreter(code: str, code_language: str | None, code_dependencies
     """
     sandbox = CodeSandbox()
-    sandbox.commands.run("pip install uv",
-                         timeout=60 * 3,
-                         on_stderr=lambda message: print(message),)
     stderrs = []
-    python_dependencies, npm_dependencies = code_dependencies
-    pip_install_errs = install_pip_dependencies(sandbox, python_dependencies)
-    npm_install_errs = install_npm_dependencies(sandbox, npm_dependencies)
-    stderrs.extend(pip_install_errs)
-    stderrs.extend(npm_install_errs)
     execution = sandbox.run_code(
         code=code,
@@ -403,7 +405,7 @@ def run_code_interpreter(code: str, code_language: str | None, code_dependencies
         stderr += f"\n{execution.error.name}: {execution.error.value}"
     output = ""
     if stdout:
-        output += f"### Stdout:\n```markdown\n{stdout}\n```\n\n"
     stderrs.append(stderr)
@@ -425,14 +427,14 @@ def run_code_interpreter(code: str, code_language: str | None, code_dependencies
     return output, "" if output else stderrs
-def run_html_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Supports both React and Vue.js rendering in HTML files.
     Args:
         code (str): The code to be executed.
-        code_dependencies: Tuple of (python_deps, npm_deps)
     Returns:
         tuple: (sandbox_url, sandbox_id, stderr)
@@ -441,9 +443,16 @@ def run_html_sandbox(code: str, code_dependencies: tuple[list[str], list[str]],
     project_root = "~/html_app"
     sandbox.files.make_dir(project_root)
-    # HTML does not support dependencies for now
-    # _, npm_dependencies = code_dependencies
-    # install_npm_dependencies(sandbox, npm_dependencies, project_root=project_root)
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
@@ -455,12 +464,13 @@ def run_html_sandbox(code: str, code_dependencies: tuple[list[str], list[str]],
     return (sandbox_url, sandbox.sandbox_id, '')
-def run_react_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
     Returns:
         url for remote sandbox
@@ -470,22 +480,31 @@ def run_react_sandbox(code: str, code_dependencies: tuple[list[str], list[str]],
     stderrs: list[str] = [] # to collect errors
-    _, npm_dependencies = code_dependencies
-    if npm_dependencies:
-        print(f"Installing NPM dependencies...: {npm_dependencies}")
-        install_errs = install_npm_dependencies(sandbox, npm_dependencies, project_root=project_root)
-        stderrs.extend(install_errs)
-        print("NPM dependencies installed. " + "Errors: " + str(install_errs))
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
-    # set up the sandbox
     print("Setting up sandbox directory structure...")
     file_path = "~/react_app/src/App.tsx"
     sandbox.files.write(file_path, code, "user", 60)
     print("Code files written successfully.")
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="npm run build --loglevel=error -- --mode development --logLevel error",
@@ -493,7 +512,13 @@ def run_react_sandbox(code: str, code_dependencies: tuple[list[str], list[str]],
     )
     stderrs.extend(build_stderrs)
     sandbox_url = get_sandbox_app_url(sandbox, 'react')
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
@@ -502,12 +527,13 @@ def run_react_sandbox(code: str, code_dependencies: tuple[list[str], list[str]],
     }
-def run_vue_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided Vue code within a sandboxed environment and returns the output.
     Args:
         code (str): The Vue code to be executed.
     Returns:
         url for remote sandbox
@@ -520,17 +546,26 @@ def run_vue_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], e
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
-    # Set up the sandbox
     file_path = "~/vue_app/src/App.vue"
     sandbox.files.write(file_path, code, "user", 60)
-    _, npm_dependencies = code_dependencies
-    if npm_dependencies:
-        print(f"Installing NPM dependencies...: {npm_dependencies}")
-        install_errs = install_npm_dependencies(sandbox, npm_dependencies, project_root=project_root)
-        stderrs.extend(install_errs)
-        print("NPM dependencies installed. " + "Errors: " + str(install_errs))
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="npm run build --loglevel=error -- --mode development --logLevel error",
@@ -538,7 +573,14 @@ def run_vue_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], e
     )
     stderrs.extend(build_stderrs)
     sandbox_url = get_sandbox_app_url(sandbox, 'vue')
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
@@ -547,12 +589,13 @@ def run_vue_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], e
     }
-def run_pygame_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
     Returns:
         url for remote sandbox
@@ -563,20 +606,39 @@ def run_pygame_sandbox(code: str, code_dependencies: tuple[list[str], list[str]]
     stderrs = []
     sandbox.files.write(file_path, code, "user", 60)
-    python_dependencies, _ = code_dependencies
-    install_errs = install_pip_dependencies(sandbox, python_dependencies)
-    stderrs.extend(install_errs)
-    # build the pygame code
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="pygbag --build ~/pygame_app",
     )
     stderrs.extend(build_stderrs)
     sandbox_url = get_sandbox_app_url(sandbox, 'pygame')
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
@@ -585,12 +647,13 @@ def run_pygame_sandbox(code: str, code_dependencies: tuple[list[str], list[str]]
     }
-def run_gradio_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
     Returns:
         url for remote sandbox and sandbox id
@@ -602,9 +665,18 @@ def run_gradio_sandbox(code: str, code_dependencies: tuple[list[str], list[str]]
     stderrs = []
-    python_dependencies, _ = code_dependencies
-    install_stderr = install_pip_dependencies(sandbox, python_dependencies)
-    stderrs.extend(install_stderr)
     stderr = run_background_command_with_timeout(
         sandbox,
@@ -618,7 +690,7 @@ def run_gradio_sandbox(code: str, code_dependencies: tuple[list[str], list[str]]
     return (sandbox_url, sandbox.sandbox_id, '\n'.join(stderrs))
-def run_streamlit_sandbox(code: str, code_dependencies: tuple[list[str], list[str]], existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     sandbox = reuse_or_create_sandbox(sandbox_id=existing_sandbox_id)
     stderrs = []
@@ -627,9 +699,18 @@ def run_streamlit_sandbox(code: str, code_dependencies: tuple[list[str], list[st
     file_path = "~/mystreamlit/app.py"
     sandbox.files.write(file_path, code, "user", 60)
-    python_dependencies, _ = code_dependencies
-    install_stderr = install_pip_dependencies(sandbox, python_dependencies)
-    stderrs.extend(install_stderr)
     stderr = run_background_command_with_timeout(
         sandbox,
@@ -825,70 +906,11 @@ def on_edit_code(
         return
     sandbox_state['code_to_execute'] = sandbox_code
-    # Extract packages from imports (without versions)
-    python_deps_from_imports = set(extract_python_imports(sandbox_code))
-    npm_deps_from_imports = set(extract_js_imports(sandbox_code))
-    # Get existing dependencies with versions from state
-    existing_python_deps, existing_npm_deps = sandbox_state["code_dependencies"]
-    # Create dictionaries to track package versions
-    python_deps_dict = {}  # pkg_name -> version
-    npm_deps_dict = {}     # pkg_name -> version
-    # First add existing dependencies with their specific versions
-    for dep in existing_python_deps:
-        pkg_name = dep.split('==')[0].split('>=')[0].split('<=')[0].split('~=')[0]
-        version = dep[len(pkg_name):]
-        if version:  # If it has a specific version
-            python_deps_dict[pkg_name] = version
-        elif pkg_name in python_deps_from_imports:  # Only keep packages that are still imported
-            python_deps_dict[pkg_name] = "latest"
-    for dep in existing_npm_deps:
-        if '@' in dep and not dep.startswith('@'):
-            pkg_name = dep.split('@')[0]
-            version = '@' + dep.split('@')[1]
-        elif '@' in dep[1:]:  # Handle scoped packages
-            pkg_name, version = dep.rsplit('@', 1)
-            version = '@' + version
-        else:
-            pkg_name = dep
-            version = "latest"
-        if version != "latest":  # If it has a specific version
-            npm_deps_dict[pkg_name] = version
-        elif pkg_name in npm_deps_from_imports:  # Only keep packages that are still imported
-            npm_deps_dict[pkg_name] = "latest"
-    # Add new dependencies from imports with "latest" if not already present
-    for dep in python_deps_from_imports:
-        if dep not in python_deps_dict:
-            python_deps_dict[dep] = "latest"
-    for dep in npm_deps_from_imports:
-        if dep not in npm_deps_dict:
-            npm_deps_dict[dep] = "latest"
-    # Convert to dataframe format
-    dependencies = []
-    # Add Python packages
-    for pkg_name, version in python_deps_dict.items():
-        dependencies.append(["python", pkg_name, version])
-    # Add NPM packages
-    for pkg_name, version in npm_deps_dict.items():
-        dependencies.append(["npm", pkg_name, version])
-    # If no dependencies found, provide default empty rows
-    if not dependencies:
-        dependencies = [["python", "", ""], ["npm", "", ""]]
-    # Update dependencies in sandbox state
-    sandbox_state["code_dependencies"] = (
-        [f"{pkg}{ver}" if ver != "latest" else pkg for pkg, ver in python_deps_dict.items()],
-        [f"{pkg}{ver}" if ver != "latest" else pkg for pkg, ver in npm_deps_dict.items()]
-    )
     yield (
         gr.skip(),  # sandbox_output_md
@@ -964,8 +986,9 @@ def on_edit_dependency(
             else:
                 npm_deps.append(pkg_name)
-    # Update sandbox state with new dependencies
-    sandbox_state["code_dependencies"] = (python_deps, npm_deps)
     # increase edit round
     sandbox_state['edit_round'] += 1
@@ -1019,7 +1042,7 @@ def on_click_code_message_run(
         yield gr.skip(), gr.skip(), gr.skip(), gr.skip()
         return
-    code, code_language, code_dependencies, env_selection = extract_result
     # As sandbox is reused, no need to skip
     # if sandbox_state['code_to_execute'] == code and sandbox_state['code_language'] == code_language:
@@ -1034,43 +1057,12 @@ def on_click_code_message_run(
         # ensure gradio supports the code language
     ) in VALID_GRADIO_CODE_LANGUAGES else None
-    python_deps, npm_deps = code_dependencies
-    # Convert to dataframe format
-    dependencies = []
-    # Add Python packages with versions
-    for dep in python_deps:
-        # Check if package has version specifier
-        if any(op in dep for op in ['==', '>=', '<=', '~=']):
-            # Split on first occurrence of version operator
-            pkg_name = dep.split('==')[0].split('>=')[0].split('<=')[0].split('~=')[0]
-            version = dep[len(pkg_name):]  # Get everything after package name
-            dependencies.append(["python", pkg_name, version])
-        else:
-            dependencies.append(["python", dep, "latest"])
-    # Add NPM packages with versions
-    for dep in npm_deps:
-        # Check if package has version specifier
-        if '@' in dep and not dep.startswith('@'):
-            # Handle non-scoped packages with version
-            pkg_name, version = dep.split('@', 1)
-            dependencies.append(["npm", pkg_name, '@' + version])
-        elif '@' in dep[1:]:  # Handle scoped packages with version
-            # Split on last @ for scoped packages
-            pkg_parts = dep.rsplit('@', 1)
-            dependencies.append(["npm", pkg_parts[0], '@' + pkg_parts[1]])
-        else:
-            dependencies.append(["npm", dep, "latest"])
-    # If no dependencies found, provide default empty rows
-    if not dependencies:
-        dependencies = [["python", "", ""], ["npm", "", ""]]
     sandbox_state['code_to_execute'] = code
     sandbox_state['code_language'] = code_language
-    sandbox_state["code_dependencies"] = code_dependencies
     if sandbox_state['sandbox_environment'] == SandboxEnvironment.AUTO:
         sandbox_state['auto_selected_sandbox_environment'] = env_selection
@@ -1137,66 +1129,11 @@ def on_run_code(
         # ensure gradio supports the code language
     ) in VALID_GRADIO_CODE_LANGUAGES else None
-    # Use dependencies from sandbox_state instead of re-extracting
-    code_dependencies = sandbox_state['code_dependencies']
-    python_deps, npm_deps = code_dependencies
-    # Helper function to extract package name without version
-    def get_base_package_name(pkg: str) -> str:
-        # For Python packages
-        if any(op in pkg for op in ['==', '>=', '<=', '~=', '>', '<']):
-            return pkg.split('==')[0].split('>=')[0].split('<=')[0].split('~=')[0].split('>')[0].split('<')[0]
-        # For NPM packages
-        if '@' in pkg and not pkg.startswith('@'):
-            return pkg.split('@')[0]
-        elif '@' in pkg[1:]:  # Handle scoped packages
-            return pkg.rsplit('@', 1)[0]
-        return pkg
-    # Helper function to extract version from package string
-    def get_package_version(pkg: str) -> str:
-        # For Python packages
-        if any(op in pkg for op in ['==', '>=', '<=', '~=', '>', '<']):
-            base_name = get_base_package_name(pkg)
-            return pkg[len(base_name):]
-        # For NPM packages
-        if '@' in pkg and not pkg.startswith('@'):
-            return '@' + pkg.split('@', 1)[1]
-        elif '@' in pkg[1:]:  # Handle scoped packages
-            _, version = pkg.rsplit('@', 1)
-            return '@' + version
-        return "latest"
-    # Create unified dependency dictionaries to avoid duplicates
-    python_deps_dict = {}  # pkg_name -> version
-    npm_deps_dict = {}     # pkg_name -> version
-    # Process Python dependencies
-    for dep in python_deps:
-        base_name = get_base_package_name(dep)
-        version = get_package_version(dep)
-        # Only update if we don't have a version yet or if we're replacing 'latest'
-        if base_name not in python_deps_dict or python_deps_dict[base_name] == "latest":
-            python_deps_dict[base_name] = version
-    # Process NPM dependencies
-    for dep in npm_deps:
-        base_name = get_base_package_name(dep)
-        version = get_package_version(dep)
-        # Only update if we don't have a version yet or if we're replacing 'latest'
-        if base_name not in npm_deps_dict or npm_deps_dict[base_name] == "latest":
-            npm_deps_dict[base_name] = version
-    # Convert unified dictionaries to dataframe format
-    dependencies = []
-    for pkg_name, version in python_deps_dict.items():
-        dependencies.append(["python", pkg_name, version])
-    for pkg_name, version in npm_deps_dict.items():
-        dependencies.append(["npm", pkg_name, version])
-    # If no dependencies found, provide default empty rows
-    if not dependencies:
-        dependencies = [["python", "", ""], ["npm", "", ""]]
     # Initialize output with loading message
     markdown_output_text = "### Sandbox Execution Log\n\n"
@@ -1237,12 +1174,12 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up HTML sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_html_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ HTML sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ HTML sandbox is ready!", clear_output=True)
                 yield (
@@ -1260,13 +1197,13 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up React sandbox...")
             code_run_result = run_react_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and sandbox_error:
                 yield update_markdown_output("❌ React sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ React sandbox is ready!", clear_output=True)
                 yield (
@@ -1284,13 +1221,13 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up Vue sandbox...")
             code_run_result = run_vue_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and code_run_result['stderr']:
                 yield update_markdown_output("❌ Vue sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{code_run_result['stderr']}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Vue sandbox is ready!", clear_output=True)
                 yield (
@@ -1308,13 +1245,13 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up PyGame sandbox...")
             code_run_result = run_pygame_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and code_run_result['stderr']:
                 yield update_markdown_output("❌ PyGame sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{code_run_result['stderr']}\n```\n\n")
             else:
                 yield update_markdown_output("✅ PyGame sandbox is ready!", clear_output=True)
                 yield (
@@ -1332,12 +1269,12 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up Gradio sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_gradio_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Gradio sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Gradio sandbox is ready!", clear_output=True)
                 yield (
@@ -1355,12 +1292,12 @@ def on_run_code(
             yield update_markdown_output("🔄 Setting up Streamlit sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_streamlit_sandbox(
                 code=code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Streamlit sandbox failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Streamlit sandbox is ready!", clear_output=True)
                 yield (
@@ -1380,12 +1317,12 @@ def on_run_code(
             html_code = mermaid_to_html(code, theme='light')
             sandbox_url, sandbox_id, sandbox_error = run_html_sandbox(
                 code=html_code,
-                code_dependencies=code_dependencies,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Mermaid visualization failed to render!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Mermaid visualization is ready!", clear_output=True)
                 yield (
@@ -1402,11 +1339,11 @@ def on_run_code(
         case SandboxEnvironment.PYTHON_RUNNER:
             yield update_markdown_output("🔄 Running Python Runner...", clear_output=True)
             sandbox_output, sandbox_error = run_code_interpreter(
-                code=code, code_language='python', code_dependencies=code_dependencies
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Python Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
@@ -1426,25 +1363,30 @@ def on_run_code(
                 )
         case SandboxEnvironment.JAVASCRIPT_RUNNER:
             yield update_markdown_output("🔄 Running JavaScript Runner...", clear_output=True)
-            sandbox_output, sandbox_error = run_code_interpreter(
-                code=code, code_language='javascript', code_dependencies=code_dependencies
             )
             if sandbox_error:
                 yield update_markdown_output("❌ JavaScript Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
                     gr.Markdown(
-                        value=markdown_output_text + "\n\n" + sandbox_output,
                         sanitize_html=False,
                         visible=True,
                     ),
                     SandboxComponent(
-                        value=("", False, []),
-                        label="Example",
-                        visible=False,
-                        key="newsandbox",
                     ),
                     gr.skip(),
                     gr.skip(),
@@ -1456,7 +1398,7 @@ def on_run_code(
             )
             if sandbox_error:
                 yield update_markdown_output("❌ C Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
@@ -1481,7 +1423,7 @@ def on_run_code(
             )
             if sandbox_error:
                 yield update_markdown_output("❌ C++ Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
@@ -1506,7 +1448,7 @@ def on_run_code(
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Java Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
@@ -1531,7 +1473,7 @@ def on_run_code(
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Go Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
@@ -1566,7 +1508,7 @@ def on_run_code(
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Rust Runner failed to run!", clear_output=True)
-                yield update_markdown_output(f"### Stderr:\n```markdown\n{sandbox_error}\n```\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (

 from typing import Any, Generator, Literal, TypeAlias, TypedDict, Set
 import uuid
+import time
 import gradio as gr
 import base64
         'sandbox_instruction': DEFAULT_SANDBOX_INSTRUCTIONS[SandboxEnvironment.AUTO],
         'code_to_execute': "",
         'code_language': None,
+        'install_command': "",
         'btn_list_length': btn_list_length,
         'sandbox_id': None,
         'chat_session_id': None,
     state['sandbox_instruction'] = DEFAULT_SANDBOX_INSTRUCTIONS[SandboxEnvironment.AUTO]
     state['code_to_execute'] = ""
     state['code_language'] = None
+    state['install_command'] = ""
     state['sandbox_error'] = None
     state['sandbox_output'] = None
         return str(result)
+def run_code_interpreter(code: str, code_language: str | None, install_command: str) -> tuple[str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     """
     sandbox = CodeSandbox()
     stderrs = []
+    # Run install command if provided
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
     execution = sandbox.run_code(
         code=code,
         stderr += f"\n{execution.error.name}: {execution.error.value}"
     output = ""
     if stdout:
+        output += f"```markdown\n{stdout}\n```\n\n"
     stderrs.append(stderr)
     return output, "" if output else stderrs
+def run_html_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Supports both React and Vue.js rendering in HTML files.
     Args:
         code (str): The code to be executed.
+        install_command (str): Bash command to install dependencies
     Returns:
         tuple: (sandbox_url, sandbox_id, stderr)
     project_root = "~/html_app"
     sandbox.files.make_dir(project_root)
+    # Run install command if provided
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+        )
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            return "", sandbox.sandbox_id, '\n'.join(stderr)
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
     return (sandbox_url, sandbox.sandbox_id, '')
+def run_react_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
+        install_command (str): Bash command to install dependencies
     Returns:
         url for remote sandbox
     stderrs: list[str] = [] # to collect errors
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
+    # set up the sandbox directory structure first
     print("Setting up sandbox directory structure...")
     file_path = "~/react_app/src/App.tsx"
     sandbox.files.write(file_path, code, "user", 60)
     print("Code files written successfully.")
+    # Run install command AFTER setting up the project structure
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+            working_directory=project_root,  # Run in the correct directory
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            # Don't return early - continue with build attempt
+            stderrs.append(f"Install command failed: {' '.join(stderr)}")
+    # Attempt to build the React app
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="npm run build --loglevel=error -- --mode development --logLevel error",
     )
     stderrs.extend(build_stderrs)
+    # Always try to get the sandbox URL, even if build failed
     sandbox_url = get_sandbox_app_url(sandbox, 'react')
+    # If build failed but we have a sandbox, still return the URL
+    if not is_run_success and sandbox_url:
+        is_run_success = True  # Consider it successful if we have a working sandbox
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
     }
+def run_vue_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided Vue code within a sandboxed environment and returns the output.
     Args:
         code (str): The Vue code to be executed.
+        install_command (str): Bash command to install dependencies
     Returns:
         url for remote sandbox
     # replace placeholder URLs with SVG data URLs
     code = replace_placeholder_urls(code)
+    # Set up the sandbox directory structure first
     file_path = "~/vue_app/src/App.vue"
     sandbox.files.write(file_path, code, "user", 60)
+    # Run install command AFTER setting up the project structure
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+            working_directory=project_root,  # Run in the correct directory
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            # Don't return early - continue with build attempt
+            stderrs.append(f"Install command failed: {' '.join(stderr)}")
+    # Attempt to build the Vue app
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="npm run build --loglevel=error -- --mode development --logLevel error",
     )
     stderrs.extend(build_stderrs)
+    # Always try to get the sandbox URL, even if build failed
     sandbox_url = get_sandbox_app_url(sandbox, 'vue')
+    # If build failed but we have a sandbox, still return the URL
+    if not is_run_success and sandbox_url:
+        print(f"⚠️ Build failed but sandbox is available at: {sandbox_url}")
+        is_run_success = True  # Consider it successful if we have a working sandbox
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
     }
+def run_pygame_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> CodeRunResult:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
+        install_command (str): Bash command to install dependencies
     Returns:
         url for remote sandbox
     stderrs = []
+    # Set up the sandbox directory structure first
     sandbox.files.write(file_path, code, "user", 60)
+    # Run install command AFTER setting up the project structure
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+            working_directory=project_root,  # Run in the correct directory
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            # Don't return early - continue with build attempt
+            stderrs.append(f"Install command failed: {' '.join(stderr)}")
+    # Attempt to build the pygame code
     is_run_success, _, build_stderrs = run_command_in_sandbox(
         sandbox=sandbox,
         command="pygbag --build ~/pygame_app",
     )
     stderrs.extend(build_stderrs)
+    # Always try to get the sandbox URL, even if build failed
     sandbox_url = get_sandbox_app_url(sandbox, 'pygame')
+    # If build failed but we have a sandbox, still return the URL
+    if not is_run_success and sandbox_url:
+        print(f"⚠️ Build failed but sandbox is available at: {sandbox_url}")
+        is_run_success = True  # Consider it successful if we have a working sandbox
     return {
         'sandbox_id': sandbox.sandbox_id,
         'sandbox_url': sandbox_url,
     }
+def run_gradio_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     """
     Executes the provided code within a sandboxed environment and returns the output.
     Args:
         code (str): The code to be executed.
+        install_command (str): Bash command to install dependencies
     Returns:
         url for remote sandbox and sandbox id
     stderrs = []
+    # Run install command if provided
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            return "", sandbox.sandbox_id, '\n'.join(stderr)
     stderr = run_background_command_with_timeout(
         sandbox,
     return (sandbox_url, sandbox.sandbox_id, '\n'.join(stderrs))
+def run_streamlit_sandbox(code: str, install_command: str, existing_sandbox_id: str | None = None) -> tuple[str, str, str]:
     sandbox = reuse_or_create_sandbox(sandbox_id=existing_sandbox_id)
     stderrs = []
     file_path = "~/mystreamlit/app.py"
     sandbox.files.write(file_path, code, "user", 60)
+    # Run install command if provided
+    if install_command.strip():
+        is_success, stdout, stderr = run_command_in_sandbox(
+            sandbox=sandbox,
+            command=install_command,
+            timeout=60 * 3,
+        )
+        if stderr:
+            stderrs.extend(stderr)
+        if not is_success:
+            print(f"Install command failed: {stderr}")
+            return "", sandbox.sandbox_id, '\n'.join(stderr)
     stderr = run_background_command_with_timeout(
         sandbox,
         return
     sandbox_state['code_to_execute'] = sandbox_code
+    # Create empty dependencies dataframe for UI compatibility
+    dependencies = [["python", "", ""], ["npm", "", ""]]
+    # Keep existing install command
+    # No need to update install_command here as it's set from the original message
     yield (
         gr.skip(),  # sandbox_output_md
             else:
                 npm_deps.append(pkg_name)
+    # Update sandbox state with new install command
+    # For now, we'll keep the existing install_command as dependency editing is simplified
+    # In a full implementation, you might want to convert the dependency edits to install commands
     # increase edit round
     sandbox_state['edit_round'] += 1
         yield gr.skip(), gr.skip(), gr.skip(), gr.skip()
         return
+    code, code_language, env_selection, install_command = extract_result
     # As sandbox is reused, no need to skip
     # if sandbox_state['code_to_execute'] == code and sandbox_state['code_language'] == code_language:
         # ensure gradio supports the code language
     ) in VALID_GRADIO_CODE_LANGUAGES else None
+    # Create empty dependencies dataframe for UI compatibility
+    dependencies = [["python", "", ""], ["npm", "", ""]]
     sandbox_state['code_to_execute'] = code
     sandbox_state['code_language'] = code_language
+    sandbox_state['install_command'] = install_command
     if sandbox_state['sandbox_environment'] == SandboxEnvironment.AUTO:
         sandbox_state['auto_selected_sandbox_environment'] = env_selection
         # ensure gradio supports the code language
     ) in VALID_GRADIO_CODE_LANGUAGES else None
+    # Get install command from sandbox state
+    install_command = sandbox_state.get('install_command', '')
+    # Create empty dependencies dataframe for UI compatibility
+    dependencies = [["python", "", ""], ["npm", "", ""]]
     # Initialize output with loading message
     markdown_output_text = "### Sandbox Execution Log\n\n"
             yield update_markdown_output("🔄 Setting up HTML sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_html_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ HTML sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ HTML sandbox is ready!", clear_output=True)
                 yield (
             yield update_markdown_output("🔄 Setting up React sandbox...")
             code_run_result = run_react_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and sandbox_error:
                 yield update_markdown_output("❌ React sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ React sandbox is ready!", clear_output=True)
                 yield (
             yield update_markdown_output("🔄 Setting up Vue sandbox...")
             code_run_result = run_vue_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and code_run_result['stderr']:
                 yield update_markdown_output("❌ Vue sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{code_run_result['stderr']}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Vue sandbox is ready!", clear_output=True)
                 yield (
             yield update_markdown_output("🔄 Setting up PyGame sandbox...")
             code_run_result = run_pygame_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             sandbox_id, sandbox_error = code_run_result['sandbox_id'], code_run_result['stderr']
             if code_run_result['is_run_success'] is False and code_run_result['stderr']:
                 yield update_markdown_output("❌ PyGame sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{code_run_result['stderr']}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ PyGame sandbox is ready!", clear_output=True)
                 yield (
             yield update_markdown_output("🔄 Setting up Gradio sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_gradio_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Gradio sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Gradio sandbox is ready!", clear_output=True)
                 yield (
             yield update_markdown_output("🔄 Setting up Streamlit sandbox...")
             sandbox_url, sandbox_id, sandbox_error = run_streamlit_sandbox(
                 code=code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Streamlit sandbox failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Streamlit sandbox is ready!", clear_output=True)
                 yield (
             html_code = mermaid_to_html(code, theme='light')
             sandbox_url, sandbox_id, sandbox_error = run_html_sandbox(
                 code=html_code,
+                install_command=install_command,
                 existing_sandbox_id=sandbox_state['sandbox_id'],
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Mermaid visualization failed to render!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Mermaid visualization is ready!", clear_output=True)
                 yield (
         case SandboxEnvironment.PYTHON_RUNNER:
             yield update_markdown_output("🔄 Running Python Runner...", clear_output=True)
             sandbox_output, sandbox_error = run_code_interpreter(
+                code=code, code_language='python', install_command=install_command
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Python Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
                 )
         case SandboxEnvironment.JAVASCRIPT_RUNNER:
             yield update_markdown_output("🔄 Running JavaScript Runner...", clear_output=True)
+            # Convert JavaScript code to HTML
+            html_code = javascript_to_html(code)
+            # Run the HTML in sandbox
+            sandbox_url, sandbox_id, sandbox_error = run_html_sandbox(
+                code=html_code, install_command=install_command, sandbox_id=sandbox_state.get('sandbox_id')
             )
+            # Update sandbox state with the sandbox_id
+            sandbox_state['sandbox_id'] = sandbox_id
             if sandbox_error:
                 yield update_markdown_output("❌ JavaScript Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
                     gr.Markdown(
+                        value=markdown_output_text,
                         sanitize_html=False,
                         visible=True,
                     ),
                     SandboxComponent(
+                        value=(sandbox_url, True, []),
+                        label="JavaScript Sandbox",
+                        visible=True,
+                        key=f"js_sandbox_{int(time.time() * 1000)}",
                     ),
                     gr.skip(),
                     gr.skip(),
             )
             if sandbox_error:
                 yield update_markdown_output("❌ C Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
             )
             if sandbox_error:
                 yield update_markdown_output("❌ C++ Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Java Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Go Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (
             )
             if sandbox_error:
                 yield update_markdown_output("❌ Rust Runner failed to run!", clear_output=True)
+                yield update_markdown_output(f"<details open><summary><strong>🚨 Stderr</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n")
             else:
                 yield update_markdown_output("✅ Code execution is ready!", clear_output=True)
                 yield (

sandbox/prompts.py CHANGED Viewed

@@ -7,9 +7,9 @@ You are an expert Software Engineer, UI/UX designer, and product manager. Your t
 If you do a great job based on the instructions, you will be rewarded with a high salary and a promotion.
 Your code must be written using one of these supported development frameworks and environments:
-- React (JavaScript/TypeScript)
-- Vue (JavaScript/TypeScript)
-- HTML (Vanilla HTML)
 - Gradio (Python)
 - Streamlit (Python)
 - PyGame (Python)
@@ -56,7 +56,7 @@ For Python development, you must follow these constraints:
 - For any programs that require user inputs, you MUST USE `gradio` or `streamlit`
 - Choose suitable PyPI packages to be imported, e.g., `import pandas`
 - Avoid using libraries that require desktop GUI interfaces, with the exceptions of `pygame`, `gradio`, and `streamlit` which are explicitly supported
-- For PyGame applications, you have to write the main function as an async function like:
 ```python
 import asyncio
 import pygame
@@ -95,6 +95,33 @@ The code must be in the markdown format:
 ```<language>
 <code>
 ```
 """
 DEFAULT_PYTHON_RUNNER_INSTRUCTION = """
@@ -125,6 +152,9 @@ Before you begin writing any code, you must follow these fundamental rules:
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
 - Make sure to include all necessary code in one file
 - Ensure the code is self-contained and does not rely on browser-specific APIs
@@ -168,6 +198,10 @@ Before you begin writing any code, you must follow these fundamental rules:
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
 - Make sure the program is functional by creating a state when needed and having no required props
 - Make sure it can run by itself by using a default export at the end of the file
 - DO NOT CALL `ReactDOM.render()` AT THE END OF THE FILE
@@ -200,6 +234,10 @@ Before you begin writing any code, you must follow these fundamental rules:
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
 - Make sure the program is functional by creating a state when needed and having no required props
 - The component should be a simple custom page in a styled `<div>` element
 - Do not include <NuxtWelcome /> or reference any external components

 If you do a great job based on the instructions, you will be rewarded with a high salary and a promotion.
 Your code must be written using one of these supported development frameworks and environments:
+- React (JavaScript/TypeScript) -- version 18.3.1
+- Vue (JavaScript/TypeScript) -- version 3.5.13
+- HTML (Plain HTML)
 - Gradio (Python)
 - Streamlit (Python)
 - PyGame (Python)
 - For any programs that require user inputs, you MUST USE `gradio` or `streamlit`
 - Choose suitable PyPI packages to be imported, e.g., `import pandas`
 - Avoid using libraries that require desktop GUI interfaces, with the exceptions of `pygame`, `gradio`, and `streamlit` which are explicitly supported
+- For PyGame applications, we use pygbag to build the application. You have to write the main function as an async function like:
 ```python
 import asyncio
 import pygame
 ```<language>
 <code>
 ```
+YOU MUST ALWAYS PROVIDE A CODE BLOCK FOR THE CODE TO BE EXECUTED. DO NOT EXPLAIN THE CODE, JUST PROVIDE THE CODE BLOCK.
+IN ADDITION TO THE PROGRAM BLOCK, YOU MUST INSTALL ALL THE COMPATIBLE DEPENDENCIES FOR THE CODE TO BE EXECUTED, USING THE FOLLOWING FORMAT:
+```bash
+COMMAND TO INSTALL DEPENDENCIES GRACEFULLY WITHOUT CAUSING VERSION CONFLICTS
+```
+FOR NPM INSTALLATIONS:
+- Use `--prefer-offline --no-audit --no-fund --legacy-peer-deps` to avoid conflicts
+- Use compatible package versions (e.g., react-router-dom@^6.0.0 instead of latest)
+- YOU DO NOT NEED TO INSTALL the `react`, `react-dom`, or `vue` packages. They are already installed.
+- Include all required peer dependencies explicitly
+- For React Router, use v6+ syntax (Routes instead of Switch)
+- For CSS imports, include the base package (e.g., easymde for react-simplemde-editor)
+- Avoid packages requiring Node.js v20+ (sandbox has v18)
+FOR PIP INSTALLATIONS:
+- YOU MUST NOT INSTALL ANY DEEP LEARNING DEPENDENCIES. THE ENVIRONMENT IS CPU ONLY.
+- IF THE USER SAYS TO INSTALL A PACKAGE, YOU MUST INSTALL IT.
+- Use `uv pip install --system` to install packages.
+YOU DO NOT NEED TO INSTALL ANY OF THE FOLLOWING DEPENDENCIES:
+- `gradio`, `streamlit`, `pygame`, `mermaid`, `react`, `react-dom`, `vue`
+THERE IS NO EXTERNAL FILE IN THE LOCAL FILE SYSTEM.
+WHATEVER THE USER SAYS (e.g., "hello"), YOU MUST ALWAYS WRITE A PROGRAM TO RESPOND.
 """
 DEFAULT_PYTHON_RUNNER_INSTRUCTION = """
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
+- For npm installations, use compatible versions and include all peer dependencies:
+  - Use `--legacy-peer-deps` flag to avoid version conflicts
+  - Avoid packages requiring Node.js v20+ (sandbox has v18)
 - Make sure to include all necessary code in one file
 - Ensure the code is self-contained and does not rely on browser-specific APIs
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
+- For npm installations, use compatible versions and include all peer dependencies:
+  - Use `react-router-dom@^6.0.0` (not v7+) and React Router v6+ syntax (Routes instead of Switch)
+  - Include base packages for CSS (e.g., `easymde` for `react-simplemde-editor`)
+  - Use `--legacy-peer-deps` flag to avoid version conflicts
 - Make sure the program is functional by creating a state when needed and having no required props
 - Make sure it can run by itself by using a default export at the end of the file
 - DO NOT CALL `ReactDOM.render()` AT THE END OF THE FILE
 - Your response must contain a clear explanation of the solution you are providing
 - ALWAYS generate complete, self-contained code in a single file
 - If you use any external libraries, make sure to specify them for installation with `npm install`
+- For npm installations, use compatible versions and include all peer dependencies:
+  - Use `--legacy-peer-deps` flag to avoid version conflicts
+  - Include base packages for CSS when needed
+  - Avoid packages requiring Node.js v20+ (sandbox has v18)
 - Make sure the program is functional by creating a state when needed and having no required props
 - The component should be a simple custom page in a styled `<div>` element
 - Do not include <NuxtWelcome /> or reference any external components

sandbox/sandbox_manager.py CHANGED Viewed

@@ -20,7 +20,6 @@ def create_sandbox(template: str = SANDBOX_TEMPLATE_ID) -> Sandbox:
     Create a new sandbox.
     Will retry if the sandbox creation fails.
     '''
-    print("Creating new sandbox...")
     for attempt in range(1, SANDBOX_RETRY_COUNT + 1):
         try:
             return Sandbox(
@@ -90,13 +89,6 @@ def run_command_in_sandbox(
         stderrs.append(str(e))
         is_run_success = False
-    if print_output:
-        print(f"Command: {command}")
-        for stdout in stdouts:
-            print(stdout)
-        for stderr in stderrs:
-            print(stderr)
     return is_run_success, stdouts, stderrs
@@ -116,8 +108,6 @@ def install_pip_dependencies(sandbox: Sandbox, dependencies: list[str]) -> list[
                 sandbox.commands.run(
                     f"uv pip install --system {dependency}",
                     timeout=60 * 3,
-                    on_stdout=lambda message: print(message),
-                    on_stderr=lambda message: print(message),
                 )
             except Exception as e:
                 install_errors.append(f"Error during installing pip package {dependency}: {str(e)}")
@@ -178,8 +168,6 @@ def install_npm_dependencies(sandbox: Sandbox, dependencies: list[str], project_
                 f"npm install {dependency} --prefer-offline --no-audit --no-fund --legacy-peer-deps",
                 cwd=project_root,
                 timeout=60 * 3,
-                on_stdout=lambda message: print(message),
-                on_stderr=lambda message: print(message),
             )
         except Exception as e:
             install_errors.append(f"Error during installing npm package {dependency}:" + str(e))

     Create a new sandbox.
     Will retry if the sandbox creation fails.
     '''
     for attempt in range(1, SANDBOX_RETRY_COUNT + 1):
         try:
             return Sandbox(
         stderrs.append(str(e))
         is_run_success = False
     return is_run_success, stdouts, stderrs
                 sandbox.commands.run(
                     f"uv pip install --system {dependency}",
                     timeout=60 * 3,
                 )
             except Exception as e:
                 install_errors.append(f"Error during installing pip package {dependency}: {str(e)}")
                 f"npm install {dependency} --prefer-offline --no-audit --no-fund --legacy-peer-deps",
                 cwd=project_root,
                 timeout=60 * 3,
             )
         except Exception as e:
             install_errors.append(f"Error during installing npm package {dependency}:" + str(e))