mirror of https://github.com/microsoft/autogen.git
Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor with Mapping executor/saver #2303 (#2464)
* Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor * init branch * init branch * feat: test code execution added * fix: test update * fix: test * fix: policy test * feat: default policy --------- Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
This commit is contained in:
parent
ebde196d6b
commit
31fe75ad0e
|
@ -6,7 +6,7 @@ import warnings
|
|||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
from string import Template
|
||||
from typing import Any, Callable, ClassVar, List, TypeVar, Union, cast
|
||||
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union
|
||||
|
||||
from typing_extensions import ParamSpec
|
||||
|
||||
|
@ -28,7 +28,31 @@ A = ParamSpec("A")
|
|||
|
||||
|
||||
class LocalCommandLineCodeExecutor(CodeExecutor):
|
||||
SUPPORTED_LANGUAGES: ClassVar[List[str]] = ["bash", "shell", "sh", "pwsh", "powershell", "ps1", "python"]
|
||||
SUPPORTED_LANGUAGES: ClassVar[List[str]] = [
|
||||
"bash",
|
||||
"shell",
|
||||
"sh",
|
||||
"pwsh",
|
||||
"powershell",
|
||||
"ps1",
|
||||
"python",
|
||||
"javascript",
|
||||
"html",
|
||||
"css",
|
||||
]
|
||||
DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
|
||||
"bash": True,
|
||||
"shell": True,
|
||||
"sh": True,
|
||||
"pwsh": True,
|
||||
"powershell": True,
|
||||
"ps1": True,
|
||||
"python": True,
|
||||
"javascript": False,
|
||||
"html": False,
|
||||
"css": False,
|
||||
}
|
||||
|
||||
FUNCTION_PROMPT_TEMPLATE: ClassVar[
|
||||
str
|
||||
] = """You have access to the following user defined functions. They can be accessed from the module called `$module_name` by their function names.
|
||||
|
@ -43,29 +67,27 @@ $functions"""
|
|||
work_dir: Union[Path, str] = Path("."),
|
||||
functions: List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]] = [],
|
||||
functions_module: str = "functions",
|
||||
execution_policies: Optional[Dict[str, bool]] = None,
|
||||
):
|
||||
"""(Experimental) A code executor class that executes code through a local command line
|
||||
"""(Experimental) A code executor class that executes or saves LLM generated code through a local command line
|
||||
environment.
|
||||
|
||||
**This will execute LLM generated code on the local machine.**
|
||||
**This will execute or save LLM generated code on the local machine.**
|
||||
|
||||
Each code block is saved as a file and executed in a separate process in
|
||||
the working directory, and a unique file is generated and saved in the
|
||||
working directory for each code block.
|
||||
The code blocks are executed in the order they are received.
|
||||
Command line code is sanitized using regular expression match against a list of dangerous commands in order to prevent self-destructive
|
||||
commands from being executed which may potentially affect the users environment.
|
||||
Currently the only supported languages is Python and shell scripts.
|
||||
For Python code, use the language "python" for the code block.
|
||||
For shell scripts, use the language "bash", "shell", or "sh" for the code
|
||||
block.
|
||||
Each code block is saved as a file in the working directory. Depending on the execution policy,
|
||||
the code may be executed in a separate process.
|
||||
The code blocks are executed or saved in the order they are received.
|
||||
Command line code is sanitized against a list of dangerous commands to prevent self-destructive commands from being executed,
|
||||
which could potentially affect the user's environment. Supported languages include Python, shell scripts (bash, shell, sh),
|
||||
PowerShell (pwsh, powershell, ps1), HTML, CSS, and JavaScript.
|
||||
Execution policies determine whether each language's code blocks are executed or saved only.
|
||||
|
||||
Args:
|
||||
timeout (int): The timeout for code execution. Default is 60.
|
||||
work_dir (str): The working directory for the code execution. If None,
|
||||
a default working directory will be used. The default working
|
||||
directory is the current directory ".".
|
||||
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any]]]): A list of functions that are available to the code executor. Default is an empty list.
|
||||
timeout (int): The timeout for code execution, default is 60 seconds.
|
||||
work_dir (Union[Path, str]): The working directory for code execution, defaults to the current directory.
|
||||
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]]): A list of callable functions available to the executor.
|
||||
functions_module (str): The module name under which functions are accessible.
|
||||
execution_policies (Optional[Dict[str, bool]]): A dictionary mapping languages to execution policies (True for execution, False for saving only). Defaults to class-wide DEFAULT_EXECUTION_POLICY.
|
||||
"""
|
||||
|
||||
if timeout < 1:
|
||||
|
@ -91,6 +113,10 @@ $functions"""
|
|||
else:
|
||||
self._setup_functions_complete = True
|
||||
|
||||
self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
|
||||
if execution_policies is not None:
|
||||
self.execution_policies.update(execution_policies)
|
||||
|
||||
def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEMPLATE) -> str:
|
||||
"""(Experimental) Format the functions for a prompt.
|
||||
|
||||
|
@ -104,7 +130,6 @@ $functions"""
|
|||
Returns:
|
||||
str: The formatted prompt.
|
||||
"""
|
||||
|
||||
template = Template(prompt_template)
|
||||
return template.substitute(
|
||||
module_name=self._functions_module,
|
||||
|
@ -171,26 +196,19 @@ $functions"""
|
|||
required_packages = list(set(flattened_packages))
|
||||
if len(required_packages) > 0:
|
||||
logging.info("Ensuring packages are installed in executor.")
|
||||
|
||||
cmd = [sys.executable, "-m", "pip", "install"]
|
||||
cmd.extend(required_packages)
|
||||
|
||||
cmd = [sys.executable, "-m", "pip", "install"] + required_packages
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
||||
)
|
||||
except subprocess.TimeoutExpired as e:
|
||||
raise ValueError("Pip install timed out") from e
|
||||
|
||||
if result.returncode != 0:
|
||||
raise ValueError(f"Pip install failed. {result.stdout}, {result.stderr}")
|
||||
|
||||
# Attempt to load the function file to check for syntax errors, imports etc.
|
||||
exec_result = self._execute_code_dont_check_setup([CodeBlock(code=func_file_content, language="python")])
|
||||
|
||||
if exec_result.exit_code != 0:
|
||||
raise ValueError(f"Functions failed to load: {exec_result.output}")
|
||||
|
||||
self._setup_functions_complete = True
|
||||
|
||||
def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
||||
|
@ -201,10 +219,8 @@ $functions"""
|
|||
|
||||
Returns:
|
||||
CommandLineCodeResult: The result of the code execution."""
|
||||
|
||||
if not self._setup_functions_complete:
|
||||
self._setup_functions()
|
||||
|
||||
return self._execute_code_dont_check_setup(code_blocks)
|
||||
|
||||
def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
|
||||
|
@ -229,6 +245,7 @@ $functions"""
|
|||
logs_all += "\n" + f"unknown language {lang}"
|
||||
break
|
||||
|
||||
execute_code = self.execution_policies.get(lang, False)
|
||||
try:
|
||||
# Check if there is a filename comment
|
||||
filename = _get_file_name_from_content(code, self._work_dir)
|
||||
|
@ -239,15 +256,19 @@ $functions"""
|
|||
# create a file with an automatically generated name
|
||||
code_hash = md5(code.encode()).hexdigest()
|
||||
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
|
||||
|
||||
written_file = (self._work_dir / filename).resolve()
|
||||
with written_file.open("w", encoding="utf-8") as f:
|
||||
f.write(code)
|
||||
file_names.append(written_file)
|
||||
|
||||
program = sys.executable if lang.startswith("python") else _cmd(lang)
|
||||
cmd = [program, str(written_file.absolute())]
|
||||
if not execute_code:
|
||||
# Just return a message that the file is saved.
|
||||
logs_all += f"Code saved to {str(written_file)}\n"
|
||||
exitcode = 0
|
||||
continue
|
||||
|
||||
program = _cmd(lang)
|
||||
cmd = [program, str(written_file.absolute())]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
|
||||
|
|
|
@ -3,13 +3,22 @@ import re
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
filename_patterns = [
|
||||
re.compile(r"^<!-- (filename:)?(.+?) -->", re.DOTALL),
|
||||
re.compile(r"^/\* (filename:)?(.+?) \*/", re.DOTALL),
|
||||
re.compile(r"^// (filename:)?(.+?)$", re.DOTALL),
|
||||
re.compile(r"^# (filename:)?(.+?)$", re.DOTALL),
|
||||
]
|
||||
|
||||
|
||||
# Raises ValueError if the file is not in the workspace
|
||||
def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str]:
|
||||
first_line = code.split("\n")[0]
|
||||
first_line = code.split("\n")[0].strip()
|
||||
# TODO - support other languages
|
||||
if first_line.startswith("# filename:"):
|
||||
filename = first_line.split(":")[1].strip()
|
||||
for pattern in filename_patterns:
|
||||
matches = pattern.match(first_line)
|
||||
if matches is not None:
|
||||
filename = matches.group(2).strip()
|
||||
|
||||
# Handle relative paths in the filename
|
||||
path = Path(filename)
|
||||
|
@ -19,7 +28,6 @@ def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str
|
|||
# Throws an error if the file is not in the workspace
|
||||
relative = path.relative_to(workspace_path.resolve())
|
||||
return str(relative)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
@ -26,6 +26,34 @@ WINDOWS_SHELLS = ["ps1", "pwsh", "powershell"]
|
|||
PYTHON_VARIANTS = ["python", "Python", "py"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"lang, should_execute",
|
||||
[
|
||||
("python", False), # Python should not execute
|
||||
("bash", False), # Bash should not execute (its policy is set to False below)
|
||||
("html", False), # HTML should not execute
|
||||
("javascript", False), # JavaScript should not execute
|
||||
],
|
||||
)
|
||||
def test_execution_policy_enforcement(lang, should_execute):
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
executor = LocalCommandLineCodeExecutor(
|
||||
work_dir=temp_dir,
|
||||
execution_policies={"python": False, "bash": False, "html": False, "javascript": False, "css": False},
|
||||
)
|
||||
code = "print('Hello, world!')" if lang == "python" else "echo 'Hello, world!'"
|
||||
code_block = CodeBlock(code=code, language=lang)
|
||||
result = executor.execute_code_blocks([code_block])
|
||||
|
||||
if should_execute:
|
||||
assert "Hello, world!" in result.output, f"Expected execution for {lang}, but it didn't execute."
|
||||
else:
|
||||
assert "Hello, world!" not in result.output, f"Expected no execution for {lang}, but it executed."
|
||||
|
||||
# Ensure files are saved regardless of execution
|
||||
assert result.code_file is not None, f"Expected code file to be saved for {lang}, but it wasn't."
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cls", classes_to_test)
|
||||
def test_is_code_executor(cls) -> None:
|
||||
assert isinstance(cls, CodeExecutor)
|
||||
|
@ -114,6 +142,87 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
|
|||
assert file_line.strip() == code_line.strip()
|
||||
|
||||
|
||||
def test_local_commandline_code_executor_save_files() -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
|
||||
_test_save_files(executor, save_file_only=False)
|
||||
|
||||
|
||||
def test_local_commandline_code_executor_save_files_only() -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Using execution_policies to specify that no languages should execute
|
||||
executor = LocalCommandLineCodeExecutor(
|
||||
work_dir=temp_dir,
|
||||
execution_policies={"python": False, "bash": False, "javascript": False, "html": False, "css": False},
|
||||
)
|
||||
_test_save_files(executor, save_file_only=True)
|
||||
|
||||
|
||||
def _test_save_files(executor: CodeExecutor, save_file_only: bool) -> None:
|
||||
|
||||
def _check_output(code_result: CodeBlock, expected_output: str) -> bool:
# Returns True when the output matches the mode under test: in save-only mode the
# expected text must be absent from code_result.output; otherwise it must be present.
# NOTE(review): callers pass the value returned by executor.execute_code_blocks(),
# which is read via .output — the CodeBlock annotation looks wrong; confirm the result type.
|
||||
if save_file_only:
|
||||
return expected_output not in code_result.output
|
||||
else:
|
||||
return expected_output in code_result.output
|
||||
|
||||
# Test executable code block.
|
||||
|
||||
# Test saving to a given filename, Python.
|
||||
code_blocks = [CodeBlock(code="# filename: test.py\nimport sys; print('hello world!')", language="python")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert (
|
||||
code_result.exit_code == 0 and _check_output(code_result, "hello world!") and code_result.code_file is not None
|
||||
)
|
||||
assert os.path.basename(code_result.code_file) == "test.py"
|
||||
|
||||
# Test saving to a given filename without "filename" prefix, Python.
|
||||
code_blocks = [CodeBlock(code="# test.py\nimport sys; print('hello world!')", language="python")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert (
|
||||
code_result.exit_code == 0 and _check_output(code_result, "hello world!") and code_result.code_file is not None
|
||||
)
|
||||
assert os.path.basename(code_result.code_file) == "test.py"
|
||||
|
||||
# Test non-executable code block.
|
||||
|
||||
# Test saving to a given filename, Javascript.
|
||||
code_blocks = [CodeBlock(code="// filename: test.js\nconsole.log('hello world!')", language="javascript")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.js"
|
||||
|
||||
# Test saving to a given filename without "filename" prefix, Javascript.
|
||||
code_blocks = [CodeBlock(code="// test.js\nconsole.log('hello world!')", language="javascript")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.js"
|
||||
|
||||
# Test saving to a given filename, CSS.
|
||||
code_blocks = [CodeBlock(code="/* filename: test.css */\nh1 { color: red; }", language="css")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.css"
|
||||
|
||||
# Test saving to a given filename without "filename" prefix, CSS.
|
||||
code_blocks = [CodeBlock(code="/* test.css */\nh1 { color: red; }", language="css")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.css"
|
||||
|
||||
# Test saving to a given filename, HTML.
|
||||
code_blocks = [CodeBlock(code="<!-- filename: test.html -->\n<h1>hello world!</h1>", language="html")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.html"
|
||||
|
||||
# Test saving to a given filename without "filename" prefix, HTML.
|
||||
code_blocks = [CodeBlock(code="<!-- test.html -->\n<h1>hello world!</h1>", language="html")]
|
||||
code_result = executor.execute_code_blocks(code_blocks)
|
||||
assert code_result.exit_code == 0 and "hello world!" not in code_result.output and code_result.code_file is not None
|
||||
assert os.path.basename(code_result.code_file) == "test.html"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cls", classes_to_test)
|
||||
def test_commandline_code_executor_timeout(cls) -> None:
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
|
|
Loading…
Reference in New Issue