Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor with Mapping executor/saver #2303 (#2464)

* Add support for HTML, CSS and Javascript in LocalCommandLineCodeExecutor

* init branch

* init branch

* feat: test code execution added

* fix: test update

* fix: test

* fix: policy test

* feat: default policy

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
This commit is contained in:
asandez1 2024-04-23 22:21:24 -03:00 committed by GitHub
parent ebde196d6b
commit 31fe75ad0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 183 additions and 45 deletions

View File

@ -6,7 +6,7 @@ import warnings
from hashlib import md5
from pathlib import Path
from string import Template
from typing import Any, Callable, ClassVar, List, TypeVar, Union, cast
from typing import Any, Callable, ClassVar, Dict, List, Optional, Union
from typing_extensions import ParamSpec
@ -28,7 +28,31 @@ A = ParamSpec("A")
class LocalCommandLineCodeExecutor(CodeExecutor):
SUPPORTED_LANGUAGES: ClassVar[List[str]] = ["bash", "shell", "sh", "pwsh", "powershell", "ps1", "python"]
SUPPORTED_LANGUAGES: ClassVar[List[str]] = [
"bash",
"shell",
"sh",
"pwsh",
"powershell",
"ps1",
"python",
"javascript",
"html",
"css",
]
DEFAULT_EXECUTION_POLICY: ClassVar[Dict[str, bool]] = {
"bash": True,
"shell": True,
"sh": True,
"pwsh": True,
"powershell": True,
"ps1": True,
"python": True,
"javascript": False,
"html": False,
"css": False,
}
FUNCTION_PROMPT_TEMPLATE: ClassVar[
str
] = """You have access to the following user defined functions. They can be accessed from the module called `$module_name` by their function names.
@ -43,29 +67,27 @@ $functions"""
work_dir: Union[Path, str] = Path("."),
functions: List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]] = [],
functions_module: str = "functions",
execution_policies: Optional[Dict[str, bool]] = None,
):
"""(Experimental) A code executor class that executes code through a local command line
"""(Experimental) A code executor class that executes or saves LLM generated code through a local command line
environment.
**This will execute LLM generated code on the local machine.**
**This will execute or save LLM generated code on the local machine.**
Each code block is saved as a file and executed in a separate process in
the working directory, and a unique file is generated and saved in the
working directory for each code block.
The code blocks are executed in the order they are received.
Command line code is sanitized using regular expression match against a list of dangerous commands in order to prevent self-destructive
commands from being executed which may potentially affect the users environment.
Currently the only supported languages is Python and shell scripts.
For Python code, use the language "python" for the code block.
For shell scripts, use the language "bash", "shell", or "sh" for the code
block.
Each code block is saved as a file in the working directory. Depending on the execution policy,
the code may be executed in a separate process.
The code blocks are executed or saved in the order they are received.
Command line code is sanitized against a list of dangerous commands to prevent self-destructive commands from being executed,
which could potentially affect the user's environment. Supported languages include Python, shell scripts (bash, shell, sh),
PowerShell (pwsh, powershell, ps1), HTML, CSS, and JavaScript.
Execution policies determine whether each language's code blocks are executed or saved only.
Args:
timeout (int): The timeout for code execution. Default is 60.
work_dir (str): The working directory for the code execution. If None,
a default working directory will be used. The default working
directory is the current directory ".".
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any]]]): A list of functions that are available to the code executor. Default is an empty list.
timeout (int): The timeout for code execution, default is 60 seconds.
work_dir (Union[Path, str]): The working directory for code execution, defaults to the current directory.
functions (List[Union[FunctionWithRequirements[Any, A], Callable[..., Any], FunctionWithRequirementsStr]]): A list of callable functions available to the executor.
functions_module (str): The module name under which functions are accessible.
execution_policies (Optional[Dict[str, bool]]): A dictionary mapping languages to execution policies (True for execution, False for saving only). Defaults to class-wide DEFAULT_EXECUTION_POLICY.
"""
if timeout < 1:
@ -91,6 +113,10 @@ $functions"""
else:
self._setup_functions_complete = True
self.execution_policies = self.DEFAULT_EXECUTION_POLICY.copy()
if execution_policies is not None:
self.execution_policies.update(execution_policies)
def format_functions_for_prompt(self, prompt_template: str = FUNCTION_PROMPT_TEMPLATE) -> str:
"""(Experimental) Format the functions for a prompt.
@ -104,7 +130,6 @@ $functions"""
Returns:
str: The formatted prompt.
"""
template = Template(prompt_template)
return template.substitute(
module_name=self._functions_module,
@ -171,26 +196,19 @@ $functions"""
required_packages = list(set(flattened_packages))
if len(required_packages) > 0:
logging.info("Ensuring packages are installed in executor.")
cmd = [sys.executable, "-m", "pip", "install"]
cmd.extend(required_packages)
cmd = [sys.executable, "-m", "pip", "install"] + required_packages
try:
result = subprocess.run(
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)
)
except subprocess.TimeoutExpired as e:
raise ValueError("Pip install timed out") from e
if result.returncode != 0:
raise ValueError(f"Pip install failed. {result.stdout}, {result.stderr}")
# Attempt to load the function file to check for syntax errors, imports etc.
exec_result = self._execute_code_dont_check_setup([CodeBlock(code=func_file_content, language="python")])
if exec_result.exit_code != 0:
raise ValueError(f"Functions failed to load: {exec_result.output}")
self._setup_functions_complete = True
def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
@ -201,10 +219,8 @@ $functions"""
Returns:
CommandLineCodeResult: The result of the code execution."""
if not self._setup_functions_complete:
self._setup_functions()
return self._execute_code_dont_check_setup(code_blocks)
def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
@ -229,6 +245,7 @@ $functions"""
logs_all += "\n" + f"unknown language {lang}"
break
execute_code = self.execution_policies.get(lang, False)
try:
# Check if there is a filename comment
filename = _get_file_name_from_content(code, self._work_dir)
@ -239,15 +256,19 @@ $functions"""
# create a file with an automatically generated name
code_hash = md5(code.encode()).hexdigest()
filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
written_file = (self._work_dir / filename).resolve()
with written_file.open("w", encoding="utf-8") as f:
f.write(code)
file_names.append(written_file)
program = sys.executable if lang.startswith("python") else _cmd(lang)
cmd = [program, str(written_file.absolute())]
if not execute_code:
# Just return a message that the file is saved.
logs_all += f"Code saved to {str(written_file)}\n"
exitcode = 0
continue
program = _cmd(lang)
cmd = [program, str(written_file.absolute())]
try:
result = subprocess.run(
cmd, cwd=self._work_dir, capture_output=True, text=True, timeout=float(self._timeout)

View File

@ -3,13 +3,22 @@ import re
from pathlib import Path
from typing import Optional
# First-line comment forms that may carry a file name, one per comment syntax:
# HTML (<!-- -->), block comment (/* */), line comment (//) and hash comment (#).
# In every pattern group 1 is the optional "filename:" prefix and group 2 is
# the candidate file name.
filename_patterns = [
    re.compile(expression, re.DOTALL)
    for expression in (
        r"^<!-- (filename:)?(.+?) -->",
        r"^/\* (filename:)?(.+?) \*/",
        r"^// (filename:)?(.+?)$",
        r"^# (filename:)?(.+?)$",
    )
]
# Raises ValueError if the file is not in the workspace
def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str]:
first_line = code.split("\n")[0]
first_line = code.split("\n")[0].strip()
# TODO - support other languages
if first_line.startswith("# filename:"):
filename = first_line.split(":")[1].strip()
for pattern in filename_patterns:
matches = pattern.match(first_line)
if matches is not None:
filename = matches.group(2).strip()
# Handle relative paths in the filename
path = Path(filename)
@ -19,7 +28,6 @@ def _get_file_name_from_content(code: str, workspace_path: Path) -> Optional[str
# Throws an error if the file is not in the workspace
relative = path.relative_to(workspace_path.resolve())
return str(relative)
return None

View File

@ -26,6 +26,34 @@ WINDOWS_SHELLS = ["ps1", "pwsh", "powershell"]
PYTHON_VARIANTS = ["python", "Python", "py"]
@pytest.mark.parametrize(
    "lang, should_execute",
    [
        ("python", False),  # Python should not execute
        ("bash", False),  # Bash should not execute (its policy is set to False below)
        ("html", False),  # HTML should not execute
        ("javascript", False),  # JavaScript should not execute
    ],
)
def test_execution_policy_enforcement(lang: str, should_execute: bool) -> None:
    """Check that the executor honours per-language execution policies.

    The executor is built with an execution policy of False for every
    parametrized language, so each code block is expected to be written to a
    file without its output appearing in the result.

    Args:
        lang: Language tag given to the code block.
        should_execute: Whether the block's output is expected in the result.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        executor = LocalCommandLineCodeExecutor(
            work_dir=temp_dir,
            execution_policies={"python": False, "bash": False, "html": False, "javascript": False, "css": False},
        )
        # Python gets a print statement; every other language gets a shell-style echo.
        code = "print('Hello, world!')" if lang == "python" else "echo 'Hello, world!'"
        code_block = CodeBlock(code=code, language=lang)
        result = executor.execute_code_blocks([code_block])

        if should_execute:
            assert "Hello, world!" in result.output, f"Expected execution for {lang}, but it didn't execute."
        else:
            assert "Hello, world!" not in result.output, f"Expected no execution for {lang}, but it executed."

        # Ensure files are saved regardless of execution
        assert result.code_file is not None, f"Expected code file to be saved for {lang}, but it wasn't."
@pytest.mark.parametrize("cls", classes_to_test)
def test_is_code_executor(cls) -> None:
    """Each entry of ``classes_to_test`` must pass an isinstance check against CodeExecutor."""
    assert isinstance(cls, CodeExecutor)
@ -114,6 +142,87 @@ def _test_execute_code(py_variant, executor: CodeExecutor) -> None:
assert file_line.strip() == code_line.strip()
def test_local_commandline_code_executor_save_files() -> None:
    """Run the shared save-file checks against an executor built with no explicit policies."""
    with tempfile.TemporaryDirectory() as workspace:
        default_policy_executor = LocalCommandLineCodeExecutor(work_dir=workspace)
        _test_save_files(default_policy_executor, save_file_only=False)
def test_local_commandline_code_executor_save_files_only() -> None:
    """Run the shared save-file checks against an executor whose policies are all False."""
    # Using execution_policies to specify that no languages should execute
    save_only_policies = {"python": False, "bash": False, "javascript": False, "html": False, "css": False}
    with tempfile.TemporaryDirectory() as workspace:
        save_only_executor = LocalCommandLineCodeExecutor(
            work_dir=workspace,
            execution_policies=save_only_policies,
        )
        _test_save_files(save_only_executor, save_file_only=True)
def _test_save_files(executor: CodeExecutor, save_file_only: bool) -> None:
    """Check that code blocks are saved under the file name given in their first-line comment.

    Each case is submitted as a single code block. The result must have exit
    code 0, a non-None ``code_file`` whose base name is the expected one, and
    the text "hello world!" in its output only when the block is expected to
    have run.

    Args:
        executor: The executor under test.
        save_file_only: True when the caller configured the executor so that
            Python is saved rather than executed; Python output is then
            expected to be absent. The JavaScript, CSS and HTML cases expect
            no output in either mode.
    """
    # (code, language, expected file name, output expected when not save-only).
    # Every language is covered twice: with and without the "filename:" prefix.
    cases = [
        ("# filename: test.py\nimport sys; print('hello world!')", "python", "test.py", True),
        ("# test.py\nimport sys; print('hello world!')", "python", "test.py", True),
        ("// filename: test.js\nconsole.log('hello world!')", "javascript", "test.js", False),
        ("// test.js\nconsole.log('hello world!')", "javascript", "test.js", False),
        ("/* filename: test.css */\nh1 { color: red; }", "css", "test.css", False),
        ("/* test.css */\nh1 { color: red; }", "css", "test.css", False),
        ("<!-- filename: test.html -->\n<h1>hello world!</h1>", "html", "test.html", False),
        ("<!-- test.html -->\n<h1>hello world!</h1>", "html", "test.html", False),
    ]

    for code, language, expected_name, runs_when_allowed in cases:
        code_result = executor.execute_code_blocks([CodeBlock(code=code, language=language)])
        expect_output = runs_when_allowed and not save_file_only

        # One assertion per property so a failure points at the exact expectation.
        assert code_result.exit_code == 0, f"Non-zero exit code for {language} block {code!r}"
        assert code_result.code_file is not None, f"No code file reported for {language} block {code!r}"
        assert ("hello world!" in code_result.output) == expect_output, (
            f"Unexpected output for {language} block {code!r}: {code_result.output!r}"
        )
        assert os.path.basename(code_result.code_file) == expected_name
@pytest.mark.parametrize("cls", classes_to_test)
def test_commandline_code_executor_timeout(cls) -> None:
with tempfile.TemporaryDirectory() as temp_dir: