Reverts TeamOne prompts to those used in Autogen experiments. (#331)

* Revised prompts to match Autogen experiments.
* Handle sh code blocks.
* Move executor prompt into coder.py.
* Fixed formatting.
2024-08-07 13:52:45 -07:00 · 2024-08-07 13:52:45 -07:00 · c7f5931dc8
parent 39489ba395
commit c7f5931dc8
3 changed files with 41 additions and 40 deletions
--- a/python/benchmarks/GAIA/Templates/TeamOne/scenario.py
+++ b/python/benchmarks/GAIA/Templates/TeamOne/scenario.py
@@ -128,6 +128,7 @@ async def main() -> None:
        api_version="2024-02-15-preview",
        azure_endpoint="https://aif-complex-tasks-west-us-3.openai.azure.com/",
        model="gpt-4o-2024-05-13",
+        temperature=0.1,
        model_capabilities=ModelCapabilities(
            function_calling=True, json_output=True, vision=True
        ),
@@ -140,19 +141,17 @@ async def main() -> None:

    # Register agents.
    coder = await runtime.register_and_get_proxy(
-        "Coder",
+        "Assistant",
        lambda: Coder(model_client=client),
    )

    executor = await runtime.register_and_get_proxy(
-        "Executor",
-        lambda: Executor(
-            "A agent for executing code", executor=LocalCommandLineCodeExecutor()
-        ),
+        "ComputerTerminal",
+        lambda: Executor(executor=LocalCommandLineCodeExecutor()),
    )

    file_surfer = await runtime.register_and_get_proxy(
-        "file_surfer",
+        "FileSurfer",
        lambda: FileSurfer(model_client=client),
    )

@@ -161,7 +160,7 @@ async def main() -> None:
        lambda: MultimodalWebSurfer(), # Configuration is set later by init()
    )

-    orchestrator = await runtime.register_and_get_proxy("orchestrator", lambda: LedgerOrchestrator(
+    orchestrator = await runtime.register_and_get_proxy("Orchestrator", lambda: LedgerOrchestrator(
        agents=[coder, executor, file_surfer, web_surfer],
        model_client=client,
    ))
@@ -171,8 +170,6 @@ async def main() -> None:
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")

-    #await runtime.send_message(RequestReplyMessage(), user_proxy.id)
-
    filename_prompt = ""
    if len(filename) > 0:
        #relpath = os.path.join("coding", filename)
--- a/python/teams/team-one/src/team_one/agents/coder.py
+++ b/python/teams/team-one/src/team_one/agents/coder.py
@@ -17,26 +17,20 @@ from .base_worker import BaseWorker
 class Coder(BaseWorker):
    """An agent that uses tools to write, execute, and debug Python code."""

-    DEFAULT_DESCRIPTION = "A Python coder assistant."
+    DEFAULT_DESCRIPTION = "A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills."

    DEFAULT_SYSTEM_MESSAGES = [
-        SystemMessage("""You are a helpful AI assistant. Solve tasks using your Python coding skills. The code you output must be formatted in Markdown code blocks demarcated by triple backticks (```), and must print their final output to console. As an example:
-
-```python
-
-def main():
-    print("Hello world.")
-
-if __name__ == "__main__":
-    main()
-```
-
-The user cannot provide any feedback or perform any other action beyond executing the code you suggest. In particular, the user can't modify your code, and can't copy and paste anything, and can't fill in missing values. Thus, do not suggest incomplete code which requires users to perform any of these actions.
-
-The user will run all code that you provide, and will report back the results. When receiving the results, check if the output indicates an error. Fix the error. When fixing the error, output the full code, as before, instead of partial code or code changes -- code blocks must stand alone and be ready to execute without modification. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, and think of a different approach to try.
-
-If the code was executed, and the output appears to indicate that the original prolem was solved successful, reply "TERMINATE". UNDER NO OTHER CONDITIONS SHOULD "TERMINATE" BE USED.
-""")
+        SystemMessage("""You are a helpful AI assistant.
+Solve tasks using your coding and language skills.
+In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.
+    1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.
+    2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.
+Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.
+When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.
+If you want the user to save the code in a file before executing it, put # filename: <filename> inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.
+If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
+When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.
+Reply "TERMINATE" in the end when everything is done.""")
    ]

    def __init__(
@@ -59,8 +53,13 @@ If the code was executed, and the output appears to indicate that the original p


 class Executor(BaseWorker):
+    DEFAULT_DESCRIPTION = "A computer terminal that performs no other action than running Python scripts (provided to it quoted in ```python code blocks), or sh shell scripts (provided to it quoted in ```sh code blocks)"
+
    def __init__(
-        self, description: str, executor: Optional[CodeExecutor] = None, check_last_n_message: int = 5
+        self,
+        description: str = DEFAULT_DESCRIPTION,
+        executor: Optional[CodeExecutor] = None,
+        check_last_n_message: int = 5,
    ) -> None:
        super().__init__(description)
        self._executor = executor or LocalCommandLineCodeExecutor()
@@ -78,8 +77,13 @@ class Executor(BaseWorker):

            # Extract code block from the message.
            code = self._extract_execution_request(message_content_to_str(message.content))
+
            if code is not None:
-                execution_requests = [CodeBlock(code=code, language="python")]
+                code_lang = code[0]
+                code_block = code[1]
+                if code_lang == "py":
+                    code_lang = "python"
+                execution_requests = [CodeBlock(code=code_block, language=code_lang)]
                result = await self._executor.execute_code_blocks(execution_requests, cancellation_token)

                if result.output.strip() == "":
@@ -103,11 +107,11 @@ class Executor(BaseWorker):
            "No code block detected in the messages. Please provide a markdown-encoded code block to execute for the original task.",
        )

-    def _extract_execution_request(self, markdown_text: str) -> Union[str, None]:
+    def _extract_execution_request(self, markdown_text: str) -> Union[Tuple[str, str], None]:
        pattern = r"```(\w+)\n(.*?)\n```"
        # Search for the pattern in the markdown text
        match = re.search(pattern, markdown_text, re.DOTALL)
        # Extract the language and code block if a match is found
        if match:
-            return match.group(2)
+            return (match.group(1), match.group(2))
        return None
--- a/python/teams/team-one/src/team_one/agents/orchestrator_prompts.py
+++ b/python/teams/team-one/src/team_one/agents/orchestrator_prompts.py
@@ -67,15 +67,7 @@ To make progress on the request, please answer the following questions, includin
 Please output an answer in pure JSON format according to the following schema. The JSON object must be parsable as-is. DO NOT OUTPUT ANYTHING OTHER THAN JSON, AND DO NOT DEVIATE FROM THIS SCHEMA:

    {{
-        "next_speaker": {{
-            "reason": string,
-            "answer": string (select from: {names})
-        }},
-        "instruction_or_question": {{
-            "reason": string,
-            "answer": string
-        }},
-        "is_request_satisfied": {{
+       "is_request_satisfied": {{
            "reason": string,
            "answer": boolean
        }},
@@ -86,6 +78,14 @@ Please output an answer in pure JSON format according to the following schema. T
        "is_progress_being_made": {{
            "reason": string,
            "answer": boolean
+        }},
+        "next_speaker": {{
+            "reason": string,
+            "answer": string (select from: {names})
+        }},
+        "instruction_or_question": {{
+            "reason": string,
+            "answer": string
        }}
    }}
 """