diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index c9ebe82c32..90f19c327a 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -3,7 +3,7 @@ import random import re import sys from dataclasses import dataclass, field -from typing import Dict, List, Optional, Union, Tuple +from typing import Dict, List, Optional, Union, Tuple, Callable from ..code_utils import content_str @@ -42,7 +42,16 @@ class GroupChat: - "manual": the next speaker is selected manually by user input. - "random": the next speaker is selected randomly. - "round_robin": the next speaker is selected in a round robin fashion, i.e., iterating in the same order as provided in `agents`. - + - a customized speaker selection function (Callable): the function will be called to select the next speaker. + The function should take the last speaker and the group chat as input and return one of the following: + 1. an `Agent` class, it must be one of the agents in the group chat. + 2. a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use. + 3. None, which would terminate the conversation gracefully. + ```python + def custom_speaker_selection_func( + last_speaker: Agent, groupchat: GroupChat + ) -> Union[Agent, str, None]: + ``` - allow_repeat_speaker: whether to allow the same speaker to speak consecutively. Default is True, in which case all speakers are allowed to speak consecutively. If `allow_repeat_speaker` is a list of Agents, then only those listed agents are allowed to repeat. @@ -67,7 +76,7 @@ class GroupChat: max_round: Optional[int] = 10 admin_name: Optional[str] = "Admin" func_call_filter: Optional[bool] = True - speaker_selection_method: Optional[str] = "auto" + speaker_selection_method: Optional[Union[str, Callable]] = "auto" allow_repeat_speaker: Optional[Union[bool, List[Agent]]] = None allowed_or_disallowed_speaker_transitions: Optional[Dict] = None speaker_transitions_type: Optional[str] = None @@ -277,11 +286,36 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only return random.choice(agents) def _prepare_and_select_agents( - self, last_speaker: Agent + self, + last_speaker: Agent, ) -> Tuple[Optional[Agent], List[Agent], Optional[List[Dict]]]: - if self.speaker_selection_method.lower() not in self._VALID_SPEAKER_SELECTION_METHODS: + # If self.speaker_selection_method is a callable, call it to get the next speaker. + # If self.speaker_selection_method is a string, return it. + speaker_selection_method = self.speaker_selection_method + if isinstance(self.speaker_selection_method, Callable): + selected_agent = self.speaker_selection_method(last_speaker, self) + if selected_agent is None: + raise NoEligibleSpeakerException( + "Custom speaker selection function returned None. Terminating conversation." + ) + elif isinstance(selected_agent, Agent): + if selected_agent in self.agents: + return selected_agent, self.agents, None + else: + raise ValueError( + f"Custom speaker selection function returned an agent {selected_agent.name} not in the group chat." + ) + elif isinstance(selected_agent, str): + # If returned a string, assume it is a speaker selection method + speaker_selection_method = selected_agent + else: + raise ValueError( + f"Custom speaker selection function returned an object of type {type(selected_agent)} instead of Agent or str." 
+ ) + + if speaker_selection_method.lower() not in self._VALID_SPEAKER_SELECTION_METHODS: raise ValueError( - f"GroupChat speaker_selection_method is set to '{self.speaker_selection_method}'. " + f"GroupChat speaker_selection_method is set to '{speaker_selection_method}'. " f"It should be one of {self._VALID_SPEAKER_SELECTION_METHODS} (case insensitive). " ) @@ -300,7 +334,7 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only f"GroupChat is underpopulated with {n_agents} agents. " "Please add more agents to the GroupChat or use direct communication instead." ) - elif n_agents == 2 and self.speaker_selection_method.lower() != "round_robin" and allow_repeat_speaker: + elif n_agents == 2 and speaker_selection_method.lower() != "round_robin" and allow_repeat_speaker: logger.warning( f"GroupChat is underpopulated with {n_agents} agents. " "Consider setting speaker_selection_method to 'round_robin' or allow_repeat_speaker to False, " @@ -366,11 +400,11 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only # Use the selected speaker selection method select_speaker_messages = None - if self.speaker_selection_method.lower() == "manual": + if speaker_selection_method.lower() == "manual": selected_agent = self.manual_select_speaker(graph_eligible_agents) - elif self.speaker_selection_method.lower() == "round_robin": + elif speaker_selection_method.lower() == "round_robin": selected_agent = self.next_agent(last_speaker, graph_eligible_agents) - elif self.speaker_selection_method.lower() == "random": + elif speaker_selection_method.lower() == "random": selected_agent = self.random_select_speaker(graph_eligible_agents) else: selected_agent = None diff --git a/notebook/agentchat_custom_model.ipynb b/notebook/agentchat_custom_model.ipynb index 8af9ebf1f5..6a42906743 100644 --- a/notebook/agentchat_custom_model.ipynb +++ b/notebook/agentchat_custom_model.ipynb @@ -383,6 +383,7 @@ "source": [ "# load model here\n", "\n", + "\n", "config = config_list_custom[0]\n", "device = config.get(\"device\", \"cpu\")\n", "loaded_model = AutoModelForCausalLM.from_pretrained(config[\"model\"]).to(device)\n", diff --git a/notebook/agentchat_groupchat_customized.ipynb b/notebook/agentchat_groupchat_customized.ipynb new file mode 100644 index 0000000000..08f03e0f59 --- /dev/null +++ b/notebook/agentchat_groupchat_customized.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Group Chat with Customized Speaker Selection Method\n", + "\n", + "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n", + "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n", + "\n", + "In this notebook, we demonstrate how to pass a customized speaker selection method to GroupChat. The customized function looks like this:\n", + "\n", + "```python\n", + "def custom_speaker_selection_func(last_speaker, groupchat):\n", + " \"\"\"Define a customized speaker selection function.\n", + " A recommended way is to define a transition for each speaker in the groupchat.\n", + "\n", + " Parameters:\n", + " - last_speaker: Agent\n", + " The last speaker in the group chat.\n", + " - groupchat: GroupChat\n", + " The GroupChat object\n", + " Return:\n", + " Return one of the following:\n", + " 1. 
an `Agent` class, it must be one of the agents in the group chat.\n", + " 2. a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use.\n", + " 3. None, which indicates the chat should be terminated.\n", + " \"\"\"\n", + " pass\n", + "\n", + "groupchat = autogen.GroupChat(\n", + " speaker_selection_method=custom_speaker_selection_func,\n", + " ...,\n", + ")\n", + "```\n", + "The last speaker and the groupchat object are passed to the function. Commonly used variables from groupchat are `groupchat.messages` and `groupchat.agents`, which are the message history and the agents in the group chat, respectively. You can access other attributes of the groupchat, such as `groupchat.allowed_speaker_transitions_dict`, which stores the pre-defined allowed speaker transitions. \n", + "\n", + "\n", + "\n", + "\n", + "````{=mdx}\n", + ":::info Requirements\n", + "Install `pyautogen`:\n", + "```bash\n", + "pip install pyautogen\n", + "```\n", + "\n", + "For more information, please refer to the [installation guide](/docs/installation/).\n", + ":::\n", + "````" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set your API Endpoint\n", + "\n", + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import autogen\n", + "\n", + "config_list = autogen.config_list_from_json(\n", + " \"OAI_CONFIG_LIST\",\n", + " filter_dict={\n", + " \"model\": [\"gpt-4\", \"gpt-4-1106-preview\"],\n", + " },\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "````{=mdx}\n", + ":::tip\n", + "Learn more about configuring LLMs for agents [here](/docs/llm_configuration).\n", + ":::\n", + "````\n", + "\n", + "## Construct Agents\n", + "\n", + "- Planner: Give a plan and revise.\n", + "- Admin: Human in the loop to approve or terminate the process.\n", + "- Engineer: Retrieve papers from the internet by writing code.\n", + "- Executor: Execute the code.\n", + "- Scientist: Read the papers and write a summary.\n", + "\n", + "The pipeline is the following:\n", + "\n", + "1. The planner interacts with the Admin (user) to revise a plan. Only when the Admin types \"Approve\" can we move to the next step.\n", + "2. The engineer will write code to retrieve papers from the internet. The code will be executed by the executor.\n", + "3. When the code is executed successfully, the scientist will read the papers and write a summary.\n", + "4. The Admin will review the summary and give comments. When the Admin types \"TERMINATE\", the process will be terminated.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "gpt4_config = {\n", + " \"cache_seed\": 42, # change the cache_seed for different trials\n", + " \"temperature\": 0,\n", + " \"config_list\": config_list,\n", + " \"timeout\": 120,\n", + "}\n", + "\n", + "planner = autogen.AssistantAgent(\n", + " name=\"Planner\",\n", + " system_message=\"\"\"Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.\n", + "The plan may involve an engineer who can write code and a scientist who doesn't write code.\n", + "Explain the plan first. 
Be clear which step is performed by an engineer, and which step is performed by a scientist.\n", + "\"\"\",\n", + " llm_config=gpt4_config,\n", + ")\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"Admin\",\n", + " system_message=\"A human admin. Interact with the planner to discuss the plan. Plan execution needs to be approved by this admin.\",\n", + " code_execution_config=False,\n", + ")\n", + "\n", + "engineer = autogen.AssistantAgent(\n", + " name=\"Engineer\",\n", + " llm_config=gpt4_config,\n", + " system_message=\"\"\"Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.\n", + "Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.\n", + "If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\n", + "\"\"\",\n", + ")\n", + "scientist = autogen.AssistantAgent(\n", + " name=\"Scientist\",\n", + " llm_config=gpt4_config,\n", + " system_message=\"\"\"Scientist. You follow an approved plan. You are able to categorize papers after seeing their abstracts printed. You don't write code.\"\"\",\n", + ")\n", + "\n", + "executor = autogen.UserProxyAgent(\n", + " name=\"Executor\",\n", + " system_message=\"Executor. Execute the code written by the engineer and report the result.\",\n", + " human_input_mode=\"NEVER\",\n", + " code_execution_config={\n", + " \"last_n_messages\": 3,\n", + " \"work_dir\": \"paper\",\n", + " \"use_docker\": False,\n", + " }, # Please set use_docker=True if docker is available to run the generated code. 
Using docker is safer than running the generated code directly.\n", + ")\n", + "\n", + "from autogen import Agent\n", + "from typing import List, Dict\n", + "\n", + "\n", + "def custom_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):\n", + " \"\"\"Define a customized speaker selection function.\n", + " A recommended way is to define a transition for each speaker in the groupchat.\n", + "\n", + " Returns:\n", + " Return an `Agent` class or a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use.\n", + " \"\"\"\n", + " messages = groupchat.messages\n", + "\n", + " if len(messages) <= 1:\n", + " return planner\n", + "\n", + " if last_speaker is user_proxy:\n", + " if \"Approve\" in messages[-1][\"content\"]:\n", + " # If the last message is approved, let the engineer speak\n", + " return engineer\n", + " elif messages[-2][\"name\"] == \"Planner\":\n", + " # If it is the planning stage, let the planner continue\n", + " return planner\n", + " elif messages[-2][\"name\"] == \"Scientist\":\n", + " # If the previous speaker was the scientist, let the scientist continue\n", + " return scientist\n", + "\n", + " elif last_speaker is planner:\n", + " # Always let the user speak after the planner\n", + " return user_proxy\n", + "\n", + " elif last_speaker is engineer:\n", + " if \"```python\" in messages[-1][\"content\"]:\n", + " # If the last message is a python code block, let the executor speak\n", + " return executor\n", + " else:\n", + " # Otherwise, let the engineer continue\n", + " return engineer\n", + "\n", + " elif last_speaker is executor:\n", + " if \"exitcode: 1\" in messages[-1][\"content\"]:\n", + " # If the last message indicates an error, let the engineer improve the code\n", + " return engineer\n", + " else:\n", + " # Otherwise, let the scientist speak\n", + " return scientist\n", + "\n", + " elif last_speaker is scientist:\n", + " # Always let the user speak after the scientist\n", + " return user_proxy\n", + "\n", + " else:\n", + " return \"random\"\n", + "\n", + "\n", + "groupchat = autogen.GroupChat(\n", + " agents=[user_proxy, engineer, scientist, planner, executor],\n", + " messages=[],\n", + " max_round=20,\n", + " speaker_selection_method=custom_speaker_selection_func,\n", + ")\n", + "manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=gpt4_config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Chat" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mAdmin\u001b[0m (to chat_manager):\n", + "\n", + "Find a latest paper about gpt-4 on arxiv and find its potential applications in software.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mPlanner\u001b[0m (to chat_manager):\n", + "\n", + "**Initial Plan:**\n", + "\n", + "1. **Scientist's Task: Literature Review**\n", + " - The scientist will conduct a comprehensive literature review to find the latest paper about GPT-4 on arXiv. This involves using search queries related to GPT-4 and filtering results by the most recent publications.\n", + "\n", + "2. 
**Scientist's Task: Analysis of the Paper**\n", + " - Once the latest paper is identified, the scientist will read through the paper to understand its contents, focusing on the methodology, results, and discussions about potential applications in software.\n", + "\n", + "3. **Scientist's Task: Identifying Potential Applications**\n", + " - The scientist will then brainstorm and list potential applications of GPT-4 in software, based on the findings from the paper. This may include applications in natural language processing, code generation, chatbots, and more.\n", + "\n", + "4. **Engineer's Task: Technical Feasibility Assessment**\n", + " - The engineer will review the list of potential applications provided by the scientist and assess the technical feasibility of each application. This involves considering the current state of software technology, the capabilities of GPT-4, and the practicality of integrating GPT-4 into existing systems.\n", + "\n", + "5. **Engineer's Task: Prototype Development Plan**\n", + " - For applications deemed technically feasible, the engineer will draft a plan for developing a prototype that demonstrates the use of GPT-4 in a software application. This plan will outline the required resources, estimated timeline, and the steps for implementation.\n", + "\n", + "6. **Joint Task: Finalizing the Plan**\n", + " - The scientist and engineer will collaborate to finalize the plan, ensuring that it is scientifically sound and technically viable. They will prepare a document detailing the plan for potential applications and the prototype development.\n", + "\n", + "7. **Presentation to Admin**\n", + " - The finalized plan will be presented to the admin for approval. The admin will review the plan and provide feedback.\n", + "\n", + "8. **Revisions Based on Feedback**\n", + " - Based on the admin's feedback, the scientist and engineer will make necessary revisions to the plan. This iterative process will continue until the admin approves the plan.\n", + "\n", + "**Awaiting Admin's Feedback:** Please review the initial plan and provide feedback on any adjustments or additional details you would like to see.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mAdmin\u001b[0m (to chat_manager):\n", + "\n", + "Approve\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mEngineer\u001b[0m (to chat_manager):\n", + "\n", + "Since the plan has been approved, I will now proceed with the first step, which is to find the latest paper about GPT-4 on arXiv. 
To do this, I will write a Python script that uses the arXiv API to search for papers related to GPT-4 and filter them by the most recent publications.\n", + "\n", + "Here is the Python script that accomplishes this task:\n", + "\n", + "```python\n", + "import requests\n", + "from datetime import datetime\n", + "\n", + "# Define the URL for the arXiv API\n", + "ARXIV_API_URL = \"http://export.arxiv.org/api/query\"\n", + "\n", + "# Define the search parameters\n", + "search_query = \"all:gpt-4\"\n", + "start = 0\n", + "max_results = 1\n", + "sort_by = \"submittedDate\"\n", + "sort_order = \"descending\"\n", + "\n", + "# Construct the query\n", + "query_params = {\n", + " \"search_query\": search_query,\n", + " \"start\": start,\n", + " \"max_results\": max_results,\n", + " \"sortBy\": sort_by,\n", + " \"sortOrder\": sort_order\n", + "}\n", + "\n", + "# Send the request to the arXiv API\n", + "response = requests.get(ARXIV_API_URL, params=query_params)\n", + "\n", + "# Check if the request was successful\n", + "if response.status_code == 200:\n", + " # Parse the response\n", + " feed = response.text\n", + " # Find the entry element, which contains the paper information\n", + " start_entry = feed.find('')\n", + " end_entry = feed.find('')\n", + " entry = feed[start_entry:end_entry]\n", + " \n", + " # Extract the title\n", + " start_title = entry.find('') + 7\n", + " end_title = entry.find('')\n", + " title = entry[start_title:end_title].strip()\n", + " \n", + " # Extract the published date\n", + " start_published = entry.find('') + 12\n", + " end_published = entry.find('')\n", + " published = entry[start_published:end_published].strip()\n", + " \n", + " # Extract the summary\n", + " start_summary = entry.find('') + 9\n", + " end_summary = entry.find('')\n", + " summary = entry[start_summary:end_summary].strip()\n", + " \n", + " # Extract the authors\n", + " authors = []\n", + " start_author = entry.find('')\n", + " end_author = entry.find('')\n", + " while start_author != -1 and end_author != -1:\n", + " start_name = entry.find('', start_author) + 6\n", + " end_name = entry.find('', start_author)\n", + " author_name = entry[start_name:end_name].strip()\n", + " authors.append(author_name)\n", + " start_author = entry.find('', end_author)\n", + " end_author = entry.find('', start_author)\n", + " \n", + " # Print the results\n", + " print(f\"Title: {title}\")\n", + " print(f\"Published Date: {published}\")\n", + " print(f\"Authors: {', '.join(authors)}\")\n", + " print(f\"Summary: {summary}\")\n", + "else:\n", + " print(\"Failed to retrieve data from arXiv API.\")\n", + "```\n", + "\n", + "This script will output the title, published date, authors, and summary of the most recent paper related to GPT-4 on arXiv. 
Please note that the actual content of the paper and its potential applications in software will need to be analyzed manually after retrieving the paper information.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[31m\n", + ">>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...\u001b[0m\n", + "\u001b[33mExecutor\u001b[0m (to chat_manager):\n", + "\n", + "exitcode: 0 (execution succeeded)\n", + "Code output: \n", + "Title: A Data-Centric Approach To Generate Faithful and High Quality Patient\n", + " Summaries with Large Language Models\n", + "Published Date: 024-02-23T16:32:28Z\n", + "Authors: Stefan Hegselmann, Shannon Zejiang Shen, Florian Gierse, Monica Agrawal, David Sontag, Xiaoyi Jiang\n", + "Summary: Patients often face difficulties in understanding their hospitalizations,\n", + "while healthcare workers have limited resources to provide explanations. In\n", + "this work, we investigate the potential of large language models to generate\n", + "patient summaries based on doctors' notes and study the effect of training data\n", + "on the faithfulness and quality of the generated summaries. To this end, we\n", + "develop a rigorous labeling protocol for hallucinations, and have two medical\n", + "experts annotate 100 real-world summaries and 100 generated summaries. We show\n", + "that fine-tuning on hallucination-free data effectively reduces hallucinations\n", + "from 2.60 to 1.55 per summary for Llama 2, while preserving relevant\n", + "information. Although the effect is still present, it is much smaller for GPT-4\n", + "when prompted with five examples (0.70 to 0.40). We also conduct a qualitative\n", + "evaluation using hallucination-free and improved training data. GPT-4 shows\n", + "very good results even in the zero-shot setting. We find that common\n", + "quantitative metrics do not correlate well with faithfulness and quality.\n", + "Finally, we test GPT-4 for automatic hallucination detection, which yields\n", + "promising results.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mScientist\u001b[0m (to chat_manager):\n", + "\n", + "Based on the abstract provided, the paper titled \"A Data-Centric Approach To Generate Faithful and High Quality Patient Summaries with Large Language Models\" explores the use of large language models, including GPT-4, to generate patient summaries from doctors' notes. The study focuses on the impact of training data on the faithfulness and quality of the generated summaries and also investigates the potential of GPT-4 for automatic hallucination detection.\n", + "\n", + "**Potential Applications in Software:**\n", + "\n", + "1. **Healthcare Documentation Automation:**\n", + " - GPT-4 could be used to develop software that assists healthcare professionals in creating accurate and comprehensive patient summaries by automatically processing doctors' notes and other medical records.\n", + "\n", + "2. **Clinical Decision Support Systems:**\n", + " - Integrating GPT-4 into clinical decision support systems could provide healthcare workers with insights and suggestions based on a patient's medical history, potentially improving diagnosis and treatment planning.\n", + "\n", + "3. 
**Patient Education and Communication:**\n", + " - Software applications could leverage GPT-4 to translate complex medical information into patient-friendly summaries, enhancing patient understanding of their health conditions and treatments.\n", + "\n", + "4. **Medical Training and Simulation:**\n", + " - GPT-4 could be used to create realistic medical scenarios for training medical students and professionals, simulating patient interactions and generating case studies.\n", + "\n", + "5. **Data Quality Assurance:**\n", + " - The paper suggests that GPT-4 can be used for automatic hallucination detection, which refers to the identification of inaccuracies or fabrications in generated text. This could be applied to software that ensures the quality and reliability of medical documentation.\n", + "\n", + "6. **Research and Development:**\n", + " - GPT-4 could assist researchers in summarizing and synthesizing large volumes of medical literature, aiding in the discovery of new insights and the development of novel treatments.\n", + "\n", + "7. **Personalized Health Monitoring:**\n", + " - Software applications could use GPT-4 to provide personalized health monitoring and advice by analyzing user input, such as symptoms or lifestyle factors, and generating tailored health recommendations.\n", + "\n", + "These potential applications highlight the versatility of GPT-4 in the realm of healthcare software, offering opportunities to enhance patient care, improve healthcare workflows, and support medical education and research.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mAdmin\u001b[0m (to chat_manager):\n", + "\n", + "TERMINATE\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "ChatResult(chat_id=None, chat_history=[{'content': 'Find a latest paper about gpt-4 on arxiv and find its potential applications in software.', 'role': 'assistant'}, {'content': \"**Initial Plan:**\\n\\n1. **Scientist's Task: Literature Review**\\n - The scientist will conduct a comprehensive literature review to find the latest paper about GPT-4 on arXiv. This involves using search queries related to GPT-4 and filtering results by the most recent publications.\\n\\n2. **Scientist's Task: Analysis of the Paper**\\n - Once the latest paper is identified, the scientist will read through the paper to understand its contents, focusing on the methodology, results, and discussions about potential applications in software.\\n\\n3. **Scientist's Task: Identifying Potential Applications**\\n - The scientist will then brainstorm and list potential applications of GPT-4 in software, based on the findings from the paper. This may include applications in natural language processing, code generation, chatbots, and more.\\n\\n4. **Engineer's Task: Technical Feasibility Assessment**\\n - The engineer will review the list of potential applications provided by the scientist and assess the technical feasibility of each application. This involves considering the current state of software technology, the capabilities of GPT-4, and the practicality of integrating GPT-4 into existing systems.\\n\\n5. **Engineer's Task: Prototype Development Plan**\\n - For applications deemed technically feasible, the engineer will draft a plan for developing a prototype that demonstrates the use of GPT-4 in a software application. 
This plan will outline the required resources, estimated timeline, and the steps for implementation.\\n\\n6. **Joint Task: Finalizing the Plan**\\n - The scientist and engineer will collaborate to finalize the plan, ensuring that it is scientifically sound and technically viable. They will prepare a document detailing the plan for potential applications and the prototype development.\\n\\n7. **Presentation to Admin**\\n - The finalized plan will be presented to the admin for approval. The admin will review the plan and provide feedback.\\n\\n8. **Revisions Based on Feedback**\\n - Based on the admin's feedback, the scientist and engineer will make necessary revisions to the plan. This iterative process will continue until the admin approves the plan.\\n\\n**Awaiting Admin's Feedback:** Please review the initial plan and provide feedback on any adjustments or additional details you would like to see.\", 'name': 'Planner', 'role': 'user'}, {'content': 'Approve', 'role': 'assistant'}, {'content': 'Since the plan has been approved, I will now proceed with the first step, which is to find the latest paper about GPT-4 on arXiv. To do this, I will write a Python script that uses the arXiv API to search for papers related to GPT-4 and filter them by the most recent publications.\\n\\nHere is the Python script that accomplishes this task:\\n\\n```python\\nimport requests\\nfrom datetime import datetime\\n\\n# Define the URL for the arXiv API\\nARXIV_API_URL = \"http://export.arxiv.org/api/query\"\\n\\n# Define the search parameters\\nsearch_query = \"all:gpt-4\"\\nstart = 0\\nmax_results = 1\\nsort_by = \"submittedDate\"\\nsort_order = \"descending\"\\n\\n# Construct the query\\nquery_params = {\\n \"search_query\": search_query,\\n \"start\": start,\\n \"max_results\": max_results,\\n \"sortBy\": sort_by,\\n \"sortOrder\": sort_order\\n}\\n\\n# Send the request to the arXiv API\\nresponse = requests.get(ARXIV_API_URL, params=query_params)\\n\\n# Check if the request was successful\\nif response.status_code == 200:\\n # Parse the response\\n feed = response.text\\n # Find the entry element, which contains the paper information\\n start_entry = feed.find(\\'\\')\\n end_entry = feed.find(\\'\\')\\n entry = feed[start_entry:end_entry]\\n \\n # Extract the title\\n start_title = entry.find(\\'\\') + 7\\n end_title = entry.find(\\'\\')\\n title = entry[start_title:end_title].strip()\\n \\n # Extract the published date\\n start_published = entry.find(\\'\\') + 12\\n end_published = entry.find(\\'\\')\\n published = entry[start_published:end_published].strip()\\n \\n # Extract the summary\\n start_summary = entry.find(\\'\\') + 9\\n end_summary = entry.find(\\'\\')\\n summary = entry[start_summary:end_summary].strip()\\n \\n # Extract the authors\\n authors = []\\n start_author = entry.find(\\'\\')\\n end_author = entry.find(\\'\\')\\n while start_author != -1 and end_author != -1:\\n start_name = entry.find(\\'\\', start_author) + 6\\n end_name = entry.find(\\'\\', start_author)\\n author_name = entry[start_name:end_name].strip()\\n authors.append(author_name)\\n start_author = entry.find(\\'\\', end_author)\\n end_author = entry.find(\\'\\', start_author)\\n \\n # Print the results\\n print(f\"Title: {title}\")\\n print(f\"Published Date: {published}\")\\n print(f\"Authors: {\\', \\'.join(authors)}\")\\n print(f\"Summary: {summary}\")\\nelse:\\n print(\"Failed to retrieve data from arXiv API.\")\\n```\\n\\nThis script will output the title, published date, authors, and summary of the most recent paper 
related to GPT-4 on arXiv. Please note that the actual content of the paper and its potential applications in software will need to be analyzed manually after retrieving the paper information.', 'name': 'Engineer', 'role': 'user'}, {'content': \"exitcode: 0 (execution succeeded)\\nCode output: \\nTitle: A Data-Centric Approach To Generate Faithful and High Quality Patient\\n Summaries with Large Language Models\\nPublished Date: 024-02-23T16:32:28Z\\nAuthors: Stefan Hegselmann, Shannon Zejiang Shen, Florian Gierse, Monica Agrawal, David Sontag, Xiaoyi Jiang\\nSummary: Patients often face difficulties in understanding their hospitalizations,\\nwhile healthcare workers have limited resources to provide explanations. In\\nthis work, we investigate the potential of large language models to generate\\npatient summaries based on doctors' notes and study the effect of training data\\non the faithfulness and quality of the generated summaries. To this end, we\\ndevelop a rigorous labeling protocol for hallucinations, and have two medical\\nexperts annotate 100 real-world summaries and 100 generated summaries. We show\\nthat fine-tuning on hallucination-free data effectively reduces hallucinations\\nfrom 2.60 to 1.55 per summary for Llama 2, while preserving relevant\\ninformation. Although the effect is still present, it is much smaller for GPT-4\\nwhen prompted with five examples (0.70 to 0.40). We also conduct a qualitative\\nevaluation using hallucination-free and improved training data. GPT-4 shows\\nvery good results even in the zero-shot setting. We find that common\\nquantitative metrics do not correlate well with faithfulness and quality.\\nFinally, we test GPT-4 for automatic hallucination detection, which yields\\npromising results.\\n\", 'name': 'Executor', 'role': 'user'}, {'content': 'Based on the abstract provided, the paper titled \"A Data-Centric Approach To Generate Faithful and High Quality Patient Summaries with Large Language Models\" explores the use of large language models, including GPT-4, to generate patient summaries from doctors\\' notes. The study focuses on the impact of training data on the faithfulness and quality of the generated summaries and also investigates the potential of GPT-4 for automatic hallucination detection.\\n\\n**Potential Applications in Software:**\\n\\n1. **Healthcare Documentation Automation:**\\n - GPT-4 could be used to develop software that assists healthcare professionals in creating accurate and comprehensive patient summaries by automatically processing doctors\\' notes and other medical records.\\n\\n2. **Clinical Decision Support Systems:**\\n - Integrating GPT-4 into clinical decision support systems could provide healthcare workers with insights and suggestions based on a patient\\'s medical history, potentially improving diagnosis and treatment planning.\\n\\n3. **Patient Education and Communication:**\\n - Software applications could leverage GPT-4 to translate complex medical information into patient-friendly summaries, enhancing patient understanding of their health conditions and treatments.\\n\\n4. **Medical Training and Simulation:**\\n - GPT-4 could be used to create realistic medical scenarios for training medical students and professionals, simulating patient interactions and generating case studies.\\n\\n5. **Data Quality Assurance:**\\n - The paper suggests that GPT-4 can be used for automatic hallucination detection, which refers to the identification of inaccuracies or fabrications in generated text. 
This could be applied to software that ensures the quality and reliability of medical documentation.\\n\\n6. **Research and Development:**\\n - GPT-4 could assist researchers in summarizing and synthesizing large volumes of medical literature, aiding in the discovery of new insights and the development of novel treatments.\\n\\n7. **Personalized Health Monitoring:**\\n - Software applications could use GPT-4 to provide personalized health monitoring and advice by analyzing user input, such as symptoms or lifestyle factors, and generating tailored health recommendations.\\n\\nThese potential applications highlight the versatility of GPT-4 in the realm of healthcare software, offering opportunities to enhance patient care, improve healthcare workflows, and support medical education and research.', 'name': 'Scientist', 'role': 'user'}, {'content': 'TERMINATE', 'role': 'assistant'}], summary='', cost=({'total_cost': 0}, {'total_cost': 0}), human_input=['Approve', 'TERMINATE'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_proxy.initiate_chat(\n", + " manager, message=\"Find a latest paper about gpt-4 on arxiv and find its potential applications in software.\"\n", + ")\n", + "# type exit to terminate the chat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "flaml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebook/agentchat_lmm_gpt-4v.ipynb b/notebook/agentchat_lmm_gpt-4v.ipynb index c56c6e6a1d..b49f4472a5 100644 --- a/notebook/agentchat_lmm_gpt-4v.ipynb +++ b/notebook/agentchat_lmm_gpt-4v.ipynb @@ -637,8 +637,6 @@ } ], "source": [ - "\n", - "\n", "creator = FigureCreator(name=\"Figure Creator~\", llm_config=gpt4_llm_config)\n", "\n", "user_proxy = autogen.UserProxyAgent(\n", diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py index d3d07c5b1e..54b8e9f7b1 100755 --- a/test/agentchat/test_groupchat.py +++ b/test/agentchat/test_groupchat.py @@ -1,14 +1,13 @@ #!/usr/bin/env python3 -m pytest from typing import Any, Dict, List, Optional, Type -from autogen import AgentNameConflict +from autogen import AgentNameConflict, Agent, GroupChat import pytest from unittest import mock import builtins import autogen import json import sys -from autogen import Agent, GroupChat def test_func_call_groupchat(): @@ -663,7 +662,7 @@ def test_graceful_exit_before_max_round(): max_consecutive_auto_reply=10, human_input_mode="NEVER", llm_config=False, - default_auto_reply="This is sam speaking. TERMINATE", + default_auto_reply="This is sam speaking.", ) # This speaker_transitions limits the transition to be only from agent1 to agent2, and from agent2 to agent3 and end. 
@@ -682,7 +681,7 @@ def test_graceful_exit_before_max_round(): group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=False, is_termination_msg=None) - agent1.initiate_chat(group_chat_manager, message="'None' is_termination_msg function.") + agent1.initiate_chat(group_chat_manager, message="") # Note that 3 is much lower than 10 (max_round), so the conversation should end before 10 rounds. assert len(groupchat.messages) == 3 @@ -1007,6 +1006,184 @@ def test_nested_teams_chat(): assert reply["content"] == team2_msg["content"] +def test_custom_speaker_selection(): + a1 = autogen.UserProxyAgent( + name="a1", + default_auto_reply="This is a1 speaking.", + human_input_mode="NEVER", + code_execution_config={}, + ) + + a2 = autogen.UserProxyAgent( + name="a2", + default_auto_reply="This is a2 speaking.", + human_input_mode="NEVER", + code_execution_config={}, + ) + + a3 = autogen.UserProxyAgent( + name="a3", + default_auto_reply="TERMINATE", + human_input_mode="NEVER", + code_execution_config={}, + ) + + def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat) -> Agent: + """Define a customized speaker selection function. + A recommended way is to define a transition for each speaker using the groupchat allowed_or_disallowed_speaker_transitions parameter. + """ + if last_speaker is a1: + return a2 + elif last_speaker is a2: + return a3 + + groupchat = autogen.GroupChat( + agents=[a1, a2, a3], + messages=[], + max_round=20, + speaker_selection_method=custom_speaker_selection_func, + ) + manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=False) + + result = a1.initiate_chat(manager, message="Hello, this is a1 speaking.") + assert len(result.chat_history) == 3 + + +def test_custom_speaker_selection_with_transition_graph(): + """ + In this test, although speaker_selection_method is defined, the speaker transitions are also defined. + There are 26 agents here, a to z. + The speaker transitions are defined such that the agents can transition to the next alphabet. + In addition, because we want the transition order to be a,u,t,o,g,e,n, we also define the speaker transitions for these agents. + The speaker_selection_method is defined to return the next agent in the expected sequence. + """ + + # For loop that creates UserProxyAgent with names from a to z + agents = [ + autogen.UserProxyAgent( + name=chr(97 + i), + default_auto_reply=f"My name is {chr(97 + i)}", + human_input_mode="NEVER", + code_execution_config={}, + ) + for i in range(26) + ] + + # Initiate allowed speaker transitions + allowed_or_disallowed_speaker_transitions = {} + + # Each agent can transition to the next alphabet as a baseline + # Key is Agent, value is a list of Agents that the key Agent can transition to + for i in range(25): + allowed_or_disallowed_speaker_transitions[agents[i]] = [agents[i + 1]] + + # The test is to make sure that the agent sequence is a,u,t,o,g,e,n, so we need to add those transitions + expected_sequence = ["a", "u", "t", "o", "g", "e", "n"] + current_agent = None + previous_agent = None + + for char in expected_sequence: + # convert char to i so that we can use chr(97+i) + current_agent = agents[ord(char) - 97] + if previous_agent is not None: + # Add transition + allowed_or_disallowed_speaker_transitions[previous_agent].append(current_agent) + previous_agent = current_agent + + def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat) -> Optional[Agent]: + """ + Define a customized speaker selection function. 
+ """ + expected_sequence = ["a", "u", "t", "o", "g", "e", "n"] + + last_speaker_char = last_speaker.name + # Find the index of last_speaker_char in the expected_sequence + last_speaker_index = expected_sequence.index(last_speaker_char) + # Return the next agent in the expected sequence + if last_speaker_index == len(expected_sequence) - 1: + return None # terminate the conversation + else: + next_agent = agents[ord(expected_sequence[last_speaker_index + 1]) - 97] + return next_agent + + groupchat = autogen.GroupChat( + agents=agents, + messages=[], + max_round=20, + speaker_selection_method=custom_speaker_selection_func, + allowed_or_disallowed_speaker_transitions=allowed_or_disallowed_speaker_transitions, + speaker_transitions_type="allowed", + ) + manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=False) + + results = agents[0].initiate_chat(manager, message="My name is a") + actual_sequence = [] + + # Append to actual_sequence using results.chat_history[idx]['content'][-1] + for idx in range(len(results.chat_history)): + actual_sequence.append(results.chat_history[idx]["content"][-1]) # append the last character of the content + + assert expected_sequence == actual_sequence + + +def test_custom_speaker_selection_overrides_transition_graph(): + """ + In this test, team A engineer can transition to team A executor and team B engineer, but team B engineer cannot transition to team A executor. + The expected behaviour is that the custom speaker selection function will override the constraints of the graph. + """ + + # For loop that creates UserProxyAgent with names from a to z + agents = [ + autogen.UserProxyAgent( + name="teamA_engineer", + default_auto_reply="My name is teamA_engineer", + human_input_mode="NEVER", + code_execution_config={}, + ), + autogen.UserProxyAgent( + name="teamA_executor", + default_auto_reply="My name is teamA_executor", + human_input_mode="NEVER", + code_execution_config={}, + ), + autogen.UserProxyAgent( + name="teamB_engineer", + default_auto_reply="My name is teamB_engineer", + human_input_mode="NEVER", + code_execution_config={}, + ), + ] + + allowed_or_disallowed_speaker_transitions = {} + + # teamA_engineer can transition to teamA_executor and teamB_engineer + # teamB_engineer can transition to no one + allowed_or_disallowed_speaker_transitions[agents[0]] = [agents[1], agents[2]] + + def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat) -> Optional[Agent]: + if last_speaker.name == "teamA_engineer": + return agents[2] # Goto teamB_engineer + elif last_speaker.name == "teamB_engineer": + return agents[1] # Goto teamA_executor and contradict the graph + + groupchat = autogen.GroupChat( + agents=agents, + messages=[], + max_round=20, + speaker_selection_method=custom_speaker_selection_func, + allowed_or_disallowed_speaker_transitions=allowed_or_disallowed_speaker_transitions, + speaker_transitions_type="allowed", + ) + manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=False) + results = agents[0].initiate_chat(manager, message="My name is teamA_engineer") + + speakers = [] + for idx in range(len(results.chat_history)): + speakers.append(results.chat_history[idx].get("name")) + + assert "teamA_executor" in speakers + + if __name__ == "__main__": # test_func_call_groupchat() # test_broadcast() @@ -1017,7 +1194,9 @@ if __name__ == "__main__": # test_agent_mentions() # test_termination() # test_next_agent() - test_send_intros() + # test_send_intros() # test_invalid_allow_repeat_speaker() # 
test_graceful_exit_before_max_round() # test_clear_agents_history() + test_custom_speaker_selection_overrides_transition_graph() + # pass diff --git a/website/docs/Examples.md b/website/docs/Examples.md index 797f9e4897..70cd985c03 100644 --- a/website/docs/Examples.md +++ b/website/docs/Examples.md @@ -22,6 +22,7 @@ Links to notebook examples: - Automated Task Solving with Coding & Planning Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_planning.ipynb) - Automated Task Solving with transition paths specified in a graph - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_graph_modelling_language_using_select_speaker.ipynb) - Running a group chat as an inner-monolgue via the SocietyOfMindAgent - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_society_of_mind.ipynb) + - Running a group chat with a custom speaker selection function - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_customized.ipynb) 1. **Sequential Multi-Agent Chats** - Solving Multiple Tasks in a Sequence of Chats Initiated by a Single Agent - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_multi_task_chats.ipynb)
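A minimal usage sketch of the callable `speaker_selection_method` introduced above. The agent names, their canned replies, and the round limit are illustrative assumptions; only the function signature and the return contract (an `Agent`, one of the built-in method names, or `None` to end the chat) come from the change itself.

```python
from typing import Union

import autogen
from autogen import Agent, GroupChat

# Two stand-in agents that auto-reply without an LLM or human input.
alice = autogen.UserProxyAgent(
    name="alice",
    default_auto_reply="This is alice speaking.",
    human_input_mode="NEVER",
    code_execution_config=False,
)
bob = autogen.UserProxyAgent(
    name="bob",
    default_auto_reply="This is bob speaking.",
    human_input_mode="NEVER",
    code_execution_config=False,
)


def alternate_speakers(last_speaker: Agent, groupchat: GroupChat) -> Union[Agent, str, None]:
    # Alternate between the two agents; returning None would end the chat gracefully.
    if last_speaker is alice:
        return bob
    if last_speaker is bob:
        return alice
    return None


groupchat = autogen.GroupChat(
    agents=[alice, bob],
    messages=[],
    max_round=6,
    speaker_selection_method=alternate_speakers,
)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=False)
alice.initiate_chat(manager, message="Hello bob, this is alice.")
```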