mirror of https://github.com/microsoft/autogen.git
Message "content" now supports both `str` and `List` in Agents (#713)
* Change "content" type in ConversableAgent
* content and system_message support str and List; update all other agents
* content_str now also takes None as input
* Group Chat now works with LMM too
* Style: newline for import in ConversableAgent
* Add test for groupchat + LMM
* Resolve comments:
  1. Undo AssistantAgent changes
  2. Modify the asserts and raises in the `content_str` function and update the test accordingly
* Undo AssistantAgent
* Update comments and add assertion for LMM
* Fix typo in docstring for content_str
* Remove "None" from conversable_agent.py
* Lint message-to-dict in multimodal_conversable_agent.py
* Address lint issues
* Linting
* Move LMM test into contrib test
* Resolve 2 comments
* Move img_utils into contrib folder
* Resolve img_utils path issues
This commit is contained in:
parent
77e1d28c1b
commit
c19f234149
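For orientation (an editor's illustration, not part of the diff): after this change, both `system_message` and message `content` accept either a plain string or a GPT-4V-style list of typed entries, and `content_str` flattens the latter for text-only consumers. A minimal sketch, assuming autogen is installed:

    from autogen import ConversableAgent
    from autogen.code_utils import content_str

    # system_message may now be a typed list instead of a plain string.
    agent = ConversableAgent(
        name="assistant",
        llm_config=False,
        system_message=[{"type": "text", "text": "You are a helpful AI Assistant."}],
    )

    # content_str flattens list-form content; images become an <image> token.
    assert content_str(agent.system_message) == "You are a helpful AI Assistant."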
@@ -1,60 +0,0 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: ContribTests

on:
  pull_request:
    branches: ['main', 'dev/v0.2']
    paths:
      - 'autogen/img_utils.py'
      - 'autogen/agentchat/contrib/multimodal_conversable_agent.py'
      - 'autogen/agentchat/contrib/llava_agent.py'
      - 'test/test_img_utils.py'
      - 'test/agentchat/contrib/test_lmm.py'
      - 'test/agentchat/contrib/test_llava.py'
      - '.github/workflows/lmm-test.yml'
      - 'setup.py'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  LMMTest:

    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages and dependencies for all tests
        run: |
          python -m pip install --upgrade pip wheel
          pip install pytest
      - name: Install packages and dependencies for LMM
        run: |
          pip install -e .[lmm]
          pip uninstall -y openai
      - name: Test LMM and LLaVA
        run: |
          pytest test/test_img_utils.py test/agentchat/contrib/test_lmm.py test/agentchat/contrib/test_llava.py
      - name: Coverage
        if: matrix.python-version == '3.10'
        run: |
          pip install coverage>=5.3
          coverage run -a -m pytest test/test_img_utils.py test/agentchat/contrib/test_lmm.py test/agentchat/contrib/test_llava.py
          coverage xml
      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.10'
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
@@ -136,3 +136,40 @@ jobs:
      - name: Test TeachableAgent
        run: |
          pytest test/agentchat/contrib/test_teachable_agent.py

  LMMTest:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install packages and dependencies for all tests
        run: |
          python -m pip install --upgrade pip wheel
          pip install pytest
      - name: Install packages and dependencies for LMM
        run: |
          pip install -e .[lmm]
          pip uninstall -y openai
      - name: Test LMM and LLaVA
        run: |
          pytest test/agentchat/contrib/test_img_utils.py test/agentchat/contrib/test_lmm.py test/agentchat/contrib/test_llava.py
      - name: Coverage
        if: matrix.python-version == '3.10'
        run: |
          pip install coverage>=5.3
          coverage run -a -m pytest test/agentchat/contrib/test_img_utils.py test/agentchat/contrib/test_lmm.py test/agentchat/contrib/test_llava.py
          coverage xml
      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.10'
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
@@ -167,6 +167,7 @@ wolfram.txt

# DB on disk for TeachableAgent
tmp/
test/my_tmp/*

# Storage for the AgentEval output
test/test_files/agenteval-in-out/out/
@@ -1,6 +1,7 @@
from .conversable_agent import ConversableAgent
from typing import Callable, Dict, Literal, Optional, Union

from .conversable_agent import ConversableAgent


class AssistantAgent(ConversableAgent):
    """(In preview) Assistant agent, designed to solve a task with LLM.
@@ -10,9 +10,9 @@ import requests
from regex import R

from autogen.agentchat.agent import Agent
from autogen.agentchat.contrib.img_utils import get_image_data, llava_formater
from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
from autogen.code_utils import content_str
from autogen.img_utils import get_image_data, llava_formater

try:
    from termcolor import colored
@@ -1,8 +1,9 @@
import copy
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from autogen import OpenAIWrapper
from autogen.agentchat import Agent, ConversableAgent
from autogen.img_utils import gpt4v_formatter
from autogen.agentchat.contrib.img_utils import gpt4v_formatter

try:
    from termcolor import colored
@@ -41,19 +42,14 @@ class MultimodalConversableAgent(ConversableAgent):
            *args,
            **kwargs,
        )

        # call the setter to handle special format.
        self.update_system_message(system_message)
        self._is_termination_msg = (
            is_termination_msg
            if is_termination_msg is not None
            else (lambda x: any([item["text"] == "TERMINATE" for item in x.get("content") if item["type"] == "text"]))
            else (lambda x: content_str(x.get("content")) == "TERMINATE")
        )

    @property
    def system_message(self) -> List:
        """Return the system message."""
        return self._oai_system_message[0]["content"]

    def update_system_message(self, system_message: Union[Dict, List, str]):
        """Update the system message.
@@ -64,44 +60,29 @@ class MultimodalConversableAgent(ConversableAgent):
        self._oai_system_message[0]["role"] = "system"

    @staticmethod
    def _message_to_dict(message: Union[Dict, List, str]):
        """Convert a message to a dictionary.
    def _message_to_dict(message: Union[Dict, List, str]) -> Dict:
        """Convert a message to a dictionary. This implementation
        handles the GPT-4V formatting for easier prompts.

        The message can be a string or a dictionary. The string will be put in the "content" field of the new dictionary.
        The message can be a string, a dictionary, or a list of dictionaries:
        - If it's a string, it will be cast into a list and placed in the 'content' field.
        - If it's a list, it will be directly placed in the 'content' field.
        - If it's a dictionary, it is already in message dict format. The 'content' field of this dictionary
          will be processed using the gpt4v_formatter.
        """
        if isinstance(message, str):
            return {"content": gpt4v_formatter(message)}
        if isinstance(message, list):
            return {"content": message}
        else:
        if isinstance(message, dict):
            assert "content" in message, "The message dict must have a `content` field"
            if isinstance(message["content"], str):
                message = copy.deepcopy(message)
                message["content"] = gpt4v_formatter(message["content"])
            try:
                content_str(message["content"])
            except (TypeError, ValueError) as e:
                print("The `content` field should be compatible with the content_str function!")
                raise e
        return message

    def _print_received_message(self, message: Union[Dict, str], sender: Agent):
        # print the message received
        print(colored(sender.name, "yellow"), "(to", f"{self.name}):\n", flush=True)
        if message.get("role") == "function":
            func_print = f"***** Response from calling function \"{message['name']}\" *****"
            print(colored(func_print, "green"), flush=True)
            print(content_str(message["content"]), flush=True)
            print(colored("*" * len(func_print), "green"), flush=True)
        else:
            content = message.get("content")
            if content is not None:
                if "context" in message:
                    content = OpenAIWrapper.instantiate(
                        content,
                        message["context"],
                        self.llm_config and self.llm_config.get("allow_format_str_template", False),
                    )
                print(content_str(content), flush=True)
            if "function_call" in message:
                func_print = f"***** Suggested function Call: {message['function_call'].get('name', '(No function name found)')} *****"
                print(colored(func_print, "green"), flush=True)
                print(
                    "Arguments: \n",
                    message["function_call"].get("arguments", "(No arguments found)"),
                    flush=True,
                    sep="",
                )
                print(colored("*" * len(func_print), "green"), flush=True)
        print("\n", "-" * 80, flush=True, sep="")
        raise ValueError(f"Unsupported message type: {type(message)}")
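A small illustration of the validation step above: a dict message must carry a `content` field that `content_str` can process (a sketch, assuming autogen is installed; the URL is a placeholder):

    from autogen.code_utils import content_str

    msg = {
        "content": [
            {"type": "text", "text": "Describe the image: "},
            {"type": "image_url", "image_url": {"url": "http://example.com/a.png"}},
        ]
    }
    # _message_to_dict runs this same check and re-raises on a bad format.
    content_str(msg["content"])  # a TypeError/ValueError would surface here
    print(content_str(msg["content"]))  # -> "Describe the image: <image>"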
@@ -1,18 +1,14 @@
import asyncio
from collections import defaultdict
import copy
import json
import logging
from collections import defaultdict
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, Union

from autogen import OpenAIWrapper
from autogen.code_utils import DEFAULT_MODEL, UNKNOWN, content_str, execute_code, extract_code, infer_lang

from .agent import Agent
from autogen.code_utils import (
    DEFAULT_MODEL,
    UNKNOWN,
    execute_code,
    extract_code,
    infer_lang,
)

try:
    from termcolor import colored
@@ -50,7 +46,7 @@ class ConversableAgent(Agent):
    def __init__(
        self,
        name: str,
        system_message: Optional[str] = "You are a helpful AI Assistant.",
        system_message: Optional[Union[str, List]] = "You are a helpful AI Assistant.",
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        max_consecutive_auto_reply: Optional[int] = None,
        human_input_mode: Optional[str] = "TERMINATE",
@@ -62,7 +58,7 @@ class ConversableAgent(Agent):
        """
        Args:
            name (str): name of the agent.
            system_message (str): system message for the ChatCompletion inference.
            system_message (str or list): system message for the ChatCompletion inference.
            is_termination_msg (function): a function that takes a message in the form of a dictionary
                and returns a boolean value indicating if this received message is a termination message.
                The dict can contain the following keys: "content", "role", "name", "function_call".
@@ -105,8 +101,11 @@ class ConversableAgent(Agent):
        self._oai_messages = defaultdict(list)
        self._oai_system_message = [{"content": system_message, "role": "system"}]
        self._is_termination_msg = (
            is_termination_msg if is_termination_msg is not None else (lambda x: x.get("content") == "TERMINATE")
            is_termination_msg
            if is_termination_msg is not None
            else (lambda x: content_str(x.get("content")) == "TERMINATE")
        )

        if llm_config is False:
            self.llm_config = False
            self.client = None
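Why the default termination check above now routes through `content_str`: a multimodal message whose content is a typed list would never compare equal to the string "TERMINATE" directly. A small illustration (assuming autogen is installed):

    from autogen.code_utils import content_str

    is_termination = lambda x: content_str(x.get("content")) == "TERMINATE"

    # Works for plain-string content...
    assert is_termination({"content": "TERMINATE"})
    # ...and for multimodal list content, which the old
    # `x.get("content") == "TERMINATE"` comparison would have missed.
    assert is_termination({"content": [{"type": "text", "text": "TERMINATE"}]})
    # None content no longer breaks the check: content_str(None) == "".
    assert not is_termination({"content": None})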
@@ -190,15 +189,15 @@ class ConversableAgent(Agent):
        )

    @property
    def system_message(self):
    def system_message(self) -> Union[str, List]:
        """Return the system message."""
        return self._oai_system_message[0]["content"]

    def update_system_message(self, system_message: str):
    def update_system_message(self, system_message: Union[str, List]):
        """Update the system message.

        Args:
            system_message (str): system message for the ChatCompletion inference.
            system_message (str or List): system message for the ChatCompletion inference.
        """
        self._oai_system_message[0]["content"] = system_message

@@ -258,7 +257,7 @@ class ConversableAgent(Agent):
        return None if self._code_execution_config is False else self._code_execution_config.get("use_docker")

    @staticmethod
    def _message_to_dict(message: Union[Dict, str]):
    def _message_to_dict(message: Union[Dict, str]) -> Dict:
        """Convert a message to a dictionary.

        The message can be a string or a dictionary. The string will be put in the "content" field of the new dictionary.
@@ -314,7 +313,7 @@ class ConversableAgent(Agent):
        Args:
            message (dict or str): message to be sent.
                The message could contain the following fields:
                - content (str): Required, the content of the message. (Can be None)
                - content (str or List): Required, the content of the message. (Can be None)
                - function_call (str): the name of the function to be called.
                - name (str): the name of the function to be called.
                - role (str): the role of the message, any role that is not "function"
@@ -363,7 +362,7 @@ class ConversableAgent(Agent):
        Args:
            message (dict or str): message to be sent.
                The message could contain the following fields:
                - content (str): Required, the content of the message. (Can be None)
                - content (str or List): Required, the content of the message. (Can be None)
                - function_call (str): the name of the function to be called.
                - name (str): the name of the function to be called.
                - role (str): the role of the message, any role that is not "function"
@@ -419,7 +418,7 @@ class ConversableAgent(Agent):
                    message["context"],
                    self.llm_config and self.llm_config.get("allow_format_str_template", False),
                )
            print(content, flush=True)
            print(content_str(content), flush=True)
        if "function_call" in message:
            function_call = dict(message["function_call"])
            func_print = (
@@ -435,7 +434,7 @@ class ConversableAgent(Agent):
            print(colored("*" * len(func_print), "green"), flush=True)
        print("\n", "-" * 80, flush=True, sep="")

    def _process_received_message(self, message, sender, silent):
    def _process_received_message(self, message: Union[Dict, str], sender: Agent, silent: bool):
        message = self._message_to_dict(message)
        # When the agent receives a message, the role of the message is "user". (If 'role' exists and is 'function', it will remain unchanged.)
        valid = self._append_oai_message(message, "user", sender)
@@ -681,7 +680,7 @@ class ConversableAgent(Agent):
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ):
    ) -> Tuple[bool, Union[Dict, None]]:
        """Generate a reply using function call."""
        if config is None:
            config = self
@@ -698,7 +697,7 @@ class ConversableAgent(Agent):
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ):
    ) -> Tuple[bool, Union[Dict, None]]:
        """Generate a reply using async function call."""
        if config is None:
            config = self
@@ -720,8 +719,26 @@ class ConversableAgent(Agent):
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Check if the conversation should be terminated, and if human reply is provided."""
    ) -> Tuple[bool, Union[str, None]]:
        """Check if the conversation should be terminated, and if human reply is provided.

        This method checks for conditions that require the conversation to be terminated, such as reaching
        a maximum number of consecutive auto-replies or encountering a termination message. Additionally,
        it prompts for and processes human input based on the configured human input mode, which can be
        'ALWAYS', 'NEVER', or 'TERMINATE'. The method also manages the consecutive auto-reply counter
        for the conversation and prints relevant messages based on the human input received.

        Args:
            - messages (Optional[List[Dict]]): A list of message dictionaries, representing the conversation history.
            - sender (Optional[Agent]): The agent object representing the sender of the message.
            - config (Optional[Any]): Configuration object, defaults to the current instance if not provided.

        Returns:
            - Tuple[bool, Union[str, Dict, None]]: A tuple containing a boolean indicating if the conversation
              should be terminated, and a human reply which can be a string, a dictionary, or None.
        """
        # Function implementation...

        if config is None:
            config = self
        if messages is None:
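As a usage note for the input modes named in the docstring above, a minimal sketch (parameter names are from this diff; the agent name and values are placeholders):

    from autogen import ConversableAgent

    # 'NEVER' disables human input; termination then relies on
    # max_consecutive_auto_reply or the is_termination_msg check.
    agent = ConversableAgent(
        name="assistant",
        system_message="You are a helpful AI Assistant.",
        human_input_mode="NEVER",      # or "ALWAYS" / "TERMINATE"
        max_consecutive_auto_reply=3,  # cap auto-replies before stopping
        llm_config=False,              # no LLM needed for this sketch
    )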
@@ -791,8 +808,24 @@ class ConversableAgent(Agent):
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """(async) Check if the conversation should be terminated, and if human reply is provided."""
    ) -> Tuple[bool, Union[str, None]]:
        """(async) Check if the conversation should be terminated, and if human reply is provided.

        This method checks for conditions that require the conversation to be terminated, such as reaching
        a maximum number of consecutive auto-replies or encountering a termination message. Additionally,
        it prompts for and processes human input based on the configured human input mode, which can be
        'ALWAYS', 'NEVER', or 'TERMINATE'. The method also manages the consecutive auto-reply counter
        for the conversation and prints relevant messages based on the human input received.

        Args:
            - messages (Optional[List[Dict]]): A list of message dictionaries, representing the conversation history.
            - sender (Optional[Agent]): The agent object representing the sender of the message.
            - config (Optional[Any]): Configuration object, defaults to the current instance if not provided.

        Returns:
            - Tuple[bool, Union[str, Dict, None]]: A tuple containing a boolean indicating if the conversation
              should be terminated, and a human reply which can be a string, a dictionary, or None.
        """
        if config is None:
            config = self
        if messages is None:
@@ -962,8 +995,20 @@ class ConversableAgent(Agent):
                return reply
        return self._default_auto_reply

    def _match_trigger(self, trigger, sender):
        """Check if the sender matches the trigger."""
    def _match_trigger(self, trigger: Union[None, str, type, Agent, Callable, List], sender: Agent) -> bool:
        """Check if the sender matches the trigger.

        Args:
            - trigger (Union[None, str, type, Agent, Callable, List]): The condition to match against the sender.
              Can be `None`, string, type, `Agent` instance, callable, or a list of these.
            - sender (Agent): The sender object or type to be matched against the trigger.

        Returns:
            - bool: Returns `True` if the sender matches the trigger, otherwise `False`.

        Raises:
            - ValueError: If the trigger type is unsupported.
        """
        if trigger is None:
            return sender is None
        elif isinstance(trigger, str):
@@ -971,9 +1016,12 @@ class ConversableAgent(Agent):
        elif isinstance(trigger, type):
            return isinstance(sender, trigger)
        elif isinstance(trigger, Agent):
            # return True if the sender is the same type (class) as the trigger
            return trigger == sender
        elif isinstance(trigger, Callable):
            return trigger(sender)
            rst = trigger(sender)
            assert rst in [True, False], f"trigger {trigger} must return a boolean value."
            return rst
        elif isinstance(trigger, list):
            return any(self._match_trigger(t, sender) for t in trigger)
        else:
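How the trigger forms above are typically exercised, via `register_reply` (a hedged sketch: `register_reply` and its `trigger` parameter exist on autogen's ConversableAgent, but the reply function and agent names here are invented for illustration):

    from autogen import Agent, ConversableAgent

    def print_and_pass(recipient, messages=None, sender=None, config=None):
        # Hypothetical reply function: log and defer to later reply handlers.
        print(f"{recipient.name} got a message from {sender.name}")
        return False, None

    bob = ConversableAgent(name="bob", llm_config=False)
    # Trigger forms from the docstring above: a name string, an Agent class,
    # a specific Agent instance, a callable, or a list of any of these.
    bob.register_reply(trigger="alice", reply_func=print_and_pass)
    bob.register_reply(trigger=Agent, reply_func=print_and_pass)
    # A callable trigger must return a strict boolean (the new assert enforces this).
    bob.register_reply(trigger=lambda sender: sender.name.startswith("a"), reply_func=print_and_pass)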
@@ -1095,7 +1143,7 @@ class ConversableAgent(Agent):
            result.append(char)
        return "".join(result)

    def execute_function(self, func_call):
    def execute_function(self, func_call) -> Tuple[bool, Dict[str, str]]:
        """Execute a function call and return the result.

        Override this function to modify the way to execute a function call.
@@ -1195,7 +1243,10 @@ class ConversableAgent(Agent):
        """Generate the initial message for the agent.

        Override this function to customize the initial message based on user's request.
        If not overridden, "message" needs to be provided in the context.
        If not overriden, "message" needs to be provided in the context.

        Args:
            **context: any context information, and "message" parameter needs to be provided.
        """
        return context["message"]

@@ -1,9 +1,11 @@
import logging
import sys
import random
import re
import sys
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
import re

from ..code_utils import content_str
from .agent import Agent
from .conversable_agent import ConversableAgent

@@ -50,6 +52,14 @@ class GroupChat:
        """Reset the group chat."""
        self.messages.clear()

    def append(self, message: Dict):
        """Append a message to the group chat.
        We cast the content to str here so that it can be managed by text-based
        model.
        """
        message["content"] = content_str(message["content"])
        self.messages.append(message)

    def agent_by_name(self, name: str) -> Agent:
        """Returns the agent with a given name."""
        return self.agents[self.agent_names.index(name)]
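The new `GroupChat.append` above is what lets a text-only speaker-selection model manage multimodal turns: list-form content is flattened before it enters the shared history. A small illustration (assuming autogen is installed; the URL is a placeholder):

    import autogen

    groupchat = autogen.GroupChat(agents=[], messages=[], max_round=5)
    groupchat.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What do you see? "},
                {"type": "image_url", "image_url": {"url": "http://example.com/a.png"}},
            ],
        }
    )
    # The stored message is plain text, with images reduced to a placeholder token.
    assert groupchat.messages[-1]["content"] == "What do you see? <image>"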
@@ -64,7 +74,7 @@ class GroupChat:
            if self.agents[(offset + i) % len(self.agents)] in agents:
                return self.agents[(offset + i) % len(self.agents)]

    def select_speaker_msg(self, agents: List[Agent]):
    def select_speaker_msg(self, agents: List[Agent]) -> str:
        """Return the message for selecting the next speaker."""
        return f"""You are in a role play game. The following roles are available:
{self._participant_roles(agents)}.
@@ -72,7 +82,7 @@ class GroupChat:
Read the following conversation.
Then select the next role from {[agent.name for agent in agents]} to play. Only return the role."""

    def manual_select_speaker(self, agents: List[Agent]) -> Agent:
    def manual_select_speaker(self, agents: List[Agent]) -> Union[Agent, None]:
        """Manually select the next speaker."""

        print("Please select the next speaker from the following list:")
@@ -190,19 +200,26 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only

        roles = []
        for agent in agents:
            if agent.system_message.strip() == "":
            if content_str(agent.system_message).strip() == "":
                logger.warning(
                    f"The agent '{agent.name}' has an empty system_message, and may not work well with GroupChat."
                )
            roles.append(f"{agent.name}: {agent.system_message}")
        return "\n".join(roles)

    def _mentioned_agents(self, message_content: str, agents: List[Agent]) -> Dict:
        """
        Finds and counts agent mentions in the string message_content, taking word boundaries into account.
    def _mentioned_agents(self, message_content: Union[str, List], agents: List[Agent]) -> Dict:
        """Counts the number of times each agent is mentioned in the provided message content.

        Returns: A dictionary mapping agent names to mention counts (to be included, at least one mention must occur)
        Args:
            message_content (Union[str, List]): The content of the message, either as a single string or a list of strings.
            agents (List[Agent]): A list of Agent objects, each having a 'name' attribute to be searched in the message content.

        Returns:
            Dict: a counter for mentioned agents.
        """
        # Cast message content to str
        message_content = content_str(message_content)

        mentions = dict()
        for agent in agents:
            regex = (
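A hedged sketch of the word-boundary mention counting described above (the actual regex in `_mentioned_agents` is truncated in this diff, so the pattern below is an assumption, not the repository's exact expression):

    import re

    def count_mentions(message_content, agent_names):
        # Flatten multimodal content first (mirrors content_str), then count
        # whole-word occurrences of each agent name.
        if isinstance(message_content, list):
            message_content = "".join(
                item["text"] if item["type"] == "text" else "<image>" for item in message_content
            )
        mentions = {}
        for name in agent_names:
            count = len(re.findall(rf"\b{re.escape(name)}\b", message_content))
            if count > 0:  # only agents mentioned at least once are included
                mentions[name] = count
        return mentions

    print(count_mentions("Ask Bob, then Bob again.", ["Bob", "Alice"]))  # {'Bob': 2}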
@@ -224,7 +241,7 @@ class GroupChatManager(ConversableAgent):
        # unlimited consecutive auto reply by default
        max_consecutive_auto_reply: Optional[int] = sys.maxsize,
        human_input_mode: Optional[str] = "NEVER",
        system_message: Optional[str] = "Group chat manager.",
        system_message: Optional[Union[str, List]] = "Group chat manager.",
        **kwargs,
    ):
        super().__init__(
@@ -256,12 +273,12 @@ class GroupChatManager(ConversableAgent):
            # set the name to speaker's name if the role is not function
            if message["role"] != "function":
                message["name"] = speaker.name
            groupchat.messages.append(message)

            groupchat.append(message)

            if self._is_termination_msg(message):
                # The conversation is over
                break

            # broadcast the message to all agents except the speaker
            for agent in groupchat.agents:
                if agent != speaker:
@@ -306,7 +323,8 @@ class GroupChatManager(ConversableAgent):
            # set the name to speaker's name if the role is not function
            if message["role"] != "function":
                message["name"] = speaker.name
            groupchat.messages.append(message)

            groupchat.append(message)

            if self._is_termination_msg(message):
                # The conversation is over
@@ -1,5 +1,6 @@
from typing import Callable, Dict, List, Literal, Optional, Union

from .conversable_agent import ConversableAgent
from typing import Callable, Dict, Literal, Optional, Union


class UserProxyAgent(ConversableAgent):
@@ -25,7 +26,7 @@ class UserProxyAgent(ConversableAgent):
        code_execution_config: Optional[Union[Dict, Literal[False]]] = None,
        default_auto_reply: Optional[Union[str, Dict, None]] = "",
        llm_config: Optional[Union[Dict, Literal[False]]] = False,
        system_message: Optional[str] = "",
        system_message: Optional[Union[str, List]] = "",
    ):
        """
        Args:
@@ -66,7 +67,7 @@ class UserProxyAgent(ConversableAgent):
                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                Default to false, which disables llm-based auto reply.
            system_message (str): system message for ChatCompletion inference.
            system_message (str or List): system message for ChatCompletion inference.
                Only used when llm_config is not False. Use it to reprogram the agent.
        """
        super().__init__(
@@ -38,16 +38,44 @@ PATH_SEPARATOR = WIN32 and "\\" or "/"
logger = logging.getLogger(__name__)


def content_str(content: Union[str, List]) -> str:
    if type(content) is str:
def content_str(content: Union[str, List, None]) -> str:
    """Converts `content` into a string format.

    This function processes content that may be a string, a list of mixed text and image URLs, or None,
    and converts it into a string. Text is directly appended to the result string, while image URLs are
    represented by a placeholder image token. If the content is None, an empty string is returned.

    Args:
        - content (Union[str, List, None]): The content to be processed. Can be a string, a list of dictionaries
          representing text and image URLs, or None.

    Returns:
        str: A string representation of the input content. Image URLs are replaced with an image token.

    Note:
        - The function expects each dictionary in the list to have a "type" key that is either "text" or "image_url".
          For "text" type, the "text" key's value is appended to the result. For "image_url", an image token is appended.
        - This function is useful for handling content that may include both text and image references, especially
          in contexts where images need to be represented as placeholders.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        raise TypeError(f"content must be None, str, or list, but got {type(content)}")

    rst = ""
    for item in content:
        if not isinstance(item, dict):
            raise TypeError("Wrong content format: every element should be dict if the content is a list.")
        assert "type" in item, "Wrong content format. Missing 'type' key in content's dict."
        if item["type"] == "text":
            rst += item["text"]
        else:
            assert isinstance(item, dict) and item["type"] == "image_url", "Wrong content format."
        elif item["type"] == "image_url":
            rst += "<image>"
        else:
            raise ValueError(f"Wrong content format: unknown type {item['type']} within the content")
    return rst

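To make the new validation behavior above concrete, a few illustrative calls (assuming the new `content_str` is importable):

    from autogen.code_utils import content_str

    assert content_str(None) == ""
    assert content_str("hello") == "hello"
    assert content_str(
        [{"type": "text", "text": "hi "}, {"type": "image_url", "image_url": {"url": "x"}}]
    ) == "hi <image>"

    # Invalid inputs now fail loudly with typed exceptions instead of bare asserts:
    # content_str(42)                 -> TypeError (not None, str, or list)
    # content_str(["hi"])             -> TypeError (list items must be dicts)
    # content_str([{"type": "bad"}])  -> ValueError (unknown type)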
@@ -39,7 +39,7 @@
    "import autogen\n",
    "from autogen import AssistantAgent, Agent, UserProxyAgent, ConversableAgent\n",
    "\n",
    "from autogen.img_utils import get_image_data, _to_pil\n",
    "from autogen.agentchat.contrib.img_utils import get_image_data, _to_pil\n",
    "from termcolor import colored\n",
    "import random"
]
@@ -91,38 +91,6 @@
{
    "cell_type": "code",
    "execution_count": 3,
    "id": "57462351",
    "metadata": {},
    "outputs": [
        {
            "data": {
                "text/plain": [
                    "['openai']"
                ]
            },
            "execution_count": 3,
            "metadata": {},
            "output_type": "execute_result"
        }
    ],
    "source": [
        "# Remove the `api_type` param as it is not needed for 4V\n",
        "[config.pop(\"api_type\", None) for config in config_list_4v]"
    ]
},
{
    "cell_type": "code",
    "execution_count": 4,
    "id": "e23df0dd",
    "metadata": {},
    "outputs": [],
    "source": [
        "# image_agent._oai_messages[user_proxy]"
    ]
},
{
    "cell_type": "code",
    "execution_count": 5,
    "id": "67157629",
    "metadata": {
        "scrolled": false
@@ -180,7 +148,7 @@
},
{
    "cell_type": "code",
    "execution_count": 6,
    "execution_count": 4,
    "id": "73a2b234",
    "metadata": {
        "scrolled": false
@@ -236,7 +204,7 @@
},
{
    "cell_type": "code",
    "execution_count": 7,
    "execution_count": 5,
    "id": "e8eca993",
    "metadata": {},
    "outputs": [],
@@ -339,7 +307,7 @@
},
{
    "cell_type": "code",
    "execution_count": 8,
    "execution_count": 6,
    "id": "977b9017",
    "metadata": {
        "scrolled": false
@@ -724,7 +692,7 @@
},
{
    "cell_type": "code",
    "execution_count": 9,
    "execution_count": 7,
    "id": "f0a58827",
    "metadata": {},
    "outputs": [],
@@ -736,10 +704,100 @@
{
    "cell_type": "code",
    "execution_count": null,
    "id": "b95bf449",
    "id": "c6206648",
    "metadata": {},
    "outputs": [],
    "source": []
},
{
    "cell_type": "markdown",
    "id": "a95d87c2",
    "metadata": {},
    "source": [
        "## Group Chat Example with Multimodal Agent"
    ]
},
{
    "cell_type": "code",
    "execution_count": 8,
    "id": "56bd5742",
    "metadata": {
        "scrolled": false
    },
    "outputs": [
        {
            "name": "stdout",
            "output_type": "stream",
            "text": [
                "\u001b[33mUser_proxy\u001b[0m (to chat_manager):\n",
                "\n",
                "Describe the image:\n",
                " <img https://th.bing.com/th/id/R.422068ce8af4e15b0634fe2540adea7a?rik=y4OcXBE%2fqutDOw&pid=ImgRaw&r=0>.\n",
                "\n",
                "--------------------------------------------------------------------------------\n",
                "\u001b[31m\n",
                ">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
                "\u001b[33mimage-explainer-1\u001b[0m (to chat_manager):\n",
                "\n",
                "In a soft-focus world, a caramel-colored puppy with a coat of curly fur sits serenely, its innocent eyes gazing into the distance. Adorned with a collar that hosts a vibrant, multicolored bandana and a shiny tag engraved with the name \"Webster,\" the pup exudes a sense of youthful curiosity and gentle charm. Behind this bundle of joy, the muted backdrop of a home's interior whispers tales of comfort and domesticity, with a pair of black boots resting by the door, hinting at the comings and goings of human life amidst which this little creature finds its love and belonging.\n",
                "\n",
                "--------------------------------------------------------------------------------\n",
                "\u001b[31m\n",
                ">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
                "\u001b[33mimage-explainer-2\u001b[0m (to chat_manager):\n",
                "\n",
                "The image shows a young, caramel-colored puppy with curly fur sitting on the floor. The puppy is wearing a blue collar with a colorful bandana and a tag that appears to have the name \"Webster\" engraved on it. In the background, there are black boots near a white door, suggesting an indoor, home setting. The focus is on the puppy, making the background appear softly blurred. The puppy's expression is gentle, with a hint of curiosity in its eyes.\n",
                "\n",
                "--------------------------------------------------------------------------------\n",
                "\u001b[31m\n",
                ">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
                "\u001b[33mUser_proxy\u001b[0m (to chat_manager):\n",
                "\n",
                "\n",
                "\n",
                "--------------------------------------------------------------------------------\n",
                "\u001b[31m\n",
                ">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
                "\u001b[33mUser_proxy\u001b[0m (to chat_manager):\n",
                "\n",
                "\n",
                "\n",
                "--------------------------------------------------------------------------------\n"
            ]
        }
    ],
    "source": [
        "agent1 = MultimodalConversableAgent(\n",
        "    name=\"image-explainer-1\",\n",
        "    max_consecutive_auto_reply=10,\n",
        "    llm_config={\"config_list\": config_list_4v, \"temperature\": 0.5, \"max_tokens\": 300},\n",
        "    system_message=\"Your image description is poetic and engaging.\",\n",
        ")\n",
        "agent2 = MultimodalConversableAgent(\n",
        "    name=\"image-explainer-2\",\n",
        "    max_consecutive_auto_reply=10,\n",
        "    llm_config={\"config_list\": config_list_4v, \"temperature\": 0.5, \"max_tokens\": 300},\n",
        "    system_message=\"Your image description is factual and to the point.\",\n",
        ")\n",
        "\n",
        "user_proxy = autogen.UserProxyAgent(\n",
        "    name=\"User_proxy\",\n",
        "    system_message=\"Ask both image explainer 1 and 2 for their description.\",\n",
        "    human_input_mode=\"TERMINATE\",  # Try between ALWAYS, NEVER, and TERMINATE\n",
        "    max_consecutive_auto_reply=10,\n",
        ")\n",
        "\n",
        "# We set max_round to 5\n",
        "groupchat = autogen.GroupChat(agents=[agent1, agent2, user_proxy], \n",
        "                              messages=[], \n",
        "                              max_round=5)\n",
        "group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, \n",
        "                                              llm_config=gpt4_llm_config)\n",
        "\n",
        "user_proxy.initiate_chat(group_chat_manager,\n",
        "                         message=f\"\"\"Describe the image:\n",
        "                         <img https://th.bing.com/th/id/R.422068ce8af4e15b0634fe2540adea7a?rik=y4OcXBE%2fqutDOw&pid=ImgRaw&r=0>.\"\"\")"
    ]
}
],
"metadata": {
@@ -10,7 +10,7 @@ import requests
try:
    from PIL import Image

    from autogen.img_utils import extract_img_paths, get_image_data, gpt4v_formatter, llava_formater
    from autogen.agentchat.contrib.img_utils import extract_img_paths, get_image_data, gpt4v_formatter, llava_formater
except ImportError:
    skip = True
else:
@@ -71,7 +71,7 @@ class TestLlavaFormater(unittest.TestCase):
        result = llava_formater(prompt)
        self.assertEqual(result, expected_output)

    @patch("autogen.img_utils.get_image_data")
    @patch("autogen.agentchat.contrib.img_utils.get_image_data")
    def test_with_images(self, mock_get_image_data):
        """
        Test the llava_formater function with a prompt containing images.
@@ -84,7 +84,7 @@ class TestLlavaFormater(unittest.TestCase):
        result = llava_formater(prompt)
        self.assertEqual(result, expected_output)

    @patch("autogen.img_utils.get_image_data")
    @patch("autogen.agentchat.contrib.img_utils.get_image_data")
    def test_with_ordered_images(self, mock_get_image_data):
        """
        Test the llava_formater function with ordered image tokens.
@@ -109,7 +109,7 @@ class TestGpt4vFormatter(unittest.TestCase):
        result = gpt4v_formatter(prompt)
        self.assertEqual(result, expected_output)

    @patch("autogen.img_utils.get_image_data")
    @patch("autogen.agentchat.contrib.img_utils.get_image_data")
    def test_with_images(self, mock_get_image_data):
        """
        Test the gpt4v_formatter function with a prompt containing images.
@@ -126,7 +126,7 @@ class TestGpt4vFormatter(unittest.TestCase):
        result = gpt4v_formatter(prompt)
        self.assertEqual(result, expected_output)

    @patch("autogen.img_utils.get_image_data")
    @patch("autogen.agentchat.contrib.img_utils.get_image_data")
    def test_multiple_images(self, mock_get_image_data):
        """
        Test the gpt4v_formatter function with a prompt containing multiple images.
@@ -79,5 +79,53 @@ class TestMultimodalConversableAgent(unittest.TestCase):
        self.agent._print_received_message.assert_called_with(message_str, sender)


@pytest.mark.skipif(skip, reason="Dependency not installed")
def test_group_chat_with_lmm():
    """
    Tests the group chat functionality with two MultimodalConversable Agents.
    Verifies that the chat is correctly limited by the max_round parameter.
    Each agent is set to describe an image in a unique style, but the chat should not exceed the specified max_rounds.
    """

    # Configuration parameters
    max_round = 5
    max_consecutive_auto_reply = 10
    llm_config = False

    # Creating two MultimodalConversable Agents with different descriptive styles
    agent1 = MultimodalConversableAgent(
        name="image-explainer-1",
        max_consecutive_auto_reply=max_consecutive_auto_reply,
        llm_config=llm_config,
        system_message="Your image description is poetic and engaging.",
    )
    agent2 = MultimodalConversableAgent(
        name="image-explainer-2",
        max_consecutive_auto_reply=max_consecutive_auto_reply,
        llm_config=llm_config,
        system_message="Your image description is factual and to the point.",
    )

    # Creating a user proxy agent for initiating the group chat
    user_proxy = autogen.UserProxyAgent(
        name="User_proxy",
        system_message="Ask both image explainer 1 and 2 for their description.",
        human_input_mode="NEVER",  # Options: 'ALWAYS' or 'NEVER'
        max_consecutive_auto_reply=max_consecutive_auto_reply,
    )

    # Setting up the group chat
    groupchat = autogen.GroupChat(agents=[agent1, agent2, user_proxy], messages=[], max_round=max_round)
    group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

    # Initiating the group chat and observing the number of rounds
    user_proxy.initiate_chat(group_chat_manager, message=f"What do you see? <img {base64_encoded_image}>")

    # Assertions to check if the number of rounds does not exceed max_round
    assert all(len(arr) <= max_round for arr in agent1._oai_messages.values()), "Agent 1 exceeded max rounds"
    assert all(len(arr) <= max_round for arr in agent2._oai_messages.values()), "Agent 2 exceeded max rounds"
    assert all(len(arr) <= max_round for arr in user_proxy._oai_messages.values()), "User proxy exceeded max rounds"


if __name__ == "__main__":
    unittest.main()
@@ -403,7 +403,7 @@ class TestContentStr(unittest.TestCase):

    def test_invalid_content(self):
        content = [{"type": "text", "text": "hello"}, {"type": "wrong_type", "url": "http://example.com/image.png"}]
        with self.assertRaises(AssertionError) as context:
        with self.assertRaises(ValueError) as context:
            content_str(content)
        self.assertIn("Wrong content format", str(context.exception))
