Removes Support For `TransformChatHistory` and `CompressibleAgent` (#3313)

* remove old files

* removes ci

* removes faq

---------

Co-authored-by: Li Jiang <bnujli@gmail.com>
Wael Karkoub 2024-08-12 22:28:02 -05:00 committed by GitHub
parent afdaa4c7cb
commit 6682b6d2d0
9 changed files with 2 additions and 2757 deletions

View File

@@ -111,46 +111,7 @@ jobs:
with:
file: ./coverage.xml
flags: unittests
CompressionTest:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.9"]
runs-on: ${{ matrix.os }}
environment: openai1
steps:
# checkout to pr branch
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies
run: |
docker --version
python -m pip install --upgrade pip wheel
pip install -e .
python -c "import autogen"
pip install pytest-cov>=5 pytest-asyncio
- name: Install packages for test when needed
run: |
pip install docker
- name: Coverage
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
run: |
pytest test/agentchat/contrib/test_compressible_agent.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
GPTAssistantAgent:
strategy:
matrix:
@@ -306,44 +267,7 @@ jobs:
with:
file: ./coverage.xml
flags: unittests
ContextHandling:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.11"]
runs-on: ${{ matrix.os }}
environment: openai1
steps:
# checkout to pr branch
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies
run: |
docker --version
python -m pip install --upgrade pip wheel
pip install -e .
python -c "import autogen"
pip install pytest-cov>=5
- name: Coverage
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
BING_API_KEY: ${{ secrets.BING_API_KEY }}
run: |
pytest test/agentchat/contrib/capabilities/test_context_handling.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
ImageGen:
strategy:
matrix:

View File

@@ -163,41 +163,6 @@ jobs:
file: ./coverage.xml
flags: unittests
CompressionTest:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies for all tests
run: |
python -m pip install --upgrade pip wheel
pip install pytest-cov>=5
- name: Install packages and dependencies for Compression
run: |
pip install -e .
- name: Set AUTOGEN_USE_DOCKER based on OS
shell: bash
run: |
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
fi
- name: Coverage
run: |
pytest test/agentchat/contrib/test_compressible_agent.py --skip-openai
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
GPTAssistantAgent:
runs-on: ${{ matrix.os }}
strategy:
@@ -384,41 +349,6 @@ jobs:
file: ./coverage.xml
flags: unittests
ContextHandling:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies for all tests
run: |
python -m pip install --upgrade pip wheel
pip install pytest-cov>=5
- name: Install packages and dependencies for Context Handling
run: |
pip install -e .
- name: Set AUTOGEN_USE_DOCKER based on OS
shell: bash
run: |
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
fi
- name: Coverage
run: |
pytest test/agentchat/contrib/capabilities/test_context_handling.py --skip-openai
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
TransformMessages:
runs-on: ${{ matrix.os }}
strategy:
@@ -485,7 +415,6 @@ jobs:
file: ./coverage.xml
flags: unittests
AnthropicTest:
runs-on: ${{ matrix.os }}
strategy:

View File

@@ -1,138 +0,0 @@
import sys
from typing import Dict, List, Optional
from warnings import warn
import tiktoken
from termcolor import colored
from autogen import ConversableAgent, token_count_utils
warn(
"Context handling with TransformChatHistory is deprecated and will be removed in `0.2.30`. "
"Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages",
DeprecationWarning,
stacklevel=2,
)
class TransformChatHistory:
"""
An agent's chat history with other agents is a common context that it uses to generate a reply.
This capability allows the agent to transform its chat history prior to using it to generate a reply.
It does not permanently modify the chat history, but rather processes it on every invocation.
This capability class enables various strategies to transform chat history, such as:
- Truncate messages: truncate each message to a maximum number of tokens.
- Limit number of messages: Truncate the chat history to a maximum number of (recent) messages.
- Limit number of tokens: keep only the most recent messages whose combined token count
fits within a maximum number of tokens.
Note that the system message, because of its special significance, is always kept as is.
The three strategies can be combined. When all of these parameters are specified,
they are applied in the following order:
1. Truncate each message to the maximum number of tokens per message.
2. Limit the number of messages to keep.
3. Limit the total number of tokens in the chat history.
When adding this capability to an agent, the following are modified:
- A hook is added to the hookable method `process_all_messages_before_reply` to transform the
received messages for possible truncation.
The stored message history is not modified.
"""
def __init__(
self,
*,
max_tokens_per_message: Optional[int] = None,
max_messages: Optional[int] = None,
max_tokens: Optional[int] = None,
):
"""
Args:
max_tokens_per_message (Optional[int]): Maximum number of tokens to keep in each message.
max_messages (Optional[int]): Maximum number of messages to keep in the context.
max_tokens (Optional[int]): Maximum number of tokens to keep in the context.
"""
self.max_tokens_per_message = max_tokens_per_message if max_tokens_per_message else sys.maxsize
self.max_messages = max_messages if max_messages else sys.maxsize
self.max_tokens = max_tokens if max_tokens else sys.maxsize
def add_to_agent(self, agent: ConversableAgent):
"""
Adds TransformChatHistory capability to the given agent.
"""
agent.register_hook(hookable_method="process_all_messages_before_reply", hook=self._transform_messages)
def _transform_messages(self, messages: List[Dict]) -> List[Dict]:
"""
Args:
messages: List of messages to process.
Returns:
List of messages with the first system message and the last max_messages messages,
ensuring each message does not exceed max_tokens_per_message.
"""
temp_messages = messages.copy()
processed_messages = []
system_message = None
processed_messages_tokens = 0
if messages[0]["role"] == "system":
system_message = messages[0].copy()
temp_messages.pop(0)
total_tokens = sum(
token_count_utils.count_token(msg["content"]) for msg in temp_messages
) # Calculate tokens for all messages
# Truncate each message's content to the per-message token limit
# Process recent messages first
for msg in reversed(temp_messages[-self.max_messages :]):
msg["content"] = truncate_str_to_tokens(msg["content"], self.max_tokens_per_message)
msg_tokens = token_count_utils.count_token(msg["content"])
if processed_messages_tokens + msg_tokens > self.max_tokens:
break
# append the message to the beginning of the list to preserve order
processed_messages = [msg] + processed_messages
processed_messages_tokens += msg_tokens
if system_message:
processed_messages.insert(0, system_message)
# Optionally, log the number of truncated messages and tokens if needed
num_truncated = len(messages) - len(processed_messages)
if num_truncated > 0 or total_tokens > processed_messages_tokens:
print(
colored(
f"Truncated {num_truncated} messages. Reduced from {len(messages)} to {len(processed_messages)}.",
"yellow",
)
)
print(
colored(
f"Truncated {total_tokens - processed_messages_tokens} tokens. Tokens reduced from {total_tokens} to {processed_messages_tokens}",
"yellow",
)
)
return processed_messages
def truncate_str_to_tokens(text: str, max_tokens: int, model: str = "gpt-3.5-turbo-0613") -> str:
"""Truncate a string so that the number of tokens is less than or equal to max_tokens using tiktoken.
Args:
text: The string to truncate.
max_tokens: The maximum number of tokens to keep.
model: The target OpenAI model for tokenization alignment.
Returns:
The truncated string.
"""
encoding = tiktoken.encoding_for_model(model) # Get the appropriate tokenizer
encoded_tokens = encoding.encode(text)
truncated_tokens = encoded_tokens[:max_tokens]
truncated_text = encoding.decode(truncated_tokens) # Decode back to text
return truncated_text
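# --- Editor's note: a minimal usage sketch, not part of the original module. It relies only on
# names defined or imported above; the agent name and message contents are illustrative.
if __name__ == "__main__":
    agent = ConversableAgent("assistant", llm_config=False, human_input_mode="NEVER")

    # Keep at most 5 recent messages, each truncated to 50 tokens, within a 500-token budget.
    capability = TransformChatHistory(max_messages=5, max_tokens_per_message=50, max_tokens=500)
    capability.add_to_agent(agent)  # registers the process_all_messages_before_reply hook

    # The transform can also be exercised directly, as the removed unit tests did.
    history = [{"role": "system", "content": "You are terse."}] + [
        {"role": "user", "content": "filler " * 100} for _ in range(20)
    ]
    print(len(capability._transform_messages(history)))  # at most 6: system message + 5 recent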

View File

@@ -1,436 +0,0 @@
import copy
import inspect
import logging
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
from warnings import warn
from autogen import Agent, ConversableAgent, OpenAIWrapper
from autogen.token_count_utils import count_token, get_max_token_limit, num_tokens_from_functions
from ...formatting_utils import colored
logger = logging.getLogger(__name__)
warn(
"Context handling with CompressibleAgent is deprecated and will be removed in `0.2.30`. "
"Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages",
DeprecationWarning,
stacklevel=2,
)
class CompressibleAgent(ConversableAgent):
"""CompressibleAgent agent. While this agent retains all the default functionalities of the `AssistantAgent`,
it also provides the added feature of compression when activated through the `compress_config` setting.
`compress_config` is set to False by default, making this agent equivalent to the `AssistantAgent`.
This agent does not work well in a GroupChat: The compressed messages will not be sent to all the agents in the group.
The default system message is the same as AssistantAgent.
`human_input_mode` defaults to "NEVER" and `code_execution_config` defaults to False,
so this agent does not execute code or call functions by default.
"""
DEFAULT_SYSTEM_MESSAGE = """You are a helpful AI assistant.
Solve tasks using your coding and language skills.
In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.
1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.
2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.
Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.
When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.
If you want the user to save the code in a file before executing it, put # filename: <filename> inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.
Reply "TERMINATE" in the end when everything is done.
"""
DEFAULT_COMPRESS_CONFIG = {
"mode": "TERMINATE",
"compress_function": None,
"trigger_count": 0.7,
"async": False,
"broadcast": True,
"verbose": False,
"leave_last_n": 2,
}
def __init__(
self,
name: str,
system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE,
is_termination_msg: Optional[Callable[[Dict], bool]] = None,
max_consecutive_auto_reply: Optional[int] = None,
human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
function_map: Optional[Dict[str, Callable]] = None,
code_execution_config: Optional[Union[Dict, bool]] = False,
llm_config: Optional[Union[Dict, bool]] = None,
default_auto_reply: Optional[Union[str, Dict, None]] = "",
compress_config: Optional[Dict] = False,
description: Optional[str] = None,
**kwargs,
):
"""
Args:
name (str): agent name.
system_message (str): system message for the ChatCompletion inference.
Please override this attribute if you want to reprogram the agent.
llm_config (dict): llm inference configuration.
Note: you must set `model` in llm_config. It will be used to compute the token count.
Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
for available options.
is_termination_msg (function): a function that takes a message in the form of a dictionary
and returns a boolean value indicating if this received message is a termination message.
The dict can contain the following keys: "content", "role", "name", "function_call".
max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
The limit only plays a role when human_input_mode is not "ALWAYS".
compress_config (dict or True/False): config for compression before oai_reply. Default to False.
The dict may contain the following keys:
- "mode" (Optional, str, default to "TERMINATE"): Choose from ["COMPRESS", "TERMINATE", "CUSTOMIZED"].
1. `TERMINATE`: terminate the conversation ONLY when token count exceeds the max limit of current model. `trigger_count` is NOT used in this mode.
2. `COMPRESS`: compress the messages when the token count exceeds the limit.
3. `CUSTOMIZED`: pass in a customized function to compress the messages.
- "compress_function" (Optional, callable, default to None): Must be provided when mode is "CUSTOMIZED".
The function should take a list of messages and return a tuple of (is_compress_success: bool, compressed_messages: List[Dict]).
- "trigger_count" (Optional, float, int, default to 0.7): the threshold to trigger compression.
If a float in (0, 1], it is interpreted as that fraction of the model's maximum token limit; if an int, it is an absolute number of tokens.
- "async" (Optional, bool, default to False): whether to compress asynchronously.
- "broadcast" (Optional, bool, default to True): whether to update the compressed message history to sender.
- "verbose" (Optional, bool, default to False): Whether to print the content before and after compression. Used when mode="COMPRESS".
- "leave_last_n" (Optional, int, default to 0): If provided, the last n messages will not be compressed. Used when mode="COMPRESS".
description (str): a short description of the agent. This description is used by other agents
(e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message)
**kwargs (dict): Please refer to other kwargs in
[ConversableAgent](../conversable_agent#__init__).
"""
super().__init__(
name=name,
system_message=system_message,
is_termination_msg=is_termination_msg,
max_consecutive_auto_reply=max_consecutive_auto_reply,
human_input_mode=human_input_mode,
function_map=function_map,
code_execution_config=code_execution_config,
llm_config=llm_config,
default_auto_reply=default_auto_reply,
description=description,
**kwargs,
)
self._set_compress_config(compress_config)
# create a separate client for compression.
if llm_config is False:
self.llm_compress_config = False
self.compress_client = None
else:
if "model" not in llm_config:
raise ValueError("llm_config must contain the 'model' field.")
self.llm_compress_config = self.llm_config.copy()
# remove functions
if "functions" in self.llm_compress_config:
del self.llm_compress_config["functions"]
self.compress_client = OpenAIWrapper(**self.llm_compress_config)
self._reply_func_list.clear()
self.register_reply([Agent, None], ConversableAgent.generate_oai_reply)
self.register_reply([Agent], CompressibleAgent.on_oai_token_limit) # check token limit
self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply)
self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply)
self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply)
def _set_compress_config(self, compress_config: Optional[Dict] = False):
if compress_config:
if compress_config is True:
compress_config = {}
if not isinstance(compress_config, dict):
raise ValueError("compress_config must be a dict or True/False.")
allowed_modes = ["COMPRESS", "TERMINATE", "CUSTOMIZED"]
if compress_config.get("mode", "TERMINATE") not in allowed_modes:
raise ValueError(f"Invalid compression mode. Allowed values are: {', '.join(allowed_modes)}")
self.compress_config = self.DEFAULT_COMPRESS_CONFIG.copy()
self.compress_config.update(compress_config)
if not isinstance(self.compress_config["leave_last_n"], int) or self.compress_config["leave_last_n"] < 0:
raise ValueError("leave_last_n must be a non-negative integer.")
# convert trigger_count to int, default to 0.7
trigger_count = self.compress_config["trigger_count"]
if not (isinstance(trigger_count, int) or isinstance(trigger_count, float)) or trigger_count <= 0:
raise ValueError("trigger_count must be a positive number.")
if isinstance(trigger_count, float) and 0 < trigger_count <= 1:
self.compress_config["trigger_count"] = int(
trigger_count * get_max_token_limit(self.llm_config["model"])
)
trigger_count = self.compress_config["trigger_count"]
init_count = self._compute_init_token_count()
if trigger_count < init_count:
print(
f"Warning: trigger_count {trigger_count} is less than the initial token count {init_count} (system message + function description if passed), compression will be disabled. Please increase trigger_count if you want to enable compression."
)
self.compress_config = False
if self.compress_config["mode"] == "CUSTOMIZED" and self.compress_config["compress_function"] is None:
raise ValueError("compress_function must be provided when mode is CUSTOMIZED.")
if self.compress_config["mode"] != "CUSTOMIZED" and self.compress_config["compress_function"] is not None:
print("Warning: compress_function is provided but mode is not 'CUSTOMIZED'.")
else:
self.compress_config = False
def generate_reply(
self,
messages: Optional[List[Dict]] = None,
sender: Optional[Agent] = None,
exclude: Optional[List[Callable]] = None,
) -> Union[str, Dict, None]:
"""
Adding to line 202:
```
if messages is not None and messages != self._oai_messages[sender]:
messages = self._oai_messages[sender]
```
"""
if all((messages is None, sender is None)):
error_msg = f"Either {messages=} or {sender=} must be provided."
logger.error(error_msg)
raise AssertionError(error_msg)
if messages is None:
messages = self._oai_messages[sender]
for reply_func_tuple in self._reply_func_list:
reply_func = reply_func_tuple["reply_func"]
if exclude and reply_func in exclude:
continue
if inspect.iscoroutinefunction(reply_func):
continue
if self._match_trigger(reply_func_tuple["trigger"], sender):
final, reply = reply_func(self, messages=messages, sender=sender, config=reply_func_tuple["config"])
if messages is not None and sender is not None and messages != self._oai_messages[sender]:
messages = self._oai_messages[sender]
if final:
return reply
return self._default_auto_reply
def _compute_init_token_count(self):
"""Check if the agent is LLM-based and compute the initial token count."""
if self.llm_config is False:
return 0
func_count = 0
if "functions" in self.llm_config:
func_count = num_tokens_from_functions(self.llm_config["functions"], self.llm_config["model"])
return func_count + count_token(self._oai_system_message, self.llm_config["model"])
def _manage_history_on_token_limit(self, messages, token_used, max_token_allowed, model):
"""Manage the message history with different modes when token limit is reached.
Return:
final (bool): whether to terminate the agent.
compressed_messages (List[Dict]): the compressed messages. None if no compression or compression failed.
"""
# 1. mode = "TERMINATE", terminate the agent if no token left.
if self.compress_config["mode"] == "TERMINATE":
if max_token_allowed - token_used <= 0:
# Terminate if no token left.
print(
colored(
f'Warning: Terminate Agent "{self.name}" due to no token left for oai reply. max token for {model}: {max_token_allowed}, existing token count: {token_used}',
"yellow",
),
flush=True,
)
return True, None
return False, None
# if token_used is less than trigger_count, no compression will be used.
if token_used < self.compress_config["trigger_count"]:
return False, None
# 2. mode = "COMPRESS" or mode = "CUSTOMIZED", compress the messages
copied_messages = copy.deepcopy(messages)
if self.compress_config["mode"] == "COMPRESS":
_, compress_messages = self.compress_messages(copied_messages)
elif self.compress_config["mode"] == "CUSTOMIZED":
_, compress_messages = self.compress_config["compress_function"](copied_messages)
else:
raise ValueError(f"Unknown compression mode: {self.compress_config['mode']}")
if compress_messages is not None:
for i in range(len(compress_messages)):
compress_messages[i] = self._get_valid_oai_message(compress_messages[i])
return False, compress_messages
def _get_valid_oai_message(self, message):
"""Convert a message into a valid OpenAI ChatCompletion message."""
oai_message = {k: message[k] for k in ("content", "function_call", "name", "context", "role") if k in message}
if "content" not in oai_message:
if "function_call" in oai_message:
oai_message["content"] = None # if only function_call is provided, content will be set to None.
else:
raise ValueError(
"Message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided."
)
if "function_call" in oai_message:
oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call.
oai_message["function_call"] = dict(oai_message["function_call"])
return oai_message
def _print_compress_info(self, init_token_count, token_used, token_after_compression):
to_print = "Token Count (including {} tokens from system msg and function descriptions). Before compression : {} | After: {}".format(
init_token_count,
token_used,
token_after_compression,
)
print(colored(to_print, "magenta"), flush=True)
print("-" * 80, flush=True)
def on_oai_token_limit(
self,
messages: Optional[List[Dict]] = None,
sender: Optional[Agent] = None,
config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None]]:
"""(Experimental) Compress previous messages when a threshold of tokens is reached.
TODO: async compress
TODO: maintain a list for old oai messages (messages before compression)
"""
llm_config = self.llm_config if config is None else config
if self.compress_config is False:
return False, None
if messages is None:
messages = self._oai_messages[sender]
model = llm_config["model"]
init_token_count = self._compute_init_token_count()
token_used = init_token_count + count_token(messages, model)
final, compressed_messages = self._manage_history_on_token_limit(
messages, token_used, get_max_token_limit(model), model
)
# update message history with compressed messages
if compressed_messages is not None:
self._print_compress_info(
init_token_count, token_used, count_token(compressed_messages, model) + init_token_count
)
self._oai_messages[sender] = compressed_messages
if self.compress_config["broadcast"]:
# update the compressed message history to sender
sender._oai_messages[self] = copy.deepcopy(compressed_messages)
# switching the role of the messages for the sender
for i in range(len(sender._oai_messages[self])):
cmsg = sender._oai_messages[self][i]
if "function_call" in cmsg or cmsg["role"] == "user":
cmsg["role"] = "assistant"
elif cmsg["role"] == "assistant":
cmsg["role"] = "user"
sender._oai_messages[self][i] = cmsg
# successfully compressed, return False, None for generate_oai_reply to be called with the updated messages
return False, None
return final, None
def compress_messages(
self,
messages: Optional[List[Dict]] = None,
config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None, List]]:
"""Compress a list of messages into one message.
The first message (the initial prompt) will not be compressed.
The rest of the messages will be compressed into one message, the model is asked to distinguish the role of each message: USER, ASSISTANT, FUNCTION_CALL, FUNCTION_RETURN.
Check out the compress_sys_msg.
TODO: the model used by the compression client can differ from the assistant's model. For example, if the original model is gpt-4 (8192-token limit), compression triggers at 70% of usage, about 5734 tokens; if the compression model is gpt-3.5 with a 4096-token limit, this will raise an error. Choose the model automatically?
"""
# 1. use the compression client
client = self.compress_client if config is None else config
# 2. stop if there is only one message in the list
leave_last_n = self.compress_config.get("leave_last_n", 0)
if leave_last_n + 1 >= len(messages):
logger.warning(
f"Warning: Compression skipped at trigger count threshold. The first msg and last {leave_last_n} msgs will not be compressed. current msg count: {len(messages)}. Consider raising trigger_count."
)
return False, None
# 3. put all history into one, except the first one
if self.compress_config["verbose"]:
print(colored("*" * 30 + "Start compressing the following content:" + "*" * 30, "magenta"), flush=True)
compressed_prompt = "Below is the compressed content from the previous conversation, evaluate the process and continue if necessary:\n"
chat_to_compress = "To be compressed:\n"
for m in messages[1 : len(messages) - leave_last_n]: # 0, 1, 2, 3, 4
# Handle function role
if m.get("role") == "function":
chat_to_compress += f"##FUNCTION_RETURN## (from function \"{m['name']}\"): \n{m['content']}\n"
# If name exists in the message
elif "name" in m:
chat_to_compress += f"##{m['name']}({m['role'].upper()})## {m['content']}\n"
# Handle case where content is not None and name is absent
elif m.get("content"): # This condition will also handle None and empty string
if compressed_prompt in m["content"]:
chat_to_compress += m["content"].replace(compressed_prompt, "") + "\n"
else:
chat_to_compress += f"##{m['role'].upper()}## {m['content']}\n"
# Handle function_call in the message
if "function_call" in m:
function_name = m["function_call"].get("name")
function_args = m["function_call"].get("arguments")
if not function_name or not function_args:
chat_to_compress += f"##FUNCTION_CALL## {m['function_call']}\n"
else:
chat_to_compress += f"##FUNCTION_CALL## \nName: {function_name}\nArgs: {function_args}\n"
chat_to_compress = [{"role": "user", "content": chat_to_compress}]
if self.compress_config["verbose"]:
print(chat_to_compress[0]["content"])
# 4. use LLM to compress
compress_sys_msg = """You are a helpful assistant that will summarize and compress conversation history.
Rules:
1. Please summarize each of the message and reserve the exact titles: ##USER##, ##ASSISTANT##, ##FUNCTION_CALL##, ##FUNCTION_RETURN##, ##SYSTEM##, ##<Name>(<Title>)## (e.g. ##Bob(ASSISTANT)##).
2. Try to compress the content but reserve important information (a link, a specific number, etc.).
3. Use words to summarize the code blocks or functions calls (##FUNCTION_CALL##) and their goals. For code blocks, please use ##CODE## to mark it.
4. For returns from functions (##FUNCTION_RETURN##) or returns from code execution: summarize the content and indicate the status of the return (e.g. success, error, etc.).
"""
try:
response = client.create(
context=None,
messages=[{"role": "system", "content": compress_sys_msg}] + chat_to_compress,
)
except Exception as e:
print(colored(f"Failed to compress the content due to {e}", "red"), flush=True)
return False, None
compressed_message = self.client.extract_text_or_completion_object(response)[0]
assert isinstance(compressed_message, str), f"compressed_message should be a string: {compressed_message}"
if self.compress_config["verbose"]:
print(
colored("*" * 30 + "Content after compressing:" + "*" * 30, "magenta"),
flush=True,
)
print(compressed_message, colored("\n" + "*" * 80, "magenta"))
# 5. add compressed message to the first message and return
return (
True,
[
messages[0],
{
"content": compressed_prompt + compressed_message,
"role": "system",
},
]
+ messages[len(messages) - leave_last_n :],
)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,231 +0,0 @@
#!/usr/bin/env python3 -m pytest
import os
import sys
import pytest
import autogen
from autogen import AssistantAgent, UserProxyAgent, token_count_utils
from autogen.agentchat.contrib.capabilities.context_handling import TransformChatHistory
# from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
sys.path.append(os.path.join(os.path.dirname(__file__), "../../.."))
from conftest import skip_openai # noqa: E402
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
try:
from openai import OpenAI
except ImportError:
skip = True
else:
skip = False or skip_openai
def test_transform_chat_history():
"""
Test the TransformChatHistory capability.
In particular, test the following methods:
- _transform_messages
- truncate_str_to_tokens
"""
messages = [
{"role": "system", "content": "System message"},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "This is another test message"},
]
# check whether num of messages is less than max_messages
transform_chat_history = TransformChatHistory(max_messages=1)
transformed_messages = transform_chat_history._transform_messages(messages)
assert len(transformed_messages) == 2 # System message and the last message
# check whether the number of tokens per message is less than max_tokens_per_message
transform_chat_history = TransformChatHistory(max_tokens_per_message=5)
transformed_messages = transform_chat_history._transform_messages(messages)
for message in transformed_messages:
if message["role"] == "system":
continue
else:
assert token_count_utils.count_token(message["content"]) <= 5
transform_chat_history = TransformChatHistory(max_tokens=5)
transformed_messages = transform_chat_history._transform_messages(messages)
token_count = 0
for message in transformed_messages:
if message["role"] == "system":
continue
token_count += token_count_utils.count_token(message["content"])
assert token_count <= 5
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_transform_chat_history_with_agents():
"""
This test creates a GPT-3.5 agent with this capability and tests the add_to_agent method,
including whether it prevents a crash when chat histories become excessively long.
"""
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
KEY_LOC,
filter_dict={"tags": ["gpt-3.5-turbo"]},
)
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1)
context_handling = TransformChatHistory(max_messages=10, max_tokens_per_message=5, max_tokens=1000)
context_handling.add_to_agent(assistant)
user = UserProxyAgent(
"user",
code_execution_config={"work_dir": "coding"},
human_input_mode="NEVER",
is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
max_consecutive_auto_reply=1,
)
# Create a very long chat history that is bound to cause a crash
# for gpt 3.5
for i in range(1000):
assistant_msg = {"role": "assistant", "content": "test " * 1000}
user_msg = {"role": "user", "content": ""}
assistant.send(assistant_msg, user, request_reply=False)
user.send(user_msg, assistant, request_reply=False)
try:
user.initiate_chat(
assistant, message="Plot a chart of nvidia and tesla stock prices for the last 5 years", clear_history=False
)
except Exception as e:
assert False, f"Chat initiation failed with error {str(e)}"
def test_transform_messages():
"""
Test transform_messages_retain_order()
"""
# Test case 1: Test that the order of messages is retained after transformation and that the messages are properly truncated.
messages = [
{"role": "system", "content": "System message"},
{"role": "user", "content": "Hi"},
{"role": "user", "content": "user sending the 2nd test message"},
{"role": "assistant", "content": "assistant sending the 3rd test message"},
{"role": "assistant", "content": "assistant sending the 4th test message"},
]
transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100)
transformed_messages = transform_chat_history._transform_messages(messages)
assert transformed_messages[0]["role"] == "system"
assert transformed_messages[0]["content"] == "System message"
assert transformed_messages[1]["role"] == "user"
assert transformed_messages[1]["content"] == "user sending the 2nd test message"
assert transformed_messages[2]["role"] == "assistant"
assert transformed_messages[2]["content"] == "assistant sending the 3rd test message"
assert transformed_messages[3]["role"] == "assistant"
assert transformed_messages[3]["content"] == "assistant sending the 4th test message"
# Test case 2: Test when no system message
messages = [
{"role": "user", "content": "Hi"},
{"role": "user", "content": "user sending the 2nd test message"},
{"role": "assistant", "content": "assistant sending the 3rd test message"},
{"role": "assistant", "content": "assistant sending the 4th test message"},
]
transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100)
transformed_messages = transform_chat_history._transform_messages(messages)
assert transformed_messages[0]["role"] == "user"
assert transformed_messages[0]["content"] == "user sending the 2nd test message"
assert transformed_messages[1]["role"] == "assistant"
assert transformed_messages[1]["content"] == "assistant sending the 3rd test message"
assert transformed_messages[2]["role"] == "assistant"
assert transformed_messages[2]["content"] == "assistant sending the 4th test message"
messages = [
{"role": "user", "content": "Out of max messages"},
{"role": "assistant", "content": "first second third fourth"},
{"role": "user", "content": "a"},
]
print(f"----Messages (N={len(messages)})----")
original_tokens = 0
for i, msg in enumerate(messages):
print(f"[{msg['role']}-{i}]: {msg['content']}")
tokens = token_count_utils.count_token(msg["content"])
print("Number of tokens: ", tokens)
original_tokens += tokens
print("-----Total tokens: ", original_tokens, "-----")
allowed_max_tokens = 2
transform_chat_history = TransformChatHistory(max_messages=2, max_tokens=allowed_max_tokens)
transformed_messages = transform_chat_history._transform_messages(messages)
print("Max allowed tokens: ", allowed_max_tokens)
print("Transformed contents")
for msg in transformed_messages:
print(msg["content"])
print("Number of tokens: ", token_count_utils.count_token(msg["content"]))
assert len(transformed_messages) == 1
assert transformed_messages[0]["role"] == "user"
def test_truncate_str_to_tokens():
"""
Test the truncate_str_to_tokens function.
"""
from autogen.agentchat.contrib.capabilities.context_handling import truncate_str_to_tokens
# Test case 1: Truncate string with fewer tokens than max_tokens
text = "This is a test"
max_tokens = 5
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == text
# Test case 2: Truncate string with more tokens than max_tokens
text = "This is a test"
max_tokens = 3
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == "This is a"
# Test case 3: Truncate empty string
text = ""
max_tokens = 5
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == ""
# Test case 4: Truncate string with exact number of tokens as max_tokens
text = "This is a test"
max_tokens = 4
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == "This is a test"
# Test case 5: Truncate string with no tokens found
text = "This is a test"
max_tokens = 0
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == ""
# Test case 6: Truncate string when actual tokens are more than max_tokens
text = "This is a test with a looooooonngggg word"
max_tokens = 8
truncated_text = truncate_str_to_tokens(text, max_tokens)
word_count = len(truncated_text.split())
assert word_count <= max_tokens
# Test case 7: Truncate string with exact number of tokens as max_tokens
text = "This\nis\na test"
max_tokens = 4
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert "This\nis" in truncated_text
if __name__ == "__main__":
test_transform_chat_history()
test_transform_chat_history_with_agents()
test_truncate_str_to_tokens()
test_transform_messages()

View File

@@ -1,230 +0,0 @@
#!/usr/bin/env python3 -m pytest
import os
import sys
import pytest
import autogen
from autogen.agentchat.contrib.compressible_agent import CompressibleAgent
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from conftest import skip_openai # noqa: E402
here = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
try:
import openai
except ImportError:
skip = True
else:
skip = False or skip_openai
if not skip:
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
"model": ["gpt-3.5-turbo", "gpt-35-turbo", "gpt-3.5-turbo-16k", "gpt-35-turbo-16k"],
},
)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_mode_compress():
conversations = {}
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "COMPRESS",
"trigger_count": 600,
"verbose": True,
},
)
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE")
or x.get("content", "").rstrip().endswith("TERMINATE."),
code_execution_config={"work_dir": here},
)
user_proxy.initiate_chat(
assistant,
message="Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.",
)
assistant.reset()
print(conversations)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_mode_customized():
try:
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "CUSTOMIZED",
},
)
except ValueError:
print("ValueError raised as expected.")
def constrain_num_messages(messages):
"""Constrain the number of messages to 3.
This is an example of a customized compression function.
Returns:
bool: whether the compression is successful.
list: the compressed messages.
"""
if len(messages) <= 3:
# do nothing
return False, None
# save the first and last two messages
return True, messages[:1] + messages[-2:]
# create a CompressibleAgent instance named "assistant"
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "CUSTOMIZED",
"compress_function": constrain_num_messages, # this is required for customized compression
"trigger_count": 1000,
},
)
# create a UserProxyAgent instance named "user_proxy"
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE")
or x.get("content", "").rstrip().endswith("TERMINATE."),
code_execution_config={"work_dir": "web"},
system_message="""Reply TERMINATE if the task has been solved at full satisfaction.
Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""",
)
user_proxy.initiate_chat(
assistant,
message="""Show me the YTD gain of 10 largest technology companies as of today.""",
)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_compress_message():
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "COMPRESS",
"trigger_count": 600,
"verbose": True,
"leave_last_n": 0,
},
)
assert assistant.compress_messages([{"content": "hello world", "role": "user"}]) == (
False,
None,
), "Single message should not be compressed"
is_success, _ = assistant.compress_messages(
[
{"content": "Hello!", "role": "user"},
{"content": "How can I help you today?", "role": "assistant"},
{"content": "Can you tell me a joke about programming?", "role": "assistant"},
]
)
assert is_success, "Compression failed."
@pytest.mark.skipif(True, reason="Flaky test, CompressibleAgent no longer supported")
def test_mode_terminate():
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config=True,
)
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
code_execution_config={"work_dir": "coding"},
)
final, _ = assistant.on_oai_token_limit(
[
{"content": "Hello!", "role": "user"},
{"content": "How can I help you today?", "role": "assistant"},
{"content": "1&" * 5000, "role": "assistant"},
],
sender=user_proxy,
)
assert final, "Terminating the conversation at max token limit is not working."
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_new_compressible_agent_description():
assistant = CompressibleAgent(name="assistant", description="this is a description", llm_config=False)
assert assistant.description == "this is a description", "description is not set correctly"
if __name__ == "__main__":
# test_mode_compress()
# test_mode_customized()
# test_compress_message()
# test_mode_terminate()
test_new_compressible_agent_description()

View File

@@ -259,16 +259,6 @@ user_proxy = autogen.UserProxyAgent(
code_execution_config={"work_dir":"coding", "use_docker":False})
```
## Migrating from `CompressibleAgent` and `TransformChatHistory` to `TransformMessages`
### Why migrate to `TransformMessages`?
Migrating enhances flexibility, modularity, and customization in handling chat message transformations. `TransformMessages` introduces an improved, extensible approach for pre-processing messages for conversational agents.
### How to migrate?
To ensure a smooth migration process, simply follow the detailed guide provided in [Introduction to TransformMessages](/docs/topics/handling_long_contexts/intro_to_transform_messages.md).
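As a rough sketch of the replacement (the class and parameter names below, such as `MessageHistoryLimiter` and `MessageTokenLimiter`, follow that guide and may differ between AutoGen versions), limits that used to be bundled into `TransformChatHistory` are composed like this:
```python
from autogen.agentchat.contrib.capabilities import transform_messages, transforms

context_handling = transform_messages.TransformMessages(
    transforms=[
        transforms.MessageHistoryLimiter(max_messages=10),  # keep the 10 most recent messages
        transforms.MessageTokenLimiter(max_tokens=1000, max_tokens_per_message=50),  # token caps
    ]
)
context_handling.add_to_agent(assistant)  # `assistant` is any ConversableAgent
```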
### What should I do if I get the error "TypeError: Assistants.create() got an unexpected keyword argument 'file_ids'"?
This error typically occurs when using an AutoGen version earlier than 0.2.27 in combination with OpenAI library version 1.21 or later. It arises because older versions of AutoGen do not support the `file_ids` parameter used by newer versions of the OpenAI API.