mirror of https://github.com/microsoft/autogen.git

Text Compression Transform (#2225)

* adds implementation
* handles optional import
* cleanup
* updates github workflows
* skip test if dependencies not installed
* skip test if dependencies not installed
* use cpu
* skip openai
* unskip openai
* adds protocol
* better docstr
* minor fixes
* updates optional dependencies docs
* wip
* update docstrings
* wip
* adds back llmlingua requirement
* finalized protocol
* improve docstr
* guide complete
* improve docstr
* fix FAQ
* added cache support
* improve cache key
* cache key fix + faq fix
* improve docs
* improve guide
* args -> params
* spelling

This commit is contained in:
parent 5a3a8a5541
commit 372ac1e794
@@ -400,7 +400,7 @@ jobs:
           pip install pytest-cov>=5
       - name: Install packages and dependencies for Transform Messages
         run: |
-          pip install -e .
+          pip install -e '.[long-context]'
       - name: Set AUTOGEN_USE_DOCKER based on OS
         shell: bash
         run: |
@@ -0,0 +1,68 @@
from typing import Any, Dict, Optional, Protocol

IMPORT_ERROR: Optional[Exception] = None
try:
    import llmlingua
except ImportError:
    IMPORT_ERROR = ImportError(
        "LLMLingua is not installed. Please install it with `pip install pyautogen[long-context]`"
    )
    PromptCompressor = object
else:
    from llmlingua import PromptCompressor


class TextCompressor(Protocol):
    """Defines a protocol for text compression to optimize agent interactions."""

    def compress_text(self, text: str, **compression_params) -> Dict[str, Any]:
        """This method takes a string as input and returns a dictionary containing the compressed text and other
        relevant information. The compressed text should be stored under the 'compressed_prompt' key in the dictionary.
        To calculate the number of saved tokens, the dictionary should include 'origin_tokens' and 'compressed_tokens' keys.
        """
        ...


class LLMLingua:
    """Compresses text messages using LLMLingua for improved efficiency in processing and response generation.

    NOTE: The effectiveness of compression and the resultant token savings can vary based on the content of the messages
    and the specific configurations used for the PromptCompressor.
    """

    def __init__(
        self,
        prompt_compressor_kwargs: Dict = dict(
            model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
            use_llmlingua2=True,
            device_map="cpu",
        ),
        structured_compression: bool = False,
    ) -> None:
        """
        Args:
            prompt_compressor_kwargs (dict): A dictionary of keyword arguments for the PromptCompressor. Defaults to a
                dictionary with model_name set to "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
                use_llmlingua2 set to True, and device_map set to "cpu".
            structured_compression (bool): A flag indicating whether to use structured compression. If True, the
                structured_compress_prompt method of the PromptCompressor is used. Otherwise, the compress_prompt method
                is used. Defaults to False.

        Raises:
            ImportError: If the llmlingua library is not installed.
        """
        if IMPORT_ERROR:
            raise IMPORT_ERROR

        self._prompt_compressor = PromptCompressor(**prompt_compressor_kwargs)

        assert isinstance(self._prompt_compressor, llmlingua.PromptCompressor)
        self._compression_method = (
            self._prompt_compressor.structured_compress_prompt
            if structured_compression
            else self._prompt_compressor.compress_prompt
        )

    def compress_text(self, text: str, **compression_params) -> Dict[str, Any]:
        return self._compression_method([text], **compression_params)
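For context, an editor's sketch (not part of this commit): any object with a matching `compress_text` method satisfies the `TextCompressor` protocol, and the downstream `TextMessageCompressor` reads the result under the `compressed_prompt` key. A minimal hand-rolled compressor might look like the following; the class name and the word-based token counting are invented for illustration:

```python
from typing import Any, Dict


class TruncatingCompressor:
    """A toy TextCompressor: keeps only the first `max_words` words of the input."""

    def __init__(self, max_words: int = 100):
        self._max_words = max_words

    def compress_text(self, text: str, **compression_params) -> Dict[str, Any]:
        words = text.split()
        kept = words[: self._max_words]
        return {
            # Key read by TextMessageCompressor for the compressed output.
            "compressed_prompt": " ".join(kept),
            # Keys used to compute token savings (word counts stand in for tokens here).
            "origin_tokens": len(words),
            "compressed_tokens": len(kept),
        }
```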
@@ -1,4 +1,5 @@
 import copy
+import json
 import sys
 from typing import Any, Dict, List, Optional, Protocol, Tuple, Union

@@ -6,6 +7,9 @@ import tiktoken
 from termcolor import colored

 from autogen import token_count_utils
+from autogen.cache import AbstractCache, Cache
+
+from .text_compressors import LLMLingua, TextCompressor


 class MessageTransform(Protocol):

@@ -156,7 +160,7 @@ class MessageTokenLimiter:
         assert self._min_tokens is not None

         # if the total number of tokens in the messages is less than the min_tokens, return the messages as is
-        if not self._are_min_tokens_reached(messages):
+        if not _min_tokens_reached(messages, self._min_tokens):
             return messages

         temp_messages = copy.deepcopy(messages)

@@ -205,19 +209,6 @@ class MessageTokenLimiter:
             return logs_str, True
         return "No tokens were truncated.", False

-    def _are_min_tokens_reached(self, messages: List[Dict]) -> bool:
-        """
-        Returns True if no minimum tokens restrictions are applied.
-
-        Either if the total number of tokens in the messages is greater than or equal to the `min_theshold_tokens`,
-        or no minimum tokens threshold is set.
-        """
-        if not self._min_tokens:
-            return True
-
-        messages_tokens = sum(_count_tokens(msg["content"]) for msg in messages if "content" in msg)
-        return messages_tokens >= self._min_tokens
-
     def _truncate_str_to_tokens(self, contents: Union[str, List], n_tokens: int) -> Union[str, List]:
         if isinstance(contents, str):
             return self._truncate_tokens(contents, n_tokens)

@@ -268,7 +259,7 @@ class MessageTokenLimiter:

         return max_tokens if max_tokens is not None else sys.maxsize

-    def _validate_min_tokens(self, min_tokens: int, max_tokens: int) -> int:
+    def _validate_min_tokens(self, min_tokens: Optional[int], max_tokens: Optional[int]) -> int:
         if min_tokens is None:
             return 0
         if min_tokens < 0:

@@ -278,6 +269,154 @@ class MessageTokenLimiter:
         return min_tokens


+class TextMessageCompressor:
+    """A transform for compressing text messages in a conversation history.
+
+    It uses a specified text compression method to reduce the token count of messages, which can lead to more efficient
+    processing and response generation by downstream models.
+    """
+
+    def __init__(
+        self,
+        text_compressor: Optional[TextCompressor] = None,
+        min_tokens: Optional[int] = None,
+        compression_params: Dict = dict(),
+        cache: Optional[AbstractCache] = Cache.disk(),
+    ):
+        """
+        Args:
+            text_compressor (TextCompressor or None): An instance of a class that implements the TextCompressor
+                protocol. If None, it defaults to LLMLingua.
+            min_tokens (int or None): Minimum number of tokens in messages to apply the transformation. Must be greater
+                than or equal to 0 if not None. If None, no threshold-based compression is applied.
+            compression_params (dict): A dictionary of parameters for the compression method. Defaults to an empty
+                dictionary.
+            cache (None or AbstractCache): The cache client to use to store and retrieve previously compressed messages.
+                If None, no caching will be used.
+        """
+
+        if text_compressor is None:
+            text_compressor = LLMLingua()
+
+        self._validate_min_tokens(min_tokens)
+
+        self._text_compressor = text_compressor
+        self._min_tokens = min_tokens
+        self._compression_args = compression_params
+        self._cache = cache
+
+        # Store the most recent token savings so get_logs can report them without recomputing.
+        self._recent_tokens_savings = 0
+
+    def apply_transform(self, messages: List[Dict]) -> List[Dict]:
+        """Applies compression to messages in a conversation history based on the specified configuration.
+
+        The function processes each message according to the `compression_params` and `min_tokens` settings, applying
+        the specified compression configuration and returning a new list of messages with reduced token counts
+        where possible.
+
+        Args:
+            messages (List[Dict]): A list of message dictionaries to be compressed.
+
+        Returns:
+            List[Dict]: A list of dictionaries with the message content compressed according to the configured
+                method and scope.
+        """
+        # Make sure there is at least one message
+        if not messages:
+            return messages
+
+        # if the total number of tokens in the messages is less than the min_tokens, return the messages as is
+        if not _min_tokens_reached(messages, self._min_tokens):
+            return messages
+
+        total_savings = 0
+        processed_messages = messages.copy()
+        for message in processed_messages:
+            # Some messages may not have content.
+            if not isinstance(message.get("content"), (str, list)):
+                continue
+
+            if _is_content_text_empty(message["content"]):
+                continue
+
+            cached_content = self._cache_get(message["content"])
+            if cached_content is not None:
+                savings, compressed_content = cached_content
+            else:
+                savings, compressed_content = self._compress(message["content"])
+
+            self._cache_set(message["content"], compressed_content, savings)
+
+            message["content"] = compressed_content
+            total_savings += savings
+
+        self._recent_tokens_savings = total_savings
+        return processed_messages
+
+    def get_logs(self, pre_transform_messages: List[Dict], post_transform_messages: List[Dict]) -> Tuple[str, bool]:
+        if self._recent_tokens_savings > 0:
+            return f"{self._recent_tokens_savings} tokens saved with text compression.", True
+        else:
+            return "No tokens saved with text compression.", False
+
+    def _compress(self, content: Union[str, List[Dict]]) -> Tuple[int, Union[str, List[Dict]]]:
+        """Compresses the given text or multimodal content using the specified compression method."""
+        if isinstance(content, str):
+            return self._compress_text(content)
+        elif isinstance(content, list):
+            return self._compress_multimodal(content)
+        else:
+            return 0, content
+
+    def _compress_multimodal(self, content: List[Dict]) -> Tuple[int, List[Dict]]:
+        tokens_saved = 0
+        for msg in content:
+            if "text" in msg:
+                savings, msg["text"] = self._compress_text(msg["text"])
+                tokens_saved += savings
+        return tokens_saved, content
+
+    def _compress_text(self, text: str) -> Tuple[int, str]:
+        """Compresses the given text using the specified compression method."""
+        compressed_text = self._text_compressor.compress_text(text, **self._compression_args)
+
+        savings = 0
+        if "origin_tokens" in compressed_text and "compressed_tokens" in compressed_text:
+            savings = compressed_text["origin_tokens"] - compressed_text["compressed_tokens"]
+
+        return savings, compressed_text["compressed_prompt"]
+
+    def _cache_get(self, content: Union[str, List[Dict]]) -> Optional[Tuple[int, Union[str, List[Dict]]]]:
+        if self._cache:
+            cached_value = self._cache.get(self._cache_key(content))
+            if cached_value:
+                return cached_value
+
+    def _cache_set(
+        self, content: Union[str, List[Dict]], compressed_content: Union[str, List[Dict]], tokens_saved: int
+    ):
+        if self._cache:
+            value = (tokens_saved, json.dumps(compressed_content))
+            self._cache.set(self._cache_key(content), value)
+
+    def _cache_key(self, content: Union[str, List[Dict]]) -> str:
+        return f"{json.dumps(content)}_{self._min_tokens}"
+
+    def _validate_min_tokens(self, min_tokens: Optional[int]):
+        if min_tokens is not None and min_tokens <= 0:
+            raise ValueError("min_tokens must be greater than 0 or None")
+
+
+def _min_tokens_reached(messages: List[Dict], min_tokens: Optional[int]) -> bool:
+    """Returns True if the total number of tokens in the messages is greater than or equal to the specified value."""
+    if not min_tokens:
+        return True
+
+    messages_tokens = sum(_count_tokens(msg["content"]) for msg in messages if "content" in msg)
+    return messages_tokens >= min_tokens
+
+
 def _count_tokens(content: Union[str, List[Dict[str, Any]]]) -> int:
     token_count = 0
     if isinstance(content, str):

@@ -286,3 +425,12 @@ def _count_tokens(content: Union[str, List[Dict[str, Any]]]) -> int:
         for item in content:
             token_count += _count_tokens(item.get("text", ""))
         return token_count
+
+
+def _is_content_text_empty(content: Union[str, List[Dict[str, Any]]]) -> bool:
+    if isinstance(content, str):
+        return content == ""
+    elif isinstance(content, list):
+        return all(_is_content_text_empty(item.get("text", "")) for item in content)
+    else:
+        return False
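A hypothetical end-to-end sketch (not in the diff) showing how the new transform is driven. `TruncatingCompressor` is the toy compressor sketched earlier; `min_tokens` gates whether the transform runs at all, and `cache=None` disables the default disk cache:

```python
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor

compressor = TextMessageCompressor(
    text_compressor=TruncatingCompressor(max_words=50),  # toy compressor defined earlier
    min_tokens=64,  # skip compression entirely when messages total fewer tokens than this
    cache=None,  # disable the default disk cache
)

messages = [{"role": "user", "content": "some very long text " * 200}]
shortened = compressor.apply_transform(messages)

# With the toy word-based counting above, this would report roughly
# ('750 tokens saved with text compression.', True)
print(compressor.get_logs(messages, shortened))
```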
setup.py
@@ -79,6 +79,7 @@ extra_require = {
     "websockets": ["websockets>=12.0,<13"],
     "jupyter-executor": jupyter_executor,
     "types": ["mypy==1.9.0", "pytest>=6.1.1,<8"] + jupyter_executor,
+    "long-context": ["llmlingua<0.3"],
 }

 setuptools.setup(
@@ -1,5 +1,6 @@
 import copy
 from typing import Dict, List
+from unittest.mock import MagicMock, patch

 import pytest

@@ -118,13 +119,82 @@ def test_message_token_limiter_get_logs(message_token_limiter, messages, expected_logs, expected_effect):
     assert logs_str == expected_logs


+def test_text_compression():
+    """Test the TextMessageCompressor transform."""
+    try:
+        from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
+
+        text_compressor = TextMessageCompressor()
+    except ImportError:
+        pytest.skip("LLM Lingua is not installed.")
+
+    text = "Run this test with a long string. "
+    messages = [
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "".join([text] * 3)}],
+        },
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "".join([text] * 3)}],
+        },
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "".join([text] * 3)}],
+        },
+    ]
+
+    transformed_messages = text_compressor.apply_transform([{"content": text}])
+
+    assert len(transformed_messages[0]["content"]) < len(text)
+
+    # Test compressing all messages
+    text_compressor = TextMessageCompressor()
+    transformed_messages = text_compressor.apply_transform(copy.deepcopy(messages))
+    for message in transformed_messages:
+        assert len(message["content"][0]["text"]) < len(messages[0]["content"][0]["text"])
+
+
+def test_text_compression_cache():
+    try:
+        from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
+
+    except ImportError:
+        pytest.skip("LLM Lingua is not installed.")
+
+    messages = get_long_messages()
+    mock_compressed_content = (1, {"content": "mock"})
+
+    with patch(
+        "autogen.agentchat.contrib.capabilities.transforms.TextMessageCompressor._cache_get",
+        MagicMock(return_value=(1, {"content": "mock"})),
+    ) as mocked_get, patch(
+        "autogen.agentchat.contrib.capabilities.transforms.TextMessageCompressor._cache_set", MagicMock()
+    ) as mocked_set:
+        text_compressor = TextMessageCompressor()
+
+        text_compressor.apply_transform(messages)
+        text_compressor.apply_transform(messages)
+
+        assert mocked_get.call_count == len(messages)
+        assert mocked_set.call_count == len(messages)
+
+    # We already populated the cache with the mock content
+    # We need to test if we retrieve the correct content
+    text_compressor = TextMessageCompressor()
+    compressed_messages = text_compressor.apply_transform(messages)
+
+    for message in compressed_messages:
+        assert message["content"] == mock_compressed_content[1]
+
+
 if __name__ == "__main__":
     long_messages = get_long_messages()
     short_messages = get_short_messages()
     no_content_messages = get_no_content_messages()
-    message_history_limiter = MessageHistoryLimiter(max_messages=3)
-    message_token_limiter = MessageTokenLimiter(max_tokens_per_message=3)
-    message_token_limiter_with_threshold = MessageTokenLimiter(max_tokens_per_message=1, min_tokens=10)
+    msg_history_limiter = MessageHistoryLimiter(max_messages=3)
+    msg_token_limiter = MessageTokenLimiter(max_tokens_per_message=3)
+    msg_token_limiter_with_threshold = MessageTokenLimiter(max_tokens_per_message=1, min_tokens=10)

     # Test Parameters
     message_history_limiter_apply_transform_parameters = {

@@ -170,14 +240,14 @@ if __name__ == "__main__":
         message_history_limiter_apply_transform_parameters["messages"],
         message_history_limiter_apply_transform_parameters["expected_messages_len"],
     ):
-        test_message_history_limiter_apply_transform(message_history_limiter, messages, expected_messages_len)
+        test_message_history_limiter_apply_transform(msg_history_limiter, messages, expected_messages_len)

     for messages, expected_logs, expected_effect in zip(
         message_history_limiter_get_logs_parameters["messages"],
         message_history_limiter_get_logs_parameters["expected_logs"],
         message_history_limiter_get_logs_parameters["expected_effect"],
     ):
-        test_message_history_limiter_get_logs(message_history_limiter, messages, expected_logs, expected_effect)
+        test_message_history_limiter_get_logs(msg_history_limiter, messages, expected_logs, expected_effect)

     # Call the MessageTokenLimiter tests

@@ -187,7 +257,7 @@ if __name__ == "__main__":
         message_token_limiter_apply_transform_parameters["expected_messages_len"],
     ):
         test_message_token_limiter_apply_transform(
-            message_token_limiter, messages, expected_token_count, expected_messages_len
+            msg_token_limiter, messages, expected_token_count, expected_messages_len
         )

     for messages, expected_token_count, expected_messages_len in zip(

@@ -196,7 +266,7 @@ if __name__ == "__main__":
         message_token_limiter_with_threshold_apply_transform_parameters["expected_messages_len"],
     ):
         test_message_token_limiter_with_threshold_apply_transform(
-            message_token_limiter_with_threshold, messages, expected_token_count, expected_messages_len
+            msg_token_limiter_with_threshold, messages, expected_token_count, expected_messages_len
         )

     for messages, expected_logs, expected_effect in zip(

@@ -204,4 +274,4 @@ if __name__ == "__main__":
         message_token_limiter_get_logs_parameters["expected_logs"],
         message_token_limiter_get_logs_parameters["expected_effect"],
     ):
-        test_message_token_limiter_get_logs(message_token_limiter, messages, expected_logs, expected_effect)
+        test_message_token_limiter_get_logs(msg_token_limiter, messages, expected_logs, expected_effect)
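These tests skip when LLMLingua is not installed. As an aside (a sketch, not part of this commit), the transform logic itself can be exercised without the optional dependency by passing a stub that satisfies the `TextCompressor` protocol; the stub class and test below are invented for illustration:

```python
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor


class StubCompressor:
    """Fake compressor: keeps the first half of the text and reports fixed token counts."""

    def compress_text(self, text, **compression_params):
        return {"compressed_prompt": text[: len(text) // 2], "origin_tokens": 10, "compressed_tokens": 5}


def test_text_compression_with_stub():
    compressor = TextMessageCompressor(text_compressor=StubCompressor(), cache=None)
    transformed = compressor.apply_transform([{"role": "user", "content": "0123456789"}])
    assert transformed[0]["content"] == "01234"
```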
@@ -267,7 +267,7 @@ Migrating enhances flexibility, modularity, and customization in handling chat message transformations.

 ### How to migrate?

-To ensure a smooth migration process, simply follow the detailed guide provided in [Handling Long Context Conversations with Transform Messages](/docs/topics/long_contexts.md).
+To ensure a smooth migration process, simply follow the detailed guide provided in [Introduction to TransformMessages](/docs/topics/handling_long_contexts/intro_to_transform_messages.md).

 ### What should I do if I get the error "TypeError: Assistants.create() got an unexpected keyword argument 'file_ids'"?
@@ -115,9 +115,16 @@ Example notebooks:

 To use a graph in `GroupChat`, particularly for graph visualization, please install AutoGen with the [graph] option.

 ```bash
 pip install "pyautogen[graph]"
 ```

 Example notebook: [Graph Modeling Language with using select_speaker](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_graph_modelling_language_using_select_speaker.ipynb)

+## Long Context Handling
+
+AutoGen includes support for handling long textual contexts by leveraging the LLMLingua library for text compression. To enable this functionality, please install AutoGen with the `[long-context]` option:
+
+```bash
+pip install "pyautogen[long-context]"
+```
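As a quick, hypothetical sanity check (not part of this docs change), you can confirm that the optional dependency resolved after installation:

```python
# Verify that the llmlingua extra installed correctly.
from importlib.metadata import PackageNotFoundError, version

try:
    print("llmlingua", version("llmlingua"), "is installed")
except PackageNotFoundError:
    print('llmlingua is missing; run: pip install "pyautogen[long-context]"')
```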
@@ -0,0 +1,4 @@
{
  "label": "Handling Long Contexts",
  "collapsible": true
}
@@ -0,0 +1,171 @@
# Compressing Text with LLMLingua

Text compression is crucial for optimizing interactions with LLMs, especially when dealing with long prompts that can lead to higher costs and slower response times. LLMLingua is a tool designed to compress prompts effectively, enhancing the efficiency and cost-effectiveness of LLM operations.

This guide introduces LLMLingua's integration with AutoGen, demonstrating how to use this tool to compress text, thereby optimizing the usage of LLMs for various applications.

:::info Requirements
Install `pyautogen[long-context]` and `PyMuPDF`:

```bash
pip install "pyautogen[long-context]" PyMuPDF
```

For more information, please refer to the [installation guide](/docs/installation/).
:::

## Example 1: Compressing AutoGen Research Paper using LLMLingua

We will look at how we can use `TextMessageCompressor` to compress an AutoGen research paper using `LLMLingua`. Here's how you can initialize `TextMessageCompressor` with LLMLingua, a text compressor that adheres to the `TextCompressor` protocol.

```python
import os
import tempfile

import fitz  # PyMuPDF
import requests

from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor

AUTOGEN_PAPER = "https://arxiv.org/pdf/2308.08155"


def extract_text_from_pdf():
    # Download the PDF
    response = requests.get(AUTOGEN_PAPER)
    response.raise_for_status()  # Ensure the download was successful

    text = ""
    # Save the PDF to a temporary file
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_path = os.path.join(temp_dir, "temp.pdf")
        with open(pdf_path, "wb") as f:
            f.write(response.content)

        # Open the PDF
        with fitz.open(pdf_path) as doc:
            # Read and extract text from each page
            for page in doc:
                text += page.get_text()

    return text


# Example usage
pdf_text = extract_text_from_pdf()

llm_lingua = LLMLingua()
text_compressor = TextMessageCompressor(text_compressor=llm_lingua)
compressed_text = text_compressor.apply_transform([{"content": pdf_text}])

print(text_compressor.get_logs([], []))
```

```console
('19765 tokens saved with text compression.', True)
```

## Example 2: Integrating LLMLingua with `ConversableAgent`

Now, let's integrate `LLMLingua` into a conversational agent within AutoGen. This allows dynamic compression of prompts before they are sent to the LLM.

```python
import os

import autogen
from autogen.agentchat.contrib.capabilities import transform_messages

system_message = "You are a world class researcher."
config_list = [{"model": "gpt-4-turbo", "api_key": os.getenv("OPENAI_API_KEY")}]

# Define your agents: the user proxy and an assistant
researcher = autogen.ConversableAgent(
    "assistant",
    llm_config={"config_list": config_list},
    max_consecutive_auto_reply=1,
    system_message=system_message,
    human_input_mode="NEVER",
)
user_proxy = autogen.UserProxyAgent(
    "user_proxy",
    human_input_mode="NEVER",
    is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
    max_consecutive_auto_reply=1,
)
```

:::tip
Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).
:::

```python
context_handling = transform_messages.TransformMessages(transforms=[text_compressor])
context_handling.add_to_agent(researcher)

message = "Summarize this research paper for me, include the important information" + pdf_text
result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)

print(result.chat_history[1]["content"])
```

```console
19953 tokens saved with text compression.
The paper describes AutoGen, a framework designed to facilitate the development of diverse large language model (LLM) applications through conversational multi-agent systems. The framework emphasizes customization and flexibility, enabling developers to define agent interaction behaviors in natural language or computer code.

Key components of AutoGen include:
1. **Conversable Agents**: These are customizable agents designed to operate autonomously or through human interaction. They are capable of initiating, maintaining, and responding within conversations, contributing effectively to multi-agent dialogues.

2. **Conversation Programming**: AutoGen introduces a programming paradigm centered around conversational interactions among agents. This approach simplifies the development of complex applications by streamlining how agents communicate and interact, focusing on conversational logic rather than traditional coding formats.

3. **Agent Customization and Flexibility**: Developers have the freedom to define the capabilities and behaviors of agents within the system, allowing for a wide range of applications across different domains.

4. **Application Versatility**: The paper outlines various use cases from mathematics and coding to decision-making and entertainment, demonstrating AutoGen's ability to cope with a broad spectrum of complexities and requirements.

5. **Hierarchical and Joint Chat Capabilities**: The system supports complex conversation patterns including hierarchical and multi-agent interactions, facilitating robust dialogues that can dynamically adjust based on the conversation context and the agents' roles.

6. **Open-source and Community Engagement**: AutoGen is presented as an open-source framework, inviting contributions and adaptations from the global development community to expand its capabilities and applications.

The framework's architecture is designed so that it can be seamlessly integrated into existing systems, providing a robust foundation for developing sophisticated multi-agent applications that leverage the capabilities of modern LLMs. The paper also discusses potential ethical considerations and future improvements, highlighting the importance of continual development in response to evolving tech landscapes and user needs.
```

## Example 3: Modifying LLMLingua's Compression Parameters

LLMLingua's flexibility allows for various configurations, such as customizing instructions for the LLM or setting specific token counts for compression. This example demonstrates how to set a target token count, enabling the use of models with smaller context sizes like gpt-3.5.

```python
config_list = [{"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}]
researcher = autogen.ConversableAgent(
    "assistant",
    llm_config={"config_list": config_list},
    max_consecutive_auto_reply=1,
    system_message=system_message,
    human_input_mode="NEVER",
)

text_compressor = TextMessageCompressor(
    text_compressor=llm_lingua,
    compression_params={"target_token": 13000},
    cache=None,
)
context_handling = transform_messages.TransformMessages(transforms=[text_compressor])
context_handling.add_to_agent(researcher)

compressed_text = text_compressor.apply_transform([{"content": message}])

result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)

print(result.chat_history[1]["content"])
```

```console
25308 tokens saved with text compression.
Based on the extensive research paper information provided, it seems that the focus is on developing a framework called AutoGen for creating multi-agent conversations based on Large Language Models (LLMs) for a variety of applications such as math problem solving, coding, decision-making, and more.

The paper discusses the importance of incorporating diverse roles of LLMs, human inputs, and tools to enhance the capabilities of the conversable agents within the AutoGen framework. It also delves into the effectiveness of different systems in various scenarios, showcases the implementation of AutoGen in pilot studies, and compares its performance with other systems in tasks like math problem-solving, coding, and decision-making.

The paper also highlights the different features and components of AutoGen such as the AssistantAgent, UserProxyAgent, ExecutorAgent, and GroupChatManager, emphasizing its flexibility, ease of use, and modularity in managing multi-agent interactions. It presents case analyses to demonstrate the effectiveness of AutoGen in various applications and scenarios.

Furthermore, the paper includes manual evaluations, scenario testing, code examples, and detailed comparisons with other systems like ChatGPT, OptiGuide, MetaGPT, and more, to showcase the performance and capabilities of the AutoGen framework.

Overall, the research paper showcases the potential of AutoGen in facilitating dynamic multi-agent conversations, enhancing decision-making processes, and improving problem-solving tasks with the integration of LLMs, human inputs, and tools in a collaborative framework.
```
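As a closing illustration (an editor's sketch, not part of the committed guide), you can also call the compressor directly to inspect the dictionary that `TextMessageCompressor` consumes; the `origin_tokens`, `compressed_tokens`, and `compressed_prompt` keys are the ones the transform reads, and `target_token` is forwarded through `compress_text` to LLMLingua:

```python
# A sketch: inspect LLMLingua's raw output for a long string.
result = llm_lingua.compress_text("AutoGen enables multi-agent LLM applications. " * 200, target_token=50)
print(result["origin_tokens"], "->", result["compressed_tokens"])
print(result["compressed_prompt"][:120])
```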
@@ -1,4 +1,4 @@
-# Handling Long Context Conversations with Transform Messages
+# Introduction to Transform Messages

 Why do we need to handle long contexts? The problem arises from several constraints and requirements:

@@ -14,6 +14,7 @@ The `TransformMessages` capability is designed to modify incoming messages before

 :::info Requirements
 Install `pyautogen`:

 ```bash
 pip install pyautogen
 ```

@@ -99,9 +100,9 @@ pprint.pprint(processed_short_messages)
 ```console
 [{'content': 'hello there, how are you?', 'role': 'user'},
  {'content': [{'text': 'hello', 'type': 'text'}], 'role': 'assistant'}]
 ```

 We can see that no transformation was applied, because the threshold of 10 total tokens was not reached.

 ### Apply Transformations Using Agents

@@ -318,7 +319,7 @@ result = user_proxy.initiate_chat(
 ```

-````console
+```console
 user_proxy (to assistant):

 What are the two API keys that I just provided

@@ -340,4 +341,4 @@ user_proxy (to assistant):

 --------------------------------------------------------------------------------
 Redacted 2 OpenAI API keys.
-````
+```