Removes Support For `TransformChatHistory` and `CompressibleAgent` (#3313)

* remove old files

* removes ci

* removes faq

---------

Co-authored-by: Li Jiang <bnujli@gmail.com>
Wael Karkoub 2024-08-12 22:28:02 -05:00 committed by GitHub
parent afdaa4c7cb
commit 6682b6d2d0
9 changed files with 2 additions and 2757 deletions

View File

@@ -111,46 +111,7 @@ jobs:
with:
file: ./coverage.xml
flags: unittests
CompressionTest:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.9"]
runs-on: ${{ matrix.os }}
environment: openai1
steps:
# checkout to pr branch
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies
run: |
docker --version
python -m pip install --upgrade pip wheel
pip install -e .
python -c "import autogen"
pip install pytest-cov>=5 pytest-asyncio
- name: Install packages for test when needed
run: |
pip install docker
- name: Coverage
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
run: |
pytest test/agentchat/contrib/test_compressible_agent.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
GPTAssistantAgent:
strategy:
matrix:
@@ -306,44 +267,7 @@ jobs:
with:
file: ./coverage.xml
flags: unittests
ContextHandling:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.11"]
runs-on: ${{ matrix.os }}
environment: openai1
steps:
# checkout to pr branch
- name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies
run: |
docker --version
python -m pip install --upgrade pip wheel
pip install -e .
python -c "import autogen"
pip install pytest-cov>=5
- name: Coverage
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
BING_API_KEY: ${{ secrets.BING_API_KEY }}
run: |
pytest test/agentchat/contrib/capabilities/test_context_handling.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
ImageGen:
strategy:
matrix:

View File

@@ -163,41 +163,6 @@ jobs:
file: ./coverage.xml
flags: unittests
CompressionTest:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies for all tests
run: |
python -m pip install --upgrade pip wheel
pip install pytest-cov>=5
- name: Install packages and dependencies for Compression
run: |
pip install -e .
- name: Set AUTOGEN_USE_DOCKER based on OS
shell: bash
run: |
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
fi
- name: Coverage
run: |
pytest test/agentchat/contrib/test_compressible_agent.py --skip-openai
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
GPTAssistantAgent:
runs-on: ${{ matrix.os }}
strategy:
@@ -384,41 +349,6 @@ jobs:
file: ./coverage.xml
flags: unittests
ContextHandling:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.11"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install packages and dependencies for all tests
run: |
python -m pip install --upgrade pip wheel
pip install pytest-cov>=5
- name: Install packages and dependencies for Context Handling
run: |
pip install -e .
- name: Set AUTOGEN_USE_DOCKER based on OS
shell: bash
run: |
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
fi
- name: Coverage
run: |
pytest test/agentchat/contrib/capabilities/test_context_handling.py --skip-openai
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
flags: unittests
TransformMessages:
runs-on: ${{ matrix.os }}
strategy:
@@ -485,7 +415,6 @@ jobs:
file: ./coverage.xml
flags: unittests
AnthropicTest:
runs-on: ${{ matrix.os }}
strategy:

View File

@@ -1,138 +0,0 @@
import sys
from typing import Dict, List, Optional
from warnings import warn
import tiktoken
from termcolor import colored
from autogen import ConversableAgent, token_count_utils
warn(
"Context handling with TransformChatHistory is deprecated and will be removed in `0.2.30`. "
"Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages",
DeprecationWarning,
stacklevel=2,
)
class TransformChatHistory:
"""
An agent's chat history with other agents is a common context that it uses to generate a reply.
This capability allows the agent to transform its chat history prior to using it to generate a reply.
It does not permanently modify the chat history, but rather processes it on every invocation.
This capability class enables various strategies to transform chat history, such as:
- Truncate messages: truncate each message to a maximum number of tokens.
- Limit number of messages: Truncate the chat history to a maximum number of (recent) messages.
- Limit number of tokens: keep only the most recent messages whose combined token count
fits within a maximum number of tokens.
Note that the system message, because of its special significance, is always kept as is.
The three strategies can be combined. When all of these parameters are specified,
they are applied in the following order:
1. Truncate each message to the maximum number of tokens per message.
2. Limit the number of messages to keep.
3. Limit the total number of tokens in the chat history.
When adding this capability to an agent, the following are modified:
- A hook is added to the hookable method `process_all_messages_before_reply` to transform the
received messages for possible truncation.
The stored message history is not modified.
"""
def __init__(
self,
*,
max_tokens_per_message: Optional[int] = None,
max_messages: Optional[int] = None,
max_tokens: Optional[int] = None,
):
"""
Args:
max_tokens_per_message (Optional[int]): Maximum number of tokens to keep in each message.
max_messages (Optional[int]): Maximum number of messages to keep in the context.
max_tokens (Optional[int]): Maximum number of tokens to keep in the context.
"""
self.max_tokens_per_message = max_tokens_per_message if max_tokens_per_message else sys.maxsize
self.max_messages = max_messages if max_messages else sys.maxsize
self.max_tokens = max_tokens if max_tokens else sys.maxsize
def add_to_agent(self, agent: ConversableAgent):
"""
Adds TransformChatHistory capability to the given agent.
"""
agent.register_hook(hookable_method="process_all_messages_before_reply", hook=self._transform_messages)
def _transform_messages(self, messages: List[Dict]) -> List[Dict]:
"""
Args:
messages: List of messages to process.
Returns:
List of messages with the first system message and the last max_messages messages,
ensuring each message does not exceed max_tokens_per_message.
"""
temp_messages = messages.copy()
processed_messages = []
system_message = None
processed_messages_tokens = 0
if messages[0]["role"] == "system":
system_message = messages[0].copy()
temp_messages.pop(0)
total_tokens = sum(
token_count_utils.count_token(msg["content"]) for msg in temp_messages
) # Calculate tokens for all messages
# Truncate each message's content to the per-message token limit
# Process recent messages first
for msg in reversed(temp_messages[-self.max_messages :]):
msg["content"] = truncate_str_to_tokens(msg["content"], self.max_tokens_per_message)
msg_tokens = token_count_utils.count_token(msg["content"])
if processed_messages_tokens + msg_tokens > self.max_tokens:
break
# append the message to the beginning of the list to preserve order
processed_messages = [msg] + processed_messages
processed_messages_tokens += msg_tokens
if system_message:
processed_messages.insert(0, system_message)
# Optionally, log the number of truncated messages and tokens if needed
num_truncated = len(messages) - len(processed_messages)
if num_truncated > 0 or total_tokens > processed_messages_tokens:
print(
colored(
f"Truncated {num_truncated} messages. Reduced from {len(messages)} to {len(processed_messages)}.",
"yellow",
)
)
print(
colored(
f"Truncated {total_tokens - processed_messages_tokens} tokens. Tokens reduced from {total_tokens} to {processed_messages_tokens}",
"yellow",
)
)
return processed_messages
def truncate_str_to_tokens(text: str, max_tokens: int, model: str = "gpt-3.5-turbo-0613") -> str:
"""Truncate a string so that the number of tokens is less than or equal to max_tokens using tiktoken.
Args:
text: The string to truncate.
max_tokens: The maximum number of tokens to keep.
model: The target OpenAI model for tokenization alignment.
Returns:
The truncated string.
"""
encoding = tiktoken.encoding_for_model(model) # Get the appropriate tokenizer
encoded_tokens = encoding.encode(text)
truncated_tokens = encoded_tokens[:max_tokens]
truncated_text = encoding.decode(truncated_tokens) # Decode back to text
return truncated_text
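# --- Editor's note: a minimal usage sketch, not part of the original module. It relies only on
# names defined or imported above; the agent name and message contents are illustrative.
if __name__ == "__main__":
    agent = ConversableAgent("assistant", llm_config=False, human_input_mode="NEVER")

    # Keep at most 5 recent messages, each truncated to 50 tokens, within a 500-token budget.
    capability = TransformChatHistory(max_messages=5, max_tokens_per_message=50, max_tokens=500)
    capability.add_to_agent(agent)  # registers the process_all_messages_before_reply hook

    # The transform can also be exercised directly, as the removed unit tests did.
    history = [{"role": "system", "content": "You are terse."}] + [
        {"role": "user", "content": "filler " * 100} for _ in range(20)
    ]
    print(len(capability._transform_messages(history)))  # at most 6: system message + 5 recent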

View File

@@ -1,436 +0,0 @@
import copy
import inspect
import logging
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
from warnings import warn
from autogen import Agent, ConversableAgent, OpenAIWrapper
from autogen.token_count_utils import count_token, get_max_token_limit, num_tokens_from_functions
from ...formatting_utils import colored
logger = logging.getLogger(__name__)
warn(
"Context handling with CompressibleAgent is deprecated and will be removed in `0.2.30`. "
"Please use `TransformMessages`, documentation can be found at https://microsoft.github.io/autogen/docs/topics/handling_long_contexts/intro_to_transform_messages",
DeprecationWarning,
stacklevel=2,
)
class CompressibleAgent(ConversableAgent):
"""CompressibleAgent agent. While this agent retains all the default functionalities of the `AssistantAgent`,
it also provides the added feature of compression when activated through the `compress_config` setting.
`compress_config` is set to False by default, making this agent equivalent to the `AssistantAgent`.
This agent does not work well in a GroupChat: The compressed messages will not be sent to all the agents in the group.
The default system message is the same as AssistantAgent.
`human_input_mode` defaults to "NEVER" and `code_execution_config` defaults to False,
so this agent does not execute code or call functions by default.
"""
DEFAULT_SYSTEM_MESSAGE = """You are a helpful AI assistant.
Solve tasks using your coding and language skills.
In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.
1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.
2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.
Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.
When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.
If you want the user to save the code in a file before executing it, put # filename: <filename> inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.
Reply "TERMINATE" in the end when everything is done.
"""
DEFAULT_COMPRESS_CONFIG = {
"mode": "TERMINATE",
"compress_function": None,
"trigger_count": 0.7,
"async": False,
"broadcast": True,
"verbose": False,
"leave_last_n": 2,
}
def __init__(
self,
name: str,
system_message: Optional[str] = DEFAULT_SYSTEM_MESSAGE,
is_termination_msg: Optional[Callable[[Dict], bool]] = None,
max_consecutive_auto_reply: Optional[int] = None,
human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
function_map: Optional[Dict[str, Callable]] = None,
code_execution_config: Optional[Union[Dict, bool]] = False,
llm_config: Optional[Union[Dict, bool]] = None,
default_auto_reply: Optional[Union[str, Dict, None]] = "",
compress_config: Optional[Dict] = False,
description: Optional[str] = None,
**kwargs,
):
"""
Args:
name (str): agent name.
system_message (str): system message for the ChatCompletion inference.
Please override this attribute if you want to reprogram the agent.
llm_config (dict): llm inference configuration.
Note: you must set `model` in llm_config. It will be used to compute the token count.
Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
for available options.
is_termination_msg (function): a function that takes a message in the form of a dictionary
and returns a boolean value indicating if this received message is a termination message.
The dict can contain the following keys: "content", "role", "name", "function_call".
max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
The limit only plays a role when human_input_mode is not "ALWAYS".
compress_config (dict or True/False): config for compression before oai_reply. Default to False.
The dict may contain the following keys:
- "mode" (Optional, str, default to "TERMINATE"): Choose from ["COMPRESS", "TERMINATE", "CUSTOMIZED"].
1. `TERMINATE`: terminate the conversation ONLY when token count exceeds the max limit of current model. `trigger_count` is NOT used in this mode.
2. `COMPRESS`: compress the messages when the token count exceeds the limit.
3. `CUSTOMIZED`: pass in a customized function to compress the messages.
- "compress_function" (Optional, callable, default to None): Must be provided when mode is "CUSTOMIZED".
The function should take a list of messages and return a tuple of (is_compress_success: bool, compressed_messages: List[Dict]).
- "trigger_count" (Optional, float, int, default to 0.7): the threshold to trigger compression.
If a float in (0, 1], it is interpreted as that fraction of the model's maximum token limit; if an int, it is an absolute number of tokens.
- "async" (Optional, bool, default to False): whether to compress asynchronously.
- "broadcast" (Optional, bool, default to True): whether to update the compressed message history to sender.
- "verbose" (Optional, bool, default to False): Whether to print the content before and after compression. Used when mode="COMPRESS".
- "leave_last_n" (Optional, int, default to 0): If provided, the last n messages will not be compressed. Used when mode="COMPRESS".
description (str): a short description of the agent. This description is used by other agents
(e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message)
**kwargs (dict): Please refer to other kwargs in
[ConversableAgent](../conversable_agent#__init__).
"""
super().__init__(
name=name,
system_message=system_message,
is_termination_msg=is_termination_msg,
max_consecutive_auto_reply=max_consecutive_auto_reply,
human_input_mode=human_input_mode,
function_map=function_map,
code_execution_config=code_execution_config,
llm_config=llm_config,
default_auto_reply=default_auto_reply,
description=description,
**kwargs,
)
self._set_compress_config(compress_config)
# create a separate client for compression.
if llm_config is False:
self.llm_compress_config = False
self.compress_client = None
else:
if "model" not in llm_config:
raise ValueError("llm_config must contain the 'model' field.")
self.llm_compress_config = self.llm_config.copy()
# remove functions
if "functions" in self.llm_compress_config:
del self.llm_compress_config["functions"]
self.compress_client = OpenAIWrapper(**self.llm_compress_config)
self._reply_func_list.clear()
self.register_reply([Agent, None], ConversableAgent.generate_oai_reply)
self.register_reply([Agent], CompressibleAgent.on_oai_token_limit) # check token limit
self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply)
self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply)
self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply)
def _set_compress_config(self, compress_config: Optional[Dict] = False):
if compress_config:
if compress_config is True:
compress_config = {}
if not isinstance(compress_config, dict):
raise ValueError("compress_config must be a dict or True/False.")
allowed_modes = ["COMPRESS", "TERMINATE", "CUSTOMIZED"]
if compress_config.get("mode", "TERMINATE") not in allowed_modes:
raise ValueError(f"Invalid compression mode. Allowed values are: {', '.join(allowed_modes)}")
self.compress_config = self.DEFAULT_COMPRESS_CONFIG.copy()
self.compress_config.update(compress_config)
if not isinstance(self.compress_config["leave_last_n"], int) or self.compress_config["leave_last_n"] < 0:
raise ValueError("leave_last_n must be a non-negative integer.")
# convert trigger_count to int, default to 0.7
trigger_count = self.compress_config["trigger_count"]
if not (isinstance(trigger_count, int) or isinstance(trigger_count, float)) or trigger_count <= 0:
raise ValueError("trigger_count must be a positive number.")
if isinstance(trigger_count, float) and 0 < trigger_count <= 1:
self.compress_config["trigger_count"] = int(
trigger_count * get_max_token_limit(self.llm_config["model"])
)
trigger_count = self.compress_config["trigger_count"]
init_count = self._compute_init_token_count()
if trigger_count < init_count:
print(
f"Warning: trigger_count {trigger_count} is less than the initial token count {init_count} (system message + function description if passed), compression will be disabled. Please increase trigger_count if you want to enable compression."
)
self.compress_config = False
if self.compress_config["mode"] == "CUSTOMIZED" and self.compress_config["compress_function"] is None:
raise ValueError("compress_function must be provided when mode is CUSTOMIZED.")
if self.compress_config["mode"] != "CUSTOMIZED" and self.compress_config["compress_function"] is not None:
print("Warning: compress_function is provided but mode is not 'CUSTOMIZED'.")
else:
self.compress_config = False
def generate_reply(
self,
messages: Optional[List[Dict]] = None,
sender: Optional[Agent] = None,
exclude: Optional[List[Callable]] = None,
) -> Union[str, Dict, None]:
"""
Adding to line 202:
```
if messages is not None and messages != self._oai_messages[sender]:
messages = self._oai_messages[sender]
```
"""
if all((messages is None, sender is None)):
error_msg = f"Either {messages=} or {sender=} must be provided."
logger.error(error_msg)
raise AssertionError(error_msg)
if messages is None:
messages = self._oai_messages[sender]
for reply_func_tuple in self._reply_func_list:
reply_func = reply_func_tuple["reply_func"]
if exclude and reply_func in exclude:
continue
if inspect.iscoroutinefunction(reply_func):
continue
if self._match_trigger(reply_func_tuple["trigger"], sender):
final, reply = reply_func(self, messages=messages, sender=sender, config=reply_func_tuple["config"])
if messages is not None and sender is not None and messages != self._oai_messages[sender]:
messages = self._oai_messages[sender]
if final:
return reply
return self._default_auto_reply
def _compute_init_token_count(self):
"""Check if the agent is LLM-based and compute the initial token count."""
if self.llm_config is False:
return 0
func_count = 0
if "functions" in self.llm_config:
func_count = num_tokens_from_functions(self.llm_config["functions"], self.llm_config["model"])
return func_count + count_token(self._oai_system_message, self.llm_config["model"])
def _manage_history_on_token_limit(self, messages, token_used, max_token_allowed, model):
"""Manage the message history with different modes when token limit is reached.
Return:
final (bool): whether to terminate the agent.
compressed_messages (List[Dict]): the compressed messages. None if no compression or compression failed.
"""
# 1. mode = "TERMINATE", terminate the agent if no token left.
if self.compress_config["mode"] == "TERMINATE":
if max_token_allowed - token_used <= 0:
# Terminate if no token left.
print(
colored(
f'Warning: Terminate Agent "{self.name}" due to no token left for oai reply. max token for {model}: {max_token_allowed}, existing token count: {token_used}',
"yellow",
),
flush=True,
)
return True, None
return False, None
# if token_used is less than trigger_count, no compression will be used.
if token_used < self.compress_config["trigger_count"]:
return False, None
# 2. mode = "COMPRESS" or mode = "CUSTOMIZED", compress the messages
copied_messages = copy.deepcopy(messages)
if self.compress_config["mode"] == "COMPRESS":
_, compress_messages = self.compress_messages(copied_messages)
elif self.compress_config["mode"] == "CUSTOMIZED":
_, compress_messages = self.compress_config["compress_function"](copied_messages)
else:
raise ValueError(f"Unknown compression mode: {self.compress_config['mode']}")
if compress_messages is not None:
for i in range(len(compress_messages)):
compress_messages[i] = self._get_valid_oai_message(compress_messages[i])
return False, compress_messages
def _get_valid_oai_message(self, message):
"""Convert a message into a valid OpenAI ChatCompletion message."""
oai_message = {k: message[k] for k in ("content", "function_call", "name", "context", "role") if k in message}
if "content" not in oai_message:
if "function_call" in oai_message:
oai_message["content"] = None # if only function_call is provided, content will be set to None.
else:
raise ValueError(
"Message can't be converted into a valid ChatCompletion message. Either content or function_call must be provided."
)
if "function_call" in oai_message:
oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call.
oai_message["function_call"] = dict(oai_message["function_call"])
return oai_message
def _print_compress_info(self, init_token_count, token_used, token_after_compression):
to_print = "Token Count (including {} tokens from system msg and function descriptions). Before compression : {} | After: {}".format(
init_token_count,
token_used,
token_after_compression,
)
print(colored(to_print, "magenta"), flush=True)
print("-" * 80, flush=True)
def on_oai_token_limit(
self,
messages: Optional[List[Dict]] = None,
sender: Optional[Agent] = None,
config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None]]:
"""(Experimental) Compress previous messages when a threshold of tokens is reached.
TODO: async compress
TODO: maintain a list for old oai messages (messages before compression)
"""
llm_config = self.llm_config if config is None else config
if self.compress_config is False:
return False, None
if messages is None:
messages = self._oai_messages[sender]
model = llm_config["model"]
init_token_count = self._compute_init_token_count()
token_used = init_token_count + count_token(messages, model)
final, compressed_messages = self._manage_history_on_token_limit(
messages, token_used, get_max_token_limit(model), model
)
# update message history with compressed messages
if compressed_messages is not None:
self._print_compress_info(
init_token_count, token_used, count_token(compressed_messages, model) + init_token_count
)
self._oai_messages[sender] = compressed_messages
if self.compress_config["broadcast"]:
# update the compressed message history to sender
sender._oai_messages[self] = copy.deepcopy(compressed_messages)
# switching the role of the messages for the sender
for i in range(len(sender._oai_messages[self])):
cmsg = sender._oai_messages[self][i]
if "function_call" in cmsg or cmsg["role"] == "user":
cmsg["role"] = "assistant"
elif cmsg["role"] == "assistant":
cmsg["role"] = "user"
sender._oai_messages[self][i] = cmsg
# successfully compressed, return False, None for generate_oai_reply to be called with the updated messages
return False, None
return final, None
def compress_messages(
self,
messages: Optional[List[Dict]] = None,
config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None, List]]:
"""Compress a list of messages into one message.
The first message (the initial prompt) will not be compressed.
The rest of the messages will be compressed into one message, the model is asked to distinguish the role of each message: USER, ASSISTANT, FUNCTION_CALL, FUNCTION_RETURN.
Check out the compress_sys_msg.
TODO: the model used by the compression client can differ from the assistant's model. For example, if the original model is gpt-4 (8192-token limit), compression triggers at 70% of usage, about 5734 tokens; if the compression model is gpt-3.5 with a 4096-token limit, this will raise an error. Choose the model automatically?
"""
# 1. use the compression client
client = self.compress_client if config is None else config
# 2. stop if there is only one message in the list
leave_last_n = self.compress_config.get("leave_last_n", 0)
if leave_last_n + 1 >= len(messages):
logger.warning(
f"Warning: Compression skipped at trigger count threshold. The first msg and last {leave_last_n} msgs will not be compressed. current msg count: {len(messages)}. Consider raising trigger_count."
)
return False, None
# 3. put all history into one, except the first one
if self.compress_config["verbose"]:
print(colored("*" * 30 + "Start compressing the following content:" + "*" * 30, "magenta"), flush=True)
compressed_prompt = "Below is the compressed content from the previous conversation, evaluate the process and continue if necessary:\n"
chat_to_compress = "To be compressed:\n"
for m in messages[1 : len(messages) - leave_last_n]: # 0, 1, 2, 3, 4
# Handle function role
if m.get("role") == "function":
chat_to_compress += f"##FUNCTION_RETURN## (from function \"{m['name']}\"): \n{m['content']}\n"
# If name exists in the message
elif "name" in m:
chat_to_compress += f"##{m['name']}({m['role'].upper()})## {m['content']}\n"
# Handle case where content is not None and name is absent
elif m.get("content"): # This condition will also handle None and empty string
if compressed_prompt in m["content"]:
chat_to_compress += m["content"].replace(compressed_prompt, "") + "\n"
else:
chat_to_compress += f"##{m['role'].upper()}## {m['content']}\n"
# Handle function_call in the message
if "function_call" in m:
function_name = m["function_call"].get("name")
function_args = m["function_call"].get("arguments")
if not function_name or not function_args:
chat_to_compress += f"##FUNCTION_CALL## {m['function_call']}\n"
else:
chat_to_compress += f"##FUNCTION_CALL## \nName: {function_name}\nArgs: {function_args}\n"
chat_to_compress = [{"role": "user", "content": chat_to_compress}]
if self.compress_config["verbose"]:
print(chat_to_compress[0]["content"])
# 4. use LLM to compress
compress_sys_msg = """You are a helpful assistant that will summarize and compress conversation history.
Rules:
1. Please summarize each of the message and reserve the exact titles: ##USER##, ##ASSISTANT##, ##FUNCTION_CALL##, ##FUNCTION_RETURN##, ##SYSTEM##, ##<Name>(<Title>)## (e.g. ##Bob(ASSISTANT)##).
2. Try to compress the content but reserve important information (a link, a specific number, etc.).
3. Use words to summarize the code blocks or functions calls (##FUNCTION_CALL##) and their goals. For code blocks, please use ##CODE## to mark it.
4. For returns from functions (##FUNCTION_RETURN##) or returns from code execution: summarize the content and indicate the status of the return (e.g. success, error, etc.).
"""
try:
response = client.create(
context=None,
messages=[{"role": "system", "content": compress_sys_msg}] + chat_to_compress,
)
except Exception as e:
print(colored(f"Failed to compress the content due to {e}", "red"), flush=True)
return False, None
compressed_message = self.client.extract_text_or_completion_object(response)[0]
assert isinstance(compressed_message, str), f"compressed_message should be a string: {compressed_message}"
if self.compress_config["verbose"]:
print(
colored("*" * 30 + "Content after compressing:" + "*" * 30, "magenta"),
flush=True,
)
print(compressed_message, colored("\n" + "*" * 80, "magenta"))
# 5. add compressed message to the first message and return
return (
True,
[
messages[0],
{
"content": compressed_prompt + compressed_message,
"role": "system",
},
]
+ messages[len(messages) - leave_last_n :],
)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,231 +0,0 @@
#!/usr/bin/env python3 -m pytest
import os
import sys
import pytest
import autogen
from autogen import AssistantAgent, UserProxyAgent, token_count_utils
from autogen.agentchat.contrib.capabilities.context_handling import TransformChatHistory
# from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
sys.path.append(os.path.join(os.path.dirname(__file__), "../../.."))
from conftest import skip_openai # noqa: E402
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
try:
from openai import OpenAI
except ImportError:
skip = True
else:
skip = False or skip_openai
def test_transform_chat_history():
"""
Test the TransformChatHistory capability.
In particular, test the following methods:
- _transform_messages
- truncate_str_to_tokens
"""
messages = [
{"role": "system", "content": "System message"},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "This is another test message"},
]
# check whether num of messages is less than max_messages
transform_chat_history = TransformChatHistory(max_messages=1)
transformed_messages = transform_chat_history._transform_messages(messages)
assert len(transformed_messages) == 2 # System message and the last message
# check whether the number of tokens per message is less than max_tokens_per_message
transform_chat_history = TransformChatHistory(max_tokens_per_message=5)
transformed_messages = transform_chat_history._transform_messages(messages)
for message in transformed_messages:
if message["role"] == "system":
continue
else:
assert token_count_utils.count_token(message["content"]) <= 5
transform_chat_history = TransformChatHistory(max_tokens=5)
transformed_messages = transform_chat_history._transform_messages(messages)
token_count = 0
for message in transformed_messages:
if message["role"] == "system":
continue
token_count += token_count_utils.count_token(message["content"])
assert token_count <= 5
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
def test_transform_chat_history_with_agents():
"""
This test creates a GPT-3.5 agent with this capability and tests the add_to_agent method,
including whether it prevents a crash when chat histories become excessively long.
"""
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
KEY_LOC,
filter_dict={"tags": ["gpt-3.5-turbo"]},
)
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1)
context_handling = TransformChatHistory(max_messages=10, max_tokens_per_message=5, max_tokens=1000)
context_handling.add_to_agent(assistant)
user = UserProxyAgent(
"user",
code_execution_config={"work_dir": "coding"},
human_input_mode="NEVER",
is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
max_consecutive_auto_reply=1,
)
# Create a very long chat history that is bound to cause a crash
# for gpt 3.5
for i in range(1000):
assistant_msg = {"role": "assistant", "content": "test " * 1000}
user_msg = {"role": "user", "content": ""}
assistant.send(assistant_msg, user, request_reply=False)
user.send(user_msg, assistant, request_reply=False)
try:
user.initiate_chat(
assistant, message="Plot a chart of nvidia and tesla stock prices for the last 5 years", clear_history=False
)
except Exception as e:
assert False, f"Chat initiation failed with error {str(e)}"
def test_transform_messages():
"""
Test transform_messages_retain_order()
"""
# Test case 1: Test that the order of messages is retained after transformation and that the messages are properly truncated.
messages = [
{"role": "system", "content": "System message"},
{"role": "user", "content": "Hi"},
{"role": "user", "content": "user sending the 2nd test message"},
{"role": "assistant", "content": "assistant sending the 3rd test message"},
{"role": "assistant", "content": "assistant sending the 4th test message"},
]
transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100)
transformed_messages = transform_chat_history._transform_messages(messages)
assert transformed_messages[0]["role"] == "system"
assert transformed_messages[0]["content"] == "System message"
assert transformed_messages[1]["role"] == "user"
assert transformed_messages[1]["content"] == "user sending the 2nd test message"
assert transformed_messages[2]["role"] == "assistant"
assert transformed_messages[2]["content"] == "assistant sending the 3rd test message"
assert transformed_messages[3]["role"] == "assistant"
assert transformed_messages[3]["content"] == "assistant sending the 4th test message"
# Test case 2: Test when no system message
messages = [
{"role": "user", "content": "Hi"},
{"role": "user", "content": "user sending the 2nd test message"},
{"role": "assistant", "content": "assistant sending the 3rd test message"},
{"role": "assistant", "content": "assistant sending the 4th test message"},
]
transform_chat_history = TransformChatHistory(max_messages=3, max_tokens_per_message=10, max_tokens=100)
transformed_messages = transform_chat_history._transform_messages(messages)
assert transformed_messages[0]["role"] == "user"
assert transformed_messages[0]["content"] == "user sending the 2nd test message"
assert transformed_messages[1]["role"] == "assistant"
assert transformed_messages[1]["content"] == "assistant sending the 3rd test message"
assert transformed_messages[2]["role"] == "assistant"
assert transformed_messages[2]["content"] == "assistant sending the 4th test message"
messages = [
{"role": "user", "content": "Out of max messages"},
{"role": "assistant", "content": "first second third fourth"},
{"role": "user", "content": "a"},
]
print(f"----Messages (N={len(messages)})----")
original_tokens = 0
for i, msg in enumerate(messages):
print(f"[{msg['role']}-{i}]: {msg['content']}")
tokens = token_count_utils.count_token(msg["content"])
print("Number of tokens: ", tokens)
original_tokens += tokens
print("-----Total tokens: ", original_tokens, "-----")
allowed_max_tokens = 2
transform_chat_history = TransformChatHistory(max_messages=2, max_tokens=allowed_max_tokens)
transformed_messages = transform_chat_history._transform_messages(messages)
print("Max allowed tokens: ", allowed_max_tokens)
print("Transformed contents")
for msg in transformed_messages:
print(msg["content"])
print("Number of tokens: ", token_count_utils.count_token(msg["content"]))
assert len(transformed_messages) == 1
assert transformed_messages[0]["role"] == "user"
def test_truncate_str_to_tokens():
"""
Test the truncate_str_to_tokens function.
"""
from autogen.agentchat.contrib.capabilities.context_handling import truncate_str_to_tokens
# Test case 1: Truncate string with fewer tokens than max_tokens
text = "This is a test"
max_tokens = 5
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == text
# Test case 2: Truncate string with more tokens than max_tokens
text = "This is a test"
max_tokens = 3
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == "This is a"
# Test case 3: Truncate empty string
text = ""
max_tokens = 5
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == ""
# Test case 4: Truncate string with exact number of tokens as max_tokens
text = "This is a test"
max_tokens = 4
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == "This is a test"
# Test case 5: Truncate string with no tokens found
text = "This is a test"
max_tokens = 0
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert truncated_text == ""
# Test case 6: Truncate string when actual tokens are more than max_tokens
text = "This is a test with a looooooonngggg word"
max_tokens = 8
truncated_text = truncate_str_to_tokens(text, max_tokens)
word_count = len(truncated_text.split())
assert word_count <= max_tokens
# Test case 7: Truncate string with exact number of tokens as max_tokens
text = "This\nis\na test"
max_tokens = 4
truncated_text = truncate_str_to_tokens(text, max_tokens)
assert "This\nis" in truncated_text
if __name__ == "__main__":
test_transform_chat_history()
test_transform_chat_history_with_agents()
test_truncate_str_to_tokens()
test_transform_messages()

View File

@@ -1,230 +0,0 @@
#!/usr/bin/env python3 -m pytest
import os
import sys
import pytest
import autogen
from autogen.agentchat.contrib.compressible_agent import CompressibleAgent
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from conftest import skip_openai # noqa: E402
here = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
try:
import openai
except ImportError:
skip = True
else:
skip = False or skip_openai
if not skip:
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
"model": ["gpt-3.5-turbo", "gpt-35-turbo", "gpt-3.5-turbo-16k", "gpt-35-turbo-16k"],
},
)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_mode_compress():
conversations = {}
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "COMPRESS",
"trigger_count": 600,
"verbose": True,
},
)
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE")
or x.get("content", "").rstrip().endswith("TERMINATE."),
code_execution_config={"work_dir": here},
)
user_proxy.initiate_chat(
assistant,
message="Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.",
)
assistant.reset()
print(conversations)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_mode_customized():
try:
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "CUSTOMIZED",
},
)
except ValueError:
print("ValueError raised as expected.")
def constrain_num_messages(messages):
"""Constrain the number of messages to 3.
This is an example of a customized compression function.
Returns:
bool: whether the compression is successful.
list: the compressed messages.
"""
if len(messages) <= 3:
# do nothing
return False, None
# save the first and last two messages
return True, messages[:1] + messages[-2:]
# create a CompressibleAgent instance named "assistant"
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "CUSTOMIZED",
"compress_function": constrain_num_messages, # this is required for customized compression
"trigger_count": 1000,
},
)
# create a UserProxyAgent instance named "user_proxy"
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE")
or x.get("content", "").rstrip().endswith("TERMINATE."),
code_execution_config={"work_dir": "web"},
system_message="""Reply TERMINATE if the task has been solved at full satisfaction.
Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""",
)
user_proxy.initiate_chat(
assistant,
message="""Show me the YTD gain of 10 largest technology companies as of today.""",
)
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_compress_message():
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config={
"mode": "COMPRESS",
"trigger_count": 600,
"verbose": True,
"leave_last_n": 0,
},
)
assert assistant.compress_messages([{"content": "hello world", "role": "user"}]) == (
False,
None,
), "Single message should not be compressed"
is_success, _ = assistant.compress_messages(
[
{"content": "Hello!", "role": "user"},
{"content": "How can I help you today?", "role": "assistant"},
{"content": "Can you tell me a joke about programming?", "role": "assistant"},
]
)
assert is_success, "Compression failed."
@pytest.mark.skipif(True, reason="Flaky test, CompressibleAgent no longer supported")
def test_mode_terminate():
assistant = CompressibleAgent(
name="assistant",
llm_config={
"timeout": 600,
"cache_seed": 43,
"config_list": config_list,
"model": "gpt-3.5-turbo",
},
compress_config=True,
)
user_proxy = autogen.UserProxyAgent(
name="user_proxy",
is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
human_input_mode="NEVER",
max_consecutive_auto_reply=5,
code_execution_config={"work_dir": "coding"},
)
final, _ = assistant.on_oai_token_limit(
[
{"content": "Hello!", "role": "user"},
{"content": "How can I help you today?", "role": "assistant"},
{"content": "1&" * 5000, "role": "assistant"},
],
sender=user_proxy,
)
assert final, "Terminating the conversation at max token limit is not working."
@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_new_compressible_agent_description():
assistant = CompressibleAgent(name="assistant", description="this is a description", llm_config=False)
assert assistant.description == "this is a description", "description is not set correctly"
if __name__ == "__main__":
# test_mode_compress()
# test_mode_customized()
# test_compress_message()
# test_mode_terminate()
test_new_compressible_agent_description()

View File

@@ -259,16 +259,6 @@ user_proxy = autogen.UserProxyAgent(
code_execution_config={"work_dir":"coding", "use_docker":False})
```
## Migrating from `CompressibleAgent` and `TransformChatHistory` to `TransformMessages`
### Why migrate to `TransformMessages`?
Migrating enhances flexibility, modularity, and customization in handling chat message transformations. `TransformMessages` introduces an improved, extensible approach for pre-processing messages for conversational agents.
### How to migrate?
To ensure a smooth migration process, simply follow the detailed guide provided in [Introduction to TransformMessages](/docs/topics/handling_long_contexts/intro_to_transform_messages.md).
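As a rough sketch of the replacement (the class and parameter names below, such as `MessageHistoryLimiter` and `MessageTokenLimiter`, follow that guide and may differ between AutoGen versions), limits that used to be bundled into `TransformChatHistory` are composed like this:
```python
from autogen.agentchat.contrib.capabilities import transform_messages, transforms

context_handling = transform_messages.TransformMessages(
    transforms=[
        transforms.MessageHistoryLimiter(max_messages=10),  # keep the 10 most recent messages
        transforms.MessageTokenLimiter(max_tokens=1000, max_tokens_per_message=50),  # token caps
    ]
)
context_handling.add_to_agent(assistant)  # `assistant` is any ConversableAgent
```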
### What should I do if I get the error "TypeError: Assistants.create() got an unexpected keyword argument 'file_ids'"?
This error typically occurs when using an AutoGen version earlier than 0.2.27 in combination with OpenAI library version 1.21 or later. It arises because older versions of AutoGen do not support the `file_ids` parameter used by newer versions of the OpenAI API.