mirror of https://github.com/microsoft/autogen.git
Add a web surfer agent that can search and browse the web. (#1093)
* Initial commit of WebSurfer. Adds the browser_utils, and related tests. WebSurfer will be added in a subsequent commit. * Added the web surfer agent, and related tests. * Added a notebook to show how WebSurferAgent works. * Fixed a typo. * Updated test_web_surfer for compatibility with #1110. * Updated skip_oai logic. * Fixed code formatting. * More pre-commit fixes. * Added block to contrib-openai.yml * Added block to contrib-openai.yml * Added hook for BING_API_KEY * Temporarily commented out other tests, per request. * Fixed indentation (maybe?) * Restoring contrib-openai.yml
This commit is contained in:
parent
ca56782a7f
commit
708eb4d884
|
@ -217,3 +217,42 @@ jobs:
|
|||
with:
|
||||
file: ./coverage.xml
|
||||
flags: unittests
|
||||
WebSurfer:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ["3.11"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
environment: openai1
|
||||
steps:
|
||||
# checkout to pr branch
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install packages and dependencies
|
||||
run: |
|
||||
docker --version
|
||||
python -m pip install --upgrade pip wheel
|
||||
pip install -e .[websurfer]
|
||||
python -c "import autogen"
|
||||
pip install coverage pytest
|
||||
- name: Coverage
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
|
||||
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
|
||||
OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
|
||||
BING_API_KEY: ${{ secrets.BING_API_KEY }}
|
||||
run: |
|
||||
coverage run -a -m pytest test/agentchat/contrib/test_web_surfer.py
|
||||
coverage xml
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.xml
|
||||
flags: unittests
|
||||
|
|
|
@ -196,6 +196,49 @@ jobs:
|
|||
file: ./coverage.xml
|
||||
flags: unittests
|
||||
|
||||
WebSurfer:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-2019]
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install packages and dependencies for all tests
|
||||
run: |
|
||||
python -m pip install --upgrade pip wheel
|
||||
pip install pytest
|
||||
- name: Install packages and dependencies for WebSurfer
|
||||
run: |
|
||||
pip install -e .[websurfer]
|
||||
- name: Set AUTOGEN_USE_DOCKER based on OS
|
||||
shell: bash
|
||||
run: |
|
||||
if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
|
||||
echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
|
||||
fi
|
||||
- name: Test WebSurfer
|
||||
if: matrix.python-version != '3.10' # diversify the python versions
|
||||
run: |
|
||||
pytest test/test_browser_utils.py test/agentchat/contrib/test_web_surfer.py --skip-openai
|
||||
- name: Coverage
|
||||
if: matrix.python-version == '3.10'
|
||||
run: |
|
||||
pip install coverage>=5.3
|
||||
coverage run -a -m pytest test/test_browser_utils.py test/agentchat/contrib/test_web_surfer.py --skip-openai
|
||||
coverage xml
|
||||
- name: Upload coverage to Codecov
|
||||
if: matrix.python-version == '3.10'
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
file: ./coverage.xml
|
||||
flags: unittests
|
||||
|
||||
LMMTest:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
|
|
|
@ -0,0 +1,356 @@
|
|||
import json
|
||||
import copy
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
|
||||
from autogen import Agent, ConversableAgent, AssistantAgent, UserProxyAgent, GroupChatManager, GroupChat, OpenAIWrapper
|
||||
from autogen.browser_utils import SimpleTextBrowser
|
||||
from autogen.code_utils import content_str
|
||||
from datetime import datetime
|
||||
from autogen.token_count_utils import count_token, get_max_token_limit
|
||||
from autogen.oai.openai_utils import filter_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WebSurferAgent(ConversableAgent):
|
||||
"""(In preview) An agent that acts as a basic web surfer that can search the web and visit web pages."""
|
||||
|
||||
DEFAULT_PROMPT = (
|
||||
"You are a helpful AI assistant with access to a web browser (via the provided functions). In fact, YOU ARE THE ONLY MEMBER OF YOUR PARTY WITH ACCESS TO A WEB BROWSER, so please help out where you can by performing web searches, navigating pages, and reporting what you find. Today's date is "
|
||||
+ datetime.now().date().isoformat()
|
||||
)
|
||||
|
||||
DEFAULT_DESCRIPTION = "A helpful assistant with access to a web browser. Ask them to perform web searches, open pages, navigate to Wikipedia, answer questions from pages, and or generate summaries."
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
system_message: Optional[Union[str, List]] = DEFAULT_PROMPT,
|
||||
description: Optional[str] = DEFAULT_DESCRIPTION,
|
||||
is_termination_msg: Optional[Callable[[Dict], bool]] = None,
|
||||
max_consecutive_auto_reply: Optional[int] = None,
|
||||
human_input_mode: Optional[str] = "TERMINATE",
|
||||
function_map: Optional[Dict[str, Callable]] = None,
|
||||
code_execution_config: Optional[Union[Dict, Literal[False]]] = None,
|
||||
llm_config: Optional[Union[Dict, Literal[False]]] = None,
|
||||
summarizer_llm_config: Optional[Union[Dict, Literal[False]]] = None,
|
||||
default_auto_reply: Optional[Union[str, Dict, None]] = "",
|
||||
browser_config: Optional[Union[Dict, None]] = None,
|
||||
):
|
||||
super().__init__(
|
||||
name=name,
|
||||
system_message=system_message,
|
||||
description=description,
|
||||
is_termination_msg=is_termination_msg,
|
||||
max_consecutive_auto_reply=max_consecutive_auto_reply,
|
||||
human_input_mode=human_input_mode,
|
||||
function_map=function_map,
|
||||
code_execution_config=code_execution_config,
|
||||
llm_config=llm_config,
|
||||
default_auto_reply=default_auto_reply,
|
||||
)
|
||||
|
||||
# If the summarizer_llm_config is None, we copy it from the llm_config
|
||||
if summarizer_llm_config is None:
|
||||
if llm_config is None: # Nothing to copy
|
||||
self.summarizer_llm_config = None
|
||||
elif llm_config is False: # LLMs disabled
|
||||
self.summarizer_llm_config = False
|
||||
else: # Create a suitable config
|
||||
self.summarizer_llm_config = copy.deepcopy(llm_config)
|
||||
if "config_list" in self.summarizer_llm_config:
|
||||
preferred_models = filter_config(
|
||||
self.summarizer_llm_config["config_list"],
|
||||
{"model": ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]},
|
||||
)
|
||||
if len(preferred_models) == 0:
|
||||
logger.warning(
|
||||
"The summarizer did not find the preferred model (gpt-3.5-turbo-16k) in the config list. "
|
||||
"Semantic operations on webpages (summarization or Q&A) might be costly or ineffective."
|
||||
)
|
||||
else:
|
||||
self.summarizer_llm_config["config_list"] = preferred_models
|
||||
else:
|
||||
self.summarizer_llm_config = summarizer_llm_config
|
||||
|
||||
# Create the summarizer client
|
||||
self.summarization_client = None
|
||||
if self.summarizer_llm_config is not False:
|
||||
self.summarization_client = OpenAIWrapper(**self.summarizer_llm_config)
|
||||
|
||||
# Create the browser
|
||||
if browser_config is None:
|
||||
self.browser = SimpleTextBrowser()
|
||||
else:
|
||||
self.browser = SimpleTextBrowser(**browser_config)
|
||||
|
||||
# Create a copy of the llm_config for the inner monologue agents to use, and set them up with function calling
|
||||
if llm_config is None: # Nothing to copy
|
||||
inner_llm_config = None
|
||||
elif llm_config is False: # LLMs disabled
|
||||
inner_llm_config = False
|
||||
else:
|
||||
inner_llm_config = copy.deepcopy(llm_config)
|
||||
inner_llm_config["functions"] = [
|
||||
{
|
||||
"name": "informational_web_search",
|
||||
"description": "Perform an INFORMATIONAL web search query then return the search results.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The informational web search query to perform.",
|
||||
}
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
{
|
||||
"name": "navigational_web_search",
|
||||
"description": "Perform a NAVIGATIONAL web search query then immediately navigate to the top result. Useful, for example, to navigate to a particular Wikipedia article or other known destination. Equivalent to Google's \"I'm Feeling Lucky\" button.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "The navigational web search query to perform.",
|
||||
}
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
{
|
||||
"name": "visit_page",
|
||||
"description": "Visit a webpage at a given URL and return its text.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "The relative or absolute url of the webapge to visit.",
|
||||
}
|
||||
},
|
||||
},
|
||||
"required": ["url"],
|
||||
},
|
||||
{
|
||||
"name": "page_up",
|
||||
"description": "Scroll the viewport UP one page-length in the current webpage and return the new viewport content.",
|
||||
"parameters": {"type": "object", "properties": {}},
|
||||
"required": [],
|
||||
},
|
||||
{
|
||||
"name": "page_down",
|
||||
"description": "Scroll the viewport DOWN one page-length in the current webpage and return the new viewport content.",
|
||||
"parameters": {"type": "object", "properties": {}},
|
||||
"required": [],
|
||||
},
|
||||
]
|
||||
|
||||
# Enable semantic operations
|
||||
if self.summarization_client is not None:
|
||||
inner_llm_config["functions"].append(
|
||||
{
|
||||
"name": "answer_from_page",
|
||||
"description": "Uses AI to read the page and directly answer a given question based on the content.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to directly answer.",
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "[Optional] The url of the page. (Defaults to the current page)",
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
}
|
||||
)
|
||||
inner_llm_config["functions"].append(
|
||||
{
|
||||
"name": "summarize_page",
|
||||
"description": "Uses AI to summarize the content found at a given url. If the url is not provided, the current page is summarized.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "[Optional] The url of the page to summarize. (Defaults to current page)",
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
}
|
||||
)
|
||||
|
||||
# Set up the inner monologue
|
||||
self._assistant = AssistantAgent(
|
||||
self.name + "_inner_assistant",
|
||||
system_message=system_message,
|
||||
llm_config=inner_llm_config,
|
||||
is_termination_msg=lambda m: False,
|
||||
)
|
||||
|
||||
self._user_proxy = UserProxyAgent(
|
||||
self.name + "_inner_user_proxy",
|
||||
human_input_mode="NEVER",
|
||||
code_execution_config=False,
|
||||
default_auto_reply="",
|
||||
is_termination_msg=lambda m: False,
|
||||
)
|
||||
|
||||
# Helper functions
|
||||
def _browser_state():
|
||||
header = f"Address: {self.browser.address}\n"
|
||||
if self.browser.page_title is not None:
|
||||
header += f"Title: {self.browser.page_title}\n"
|
||||
|
||||
current_page = self.browser.viewport_current_page
|
||||
total_pages = len(self.browser.viewport_pages)
|
||||
|
||||
header += f"Viewport position: Showing page {current_page+1} of {total_pages}.\n"
|
||||
return (header, self.browser.viewport)
|
||||
|
||||
def _informational_search(query):
|
||||
self.browser.visit_page(f"bing: {query}")
|
||||
header, content = _browser_state()
|
||||
return header.strip() + "\n=======================\n" + content
|
||||
|
||||
def _navigational_search(query):
|
||||
self.browser.visit_page(f"bing: {query}")
|
||||
|
||||
# Extract the first linl
|
||||
m = re.search(r"\[.*?\]\((http.*?)\)", self.browser.page_content)
|
||||
if m:
|
||||
self.browser.visit_page(m.group(1))
|
||||
|
||||
# Return where we ended up
|
||||
header, content = _browser_state()
|
||||
return header.strip() + "\n=======================\n" + content
|
||||
|
||||
def _visit_page(url):
|
||||
self.browser.visit_page(url)
|
||||
header, content = _browser_state()
|
||||
return header.strip() + "\n=======================\n" + content
|
||||
|
||||
def _page_up():
|
||||
self.browser.page_up()
|
||||
header, content = _browser_state()
|
||||
return header.strip() + "\n=======================\n" + content
|
||||
|
||||
def _page_down():
|
||||
self.browser.page_down()
|
||||
header, content = _browser_state()
|
||||
return header.strip() + "\n=======================\n" + content
|
||||
|
||||
def _summarize_page(question, url):
|
||||
if url is not None and url != self.browser.address:
|
||||
self.browser.visit_page(url)
|
||||
|
||||
# We are likely going to need to fix this later, but summarize only as many tokens that fit in the buffer
|
||||
limit = 4096
|
||||
try:
|
||||
limit = get_max_token_limit(self.summarizer_llm_config["config_list"][0]["model"])
|
||||
except ValueError:
|
||||
pass # limit is unknown
|
||||
except TypeError:
|
||||
pass # limit is unknown
|
||||
|
||||
if limit < 16000:
|
||||
logger.warning(
|
||||
f"The token limit ({limit}) of the WebSurferAgent.summarizer_llm_config, is below the recommended 16k."
|
||||
)
|
||||
|
||||
buffer = ""
|
||||
for line in re.split(r"([\r\n]+)", self.browser.page_content):
|
||||
tokens = count_token(buffer + line)
|
||||
if tokens + 1024 > limit: # Leave room for our summary
|
||||
break
|
||||
buffer += line
|
||||
|
||||
buffer = buffer.strip()
|
||||
if len(buffer) == 0:
|
||||
return "Nothing to summarize."
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant that can summarize long documents to answer question.",
|
||||
}
|
||||
]
|
||||
|
||||
prompt = f"Please summarize the following into one or two paragraph:\n\n{buffer}"
|
||||
if question is not None:
|
||||
prompt = f"Please summarize the following into one or two paragraphs with respect to '{question}':\n\n{buffer}"
|
||||
|
||||
messages.append(
|
||||
{"role": "user", "content": prompt},
|
||||
)
|
||||
|
||||
response = self.summarization_client.create(context=None, messages=messages)
|
||||
extracted_response = self.summarization_client.extract_text_or_completion_object(response)[0]
|
||||
return str(extracted_response)
|
||||
|
||||
self._user_proxy.register_function(
|
||||
function_map={
|
||||
"informational_web_search": lambda query: _informational_search(query),
|
||||
"navigational_web_search": lambda query: _navigational_search(query),
|
||||
"visit_page": lambda url: _visit_page(url),
|
||||
"page_up": lambda: _page_up(),
|
||||
"page_down": lambda: _page_down(),
|
||||
"answer_from_page": lambda question=None, url=None: _summarize_page(question, url),
|
||||
"summarize_page": lambda question=None, url=None: _summarize_page(None, url),
|
||||
}
|
||||
)
|
||||
|
||||
self._reply_func_list = []
|
||||
self.register_reply([Agent, None], WebSurferAgent.generate_surfer_reply)
|
||||
self.register_reply([Agent, None], ConversableAgent.generate_code_execution_reply)
|
||||
self.register_reply([Agent, None], ConversableAgent.generate_function_call_reply)
|
||||
self.register_reply([Agent, None], ConversableAgent.check_termination_and_human_reply)
|
||||
|
||||
def generate_surfer_reply(
    self,
    messages: Optional[List[Dict]] = None,
    sender: Optional[Agent] = None,
    config: Optional[OpenAIWrapper] = None,
) -> Tuple[bool, Union[str, Dict, None]]:
    """Generate a reply by delegating to the inner assistant/user-proxy pair.

    Returns a (final, reply) tuple: `final` is always True (this reply function
    terminates the reply chain), and `reply` is the content of either the inner
    assistant's answer or the user proxy's function-call result.
    """
    if messages is None:
        messages = self._oai_messages[sender]

    # Start each turn from a clean inner state
    self._user_proxy.reset()
    self._assistant.reset()

    # Clone the messages to give context (all but the last outer message)
    self._assistant.chat_messages[self._user_proxy] = list()
    history = messages[0 : len(messages) - 1]
    for message in history:
        self._assistant.chat_messages[self._user_proxy].append(message)

    # Remind the agent where it is (no reply requested; just context)
    self._user_proxy.send(
        f"Your browser is currently open to the page '{self.browser.page_title}' at the address '{self.browser.address}'.",
        self._assistant,
        request_reply=False,
        silent=True,
    )

    # Forward the latest outer message and let the assistant respond
    self._user_proxy.send(messages[-1]["content"], self._assistant, request_reply=True, silent=True)
    agent_reply = self._user_proxy.chat_messages[self._assistant][-1]
    # print("Agent Reply: " + str(agent_reply))
    # Let the proxy execute any function call the assistant made
    proxy_reply = self._user_proxy.generate_reply(
        messages=self._user_proxy.chat_messages[self._assistant], sender=self._assistant
    )
    # print("Proxy Reply: " + str(proxy_reply))

    if proxy_reply == "":  # Was the default reply: no function executed, so surface the assistant's answer
        return True, None if agent_reply is None else agent_reply["content"]
    else:
        return True, None if proxy_reply is None else proxy_reply["content"]
|
|
@ -0,0 +1,283 @@
|
|||
import json
|
||||
import os
|
||||
import requests
|
||||
import re
|
||||
import markdownify
|
||||
import io
|
||||
import uuid
|
||||
import mimetypes
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from bs4 import BeautifulSoup
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Union, Callable, Literal, Tuple
|
||||
|
||||
# Optional PDF support
|
||||
IS_PDF_CAPABLE = False
|
||||
try:
|
||||
import pdfminer
|
||||
import pdfminer.high_level
|
||||
|
||||
IS_PDF_CAPABLE = True
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
# Other optional dependencies
|
||||
try:
|
||||
import pathvalidate
|
||||
except ModuleNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
class SimpleTextBrowser:
|
||||
"""(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""
|
||||
|
||||
def __init__(
    self,
    start_page: Optional[str] = "about:blank",
    viewport_size: Optional[int] = 1024 * 8,
    downloads_folder: Optional[Union[str, None]] = None,
    bing_api_key: Optional[Union[str, None]] = None,
    request_kwargs: Optional[Union[Dict, None]] = None,
):
    """Create a SimpleTextBrowser.

    Args:
        start_page: The first page to visit (defaults to "about:blank").
        viewport_size: Approximate number of characters per viewport "page"
            (applies only to the standard uri types).
        downloads_folder: Where to save files of unsupported content types
            (None disables downloads).
        bing_api_key: Key for the Bing Web Search API ("bing:" URIs).
        request_kwargs: Extra keyword arguments forwarded to requests.get().
    """
    self.start_page = start_page
    self.viewport_size = viewport_size  # Applies only to the standard uri types
    self.downloads_folder = downloads_folder
    self.history = list()
    self.page_title = None
    self.viewport_current_page = 0
    self.viewport_pages = list()
    self.bing_api_key = bing_api_key
    self.request_kwargs = request_kwargs
    self._page_content = ""

    # FIX: visit the start page LAST. The original called set_address() before
    # bing_api_key / request_kwargs / _page_content were assigned, so a non-blank
    # start page raised AttributeError inside _fetch_page/_bing_api_call, and the
    # trailing `self._page_content = ""` clobbered the fetched content while
    # viewport_pages still indexed it.
    self.set_address(start_page)
|
||||
|
||||
@property
def address(self) -> str:
    """Return the address of the current page (the most recent history entry)."""
    return self.history[-1]
|
||||
|
||||
def set_address(self, uri_or_path):
    """Navigate to a new page, record it in history, and reset the viewport to the top.

    Supports the special URIs "about:blank" and "bing: <query>"; anything else is
    fetched over HTTP(S). Relative paths are resolved against the previous page.
    """
    self.history.append(uri_or_path)

    # Handle special URIs
    if uri_or_path == "about:blank":
        self._set_page_content("")
    elif uri_or_path.startswith("bing:"):
        self._bing_search(uri_or_path[len("bing:") :].strip())
    else:
        if not uri_or_path.startswith("http:") and not uri_or_path.startswith("https:"):
            # FIX: resolve relative paths against the PREVIOUS page. The original
            # used self.address, which at this point is history[-1] -- the relative
            # path we just appended -- so urljoin never resolved it to a full URL.
            prior_address = self.history[-2] if len(self.history) >= 2 else self.start_page
            uri_or_path = urljoin(prior_address, uri_or_path)
            self.history[-1] = uri_or_path  # Update the address with the fully-qualified path
        self._fetch_page(uri_or_path)

    self.viewport_current_page = 0
|
||||
|
||||
@property
def viewport(self) -> str:
    """Return the content of the current viewport."""
    # viewport_pages holds (start, end) character offsets into the page content
    bounds = self.viewport_pages[self.viewport_current_page]
    return self.page_content[bounds[0] : bounds[1]]
|
||||
|
||||
@property
def page_content(self) -> str:
    """Return the full contents of the current page."""
    return self._page_content
|
||||
|
||||
def _set_page_content(self, content) -> str:
    """Sets the text content of the current page and re-paginates the viewport."""
    self._page_content = content
    self._split_pages()
    # Clamp the viewport index in case the new content has fewer pages
    if self.viewport_current_page >= len(self.viewport_pages):
        self.viewport_current_page = len(self.viewport_pages) - 1
|
||||
|
||||
def page_down(self):
    """Advance the viewport one page, clamped to the last page."""
    self.viewport_current_page = min(self.viewport_current_page + 1, len(self.viewport_pages) - 1)
|
||||
|
||||
def page_up(self):
    """Move the viewport back one page, clamped to the first page."""
    self.viewport_current_page = max(self.viewport_current_page - 1, 0)
|
||||
|
||||
def visit_page(self, path_or_uri):
    """Update the address, visit the page, and return the content of the viewport."""
    self.set_address(path_or_uri)
    return self.viewport
|
||||
|
||||
def _split_pages(self):
    """Recompute self.viewport_pages: (start, end) offsets into self._page_content."""
    # Split only regular pages; special pages (about:blank, bing results, etc.)
    # are kept as a single viewport
    if not self.address.startswith("http:") and not self.address.startswith("https:"):
        self.viewport_pages = [(0, len(self._page_content))]
        return

    # Handle empty pages — still one (empty) viewport so indexing stays valid
    if len(self._page_content) == 0:
        self.viewport_pages = [(0, 0)]
        return

    # Break the viewport into pages of roughly viewport_size characters
    self.viewport_pages = []
    start_idx = 0
    while start_idx < len(self._page_content):
        end_idx = min(start_idx + self.viewport_size, len(self._page_content))
        # Adjust to end on a space, so words are never split across pages
        while end_idx < len(self._page_content) and self._page_content[end_idx - 1] not in [" ", "\t", "\r", "\n"]:
            end_idx += 1
        self.viewport_pages.append((start_idx, end_idx))
        start_idx = end_idx
|
||||
|
||||
def _bing_api_call(self, query):
    """Issue `query` to the Bing Web Search API and return the parsed JSON response.

    Raises:
        ValueError: If no Bing API key was configured.
        requests.exceptions.HTTPError: If the API request fails.
    """
    # Make sure the key was set
    if self.bing_api_key is None:
        raise ValueError("Missing Bing API key.")

    # Prepare the request parameters
    # NOTE(review): .copy() is shallow — caller-supplied "headers"/"params" dicts
    # are mutated in place below; confirm that is acceptable.
    request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}

    if "headers" not in request_kwargs:
        request_kwargs["headers"] = {}
    request_kwargs["headers"]["Ocp-Apim-Subscription-Key"] = self.bing_api_key

    if "params" not in request_kwargs:
        request_kwargs["params"] = {}
    request_kwargs["params"]["q"] = query
    request_kwargs["params"]["textDecorations"] = False
    request_kwargs["params"]["textFormat"] = "raw"

    request_kwargs["stream"] = False

    # Make the request
    response = requests.get("https://api.bing.microsoft.com/v7.0/search", **request_kwargs)
    response.raise_for_status()
    results = response.json()

    return results
|
||||
|
||||
def _bing_search(self, query):
    """Run a Bing search and render the results as the current page (a markdown list of links)."""
    results = self._bing_api_call(query)

    # Number web results (and their deep links) sequentially
    web_snippets = list()
    idx = 0
    for page in results["webPages"]["value"]:
        idx += 1
        web_snippets.append(f"{idx}. [{page['name']}]({page['url']})\n{page['snippet']}")
        if "deepLinks" in page:
            for dl in page["deepLinks"]:
                idx += 1
                web_snippets.append(
                    f"{idx}. [{dl['name']}]({dl['url']})\n{dl['snippet'] if 'snippet' in dl else ''}"
                )

    # News results, if present, continue the same numbering
    news_snippets = list()
    if "news" in results:
        for page in results["news"]["value"]:
            idx += 1
            news_snippets.append(f"{idx}. [{page['name']}]({page['url']})\n{page['description']}")

    self.page_title = f"{query} - Search"

    content = (
        f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
        + "\n\n".join(web_snippets)
    )
    if len(news_snippets) > 0:
        content += "\n\n## News Results:\n" + "\n\n".join(news_snippets)
    self._set_page_content(content)
|
||||
|
||||
def _fetch_page(self, url):
    """Fetch `url` over HTTP(S) and render it into the browser.

    HTML is converted to markdown (with special handling for Wikipedia),
    plain text is shown as-is, PDFs are extracted when pdfminer is available,
    and other content types are downloaded to `downloads_folder` if set.
    Request errors become an error page rather than raising.
    """
    try:
        # Prepare the request parameters
        request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
        request_kwargs["stream"] = True

        # Send a HTTP request to the URL
        response = requests.get(url, **request_kwargs)
        response.raise_for_status()

        # If the HTTP request returns a status code 200, proceed
        if response.status_code == 200:
            # Normalize the content type to one of the handled values, if it matches
            content_type = response.headers.get("content-type", "")
            for ct in ["text/html", "text/plain", "application/pdf"]:
                if ct in content_type.lower():
                    content_type = ct
                    break

            if content_type == "text/html":
                # Get the content of the response
                html = ""
                for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
                    html += chunk

                soup = BeautifulSoup(html, "html.parser")

                # Remove javascript and style blocks
                for script in soup(["script", "style"]):
                    script.extract()

                # Convert to markdown -- Wikipedia gets special attention to get a clean version of the page
                if url.startswith("https://en.wikipedia.org/"):
                    body_elm = soup.find("div", {"id": "mw-content-text"})
                    title_elm = soup.find("span", {"class": "mw-page-title-main"})

                    if body_elm:
                        # What's the title
                        main_title = soup.title.string
                        if title_elm and len(title_elm) > 0:
                            main_title = title_elm.string
                        webpage_text = (
                            "# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
                        )
                    else:
                        webpage_text = markdownify.MarkdownConverter().convert_soup(soup)
                else:
                    webpage_text = markdownify.MarkdownConverter().convert_soup(soup)

                # Convert newlines
                webpage_text = re.sub(r"\r\n", "\n", webpage_text)

                # Remove excessive blank lines
                self.page_title = soup.title.string
                self._set_page_content(re.sub(r"\n{2,}", "\n\n", webpage_text).strip())
            elif content_type == "text/plain":
                # Get the content of the response
                plain_text = ""
                for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
                    plain_text += chunk

                self.page_title = None
                self._set_page_content(plain_text)
            elif IS_PDF_CAPABLE and content_type == "application/pdf":
                # Extract the PDF text in-memory (pdfminer is optional)
                pdf_data = io.BytesIO(response.raw.read())
                self.page_title = None
                self._set_page_content(pdfminer.high_level.extract_text(pdf_data))
            elif self.downloads_folder is not None:
                # Try producing a safe filename (pathvalidate is optional -> NameError)
                fname = None
                try:
                    fname = pathvalidate.sanitize_filename(os.path.basename(urlparse(url).path)).strip()
                except NameError:
                    pass

                # No suitable name, so make one
                if fname is None:
                    extension = mimetypes.guess_extension(content_type)
                    if extension is None:
                        extension = ".download"
                    fname = str(uuid.uuid4()) + extension

                # Open a file for writing
                download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
                with open(download_path, "wb") as fh:
                    for chunk in response.iter_content(chunk_size=512):
                        fh.write(chunk)

                # Return a page describing what just happened
                self.page_title = "Download complete."
                self._set_page_content(f"Downloaded '{url}' to '{download_path}'.")
            else:
                self.page_title = f"Error - Unsupported Content-Type '{content_type}'"
                self._set_page_content(self.page_title)
        else:
            self.page_title = "Error"
            self._set_page_content("Failed to retrieve " + url)
    except requests.exceptions.RequestException as e:
        # Render request failures (DNS, timeout, HTTP error status, etc.) as an error page
        self.page_title = "Error"
        self._set_page_content(str(e))
|
|
@ -0,0 +1,627 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WebSurferAgent\n",
|
||||
"\n",
|
||||
"AutoGen provides a proof-of-concept WebSurferAgent that can command a simple text-based browser (similar to [Lynx](https://en.wikipedia.org/wiki/Lynx_(web_browser))) to search the web, visit pages, navigate within pages, download files, etc. The browsing is stateful, meaning that browsing history, viewport state, and other details are maintained throughout the conversation. \n",
|
||||
"\n",
|
||||
"This work was largely inspired by OpenAI's [WebGPT](https://openai.com/research/webgpt) project from December 2021. \n",
|
||||
"\n",
|
||||
"## Requirements\n",
|
||||
"\n",
|
||||
"AutoGen requires `Python>=3.8`. To run this notebook example, please install AutoGen with the optional `websurfer` dependencies:\n",
|
||||
"```bash\n",
|
||||
"pip install \"pyautogen[websurfer]\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install --quiet \"pyautogen[websurfer]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set your API Endpoint\n",
|
||||
"\n",
|
||||
"The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file.\n",
|
||||
"\n",
|
||||
"It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well).\n",
|
||||
"\n",
|
||||
"The WebSurferAgent uses a combination of models. GPT-4 and GPT-3.5-turbo-16k are recommended.\n",
|
||||
"\n",
|
||||
"Your json config should look something like the following:\n",
|
||||
"```json\n",
|
||||
"[\n",
|
||||
" {\n",
|
||||
" \"model\": \"gpt-4\",\n",
|
||||
" \"api_key\": \"<your OpenAI API key here>\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"model\": \"gpt-3.5-turbo-16k\",\n",
|
||||
" \"api_key\": \"<your OpenAI API key here>\"\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import autogen # noqa: E402\n",
|
||||
"\n",
|
||||
"llm_config = {\n",
|
||||
" \"timeout\": 600,\n",
|
||||
" \"cache_seed\": 44, # change the seed for different trials\n",
|
||||
" \"config_list\": autogen.config_list_from_json(\n",
|
||||
" \"OAI_CONFIG_LIST\",\n",
|
||||
" filter_dict={\"model\": [\"gpt-4\", \"gpt-4-0613\", \"gpt-4-32k\", \"gpt-4-32k-0613\", \"gpt-4-1106-preview\"]},\n",
|
||||
" ),\n",
|
||||
" \"temperature\": 0,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"summarizer_llm_config = {\n",
|
||||
" \"timeout\": 600,\n",
|
||||
" \"cache_seed\": 44, # change the seed for different trials\n",
|
||||
" \"config_list\": autogen.config_list_from_json(\n",
|
||||
" \"OAI_CONFIG_LIST\",\n",
|
||||
" filter_dict={\"model\": [\"gpt-3.5-turbo-1106\", \"gpt-3.5-turbo-16k-0613\", \"gpt-3.5-turbo-16k\"]},\n",
|
||||
" ),\n",
|
||||
" \"temperature\": 0,\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure Bing\n",
|
||||
"\n",
|
||||
"For WebSurferAgent to be reasonably useful, it needs to be able to search the web -- and that means it needs a Bing API key. \n",
|
||||
"You can read more about how to get an API on the [Bing Web Search API](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api) page.\n",
|
||||
"\n",
|
||||
"Once you have your key, either set it as the `BING_API_KEY` system environment variable, or simply input your key below.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os # noqa: E402\n",
|
||||
"\n",
|
||||
"bing_api_key = os.environ[\"BING_API_KEY\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Construct Agents\n",
|
||||
"\n",
|
||||
"We now create our WebSurferAgent, and a UserProxyAgent to surf the web. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from autogen.agentchat.contrib.web_surfer import WebSurferAgent # noqa: E402\n",
|
||||
"\n",
|
||||
"web_surfer = WebSurferAgent(\n",
|
||||
" \"web_surfer\",\n",
|
||||
" llm_config=llm_config,\n",
|
||||
" summarizer_llm_config=summarizer_llm_config,\n",
|
||||
" browser_config={\"viewport_size\": 4096, \"bing_api_key\": bing_api_key},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"user_proxy = autogen.UserProxyAgent(\n",
|
||||
" \"user_proxy\",\n",
|
||||
" human_input_mode=\"NEVER\",\n",
|
||||
" code_execution_config=False,\n",
|
||||
" default_auto_reply=\"\",\n",
|
||||
" is_termination_msg=lambda x: True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example 1: Search, summarize\n",
|
||||
"- Search for information about Microsoft AutoGen\n",
|
||||
"- Summarize the results\n",
|
||||
"- Visit the Getting Started Docs page"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Search the web for information about Microsoft AutoGen\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION informational_web_search...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"Address: bing: Microsoft AutoGen\n",
|
||||
"Title: Microsoft AutoGen - Search\n",
|
||||
"Viewport position: Showing page 1 of 1.\n",
|
||||
"=======================\n",
|
||||
"A Bing search for 'Microsoft AutoGen' found 10 results:\n",
|
||||
"\n",
|
||||
"## Web Results\n",
|
||||
"1. [AutoGen: Enabling next-generation large language model applications](https://www.microsoft.com/en-us/research/blog/autogen-enabling-next-generation-large-language-model-applications/)\n",
|
||||
"AutoGen is a Python package that simplifies the orchestration, optimization, and automation of large language model applications. It enables customizable and conversable agents that integrate with humans, tools, and other agents to solve tasks using GPT-4 and other advanced LLMs. Learn how to use AutoGen for code-based question answering, supply-chain optimization, conversational chess, and more.\n",
|
||||
"\n",
|
||||
"2. [GitHub - microsoft/autogen: Enable Next-Gen Large Language Model ...](https://github.com/microsoft/autogen)\n",
|
||||
"AutoGen is a Python library that enables the development of large language model applications using multiple agents that can converse with each other to solve tasks. It supports various conversation patterns, enhanced LLM inference, and customizable and conversable agents based on OpenAI models.\n",
|
||||
"\n",
|
||||
"3. [Getting Started | AutoGen](https://microsoft.github.io/autogen/docs/Getting-Started/)\n",
|
||||
"AutoGen is a framework that enables development of LLM applications using multiple agents that can converse with each other to solve tasks. AutoGen agents are customizable, conversable, and seamlessly allow human participation. They can operate in various modes that employ combinations of LLMs, human inputs, and tools. Main Features\n",
|
||||
"\n",
|
||||
"4. [AutoGen | AutoGen - microsoft.github.io](https://microsoft.github.io/autogen/)\n",
|
||||
"AutoGen is a tool that enables next-gen large language model applications by providing a high-level abstraction for building diverse and enhanced LLM workflows. It offers a collection of working systems for various domains and complexities, as well as enhanced LLM inference and optimization APIs.\n",
|
||||
"\n",
|
||||
"5. [AutoGen - Microsoft Research](https://www.microsoft.com/en-us/research/project/autogen/)\n",
|
||||
"AutoGen is an open-source library for building next-generation LLM applications with multiple agents, teachability and personalization. It supports agents that can be backed by various LLM configurations, code generation and execution, and human proxy agent integration.\n",
|
||||
"\n",
|
||||
"6. [Installation | AutoGen](https://microsoft.github.io/autogen/docs/Installation/)\n",
|
||||
"Installation Setup Virtual Environment When not using a docker container, we recommend using a virtual environment to install AutoGen. This will ensure that the dependencies for AutoGen are isolated from the rest of your system. Option 1: venv You can create a virtual environment with venv as below: python3 -m venv pyautogen\n",
|
||||
"\n",
|
||||
"7. [AutoGen: Downloads - Microsoft Research](https://www.microsoft.com/en-us/research/project/autogen/downloads/)\n",
|
||||
"AutoGen allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks.\n",
|
||||
"\n",
|
||||
"8. [Multi-agent Conversation Framework | AutoGen - microsoft.github.io](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat/)\n",
|
||||
"AutoGen offers a unified multi-agent conversation framework as a high-level abstraction of using foundation models. It features capable, customizable and conversable agents which integrate LLMs, tools, and humans via automated agent chat.\n",
|
||||
"\n",
|
||||
"9. [[2308.08155] AutoGen: Enabling Next-Gen LLM Applications via Multi ...](https://arxiv.org/abs/2308.08155)\n",
|
||||
"AutoGen is an open-source framework that allows developers to create and customize agents that can converse with each other to perform tasks using various types of language models (LLMs). The framework supports natural language and code-based conversation patterns, and is effective for diverse applications such as mathematics, coding, question answering, and more.\n",
|
||||
"\n",
|
||||
"10. [How to setup and use the new Microsoft AutoGen AI agent](https://www.geeky-gadgets.com/microsoft-autogen/)\n",
|
||||
"Learn how to use AutoGen, a tool that simplifies the automation and optimization of complex language model applications using multiple agents that can converse with each other. AutoGen supports diverse conversation patterns, human participation, and the tuning of expensive LLMs like ChatGPT and GPT-4.\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task1 = \"\"\"\n",
|
||||
"Search the web for information about Microsoft AutoGen\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"Summarize these results\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION summarize_page...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"AutoGen is a Python package and framework developed by Microsoft that simplifies the orchestration, optimization, and automation of large language model (LLM) applications. It enables the development of customizable and conversable agents that can solve tasks using advanced LLMs like GPT-4. AutoGen supports various conversation patterns, enhanced LLM inference, and seamless integration with humans, tools, and other agents. It offers a high-level abstraction for building diverse and enhanced LLM workflows and provides a collection of working systems for different domains and complexities. AutoGen is open-source and supports natural language and code-based conversation patterns for applications such as question answering, coding, mathematics, and more.\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task2 = \"Summarize these results\"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task2, clear_history=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"Click the 'Getting Started' result\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION navigational_web_search...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"Address: https://microsoft.github.io/autogen/docs/Getting-Started/\n",
|
||||
"Title: Getting Started | AutoGen\n",
|
||||
"Viewport position: Showing page 1 of 2.\n",
|
||||
"=======================\n",
|
||||
"Getting Started | AutoGen\n",
|
||||
"\n",
|
||||
"[Skip to main content](#)[**AutoGen**](/autogen/)[Docs](/autogen/docs/Getting-Started)[SDK](/autogen/docs/reference/agentchat/conversable_agent)[Blog](/autogen/blog)[FAQ](/autogen/docs/FAQ)[Examples](/autogen/docs/Examples)Resources* [Ecosystem](/autogen/docs/Ecosystem)\n",
|
||||
"* [Gallery](/autogen/docs/Gallery)\n",
|
||||
"[GitHub](https://github.com/microsoft/autogen)🌜🌞`ctrl``K`* [Getting Started](/autogen/docs/Getting-Started)\n",
|
||||
"* [Installation](/autogen/docs/Installation)\n",
|
||||
"* [Use Cases](#)\n",
|
||||
"* [Contributing](/autogen/docs/Contribute)\n",
|
||||
"* [Research](/autogen/docs/Research)\n",
|
||||
"On this pageGetting Started\n",
|
||||
"===============\n",
|
||||
"\n",
|
||||
"AutoGen is a framework that enables development of LLM applications using multiple agents that can converse with each other to solve tasks. AutoGen agents are customizable, conversable, and seamlessly allow human participation. They can operate in various modes that employ combinations of LLMs, human inputs, and tools.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Main Features[](#main-features \"Direct link to heading\")\n",
|
||||
"\n",
|
||||
"* AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses.\n",
|
||||
"* It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy,\n",
|
||||
"the number of agents, and agent conversation topology.\n",
|
||||
"* It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns.\n",
|
||||
"* AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc.\n",
|
||||
"\n",
|
||||
"AutoGen is powered by collaborative [research studies](/autogen/docs/Research) from Microsoft, Penn State University, and University of Washington.\n",
|
||||
"\n",
|
||||
"### Quickstart[](#quickstart \"Direct link to heading\")\n",
|
||||
"\n",
|
||||
"Install from pip: `pip install pyautogen`. Find more options in [Installation](/autogen/docs/Installation).\n",
|
||||
"For [code execution](/autogen/docs/FAQ#code-execution), we strongly recommend installing the python docker package, and using docker.\n",
|
||||
"\n",
|
||||
"#### Multi-Agent Conversation Framework[](#multi-agent-conversation-framework \"Direct link to heading\")\n",
|
||||
"\n",
|
||||
"Autogen enables the next-gen LLM applications with a generic multi-agent conversation framework. It offers customizable and conversable agents which integrate LLMs, tools, and humans.\n",
|
||||
"By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code. For [example](https://github.com/microsoft/autogen/blob/main/test/twoagent.py),\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"from autogen import AssistantAgent, UserProxyAgent, config\\_list\\_from\\_json \n",
|
||||
" \n",
|
||||
"# Load LLM inference endpoints from an env variable or a file \n",
|
||||
"# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints \n",
|
||||
"# and OAI\\_CONFIG\\_LIST\\_sample.json \n",
|
||||
"config\\_list = config\\_list\\_from\\_json(env\\_or\\_file=\"OAI\\_CONFIG\\_LIST\") \n",
|
||||
"assistant = AssistantAgent(\"assistant\", llm\\_config={\"config\\_list\": config\\_list}) \n",
|
||||
"user\\_proxy = UserProxyAgent(\"user\\_proxy\", code\\_execution\\_config={\"work\\_dir\": \"coding\"}) \n",
|
||||
"user\\_proxy.initiate\\_chat(assistant, \n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task3 = \"Click the 'Getting Started' result\"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task3, clear_history=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example 2: Navigational search, scroll, answer questions\n",
|
||||
"- Search for Microsoft's Wikipedia page, then navigate to it\n",
|
||||
"- Scroll down\n",
|
||||
"- Answer questions about the content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"Find Microsoft's Wikipedia page.\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION navigational_web_search...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"Address: https://en.wikipedia.org/wiki/Microsoft\n",
|
||||
"Title: Microsoft - Wikipedia\n",
|
||||
"Viewport position: Showing page 1 of 64.\n",
|
||||
"=======================\n",
|
||||
"# Microsoft\n",
|
||||
"\n",
|
||||
"American multinational technology corporation\n",
|
||||
"\n",
|
||||
"Microsoft Corporation| [A square divided into four sub-squares, colored red-orange, green, yellow and blue (clockwise), with the company name appearing to its right](/wiki/File:Microsoft_logo_(2012).svg) |\n",
|
||||
"| Building 92 on the [Microsoft Redmond campus](/wiki/Microsoft_Redmond_campus \"Microsoft Redmond campus\") |\n",
|
||||
"| Type | [Public](/wiki/Public_company \"Public company\") |\n",
|
||||
"| [Traded as](/wiki/Ticker_symbol \"Ticker symbol\") | * [Nasdaq](/wiki/Nasdaq \"Nasdaq\"): [MSFT](https://www.nasdaq.com/market-activity/stocks/msft)\n",
|
||||
"* [Nasdaq-100](/wiki/Nasdaq-100 \"Nasdaq-100\") component\n",
|
||||
"* [DJIA](/wiki/Dow_Jones_Industrial_Average \"Dow Jones Industrial Average\") component\n",
|
||||
"* [S&P 100](/wiki/S%26P_100 \"S&P 100\") component\n",
|
||||
"* [S&P 500](/wiki/S%26P_500 \"S&P 500\") component\n",
|
||||
" |\n",
|
||||
"| [ISIN](/wiki/International_Securities_Identification_Number \"International Securities Identification Number\") | [US5949181045](https://isin.toolforge.org/?language=en&isin=US5949181045) |\n",
|
||||
"| Industry | [Information technology](/wiki/Information_technology \"Information technology\") |\n",
|
||||
"| Founded | April 4, 1975; 48 years ago (1975-04-04) in [Albuquerque, New Mexico](/wiki/Albuquerque,_New_Mexico \"Albuquerque, New Mexico\"), U.S. |\n",
|
||||
"| Founders | * [Bill Gates](/wiki/Bill_Gates \"Bill Gates\")\n",
|
||||
"* [Paul Allen](/wiki/Paul_Allen \"Paul Allen\")\n",
|
||||
" |\n",
|
||||
"| Headquarters | [One Microsoft Way](/wiki/Microsoft_campus \"Microsoft campus\")[Redmond, Washington](/wiki/Redmond,_Washington \"Redmond, Washington\"), U.S. |\n",
|
||||
"| Area served | Worldwide |\n",
|
||||
"| Key people | * [Satya Nadella](/wiki/Satya_Nadella \"Satya Nadella\")([Chairman](/wiki/Chairman \"Chairman\") & [CEO](/wiki/Chief_executive_officer \"Chief executive officer\"))\n",
|
||||
"* [Brad Smith](/wiki/Brad_Smith_(American_lawyer) \"Brad Smith (American lawyer)\")([Vice Chairman](/wiki/Vice-Chairman \"Vice-Chairman\") & [President](/wiki/President_(corporate_title) \"President (corporate title)\"))\n",
|
||||
"* [Bill Gates](/wiki/Bill_Gates \"Bill Gates\")([technical adviser](/wiki/Adviser \"Adviser\"))\n",
|
||||
" |\n",
|
||||
"| Products | * [Software development](/wiki/Software_development \"Software development\")\n",
|
||||
"* [Computer hardware](/wiki/Computer_hardware \"Computer hardware\")\n",
|
||||
"* [Consumer electronics](/wiki/Consumer_electronics \"Consumer electronics\")\n",
|
||||
"* [Social networking service](/wiki/Social_networking_service \"Social networking service\")\n",
|
||||
"* [Cloud computing](/wiki/Cloud_computing \"Cloud computing\")\n",
|
||||
"* [Video games](/wiki/Video_game_industry \"Video game industry\")\n",
|
||||
"* [Internet](/wiki/Internet \"Internet\")\n",
|
||||
"* [Corporate venture capital](/wiki/Corporate_venture_capital \"Corporate venture capital\")\n",
|
||||
" |\n",
|
||||
"| Brands | \n",
|
||||
"* [Windows](/wiki/Microsoft_Windows \"Microsoft Windows\")\n",
|
||||
"* [Microsoft 365](/wiki/Microsoft_365 \"Microsoft 365\")\n",
|
||||
"* [Skype](/wiki/Skype \"Skype\")\n",
|
||||
"* [Visual Studio](/wiki/Visual_Studio \"Visual Studio\")\n",
|
||||
"* [Xbox](/wiki/Xbox \"Xbox\")\n",
|
||||
"* [Dynamics](/wiki/Microsoft_Dynamics_365 \"Microsoft Dynamics 365\")\n",
|
||||
"* [Surface](/wiki/Microsoft_Surface \"Microsoft Surface\")\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| Services | \n",
|
||||
"* [Edge](/wiki/Microsoft_Edge \"Microsoft Edge\")\n",
|
||||
"* [Azure](/wiki/Microsoft_Azure \"Microsoft Azure\")\n",
|
||||
"* [Bing](/wiki/Microsoft_Bing \"Microsoft Bing\")\n",
|
||||
"* [LinkedIn](/wiki/LinkedIn \"LinkedIn\")\n",
|
||||
"* [Yammer](/wiki/Yammer \"Yammer\")\n",
|
||||
"* [Microsoft 365](/wiki/Microsoft_365 \"Microsoft 365\")\n",
|
||||
"* [OneDrive](/wiki/OneDrive \"OneDrive\")\n",
|
||||
"* [Outlook](/wiki/Microsoft_Outlook \"Microsoft Outlook\")\n",
|
||||
"* [GitHub](/wiki/GitHub \"GitHub\")\n",
|
||||
"* [Microsoft Store](/wiki/Microsoft_Store_(digital) \"Microsoft Store (digital)\")\n",
|
||||
"* [Windows Update](/wiki/Windows_Update \"Windows Update\")\n",
|
||||
"* [Xbox Game Pass](/wiki/Xbox_Game_Pass \"Xbox Game Pass\")\n",
|
||||
"* [Xbox network](/wiki/Xbox_network \"Xbox network\")\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| Revenue | Increase [US$](/wiki/United_States_dollar \"United States dollar\")211.9 billion (2023) |\n",
|
||||
"| [Operating income](/wiki/Earnings_before_interest_and_taxes \"Earnings before interest and taxes\") | Increase US$88.5 billion (2023) |\n",
|
||||
"| [Net income](/wiki/Net_income \"Net income\") | Increase US$73.4 billion (2023) |\n",
|
||||
"| [Total assets](/wiki/Asset \"Asset\") | Increase US$411.9 billion (2023) |\n",
|
||||
"| [Total equity](/wiki/Equity_(finance) \"Equity \n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task4 = \"\"\"Find Microsoft's Wikipedia page.\"\"\"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task4, clear_history=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"Scroll down.\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION page_down...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"Address: https://en.wikipedia.org/wiki/Microsoft\n",
|
||||
"Title: Microsoft - Wikipedia\n",
|
||||
"Viewport position: Showing page 2 of 64.\n",
|
||||
"=======================\n",
|
||||
"(finance)\") | Increase US$206.2 billion (2023) |\n",
|
||||
"| Number of employees | 238,000 (2023) |\n",
|
||||
"| [Divisions](/wiki/Division_(business) \"Division (business)\") | \n",
|
||||
"* [Microsoft Engineering Groups](/wiki/Microsoft_engineering_groups \"Microsoft engineering groups\")\n",
|
||||
"* [Microsoft Digital Crimes Unit](/wiki/Microsoft_Digital_Crimes_Unit \"Microsoft Digital Crimes Unit\")\n",
|
||||
"* [Microsoft Press](/wiki/Microsoft_Press \"Microsoft Press\")\n",
|
||||
"* [Microsoft Japan](/wiki/Microsoft_Japan \"Microsoft Japan\")\n",
|
||||
"* [Microsoft Gaming](/wiki/Microsoft_Gaming \"Microsoft Gaming\")\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| [Subsidiaries](/wiki/Subsidiary \"Subsidiary\") | \n",
|
||||
"* [GitHub](/wiki/GitHub \"GitHub\")\n",
|
||||
"* [LinkedIn](/wiki/LinkedIn \"LinkedIn\")\n",
|
||||
"* [Metaswitch](/wiki/Metaswitch \"Metaswitch\")\n",
|
||||
"* [Nuance Communications](/wiki/Nuance_Communications \"Nuance Communications\")\n",
|
||||
"* [RiskIQ](/wiki/RiskIQ \"RiskIQ\")\n",
|
||||
"* [Skype Technologies](/wiki/Skype_Technologies \"Skype Technologies\")\n",
|
||||
"* [OpenAI](/wiki/OpenAI \"OpenAI\") (49%)[[1]](#cite_note-1)\n",
|
||||
"* [Xamarin](/wiki/Xamarin \"Xamarin\")\n",
|
||||
"* [Xandr](/wiki/Xandr \"Xandr\")\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| |\n",
|
||||
"| [ASN](/wiki/Autonomous_System_Number \"Autonomous System Number\") | * [8075](https://bgp.tools/as/8075)\n",
|
||||
" |\n",
|
||||
"| |\n",
|
||||
"| Website | [microsoft.com](https://www.microsoft.com/) |\n",
|
||||
"| **Footnotes / references**Financials as of June 30, 2023[[update]](https://en.wikipedia.org/w/index.php?title=Microsoft&action=edit)[[2]](#cite_note-2) |\n",
|
||||
"\n",
|
||||
"| | | |\n",
|
||||
"| --- | --- | --- |\n",
|
||||
"| \n",
|
||||
"\n",
|
||||
"| | |\n",
|
||||
"| --- | --- |\n",
|
||||
"| [Bill Gates in 2023](/wiki/File:Bill_Gates_2017_(cropped).jpg) | This article is part of a series about\n",
|
||||
"[Bill Gates](/wiki/Bill_Gates \"Bill Gates\") |\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| * [Awards and honors](/wiki/Bill_Gates#Recognition \"Bill Gates\")\n",
|
||||
"* [Philanthropy](/wiki/Bill_Gates#Philanthropy \"Bill Gates\")\n",
|
||||
"* [Political positions](/wiki/Bill_Gates#Political_positions \"Bill Gates\")\n",
|
||||
"* [Public image](/wiki/Bill_Gates#Public_image \"Bill Gates\")\n",
|
||||
"* [Residence](/wiki/Bill_Gates%27s_house \"Bill Gates's house\")\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Companies* [Traf-O-Data](/wiki/Traf-O-Data \"Traf-O-Data\")\n",
|
||||
"* Microsoft ([criticism](/wiki/Criticism_of_Microsoft \"Criticism of Microsoft\"))\n",
|
||||
"* [BEN](/wiki/Branded_Entertainment_Network \"Branded Entertainment Network\")\n",
|
||||
"* [Cascade Investment](/wiki/Cascade_Investment \"Cascade Investment\")\n",
|
||||
"* [TerraPower](/wiki/TerraPower \"TerraPower\")\n",
|
||||
"* [Gates Ventures](/wiki/Gates_Ventures \"Gates Ventures\")\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Charitable organizations* [Bill & Melinda Gates Foundation](/wiki/Bill_%26_Melinda_Gates_Foundation \"Bill & Melinda Gates Foundation\")\n",
|
||||
"* [Match for Africa](/wiki/Match_for_Africa \"Match for Africa\")\n",
|
||||
"* [The Giving Pledge](/wiki/The_Giving_Pledge \"The Giving Pledge\")\n",
|
||||
"* [OER Project](/wiki/OER_Project \"OER Project\")\n",
|
||||
"* [Breakthrough Energy](/wiki/Breakthrough_Energy \"Breakthrough Energy\")\n",
|
||||
"* [Mission Innovation](/wiki/Mission_Innovation \"Mission Innovation\")\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Writings* \"[An Open Letter to Hobbyists](/wiki/An_Open_Letter_to_Hobbyists \"An Open Letter to Hobbyists\")\"\n",
|
||||
"* *[The Road Ahead](/wiki/The_Road_Ahead_(Gates_book) \"The Road Ahead (Gates book)\")*\n",
|
||||
"* *[Business @ the Speed of Thought](/wiki/Business_@_the_Speed_of_Thought \"Business @ the Speed of Thought\")*\n",
|
||||
"* *[How to Avoid a Climate Disaster](/wiki/How_to_Avoid_a_Climate_Disaster \"How to Avoid a Climate Disaster\")*\n",
|
||||
"* *[How to Prevent the Next Pandemic](/wiki/How_to_Prevent_the_Next_Pandemic \"How to Prevent the Next Pandemic\")*\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Related* [Bill Gates' flower fly](/wiki/Bill_Gates%27_flower_fly \"Bill Gates' flower fly\")\n",
|
||||
"* [Codex Leicester](/wiki/Codex_Leicester \"Codex Leicester\")\n",
|
||||
"* *[Lost on the Grand Banks](/wiki/Lost_on_the_Grand_Banks \"Lost on the Grand Banks\")*\n",
|
||||
"* [History of Microsoft](/wiki/History_of_Microsoft \"History of Microsoft\")\n",
|
||||
"* [Timeline of Microsoft](/wiki/Timeline_of_Microsoft \"Timeline of Microsoft\")\n",
|
||||
"* [Paul Allen](/wiki/Paul_Allen \"Paul Allen\")\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
" |\n",
|
||||
"| * [v](/wiki/Template:Bill_Gates_series \"Template:Bill Gates series\")\n",
|
||||
"* [t](/wiki/Template_talk:Bill_Gates_series \"Template talk:Bill Gates series\")\n",
|
||||
"* [e](/wiki/Special:EditPage/Template:Bill_Gates_series \"Special:EditPage/Template:Bill Gates series\")\n",
|
||||
" |\n",
|
||||
"\n",
|
||||
"**Microsoft Corporation** is an American multinational [technology corporation](/wiki/Technology_company \n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task5 = \"\"\"Scroll down.\"\"\"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task5, clear_history=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33muser_proxy\u001b[0m (to web_surfer):\n",
|
||||
"\n",
|
||||
"Where was the first office location, and when did they move to Redmond?\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"\u001b[31m\n",
|
||||
">>>>>>>> USING AUTO REPLY...\u001b[0m\n",
|
||||
"\u001b[35m\n",
|
||||
">>>>>>>> EXECUTING FUNCTION answer_from_page...\u001b[0m\n",
|
||||
"\u001b[33mweb_surfer\u001b[0m (to user_proxy):\n",
|
||||
"\n",
|
||||
"Microsoft's first office location was in Albuquerque, New Mexico, where it was founded on April 4, 1975. However, Microsoft later moved its headquarters to Redmond, Washington in January 1979. Since then, Redmond has been the main office location for Microsoft.\n",
|
||||
"\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task6 = \"\"\"Where was the first office location, and when did they move to Redmond?\"\"\"\n",
|
||||
"user_proxy.initiate_chat(web_surfer, message=task6, clear_history=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
1
setup.py
1
setup.py
|
@ -52,6 +52,7 @@ setuptools.setup(
|
|||
"teachable": ["chromadb"],
|
||||
"lmm": ["replicate", "pillow"],
|
||||
"graphs": ["networkx~=3.2.1", "matplotlib~=3.8.1"],
|
||||
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"],
|
||||
"redis": ["redis"],
|
||||
},
|
||||
classifiers=[
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
import os
|
||||
import sys
|
||||
import re
|
||||
import pytest
|
||||
from autogen import ConversableAgent, UserProxyAgent, config_list_from_json
|
||||
from autogen.oai.openai_utils import filter_config
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
|
||||
from conftest import skip_openai # noqa: E402
|
||||
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
||||
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
|
||||
|
||||
# Known-good fixtures used by the navigation tests below.
BLOG_POST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"
BLOG_POST_TITLE = "Does Model and Inference Parameter Matter in LLM Applications? - A Case Study for MATH | AutoGen"
BING_QUERY = "Microsoft"

# Skip everything if the websurfer optional dependencies are not installed.
try:
    from autogen.agentchat.contrib.web_surfer import WebSurferAgent
except ImportError:
    skip_all = True
else:
    skip_all = False

# Skip the LLM-backed tests if the openai package is missing, or if the
# shared conftest flag requests skipping OpenAI tests.
try:
    from openai import OpenAI
except ImportError:
    skip_oai = True
else:
    skip_oai = skip_openai  # was "False or skip_openai" — the "False or" was redundant

# Skip the live-search tests unless a Bing API key is present in the environment.
try:
    BING_API_KEY = os.environ["BING_API_KEY"]
except KeyError:
    skip_bing = True
else:
    skip_bing = False

# Only load model configuration when the OAI tests will actually run.
if not skip_oai:
    config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_all,
    reason="do not run if dependency is not installed",
)
def test_web_surfer():
    """Exercise WebSurferAgent's browser tool functions directly (no LLM required)."""
    page_size = 4096
    web_surfer = WebSurferAgent("web_surfer", llm_config=False, browser_config={"viewport_size": page_size})

    # Sneak a peek at the function map, allowing us to call the functions for testing here
    function_map = web_surfer._user_proxy._function_map

    # Test some basic navigations
    response = function_map["visit_page"](BLOG_POST_URL)
    assert f"Address: {BLOG_POST_URL}".strip() in response
    assert f"Title: {BLOG_POST_TITLE}".strip() in response

    # Test scrolling
    m = re.search(r"\bViewport position: Showing page 1 of (\d+).", response)
    assert m is not None, "Viewport position header missing from response"  # fail clearly, not with AttributeError
    total_pages = int(m.group(1))

    response = function_map["page_down"]()
    assert (
        f"Viewport position: Showing page 2 of {total_pages}." in response
    )  # Assumes the content is longer than one screen

    response = function_map["page_up"]()
    assert f"Viewport position: Showing page 1 of {total_pages}." in response

    # Try to scroll too far back up
    response = function_map["page_up"]()
    assert f"Viewport position: Showing page 1 of {total_pages}." in response

    # Try to scroll too far down
    for i in range(0, total_pages + 1):
        response = function_map["page_down"]()
    assert f"Viewport position: Showing page {total_pages} of {total_pages}." in response

    # Test web search -- we don't have a key in this case, so we expect it to raise an error (but it means the code path is correct)
    with pytest.raises(ValueError, match="Missing Bing API key."):
        response = function_map["informational_web_search"](BING_QUERY)

    with pytest.raises(ValueError, match="Missing Bing API key."):
        response = function_map["navigational_web_search"](BING_QUERY)

    # Test Q&A and summarization -- we don't have a key so we expect it to fail (but it means the code path is correct)
    with pytest.raises(AttributeError, match="'NoneType' object has no attribute 'create'"):
        response = function_map["answer_from_page"]("When was it founded?")

    with pytest.raises(AttributeError, match="'NoneType' object has no attribute 'create'"):
        response = function_map["summarize_page"]()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_oai,
    reason="do not run if oai is not installed",
)
def test_web_surfer_oai():
    """Drive WebSurferAgent end-to-end through a UserProxyAgent using live OAI models."""
    llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": None}

    # Q&A / summarization is routed to a long-context model subset.
    summarizer_models = ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]
    summarizer_llm_config = {
        "config_list": filter_config(config_list, {"model": summarizer_models}),
        "timeout": 180,
        "cache_seed": None,
    }

    # Both configurations must resolve to at least one usable model.
    assert len(llm_config["config_list"]) > 0
    assert len(summarizer_llm_config["config_list"]) > 0

    web_surfer = WebSurferAgent(
        "web_surfer",
        llm_config=llm_config,
        summarizer_llm_config=summarizer_llm_config,
        browser_config={"viewport_size": 4096},
    )

    user_proxy = UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        code_execution_config=False,
        default_auto_reply="",
        is_termination_msg=lambda x: True,
    )

    # Make some requests that should test function calling
    requests_to_make = [
        "Please visit the page 'https://en.wikipedia.org/wiki/Microsoft'",
        "Please scroll down.",
        "Please scroll up.",
        "When was it founded?",
        "What's this page about?",
    ]
    for message in requests_to_make:
        user_proxy.initiate_chat(web_surfer, message=message)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_bing,
    reason="do not run if bing api key is not available",
)
def test_web_surfer_bing():
    """Exercise WebSurferAgent's Bing search tools (requires BING_API_KEY)."""
    web_surfer = WebSurferAgent(
        "web_surfer",
        llm_config=False,
        browser_config={"viewport_size": 4096, "bing_api_key": BING_API_KEY},
    )

    # Peek at the internal function map so the tools can be invoked directly.
    function_map = web_surfer._user_proxy._function_map

    # Informational query: renders results as a one-page search listing.
    response = function_map["informational_web_search"](BING_QUERY)
    for expected in (
        f"Address: bing: {BING_QUERY}",
        f"Title: {BING_QUERY} - Search",
        "Viewport position: Showing page 1 of 1.",
        f"A Bing search for '{BING_QUERY}' found ",
    ):
        assert expected in response

    # Navigational query: should land directly on the top-hit page.
    response = function_map["navigational_web_search"](BING_QUERY + " Wikipedia")
    assert "Address: https://en.wikipedia.org/wiki/" in response
|
||||
|
||||
|
||||
# Allow running outside of pytest, e.g. `python test_web_surfer.py`.
if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    test_web_surfer()
    # test_web_surfer_oai()  # requires OpenAI credentials — see skip_oai above
    # test_web_surfer_bing()  # requires BING_API_KEY — see skip_bing above
|
|
@ -0,0 +1,173 @@
|
|||
import pytest
|
||||
import os
|
||||
import sys
|
||||
import requests
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from agentchat.test_assistant_agent import KEY_LOC # noqa: E402
|
||||
|
||||
# --- Known pages and expected strings used to validate SimpleTextBrowser ---
BLOG_POST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"
BLOG_POST_TITLE = "Does Model and Inference Parameter Matter in LLM Applications? - A Case Study for MATH | AutoGen"
BLOG_POST_STRING = "Large language models (LLMs) are powerful tools that can generate natural language texts for various applications, such as chatbots, summarization, translation, and more. GPT-4 is currently the state of the art LLM in the world. Is model selection irrelevant? What about inference parameters?"

WIKIPEDIA_URL = "https://en.wikipedia.org/wiki/Microsoft"
WIKIPEDIA_TITLE = "Microsoft - Wikipedia"
WIKIPEDIA_STRING = "Redmond"

# Non-HTML content types: raw text, a binary image download, and a PDF.
PLAIN_TEXT_URL = "https://raw.githubusercontent.com/microsoft/autogen/main/README.md"
IMAGE_URL = "https://github.com/afourney.png"

PDF_URL = "https://arxiv.org/pdf/2308.08155.pdf"
PDF_STRING = "Figure 1: AutoGen enables diverse LLM-based applications using multi-agent conversations."

BING_QUERY = "Microsoft"
BING_TITLE = f"{BING_QUERY} - Search"
BING_STRING = f"A Bing search for '{BING_QUERY}' found"

# Skip all tests when the browser's optional dependencies are not installed.
try:
    from autogen.browser_utils import SimpleTextBrowser
except ImportError:
    skip_all = True
else:
    skip_all = False

# Skip the live Bing tests unless an API key is present in the environment.
try:
    BING_API_KEY = os.environ["BING_API_KEY"]
except KeyError:
    skip_bing = True
else:
    skip_bing = False
|
||||
|
||||
|
||||
def _rm_folder(path):
    """Remove all the regular files in a folder, then deletes the folder. Assumes a flat file structure, with no subdirectories."""
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isfile(entry_path):
            os.unlink(entry_path)
    # Directory must now be empty (flat-structure assumption); remove it.
    os.rmdir(path)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_all,
    reason="do not run if dependency is not installed",
)
def test_simple_text_browser():
    """End-to-end checks of SimpleTextBrowser: navigation, viewport paging, downloads, and PDF rendering."""
    # Create a downloads folder (removing any leftover ones from prior tests)
    downloads_folder = os.path.join(KEY_LOC, "downloads")
    if os.path.isdir(downloads_folder):
        _rm_folder(downloads_folder)
    os.mkdir(downloads_folder)

    # Instantiate the browser
    user_agent = "python-requests/" + requests.__version__
    viewport_size = 1024
    browser = SimpleTextBrowser(
        downloads_folder=downloads_folder,
        viewport_size=viewport_size,
        request_kwargs={
            "headers": {"User-Agent": user_agent},
        },
    )

    # Test that we can visit a page and find what we expect there
    top_viewport = browser.visit_page(BLOG_POST_URL)
    assert browser.viewport == top_viewport
    assert browser.page_title.strip() == BLOG_POST_TITLE.strip()
    assert BLOG_POST_STRING in browser.page_content

    # Check if page splitting works
    approx_pages = int(len(browser.page_content) / viewport_size + 0.5)  # May be fewer, since it aligns to word breaks
    assert len(browser.viewport_pages) <= approx_pages
    assert abs(len(browser.viewport_pages) - approx_pages) <= 1  # allow only a small deviation
    assert browser.viewport_pages[0][0] == 0
    assert browser.viewport_pages[-1][1] == len(browser.page_content)

    # Make sure we can reconstruct the full contents from the split pages
    buffer = ""
    for bounds in browser.viewport_pages:
        buffer += browser.page_content[bounds[0] : bounds[1]]
    assert buffer == browser.page_content

    # Test scrolling (scroll all the way to the bottom)
    for i in range(1, len(browser.viewport_pages)):
        browser.page_down()
        assert browser.viewport_current_page == i
    # Test scrolling beyond the limits
    for i in range(0, 5):
        browser.page_down()
        assert browser.viewport_current_page == len(browser.viewport_pages) - 1

    # Test scrolling (scroll all the way back up)
    for i in range(len(browser.viewport_pages) - 2, 0, -1):
        browser.page_up()
        assert browser.viewport_current_page == i
    # Test scrolling beyond the limits
    for i in range(0, 5):
        browser.page_up()
        assert browser.viewport_current_page == 0

    # Test Wikipedia handling
    assert WIKIPEDIA_STRING in browser.visit_page(WIKIPEDIA_URL)
    assert WIKIPEDIA_TITLE.strip() == browser.page_title.strip()

    # Visit a plain-text file
    response = requests.get(PLAIN_TEXT_URL)
    response.raise_for_status()
    expected_results = response.text

    browser.visit_page(PLAIN_TEXT_URL)
    assert browser.page_content.strip() == expected_results.strip()

    # Directly download an image, and compute its md5
    response = requests.get(IMAGE_URL, stream=True)
    response.raise_for_status()
    expected_md5 = hashlib.md5(response.raw.read()).hexdigest()

    # Visit an image causing it to be downloaded by the SimpleTextBrowser, then compute its md5
    viewport = browser.visit_page(IMAGE_URL)
    m = re.search(r"Downloaded '(.*?)' to '(.*?)'", viewport)
    assert m is not None, "Expected a download confirmation in the viewport"  # fail clearly, not with AttributeError
    fetched_url = m.group(1)
    download_loc = m.group(2)
    assert fetched_url == IMAGE_URL

    with open(download_loc, "rb") as fh:
        downloaded_md5 = hashlib.md5(fh.read()).hexdigest()

    # MD5s should match
    assert expected_md5 == downloaded_md5

    # Fetch a PDF
    viewport = browser.visit_page(PDF_URL)
    assert PDF_STRING in viewport

    # Clean up
    _rm_folder(downloads_folder)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    skip_bing,
    reason="do not run bing tests if key is missing",
)
def test_bing_search():
    """Verify that 'bing:' queries render a single-page search-results view."""
    # Instantiate the browser
    user_agent = "python-requests/" + requests.__version__
    browser = SimpleTextBrowser(
        bing_api_key=BING_API_KEY,
        viewport_size=1024,
        request_kwargs={
            "headers": {"User-Agent": user_agent},
        },
    )

    results_page = browser.visit_page("bing: " + BING_QUERY)
    assert BING_STRING in results_page
    assert browser.page_title == BING_TITLE
    # Search results should always fit in exactly one viewport.
    assert len(browser.viewport_pages) == 1
    assert browser.viewport_pages[0] == (0, len(browser.page_content))
|
||||
|
||||
|
||||
# Allow running outside of pytest, e.g. `python test_browser_utils.py`.
if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    test_simple_text_browser()
    test_bing_search()  # NOTE: runs unconditionally here — requires BING_API_KEY to be set
|
|
@ -39,6 +39,7 @@ Links to notebook examples:
|
|||
- Function Inception: Enable AutoGen agents to update/remove functions during conversations. - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_inception_function.ipynb)
|
||||
- Agent Chat with Whisper - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_video_transcript_translate_with_whisper.ipynb)
|
||||
- Constrained Responses via Guidance - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_guidance.ipynb)
|
||||
- Browse the Web with Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_surfer.ipynb)
|
||||
1. **Human Involvement**
|
||||
- Simple example in ChatGPT style [View example](https://github.com/microsoft/autogen/blob/main/samples/simple_chat.py)
|
||||
- Auto Code Generation, Execution, Debugging and **Human Feedback** - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_human_feedback.ipynb)
|
||||
|
|
Loading…
Reference in New Issue