mirror of https://github.com/microsoft/autogen.git
Together AI Client (#2919)
* First pass together.ai client class * Config handling, models and cost * Added tests, moved param management to create function * Tests, parameter, validation, logging updates * Added use of client_utils PR 2949 * Updated to return OAI response * Notebook example * Improved function calling, updated tests, updated notebook with Chess example * Tidied up together client class, better parameter handling, simpler exception capture, warning for no cost, reuse in tests, cleaner tests * Update of documentation notebook, replacement of old version * Fix of messages parameter for hide_tools function call * Update autogen/oai/together.py Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> * Update together.py to fix text --------- Co-authored-by: Qingyun Wu <qingyun0327@gmail.com> Co-authored-by: Yiran Wu <32823396+yiranwu0@users.noreply.github.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
parent
843c343383
commit
b1ec3ae545
|
@ -558,3 +558,43 @@ jobs:
|
|||
with:
|
||||
file: ./coverage.xml
|
||||
flags: unittests
|
||||
|
||||
# CI job that runs the Together.AI client unit tests on every supported
# OS / Python combination (macOS + Python 3.9 is excluded from the matrix).
TogetherTest:
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os: [ubuntu-latest, macos-latest, windows-2019]
      python-version: ["3.9", "3.10", "3.11", "3.12"]
      exclude:
        - os: macos-latest
          python-version: "3.9"
  steps:
    - uses: actions/checkout@v4
      with:
        lfs: true
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    # The requirement specifier is quoted: unquoted, the shell parses ">=5" as
    # an output redirect to a file named "=5" and installs an unpinned pytest-cov.
    - name: Install packages and dependencies for all tests
      run: |
        python -m pip install --upgrade pip wheel
        pip install "pytest-cov>=5"
    - name: Install packages and dependencies for Together
      run: |
        pip install -e .[together,test]
    # Docker-based code execution is switched off on every runner except ubuntu-latest.
    - name: Set AUTOGEN_USE_DOCKER based on OS
      shell: bash
      run: |
        if [[ ${{ matrix.os }} != ubuntu-latest ]]; then
          echo "AUTOGEN_USE_DOCKER=False" >> $GITHUB_ENV
        fi
    - name: Coverage
      run: |
        pytest test/oai/test_together.py --skip-openai
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
        flags: unittests
|
||||
|
|
|
@ -20,6 +20,7 @@ if TYPE_CHECKING:
|
|||
from autogen.oai.anthropic import AnthropicClient
|
||||
from autogen.oai.gemini import GeminiClient
|
||||
from autogen.oai.mistral import MistralAIClient
|
||||
from autogen.oai.together import TogetherClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -203,7 +204,7 @@ class FileLogger(BaseLogger):
|
|||
|
||||
def log_new_client(
|
||||
self,
|
||||
client: AzureOpenAI | OpenAI | GeminiClient | AnthropicClient | MistralAIClient,
|
||||
client: AzureOpenAI | OpenAI | GeminiClient | AnthropicClient | MistralAIClient | TogetherClient,
|
||||
wrapper: OpenAIWrapper,
|
||||
init_args: Dict[str, Any],
|
||||
) -> None:
|
||||
|
|
|
@ -21,6 +21,7 @@ if TYPE_CHECKING:
|
|||
from autogen.oai.anthropic import AnthropicClient
|
||||
from autogen.oai.gemini import GeminiClient
|
||||
from autogen.oai.mistral import MistralAIClient
|
||||
from autogen.oai.together import TogetherClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
lock = threading.Lock()
|
||||
|
@ -390,7 +391,7 @@ class SqliteLogger(BaseLogger):
|
|||
|
||||
def log_new_client(
|
||||
self,
|
||||
client: Union[AzureOpenAI, OpenAI, GeminiClient, AnthropicClient, MistralAIClient],
|
||||
client: Union[AzureOpenAI, OpenAI, GeminiClient, AnthropicClient, MistralAIClient, TogetherClient],
|
||||
wrapper: OpenAIWrapper,
|
||||
init_args: Dict[str, Any],
|
||||
) -> None:
|
||||
|
|
|
@ -63,6 +63,13 @@ try:
|
|||
except ImportError as e:
|
||||
mistral_import_exception = e
|
||||
|
||||
try:
|
||||
from autogen.oai.together import TogetherClient
|
||||
|
||||
together_import_exception: Optional[ImportError] = None
|
||||
except ImportError as e:
|
||||
together_import_exception = e
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
if not logger.handlers:
|
||||
# Add the console handler.
|
||||
|
@ -473,6 +480,10 @@ class OpenAIWrapper:
|
|||
raise ImportError("Please install `mistralai` to use the Mistral.AI API.")
|
||||
client = MistralAIClient(**openai_config)
|
||||
self._clients.append(client)
|
||||
elif api_type is not None and api_type.startswith("together"):
|
||||
if together_import_exception:
|
||||
raise ImportError("Please install `together` to use the Together.AI API.")
|
||||
self._clients.append(TogetherClient(**config))
|
||||
else:
|
||||
client = OpenAI(**openai_config)
|
||||
self._clients.append(OpenAIClient(client))
|
||||
|
|
|
@ -0,0 +1,351 @@
|
|||
"""Create an OpenAI-compatible client using Together.AI's API.
|
||||
|
||||
Example:
|
||||
llm_config={
|
||||
"config_list": [{
|
||||
"api_type": "together",
|
||||
"model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||
"api_key": os.environ.get("TOGETHER_API_KEY")
|
||||
}
|
||||
]}
|
||||
|
||||
agent = autogen.AssistantAgent("my_agent", llm_config=llm_config)
|
||||
|
||||
Install Together.AI python library using: pip install --upgrade together
|
||||
|
||||
Resources:
|
||||
- https://docs.together.ai/docs/inference-python
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import copy
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
from io import BytesIO
|
||||
from typing import Any, Dict, List, Mapping, Tuple, Union
|
||||
|
||||
import requests
|
||||
from openai.types.chat import ChatCompletion, ChatCompletionMessageToolCall
|
||||
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
|
||||
from openai.types.completion_usage import CompletionUsage
|
||||
from PIL import Image
|
||||
from together import Together, error
|
||||
|
||||
from autogen.oai.client_utils import should_hide_tools, validate_parameter
|
||||
|
||||
|
||||
class TogetherClient:
    """Client for Together.AI's API.

    Wraps the `together` SDK and converts its chat-completion responses into
    OpenAI `ChatCompletion` objects (with an extra `cost` attribute).
    """

    def __init__(self, **kwargs):
        """Requires api_key or environment variable to be set

        Args:
            api_key (str): The API key for using Together.AI (or environment variable TOGETHER_API_KEY needs to be set)

        Raises:
            AssertionError: If no key is passed and TOGETHER_API_KEY is not set.
        """
        # Ensure we have the api_key upon instantiation; an explicit key wins
        # over the environment variable.
        self.api_key = kwargs.get("api_key", None)
        if not self.api_key:
            self.api_key = os.getenv("TOGETHER_API_KEY")

        # NOTE: kept as an assert (not a raise) because callers and the test
        # suite expect AssertionError here.
        assert (
            self.api_key
        ), "Please include the api_key in your config list entry for Together.AI or set the TOGETHER_API_KEY env variable."

    def message_retrieval(self, response) -> List:
        """
        Retrieve and return a list of strings or a list of Choice.Message from the response.

        NOTE: if a list of Choice.Message is returned, it currently needs to contain the fields of OpenAI's ChatCompletion Message object,
        since that is expected for function or tool calling in the rest of the codebase at the moment, unless a custom agent is being used.
        """
        return [choice.message for choice in response.choices]

    def cost(self, response) -> float:
        """Return the cost that `create` attached to the response."""
        return response.cost

    @staticmethod
    def get_usage(response) -> Dict:
        """Return usage summary of the response using RESPONSE_USAGE_KEYS."""
        # ...  # pragma: no cover
        return {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens,
            "cost": response.cost,
            "model": response.model,
        }

    def parse_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Loads the parameters for Together.AI API from the passed in parameters and returns a validated set. Checks types, ranges, and sets defaults

        Args:
            params: The config / request dictionary; only the keys handled below are read.

        Returns:
            A new dictionary holding `model` plus every validated Together.AI parameter.

        Raises:
            AssertionError: If `model` is missing or empty.
        """
        together_params = {}

        # Check that we have what we need to use Together.AI's API
        together_params["model"] = params.get("model", None)
        assert together_params[
            "model"
        ], "Please specify the 'model' in your config list entry to nominate the Together.AI model to use."

        # Validate allowed Together.AI parameters
        # https://github.com/togethercomputer/together-python/blob/94ffb30daf0ac3e078be986af7228f85f79bde99/src/together/resources/completions.py#L44
        together_params["max_tokens"] = validate_parameter(params, "max_tokens", int, True, 512, (0, None), None)
        together_params["stream"] = validate_parameter(params, "stream", bool, False, False, None, None)
        together_params["temperature"] = validate_parameter(params, "temperature", (int, float), True, None, None, None)
        together_params["top_p"] = validate_parameter(params, "top_p", (int, float), True, None, None, None)
        together_params["top_k"] = validate_parameter(params, "top_k", int, True, None, None, None)
        # Accept ints as well as floats, like every other numeric parameter here,
        # so a value such as `1` is not discarded as a type error.
        together_params["repetition_penalty"] = validate_parameter(
            params, "repetition_penalty", (int, float), True, None, None, None
        )
        together_params["presence_penalty"] = validate_parameter(
            params, "presence_penalty", (int, float), True, None, (-2, 2), None
        )
        together_params["frequency_penalty"] = validate_parameter(
            params, "frequency_penalty", (int, float), True, None, (-2, 2), None
        )
        together_params["min_p"] = validate_parameter(params, "min_p", (int, float), True, None, (0, 1), None)
        together_params["safety_model"] = validate_parameter(
            params, "safety_model", str, True, None, None, None
        )  # We won't enforce the available models as they are likely to change

        # Check if they want to stream and use tools, which isn't currently supported (TODO)
        if together_params["stream"] and "tools" in params:
            warnings.warn(
                "Streaming is not supported when using tools, streaming will be disabled.",
                UserWarning,
            )

            together_params["stream"] = False

        return together_params

    def create(self, params: Dict) -> ChatCompletion:
        """Run a chat completion against Together.AI and return it in OpenAI format.

        Args:
            params: Request dictionary. Reads `messages`, optional `tools` and
                `hide_tools`, plus everything consumed by `parse_params`.

        Returns:
            A `ChatCompletion` with a single choice, token usage, and an extra
            `cost` attribute computed by `calculate_together_cost`.

        Raises:
            RuntimeError: If the Together.AI call raises or yields no response.
            AssertionError: If `model` is missing (raised by `parse_params`).
        """
        messages = params.get("messages", [])

        # Convert AutoGen messages to Together.AI messages
        together_messages = oai_messages_to_together_messages(messages)

        # Parse parameters to Together.AI API's parameters
        together_params = self.parse_params(params)

        # Add tools to the call if we have them and aren't hiding them
        if "tools" in params:
            hide_tools = validate_parameter(
                params, "hide_tools", str, False, "never", None, ["if_all_run", "if_any_run", "never"]
            )
            if not should_hide_tools(together_messages, params["tools"], hide_tools):
                together_params["tools"] = params["tools"]

        together_params["messages"] = together_messages

        # We use chat model by default
        client = Together(api_key=self.api_key)

        # A single attempt: the previous "retry" loop raised on the first error
        # and broke out on the first success, so it never actually retried.
        try:
            response = client.chat.completions.create(**together_params)
        except Exception as e:
            # Chain the cause so the SDK traceback is preserved.
            raise RuntimeError(f"Together.AI exception occurred: {e}") from e

        if response is None:
            raise RuntimeError("Failed to get response from Together.AI.")

        tool_calls = None
        together_finish = "stop"
        usage = None

        if together_params["stream"]:
            # A streamed response is an iterator of chunks, not a completion
            # object, so content / id / usage must be gathered while iterating.
            # NOTE(review): assumes chunks expose `choices[0].delta.content`,
            # `id` and `usage` - confirm against the together SDK.
            response_id = ""
            pieces = []
            for chunk in response:
                if chunk.choices:
                    pieces.append(chunk.choices[0].delta.content or "")
                response_id = getattr(chunk, "id", None) or response_id
                chunk_usage = getattr(chunk, "usage", None)
                if chunk_usage is not None:
                    usage = chunk_usage
            content = "".join(pieces)
            # Tools are never sent when streaming (see parse_params), so a
            # streamed reply cannot carry tool calls.
        else:
            choice = response.choices[0]
            response_id = response.id
            content = choice.message.content
            usage = response.usage

            # If we have tool calls as the response, populate completed tool calls for our return OAI response
            if choice.finish_reason == "tool_calls":
                together_finish = "tool_calls"
                tool_calls = [
                    ChatCompletionMessageToolCall(
                        id=tool_call.id,
                        function={"name": tool_call.function.name, "arguments": tool_call.function.arguments},
                        type="function",
                    )
                    for tool_call in choice.message.tool_calls
                ]

        # Token counts will be returned (zero when the API reported no usage)
        prompt_tokens = usage.prompt_tokens if usage is not None else 0
        completion_tokens = usage.completion_tokens if usage is not None else 0
        total_tokens = usage.total_tokens if usage is not None else 0

        # Convert output to the OpenAI response types
        message = ChatCompletionMessage(
            role="assistant",
            content=content,
            function_call=None,
            tool_calls=tool_calls,
        )
        choices = [Choice(finish_reason=together_finish, index=0, message=message)]

        response_oai = ChatCompletion(
            id=response_id,
            model=together_params["model"],
            # OpenAI's `created` is a Unix timestamp in seconds, not milliseconds.
            created=int(time.time()),
            object="chat.completion",
            choices=choices,
            usage=CompletionUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
            ),
            cost=calculate_together_cost(prompt_tokens, completion_tokens, together_params["model"]),
        )

        return response_oai
|
||||
|
||||
|
||||
def oai_messages_to_together_messages(messages: list[Dict[str, Any]]) -> list[dict[str, Any]]:
    """Translate OpenAI-format chat messages into the form sent to Together.AI.

    The input is deep-copied, so the caller's list and dictionaries are never
    modified. The only adjustment made is relabelling `tool` messages (the
    result of an executed function) with the `user` role.
    """
    converted = copy.deepcopy(messages)

    for entry in converted:
        # Tool results are relabelled as `user` messages; everything else is left untouched.
        if entry.get("role") == "tool":
            entry["role"] = "user"

    return converted
|
||||
|
||||
|
||||
# MODELS AND COSTS
# Model name -> size in billions of parameters, for chat / language / code models.
chat_lang_code_model_sizes = {
    "zero-one-ai/Yi-34B-Chat": 34,
    "allenai/OLMo-7B-Instruct": 7,
    "allenai/OLMo-7B-Twin-2T": 7,
    "allenai/OLMo-7B": 7,
    "Austism/chronos-hermes-13b": 13,
    "deepseek-ai/deepseek-coder-33b-instruct": 33,
    "deepseek-ai/deepseek-llm-67b-chat": 67,
    "garage-bAInd/Platypus2-70B-instruct": 70,
    "google/gemma-2b-it": 2,
    "google/gemma-7b-it": 7,
    "Gryphe/MythoMax-L2-13b": 13,
    "lmsys/vicuna-13b-v1.5": 13,
    "lmsys/vicuna-7b-v1.5": 7,
    "codellama/CodeLlama-13b-Instruct-hf": 13,
    "codellama/CodeLlama-34b-Instruct-hf": 34,
    "codellama/CodeLlama-70b-Instruct-hf": 70,
    "codellama/CodeLlama-7b-Instruct-hf": 7,
    "meta-llama/Llama-2-70b-chat-hf": 70,
    "meta-llama/Llama-2-13b-chat-hf": 13,
    "meta-llama/Llama-2-7b-chat-hf": 7,
    "meta-llama/Llama-3-8b-chat-hf": 8,
    "meta-llama/Llama-3-70b-chat-hf": 70,
    "mistralai/Mistral-7B-Instruct-v0.1": 7,
    "mistralai/Mistral-7B-Instruct-v0.2": 7,
    "mistralai/Mistral-7B-Instruct-v0.3": 7,
    "NousResearch/Nous-Capybara-7B-V1p9": 7,
    "NousResearch/Nous-Hermes-llama-2-7b": 7,
    "NousResearch/Nous-Hermes-Llama2-13b": 13,
    "NousResearch/Nous-Hermes-2-Yi-34B": 34,
    "openchat/openchat-3.5-1210": 7,
    "Open-Orca/Mistral-7B-OpenOrca": 7,
    "Qwen/Qwen1.5-0.5B-Chat": 0.5,
    "Qwen/Qwen1.5-1.8B-Chat": 1.8,
    "Qwen/Qwen1.5-4B-Chat": 4,
    "Qwen/Qwen1.5-7B-Chat": 7,
    "Qwen/Qwen1.5-14B-Chat": 14,
    "Qwen/Qwen1.5-32B-Chat": 32,
    "Qwen/Qwen1.5-72B-Chat": 72,
    "Qwen/Qwen1.5-110B-Chat": 110,
    "Qwen/Qwen2-72B-Instruct": 72,
    "snorkelai/Snorkel-Mistral-PairRM-DPO": 7,
    "togethercomputer/alpaca-7b": 7,
    "teknium/OpenHermes-2-Mistral-7B": 7,
    "teknium/OpenHermes-2p5-Mistral-7B": 7,
    "togethercomputer/Llama-2-7B-32K-Instruct": 7,
    "togethercomputer/RedPajama-INCITE-Chat-3B-v1": 3,
    "togethercomputer/RedPajama-INCITE-7B-Chat": 7,
    "togethercomputer/StripedHyena-Nous-7B": 7,
    "Undi95/ReMM-SLERP-L2-13B": 13,
    "Undi95/Toppy-M-7B": 7,
    "WizardLM/WizardLM-13B-V1.2": 13,
    "upstage/SOLAR-10.7B-Instruct-v1.0": 11,
}

# Cost per million tokens based on up to X Billion parameters, e.g. up 4B is $0.1/million
chat_lang_code_model_costs = {4: 0.1, 8: 0.2, 21: 0.3, 41: 0.8, 80: 0.9, 110: 1.8}

# Model name -> size in billions of parameters, priced on the mixture-of-experts tiers.
mixture_model_sizes = {
    "cognitivecomputations/dolphin-2.5-mixtral-8x7b": 56,
    "databricks/dbrx-instruct": 132,
    "mistralai/Mixtral-8x7B-Instruct-v0.1": 47,
    "mistralai/Mixtral-8x22B-Instruct-v0.1": 141,
    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO": 7,
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 47,
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT": 47,
    "Snowflake/snowflake-arctic-instruct": 480,
}

# Cost per million tokens based on up to X Billion parameters, e.g. up 56B is $0.6/million
mixture_costs = {56: 0.6, 176: 1.2, 480: 2.4}


def calculate_together_cost(input_tokens: int, output_tokens: int, model_name: str) -> float:
    """Cost calculation for inference

    Looks the model up in the size tables above, picks the smallest pricing
    tier whose upper bound covers that size, and applies the per-million-token
    rate to the combined input and output token count.

    Args:
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.
        model_name: Together.AI model identifier.

    Returns:
        The cost as a float. 0.0 (with a UserWarning) when the model is not in
        either table or its size exceeds every tier.
    """
    # Chat, Language, Code models take precedence over the mixture table.
    if model_name in chat_lang_code_model_sizes:
        size_in_b = chat_lang_code_model_sizes[model_name]
        tier_costs = chat_lang_code_model_costs
    elif model_name in mixture_model_sizes:
        # Mixture-of-experts
        size_in_b = mixture_model_sizes[model_name]
        tier_costs = mixture_costs
    else:
        # Model is not in our list of models, can't determine the cost
        warnings.warn(
            "The model isn't catered for costing, to apply costs you can use the 'price' key on your config_list.",
            UserWarning,
        )
        return 0.0

    cost_per_mil = 0.0
    # Walk the tiers in ascending order explicitly rather than relying on the
    # dict literal's insertion order, so reordering a table cannot mis-price.
    for top_size in sorted(tier_costs):
        if size_in_b <= top_size:
            cost_per_mil = tier_costs[top_size]
            break

    if cost_per_mil == 0:
        warnings.warn("Model size doesn't align with cost structure.", UserWarning)

    return cost_per_mil * ((input_tokens + output_tokens) / 1e6)
|
|
@ -16,6 +16,7 @@ if TYPE_CHECKING:
|
|||
from autogen.oai.anthropic import AnthropicClient
|
||||
from autogen.oai.gemini import GeminiClient
|
||||
from autogen.oai.mistral import MistralAIClient
|
||||
from autogen.oai.together import TogetherClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -109,7 +110,7 @@ def log_new_wrapper(wrapper: OpenAIWrapper, init_args: Dict[str, Union[LLMConfig
|
|||
|
||||
|
||||
def log_new_client(
|
||||
client: Union[AzureOpenAI, OpenAI, GeminiClient, AnthropicClient, MistralAIClient],
|
||||
client: Union[AzureOpenAI, OpenAI, GeminiClient, AnthropicClient, MistralAIClient, TogetherClient],
|
||||
wrapper: OpenAIWrapper,
|
||||
init_args: Dict[str, Any],
|
||||
) -> None:
|
||||
|
|
1
setup.py
1
setup.py
|
@ -81,6 +81,7 @@ extra_require = {
|
|||
"lmm": ["replicate", "pillow"],
|
||||
"graph": ["networkx", "matplotlib"],
|
||||
"gemini": ["google-generativeai>=0.5,<1", "google-cloud-aiplatform", "google-auth", "pillow", "pydantic"],
|
||||
"together": ["together>=1.2"],
|
||||
"websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"],
|
||||
"redis": ["redis"],
|
||||
"cosmosdb": ["azure-cosmos>=4.2.0"],
|
||||
|
|
|
@ -0,0 +1,264 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
from openai.types.chat.chat_completion import ChatCompletionMessage, Choice
|
||||
|
||||
from autogen.oai.together import TogetherClient, calculate_together_cost
|
||||
|
||||
skip = False
|
||||
except ImportError:
|
||||
TogetherClient = object
|
||||
InternalServerError = object
|
||||
skip = True
|
||||
|
||||
|
||||
# Fixtures for mock data
|
||||
@pytest.fixture
def mock_response():
    """Provide a lightweight response class (not an instance) for tests to populate."""

    class MockResponse:
        """Plain attribute holder for the response fields the tests read."""

        def __init__(self, text, choices, usage, cost, model):
            self.model = model
            self.cost = cost
            self.usage = usage
            self.choices = choices
            self.text = text

    return MockResponse
|
||||
|
||||
|
||||
@pytest.fixture
def together_client():
    """Build a TogetherClient with a placeholder API key."""
    client = TogetherClient(api_key="fake_api_key")
    return client
|
||||
|
||||
|
||||
# Test initialization and configuration
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
def test_initialization(monkeypatch):
    """The client must refuse to start without an API key and accept an explicit one."""
    # The client falls back to TOGETHER_API_KEY, so clear it first; otherwise a
    # developer or CI machine with the variable set would make this test fail.
    monkeypatch.delenv("TOGETHER_API_KEY", raising=False)

    # Missing any api_key
    with pytest.raises(AssertionError) as assertinfo:
        TogetherClient()  # Should raise an AssertionError due to missing api_key

    assert (
        "Please include the api_key in your config list entry for Together.AI or set the TOGETHER_API_KEY env variable."
        in str(assertinfo.value)
    )

    # Creation works
    TogetherClient(api_key="fake_api_key")  # Should create okay now.
|
||||
|
||||
|
||||
# Test standard initialization
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
def test_valid_initialization(together_client):
    """The key handed to the constructor is stored on the client unchanged."""
    stored_key = together_client.api_key
    assert stored_key == "fake_api_key", "Config api_key should be correctly set"
|
||||
|
||||
|
||||
# Test parameters
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
def test_parsing_params(together_client):
    """parse_params passes valid values through and falls back to defaults otherwise."""
    mixtral = "mistralai/Mixtral-8x7B-Instruct-v0.1"

    # Case 1: every parameter supplied with a valid value comes back unchanged.
    all_valid = {
        "model": "Qwen/Qwen2-72B-Instruct",
        "max_tokens": 1000,
        "stream": False,
        "temperature": 1,
        "top_p": 0.8,
        "top_k": 50,
        "repetition_penalty": 0.5,
        "presence_penalty": 1.5,
        "frequency_penalty": 1.5,
        "min_p": 0.2,
        "safety_model": "Meta-Llama/Llama-Guard-7b",
    }
    expected_all_valid = dict(all_valid)
    assert together_client.parse_params(all_valid) == expected_all_valid

    # What every remaining case must collapse to: the model plus defaults.
    defaults_for_mixtral = {
        "model": mixtral,
        "max_tokens": 512,
        "stream": False,
        "temperature": None,
        "top_p": None,
        "top_k": None,
        "repetition_penalty": None,
        "presence_penalty": None,
        "frequency_penalty": None,
        "min_p": None,
        "safety_model": None,
    }

    # Case 2: only the model is given, others set as defaults.
    only_model = {"model": mixtral}
    assert together_client.parse_params(only_model) == defaults_for_mixtral

    # Case 3: incorrect types, defaults should be set, will show warnings but not trigger assertions.
    wrong_types = {
        "model": mixtral,
        "max_tokens": "512",
        "stream": "Yes",
        "temperature": "0.5",
        "top_p": "0.8",
        "top_k": "50",
        "repetition_penalty": "0.5",
        "presence_penalty": "1.5",
        "frequency_penalty": "1.5",
        "min_p": "0.2",
        "safety_model": False,
    }
    assert together_client.parse_params(wrong_types) == defaults_for_mixtral

    # Case 4: values outside bounds, should warn and set to defaults.
    out_of_bounds = {
        "model": mixtral,
        "max_tokens": -200,
        "presence_penalty": -5,
        "frequency_penalty": 5,
        "min_p": -0.5,
    }
    assert together_client.parse_params(out_of_bounds) == defaults_for_mixtral
|
||||
|
||||
|
||||
# Test cost calculation
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
def test_cost_calculation(mock_response):
    """15 tokens on Mixtral-8x22B (141B, mixture pricing) should cost $0.000018."""
    response = mock_response(
        text="Example response",
        choices=[{"message": "Test message 1"}],
        usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
        cost=None,
        model="mistralai/Mixtral-8x22B-Instruct-v0.1",
    )
    # The cost is a product of floats, so compare with a tolerance rather than
    # exact equality, which depends on rounding of the intermediate values.
    assert calculate_together_cost(
        response.usage["prompt_tokens"], response.usage["completion_tokens"], response.model
    ) == pytest.approx(0.000018), "Cost for this should be $0.000018"
|
||||
|
||||
|
||||
# Test text generation
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
@patch("autogen.oai.together.TogetherClient.create")
def test_create_response(mock_create, together_client):
    """Check the shape of a plain text completion returned by `create`.

    NOTE(review): `TogetherClient.create` itself is patched, so the call below
    returns the mock configured here and the real conversion code never runs.
    """
    # Canned response standing in for what `create` returns.
    only_choice = MagicMock(
        finish_reason="stop",
        message=MagicMock(content="Example Llama response", tool_calls=None),
    )
    canned = MagicMock()
    canned.choices = [only_choice]
    canned.id = "mock_together_response_id"
    canned.model = "meta-llama/Llama-3-8b-chat-hf"
    canned.usage = MagicMock(prompt_tokens=10, completion_tokens=20)  # Example token usage
    mock_create.return_value = canned

    # Request sent to the (mocked) create method.
    request = {
        "messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "World"}],
        "model": "meta-llama/Llama-3-8b-chat-hf",
    }
    response = together_client.create(request)

    # Assertions to check if response is structured as expected
    assert (
        response.choices[0].message.content == "Example Llama response"
    ), "Response content should match expected output"
    assert response.id == "mock_together_response_id", "Response ID should match the mocked response ID"
    assert response.model == "meta-llama/Llama-3-8b-chat-hf", "Response model should match the mocked response model"
    assert response.usage.prompt_tokens == 10, "Response prompt tokens should match the mocked response usage"
    assert response.usage.completion_tokens == 20, "Response completion tokens should match the mocked response usage"
|
||||
|
||||
|
||||
# Test functions/tools
|
||||
@pytest.mark.skipif(skip, reason="Together.AI dependency is not installed")
@patch("autogen.oai.together.TogetherClient.create")
def test_create_response_with_tool_call(mock_create, together_client):
    """Check the shape of a tool-call completion returned by `create`.

    NOTE(review): `TogetherClient.create` itself is patched, so the call below
    returns the mock configured here and the real conversion code never runs.
    """
    # `name` is special in the MagicMock constructor (it names the mock), so
    # the attribute the test reads is assigned afterwards.
    mock_function = MagicMock(name="currency_calculator")
    mock_function.name = "currency_calculator"
    mock_function.arguments = '{"base_currency": "EUR", "quote_currency": "USD", "base_amount": 123.45}'

    # Canned tool-call response standing in for what `create` returns.
    tool_message = MagicMock(
        content="",  # Message is empty for tool responses
        tool_calls=[MagicMock(id="gdRdrvnHh", function=mock_function)],
    )
    tool_choice = MagicMock(finish_reason="tool_calls", message=tool_message)
    mock_create.return_value = MagicMock(
        choices=[tool_choice],
        id="mock_together_response_id",
        model="meta-llama/Llama-3-8b-chat-hf",
        usage=MagicMock(prompt_tokens=10, completion_tokens=20),
    )

    # Tool definition passed with the request.
    currency_properties = {
        "base_amount": {"type": "number", "description": "Amount of currency in base_currency"},
        "base_currency": {
            "enum": ["USD", "EUR"],
            "type": "string",
            "default": "USD",
            "description": "Base currency",
        },
        "quote_currency": {
            "enum": ["USD", "EUR"],
            "type": "string",
            "default": "EUR",
            "description": "Quote currency",
        },
    }
    converted_functions = [
        {
            "type": "function",
            "function": {
                "description": "Currency exchange calculator.",
                "name": "currency_calculator",
                "parameters": {
                    "type": "object",
                    "properties": currency_properties,
                    "required": ["base_amount"],
                },
            },
        }
    ]

    together_messages = [
        {
            "role": "user",
            "content": "How much is 123.45 EUR in USD?",
            "name": None,
            "tool_calls": None,
            "tool_call_id": None,
        },
    ]

    # Call the create method (which is now mocked)
    request = {
        "messages": together_messages,
        "tools": converted_functions,
        "model": "meta-llama/Llama-3-8b-chat-hf",
    }
    response = together_client.create(request)

    # Assertions to check if response is structured as expected
    first_message = response.choices[0].message
    assert first_message.content == ""
    assert first_message.tool_calls[0].function.name == "currency_calculator"
|
File diff suppressed because one or more lines are too long
|
@ -1,182 +0,0 @@
|
|||
# Together AI
|
||||
This cloud-based proxy server example, using [together.ai](https://www.together.ai/), is a group chat between a Python developer
|
||||
and a code reviewer, who are given a coding task.
|
||||
|
||||
Start by [installing AutoGen](/docs/installation/) and getting your [together.ai API key](https://api.together.xyz/settings/profile).
|
||||
|
||||
Put your together.ai API key in an environment variable, TOGETHER_API_KEY.
|
||||
|
||||
Linux / Mac OSX:
|
||||
|
||||
```bash
|
||||
export TOGETHER_API_KEY=YourTogetherAIKeyHere
|
||||
```
|
||||
|
||||
Windows (command prompt):
|
||||
|
||||
```powershell
|
||||
set TOGETHER_API_KEY=YourTogetherAIKeyHere
|
||||
```
|
||||
|
||||
Create your LLM configuration, with the [model you want](https://docs.together.ai/docs/inference-models).
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
config_list = [
|
||||
{
|
||||
# Available together.ai model strings:
|
||||
# https://docs.together.ai/docs/inference-models
|
||||
"model": "mistralai/Mistral-7B-Instruct-v0.1",
|
||||
"api_key": os.environ['TOGETHER_API_KEY'],
|
||||
"base_url": "https://api.together.xyz/v1"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Construct Agents
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
from autogen import AssistantAgent, UserProxyAgent
|
||||
from autogen.coding import LocalCommandLineCodeExecutor
|
||||
|
||||
work_dir = Path("groupchat")
|
||||
work_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Create local command line code executor.
|
||||
code_executor = LocalCommandLineCodeExecutor(work_dir=work_dir)
|
||||
|
||||
# User Proxy will execute code and finish the chat upon typing 'exit'
|
||||
user_proxy = UserProxyAgent(
|
||||
name="UserProxy",
|
||||
system_message="A human admin",
|
||||
code_execution_config={
|
||||
"last_n_messages": 2,
|
||||
"executor": code_executor,
|
||||
},
|
||||
human_input_mode="TERMINATE",
|
||||
is_termination_msg=lambda x: "TERMINATE" in x.get("content"),
|
||||
)
|
||||
|
||||
# Python Coder agent
|
||||
coder = AssistantAgent(
|
||||
name="softwareCoder",
|
||||
description="Software Coder, writes Python code as required and reiterates with feedback from the Code Reviewer.",
|
||||
system_message="You are a senior Python developer, a specialist in writing succinct Python functions.",
|
||||
llm_config={"config_list": config_list},
|
||||
)
|
||||
|
||||
# Code Reviewer agent
|
||||
reviewer = AssistantAgent(
|
||||
name="codeReviewer",
|
||||
description="Code Reviewer, reviews written code for correctness, efficiency, and security. Asks the Software Coder to address issues.",
|
||||
system_message="You are a Code Reviewer, experienced in checking code for correctness, efficiency, and security. Review and provide feedback to the Software Coder until you are satisfied, then return the word TERMINATE",
|
||||
is_termination_msg=lambda x: "TERMINATE" in x.get("content"),
|
||||
llm_config={"config_list": config_list},
|
||||
)
|
||||
```
|
||||
|
||||
## Establish the group chat
|
||||
|
||||
```python
|
||||
from autogen import GroupChat, GroupChatManager
|
||||
|
||||
# Establish the Group Chat and disallow a speaker being selected consecutively
|
||||
groupchat = GroupChat(agents=[user_proxy, coder, reviewer], messages=[], max_round=12, allow_repeat_speaker=False)
|
||||
|
||||
# Manages the group of multiple agents
|
||||
manager = GroupChatManager(groupchat=groupchat, llm_config={"config_list": config_list})
|
||||
```
|
||||
|
||||
## Start Chat
|
||||
|
||||
```python
|
||||
from autogen.cache import Cache
|
||||
|
||||
# Cache LLM responses.
|
||||
with Cache.disk() as cache:
|
||||
# Start the chat with a request to write a function
|
||||
user_proxy.initiate_chat(
|
||||
manager,
|
||||
message="Write a Python function for the Fibonacci sequence, the function will have one parameter for the number in the sequence, which the function will return the Fibonacci number for.",
|
||||
cache=cache,
|
||||
)
|
||||
# Type 'exit' at the feedback prompt to terminate the chat
|
||||
```
|
||||
|
||||
Output:
|
||||
```` text
|
||||
UserProxy (to chat_manager):
|
||||
|
||||
Write a Python function for the Fibonacci sequence, the function will have one parameter for the number in the sequence, which the function will return the Fibonacci number for.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
softwareCoder (to chat_manager):
|
||||
|
||||
Sure, here is a simple Python function that uses recursion to calculate the Fibonacci number:
|
||||
|
||||
```python
|
||||
def fibonacci(n):
|
||||
if n <= 0:
|
||||
return "Input should be a positive integer."
|
||||
elif n == 1:
|
||||
return 0
|
||||
elif n == 2:
|
||||
return 1
|
||||
else:
|
||||
return fibonacci(n-1) + fibonacci(n-2)
|
||||
```
|
||||
|
||||
This function takes an integer `n` as input and returns the `n`th number in the Fibonacci sequence. The Fibonacci sequence is a series of numbers in which each number is the sum of the two preceding ones, usually starting with 0 and 1.
|
||||
|
||||
Note that this implementation uses recursion and may not be efficient for large values of `n`. In such cases, an iterative approach or memoization would be more appropriate.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
codeReviewer (to chat_manager):
|
||||
|
||||
I see a couple of issues with the current implementation of the `fibonacci` function:
|
||||
|
||||
1. The function does not handle negative inputs correctly. Currently, it returns a string message for any input less than or equal to 0. It would be better to raise a `ValueError` or return a more informative message.
|
||||
2. The function uses recursion to calculate the Fibonacci number, which can be inefficient for large inputs. A more efficient approach would be to use an iterative approach or memoization.
|
||||
|
||||
Here's an updated implementation that addresses these issues:
|
||||
|
||||
```python
|
||||
def fibonacci(n):
|
||||
if n <= 0:
|
||||
raise ValueError("Input should be a positive integer.")
|
||||
elif n == 1:
|
||||
return 0
|
||||
elif n == 2:
|
||||
return 1
|
||||
else:
|
||||
a, b = 0, 1
|
||||
for _ in range(n - 2):
|
||||
a, b = b, a + b
|
||||
return b
|
||||
```
|
||||
|
||||
This implementation uses a loop to calculate the Fibonacci number iteratively, which is more efficient than the recursive approach. It also raises a `ValueError` for negative inputs, which is a more appropriate way to handle invalid inputs.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
>>>>>>>> USING AUTO REPLY...
|
||||
|
||||
>>>>>>>> EXECUTING CODE BLOCK 0 (inferred language is python)...
|
||||
UserProxy (to chat_manager):
|
||||
|
||||
exitcode: 0 (execution succeeded)
|
||||
Code output:
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
codeReviewer (to chat_manager):
|
||||
|
||||
I'm glad the updated implementation addresses the issues with the original code. Let me know if you have any further questions or if there's anything else I can help you with.
|
||||
|
||||
To terminate the conversation, please type "TERMINATE".
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
Please give feedback to chat_manager. Press enter or type 'exit' to stop the conversation: exit
|
||||
````
|
Loading…
Reference in New Issue