mirror of https://github.com/QwenLM/Qwen-Agent
add examples: multimodal tool use with qwen2-vl
This commit is contained in:
parent
3c4f8d00af
commit
8f24dbf6a7
|
@ -1,57 +0,0 @@
|
|||
"""A role-play game, implemented with Assistant, in which the user tries to cheer up a virtual girlfriend."""
|
||||
|
||||
from qwen_agent.agents import Assistant
|
||||
from qwen_agent.gui import WebUI
|
||||
|
||||
|
||||
def init_agent_service():
    """Create the role-play ``Assistant`` used by every entry point in this example.

    The system prompt (in Chinese) sets up a game where the model plays the
    user's girlfriend, tracks a mood score from 0 to 100 and prefixes each
    reply with the current score.
    """
    role_play_prompt = ('我们来玩角色扮演游戏。你扮演用户的女友。由用户开始发言,根据他的发言,你初始化一个心情值(0到100)并作出回应。'
                        '用户的任务是哄你开心,你根据每次用户说的话调整心情,每次回复开头加上(当前心情:分数)。')

    return Assistant(
        llm={'model': 'qwen-max'},
        name='虚拟女友',
        description='哄哄机器人',
        system_message=role_play_prompt,
    )
|
||||
|
||||
|
||||
def test(query: str = '你今天真好看'):
    """Send a single user ``query`` to the agent and print every response it yields."""
    agent = init_agent_service()

    # One-turn conversation containing only the user's message.
    conversation = [{'role': 'user', 'content': query}]
    for partial in agent.run(messages=conversation):
        print('bot response:', partial)
|
||||
|
||||
|
||||
def app_tui():
    """Chat with the agent in the terminal, keeping the full history between turns.

    Loops forever reading from stdin; there is no exit command in this function.
    """
    agent = init_agent_service()
    history = []

    while True:
        user_text = input('user question: ')
        history.append({'role': 'user', 'content': user_text})

        # The loop variable is rebound on every yielded value, so after the loop
        # it holds the last one (or stays an empty list if nothing was yielded).
        latest = []
        for latest in agent.run(messages=history):
            print('bot response:', latest)
        history.extend(latest)
|
||||
|
||||
|
||||
def app_gui():
    """Launch the web UI for the agent with canned prompt suggestions and an opening assistant line."""
    agent = init_agent_service()

    suggestions = [
        '你今天真好看!',
        '晚上去吃好吃的嘛~',
        '宝贝,你又瘦啦!',
    ]
    # The conversation is seeded with a message from the assistant itself.
    opening = [{'role': 'assistant', 'content': [{'text': '还不快来哄哄我!'}]}]

    ui = WebUI(agent, chatbot_config={'prompt.suggestions': suggestions})
    ui.run(messages=opening)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Swap in one of the commented calls to run a different entry point.
    # test()
    # app_tui()
    app_gui()
|
|
@ -1,93 +0,0 @@
|
|||
"""A comic artist, implemented with Assistant, that draws and narrates the story of a girl growing up."""
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from qwen_agent.agents import Assistant
|
||||
from qwen_agent.gui import WebUI
|
||||
|
||||
# Path of the 'resource' directory located next to this file.
ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
|
||||
|
||||
|
||||
def init_agent_service():
    """Create the comic-artist ``Assistant`` with the ``image_gen`` tool enabled.

    The system prompt (in Chinese) asks the model to draw one picture per
    stage of the girl's life with the tool and to tell them as one story.
    """
    artist_prompt = ('你扮演一个漫画家,根据我给你的女孩的不同阶段,使用工具画出每个阶段女孩的的图片,'
                     '并串成一个故事讲述出来。要求图片背景丰富')

    return Assistant(
        llm={'model': 'qwen-max'},
        name='漫画家',
        description='根据女孩的成长阶段画出图片,串成一个故事',
        function_list=['image_gen'],
        system_message=artist_prompt,
    )
|
||||
|
||||
|
||||
def test(
    query='请用image_gen开始创作!',
    file: Optional[str] = os.path.join(ROOT_RESOURCE, 'growing_girl.pdf'),
):
    """Run one turn against the agent and print every response it yields.

    Args:
        query: The user's text.
        file: Optional file to attach to the message; when falsy, the message
            content is the plain ``query`` string.
    """
    agent = init_agent_service()

    # Plain string without a file, otherwise a list of text/file items.
    content = [{'text': query}, {'file': file}] if file else query
    conversation = [{'role': 'user', 'content': content}]

    for partial in agent.run(conversation):
        print('bot response:', partial)
|
||||
|
||||
|
||||
def app_tui():
    """Chat with the agent in the terminal, optionally attaching a file to each turn.

    Loops forever reading from stdin; there is no exit command in this function.
    """
    agent = init_agent_service()
    history = []

    while True:
        # Query example: 请用image_gen开始创作!
        user_text = input('user question: ')
        # File example: resource/growing_girl.pdf
        attachment = input('file url (press enter if no file): ').strip()

        # Both prompts are read before the empty-query check.
        if not user_text:
            print('user question cannot be empty!')
            continue

        content = [{'text': user_text}, {'file': attachment}] if attachment else user_text
        history.append({'role': 'user', 'content': content})

        # After the loop `latest` holds the last value yielded by the agent.
        latest = []
        for latest in agent.run(history):
            print('bot response:', latest)
        history.extend(latest)
|
||||
|
||||
|
||||
def app_gui():
    """Launch the web UI with three prompt suggestions, two of which attach the sample PDF."""
    agent = init_agent_service()
    pdf_path = os.path.join(ROOT_RESOURCE, 'growing_girl.pdf')

    # Suggestions that carry the PDF are dicts; the last one is a bare string.
    captions_with_pdf = ('画一个女孩的成长故事', '画一个女孩的成长故事,从小学到大学')
    suggestions = [{'text': caption, 'files': [pdf_path]} for caption in captions_with_pdf]
    suggestions.append('画出女人的一生,要反映出人类的深刻本质')

    ui = WebUI(agent, chatbot_config={'prompt.suggestions': suggestions})
    ui.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Swap in one of the commented calls to run a different entry point.
    # test()
    # app_tui()
    app_gui()
|
|
@ -0,0 +1,137 @@
|
|||
import os
|
||||
import re
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from pprint import pprint
|
||||
from typing import List, Union
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from qwen_agent.agents import FnCallAgent
|
||||
from qwen_agent.llm.schema import ContentItem
|
||||
from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool
|
||||
|
||||
# Path of the 'resource' directory located next to this file.
ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
|
||||
|
||||
|
||||
@register_tool('crop_and_resize')
class CropResize(BaseToolWithFileAccess):
    """Magnifier tool: cut a rectangular region out of an image and save it as a new PNG.

    The rectangle is parsed from text, enlarged by a factor of 1.35 around its
    centre, mapped onto the image by treating each value as a fraction of 1000
    of the image width/height, and clamped to the image bounds. Despite the
    registered name, this code only crops; it does not resample the result.
    """
    description = '这是一个放大镜功能,截取局部图像并放大从而查看更多细节,如果你无法直接看清细节时可以调用'
    parameters = [
        {
            'name': 'image',
            'type': 'string',
            'description': '输入图片本地路径或URL',
            'required': True
        },
        {
            'name': 'rectangle',
            'type': 'string',
            'description': '需要截取的局部图像区域,使用左上角坐标和右下角坐标表示(原点在图像左上角、向右为x轴正方向、向下为y轴正方向),格式:(x1,y1),(x2,y2)',
            'required': True
        },
    ]

    def _extract_coordinates(self, text):
        """Parse ``text`` into ``(x1, y1, x2, y2)`` integers.

        Two notations are accepted: two pairs ``(x1,y1),(x2,y2)`` (the first
        two pairs found are used) or one quadruple ``(x1,y1,x2,y2)``.

        Raises:
            ValueError: If neither notation is found in ``text``.
        """
        pairs = re.findall(r'\((\d+),\s*(\d+)\)', text)
        if len(pairs) >= 2:
            (x1, y1), (x2, y2) = pairs[0], pairs[1]
            return int(x1), int(y1), int(x2), int(y2)

        quad = re.search(r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)', text)
        if quad is None:
            # Fail with a message that names the bad input instead of an IndexError.
            raise ValueError(f'Cannot parse a rectangle from {text!r}; '
                             'expected "(x1,y1),(x2,y2)" or "(x1,y1,x2,y2)".')
        return tuple(int(value) for value in quad.groups())

    def _expand_box(self, x1, y1, x2, y2, factor=1):
        """Scale the box about its centre by ``factor`` and return the new corners as floats."""
        xc = (x1 + x2) / 2
        yc = (y1 + y2) / 2
        half_w = (x2 - x1) * factor / 2
        half_h = (y2 - y1) * factor / 2
        return xc - half_w, yc - half_h, xc + half_w, yc + half_h

    def _open_image(self, image_arg):
        """Open ``image_arg`` from an http(s) URL, an existing path, or a path inside ``work_dir``."""
        # Match the full scheme so a local file named e.g. "http_plot.png" is not fetched as a URL.
        if image_arg.startswith(('http://', 'https://')):
            # A timeout keeps the tool from hanging forever on an unresponsive server.
            response = requests.get(image_arg, timeout=60)
            response.raise_for_status()
            return Image.open(BytesIO(response.content))
        if os.path.exists(image_arg):
            return Image.open(image_arg)
        return Image.open(os.path.join(self.work_dir, image_arg))

    def call(self, params: Union[str, dict], files: List[str] = None, **kwargs) -> List[ContentItem]:
        """Crop the requested rectangle and return the saved image plus a text note with its path.

        Args:
            params: Tool arguments (JSON string or dict) with keys ``image``
                (local path or URL) and ``rectangle`` (see ``_extract_coordinates``).
            files: Forwarded to the base class ``call``.

        Returns:
            Two content items: the cropped image and a text item stating its path.

        Raises:
            ValueError: If the rectangle cannot be parsed or covers no pixels
                of the image after clamping.
        """
        super().call(params=params, files=files)
        params = self._verify_json_format_args(params)

        image_arg = params['image']  # local path or url
        rectangle = params['rectangle']

        # Parse first so a malformed rectangle fails before any download happens.
        x1, y1, x2, y2 = self._expand_box(*self._extract_coordinates(rectangle), factor=1.35)

        with self._open_image(image_arg) as image:
            w, h = image.size
            # Sorting tolerates corners given in reversed order.
            left, right = sorted((round(x1 / 1000 * w), round(x2 / 1000 * w)))
            top, bottom = sorted((round(y1 / 1000 * h), round(y2 / 1000 * h)))

            # The enlarged box may stick out of the image; clamp it back in.
            left, top = max(left, 0), max(top, 0)
            right, bottom = min(right, w), min(bottom, h)
            if right <= left or bottom <= top:
                raise ValueError(f'Rectangle {rectangle!r} does not cover any pixels of the image.')

            cropped_image = image.crop((left, top, right, bottom))

            # Save under a random name inside the tool's working directory.
            output_path = os.path.abspath(os.path.join(self.work_dir, f'{uuid.uuid4()}.png'))
            cropped_image.save(output_path)

        return [
            ContentItem(image=output_path),
            ContentItem(text=f'( 这张放大的局部区域的图片的URL是 {output_path} )'),
        ]
|
||||
|
||||
|
||||
def test():
    """Ask a vision-language agent to zoom into the sample screenshot with ``crop_and_resize``.

    Runs the agent non-streaming and pretty-prints the response.
    """
    llm_cfg_vl = {
        # Using Qwen2-VL deployed at any openai-compatible service such as vLLM:
        # 'model_type': 'qwenvl_oai',
        # 'model': 'Qwen/Qwen2-VL-72B-Instruct',
        # 'model_server': 'http://localhost:8000/v1',  # api_base
        # 'api_key': 'EMPTY',

        # Using Qwen2-VL provided by Alibaba Cloud DashScope:
        # 'model_type': 'qwenvl_dashscope',
        # 'model': 'qwen2-vl-72b-instruct',
        # 'api_key': os.getenv('DASHSCOPE_API_KEY'),

        # TODO: Use qwen2-vl instead once qwen2-vl is released.
        'model_type': 'qwenvl_dashscope',
        'model': 'qwen-vl-max',
        'api_key': os.getenv('DASHSCOPE_API_KEY'),
        'generate_cfg': {'max_retries': 10},
    }
    agent = FnCallAgent(function_list=['crop_and_resize'], llm=llm_cfg_vl)

    # The image is passed as an absolute path.
    screenshot = os.path.abspath(os.path.join(ROOT_RESOURCE, 'screenshot_with_plot.jpeg'))
    user_turn = {
        'role': 'user',
        'content': [
            {'image': screenshot},
            {'text': '调用工具放大右边的表格'},
        ],
    }

    result = agent.run_nonstream(messages=[user_turn])
    pprint(result, indent=4)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running the file directly executes the single demo above.
    test()
|
|
@ -0,0 +1,81 @@
|
|||
import json
|
||||
import urllib.parse
|
||||
|
||||
from qwen_agent.llm import get_chat_model
|
||||
from qwen_agent.llm.schema import ContentItem
|
||||
|
||||
|
||||
def image_gen(prompt: str) -> str:
    """Return the image.pollinations.ai URL that encodes ``prompt``.

    The prompt is percent-encoded with ``safe=''`` so that a ``/`` in the text
    is escaped too and the whole prompt stays in one URL path segment. No
    network request is made here; only the URL string is built.

    Args:
        prompt: Text description of the image.

    Returns:
        The URL string ``https://image.pollinations.ai/prompt/<encoded prompt>``.
    """
    encoded_prompt = urllib.parse.quote(prompt, safe='')
    return f'https://image.pollinations.ai/prompt/{encoded_prompt}'
|
||||
|
||||
|
||||
def test():
    """Walk through one function-calling round trip with a vision-language chat model.

    Steps: send an image plus a text request, print the streamed reply, run
    ``image_gen`` for every function call in that reply, append the function
    results to the conversation, then ask the model again and print the reply.

    Raises:
        NotImplementedError: If the model calls a function other than ``image_gen``.
    """
    # Using Qwen2-VL deployed at any openai-compatible service such as vLLM:
    llm = get_chat_model({
        'model_type': 'qwenvl_oai',
        'model': 'Qwen/Qwen2-VL-72B-Instruct',
        'model_server': 'http://localhost:8000/v1',  # api_base
        'api_key': 'EMPTY',
    })

    # Initial conversation: one user turn with an image and a text request.
    user_turn = {
        'role': 'user',
        'content': [
            {'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg'},
            {'text': '图片中的内容是什么?请画一张内容相同,风格类似的图片。'},
        ],
    }
    messages = [user_turn]

    # NOTE(review): 'parameters' is a single flat dict here rather than a list
    # or a JSON-schema object -- confirm this is the shape the model expects.
    image_gen_spec = {
        'name': 'image_gen',
        'description': 'AI绘画(图像生成)服务,输入文本描述,返回根据文本信息绘制的图片URL。',
        'parameters': {
            'name': 'prompt',
            'type': 'string',
            'description': '详细描述了希望生成的图像具有什么内容,例如人物、环境、动作等细节描述,使用英文',
            'required': True
        }
    }
    functions = [image_gen_spec]

    print('# Assistant Response 1:')
    # The loop variable is rebound on every yielded value; after the loop it
    # holds the last one, which is what gets appended to the conversation.
    first_reply = []
    for first_reply in llm.chat(messages=messages, functions=functions, stream=True):
        print(first_reply)
    messages.extend(first_reply)

    for rsp in first_reply:
        if not rsp.get('function_call', None):
            continue

        func_name = rsp['function_call']['name']
        if func_name != 'image_gen':
            raise NotImplementedError

        func_args = json.loads(rsp['function_call']['arguments'])
        image_url = image_gen(func_args['prompt'])
        print('# Function Response:')
        func_rsp = {
            'role': 'function',
            'name': func_name,
            'content': [
                ContentItem(image=image_url),
                ContentItem(text=f'( 这张图片的URL是 {image_url} )'),
            ],
        }
        messages.append(func_rsp)
        print(func_rsp)

    print('# Assistant Response 2:')
    second_reply = []
    for second_reply in llm.chat(messages=messages, functions=functions, stream=True):
        print(second_reply)
    messages.extend(second_reply)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Running the file directly executes the single demo above.
    test()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 349 KiB |
|
@ -7,9 +7,6 @@ sys.path.insert(0, os.path.abspath(os.path.join(__file__, '../../..'))) # noqa
|
|||
|
||||
# Absolute path built by going three levels up from this file's path and into 'examples/resource'.
ROOT_RESOURCE = os.path.abspath(os.path.join(__file__, '../../../examples/resource')) # noqa
|
||||
from examples.assistant_add_custom_tool import test as assistant_add_custom_tool # noqa
|
||||
from examples.assistant_angry_girlfriend import test as assistant_angry_girlfriend # noqa
|
||||
# from examples.assistant_doctor import test as assistant_doctor
|
||||
from examples.assistant_growing_girl import test as assistant_growing_girl # noqa
|
||||
from examples.assistant_weather_bot import test as assistant_weather_bot # noqa
|
||||
from examples.function_calling import test as function_calling # noqa
|
||||
from examples.function_calling_in_parallel import test as parallel_function_calling # noqa
|
||||
|
@ -19,6 +16,7 @@ from examples.group_chat_demo import test as group_chat_demo # noqa
|
|||
from examples.llm_riddles import test as llm_riddles # noqa
|
||||
from examples.llm_vl_mix_text import test as llm_vl_mix_text # noqa
|
||||
from examples.multi_agent_router import test as multi_agent_router # noqa
|
||||
from examples.qwen2vl_assistant_tooluse import test as qwen2vl_assistant_tooluse # noqa
|
||||
from examples.react_data_analysis import test as react_data_analysis # noqa
|
||||
from examples.visual_storytelling import test as visual_storytelling # noqa
|
||||
|
||||
|
@ -34,26 +32,6 @@ def test_assistant_weather_bot(query, file):
|
|||
assistant_weather_bot(query=query, file=file)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('query', ['你今天真好看'])
def test_assistant_angry_girlfriend(query):
    """Run the angry-girlfriend example once with the parametrized user query."""
    assistant_angry_girlfriend(query=query)
|
||||
|
||||
|
||||
# @pytest.mark.parametrize('query', ['医生,可以帮我看看我是否健康吗?'])
|
||||
# @pytest.mark.parametrize('file', [
|
||||
# None,
|
||||
# 'https://pic4.zhimg.com/80/v2-2c8eedf3e12386fedcd5589cf5575717_720w.webp'
|
||||
# ])
|
||||
# def test_assistant_doctor(query, file):
|
||||
# assistant_doctor(query=query, file=file)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('query', ['请用image_gen开始创作!'])
@pytest.mark.parametrize('file', [None, os.path.join(ROOT_RESOURCE, 'growing_girl.pdf')])
def test_assistant_growing_girl(query, file):
    """Run the growing-girl example both without a file and with the sample PDF."""
    assistant_growing_girl(query=query, file=file)
|
||||
|
||||
|
||||
def test_llm_vl_mix_text():
    """Run the ``llm_vl_mix_text`` example with its default arguments."""
    llm_vl_mix_text()
|
||||
|
||||
|
@ -104,3 +82,7 @@ def test_group_chat_chess(query):
|
|||
|
||||
def test_group_chat_demo():
    """Run the ``group_chat_demo`` example with its default arguments."""
    group_chat_demo()
|
||||
|
||||
|
||||
def test_qwen2vl_assistant_tooluse():
    """Run the ``qwen2vl_assistant_tooluse`` example with its default arguments."""
    qwen2vl_assistant_tooluse()
|
||||
|
|
|
@ -16,8 +16,8 @@ def test_memory():
|
|||
mem = Memory(llm=llm_cfg)
|
||||
messages = [
|
||||
Message('user', [
|
||||
ContentItem(text='女孩成长历程'),
|
||||
ContentItem(file=str(Path(__file__).resolve().parent.parent.parent / 'examples/resource/growing_girl.pdf'))
|
||||
ContentItem(text='how to flip images'),
|
||||
ContentItem(file=str(Path(__file__).resolve().parent.parent.parent / 'examples/resource/doc.pdf'))
|
||||
])
|
||||
]
|
||||
*_, last = mem.run(messages, max_ref_token=4000, parser_page_size=500)
|
||||
|
|
Loading…
Reference in New Issue