add examples: multimodal tool use with qwen2-vl

gewenbin.gwb 2024-08-09 15:50:41 +08:00 committed by 兼欣
parent 3c4f8d00af
commit 8f24dbf6a7
9 changed files with 225 additions and 175 deletions

examples/assistant_angry_girlfriend.py (deleted)

@@ -1,57 +0,0 @@
"""A comfort game implemented by assistant"""
from qwen_agent.agents import Assistant
from qwen_agent.gui import WebUI
def init_agent_service():
llm_cfg = {'model': 'qwen-max'}
system = ('我们来玩角色扮演游戏。你扮演用户的女友。由用户开始发言根据他的发言你初始化一个心情值0到100并作出回应。'
'用户的任务是哄你开心,你根据每次用户说的话调整心情,每次回复开头加上(当前心情:分数)。')
bot = Assistant(llm=llm_cfg, name='虚拟女友', description='哄哄机器人', system_message=system)
return bot
def test(query: str = '你今天真好看'):
# Define the agent
bot = init_agent_service()
# Chat
messages = [{'role': 'user', 'content': query}]
for response in bot.run(messages=messages):
print('bot response:', response)
def app_tui():
# Define the agent
bot = init_agent_service()
# Chat
messages = []
while True:
query = input('user question: ')
messages.append({'role': 'user', 'content': query})
response = []
for response in bot.run(messages=messages):
print('bot response:', response)
messages.extend(response)
def app_gui():
agent = init_agent_service()
chatbot_config = {
'prompt.suggestions': [
'你今天真好看!',
'晚上去吃好吃的嘛~',
'宝贝,你又瘦啦!',
]
}
WebUI(agent, chatbot_config=chatbot_config).run(messages=[{'role': 'assistant', 'content': [{'text': '还不快来哄哄我!'}]}])
if __name__ == '__main__':
# test()
# app_tui()
app_gui()

examples/assistant_growing_girl.py (deleted)

@@ -1,93 +0,0 @@
"""A girl's growth story novelist implemented by assistant"""
import os
from typing import Optional
from qwen_agent.agents import Assistant
from qwen_agent.gui import WebUI
ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
def init_agent_service():
llm_cfg = {'model': 'qwen-max'}
tools = ['image_gen']
bot = Assistant(
llm=llm_cfg,
name='漫画家',
description='根据女孩的成长阶段画出图片,串成一个故事',
function_list=tools,
system_message='你扮演一个漫画家,根据我给你的女孩的不同阶段,使用工具画出每个阶段女孩的的图片,'
'并串成一个故事讲述出来。要求图片背景丰富',
)
return bot
def test(
query='请用image_gen开始创作',
file: Optional[str] = os.path.join(ROOT_RESOURCE, 'growing_girl.pdf'),
):
# Define the agent
bot = init_agent_service()
# Chat
messages = []
if not file:
messages.append({'role': 'user', 'content': query})
else:
messages.append({'role': 'user', 'content': [{'text': query}, {'file': file}]})
for response in bot.run(messages):
print('bot response:', response)
def app_tui():
# Define the agent
bot = init_agent_service()
# Chat
messages = []
while True:
# Query example: 请用image_gen开始创作
query = input('user question: ')
# File example: resource/growing_girl.pdf
file = input('file url (press enter if no file): ').strip()
if not query:
print('user question cannot be empty')
continue
if not file:
messages.append({'role': 'user', 'content': query})
else:
messages.append({'role': 'user', 'content': [{'text': query}, {'file': file}]})
response = []
for response in bot.run(messages):
print('bot response:', response)
messages.extend(response)
def app_gui():
# Define the agent
bot = init_agent_service()
file = os.path.join(ROOT_RESOURCE, 'growing_girl.pdf')
chatbot_config = {
'prompt.suggestions': [
{
'text': '画一个女孩的成长故事',
'files': [file]
},
{
'text': '画一个女孩的成长故事,从小学到大学',
'files': [file]
},
'画出女人的一生,要反映出人类的深刻本质',
]
}
WebUI(bot, chatbot_config=chatbot_config).run()
if __name__ == '__main__':
# test()
# app_tui()
app_gui()

examples/qwen2vl_assistant_tooluse.py (new)

@@ -0,0 +1,137 @@
import os
import re
import uuid
from io import BytesIO
from pprint import pprint
from typing import List, Union

import requests
from PIL import Image

from qwen_agent.agents import FnCallAgent
from qwen_agent.llm.schema import ContentItem
from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool

ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')


@register_tool('crop_and_resize')
class CropResize(BaseToolWithFileAccess):
    description = ('A magnifier tool: crops a local region of an image and enlarges it to show more detail. '
                   'Call it when you cannot make out the details directly.')
    parameters = [
        {
            'name': 'image',
            'type': 'string',
            'description': 'Local path or URL of the input image',
            'required': True
        },
        {
            'name': 'rectangle',
            'type': 'string',
            'description': ('The image region to crop, given by its top-left and bottom-right corners '
                            '(origin at the top-left of the image, x-axis pointing right, y-axis pointing down). '
                            'Format: (x1,y1),(x2,y2)'),
            'required': True
        },
    ]

    def _extract_coordinates(self, text):
        # Try the "(x1,y1),(x2,y2)" format first.
        pattern = r'\((\d+),\s*(\d+)\)'
        matches = re.findall(pattern, text)
        coordinates = [(int(x), int(y)) for x, y in matches]
        if len(coordinates) >= 2:
            x1, y1 = coordinates[0]
            x2, y2 = coordinates[1]
            return x1, y1, x2, y2

        # Fall back to the "(x1,y1,x2,y2)" format.
        pattern = r'\((\d+),\s*(\d+),\s*(\d+),\s*(\d+)\)'
        matches = re.findall(pattern, text)
        coordinates = [(int(x1), int(y1), int(x2), int(y2)) for x1, y1, x2, y2 in matches]
        return coordinates[0]

    def _expand_box(self, x1, y1, x2, y2, factor=1):
        # Scale the box around its center by the given factor.
        xc = (x1 + x2) / 2
        yc = (y1 + y2) / 2
        w = x2 - x1
        h = y2 - y1
        w_new = w * factor
        h_new = h * factor
        return xc - w_new / 2, yc - h_new / 2, xc + w_new / 2, yc + h_new / 2

    def call(self, params: Union[str, dict], files: List[str] = None, **kwargs) -> List[ContentItem]:
        super().call(params=params, files=files)
        params = self._verify_json_format_args(params)
        image_arg = params['image']  # local path or url
        rectangle = params['rectangle']

        # Open the image
        if image_arg.startswith('http'):
            response = requests.get(image_arg)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        elif os.path.exists(image_arg):
            image = Image.open(image_arg)
        else:
            image = Image.open(os.path.join(self.work_dir, image_arg))

        # The model outputs coordinates on a 0-1000 normalized grid; map them to pixels.
        coordinates = self._extract_coordinates(rectangle)
        x1, y1, x2, y2 = self._expand_box(*coordinates, factor=1.35)
        w, h = image.size
        x1, y1 = round(x1 / 1000 * w), round(y1 / 1000 * h)
        x2, y2 = round(x2 / 1000 * w), round(y2 / 1000 * h)

        # Clamp to the image bounds
        x1, y1, x2, y2 = max(x1, 0), max(y1, 0), min(x2, w), min(y2, h)
        cropped_image = image.crop((x1, y1, x2, y2))

        # Save the crop
        output_path = os.path.abspath(os.path.join(self.work_dir, f'{uuid.uuid4()}.png'))
        cropped_image.save(output_path)

        return [
            ContentItem(image=output_path),
            ContentItem(text=f' The URL of this enlarged local region is {output_path} '),
        ]


def test():
    llm_cfg_vl = {
        # Using Qwen2-VL deployed at any openai-compatible service such as vLLM:
        # 'model_type': 'qwenvl_oai',
        # 'model': 'Qwen/Qwen2-VL-72B-Instruct',
        # 'model_server': 'http://localhost:8000/v1',  # api_base
        # 'api_key': 'EMPTY',

        # Using Qwen2-VL provided by Alibaba Cloud DashScope:
        # 'model_type': 'qwenvl_dashscope',
        # 'model': 'qwen2-vl-72b-instruct',
        # 'api_key': os.getenv('DASHSCOPE_API_KEY'),

        # TODO: Use qwen2-vl instead once qwen2-vl is released.
        'model_type': 'qwenvl_dashscope',
        'model': 'qwen-vl-max',
        'api_key': os.getenv('DASHSCOPE_API_KEY'),
        'generate_cfg': dict(max_retries=10),
    }

    agent = FnCallAgent(function_list=['crop_and_resize'], llm=llm_cfg_vl)
    messages = [{
        'role': 'user',
        'content': [
            {
                'image': os.path.abspath(os.path.join(ROOT_RESOURCE, 'screenshot_with_plot.jpeg'))
            },
            {
                'text': 'Call the tool to zoom in on the table on the right.'
            },
        ],
    }]
    response = agent.run_nonstream(messages=messages)
    pprint(response, indent=4)


if __name__ == '__main__':
    test()
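
To sanity-check the new tool without a model in the loop, the crop logic can be exercised directly. A minimal sketch, assuming it is run from the repository root so the examples package is importable; the rectangle is a made-up region on the 0-1000 normalized grid the tool expects:

import json

from examples.qwen2vl_assistant_tooluse import CropResize

tool = CropResize()
result = tool.call(
    params=json.dumps({
        'image': 'examples/resource/screenshot_with_plot.jpeg',
        'rectangle': '(500,0),(1000,1000)',  # hypothetical: roughly the right half of the image
    }),
    files=['examples/resource/screenshot_with_plot.jpeg'],
)
for item in result:
    print(item)  # the saved crop image, then its path as text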

Qwen2-VL function-calling example (new file)

@@ -0,0 +1,81 @@
import json
import urllib.parse

from qwen_agent.llm import get_chat_model
from qwen_agent.llm.schema import ContentItem


def image_gen(prompt: str) -> str:
    # pollinations.ai renders an image for any prompt embedded in the URL path.
    prompt = urllib.parse.quote(prompt)
    image_url = f'https://image.pollinations.ai/prompt/{prompt}'
    return image_url
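
Because image_gen simply URL-encodes the prompt into the pollinations.ai endpoint, it can be tried standalone before wiring it into the function-calling loop. A small sketch with a made-up prompt:

url = image_gen('a girl and a golden retriever playing on a beach, watercolor')
print(url)
# e.g. https://image.pollinations.ai/prompt/a%20girl%20and%20a%20golden%20retriever%20...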
def test():
    # Config for the model
    llm_cfg_oai = {
        # Using Qwen2-VL deployed at any openai-compatible service such as vLLM:
        'model_type': 'qwenvl_oai',
        'model': 'Qwen/Qwen2-VL-72B-Instruct',
        'model_server': 'http://localhost:8000/v1',  # api_base
        'api_key': 'EMPTY',
    }
    llm = get_chat_model(llm_cfg_oai)

    # Initial conversation
    messages = [{
        'role': 'user',
        'content': [{
            'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg'
        }, {
            'text': 'What is in the picture? Please draw a picture with the same content and a similar style.'
        }]
    }]

    functions = [
        {
            'name': 'image_gen',
            'description': 'AI painting (image generation) service: input a text description and it returns the URL '
                           'of an image drawn from that text.',
            'parameters': [{
                'name': 'prompt',
                'type': 'string',
                'description': 'A detailed description of the desired image content, e.g. people, environment, '
                               'actions and other details. Use English.',
                'required': True
            }]
        },
    ]

    print('# Assistant Response 1:')
    responses = []
    for responses in llm.chat(messages=messages, functions=functions, stream=True):
        print(responses)
    messages.extend(responses)

    for rsp in responses:
        if rsp.get('function_call', None):
            func_name = rsp['function_call']['name']
            if func_name == 'image_gen':
                func_args = json.loads(rsp['function_call']['arguments'])
                image_url = image_gen(func_args['prompt'])
                print('# Function Response:')
                func_rsp = {
                    'role': 'function',
                    'name': func_name,
                    # A multimodal function result: the generated image plus its URL as text.
                    'content': [ContentItem(image=image_url),
                                ContentItem(text=f' The URL of this image is {image_url} ')],
                }
                messages.append(func_rsp)
                print(func_rsp)
            else:
                raise NotImplementedError

    print('# Assistant Response 2:')
    responses = []
    for responses in llm.chat(messages=messages, functions=functions, stream=True):
        print(responses)
    messages.extend(responses)


if __name__ == '__main__':
    test()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary image file (349 KiB) not shown.

@@ -7,9 +7,6 @@ sys.path.insert(0, os.path.abspath(os.path.join(__file__, '../../..')))  # noqa
 ROOT_RESOURCE = os.path.abspath(os.path.join(__file__, '../../../examples/resource'))  # noqa
 
 from examples.assistant_add_custom_tool import test as assistant_add_custom_tool  # noqa
-from examples.assistant_angry_girlfriend import test as assistant_angry_girlfriend  # noqa
-# from examples.assistant_doctor import test as assistant_doctor
-from examples.assistant_growing_girl import test as assistant_growing_girl  # noqa
 from examples.assistant_weather_bot import test as assistant_weather_bot  # noqa
 from examples.function_calling import test as function_calling  # noqa
 from examples.function_calling_in_parallel import test as parallel_function_calling  # noqa
@@ -19,6 +16,7 @@ from examples.group_chat_demo import test as group_chat_demo  # noqa
 from examples.llm_riddles import test as llm_riddles  # noqa
 from examples.llm_vl_mix_text import test as llm_vl_mix_text  # noqa
 from examples.multi_agent_router import test as multi_agent_router  # noqa
+from examples.qwen2vl_assistant_tooluse import test as qwen2vl_assistant_tooluse  # noqa
 from examples.react_data_analysis import test as react_data_analysis  # noqa
 from examples.visual_storytelling import test as visual_storytelling  # noqa
@@ -34,26 +32,6 @@ def test_assistant_weather_bot(query, file):
     assistant_weather_bot(query=query, file=file)
 
-@pytest.mark.parametrize('query', ['You look great today'])
-def test_assistant_angry_girlfriend(query):
-    assistant_angry_girlfriend(query=query)
-
-# @pytest.mark.parametrize('query', ['Doctor, can you check whether I am healthy?'])
-# @pytest.mark.parametrize('file', [
-#     None,
-#     'https://pic4.zhimg.com/80/v2-2c8eedf3e12386fedcd5589cf5575717_720w.webp'
-# ])
-# def test_assistant_doctor(query, file):
-#     assistant_doctor(query=query, file=file)
-
-@pytest.mark.parametrize('query', ['Please start creating with image_gen'])
-@pytest.mark.parametrize('file', [None, os.path.join(ROOT_RESOURCE, 'growing_girl.pdf')])
-def test_assistant_growing_girl(query, file):
-    assistant_growing_girl(query=query, file=file)
 
 def test_llm_vl_mix_text():
     llm_vl_mix_text()
@@ -104,3 +82,7 @@ def test_group_chat_chess(query):
 def test_group_chat_demo():
     group_chat_demo()
+
+
+def test_qwen2vl_assistant_tooluse():
+    qwen2vl_assistant_tooluse()

@@ -16,8 +16,8 @@ def test_memory():
     mem = Memory(llm=llm_cfg)
     messages = [
         Message('user', [
-            ContentItem(text="a girl's growth journey"),
-            ContentItem(file=str(Path(__file__).resolve().parent.parent.parent / 'examples/resource/growing_girl.pdf'))
+            ContentItem(text='how to flip images'),
+            ContentItem(file=str(Path(__file__).resolve().parent.parent.parent / 'examples/resource/doc.pdf'))
         ])
     ]
     *_, last = mem.run(messages, max_ref_token=4000, parser_page_size=500)