mirror of https://github.com/vllm-project/vllm
[Bugfix]: Use float32 for base64 embedding (#7855)
Signed-off-by: Hollow Man <hollowman@opensuse.org>
This commit is contained in:
parent
1856aff4d6
commit
0b769992ec
|
@ -19,7 +19,6 @@ responses = client.embeddings.create(
|
||||||
"The best thing about vLLM is that it supports many different models"
|
"The best thing about vLLM is that it supports many different models"
|
||||||
],
|
],
|
||||||
model=model,
|
model=model,
|
||||||
encoding_format="float",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for data in responses.data:
|
for data in responses.data:
|
||||||
|
|
|
@ -128,9 +128,18 @@ async def test_batch_base64_embedding(embedding_client: openai.AsyncOpenAI,
|
||||||
for data in responses_base64.data:
|
for data in responses_base64.data:
|
||||||
decoded_responses_base64_data.append(
|
decoded_responses_base64_data.append(
|
||||||
np.frombuffer(base64.b64decode(data.embedding),
|
np.frombuffer(base64.b64decode(data.embedding),
|
||||||
dtype="float").tolist())
|
dtype="float32").tolist())
|
||||||
|
|
||||||
assert responses_float.data[0].embedding == decoded_responses_base64_data[
|
assert responses_float.data[0].embedding == decoded_responses_base64_data[
|
||||||
0]
|
0]
|
||||||
assert responses_float.data[1].embedding == decoded_responses_base64_data[
|
assert responses_float.data[1].embedding == decoded_responses_base64_data[
|
||||||
1]
|
1]
|
||||||
|
|
||||||
|
# Default response is float32 decoded from base64 by OpenAI Client
|
||||||
|
responses_default = await embedding_client.embeddings.create(
|
||||||
|
input=input_texts, model=model_name)
|
||||||
|
|
||||||
|
assert responses_float.data[0].embedding == responses_default.data[
|
||||||
|
0].embedding
|
||||||
|
assert responses_float.data[1].embedding == responses_default.data[
|
||||||
|
1].embedding
|
||||||
|
|
|
@ -31,7 +31,9 @@ def _get_embedding(
|
||||||
if encoding_format == "float":
|
if encoding_format == "float":
|
||||||
return output.embedding
|
return output.embedding
|
||||||
elif encoding_format == "base64":
|
elif encoding_format == "base64":
|
||||||
embedding_bytes = np.array(output.embedding).tobytes()
|
# Force to use float32 for base64 encoding
|
||||||
|
# to match the OpenAI python client behavior
|
||||||
|
embedding_bytes = np.array(output.embedding, dtype="float32").tobytes()
|
||||||
return base64.b64encode(embedding_bytes).decode("utf-8")
|
return base64.b64encode(embedding_bytes).decode("utf-8")
|
||||||
|
|
||||||
assert_never(encoding_format)
|
assert_never(encoding_format)
|
||||||
|
|
Loading…
Reference in New Issue