[Frontend] OpenAI API server: Do not add bos token by default when encoding (#4688)

This commit is contained in:
bofeng huang 2024-05-17 03:47:22 +02:00 committed by GitHub
parent 8e7fb5d43a
commit 0150a10630
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 12 deletions

View File

@@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing):
try:
# Tokenize/detokenize depending on prompt format (string/token list)
prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
request, prompt=prompt)
request, prompt=prompt, add_special_tokens=False)
sampling_params = request.to_sampling_params()
lora_request = self._maybe_get_lora(request)
decoding_config = await self.engine.get_decoding_config()

View File

@@ -1,7 +1,7 @@
import json
from dataclasses import dataclass
from http import HTTPStatus
from typing import Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import Field
from typing_extensions import Annotated
@@ -165,13 +165,14 @@ class OpenAIServing:
raise ValueError(f"The model `{request.model}` does not exist.")
def _validate_prompt_and_tokenize(
self,
request: Union[ChatCompletionRequest, CompletionRequest,
EmbeddingRequest],
prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
) -> Tuple[List[int], str]:
self,
request: Union[ChatCompletionRequest, CompletionRequest,
EmbeddingRequest],
prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[Annotated[int,
Field(ge=1)]] = None,
add_special_tokens: bool = True) -> Tuple[List[int], str]:
if not (prompt or prompt_ids):
raise ValueError("Either prompt or prompt_ids should be provided.")
if (prompt and prompt_ids):
@@ -179,10 +180,19 @@ class OpenAIServing:
"Only one of prompt or prompt_ids should be provided.")
if prompt_ids is None:
tokenizer_kwargs = {} if truncate_prompt_tokens is None else {
"truncation": True,
"max_length": truncate_prompt_tokens,
# When using OpenAIServingChat for chat completions, the
# special tokens (e.g., BOS) have already been added by the
# chat template. Therefore, we do not need to add them again.
# Set add_special_tokens to False to avoid adding the BOS tokens
# again.
tokenizer_kwargs: Dict[str, Any] = {
"add_special_tokens": add_special_tokens
}
if truncate_prompt_tokens is not None:
tokenizer_kwargs.update({
"truncation": True,
"max_length": truncate_prompt_tokens,
})
input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids
elif truncate_prompt_tokens is not None:
input_ids = prompt_ids[-truncate_prompt_tokens:]