Commit 28f350e

Support add_generation_prompt in embeddings endpoint with chat request (#23931)
Signed-off-by: biba10 <[email protected]>
Parent: 51383bd

2 files changed (+9, -3)

vllm/entrypoints/openai/protocol.py (+8, -0)

@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
     # --8<-- [start:chat-embedding-extra-params]
+    add_generation_prompt: bool = Field(
+        default=False,
+        description=(
+            "If true, the generation prompt will be added to the chat template. "
+            "This is a parameter used by chat template in tokenizer config of the "
+            "model."),
+    )
+
     add_special_tokens: bool = Field(
         default=False,
         description=(
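
For context, a minimal client-side sketch of how the new field can be used once a server is running. This illustration is not part of the commit; the endpoint URL and model name below are placeholders.

# Sketch (not from the commit): a chat-style embedding request that sets the
# new add_generation_prompt flag. Assumes a vLLM OpenAI-compatible server is
# running locally and serving an embedding-capable chat model.
import requests

payload = {
    "model": "my-embedding-model",  # placeholder model name
    "messages": [  # chat-style input, parsed as an EmbeddingChatRequest
        {"role": "user", "content": "What is the capital of France?"},
    ],
    # New field added by this commit; defaults to False when omitted.
    "add_generation_prompt": True,
}

resp = requests.post("http://localhost:8000/v1/embeddings", json=payload)
print(resp.json()["data"][0]["embedding"][:8])  # first few embedding values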

vllm/entrypoints/openai/serving_embedding.py (+1, -3)

@@ -93,9 +93,7 @@ async def _preprocess(
                 or ctx.chat_template,
                 chat_template_content_format=ctx.
                 chat_template_content_format,
-                # In embedding requests, we are not generating tokens,
-                # so there is no need to append extra tokens to the input
-                add_generation_prompt=False,
+                add_generation_prompt=ctx.request.add_generation_prompt,
                 continue_final_message=False,
                 add_special_tokens=ctx.request.add_special_tokens,
             )
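
Before this change the server always rendered chat-style embedding inputs with add_generation_prompt=False; it now forwards the request's value, which still defaults to False. To show what the flag does to the rendered prompt, here is a separate illustration (again not part of the commit) using the Hugging Face transformers apply_chat_template method, which exposes the same switch; the model name is only an example.

# Illustration of what add_generation_prompt changes at the chat-template level.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # example chat model
messages = [{"role": "user", "content": "Hello!"}]

without_prompt = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=False)
with_prompt = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True)

# With the flag enabled, the rendered text additionally ends with the
# template's assistant-turn header (for ChatML-style templates this is
# "<|im_start|>assistant\n"), which some embedding models expect to see.
print(with_prompt[len(without_prompt):])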
