Skip to content

Commit 51ddecc

Browse files
author
llm-net
committed
Add max_model_len field to ModelCard and ModelInfo for vLLM compatibility
- Added max_model_len field to ModelInfo dataclass in service_discovery.py
- Added max_model_len field to ModelCard in protocols.py
- Updated main_router.py to pass max_model_len when creating ModelCard
- This allows the router to properly forward the max_model_len field from vLLM endpoints

This fixes the issue where the /v1/models endpoint was not showing the context window size for models.

Signed-off-by: llm-net <[email protected]>
1 parent 459c9e1 commit 51ddecc

File tree

3 files changed

+5
-0
lines changed

3 files changed

+5
-0
lines changed

src/vllm_router/protocols.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class ModelCard(OpenAIBaseModel):
4949
owned_by: str = "vllm"
5050
root: Optional[str] = None
5151
parent: Optional[str] = None
52+
max_model_len: Optional[int] = None
5253

5354

5455
class ModelList(OpenAIBaseModel):

src/vllm_router/routers/main_router.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ async def show_models():
152152
created=model_info.created,
153153
owned_by=model_info.owned_by,
154154
parent=model_info.parent,
155+
max_model_len=model_info.max_model_len,
155156
)
156157
model_cards.append(model_card)
157158
existing_models.add(model_id)

src/vllm_router/service_discovery.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class ModelInfo:
5050
root: Optional[str] = None
5151
parent: Optional[str] = None
5252
is_adapter: bool = False
53+
max_model_len: Optional[int] = None
5354

5455
@classmethod
5556
def from_dict(cls, data: Dict) -> "ModelInfo":
@@ -62,6 +63,7 @@ def from_dict(cls, data: Dict) -> "ModelInfo":
6263
root=data.get("root", None),
6364
parent=data.get("parent", None),
6465
is_adapter=data.get("parent") is not None,
66+
max_model_len=data.get("max_model_len", None),
6567
)
6668

6769
def to_dict(self) -> Dict:
@@ -74,6 +76,7 @@ def to_dict(self) -> Dict:
7476
"root": self.root,
7577
"parent": self.parent,
7678
"is_adapter": self.is_adapter,
79+
"max_model_len": self.max_model_len,
7780
}
7881

7982

0 commit comments

Comments (0)