Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
f65ff24
test-pipeline
afeldman-nm Sep 4, 2025
d5fa076
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 4, 2025
35a331b
basic tests
afeldman-nm Sep 4, 2025
d630671
common and embedding markers
afeldman-nm Sep 4, 2025
7d31d30
parallelize basic tests
afeldman-nm Sep 4, 2025
6c8c6e9
adjust
afeldman-nm Sep 4, 2025
3709f82
wip
afeldman-nm Sep 5, 2025
0a12b97
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 5, 2025
183f97d
revise
afeldman-nm Sep 5, 2025
bba582f
burn changes
afeldman-nm Sep 8, 2025
b2a3d02
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 8, 2025
36a4875
Break out different initialization tests
afeldman-nm Sep 8, 2025
e32e371
shard hybrid models
afeldman-nm Sep 8, 2025
1d883af
Standard language models test
afeldman-nm Sep 8, 2025
774b9e4
small fix
afeldman-nm Sep 9, 2025
4941627
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 9, 2025
1fd0c68
wip
afeldman-nm Sep 9, 2025
0093c1f
typo
afeldman-nm Sep 9, 2025
62e8055
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 9, 2025
0892a96
test
afeldman-nm Sep 10, 2025
a5ecbcf
test
afeldman-nm Sep 10, 2025
fa64ce0
test
afeldman-nm Sep 10, 2025
1ac85fd
test
afeldman-nm Sep 10, 2025
4f4f60c
wip
afeldman-nm Sep 10, 2025
5fc5910
wip
afeldman-nm Sep 10, 2025
39df6a1
wip
afeldman-nm Sep 10, 2025
0b9c6e7
wip
afeldman-nm Sep 10, 2025
8f58547
different approach to sharding
afeldman-nm Sep 10, 2025
d3713c0
merge
afeldman-nm Sep 10, 2025
e70c8cd
percent N
afeldman-nm Sep 10, 2025
682d675
fix
afeldman-nm Sep 10, 2025
ee2b0dc
more sharding
afeldman-nm Sep 10, 2025
4a26031
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 10, 2025
c288ab3
isolate test; lower parallelism
afeldman-nm Sep 11, 2025
6ee7661
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 11, 2025
ff660fc
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 11, 2025
e66b7e6
lower shard factor and reorganize
afeldman-nm Sep 11, 2025
28239d4
explanatory comments
afeldman-nm Sep 11, 2025
009458f
wip
afeldman-nm Sep 11, 2025
63015ab
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 11, 2025
aef7215
test names
afeldman-nm Sep 11, 2025
09e84a4
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 12, 2025
bb15907
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 12, 2025
a806634
Merge branch 'main' into speed_model_ci
afeldman-nm Sep 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 86 additions & 6 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,32 @@ steps:

##### models test #####

- label: Basic Models Test # 57min
# Test subset of models when vLLM source is modified.
# Always run if test file is modified.
- label: Basic Models Initialization Test (Subset)
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/test_initialization.py
commands:
- pytest -v -s models/test_initialization.py::test_can_initialize_subset

# Test all other models when any model source is modified.
# Always run if test file is modified.
- label: Basic Models Initialization Test (Full)
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/model_executor/models/
- tests/models/test_initialization.py
commands:
- pytest -v -s models/test_initialization.py::test_can_initialize_other

# Other non-initialization models tests
- label: Basic Models Test
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
torch_nightly: true
Expand All @@ -558,9 +583,8 @@ steps:
- pytest -v -s models/test_registry.py
- pytest -v -s models/test_utils.py
- pytest -v -s models/test_vision.py
- pytest -v -s models/test_initialization.py

- label: Language Models Test (Standard) # 35min
- label: Standard Language Models Test (Subset)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
Expand All @@ -569,9 +593,65 @@ steps:
- tests/models/language
commands:
- pip freeze | grep -E 'torch'
- pytest -v -s models/language -m core_model
- pytest -v -s models/language -m "core_model and not slow_test"

- label: Standard Language Models Test (Other)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/model_executor/models/
- tests/models/language/pooling/test_embedding.py
- tests/models/language/generation/test_common.py
- tests/models/language/pooling/test_classification.py
commands:
- pip freeze | grep -E 'torch'
- pytest -v -s models/language -m "core_model and slow_test"

# Shard hybrid language model tests
- label: Language Models Test Shard 0 (Hybrid)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/language/generation
commands:
# Install fast path packages for testing against transformers
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/[email protected]'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/[email protected]'
- pytest -v -s --shard-count=4 --shard-index=0 models/language/generation -m hybrid_model

- label: Language Models Test Shard 1 (Hybrid)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/language/generation
commands:
# Install fast path packages for testing against transformers
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/[email protected]'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/[email protected]'
- pytest -v -s --shard-count=4 --shard-index=1 models/language/generation -m hybrid_model

- label: Language Models Test Shard 2 (Hybrid)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/language/generation
commands:
# Install fast path packages for testing against transformers
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/[email protected]'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/[email protected]'
- pytest -v -s --shard-count=4 --shard-index=2 models/language/generation -m hybrid_model

- label: Language Models Test (Hybrid) # 35 min
- label: Language Models Test Shard 3 (Hybrid)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
torch_nightly: true
Expand All @@ -583,7 +663,7 @@ steps:
# Note: also needed to run plamo2 model in vLLM
- uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/[email protected]'
- uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/[email protected]'
- pytest -v -s models/language/generation -m hybrid_model
- pytest -v -s --shard-count=4 --shard-index=3 models/language/generation -m hybrid_model

- label: Language Models Test (Extended Generation) # 80min
timeout_in_minutes: 110
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ skip_gitignore = true

[tool.pytest.ini_options]
markers = [
"slow_test",
"skip_global_cleanup",
"core_model: enable this model test in each PR instead of only nightly",
"hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
Expand Down
14 changes: 10 additions & 4 deletions tests/models/language/generation/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
[
pytest.param(
"bigscience/bloom-560m", # bloom - testing alibi slopes
marks=[pytest.mark.core_model],
marks=[pytest.mark.core_model, pytest.mark.slow_test],
),
pytest.param(
"openai-community/gpt2", # gpt2
Expand All @@ -50,7 +50,10 @@
pytest.param("EleutherAI/pythia-70m"), # gpt_neox
pytest.param(
"google/gemma-1.1-2b-it", # gemma
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
marks=[
pytest.mark.core_model, pytest.mark.cpu_model,
pytest.mark.slow_test
],
),
pytest.param(
"zai-org/chatglm3-6b", # chatglm (text-only)
Expand All @@ -71,14 +74,17 @@
),
pytest.param(
"microsoft/phi-2", # phi
marks=[pytest.mark.core_model],
marks=[pytest.mark.core_model, pytest.mark.slow_test],
),
pytest.param(
"Qwen/Qwen-7B-Chat", # qwen (text-only)
),
pytest.param(
"Qwen/Qwen2.5-0.5B-Instruct", # qwen2
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
marks=[
pytest.mark.core_model, pytest.mark.cpu_model,
pytest.mark.slow_test
],
),
pytest.param(
"Qwen/Qwen3-8B", # qwen (text-only)
Expand Down
5 changes: 4 additions & 1 deletion tests/models/language/pooling/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
"model",
[
pytest.param("jason9693/Qwen2.5-1.5B-apeach",
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
marks=[
pytest.mark.core_model, pytest.mark.cpu_model,
pytest.mark.slow_test
]),
],
)
@pytest.mark.parametrize("dtype",
Expand Down
5 changes: 3 additions & 2 deletions tests/models/language/pooling/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
# model code with bidirectional attention.
# [Decoder-only]
pytest.param("BAAI/bge-multilingual-gemma2",
marks=[pytest.mark.core_model]),
marks=[pytest.mark.core_model, pytest.mark.slow_test]),
pytest.param(
"intfloat/e5-mistral-7b-instruct",
# CPU v1 doesn't support sliding window
marks=[pytest.mark.core_model]),
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base",
marks=[pytest.mark.cpu_model]),
# [Encoder-only]
pytest.param("BAAI/bge-base-en-v1.5", marks=[pytest.mark.core_model]),
pytest.param("BAAI/bge-base-en-v1.5",
marks=[pytest.mark.core_model, pytest.mark.slow_test]),
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
pytest.param("intfloat/multilingual-e5-small"),
# [Cross-Encoder]
Expand Down
25 changes: 23 additions & 2 deletions tests/models/test_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
HF_EXAMPLE_MODELS, HfExampleModels)
from .utils import dummy_hf_overrides

# Curated subset of model architectures exercised by the fast "Subset"
# initialization test, which runs whenever any vLLM source file changes.
# All remaining supported architectures are covered by the slower "Full"
# test via OTHER_MODEL_ARCH_LIST, which only runs when model code changes.
#
# NOTE: the original list contained `"UltravoxModel," "DeepSeekMTPModel"`
# — a comma INSIDE the first string literal instead of between the two —
# which Python's implicit string-literal concatenation fused into the
# single nonexistent architecture "UltravoxModel,DeepSeekMTPModel".
# Neither model was actually tested in the subset. Fixed below.
MINIMAL_MODEL_ARCH_LIST = [
    "LlavaForConditionalGeneration",
    "Llama4ForConditionalGeneration",
    "BertForSequenceClassification",
    "Gemma3nForCausalLM",
    "JinaVLForRanking",
    "InternVLChatModel",
    "InternLM2ForRewardModel",
    "TransformersForMultimodalLM",
    "PrithviGeoSpatialMAE",
    "UltravoxModel",
    "DeepSeekMTPModel",
    "MedusaModel",
    "TransformersModel",
    "MiDashengLMModel",
    "XLMRobertaModel",
]

# Every architecture HF_EXAMPLE_MODELS supports that is not in the curated
# minimal subset; exercised by the slower "Full" initialization test.
OTHER_MODEL_ARCH_LIST = set(
    HF_EXAMPLE_MODELS.get_supported_archs()).difference(
        MINIMAL_MODEL_ARCH_LIST)


@create_new_process_for_each_test()
def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
Expand Down Expand Up @@ -91,13 +103,22 @@ def _initialize_kv_caches_v1(self, vllm_config):
max_num_seqs=model_info.max_num_seqs)


@pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
@pytest.mark.parametrize("model_arch", OTHER_MODEL_ARCH_LIST)
def test_can_initialize_other(model_arch: str,
                              monkeypatch: pytest.MonkeyPatch):
    """Test initializing every supported model architecture that is NOT in
    the curated minimal subset (those are covered separately by
    test_can_initialize_subset)."""
    if model_arch == "Lfm2ForCausalLM":
        # Harness limitation, not a model bug — see skip message.
        pytest.skip("Skipping until test supports V1-only models")
    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)


@pytest.mark.parametrize("model_arch", MINIMAL_MODEL_ARCH_LIST)
def test_can_initialize_subset(model_arch: str,
                               monkeypatch: pytest.MonkeyPatch):
    """Test initializing select subset of supported models.

    This curated subset runs on every vLLM source change (see the
    "Basic Models Initialization Test (Subset)" step in
    .buildkite/test-pipeline.yaml), so it is kept deliberately small.
    """
    can_initialize(model_arch, monkeypatch, HF_EXAMPLE_MODELS)


@pytest.mark.parametrize("model_arch",
AUTO_EXAMPLE_MODELS.get_supported_archs())
def test_implicit_converted_models(model_arch: str,
Expand Down