From cc16128a5bf338ae83792f259cab3aa7ccf2f9a9 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Fri, 12 Sep 2025 17:53:19 +0000 Subject: [PATCH] Done Signed-off-by: Jee Jee Li --- benchmarks/kernels/benchmark_moe.py | 2 +- vllm/model_executor/layers/fused_moe/fused_moe.py | 5 ++++- vllm/triton_utils/importing.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index 94f3f1ae11f2..837b2b0c1044 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -560,7 +560,7 @@ def save_configs( filename = os.path.join(save_dir, filename) print(f"Writing best config to {filename}...") with open(filename, "w") as f: - json.dump(configs, f, indent=4) + json.dump({"triton_version": triton.__version__, **configs}, f, indent=4) f.write("\n") diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 06edfb0552e8..30e46ffa7b17 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -720,7 +720,10 @@ def get_moe_configs( logger.info("Using configuration from %s for MoE layer.", config_file_path) # If a configuration has been found, return it - return {int(key): val for key, val in json.load(f).items()} + tuned_config = json.load(f) + # Delete triton_version from tuned_config + tuned_config.pop("triton_version", None) + return {int(key): val for key, val in tuned_config.items()} # If no optimized configuration is available, we will use the default # configuration diff --git a/vllm/triton_utils/importing.py b/vllm/triton_utils/importing.py index 372200027bf9..2a06a9b7d11e 100644 --- a/vllm/triton_utils/importing.py +++ b/vllm/triton_utils/importing.py @@ -68,7 +68,7 @@ class TritonPlaceholder(types.ModuleType): def __init__(self): super().__init__("triton") - self.__version__ = "3.3.0" + self.__version__ = "3.4.0" self.jit = 
self._dummy_decorator("jit") self.autotune = self._dummy_decorator("autotune") self.heuristics = self._dummy_decorator("heuristics")