Skip to content

Commit b9f6303

Browse files
committed
Minor benchmark updates
Signed-off-by: elvircrn <[email protected]>
1 parent 5329699 commit b9f6303

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

benchmarks/kernels/benchmark_silu_mul_fp8_quant.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -216,13 +216,14 @@ def benchmark(k, E, T, H, num_parallel_tokens, G=128, runs=100):
216 216

217 217
def benchmark_full():
218 218
configs = [
219-
(256, 16, 7168),
220-
(256, 32, 7168),
221-
(256, 64, 7168),
222-
(256, 128, 7168),
223-
(256, 256, 7168),
224-
(256, 512, 7168),
225-
(256, 1024, 7168),
219+
(32, 8, 7168),
220+
(32, 16, 7168),
221+
(32, 32, 7168),
222+
(32, 64, 7168),
223+
(32, 128, 7168),
224+
(32, 256, 7168),
225+
(32, 512, 7168),
226+
(32, 1024, 7168),
226 227
]
227 228

228 229
print(f"GPU: {torch.cuda.get_device_name()} CUDA Kernel")

tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,36 @@ def gold(
172 172

173 173
# (E, T, H)
174 174
CASES = [
175+
(8, 16, 128 * 1),
176+
(8, 16, 128 * 2),
177+
(8, 16, 128 * 3),
178+
(8, 16, 128 * 4),
179+
(8, 16, 7168),
180+
(8, 16, 7168),
181+
(8, 32, 7168),
182+
(8, 64, 7168),
183+
(8, 128, 7168),
184+
(8, 256, 7168),
185+
(8, 512, 7168),
186+
(8, 1024, 7168),
187+
(8, 32, 1024),
188+
(16, 64, 2048),
189+
(32, 128, 4096),
190+
(9, 16, 128 * 1),
191+
(9, 16, 128 * 2),
192+
(9, 16, 128 * 3),
193+
(9, 16, 128 * 4),
194+
(9, 16, 7168),
195+
(9, 16, 7168),
196+
(9, 32, 7168),
197+
(9, 64, 7168),
198+
(9, 128, 7168),
199+
(9, 256, 7168),
200+
(9, 512, 7168),
201+
(9, 1024, 7168),
202+
(9, 32, 1024),
203+
(9, 64, 2048),
204+
(9, 128, 4096),
175 205
(256, 1024, 7168),
176 206
]
177 207

0 commit comments

Comments
 (0)