From 9f2165e5032737aa48b531115e47174ee93b46a4 Mon Sep 17 00:00:00 2001 From: amd-asalykov Date: Mon, 15 Jun 2026 05:18:06 -0500 Subject: [PATCH 1/3] enable FlyDSL MoE for Kimi int4 --- .github/configs/amd-master.yaml | 8 +++++--- .../single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh | 2 ++ perf-changelog.yaml | 8 ++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 7e4918e09..eba201771 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -739,7 +739,7 @@ glm5.1-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 256 } kimik2.5-int4-mi355x-vllm: - image: vllm/vllm-openai-rocm:v0.21.0 + image: vllm/vllm-openai-rocm:nightly model: moonshotai/Kimi-K2.5 model-prefix: kimik2.5 runner: mi355x @@ -751,11 +751,13 @@ kimik2.5-int4-mi355x-vllm: - isl: 1024 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } kimik2.5-int4-mi325x-vllm: image: vllm/vllm-openai-rocm:v0.21.0 diff --git a/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh index 5c6b8c73a..dc16f1e53 100755 --- a/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh +++ b/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh @@ -42,6 +42,8 @@ vllm serve $MODEL --port $PORT \ --trust-remote-code \ --no-enable-prefix-caching \ --max-num-seqs 256 \ +--moe-backend flydsl \ +--compilation-config '{"pass_config": {"fuse_allreduce_rms": false}}' \ --mm-encoder-tp-mode data > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml index bee038a7a..a7e78351b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3842,3 +3842,11 @@ - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026" - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689 + +- config-keys: + - kimik2.5-int4-mi355x-vllm + description: + - "Replace triton w4a16 MoE with FlyDSL w4a16 MoE" + - "Image: vllm/vllm-openai-rocm:nightly" + - "Add more sweep points" + pr-link: From 907dee9c423d3d0443e7d741d627b4fcb8c6e835 Mon Sep 17 00:00:00 2001 From: amd-asalykov Date: Mon, 15 Jun 2026 05:20:42 -0500 Subject: [PATCH 2/3] add PR link --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index a7e78351b..f78e49d3b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3849,4 +3849,4 @@ - "Replace triton w4a16 MoE with FlyDSL w4a16 MoE" - "Image: vllm/vllm-openai-rocm:nightly" - "Add more sweep points" - pr-link: + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1777 From be23347d4ee132914be30eb73017186bb201db18 Mon Sep 17 00:00:00 2001 From: amd-asalykov Date: Mon, 15 Jun 2026 08:22:10 -0500 Subject: [PATCH 3/3] pin Kimi int4 MI355X vLLM image to nightly-b8336c3c --- .github/configs/amd-master.yaml | 2 +- perf-changelog.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index eba201771..e9824eb41 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -739,7 +739,7 @@ glm5.1-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 256 } kimik2.5-int4-mi355x-vllm: - image: vllm/vllm-openai-rocm:nightly + image: vllm/vllm-openai-rocm:nightly-b8336c3c7c298e0878f22a7bf70f4e295b2f4e01 model: moonshotai/Kimi-K2.5 model-prefix: kimik2.5 runner: mi355x diff --git a/perf-changelog.yaml b/perf-changelog.yaml 
index f78e49d3b..08fde40c4 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3850,3 +3850,4 @@ - "Image: vllm/vllm-openai-rocm:nightly" - "Add more sweep points" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1777 +