diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 7e4918e09..e9824eb41 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -739,7 +739,7 @@ glm5.1-fp4-mi355x-atom: - { tp: 4, conc-start: 4, conc-end: 256 } kimik2.5-int4-mi355x-vllm: - image: vllm/vllm-openai-rocm:v0.21.0 + image: vllm/vllm-openai-rocm:nightly-b8336c3c7c298e0878f22a7bf70f4e295b2f4e01 model: moonshotai/Kimi-K2.5 model-prefix: kimik2.5 runner: mi355x @@ -751,11 +751,13 @@ kimik2.5-int4-mi355x-vllm: - isl: 1024 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, conc-start: 4, conc-end: 128 } kimik2.5-int4-mi325x-vllm: image: vllm/vllm-openai-rocm:v0.21.0 diff --git a/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh index 5c6b8c73a..dc16f1e53 100755 --- a/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh +++ b/benchmarks/single_node/fixed_seq_len/kimik2.5_int4_mi355x.sh @@ -42,6 +42,8 @@ vllm serve $MODEL --port $PORT \ --trust-remote-code \ --no-enable-prefix-caching \ --max-num-seqs 256 \ +--moe-backend flydsl \ +--compilation-config '{"pass_config": {"fuse_allreduce_rms": false}}' \ --mm-encoder-tp-mode data > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml index bee038a7a..08fde40c4 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3842,3 +3842,12 @@ - "Recipes sourced from NVIDIA/srt-slurm branch sa-submission-q2-2026" - "Runner script updated to support dsv4 model prefix with dynamo-trt framework on GB300" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1689 + +- config-keys: + - kimik2.5-int4-mi355x-vllm + description: + - "Replace triton w4a16 MoE with FlyDSL w4a16 MoE" + - "Image: vllm/vllm-openai-rocm:nightly-b8336c3c7c298e0878f22a7bf70f4e295b2f4e01" + - "Extend tp8 sweep conc-end from 64 to 128 and add tp4 sweep (conc 4-128) for 1k1k and 8k1k" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1777 +