OpenPipe · FurtherAI · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 16, 2026
diff --git a/.github/workflows/prek.yml b/.github/workflows/prek.yml
@@ -202,7 +202,7 @@ jobs:
           done < "${part_paths_file}" | zstd -d -c | tar -xf - -C "${UV_CACHE_DIR}"
           du -sh "${UV_CACHE_DIR}"
 
-      - name: Install dependencies (with all optional extras for complete type checking)
+      - name: Install Megatron dependencies
         run: |
           original_pyproject="$(mktemp)"
           cp pyproject.toml "${original_pyproject}"
@@ -229,12 +229,31 @@ jobs:
             --apex-nvcc-threads "${CI_APEX_NVCC_THREADS}"
           echo "CI uv build overrides: APEX_PARALLEL_BUILD=${CI_APEX_PARALLEL_BUILD}, NVCC_APPEND_FLAGS=--threads ${CI_APEX_NVCC_THREADS}, UV_CONCURRENT_BUILDS=${CI_UV_BUILD_SLOTS}"
           uv --version
-          uv sync --all-extras --group dev --frozen --python "${CI_PYTHON_MM}"
+          uv sync --extra megatron --extra langgraph --extra plotting --group dev --frozen --python "${CI_PYTHON_MM}"
 
-      - name: Run prek hooks (lint, format, typecheck, uv.lock, tests)
+      - name: Run prek hooks (lint, format, typecheck, uv.lock)
         run: |
-          uv run --no-sync prek run --all-files
+          uv run --no-sync prek run ruff --all-files
+          uv run --no-sync prek run ruff-format --all-files
+          uv run --no-sync prek run ty --all-files
+          uv run --no-sync prek run uv-lock-check --all-files
 
-      - name: Run unit tests (via prek)
+      - name: Run Megatron unit tests
         run: |
-          uv run --no-sync prek run pytest
+          uv run --no-sync pytest --nbval --current-env --tb=short \
+            tests/unit/test_megatron_reference_logprobs.py \
+            tests/unit/test_moe_routing_replay.py \
+            tests/unit/test_moe_routing_real_path.py \
+            tests/unit/test_pipeline_trainer_local_backend.py
+
+      - name: Install backend dependencies
+        run: |
+          uv sync --extra backend --extra tinker --extra langgraph --extra plotting --group dev --frozen --python "${CI_PYTHON_MM}"
+
+      - name: Run unit tests
+        run: |
+          uv run --no-sync pytest --nbval --current-env --tb=short tests/unit \
+            --ignore=tests/unit/test_megatron_reference_logprobs.py \
+            --ignore=tests/unit/test_moe_routing_replay.py \
+            --ignore=tests/unit/test_moe_routing_real_path.py \
+            --ignore=tests/unit/test_pipeline_trainer_local_backend.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ backend = [
     "bitsandbytes>=0.45.2",
     "unsloth==2026.3.3",
     "unsloth-zoo==2026.3.1",
-    "torch>=2.11.0",
+    "torch==2.11.0",
     "torchao==0.16.0",
     "accelerate==1.7.0",
     "awscli>=1.38.1",
@@ -43,7 +43,7 @@ backend = [
 ]
 megatron = [
     "numpy<2",
-    "torch>=2.11.0",
+    "torch==2.11.0",
     "flash-attn-4==4.0.0b5",
     "ninja>=1.11.1",
     "quack-kernels==0.3.7",
@@ -61,6 +61,7 @@ megatron = [
     "nvidia-ml-py==13.580.82",
     "nvidia-modelopt>=0.42.0a0 ; sys_platform != 'darwin'",
     "nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'",
+    "transformers==5.6.2",
     "ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
 ]
 
@@ -79,8 +80,8 @@ tinker = [
     "protobuf>=6.31.1",
     "tinker-cookbook>=0.4.1,<0.5",
     "tinker>=0.21.0,<0.22",
-    "torch>=2.11.0",
-    "transformers==5.2.0",
+    "torch==2.11.0",
+    "transformers>=5.2.0,<=5.5.3",
     "uvicorn>=0.35.0",
     "datrie>=0.8.3",
 ]
@@ -150,14 +151,23 @@ markers = [
 
 [tool.uv]
 required-version = ">=0.11.7"
+conflicts = [
+    [
+        { extra = "backend" },
+        { extra = "megatron" },
+    ],
+    [
+        { extra = "tinker" },
+        { extra = "megatron" },
+    ],
+]
 override-dependencies = [
-    "flashinfer-python==0.6.1",
+    "flashinfer-python==0.6.8.post1",
     "megatron-core==0.17.0",
     "numpy<2",
     "nvidia-resiliency-ext<0.5",
     "quack-kernels==0.3.7",
     "transformer-engine==2.11.0",
-    "transformers==5.2.0",
     "torch==2.11.0",
 ]
 exclude-dependencies = ["pynvml", "emerging-optimizers", "causal-conv1d", "mamba-ssm"]
@@ -184,6 +194,46 @@ name = "deep-ep"
 version = "1.2.1+9af0e0d"
 requires-dist = []
 
+# Megatron Bridge's own source metadata currently requires Transformers 5.8.x,
+# but this branch is validated against Transformers 5.6.2 for Gemma 4.
+# Override that metadata with an explicit, unversioned list of Bridge's runtime
+# deps so that ART's megatron extra owns the Transformers pin.
+[[tool.uv.dependency-metadata]]
+name = "megatron-bridge"
+version = "0.5.0+e1a207ac"
+requires-dist = [
+    "accelerate",
+    "comet-ml",
+    "datasets",
+    "diffusers",
+    "einops",
+    "flash-linear-attention",
+    "flashinfer-cubin",
+    "flashinfer-python",
+    "hydra-core",
+    "imageio",
+    "imageio-ffmpeg",
+    "megatron-core",
+    "mistral-common",
+    "mlflow",
+    "nvidia-resiliency-ext",
+    "omegaconf",
+    "open-clip-torch",
+    "peft",
+    "pyyaml",
+    "qwen-vl-utils",
+    "regex",
+    "rich",
+    "six",
+    "tensorboard",
+    "timm",
+    "torch",
+    "tqdm",
+    "transformers",
+    "typing-extensions",
+    "wandb",
+]
+
 [[tool.uv.dependency-metadata]]
 name = "transformer-engine-torch"
 version = "2.11.0"
@@ -276,7 +326,7 @@ torch = [{ index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_pla
 apex = { git = "https://github.com/NVIDIA/apex.git", rev = "25.09" }
 deep-ep = { git = "https://github.com/deepseek-ai/DeepEP.git", rev = "v1.2.1" }
 flash-attn-4 = { url = "https://files.pythonhosted.org/packages/24/f7/01ee2576ce41f9884d291ee21861ef194afc0b2b1ce3bd175fc7a6e1b133/flash_attn_4-4.0.0b5-py3-none-any.whl" }
-megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "e049cc00c24d03e2ae45d2608c7a44e2d2364e3d" }
+megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "e1a207ac757e5d0ed94d8ffbe1cbd28e81d8c084" }
 panza = { git = "https://github.com/corbt/panza.git" }
 transformer-engine-torch = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "v2.11", subdirectory = "transformer_engine/pytorch" }
 

diff --git a/scripts/ci/build_and_push_uv_cache.sh b/scripts/ci/build_and_push_uv_cache.sh
@@ -283,8 +283,9 @@ build_cache_archive() {
   export LIBRARY_PATH="${CUDNN_LIBRARY_PATH}${LIBRARY_PATH:+:${LIBRARY_PATH}}"
   export LD_LIBRARY_PATH="${CUDNN_LIBRARY_PATH}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
 
-  log "Building full uv cache with compile_jobs=${compile_jobs}, apex_parallel_build=${apex_parallel_build}, nvcc_threads=${CI_APEX_NVCC_THREADS}, cuda_arch_list=${TORCH_CUDA_ARCH_LIST}, and uv_concurrent_builds=${UV_BUILD_SLOTS}."
-  uv sync --frozen --all-extras --group dev --no-install-project --python "${PYTHON_MM}"
+  log "Building split uv cache with compile_jobs=${compile_jobs}, apex_parallel_build=${apex_parallel_build}, nvcc_threads=${CI_APEX_NVCC_THREADS}, cuda_arch_list=${TORCH_CUDA_ARCH_LIST}, and uv_concurrent_builds=${UV_BUILD_SLOTS}."
+  uv sync --frozen --extra megatron --extra langgraph --extra plotting --group dev --no-install-project --python "${PYTHON_MM}"
+  uv sync --frozen --extra backend --extra tinker --extra langgraph --extra plotting --group dev --no-install-project --python "${PYTHON_MM}"
   rm -rf .venv
 
   log "Packing uv cache archive to ${archive_path}."

diff --git a/scripts/ci/compute_uv_fingerprint.py b/scripts/ci/compute_uv_fingerprint.py
@@ -83,9 +83,9 @@ def main() -> int:
             "uv_lock_sha256": _sha256_file(args.uv_lock),
         },
         "ci_context": {
-            "fingerprint_schema_version": 9,
+            "fingerprint_schema_version": 10,
             "cache_kind": "full_uv_cache",
-            "cache_scope": "prek_all_extras_group_dev",
+            "cache_scope": "prek_split_extras_group_dev",
             "cache_target": "uv_cache",
             "cache_python_platform": "linux_x86_64",
             "cache_package_manager": "uv",

diff --git a/scripts/setup.sh b/scripts/setup.sh
@@ -72,7 +72,7 @@ fi
 
 # Sync the dependencies
 if [ "${INSTALL_EXTRAS:-false}" = "true" ]; then
-    uv sync --all-extras --frozen
+    uv sync --extra backend --extra tinker --extra langgraph --extra plotting --frozen
 else
     uv sync --extra backend --frozen
 fi
diff --git a/src/art/__init__.py b/src/art/__init__.py
@@ -64,11 +64,16 @@
 from .batches import trajectory_group_batches
 from .dev import LoRAConfig
 from .gather import gather_trajectories, gather_trajectory_groups
+from .megatron.runtime_config import (
+    get_megatron_runtime_config,
+    init_megatron_runtime_config,
+)
 from .model import Model, TrainableModel
 from .serverless import ServerlessBackend
 from .trajectories import Trajectory, TrajectoryGroup
 from .types import (
     LocalTrainResult,
+    MegatronRuntimeConfig,
     MegatronTopologyConfig,
     Messages,
     MessagesAndChoices,
@@ -91,7 +96,10 @@
     "Backend",
     "LocalTrainResult",
     "LoRAConfig",
+    "MegatronRuntimeConfig",
     "MegatronTopologyConfig",
+    "get_megatron_runtime_config",
+    "init_megatron_runtime_config",
     "ServerlessBackend",
     "ServerlessTrainResult",
     "Messages",

diff --git a/src/art/_backend_training.py b/src/art/_backend_training.py
@@ -9,7 +9,7 @@
     summarize_trajectory_groups,
 )
 from .trajectories import TrajectoryGroup
-from .types import MegatronTopologyConfig, TrainConfig
+from .types import TrainConfig
 
 
 def build_rl_train_configs(
@@ -35,7 +35,6 @@ def build_rl_train_configs(
     scale_learning_rate_by_reward_std_dev: bool | None = None,
     logprob_calculation_chunk_size: int | None = None,
     packed_sequence_length: int | None = None,
-    megatron_topology: MegatronTopologyConfig | dict[str, int | None] | None = None,
     num_trajectories_learning_rate_multiplier_power: float | None = None,
     kl_ref_adapter_path: str | None = None,
 ) -> tuple[TrainConfig, dev.TrainConfig]:
@@ -69,10 +68,6 @@ def build_rl_train_configs(
         dev_config["logprob_calculation_chunk_size"] = logprob_calculation_chunk_size
     if packed_sequence_length is not None:
         dev_config["packed_sequence_length"] = packed_sequence_length
-    if megatron_topology is not None:
-        dev_config["megatron_topology"] = MegatronTopologyConfig.model_validate(
-            megatron_topology
-        ).model_dump(mode="json")
     if num_trajectories_learning_rate_multiplier_power is not None:
         dev_config["num_trajectories_learning_rate_multiplier_power"] = (
             num_trajectories_learning_rate_multiplier_power

diff --git a/src/art/auto_trajectory.py b/src/art/auto_trajectory.py
@@ -9,6 +9,7 @@
 
 from .openai import init_chat_completion, update_chat_completion
 from .preprocessing.moe_routing import attach_moe_routing_metadata_to_choice
+from .preprocessing.vllm_tokens import attach_vllm_token_metadata_to_choice
 from .trajectories import History, Trajectory
 
 logger = logging.getLogger(__name__)
@@ -105,9 +106,25 @@ def handle_httpx_response(self, response: httpx._models.Response) -> None:
                 # Parse SSE content directly from buffered bytes
                 chat_completion = parse_sse_to_chat_completion(content)
                 choice = chat_completion.choices[0]
+                response_payload = chat_completion.model_dump(mode="python")
+                attach_vllm_token_metadata_to_choice(
+                    choice=choice,
+                    response_payload=response_payload,
+                    choice_index=0,
+                )
+                attach_moe_routing_metadata_to_choice(
+                    choice=choice,
+                    response_payload=response_payload,
+                    choice_index=0,
+                )
             else:
                 response_payload = json.loads(content)
                 choice = Choice(**response_payload["choices"][0])
+                attach_vllm_token_metadata_to_choice(
+                    choice=choice,
+                    response_payload=response_payload,
+                    choice_index=0,
+                )
                 attach_moe_routing_metadata_to_choice(
                     choice=choice,
                     response_payload=response_payload,

diff --git a/src/art/dev/get_model_config.py b/src/art/dev/get_model_config.py
@@ -8,6 +8,7 @@
     LoRAConfig,
     TrainerArgs,
 )
+from .sequence_lengths import max_seq_length_from_model_config
 from .validate import is_dedicated_mode
 
 
@@ -36,9 +37,14 @@ def get_model_config(
     else:
         enable_sleep_mode = config.get("engine_args", {}).get("enable_sleep_mode", True)
 
+    configured_init_args = config.get("init_args", {})
     init_args = InitArgs(
         load_in_4bit=True,
-        max_seq_length=32768,
+        max_seq_length=max_seq_length_from_model_config(
+            base_model,
+            revision=configured_init_args.get("revision"),
+            token=configured_init_args.get("token"),
+        ),
         model_name=base_model,
     )
     engine_args = EngineArgs(
@@ -48,7 +54,7 @@ def get_model_config(
         model=base_model,
     )
     engine_args.update(config.get("engine_args", {}))
-    init_args.update(config.get("init_args", {}))
+    init_args.update(configured_init_args)
     if last_checkpoint_dir := get_last_checkpoint_dir(output_dir):
         init_args["model_name"] = last_checkpoint_dir
     merged_lora_config = LoRAConfig(
@@ -95,6 +101,4 @@ def get_model_config(
         result["trainer_gpu_ids"] = config["trainer_gpu_ids"]
     if "inference_gpu_ids" in config:
         result["inference_gpu_ids"] = config["inference_gpu_ids"]
-    if "megatron_topology" in config:
-        result["megatron_topology"] = config["megatron_topology"]
     return result
diff --git a/src/art/dev/model.py b/src/art/dev/model.py
@@ -1,13 +1,10 @@
 from enum import Enum
-from typing import TYPE_CHECKING, Literal, NoReturn
+from typing import Literal, NoReturn
 
 from typing_extensions import Required, TypedDict
 
 from .engine import EngineArgs
 
-if TYPE_CHECKING:
-    from ..types import MegatronTopologyConfig
-
 RolloutWeightsMode = Literal["lora", "merged"]
 
 
@@ -138,7 +135,6 @@ class InternalModelConfig(TypedDict, total=False):
         chat_template_content_format: vLLM chat template content format.
         chat_template_tool_schema_format: Tool schema rendering format used for
             local training tokenization.
-        megatron_topology: Fixed Megatron parallel topology for this model.
         allow_unvalidated_arch: Permit model-support validation workflows to run
             architectures that are not yet in the supported-model registry.
     """
@@ -156,7 +152,6 @@ class InternalModelConfig(TypedDict, total=False):
     chat_template_path: str
     chat_template_content_format: str
     chat_template_tool_schema_format: Literal["default", "vllm_openai"]
-    megatron_topology: "MegatronTopologyConfig | dict[str, int | None]"
     allow_unvalidated_arch: bool