From e375e3498f75feb7af34abfc3c2544a367da6812 Mon Sep 17 00:00:00 2001
From: Akhil Appana <akhilapp@google.com>
Date: Mon, 15 Jun 2026 23:24:55 +0000
Subject: [PATCH 1/2] fix: handle large (1M) context windows for Opus 4.x and
 compaction

Two related fixes so that large context windows are no longer treated as
200K or capped by a small hardcoded compaction threshold.

1. get_context_length() returned 200K for 1M-token Opus models. The
   generic `claude-opus-4-` prefix branch captured claude-opus-4-6/4-7/4-8,
   which are 1M-token models. Add an explicit 1M branch before it.

2. Make the compaction `token_threshold` optional so it no longer forces a
   small cap on large windows. The configurable default is kept in
   crates/forge_config/.forge.toml (token_threshold = 100000) so users can
   still tune it. When it is set, it is treated as an absolute cap: the
   lower of it and the percentage-based cap (`token_threshold_percentage`,
   default 70% of the context window) is used, preserving headroom on
   small windows. When it is unset, the threshold is derived purely from
   the percentage-based cap, so large windows (e.g. 1M-token models) are
   not capped to a small hardcoded value.

Fixes #3518
---
 .../forge_app/src/dto/anthropic/response.rs   | 25 ++++++++
 crates/forge_domain/src/agent.rs              | 64 ++++++++++++++-----
 2 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/crates/forge_app/src/dto/anthropic/response.rs b/crates/forge_app/src/dto/anthropic/response.rs
index d964b05953..4f17bb8b21 100644
--- a/crates/forge_app/src/dto/anthropic/response.rs
+++ b/crates/forge_app/src/dto/anthropic/response.rs
@@ -84,6 +84,16 @@ fn get_context_length(model_id: &str) -> Option<u64> {
         return Some(1_000_000);
     }
 
+    // Claude Opus 4.6 / 4.7 / 4.8 (1M context). Must be checked before the
+    // generic `claude-opus-4-` branch below, which would otherwise cap these
+    // 1M-token models at 200K.
+    if model_id.starts_with("claude-opus-4-6")
+        || model_id.starts_with("claude-opus-4-7")
+        || model_id.starts_with("claude-opus-4-8")
+    {
+        return Some(1_000_000);
+    }
+
     // Current models (200K context)
     if model_id.starts_with("claude-sonnet-4-5-")
         || model_id.starts_with("claude-haiku-4-5-")
@@ -709,6 +719,21 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_get_context_length_opus_1m_models() {
+        // Claude Opus 4.6 / 4.7 / 4.8 are 1M-token models and must not be
+        // captured by the generic `claude-opus-4-` 200K branch.
+        assert_eq!(get_context_length("claude-opus-4-6"), Some(1_000_000));
+        assert_eq!(get_context_length("claude-opus-4-7"), Some(1_000_000));
+        assert_eq!(get_context_length("claude-opus-4-8"), Some(1_000_000));
+        // Older Opus 4.x models remain at 200K.
+        assert_eq!(
+            get_context_length("claude-opus-4-1-20250805"),
+            Some(200_000)
+        );
+        assert_eq!(get_context_length("claude-opus-4-20250514"), Some(200_000));
+    }
+
     #[test]
     fn test_get_context_length_legacy_claude_4() {
         // Legacy Claude 4 models (200K context)
diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs
index ace8bfdfc0..72b9dcbd6a 100644
--- a/crates/forge_domain/src/agent.rs
+++ b/crates/forge_domain/src/agent.rs
@@ -231,16 +231,19 @@ impl Agent {
         self
     }
 
-    /// Applies a safe `token_threshold` by taking the minimum of an absolute
-    /// token cap and a percentage-based context-window cap.
+    /// Applies a safe `token_threshold` derived from the selected model's
+    /// context window.
     ///
-    /// The absolute cap comes from `compact.token_threshold`, or falls back to
-    /// a default of 100,000 tokens. The context-window cap comes from
-    /// `compact.token_threshold_percentage`, or falls back to 70%
-    /// of the selected model's context window. If model metadata is
-    /// unavailable, a default 128K context window is used. The lower of
-    /// these two values is used to preserve headroom for tool outputs and
-    /// follow-up messages.
+    /// The percentage-based cap comes from `compact.token_threshold_percentage`,
+    /// or falls back to 70% of the selected model's context window. If model
+    /// metadata is unavailable, a default 128K context window is used.
+    ///
+    /// When `compact.token_threshold` is explicitly configured, it is treated
+    /// as an absolute cap and the lower of it and the percentage-based cap is
+    /// used, preserving headroom for tool outputs and follow-up messages on
+    /// small context windows. When it is unset, the threshold is derived purely
+    /// from the context window (the percentage-based cap) so that large windows
+    /// (e.g. 1M-token models) are not capped to a small hardcoded value.
     ///
     /// # Arguments
     /// * `selected_model` - The model that will be used for this agent
@@ -249,7 +252,6 @@ impl Agent {
     /// The agent with a safe token_threshold configured
     pub fn compaction_threshold(mut self, selected_model: Option<&Model>) -> Self {
         const DEFAULT_CONTEXT_WINDOW: usize = 128_000;
-        const DEFAULT_TOKEN_THRESHOLD: usize = 100_000;
         const DEFAULT_CONTEXT_WINDOW_PERCENTAGE: f64 = 0.7;
 
         let context_window = selected_model
@@ -257,10 +259,6 @@ impl Agent {
             .and_then(|context_window| usize::try_from(context_window).ok())
             .unwrap_or(DEFAULT_CONTEXT_WINDOW);
 
-        let configured_threshold = self
-            .compact
-            .token_threshold
-            .unwrap_or(DEFAULT_TOKEN_THRESHOLD);
         let context_window_percentage = self
             .compact
             .token_threshold_percentage
@@ -268,7 +266,17 @@ impl Agent {
         let context_window_threshold =
             ((context_window as f64) * context_window_percentage).floor() as usize;
 
-        self.compact.token_threshold = Some(configured_threshold.min(context_window_threshold));
+        // By default the threshold is derived from the model's context window
+        // (the percentage-based cap), so large windows (e.g. 1M-token models)
+        // are not capped to a small hardcoded value. When the user explicitly
+        // configures a `token_threshold`, it is treated as an absolute upper
+        // bound and the lower of it and the percentage-based cap is used.
+        let token_threshold = match self.compact.token_threshold {
+            Some(configured_threshold) => configured_threshold.min(context_window_threshold),
+            None => context_window_threshold,
+        };
+
+        self.compact.token_threshold = Some(token_threshold);
 
         self
     }
@@ -375,7 +383,10 @@ mod tests {
     }
 
     #[test]
-    fn test_compaction_threshold_uses_hardcoded_cap_when_context_window_cap_is_higher() {
+    fn test_compaction_threshold_uses_context_window_percentage_when_unset() {
+        // With no configured token_threshold, the threshold is derived purely
+        // from the model's context window (70%), so large windows aren't capped
+        // to a small hardcoded value.
         let fixture = Agent::new(
             AgentId::new("test"),
             ProviderId::OPENAI,
@@ -385,7 +396,26 @@ mod tests {
         let selected_model = model_fixture("selected-model", Some(200_000));
 
         let actual = fixture.compaction_threshold(Some(&selected_model));
-        let expected = Some(100_000);
+        // 70% of 200K = 140K
+        let expected = Some(140_000);
+
+        assert_eq!(actual.compact.token_threshold, expected);
+    }
+
+    #[test]
+    fn test_compaction_threshold_large_window_not_capped_to_hardcoded_default() {
+        // Regression: a 1M-token model with no configured threshold should get a
+        // threshold of 70% of the window (700K), not a small hardcoded default.
+        let fixture = Agent::new(
+            AgentId::new("test"),
+            ProviderId::OPENAI,
+            ModelId::new("selected-model"),
+        );
+
+        let selected_model = model_fixture("selected-model", Some(1_000_000));
+
+        let actual = fixture.compaction_threshold(Some(&selected_model));
+        let expected = Some(700_000);
 
         assert_eq!(actual.compact.token_threshold, expected);
     }

From a7d04fbba17e80bddc87e2395990aeeace8a1bc4 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jun 2026 05:53:35 +0000
Subject: [PATCH 2/2] [autofix.ci] apply automated fixes

---
 crates/forge_domain/src/agent.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs
index 72b9dcbd6a..e2ff8ec29b 100644
--- a/crates/forge_domain/src/agent.rs
+++ b/crates/forge_domain/src/agent.rs
@@ -234,9 +234,10 @@ impl Agent {
     /// Applies a safe `token_threshold` derived from the selected model's
     /// context window.
     ///
-    /// The percentage-based cap comes from `compact.token_threshold_percentage`,
-    /// or falls back to 70% of the selected model's context window. If model
-    /// metadata is unavailable, a default 128K context window is used.
+    /// The percentage-based cap comes from
+    /// `compact.token_threshold_percentage`, or falls back to 70% of the
+    /// selected model's context window. If model metadata is unavailable, a
+    /// default 128K context window is used.
     ///
     /// When `compact.token_threshold` is explicitly configured, it is treated
     /// as an absolute cap and the lower of it and the percentage-based cap is