From e375e3498f75feb7af34abfc3c2544a367da6812 Mon Sep 17 00:00:00 2001 From: Akhil Appana Date: Mon, 15 Jun 2026 23:24:55 +0000 Subject: [PATCH 1/2] fix: handle large (1M) context windows for Opus 4.x and compaction Two related fixes so large context windows are used fully instead of being treated as 200K / capped to a small hardcoded compaction threshold. 1. get_context_length() returned 200K for 1M-token Opus models. The generic `claude-opus-4-` prefix branch captured claude-opus-4-6/4-7/4-8, which are 1M-token models. Add an explicit 1M branch before it. 2. Make the compaction `token_threshold` optional so it no longer forces a small cap on large windows. The configurable default is kept in crates/forge_config/.forge.toml (token_threshold = 100000) so users can still tune it. When it is set it is treated as an absolute cap (lower of it and 70% of the context window) preserving headroom on small windows; when it is unset the threshold is derived purely from the context window (70%), so large windows (e.g. 1M-token models) are not capped to a small hardcoded value. Fixes #3518 --- .../forge_app/src/dto/anthropic/response.rs | 25 ++++++++ crates/forge_domain/src/agent.rs | 64 ++++++++++++++----- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/crates/forge_app/src/dto/anthropic/response.rs b/crates/forge_app/src/dto/anthropic/response.rs index d964b05953..4f17bb8b21 100644 --- a/crates/forge_app/src/dto/anthropic/response.rs +++ b/crates/forge_app/src/dto/anthropic/response.rs @@ -84,6 +84,16 @@ fn get_context_length(model_id: &str) -> Option { return Some(1_000_000); } + // Claude Opus 4.6 / 4.7 / 4.8 (1M context). Must be checked before the + // generic `claude-opus-4-` branch below, which would otherwise cap these + // 1M-token models at 200K. + if model_id.starts_with("claude-opus-4-6") + || model_id.starts_with("claude-opus-4-7") + || model_id.starts_with("claude-opus-4-8") + { + return Some(1_000_000); + } + // Current models (200K context) if model_id.starts_with("claude-sonnet-4-5-") || model_id.starts_with("claude-haiku-4-5-") @@ -709,6 +719,21 @@ mod tests { ); } + #[test] + fn test_get_context_length_opus_1m_models() { + // Claude Opus 4.6 / 4.7 / 4.8 are 1M-token models and must not be + // captured by the generic `claude-opus-4-` 200K branch. + assert_eq!(get_context_length("claude-opus-4-6"), Some(1_000_000)); + assert_eq!(get_context_length("claude-opus-4-7"), Some(1_000_000)); + assert_eq!(get_context_length("claude-opus-4-8"), Some(1_000_000)); + // Older Opus 4.x models remain at 200K. + assert_eq!( + get_context_length("claude-opus-4-1-20250805"), + Some(200_000) + ); + assert_eq!(get_context_length("claude-opus-4-20250514"), Some(200_000)); + } + #[test] fn test_get_context_length_legacy_claude_4() { // Legacy Claude 4 models (200K context) diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index ace8bfdfc0..72b9dcbd6a 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -231,16 +231,19 @@ impl Agent { self } - /// Applies a safe `token_threshold` by taking the minimum of an absolute - /// token cap and a percentage-based context-window cap. + /// Applies a safe `token_threshold` derived from the selected model's + /// context window. /// - /// The absolute cap comes from `compact.token_threshold`, or falls back to - /// a default of 100,000 tokens. The context-window cap comes from - /// `compact.token_threshold_percentage`, or falls back to 70% - /// of the selected model's context window. If model metadata is - /// unavailable, a default 128K context window is used. The lower of - /// these two values is used to preserve headroom for tool outputs and - /// follow-up messages. + /// The percentage-based cap comes from `compact.token_threshold_percentage`, + /// or falls back to 70% of the selected model's context window. If model + /// metadata is unavailable, a default 128K context window is used. + /// + /// When `compact.token_threshold` is explicitly configured, it is treated + /// as an absolute cap and the lower of it and the percentage-based cap is + /// used, preserving headroom for tool outputs and follow-up messages on + /// small context windows. When it is unset, the threshold is derived purely + /// from the context window (the percentage-based cap) so that large windows + /// (e.g. 1M-token models) are not capped to a small hardcoded value. /// /// # Arguments /// * `selected_model` - The model that will be used for this agent @@ -249,7 +252,6 @@ impl Agent { /// The agent with a safe token_threshold configured pub fn compaction_threshold(mut self, selected_model: Option<&Model>) -> Self { const DEFAULT_CONTEXT_WINDOW: usize = 128_000; - const DEFAULT_TOKEN_THRESHOLD: usize = 100_000; const DEFAULT_CONTEXT_WINDOW_PERCENTAGE: f64 = 0.7; let context_window = selected_model @@ -257,10 +259,6 @@ impl Agent { .and_then(|context_window| usize::try_from(context_window).ok()) .unwrap_or(DEFAULT_CONTEXT_WINDOW); - let configured_threshold = self - .compact - .token_threshold - .unwrap_or(DEFAULT_TOKEN_THRESHOLD); let context_window_percentage = self .compact .token_threshold_percentage @@ -268,7 +266,17 @@ impl Agent { let context_window_threshold = ((context_window as f64) * context_window_percentage).floor() as usize; - self.compact.token_threshold = Some(configured_threshold.min(context_window_threshold)); + // By default the threshold is derived from the model's context window so + // that large windows (e.g. 1M-token models) are used fully instead of + // being capped to a small hardcoded value. When the user explicitly + // configures a `token_threshold` it is treated as an absolute upper + // bound, capped to the context-window-derived value for safety headroom. + let token_threshold = match self.compact.token_threshold { + Some(configured_threshold) => configured_threshold.min(context_window_threshold), + None => context_window_threshold, + }; + + self.compact.token_threshold = Some(token_threshold); self } @@ -375,7 +383,10 @@ mod tests { } #[test] - fn test_compaction_threshold_uses_hardcoded_cap_when_context_window_cap_is_higher() { + fn test_compaction_threshold_uses_context_window_percentage_when_unset() { + // With no configured token_threshold, the threshold is derived purely + // from the model's context window (70%), so large windows aren't capped + // to a small hardcoded value. let fixture = Agent::new( AgentId::new("test"), ProviderId::OPENAI, @@ -385,7 +396,26 @@ mod tests { let selected_model = model_fixture("selected-model", Some(200_000)); let actual = fixture.compaction_threshold(Some(&selected_model)); - let expected = Some(100_000); + // 70% of 200K = 140K + let expected = Some(140_000); + + assert_eq!(actual.compact.token_threshold, expected); + } + + #[test] + fn test_compaction_threshold_large_window_not_capped_to_hardcoded_default() { + // Regression: a 1M-token model with no configured threshold should get a + // threshold of 70% of the window (700K), not a small hardcoded default. + let fixture = Agent::new( + AgentId::new("test"), + ProviderId::OPENAI, + ModelId::new("selected-model"), + ); + + let selected_model = model_fixture("selected-model", Some(1_000_000)); + + let actual = fixture.compaction_threshold(Some(&selected_model)); + let expected = Some(700_000); assert_eq!(actual.compact.token_threshold, expected); } From a7d04fbba17e80bddc87e2395990aeeace8a1bc4 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jun 2026 05:53:35 +0000 Subject: [PATCH 2/2] [autofix.ci] apply automated fixes --- crates/forge_domain/src/agent.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index 72b9dcbd6a..e2ff8ec29b 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -234,9 +234,10 @@ impl Agent { /// Applies a safe `token_threshold` derived from the selected model's /// context window. /// - /// The percentage-based cap comes from `compact.token_threshold_percentage`, - /// or falls back to 70% of the selected model's context window. If model - /// metadata is unavailable, a default 128K context window is used. + /// The percentage-based cap comes from + /// `compact.token_threshold_percentage`, or falls back to 70% of the + /// selected model's context window. If model metadata is unavailable, a + /// default 128K context window is used. /// /// When `compact.token_threshold` is explicitly configured, it is treated /// as an absolute cap and the lower of it and the percentage-based cap is