Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions crates/forge_app/src/dto/anthropic/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ fn get_context_length(model_id: &str) -> Option<u64> {
return Some(1_000_000);
}

// Claude Opus 4.6 / 4.7 / 4.8 (1M context). Must be checked before the
// generic `claude-opus-4-` branch below, which would otherwise cap these
// 1M-token models at 200K.
if model_id.starts_with("claude-opus-4-6")
|| model_id.starts_with("claude-opus-4-7")
|| model_id.starts_with("claude-opus-4-8")
{
return Some(1_000_000);
}

// Current models (200K context)
if model_id.starts_with("claude-sonnet-4-5-")
|| model_id.starts_with("claude-haiku-4-5-")
Expand Down Expand Up @@ -709,6 +719,21 @@ mod tests {
);
}

#[test]
fn test_get_context_length_opus_1m_models() {
// Claude Opus 4.6 / 4.7 / 4.8 are 1M-token models and must not be
// captured by the generic `claude-opus-4-` 200K branch.
assert_eq!(get_context_length("claude-opus-4-6"), Some(1_000_000));
assert_eq!(get_context_length("claude-opus-4-7"), Some(1_000_000));
assert_eq!(get_context_length("claude-opus-4-8"), Some(1_000_000));
// Older Opus 4.x models remain at 200K.
assert_eq!(
get_context_length("claude-opus-4-1-20250805"),
Some(200_000)
);
assert_eq!(get_context_length("claude-opus-4-20250514"), Some(200_000));
}

#[test]
fn test_get_context_length_legacy_claude_4() {
// Legacy Claude 4 models (200K context)
Expand Down
65 changes: 48 additions & 17 deletions crates/forge_domain/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,20 @@ impl Agent {
self
}

/// Applies a safe `token_threshold` by taking the minimum of an absolute
/// token cap and a percentage-based context-window cap.
/// Applies a safe `token_threshold` derived from the selected model's
/// context window.
///
/// The absolute cap comes from `compact.token_threshold`, or falls back to
/// a default of 100,000 tokens. The context-window cap comes from
/// `compact.token_threshold_percentage`, or falls back to 70%
/// of the selected model's context window. If model metadata is
/// unavailable, a default 128K context window is used. The lower of
/// these two values is used to preserve headroom for tool outputs and
/// follow-up messages.
/// The percentage-based cap comes from
/// `compact.token_threshold_percentage`, or falls back to 70% of the
/// selected model's context window. If model metadata is unavailable, a
/// default 128K context window is used.
///
/// When `compact.token_threshold` is explicitly configured, it is treated
/// as an absolute cap and the lower of it and the percentage-based cap is
/// used, preserving headroom for tool outputs and follow-up messages on
/// small context windows. When it is unset, the threshold is derived purely
/// from the context window (the percentage-based cap) so that large windows
/// (e.g. 1M-token models) are not capped to a small hardcoded value.
///
/// # Arguments
/// * `selected_model` - The model that will be used for this agent
Expand All @@ -249,26 +253,31 @@ impl Agent {
/// The agent with a safe token_threshold configured
pub fn compaction_threshold(mut self, selected_model: Option<&Model>) -> Self {
const DEFAULT_CONTEXT_WINDOW: usize = 128_000;
const DEFAULT_TOKEN_THRESHOLD: usize = 100_000;
const DEFAULT_CONTEXT_WINDOW_PERCENTAGE: f64 = 0.7;

let context_window = selected_model
.and_then(|model| model.context_length)
.and_then(|context_window| usize::try_from(context_window).ok())
.unwrap_or(DEFAULT_CONTEXT_WINDOW);

let configured_threshold = self
.compact
.token_threshold
.unwrap_or(DEFAULT_TOKEN_THRESHOLD);
let context_window_percentage = self
.compact
.token_threshold_percentage
.unwrap_or(DEFAULT_CONTEXT_WINDOW_PERCENTAGE);
let context_window_threshold =
((context_window as f64) * context_window_percentage).floor() as usize;

self.compact.token_threshold = Some(configured_threshold.min(context_window_threshold));
// By default the threshold is derived from the model's context window so
// that large windows (e.g. 1M-token models) are used fully instead of
// being capped to a small hardcoded value. When the user explicitly
// configures a `token_threshold` it is treated as an absolute upper
// bound, capped to the context-window-derived value for safety headroom.
let token_threshold = match self.compact.token_threshold {
Some(configured_threshold) => configured_threshold.min(context_window_threshold),
None => context_window_threshold,
};

self.compact.token_threshold = Some(token_threshold);

self
}
Expand Down Expand Up @@ -375,7 +384,10 @@ mod tests {
}

#[test]
fn test_compaction_threshold_uses_hardcoded_cap_when_context_window_cap_is_higher() {
fn test_compaction_threshold_uses_context_window_percentage_when_unset() {
// With no configured token_threshold, the threshold is derived purely
// from the model's context window (70%), so large windows aren't capped
// to a small hardcoded value.
let fixture = Agent::new(
AgentId::new("test"),
ProviderId::OPENAI,
Expand All @@ -385,7 +397,26 @@ mod tests {
let selected_model = model_fixture("selected-model", Some(200_000));

let actual = fixture.compaction_threshold(Some(&selected_model));
let expected = Some(100_000);
// 70% of 200K = 140K
let expected = Some(140_000);

assert_eq!(actual.compact.token_threshold, expected);
}

#[test]
fn test_compaction_threshold_large_window_not_capped_to_hardcoded_default() {
// Regression: a 1M-token model with no configured threshold should get a
// threshold of 70% of the window (700K), not a small hardcoded default.
let fixture = Agent::new(
AgentId::new("test"),
ProviderId::OPENAI,
ModelId::new("selected-model"),
);

let selected_model = model_fixture("selected-model", Some(1_000_000));

let actual = fixture.compaction_threshold(Some(&selected_model));
let expected = Some(700_000);

assert_eq!(actual.compact.token_threshold, expected);
}
Expand Down
Loading