diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index b7437de2..f401759b 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -159,6 +159,21 @@ Examples: Inspect(InspectArgs), /// Solve a problem instance Solve(SolveArgs), + /// Extract a source-space solution from a reduction bundle and a target-space config + #[command(after_help = "\ +Examples: + pred extract bundle.json --config 1,0,1,0 + pred extract bundle.json --config 1,0,1,0 -o source.json + cat bundle.json | pred extract - --config 1,0,1,0 + +Use this when an external solver has solved the bundle's target problem +(e.g. a QUBO sampler, a neutral-atom platform, a QAOA runtime) and you want +the corresponding solution in the original source problem space without +having to shell back into `pred solve`. + +Input: a reduction bundle JSON (from `pred reduce`). Use - to read from stdin. +--config is the target-space configuration (comma-separated, e.g. 1,0,1,0).")] + Extract(ExtractArgs), /// Start MCP (Model Context Protocol) server for AI assistant integration #[cfg(feature = "mcp")] #[command(after_help = "\ @@ -1209,6 +1224,15 @@ pub struct ReduceArgs { pub via: Option, } +#[derive(clap::Args)] +pub struct ExtractArgs { + /// Reduction bundle JSON (from `pred reduce`). Use - for stdin. + pub input: PathBuf, + /// Target-space configuration to map back (comma-separated, e.g. 1,0,1,0) + #[arg(long)] + pub config: String, +} + #[derive(clap::Args)] pub struct InspectArgs { /// Problem JSON file or reduction bundle. Use - for stdin. 
@@ -1242,6 +1266,7 @@ pub fn print_subcommand_help_hint(error_msg: &str) { let subcmds = [ ("pred solve", "solve"), ("pred reduce", "reduce"), + ("pred extract", "extract"), ("pred create", "create"), ("pred evaluate", "evaluate"), ("pred inspect", "inspect"), diff --git a/problemreductions-cli/src/commands/extract.rs b/problemreductions-cli/src/commands/extract.rs new file mode 100644 index 00000000..18f12c37 --- /dev/null +++ b/problemreductions-cli/src/commands/extract.rs @@ -0,0 +1,87 @@ +use crate::dispatch::{read_input, BundleReplay, ReductionBundle}; +use crate::output::OutputConfig; +use anyhow::{Context, Result}; +use std::path::Path; + +/// Extract a source-space configuration from a target-space configuration and a reduction bundle. +/// +/// This lets external solvers (that solved the bundle's target problem on their own) +/// recover a solution in the original source problem space without having to +/// re-solve through `pred solve`. +pub fn extract(input: &Path, config_str: &str, out: &OutputConfig) -> Result<()> { + let content = read_input(input)?; + let json: serde_json::Value = + serde_json::from_str(&content).context("Input is not valid JSON")?; + + if !(json.get("source").is_some() && json.get("target").is_some() && json.get("path").is_some()) + { + anyhow::bail!( + "Input is not a reduction bundle.\n\ + `pred extract` requires a bundle produced by `pred reduce`.\n\ + If this is a plain problem file, did you mean `pred evaluate`?" + ); + } + + let bundle: ReductionBundle = + serde_json::from_value(json).context("Failed to parse reduction bundle")?; + + // An empty --config means an empty target configuration (zero-variable target problem). + let target_config: Vec<usize> = if config_str.trim().is_empty() { + Vec::new() + } else { + config_str + .split(',') + .map(|s| { + s.trim() + .parse::<usize>() + .map_err(|e| anyhow::anyhow!("Invalid config value '{}': {}", s.trim(), e)) + }) + .collect::<Result<Vec<_>>>()? 
+ }; + + let replay = BundleReplay::prepare(&bundle)?; + + let target_dims = replay.target.dims_dyn(); + if target_config.len() != target_dims.len() { + anyhow::bail!( + "Target config has {} values but target problem {} has {} variables", + target_config.len(), + replay.target_name, + target_dims.len() + ); + } + for (i, (val, dim)) in target_config.iter().zip(target_dims.iter()).enumerate() { + if *val >= *dim { + anyhow::bail!( + "Target config value {} at position {} is out of range: variable {} has {} possible values (0..{})", + val, i, i, dim, dim.saturating_sub(1) + ); + } + } + let target_eval = replay.target.evaluate_dyn(&target_config); + + let (source_config, source_eval) = replay.extract(&target_config); + + let text = format!( + "Problem: {}\nSolver: external (via {})\nSolution: {:?}\nEvaluation: {}", + replay.source_name, replay.target_name, source_config, source_eval, + ); + + // Schema aligned with `pred solve` on a bundle: `problem`, `reduced_to`, `solution`, + // `evaluation`, `intermediate { problem, solution, evaluation }`. `solver` is "external" + // to signal that pred did not run a solver — the target config came from outside. 
+ let json = serde_json::json!({ + "problem": replay.source_name, + "solver": "external", + "reduced_to": replay.target_name, + "solution": source_config, + "evaluation": source_eval, + "intermediate": { + "problem": replay.target_name, + "solution": target_config, + "evaluation": target_eval, + }, + }); + + out.emit_with_default_name("pred_extract.json", &text, &json) +} diff --git a/problemreductions-cli/src/commands/mod.rs b/problemreductions-cli/src/commands/mod.rs index f42382f2..ab6eca71 100644 --- a/problemreductions-cli/src/commands/mod.rs +++ b/problemreductions-cli/src/commands/mod.rs @@ -1,5 +1,6 @@ pub mod create; pub mod evaluate; +pub mod extract; pub mod graph; pub mod inspect; pub mod reduce; diff --git a/problemreductions-cli/src/commands/solve.rs b/problemreductions-cli/src/commands/solve.rs index 67141ce1..80207d44 100644 --- a/problemreductions-cli/src/commands/solve.rs +++ b/problemreductions-cli/src/commands/solve.rs @@ -1,7 +1,6 @@ -use crate::dispatch::{load_problem, read_input, ProblemJson, ReductionBundle}; +use crate::dispatch::{load_problem, read_input, BundleReplay, ProblemJson, ReductionBundle}; use crate::output::OutputConfig; use anyhow::{Context, Result}; -use problemreductions::rules::ReductionGraph; use std::path::Path; use std::time::Duration; @@ -166,75 +165,39 @@ fn solve_problem( /// Solve a reduction bundle: solve the target problem, then map the solution back. fn solve_bundle(bundle: ReductionBundle, solver_name: &str, out: &OutputConfig) -> Result<()> { - // 1. Load the target problem from the bundle - let target = load_problem( - &bundle.target.problem_type, - &bundle.target.variant, - bundle.target.data.clone(), - )?; - let target_name = target.problem_name(); + let replay = BundleReplay::prepare(&bundle)?; - // 2. 
Solve the target problem let target_result = match solver_name { - "brute-force" => target.solve_brute_force_witness().ok_or_else(|| { + "brute-force" => replay.target.solve_brute_force_witness().ok_or_else(|| { anyhow::anyhow!( "Bundle solving requires a witness-capable target problem and witness-capable reduction path; {} only supports aggregate-value solving.", - target_name + replay.target_name ) })?, - "ilp" => target.solve_with_ilp().map_err(add_ilp_solver_hint)?, - "customized" => target + "ilp" => replay.target.solve_with_ilp().map_err(add_ilp_solver_hint)?, + "customized" => replay + .target .solve_with_customized() .map_err(add_customized_solver_hint)?, _ => unreachable!(), }; - // 3. Load source problem and re-execute the reduction chain to get extract_solution - let source = load_problem( - &bundle.source.problem_type, - &bundle.source.variant, - bundle.source.data.clone(), - )?; - let source_name = source.problem_name(); + let (source_config, source_eval) = replay.extract(&target_result.config); - let graph = ReductionGraph::new(); - - // Reconstruct the ReductionPath from the bundle's path steps - let reduction_path = problemreductions::rules::ReductionPath { - steps: bundle - .path - .iter() - .map(|s| problemreductions::rules::ReductionStep { - name: s.name.clone(), - variant: s.variant.clone(), - }) - .collect(), - }; - - let chain = graph - .reduce_along_path(&reduction_path, source.as_any()) - .ok_or_else(|| anyhow::anyhow!( - "Bundle solving requires a witness-capable reduction path; this bundle cannot recover a source solution." - ))?; - - // 4. 
Extract solution back to source problem space - let source_config = chain.extract_solution(&target_result.config); - let source_eval = source.evaluate_dyn(&source_config); - - let solver_desc = format!("{} (via {})", solver_name, target_name); + let solver_desc = format!("{} (via {})", solver_name, replay.target_name); let text = format!( "Problem: {}\nSolver: {}\nSolution: {:?}\nEvaluation: {}", - source_name, solver_desc, source_config, source_eval, + replay.source_name, solver_desc, source_config, source_eval, ); let json = serde_json::json!({ - "problem": source_name, + "problem": replay.source_name, "solver": solver_name, - "reduced_to": target_name, + "reduced_to": replay.target_name, "solution": source_config, "evaluation": source_eval, "intermediate": { - "problem": target_name, + "problem": replay.target_name, "solution": target_result.config, "evaluation": target_result.evaluation, }, diff --git a/problemreductions-cli/src/dispatch.rs b/problemreductions-cli/src/dispatch.rs index 3c1972b1..4849373b 100644 --- a/problemreductions-cli/src/dispatch.rs +++ b/problemreductions-cli/src/dispatch.rs @@ -114,6 +114,130 @@ impl LoadedProblem { } } +/// A validated reduction bundle ready to replay: +/// source, target, and the reconstructed reduction chain. Construct via +/// [`BundleReplay::prepare`]. All three CLI/MCP bundle workflows +/// (`pred solve <bundle>`, `pred extract <bundle>`, MCP `solve_problem`) +/// share this setup so validation and error text stay in sync. +pub struct BundleReplay { + pub(crate) source: LoadedProblem, + pub(crate) source_name: String, + pub(crate) target: LoadedProblem, + pub(crate) target_name: String, + pub(crate) chain: problemreductions::rules::ReductionChain, +} + +impl BundleReplay { + /// Validate the bundle and replay the reduction chain. 
+ /// + /// Checks: + /// - `path` has at least two steps + /// - `path[0]` matches `source` (name + variant) + /// - `path[-1]` matches `target` (name + variant) + /// - serializing the chain's replayed target equals `bundle.target.data` + /// (tampered/stale bundles where `target.data` disagrees with what + /// `reduce_along_path` actually produced are rejected) + /// + /// Returns an error (not a panic) for malformed bundles or aggregate-only paths. + pub fn prepare(bundle: &ReductionBundle) -> Result<Self> { + if bundle.path.len() < 2 { + anyhow::bail!( + "Malformed bundle: `path` must contain at least two steps (source and target), got {}", + bundle.path.len() + ); + } + let first = bundle.path.first().unwrap(); + let last = bundle.path.last().unwrap(); + if first.name != bundle.source.problem_type || first.variant != bundle.source.variant { + anyhow::bail!( + "Malformed bundle: path starts with {} but source is {}", + format_step(&first.name, &first.variant), + format_step(&bundle.source.problem_type, &bundle.source.variant), + ); + } + if last.name != bundle.target.problem_type || last.variant != bundle.target.variant { + anyhow::bail!( + "Malformed bundle: path ends with {} but target is {}", + format_step(&last.name, &last.variant), + format_step(&bundle.target.problem_type, &bundle.target.variant), + ); + } + + let source = load_problem( + &bundle.source.problem_type, + &bundle.source.variant, + bundle.source.data.clone(), + )?; + let source_name = source.problem_name().to_string(); + + let target = load_problem( + &bundle.target.problem_type, + &bundle.target.variant, + bundle.target.data.clone(), + )?; + let target_name = target.problem_name().to_string(); + + let reduction_path = problemreductions::rules::ReductionPath { + steps: bundle + .path + .iter() + .map(|s| problemreductions::rules::ReductionStep { + name: s.name.clone(), + variant: s.variant.clone(), + }) + .collect(), + }; + + let graph = ReductionGraph::new(); + let chain = graph + 
+ .reduce_along_path(&reduction_path, source.as_any()) + .ok_or_else(|| anyhow::anyhow!( + "Bundle requires a witness-capable reduction path; this bundle cannot map a target solution back to the source." + ))?; + + // Coherence check: `bundle.target.data` must equal what replaying + // `source` along `path` actually produces. Without this, a caller + // could solve/validate against the bundle's stated target but then + // extract through a completely different chain target. + let replayed_target_data = + serialize_any_problem(&last.name, &last.variant, chain.target_problem_any())?; + if replayed_target_data != bundle.target.data { + anyhow::bail!( + "Malformed bundle: `target.data` does not match the result of replaying \ + `source` along `path`. The bundle is tampered or was produced by \ + incompatible code." + ); + } + + Ok(Self { + source, + source_name, + target, + target_name, + chain, + }) + } + + /// Map a target-space configuration back to the source space and evaluate it. + pub fn extract(&self, target_config: &[usize]) -> (Vec<usize>, String) { + let source_config = self.chain.extract_solution(target_config); + let source_eval = self.source.evaluate_dyn(&source_config); + (source_config, source_eval) + } +} + +fn format_step(name: &str, variant: &BTreeMap<String, String>) -> String { + if variant.is_empty() { + name.to_string() + } else { + let parts: Vec<String> = variant + .iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect(); + format!("{}{{{}}}", name, parts.join(", ")) + } +} + /// Load a problem from JSON type/variant/data. 
pub fn load_problem( name: &str, diff --git a/problemreductions-cli/src/main.rs b/problemreductions-cli/src/main.rs index ce436213..702199e4 100644 --- a/problemreductions-cli/src/main.rs +++ b/problemreductions-cli/src/main.rs @@ -34,7 +34,11 @@ fn main() -> anyhow::Result<()> { // Data-producing commands auto-output JSON when piped let auto_json = matches!( cli.command, - Commands::Reduce(_) | Commands::Solve(_) | Commands::Evaluate(_) | Commands::Inspect(_) + Commands::Reduce(_) + | Commands::Solve(_) + | Commands::Evaluate(_) + | Commands::Inspect(_) + | Commands::Extract(_) ); let out = OutputConfig { @@ -72,6 +76,7 @@ fn main() -> anyhow::Result<()> { commands::reduce::reduce(&args.input, args.to.as_deref(), args.via.as_deref(), &out) } Commands::Evaluate(args) => commands::evaluate::evaluate(&args.input, &args.config, &out), + Commands::Extract(args) => commands::extract::extract(&args.input, &args.config, &out), #[cfg(feature = "mcp")] Commands::Mcp => mcp::run(), Commands::Completions { shell } => { diff --git a/problemreductions-cli/src/mcp/tools.rs b/problemreductions-cli/src/mcp/tools.rs index e6a5df56..a0e2f113 100644 --- a/problemreductions-cli/src/mcp/tools.rs +++ b/problemreductions-cli/src/mcp/tools.rs @@ -21,7 +21,8 @@ use serde::Serialize; use std::collections::BTreeMap; use crate::dispatch::{ - load_problem, serialize_any_problem, PathStep, ProblemJson, ProblemJsonOutput, ReductionBundle, + load_problem, serialize_any_problem, BundleReplay, PathStep, ProblemJson, ProblemJsonOutput, + ReductionBundle, }; use crate::problem_name::{aliases_for, resolve_problem_ref, unknown_problem_error}; @@ -1511,62 +1512,30 @@ fn solve_problem_inner( /// Solve a reduction bundle: solve the target, then map the solution back. 
fn solve_bundle_inner(bundle: ReductionBundle, solver_name: &str) -> anyhow::Result { - let target = load_problem( - &bundle.target.problem_type, - &bundle.target.variant, - bundle.target.data.clone(), - )?; - let target_name = target.problem_name(); + let replay = BundleReplay::prepare(&bundle)?; let target_result = match solver_name { - "brute-force" => target.solve_brute_force_witness().ok_or_else(|| { + "brute-force" => replay.target.solve_brute_force_witness().ok_or_else(|| { anyhow::anyhow!( "Bundle solving requires a witness-capable target problem and witness-capable reduction path; {} only supports aggregate-value solving.", - target_name + replay.target_name ) })?, - "ilp" => target.solve_with_ilp()?, - "customized" => target.solve_with_customized()?, + "ilp" => replay.target.solve_with_ilp()?, + "customized" => replay.target.solve_with_customized()?, _ => unreachable!(), }; - let source = load_problem( - &bundle.source.problem_type, - &bundle.source.variant, - bundle.source.data.clone(), - )?; - let source_name = source.problem_name(); - - let graph = ReductionGraph::new(); - - let reduction_path = problemreductions::rules::ReductionPath { - steps: bundle - .path - .iter() - .map(|s| problemreductions::rules::ReductionStep { - name: s.name.clone(), - variant: s.variant.clone(), - }) - .collect(), - }; - - let chain = graph - .reduce_along_path(&reduction_path, source.as_any()) - .ok_or_else(|| anyhow::anyhow!( - "Bundle solving requires a witness-capable reduction path; this bundle cannot recover a source solution." 
- ))?; - - let source_config = chain.extract_solution(&target_result.config); - let source_eval = source.evaluate_dyn(&source_config); + let (source_config, source_eval) = replay.extract(&target_result.config); let json = serde_json::json!({ - "problem": source_name, + "problem": replay.source_name, "solver": solver_name, - "reduced_to": target_name, + "reduced_to": replay.target_name, "solution": source_config, "evaluation": source_eval, "intermediate": { - "problem": target_name, + "problem": replay.target_name, "solution": target_result.config, "evaluation": target_result.evaluation, }, diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index 809670b6..4a8c19af 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -8734,3 +8734,477 @@ fn test_inspect_minimum_cardinality_key_lists_customized_solver() { std::fs::remove_file(&problem_file).ok(); } + +/// Solve a bundle with brute-force and return `(target_config_csv, source_evaluation)`. +/// +/// Used by extract tests so they do not depend on the exact reduction path chosen +/// (which differs between `--features mcp` and default builds). 
+fn extract_test_solve_bundle(bundle_file: &std::path::Path) -> (String, String) { + let solve_out = pred() + .args([ + "--json", + "solve", + bundle_file.to_str().unwrap(), + "--solver", + "brute-force", + ]) + .output() + .unwrap(); + assert!( + solve_out.status.success(), + "solve stderr: {}", + String::from_utf8_lossy(&solve_out.stderr) + ); + let json: serde_json::Value = serde_json::from_slice(&solve_out.stdout).unwrap(); + let target_cfg: Vec<String> = json["intermediate"]["solution"] + .as_array() + .unwrap() + .iter() + .map(|v| v.as_u64().unwrap().to_string()) + .collect(); + let source_eval = json["evaluation"].as_str().unwrap().to_string(); + (target_cfg.join(","), source_eval) +} + +#[test] +fn test_extract_roundtrip_mis_to_qubo() { + let problem_file = std::env::temp_dir().join("pred_test_extract_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_bundle.json"); + + let create_out = pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2,2-3", + ]) + .output() + .unwrap(); + assert!(create_out.status.success()); + + let reduce_out = pred() + .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + assert!( + reduce_out.status.success(), + "reduce stderr: {}", + String::from_utf8_lossy(&reduce_out.stderr) + ); + + // Derive a valid target config from `pred solve`, so this test works + // regardless of which reduction path is chosen (path length varies with + // feature flags — e.g. mcp build picks MIS -> ... -> ILP -> QUBO instead + // of the shorter MaxSetPacking -> QUBO path). 
+ let (target_cfg, expected_source_eval) = extract_test_solve_bundle(&bundle_file); + + let extract_out = pred() + .args([ + "--json", + "extract", + bundle_file.to_str().unwrap(), + "--config", + &target_cfg, + ]) + .output() + .unwrap(); + assert!( + extract_out.status.success(), + "extract stderr: {}", + String::from_utf8_lossy(&extract_out.stderr) + ); + let stdout = String::from_utf8(extract_out.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["problem"].as_str().unwrap(), "MaximumIndependentSet"); + assert_eq!(json["reduced_to"].as_str().unwrap(), "QUBO"); + assert_eq!(json["solver"].as_str().unwrap(), "external"); + // extract on pred-solve's own target config must round-trip to the same source evaluation. + assert_eq!(json["evaluation"].as_str().unwrap(), expected_source_eval); + assert_eq!(json["intermediate"]["problem"].as_str().unwrap(), "QUBO"); + + // intermediate.solution must be exactly the target config we passed in + // (extract echoes the input target config unchanged). + let expected_target: Vec<serde_json::Value> = target_cfg + .split(',') + .map(|s| serde_json::json!(s.parse::<u64>().unwrap())) + .collect(); + assert_eq!( + json["intermediate"]["solution"].as_array().unwrap(), + &expected_target + ); + + // Source config is over 4 MIS variables and must describe an independent set + // whose size matches `expected_source_eval` (e.g. "Max(2)" -> 2 ones). 
+ let source_sol: Vec<u64> = json["solution"] + .as_array() + .unwrap() + .iter() + .map(|v| v.as_u64().unwrap()) + .collect(); + assert_eq!(source_sol.len(), 4); + assert!(source_sol.iter().all(|b| *b == 0 || *b == 1)); + let ones = source_sol.iter().filter(|b| **b == 1).count(); + assert_eq!( + expected_source_eval, + format!("Max({ones})"), + "MIS size in solution should match declared evaluation" + ); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); +} + +#[test] +fn test_extract_rejects_plain_problem_file() { + let problem_file = std::env::temp_dir().join("pred_test_extract_plain.json"); + + let create_out = pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2", + ]) + .output() + .unwrap(); + assert!(create_out.status.success()); + + let extract_out = pred() + .args([ + "extract", + problem_file.to_str().unwrap(), + "--config", + "0,1,0", + ]) + .output() + .unwrap(); + assert!(!extract_out.status.success()); + let stderr = String::from_utf8(extract_out.stderr).unwrap(); + assert!( + stderr.contains("not a reduction bundle"), + "unexpected stderr: {stderr}" + ); + + std::fs::remove_file(&problem_file).ok(); +} + +#[test] +fn test_extract_rejects_wrong_config_length() { + let problem_file = std::env::temp_dir().join("pred_test_extract_wrong_len_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_wrong_len_bundle.json"); + + pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2", + ]) + .output() + .unwrap(); + pred() + .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + + let extract_out = pred() + .args(["extract", bundle_file.to_str().unwrap(), "--config", "0,1"]) + .output() + .unwrap(); + assert!(!extract_out.status.success()); + let stderr = String::from_utf8(extract_out.stderr).unwrap(); + assert!( + 
stderr.contains("Target config has 2 values"), + "unexpected stderr: {stderr}" + ); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); +} + +#[test] +fn test_extract_rejects_out_of_range_config_value() { + let problem_file = std::env::temp_dir().join("pred_test_extract_range_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_range_bundle.json"); + + pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2", + ]) + .output() + .unwrap(); + pred() + .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + + // Build a valid-length config from pred solve, then flip one entry to 9 + // (always out of range for a binary QUBO regardless of path). + let (target_cfg, _) = extract_test_solve_bundle(&bundle_file); + let mut parts: Vec<String> = target_cfg.split(',').map(|s| s.to_string()).collect(); + parts[0] = "9".to_string(); + let bad_cfg = parts.join(","); + + let extract_out = pred() + .args([ + "extract", + bundle_file.to_str().unwrap(), + "--config", + &bad_cfg, + ]) + .output() + .unwrap(); + assert!(!extract_out.status.success()); + let stderr = String::from_utf8(extract_out.stderr).unwrap(); + assert!( + stderr.contains("out of range"), + "unexpected stderr: {stderr}" + ); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); +} + +#[test] +fn test_extract_rejects_malformed_bundle_path_source_mismatch() { + use std::io::Write; + + let problem_file = std::env::temp_dir().join("pred_test_extract_malformed_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_malformed_bundle.json"); + let tampered_file = std::env::temp_dir().join("pred_test_extract_malformed_tampered.json"); + + pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2", + ]) + .output() + .unwrap(); + pred() 
+ .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + + let bundle_text = std::fs::read_to_string(&bundle_file).unwrap(); + let mut bundle: serde_json::Value = serde_json::from_str(&bundle_text).unwrap(); + // Tamper: make the source type disagree with path[0]. + bundle["source"]["type"] = serde_json::json!("NotTheRealSource"); + let mut f = std::fs::File::create(&tampered_file).unwrap(); + f.write_all(bundle.to_string().as_bytes()).unwrap(); + + let extract_out = pred() + .args([ + "extract", + tampered_file.to_str().unwrap(), + "--config", + "0,1,0", + ]) + .output() + .unwrap(); + assert!( + !extract_out.status.success(), + "expected failure on malformed bundle; stdout: {}", + String::from_utf8_lossy(&extract_out.stdout) + ); + let stderr = String::from_utf8(extract_out.stderr).unwrap(); + assert!( + stderr.contains("Malformed bundle"), + "unexpected stderr: {stderr}" + ); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); + std::fs::remove_file(&tampered_file).ok(); +} + +#[test] +fn test_extract_rejects_tampered_target_data() { + use std::io::Write; + + let problem_file = std::env::temp_dir().join("pred_test_extract_tampered_target_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_tampered_target_bundle.json"); + let tampered_file = + std::env::temp_dir().join("pred_test_extract_tampered_target_tampered.json"); + + pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2,2-3", + ]) + .output() + .unwrap(); + pred() + .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + + // Tamper: flip one QUBO matrix entry so target.data no longer matches + // what the reduction chain actually produces. 
+ let bundle_text = std::fs::read_to_string(&bundle_file).unwrap(); + let mut bundle: serde_json::Value = serde_json::from_str(&bundle_text).unwrap(); + bundle["target"]["data"]["matrix"][0][0] = serde_json::json!(999.0); + let mut f = std::fs::File::create(&tampered_file).unwrap(); + f.write_all(bundle.to_string().as_bytes()).unwrap(); + + // Any config long enough to reach the coherence check; it must fail before + // config validation kicks in because prepare() runs first. + let (target_cfg, _) = extract_test_solve_bundle(&bundle_file); + let extract_out = pred() + .args([ + "extract", + tampered_file.to_str().unwrap(), + "--config", + &target_cfg, + ]) + .output() + .unwrap(); + assert!( + !extract_out.status.success(), + "expected failure on tampered target.data; stdout: {}", + String::from_utf8_lossy(&extract_out.stdout) + ); + let stderr = String::from_utf8(extract_out.stderr).unwrap(); + assert!( + stderr.contains("`target.data` does not match"), + "unexpected stderr: {stderr}" + ); + + // Same check must also fire through `pred solve` on the tampered bundle — + // BundleReplay::prepare is the shared gate. 
+ let solve_out = pred() + .args([ + "solve", + tampered_file.to_str().unwrap(), + "--solver", + "brute-force", + ]) + .output() + .unwrap(); + assert!(!solve_out.status.success()); + let solve_err = String::from_utf8(solve_out.stderr).unwrap(); + assert!( + solve_err.contains("`target.data` does not match"), + "pred solve should also reject tampered bundles; got: {solve_err}" + ); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); + std::fs::remove_file(&tampered_file).ok(); +} + +#[test] +fn test_extract_reads_bundle_from_stdin() { + use std::io::Write; + use std::process::Stdio; + + let problem_file = std::env::temp_dir().join("pred_test_extract_stdin_in.json"); + let bundle_file = std::env::temp_dir().join("pred_test_extract_stdin_bundle.json"); + + pred() + .args([ + "-o", + problem_file.to_str().unwrap(), + "create", + "MIS", + "--graph", + "0-1,1-2,2-3", + ]) + .output() + .unwrap(); + pred() + .args([ + "-o", + bundle_file.to_str().unwrap(), + "reduce", + problem_file.to_str().unwrap(), + "--to", + "QUBO", + ]) + .output() + .unwrap(); + let (target_cfg, _) = extract_test_solve_bundle(&bundle_file); + let bundle_text = std::fs::read_to_string(&bundle_file).unwrap(); + + let mut child = pred() + .args(["--json", "extract", "-", "--config", &target_cfg]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + child + .stdin + .as_mut() + .unwrap() + .write_all(bundle_text.as_bytes()) + .unwrap(); + let output = child.wait_with_output().unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["problem"].as_str().unwrap(), "MaximumIndependentSet"); + assert_eq!(json["reduced_to"].as_str().unwrap(), "QUBO"); + assert_eq!(json["solver"].as_str().unwrap(), "external"); + 
assert_eq!(json["evaluation"].as_str().unwrap(), "Max(2)"); + + std::fs::remove_file(&problem_file).ok(); + std::fs::remove_file(&bundle_file).ok(); +}