From 9254f944d926a78f9b7e829bc3f2eab6597ae7f2 Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Sat, 13 Jun 2026 20:44:46 -0700 Subject: [PATCH 1/2] feat: merge review suggestions from both clang tools This is a culmination of - #354 - #347 It it the last step needed before providing a patch that consumers can push to auto-fix lints. # Intended flow of data 1. run clang-tidy before running clang-format 2. after running clang-tidy save the patched file to cache 3. after running clang-format: - check for clang-tidy patch in cache. - if cached patch is present, run clang-format on the clang-tidy patch. This step includes formatting lines that clang-tidy changed and lines that clang-format changed. - if cached patch is not present (clang-tidy was not run on the file), then cache the clang-format fixes instead. - `--lines-changed-only` is respected, but this may need tweaking in the future. 4. when creating a PR review, only add hunks that are not present in the review comments. I added a parameter to also calculate (and display) how many review comments were reused in previous PR reviews. If PR review is a summary only, then the patch will include all fixes that would have been posted in the diff. Adjusted tests accordingly. --- cpp-linter/src/clang_tools/clang_format.rs | 121 +++++++--- cpp-linter/src/clang_tools/clang_tidy.rs | 29 +-- cpp-linter/src/clang_tools/mod.rs | 259 +++++++++------------ cpp-linter/src/cli/structs.rs | 8 + cpp-linter/src/common_fs.rs | 118 ++++++++-- cpp-linter/src/error.rs | 4 + cpp-linter/src/rest_client.rs | 16 +- cpp-linter/src/run.rs | 4 +- cpp-linter/tests/reviews.rs | 37 ++- docs/pyproject.toml | 2 +- 10 files changed, 342 insertions(+), 256 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index aac933a0..01ec3681 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -4,36 +4,22 @@ use std::{ fs, ops::RangeInclusive, - path::PathBuf, process::Command, sync::{Arc, Mutex, MutexGuard}, }; -use gix_imara_diff::{Diff, InternedInput}; use log::Level; // project-specific crates/modules -use super::{CACHE_DIR, MakeSuggestions}; -use crate::{cli::ClangParams, common_fs::FileObj, error::ClangCaptureError}; +use crate::{ + clang_tools::make_patch, cli::ClangParams, common_fs::FileObj, error::ClangCaptureError, +}; /// A struct to hold clang-format advice for a single file. #[derive(Debug, Clone, PartialEq, Eq, Default)] pub struct FormatAdvice { /// A list of line ranges that clang-format wants to replace. pub replacements: Vec>, - - /// A path to a cached file containing the full contents of the file after applying clang-format fixes. - pub patched: PathBuf, -} - -impl MakeSuggestions for FormatAdvice { - fn get_suggestion_help(&self, _start_line: u32, _end_line: u32) -> String { - String::from("### clang-format suggestions\n") - } - - fn get_tool_name(&self) -> String { - "clang-format".to_string() - } } /// Get a string that summarizes the given `--style` @@ -82,14 +68,7 @@ pub fn run_clang_format( for range in &ranges { cmd.arg(format!("--lines={}:{}", range.start(), range.end())); } - let cache_path = clang_params.repo_root.join(CACHE_DIR).join("patches"); - let cache_format_fixes = cache_path.join(file.name.with_added_extension("format")); - fs::create_dir_all( - cache_format_fixes - .parent() - .ok_or(ClangCaptureError::UnknownCacheParentPath)?, - ) - .map_err(ClangCaptureError::MkDirFailed)?; + let cache_path = clang_params.get_cache_path(); let file_name = file.name.to_string_lossy().to_string(); cmd.arg(file.name.to_path_buf().as_os_str()); logs.push(( @@ -109,12 +88,6 @@ pub fn run_clang_format( task: format!("get fixes from clang-format {file_name}"), source: e, })?; - fs::write(&cache_format_fixes, &output.stdout).map_err(|e| { - ClangCaptureError::WriteFileFailed { - file_name: cache_format_fixes.to_string_lossy().to_string(), - source: e, - } - })?; if !output.stderr.is_empty() || !output.status.success() { logs.push(( @@ -140,9 +113,7 @@ pub fn run_clang_format( source: e, } })?; - let input = InternedInput::new(original_contents.as_str(), patched_contents.as_str()); - let mut diff = Diff::compute(gix_imara_diff::Algorithm::Histogram, &input); - diff.postprocess_lines(&input); + let (diff, _) = make_patch(&patched_contents, &original_contents); let format_advice = FormatAdvice { replacements: diff .hunks() @@ -166,8 +137,88 @@ pub fn run_clang_format( } }) .collect(), - patched: cache_format_fixes, }; + + // if a clang-tidy patched file exists in cache, + // get the diff between it and the original file, + // then format both clang-tidy fixes and any other changes by clang-format fixes. + if let Some(patched_path) = &file.patched_path + && patched_path.exists() + { + let tidy_patch_contents = + fs::read_to_string(patched_path).map_err(|e| ClangCaptureError::ReadFileFailed { + file_name: patched_path.to_string_lossy().to_string(), + source: e, + })?; + let (tidy_diff, _) = make_patch(&tidy_patch_contents, &original_contents); + let mut cmd = Command::new(cmd_path); + cmd.current_dir(&cache_path); + // edit the clang-tody patched file in-place (`-i`) + cmd.args(["--style", &clang_params.style, "-i"]); + // if ranges is empty, then we're just formatting the entire file. + if !ranges.is_empty() { + // We're concerned about formatting what clang-tidy changed (tidy_diff.hunks().before), + // but we also want to include any clang-format changes that do not overlap clang-tidy fixes. + let mut joint_ranges = tidy_diff + .hunks() + // hunk is partially inclusive: [start, end), + // but clang-format expects fully inclusive line ranges. + // subtract 1 from hunk.before.end + .map(|hunk| { + RangeInclusive::new(hunk.before.start, hunk.before.end.saturating_sub(1)) + }) + .collect::>(); + for range in &ranges { + let mut contained = false; + for hunk in tidy_diff.hunks() { + if hunk.before.contains(range.start()) && hunk.before.contains(range.end()) { + contained = true; + break; + } + } + if !contained { + joint_ranges.push(range.clone()); + } + } + for range in &joint_ranges { + cmd.arg(format!("--lines={}:{}", range.start(), range.end()).as_str()); + } + } + cmd.arg(&file_name); + let output = cmd + .output() + .map_err(|e| ClangCaptureError::FailedToRunCommand { + task: format!("apply clang-format to clang-tidy fixes ({file_name})"), + source: e, + })?; + if !output.stderr.is_empty() || !output.status.success() { + logs.push(( + log::Level::Debug, + format!( + "clang-format raised the follow errors about clang-tidy fixes:\n{}", + String::from_utf8_lossy(&output.stderr) + ), + )); + } + } else { + // clang-tidy was not run on this file, + // so just use the clang-format fixes as the patched content. + let cache_format_fixes = cache_path.join(&file.name); + fs::create_dir_all( + cache_format_fixes + .parent() + .ok_or(ClangCaptureError::UnknownCacheParentPath)?, + ) + .map_err(ClangCaptureError::MkDirFailed)?; + fs::write(&cache_format_fixes, &output.stdout).map_err(|e| { + ClangCaptureError::WriteFileFailed { + file_name: cache_format_fixes.to_string_lossy().to_string(), + source: e, + } + })?; + file.patched_path = Some(cache_format_fixes); + } + file.format_advice = Some(format_advice); Ok(logs) } diff --git a/cpp-linter/src/clang_tools/clang_tidy.rs b/cpp-linter/src/clang_tools/clang_tidy.rs index 6d87adc7..fdf0a9af 100644 --- a/cpp-linter/src/clang_tools/clang_tidy.rs +++ b/cpp-linter/src/clang_tools/clang_tidy.rs @@ -15,10 +15,7 @@ use regex::Regex; use serde::Deserialize; // project-specific modules/crates -use super::MakeSuggestions; -use crate::{ - clang_tools::CACHE_DIR, cli::ClangParams, common_fs::FileObj, error::ClangCaptureError, -}; +use crate::{cli::ClangParams, common_fs::FileObj, error::ClangCaptureError}; /// Used to deserialize a json compilation database's translation unit. /// @@ -104,13 +101,10 @@ impl TidyNotification { pub struct TidyAdvice { /// A list of notifications parsed from clang-tidy stdout. pub notes: Vec, - - /// A path to the cached contents of the file after applying clang-tidy fixes. - pub patched: PathBuf, } -impl MakeSuggestions for TidyAdvice { - fn get_suggestion_help(&self, start_line: u32, end_line: u32) -> String { +impl TidyAdvice { + pub(crate) fn get_suggestion_help(&self, start_line: u32, end_line: u32) -> String { let mut diagnostics = vec![]; for note in &self.notes { for fixed_line in ¬e.fixed_lines { @@ -133,10 +127,6 @@ impl MakeSuggestions for TidyAdvice { diagnostics.join("") ) } - - fn get_tool_name(&self) -> String { - "clang-tidy".to_string() - } } /// A regex pattern to capture the clang-tidy note header. @@ -336,10 +326,8 @@ pub fn run_clang_tidy( .join(" ") ), )); - let cache_patch_path = clang_params - .repo_root - .join(CACHE_DIR) - .join(file.name.with_added_extension("tidy")); + let cache_path = clang_params.get_cache_path(); + let cache_patch_path = cache_path.join(&file.name); fs::create_dir_all( cache_patch_path .parent() @@ -403,10 +391,8 @@ pub fn run_clang_tidy( &clang_params.repo_root, )?; - let tidy_advice = TidyAdvice { - notes, - patched: cache_patch_path.to_path_buf(), - }; + let tidy_advice = TidyAdvice { notes }; + file.patched_path = Some(cache_patch_path.to_path_buf()); file.tidy_advice = Some(tidy_advice); Ok(logs) } @@ -539,6 +525,7 @@ mod test { repo_root: tmp_workspace.path().to_path_buf(), }; let mut file_lock = arc_file.lock().unwrap(); + fs::create_dir_all(clang_params.get_cache_path()).unwrap(); let logs = run_clang_tidy(&mut file_lock, &clang_params) .unwrap() .into_iter() diff --git a/cpp-linter/src/clang_tools/mod.rs b/cpp-linter/src/clang_tools/mod.rs index e4f25b2a..24483f9a 100644 --- a/cpp-linter/src/clang_tools/mod.rs +++ b/cpp-linter/src/clang_tools/mod.rs @@ -10,7 +10,7 @@ use std::{ // non-std crates use clang_tools_manager::{ClangTool, RequestedVersion}; use git_bot_feedback::ReviewComment; -use gix_imara_diff::{BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig}; +use gix_imara_diff::{Diff, InternedInput}; use semver::Version; use tokio::task::JoinSet; @@ -27,9 +27,6 @@ use clang_format::run_clang_format; pub mod clang_tidy; use clang_tidy::run_clang_tidy; -/// The directory name to use for caching clang-tidy and clang-format results. -pub const CACHE_DIR: &str = ".cpp-linter-cache"; - /// This creates a task to run clang-tidy and clang-format on a single file. /// /// Returns a Future that infallibly resolves to a 2-tuple that contains @@ -44,39 +41,39 @@ fn analyze_single_file( ) -> Result<(PathBuf, Vec<(log::Level, String)>), ClangCaptureError> { let mut file = file.lock().map_err(|_| ClangCaptureError::MutexPoisoned)?; let mut logs = vec![]; - if clang_params.clang_format_command.is_some() { + if clang_params.clang_tidy_command.is_some() { if clang_params - .format_filter + .tidy_filter .as_ref() .is_some_and(|f| f.is_qualified(file.name.as_path())) - || clang_params.format_filter.is_none() + || clang_params.tidy_filter.is_none() { - let format_result = run_clang_format(&mut file, &clang_params)?; - logs.extend(format_result); + let tidy_result = run_clang_tidy(&mut file, &clang_params)?; + logs.extend(tidy_result); } else { logs.push(( log::Level::Info, format!( - "{} not scanned by clang-format due to `--ignore-format`", + "{} not scanned by clang-tidy due to `--ignore-tidy`", file.name.as_os_str().to_string_lossy() ), )); } } - if clang_params.clang_tidy_command.is_some() { + if clang_params.clang_format_command.is_some() { if clang_params - .tidy_filter + .format_filter .as_ref() .is_some_and(|f| f.is_qualified(file.name.as_path())) - || clang_params.tidy_filter.is_none() + || clang_params.format_filter.is_none() { - let tidy_result = run_clang_tidy(&mut file, &clang_params)?; - logs.extend(tidy_result); + let format_result = run_clang_format(&mut file, &clang_params)?; + logs.extend(format_result); } else { logs.push(( log::Level::Info, format!( - "{} not scanned by clang-tidy due to `--ignore-tidy`", + "{} not scanned by clang-format due to `--ignore-format`", file.name.as_os_str().to_string_lossy() ), )); @@ -200,7 +197,11 @@ pub struct Suggestion { impl Suggestion { pub(crate) fn as_review_comment(&self) -> ReviewComment { ReviewComment { - line_start: Some(self.line_start), + line_start: if self.line_start == self.line_end { + None + } else { + Some(self.line_start) + }, line_end: self.line_end, comment: self.suggestion.clone(), path: self.path.clone(), @@ -215,7 +216,7 @@ pub struct ReviewComments { /// /// This differs from `comments.len()` because some suggestions may /// not fit within the file's diff. - pub tool_total: [Option; 2], + pub tool_total: u32, /// A list of comment suggestions to be posted. /// /// These suggestions are guaranteed to fit in the file's diff. @@ -224,68 +225,83 @@ pub struct ReviewComments { /// /// This includes changes from both clang-tidy and clang-format /// (assembled in that order). - pub full_patch: [String; 2], + pub full_patch: String, } impl ReviewComments { /// Get a markdown-formatted string that summarizes the given [`ReviewComment`]s. + /// + /// The total_review_comments parameter describes the number of comments before + /// removing duplicates found in previous reviews. pub fn summarize( &self, clang_versions: &ClangVersions, - comments: &Vec, + comments: &[ReviewComment], + total_review_comments: u32, + summary_only: bool, ) -> String { let mut body = String::from("## Cpp-linter Review\n"); - for t in 0_usize..=1 { - let mut total = 0; - let (tool_name, tool_version) = if t == 0 { - ("clang-format", clang_versions.format_version.as_ref()) - } else { - ("clang-tidy", clang_versions.tidy_version.as_ref()) - }; - if tool_version.is_none() { - // this tool was not used at all - continue; - } - let tool_total = self.tool_total[t].unwrap_or_default(); - - // If the tool's version is unknown, then we don't need to output this line. - // NOTE: If the tool was invoked at all, then the tool's version shall be known. - if let Some(ver_str) = tool_version { - body.push_str(format!("\n### Used {tool_name} v{ver_str}\n").as_str()); - } - for comment in comments { - if comment - .comment - .contains(format!("### {tool_name}").as_str()) - { - total += 1; - } + let versions = [ + ( + ClangTool::ClangFormat, + clang_versions.format_version.as_ref(), + ), + (ClangTool::ClangTidy, clang_versions.tidy_version.as_ref()), + ]; + for (tool_name, tool_version) in versions { + if let Some(ver) = tool_version { + body.push_str(format!("### Used {tool_name} v{ver}\n").as_str()); } + } - if total != tool_total { - body.push_str( - format!( - "\nOnly {total} out of {tool_total} {tool_name} concerns fit within this pull request's diff.\n", - ) - .as_str(), - ); - } - if !self.full_patch[t].is_empty() { - body.push_str( - format!( - "\n
Click here for the full {tool_name} patch\n\n```diff\n{}```\n\n
\n", - self.full_patch[t] - ).as_str() - ); - } else { - body.push_str( - format!( - "\nNo concerns reported by {}. Great job! :tada:\n", - tool_name - ) - .as_str(), + let total = comments.len() as u32; + if summary_only && self.tool_total > 0 { + body.push_str( + format!( + "\nFound {} areas of concern according to clang tools output.\n", + self.tool_total + ) + .as_str(), + ); + } + if !summary_only && total_review_comments != self.tool_total { + body.push_str( + format!( + "\nOnly {total_review_comments} out of {} concerns fit within this pull request's diff.\n", + self.tool_total, ) + .as_str(), + ); + } + // total number of comments can only go down after culling comments found in previous reviews. + if total_review_comments > total { + body.push_str( + format!( + "\n{} suggestions were duplicates of previous reviews.\n", + total_review_comments - total, + ) + .as_str(), + ); + } + // The `full_patch` includes all suggestions that didn't fit in the diff. + // It can also contain suggestions that were duplicates of previous reviews. + if !self.full_patch.is_empty() { + body.push_str("\n
Click here for "); + if summary_only { + body.push_str("the full patch of fixes"); + } else { + body.push_str("a patch of fixes outside the diff"); } + body.push_str( + format!( + "

\n\n```diff\n{}```\n\n

\n", + self.full_patch + ) + .as_str(), + ); + } else if total_review_comments == 0 { + // Only congratulate if there was no reused comments + body.push_str("\nNo concerns to report. Great job! :tada:\n"); } body.push_str(USER_OUTREACH); body @@ -319,95 +335,14 @@ pub fn make_patch<'buffer>( (diff, input) } -/// A trait for generating suggestions from a [`FileObj`]'s advice's generated `patched` buffer. -pub trait MakeSuggestions { - /// Create some user-facing helpful info about what the suggestion aims to resolve. - fn get_suggestion_help(&self, start_line: u32, end_line: u32) -> String; - - /// Get the tool's name which generated the advice. - fn get_tool_name(&self) -> String; - - /// Create a bunch of suggestions from a [`FileObj`]'s advice's generated `patched` buffer. - fn get_suggestions( - &self, - review_comments: &mut ReviewComments, - file_obj: &FileObj, - diff: &Diff, - input: &InternedInput<&str>, - summary_only: bool, - ) { - let is_tidy_tool = (&self.get_tool_name() == "clang-tidy") as usize; - let file_name = file_obj - .name - .to_string_lossy() - .replace("\\", "/") - .trim_start_matches("./") - .to_owned(); - let mut config = UnifiedDiffConfig::default(); - config.context_len(0); - let printer = BasicLineDiffPrinter(&input.interner); - let unified_diff = diff.unified_diff(&printer, config, input).to_string(); - if !unified_diff.is_empty() { - let patch_buf = format!("--- a/{file_name}\n+++ b/{file_name}\n{unified_diff}"); - review_comments.full_patch[is_tidy_tool].push_str(patch_buf.as_str()); - } - if summary_only { - review_comments.tool_total[is_tidy_tool].get_or_insert(0); - return; - } - let mut hunks_in_patch = 0u32; - for hunk in diff.hunks() { - hunks_in_patch += 1; - let hunk_range = file_obj.is_hunk_in_diff(&hunk); - match hunk_range { - None => continue, - Some((start_line, end_line)) => { - let mut suggestion = String::new(); - let suggestion_help = self.get_suggestion_help(start_line, end_line); - if hunk.is_pure_removal() { - suggestion.push_str( - format!( - "Please remove the line(s)\n- {}", - hunk.before - .map(|l| l.to_string()) - .collect::>() - .join("\n- ") - ) - .as_str(), - ); - } else { - suggestion.push_str("```suggestion\n"); - for token in - &input.after[hunk.after.start as usize..hunk.after.end as usize] - { - let line = &input.interner[*token]; - suggestion.push_str(line); - } - suggestion.push_str("```\n"); - } - let comment = Suggestion { - line_start: start_line, - line_end: end_line, - suggestion: format!("{suggestion_help}\n{suggestion}"), - path: file_name.clone(), - }; - if !review_comments.is_comment_in_suggestions(&comment) { - review_comments.comments.push(comment); - } - } - } - } - let tool_total = review_comments.tool_total[is_tidy_tool].get_or_insert(0); - *tool_total += hunks_in_patch; - } -} - #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] use std::{env, fs, path::Path}; + use git_bot_feedback::ReviewComment; + use super::*; #[cfg(feature = "bin")] use crate::logger::try_init; @@ -441,4 +376,28 @@ mod tests { fs::write(&db_path, "not a valid json").unwrap(); test_db_parse(tmp_dir.path()).await.unwrap(); } + + #[test] + fn summarize_reused_reviews() { + let comments = vec![ReviewComment { + line_start: Some(1), + line_end: 1, + comment: "First comment".to_string(), + path: "src/demo.cpp".to_string(), + }]; + let pseudo_version = Version::new(15, 0, 0); + let clang_versions = ClangVersions { + format_version: Some(pseudo_version.clone()), + tidy_version: Some(pseudo_version), + }; + let total_review_comments = 2; + let summary_only = false; + let review_summary = ReviewComments::default().summarize( + &clang_versions, + &comments, + total_review_comments, + summary_only, + ); + assert!(review_summary.contains("suggestions were duplicates of previous reviews")); + } } diff --git a/cpp-linter/src/cli/structs.rs b/cpp-linter/src/cli/structs.rs index 37254b76..f19073cd 100644 --- a/cpp-linter/src/cli/structs.rs +++ b/cpp-linter/src/cli/structs.rs @@ -230,6 +230,14 @@ pub struct ClangParams { pub repo_root: PathBuf, } +impl ClangParams { + /// The directory name to use for caching clang-tidy and clang-format results. + pub(crate) const CACHE_DIR: &str = ".cpp-linter-cache"; + pub(crate) fn get_cache_path(&self) -> PathBuf { + self.repo_root.join(Self::CACHE_DIR).join("patched") + } +} + #[cfg(feature = "bin")] impl From<&Cli> for ClangParams { /// Construct a [`ClangParams`] instance from a [`Cli`] instance. diff --git a/cpp-linter/src/common_fs.rs b/cpp-linter/src/common_fs.rs index 419b787c..82cbda60 100644 --- a/cpp-linter/src/common_fs.rs +++ b/cpp-linter/src/common_fs.rs @@ -8,12 +8,13 @@ use std::{ path::{Path, PathBuf}, }; -use gix_imara_diff::Hunk; +use gix_imara_diff::{ + BasicLineDiffPrinter, Diff, Hunk, InternedInput, UnifiedDiffConfig, UnifiedDiffPrinter, +}; use crate::{ clang_tools::{ - MakeSuggestions, ReviewComments, Suggestion, clang_format::FormatAdvice, - clang_tidy::TidyAdvice, make_patch, + ReviewComments, Suggestion, clang_format::FormatAdvice, clang_tidy::TidyAdvice, make_patch, }, cli::LinesChangedOnly, error::FileObjError, @@ -39,6 +40,9 @@ pub struct FileObj { /// The collection of clang-format advice for this file. pub tidy_advice: Option, + + /// A path to a cached file with all/any patches applied. + pub(crate) patched_path: Option, } impl FileObj { @@ -53,6 +57,7 @@ impl FileObj { diff_chunks: Vec::>::new(), format_advice: None, tidy_advice: None, + patched_path: None, } } @@ -75,6 +80,7 @@ impl FileObj { diff_chunks, format_advice: None, tidy_advice: None, + patched_path: None, } } @@ -168,24 +174,23 @@ impl FileObj { summary_only: bool, repo_root: &Path, ) -> Result<(), FileObjError> { + let patched = match &self.patched_path { + Some(patched_path) if patched_path.exists() => { + fs::read_to_string(patched_path).map_err(FileObjError::ReadFile)? + } + _ => return Ok(()), + }; let original_content = fs::read_to_string(repo_root.join(&self.name)).map_err(FileObjError::ReadFile)?; + let (diff, input) = make_patch(patched.as_str(), &original_content); let file_name = self.name.to_str().unwrap_or_default().replace("\\", "/"); - if let Some(advice) = &self.format_advice { - let patched = fs::read_to_string(&advice.patched).map_err(FileObjError::ReadFile)?; - let (diff, input) = make_patch(patched.as_str(), &original_content); - advice.get_suggestions(review_comments, self, &diff, &input, summary_only); - } + self.get_suggestions(review_comments, &diff, &input, summary_only) + .map_err(FileObjError::DisplayStringFailed)?; + if summary_only { + return Ok(()); + } if let Some(advice) = &self.tidy_advice { - let patched = fs::read_to_string(&advice.patched).map_err(FileObjError::ReadFile)?; - let (diff, input) = make_patch(patched.as_str(), &original_content); - advice.get_suggestions(review_comments, self, &diff, &input, summary_only); - - if summary_only { - return Ok(()); - } - // now check for clang-tidy warnings with no fixes applied let file_ext = self .name @@ -234,9 +239,86 @@ impl FileObj { } } } - review_comments.tool_total[1] = - Some(review_comments.tool_total[1].unwrap_or_default() + total); + review_comments.tool_total += total; + } + Ok(()) + } + + /// Create a bunch of suggestions from a [`FileObj`]'s advice's generated `patched` buffer. + fn get_suggestions( + &self, + review_comments: &mut ReviewComments, + diff: &Diff, + input: &InternedInput<&str>, + summary_only: bool, + ) -> Result<(), std::fmt::Error> { + let file_name = self + .name + .to_string_lossy() + .replace("\\", "/") + .trim_start_matches("./") + .to_owned(); + let mut config = UnifiedDiffConfig::default(); + config.context_len(0); + let printer = BasicLineDiffPrinter(&input.interner); + let mut patch_buff = String::new(); + let mut hunks_in_patch = 0u32; + for hunk in diff.hunks() { + hunks_in_patch += 1; + let hunk_range = self.is_hunk_in_diff(&hunk); + match hunk_range { + Some((start_line, end_line)) if !summary_only => { + let mut suggestion = String::new(); + let suggestion_help = self + .tidy_advice + .as_ref() + .map(|t| t.get_suggestion_help(start_line, end_line)) + .unwrap_or_default(); + if hunk.is_pure_removal() { + suggestion.push_str( + format!( + "Please remove the line(s)\n- {}", + hunk.before + .map(|l| l.to_string()) + .collect::>() + .join("\n- ") + ) + .as_str(), + ); + } else { + suggestion.push_str("```suggestion\n"); + for token in + &input.after[hunk.after.start as usize..hunk.after.end as usize] + { + let line = &input.interner[*token]; + suggestion.push_str(line); + } + suggestion.push_str("```\n"); + } + let comment = Suggestion { + line_start: start_line, + line_end: end_line, + suggestion: format!("{suggestion_help}\n{suggestion}"), + path: file_name.clone(), + }; + if !review_comments.is_comment_in_suggestions(&comment) { + review_comments.comments.push(comment); + } + } + _ => { + printer.display_hunk( + &mut patch_buff, + &input.before[hunk.before.start as usize..hunk.before.end as usize], + &input.after[hunk.after.start as usize..hunk.after.end as usize], + )?; + } + } + } + if !patch_buff.is_empty() { + let patch_buf = format!("--- a/{file_name}\n+++ b/{file_name}\n{patch_buff}"); + review_comments.full_patch.push_str(patch_buf.as_str()); } + review_comments.tool_total += hunks_in_patch; Ok(()) } } diff --git a/cpp-linter/src/error.rs b/cpp-linter/src/error.rs index 32bd094f..ade82831 100644 --- a/cpp-linter/src/error.rs +++ b/cpp-linter/src/error.rs @@ -13,6 +13,10 @@ pub enum FileObjError { /// Error when failing to convert a file's contents to a UTF-8 string. #[error("Failed to convert patch buffer to UTF-8 string for file {0}: {1}")] FromUtf8Error(String, #[source] std::string::FromUtf8Error), + + /// Error when failing to generate a patch for a file. + #[error("Failed to print a hunk to a string buffer: {0}")] + DisplayStringFailed(#[source] std::fmt::Error), } /// Errors related to the REST client used for posting feedback and special logging. diff --git a/cpp-linter/src/rest_client.rs b/cpp-linter/src/rest_client.rs index 0c83d0eb..2038cc69 100644 --- a/cpp-linter/src/rest_client.rs +++ b/cpp-linter/src/rest_client.rs @@ -215,8 +215,9 @@ impl RestClient { ..Default::default() }; + let total_review_comments = options.comments.len() as u32; self.client.cull_pr_reviews(&mut options).await?; - let has_changes = review_comments.full_patch.iter().any(|p| !p.is_empty()); + let has_changes = review_comments.tool_total > 0; options.action = if feedback_inputs.passive_reviews { ReviewAction::Comment } else if options.comments.is_empty() && !has_changes { @@ -224,7 +225,12 @@ impl RestClient { } else { ReviewAction::RequestChanges }; - options.summary = review_comments.summarize(&clang_versions, &options.comments); + options.summary = review_comments.summarize( + &clang_versions, + &options.comments, + total_review_comments, + summary_only, + ); self.client.post_pr_review(&options).await?; } Ok(format_checks_failed + tidy_checks_failed) @@ -521,14 +527,10 @@ mod test { suggestion: vec![], fixed_lines: vec![], }]; - file.tidy_advice = Some(TidyAdvice { - notes, - patched: PathBuf::new(), - }); + file.tidy_advice = Some(TidyAdvice { notes }); file.format_advice = Some(FormatAdvice { #[allow(clippy::single_range_in_vec_init)] replacements: vec![1..=2], - patched: PathBuf::new(), }); files.push(Arc::new(Mutex::new(file))); } diff --git a/cpp-linter/src/run.rs b/cpp-linter/src/run.rs index ec2b29d9..161d52dc 100644 --- a/cpp-linter/src/run.rs +++ b/cpp-linter/src/run.rs @@ -18,7 +18,7 @@ use log::{LevelFilter, set_max_level}; // project specific modules/crates use crate::{ - clang_tools::{CACHE_DIR, capture_clang_tools_output}, + clang_tools::capture_clang_tools_output, cli::{ClangParams, Cli, CliCommand, FeedbackInput, LinesChangedOnly}, common_fs::FileObj, logger, @@ -155,7 +155,7 @@ pub async fn run_main(args: Vec) -> Result<()> { let mut clang_params = ClangParams::from(&cli); // mkdir -p .cpp-linter-cache/ - let cache_dir = clang_params.repo_root.join(CACHE_DIR); + let cache_dir = clang_params.repo_root.join(ClangParams::CACHE_DIR); std::fs::create_dir_all(&cache_dir) .with_context(|| "Failed to create a local cache directory.")?; // add gitignore file in project cache dir diff --git a/cpp-linter/tests/reviews.rs b/cpp-linter/tests/reviews.rs index 3d6134d7..00d30010 100644 --- a/cpp-linter/tests/reviews.rs +++ b/cpp-linter/tests/reviews.rs @@ -59,17 +59,6 @@ impl Default for TestParams { } } -fn generate_tool_summary(review_enabled: bool, force_lgtm: bool, tool_name: &str) -> String { - if !review_enabled { - return String::new(); - } - if force_lgtm { - format!("No concerns reported by {}. Great job! :tada:", tool_name) - } else { - format!("Click here for the full {} patch", tool_name) - } -} - async fn setup(lib_root: &Path, tmp_dir: &TempDir, test_params: &TestParams) { let mut event_payload_path = NamedTempFile::new_in("./").unwrap(); let event_payload = if test_params.bad_pr_info { @@ -185,18 +174,22 @@ async fn setup(lib_root: &Path, tmp_dir: &TempDir, test_params: &TestParams) { } else { "REQUEST_CHANGES" }; - let tidy_summary = generate_tool_summary( - test_params.tidy_review, - test_params.force_lgtm, - "clang-tidy", - ); - let format_summary = generate_tool_summary( - test_params.format_review, - test_params.force_lgtm, - "clang-format", - ); + let tool_summary = { + if !(test_params.format_review || test_params.tidy_review) + || test_params.lines_changed_only == LinesChangedOnly::On + { + // results can be non-deterministic because different clang-versions output different diagnostic/changes + ".*" // match anything + } else if test_params.force_lgtm { + ".*No concerns to report. Great job! :tada:.*" + } else if test_params.summary_only { + ".*Click here for the full patch of fixes.*" + } else { + ".*Click here for a patch of fixes outside the diff.*" + } + }; let review_summary = format!( - "{}## Cpp-linter Review.*{format_summary}.*{tidy_summary}.*{}", + "{}## Cpp-linter Review{tool_summary}{}", regex::escape(format!("{}", COMMENT_MARKER.escape_default()).as_str()), regex::escape(format!("{}", USER_OUTREACH.escape_default()).as_str()), ); diff --git a/docs/pyproject.toml b/docs/pyproject.toml index ccb5fbcd..e7ec71c2 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -24,7 +24,7 @@ docs = [ "mkdocs==1.6.1", "mkdocs-gen-files==0.6.0", "mkdocs-include-markdown-plugin==7.2.0", - "mkdocs-material==9.7.1", + "mkdocs-material==9.7.6", "pyyaml==6.0.3", ] From aa76d5766cd6fd298575bbab5836719a935231d3 Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Wed, 17 Jun 2026 18:46:00 -0700 Subject: [PATCH 2/2] impl 3-way diff and add tests and some other review changes --- cpp-linter/src/clang_tools/clang_format.rs | 184 +++++++++++++++++---- cpp-linter/src/common_fs.rs | 15 +- 2 files changed, 166 insertions(+), 33 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index 01ec3681..65716f78 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -8,6 +8,7 @@ use std::{ sync::{Arc, Mutex, MutexGuard}, }; +use gix_imara_diff::Diff; use log::Level; // project-specific crates/modules @@ -145,41 +146,20 @@ pub fn run_clang_format( if let Some(patched_path) = &file.patched_path && patched_path.exists() { - let tidy_patch_contents = - fs::read_to_string(patched_path).map_err(|e| ClangCaptureError::ReadFileFailed { - file_name: patched_path.to_string_lossy().to_string(), - source: e, - })?; - let (tidy_diff, _) = make_patch(&tidy_patch_contents, &original_contents); let mut cmd = Command::new(cmd_path); cmd.current_dir(&cache_path); // edit the clang-tody patched file in-place (`-i`) cmd.args(["--style", &clang_params.style, "-i"]); // if ranges is empty, then we're just formatting the entire file. if !ranges.is_empty() { - // We're concerned about formatting what clang-tidy changed (tidy_diff.hunks().before), - // but we also want to include any clang-format changes that do not overlap clang-tidy fixes. - let mut joint_ranges = tidy_diff - .hunks() - // hunk is partially inclusive: [start, end), - // but clang-format expects fully inclusive line ranges. - // subtract 1 from hunk.before.end - .map(|hunk| { - RangeInclusive::new(hunk.before.start, hunk.before.end.saturating_sub(1)) - }) - .collect::>(); - for range in &ranges { - let mut contained = false; - for hunk in tidy_diff.hunks() { - if hunk.before.contains(range.start()) && hunk.before.contains(range.end()) { - contained = true; - break; - } - } - if !contained { - joint_ranges.push(range.clone()); + let tidy_patch_contents = fs::read_to_string(patched_path).map_err(|e| { + ClangCaptureError::ReadFileFailed { + file_name: patched_path.to_string_lossy().to_string(), + source: e, } - } + })?; + let (tidy_diff, _) = make_patch(&tidy_patch_contents, &original_contents); + let joint_ranges = three_way_diff(&ranges, tidy_diff); for range in &joint_ranges { cmd.arg(format!("--lines={}:{}", range.start(), range.end()).as_str()); } @@ -223,11 +203,119 @@ pub fn run_clang_format( Ok(logs) } +/// Essentially does a three way diff without the original source that generated the given `ranges` (simplified hunks). +/// +/// The returned list of ranges are lines that need formatting in the clang-tidy patched file, +/// provided by the `tidy_diff`. The given `ranges` are the line numbers in the original file +/// that clang-tidy patched. +fn three_way_diff(ranges: &[RangeInclusive], tidy_diff: Diff) -> Vec> { + // We're concerned about the formatting cases: + // + // 1. changes that clang-tidy made: `tidy_diff.hunks().after` + // 2. changes in the current CI event's diff (`ranges`) + // that clang-tidy did not touch (`tidy_diff.hunks().before`) + // 3. changes that do not overlap clang-tidy fixes: `ranges` - `tidy_diff.hunks().before` + // 4. changes that overlap with clang-tidy fixes. This one is complex because + // - tidy fixes can prefix an og range + // - tidy fixes can suffix an og range + // - tidy fixes can be contained within an og range + // - multiple tidy fixes can (in order) suffix, be contained within, and prefix an og range + let mut joint_ranges = vec![]; + let mut tidy_iter = tidy_diff.hunks().peekable(); + let mut line_shift = 0i32; + + /// Prevent pure removals from causing invalid inclusive ranges. + fn maybe_push_range(joint_ranges: &mut Vec>, start: u32, end: u32) { + if start <= end { + joint_ranges.push(RangeInclusive::new(start, end)); + } + } + + for og_range in ranges { + let og_start = *og_range.start(); + let og_end = *og_range.end(); + + // track the start and end of a merged range that gets pushed into joint_ranges. + let mut merged_start = (og_start as i32 + line_shift) as u32; + let mut merged_end = (og_end as i32 + line_shift) as u32; + + while let Some(tidy_hunk) = tidy_iter.peek() { + // alias for readability and prevent some repeated calculations + let before_start = tidy_hunk.before.start; + let before_end = tidy_hunk.before.end.saturating_sub(1); + let after_start = tidy_hunk.after.start; + let after_end = tidy_hunk.after.end.saturating_sub(1); + let delta = tidy_hunk.after.len() as i32 - tidy_hunk.before.len() as i32; + + // The tidy hunk is a pure removal that exactly matches the og range. + if tidy_hunk.is_pure_removal() && before_start == og_start && before_end == og_end { + // Skip the og range and tidy hunk entirely. + // The line shift must still be adjusted for the pure removal though + line_shift += delta; + merged_end = 0; // causes invalid inclusive range which does not get pushed. + tidy_iter.next(); // skip this tidy hunk + break; // skip og range and iterate to the next one. + } + + // tidy hunk is before the og range. + if before_end < og_start { + maybe_push_range(&mut joint_ranges, after_start, after_end); + line_shift += delta; + tidy_iter.next(); + continue; + } + + // tidy hunk is after the og range. + if before_start > og_end { + // handle the og range before iterating the next tidy hunk + break; + } + + // tidy hunk overlaps with the og range in some way (case 4). + if tidy_hunk.before.contains(&og_start) { + merged_start = after_start; + } + + // commit the line shift now that the tidy hunk start is checked. + line_shift += delta; + + // tidy hunk suffixes the og range. + if tidy_hunk.before.contains(&og_end) { + merged_end = after_end; + tidy_iter.next(); // this tidy hunk is handled. + break; // break from loop to push the merged range into joint_ranges. + } + + // tidy hunk is contained within the og range. + // so adjust the og range end accordingly and continue iterating tidy hunks + merged_end = (og_end as i32 + line_shift) as u32; + tidy_iter.next(); + } + + maybe_push_range(&mut joint_ranges, merged_start, merged_end); + } + + // handle any remaining tidy hunks that are after all og ranges. + for tidy_hunk in tidy_iter { + maybe_push_range( + &mut joint_ranges, + tidy_hunk.after.start, + tidy_hunk.after.end.saturating_sub(1), + ); + } + + joint_ranges +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] - use super::summarize_style; + use std::ops::RangeInclusive; + + use gix_imara_diff::{Diff, InternedInput}; + + use super::{summarize_style, three_way_diff}; fn formalize_style(style: &str, expected: &str) { assert_eq!(summarize_style(style), expected); @@ -247,4 +335,42 @@ mod tests { fn formalize_custom_style() { formalize_style("file", "Custom"); } + + #[test] + fn three_way_diff_mixed() { + const OG_SRC: &str = + "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11"; + // TIDY_SRC replaces line3->StringA (hunk before=2..3) and + // line8+line9+line10->StringB+StringC (hunk before=7..10), then appends StringE. + // The second hunk's before=7..10 contains og_end=9 but not og_start=6, + // which exercises the "tidy hunk suffixes og range" branch. + const TIDY_SRC: &str = + "line1\nline2\nStringA\nline4\nline5\nline6\nline7\nStringB\nStringC\nline11\nStringE"; + let input = InternedInput::new(OG_SRC, TIDY_SRC); + let mut tidy_diff = Diff::compute(gix_imara_diff::Algorithm::Histogram, &input); + tidy_diff.postprocess_lines(&input); + let ranges = vec![RangeInclusive::new(2, 4), RangeInclusive::new(6, 9)]; + println!("tidy diff: {tidy_diff:#?}\ncompared to og ranges: {ranges:?}"); + let joint_ranges = three_way_diff(&ranges, tidy_diff); + println!("joint ranges: {joint_ranges:#?}"); + assert_eq!(joint_ranges, vec![2..=4, 6..=10]); + } + + #[test] + fn three_way_diff_separated() { + const OG_SRC: &str = + "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11"; + // TIDY_SRC removes "line3" (index 2) which decrements offsets in ranges[5,8] and removes ranges[2,2]. + // TIDY_SRC appends StringE, which handles remaining tidy hunks after done iterating ranges + const TIDY_SRC: &str = + "line1\nline2\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11\nStringE"; + let input = InternedInput::new(OG_SRC, TIDY_SRC); + let mut tidy_diff = Diff::compute(gix_imara_diff::Algorithm::Histogram, &input); + tidy_diff.postprocess_lines(&input); + let ranges = vec![2..=2, 5..=8]; + println!("tidy diff: {tidy_diff:#?}\ncompared to og ranges: {ranges:?}"); + let joint_ranges = three_way_diff(&ranges, tidy_diff); + println!("joint ranges: {joint_ranges:#?}"); + assert_eq!(joint_ranges, vec![4..=7, 9..=10]); + } } diff --git a/cpp-linter/src/common_fs.rs b/cpp-linter/src/common_fs.rs index 82cbda60..ac4da153 100644 --- a/cpp-linter/src/common_fs.rs +++ b/cpp-linter/src/common_fs.rs @@ -187,9 +187,6 @@ impl FileObj { self.get_suggestions(review_comments, &diff, &input, summary_only) .map_err(FileObjError::DisplayStringFailed)?; - if summary_only { - return Ok(()); - } if let Some(advice) = &self.tidy_advice { // now check for clang-tidy warnings with no fixes applied let file_ext = self @@ -203,6 +200,10 @@ impl FileObj { for note in &advice.notes { if note.fixed_lines.is_empty() && self.is_line_in_diff(¬e.line) { // notification had no suggestion applied in `patched` + total += 1; + if summary_only { + continue; + } let mut suggestion = format!( "### clang-tidy diagnostic\n**{file_name}:{}:{}** {}: [{}]\n\n> {}\n", ¬e.line, @@ -217,7 +218,6 @@ impl FileObj { .as_str(), ); } - total += 1; let mut is_merged = false; for s in &mut review_comments.comments { if s.path == file_name @@ -306,6 +306,13 @@ impl FileObj { } } _ => { + printer.display_header( + &mut patch_buff, + hunk.before.start, + hunk.after.start, + hunk.before.len() as u32, + hunk.after.len() as u32, + )?; printer.display_hunk( &mut patch_buff, &input.before[hunk.before.start as usize..hunk.before.end as usize],