From d9f398891b744e78c5b1e5735adab62d33ff9f9f Mon Sep 17 00:00:00 2001
From: gargsaumya
Date: Tue, 19 May 2026 09:11:10 +0530
Subject: [PATCH] CHORE: Exclude LOG statements from coverage using lcov --omit-lines

Replace manual LCOV_EXCL_LINE markers with lcov's built-in --omit-lines flag.
This approach is cleaner, more maintainable, and catches all LOG variants.

Changes:
- Add eng/scripts/join_logs_for_coverage.py to join multi-line LOG calls
  during coverage builds (line counts are preserved so coverage line
  numbers still match the original sources)
- Modify build.sh to temporarily join LOG statements in codecov mode
  (sources are backed up outside the tree and restored on exit)
- Replace Python filter in generate_codecov.sh with
  --omit-lines '\bLOG[A-Z_]*\s*\('
- Update .gitignore to exclude local development scripts

Benefits:
- No source code clutter (600+ markers removed)
- Catches LOG_ERROR, LOG_WARNING, and all LOG variants
- Cleaner, more maintainable approach
- Source files remain unchanged in repository

Addresses review feedback from @bewithgaurav on PR #556
---
 .gitignore                            |  18 ++++
 eng/scripts/join_logs_for_coverage.py | 130 ++++++++++++++++++++++++++
 generate_codecov.sh                   |  14 +--
 mssql_python/pybind/build.sh          |  40 ++++++++
 4 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 eng/scripts/join_logs_for_coverage.py

diff --git a/.gitignore b/.gitignore
index 3f9bd64e1..080fc9e5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,16 @@ build/
 
 # wheel files
 *.whl
+
+# Coverage reports and artifacts
+.coverage
+coverage.json
+coverage*.xml
+htmlcov/
+unified-coverage/
+*.profraw
+*.profdata
+*.info
 *.tar.gz
 *.zip
 
@@ -66,3 +76,11 @@ mssql_py_core/
 
 # learning files
 learnings/
+
+# Local development and experimental scripts (not part of the PR)
+add_platform_exclusions.py
+add_lcov_exclusions.py
+fix_multiline_log_exclusions.py
+test_pyodbc_decimal.py
+run_coverage_docker.ps1
+TRIAGE_REPORT_*.md
diff --git a/eng/scripts/join_logs_for_coverage.py b/eng/scripts/join_logs_for_coverage.py
new file mode 100644
index 000000000..89ca3b3d1
--- /dev/null
+++ b/eng/scripts/join_logs_for_coverage.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+Join multi-line LOG() calls onto single lines for LCOV coverage filtering.
+
+This script is used only during coverage builds to simplify LOG statement exclusion.
+It rewrites files in place, so the caller (build.sh) backs up and restores the sources.
+Each joined statement is padded with blank lines so every file keeps its line count and
+the recorded coverage line numbers still match the restored original sources.
+Adjacent string literals are concatenated at compile time, so runtime behavior is identical.
+"""
+
+import re
+import sys
+from pathlib import Path
+
+# Start of a LOG-style macro call: LOG(, LOG_ERROR(, LOG_WARNING(, ...
+LOG_CALL_RE = re.compile(r'\bLOG[A-Z_]*\s*\(')
+# Complete string or character literal on a single line
+LITERAL_RE = re.compile(r'"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'')
+
+
+def _code_part(line: str) -> str:
+    """Return the line without a trailing // comment (// inside literals is kept)."""
+    quote = None
+    i = 0
+    while i < len(line):
+        ch = line[i]
+        if quote:
+            if ch == '\\':
+                i += 1  # skip the escaped character
+            elif ch == quote:
+                quote = None
+        elif ch in '"\'':
+            quote = ch
+        elif line.startswith('//', i):
+            return line[:i].rstrip()
+        i += 1
+    return line
+
+
+def _paren_delta(line: str) -> int:
+    """Return '(' count minus ')' count, ignoring literals and // comments."""
+    code = LITERAL_RE.sub('', _code_part(line))
+    return code.count('(') - code.count(')')
+
+
+def join_log_statements(content: str) -> str:
+    """Join multi-line LOG macro calls onto a single line, preserving the line count."""
+    lines = content.split('\n')
+    result = []
+    i = 0
+
+    while i < len(lines):
+        start = i
+        line = lines[i]
+        i += 1
+
+        if not LOG_CALL_RE.search(line):
+            result.append(line)
+            continue
+
+        # Collect continuation lines until all parentheses are closed
+        paren_depth = _paren_delta(line)
+        while i < len(lines) and paren_depth > 0:
+            paren_depth += _paren_delta(lines[i])
+            i += 1
+
+        pieces = lines[start:i]
+        if paren_depth > 0 or len(pieces) == 1:
+            # Single-line call, or never closed before EOF: keep the lines untouched
+            result.extend(pieces)
+            continue
+
+        # Drop // comments from all but the last piece so they cannot swallow joined code
+        head = [_code_part(pieces[0])] + [_code_part(p).strip() for p in pieces[1:-1]]
+        result.append(' '.join(head + [pieces[-1].strip()]))
+        # Pad with blank lines so later lines keep their original line numbers
+        result.extend([''] * (len(pieces) - 1))
+
+    return '\n'.join(result)
+
+
+def process_file(filepath: Path) -> None:
+    """Join LOG statements in a single C++ source file, rewriting it in place."""
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        modified = join_log_statements(content)
+
+        # Skip the write when nothing changed so untouched files keep their mtime
+        if modified != content:
+            with open(filepath, 'w', encoding='utf-8') as f:
+                f.write(modified)
+
+        print(f"[INFO] Processed: {filepath}")
+    except (OSError, UnicodeError) as e:
+        print(f"[ERROR] Failed to process {filepath}: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def main():
+    """Process all .cpp and .hpp files under the given directory (default: cwd)."""
+    if len(sys.argv) > 1:
+        # Process specific directory passed as argument
+        base_dir = Path(sys.argv[1])
+    else:
+        # Default to current directory
+        base_dir = Path.cwd()
+
+    if not base_dir.is_dir():
+        print(f"[ERROR] Directory not found: {base_dir}", file=sys.stderr)
+        sys.exit(1)
+
+    # Find all C++ source files
+    cpp_files = sorted(base_dir.rglob('*.cpp')) + sorted(base_dir.rglob('*.hpp'))
+
+    if not cpp_files:
+        print(f"[WARNING] No .cpp or .hpp files found in {base_dir}")
+        return
+
+    print(f"[INFO] Processing {len(cpp_files)} C++ files in {base_dir}")
+    for filepath in cpp_files:
+        process_file(filepath)
+
+    print(f"[SUCCESS] Joined LOG statements in {len(cpp_files)} files")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/generate_codecov.sh b/generate_codecov.sh
index f24dd78d5..86a37b974 100644
--- a/generate_codecov.sh
+++ b/generate_codecov.sh
@@ -81,19 +81,21 @@ llvm-cov export "$PYBIND_SO" \
     --skip-functions \
     -format=lcov > cpp-coverage.info
 
-# Note: LCOV exclusion markers (LCOV_EXCL_LINE) should be added to source code
-# to exclude LOG() statements from coverage. However, for automated exclusion
-# of all LOG lines without modifying source code, we can use geninfo's --omit-lines
-# feature during the merge step (see below).
+# Note: LOG() lines are excluded during the merge step below via lcov --omit-lines
 
 echo "==================================="
 echo "[STEP 4] Merging Python + C++ coverage"
 echo "==================================="
 
 # Merge LCOV reports (ignore inconsistencies in Python LCOV export)
-echo "[ACTION] Merging Python and C++ coverage"
+# Use --omit-lines (lcov >= 2.0) to exclude LOG macro calls from coverage
+# The regex matches LOG, LOG_ERROR, LOG_WARNING, etc. (same pattern as the join script)
+echo "[ACTION] Merging Python and C++ coverage with LOG exclusion"
 lcov -a python-coverage.info -a cpp-coverage.info -o total.info \
-    --ignore-errors inconsistent,corrupt
+    --ignore-errors inconsistent,corrupt \
+    --omit-lines '\bLOG[A-Z_]*\s*\('
+
+echo "[INFO] LOG statements excluded from coverage using --omit-lines"
 
 # Normalize paths so everything starts from mssql_python/
 echo "[ACTION] Normalizing paths in LCOV report"
diff --git a/mssql_python/pybind/build.sh b/mssql_python/pybind/build.sh
index 811777285..1f589c763 100755
--- a/mssql_python/pybind/build.sh
+++ b/mssql_python/pybind/build.sh
@@ -31,6 +31,46 @@ COVERAGE_MODE=false
 if [[ "${1:-}" == "codecov" || "${1:-}" == "--coverage" ]]; then
     COVERAGE_MODE=true
     echo "[MODE] Enabling Clang coverage instrumentation"
+
+    # For coverage builds, join multi-line LOG statements to simplify LCOV filtering
+    # Sources are rewritten in place; the originals are backed up and restored on exit
+    echo "[ACTION] Preparing source for coverage build (joining LOG statements)"
+
+    # Save current directory
+    ORIGINAL_DIR=$(pwd)
+
+    # Create the backup outside the source tree so the join script cannot rewrite it
+    BACKUP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/pybind_source_backup.XXXXXX") || exit 1
+
+    # Restore source files on exit (success or failure); keep the backup if restoring fails
+    restore_coverage_sources() {
+        echo "[CLEANUP] Restoring original source files"
+        if cp -R "$BACKUP_DIR"/. "$ORIGINAL_DIR"/; then
+            rm -rf "$BACKUP_DIR"
+        else
+            echo "[ERROR] Restore failed; backup kept at $BACKUP_DIR" >&2
+        fi
+    }
+    trap restore_coverage_sources EXIT
+
+    # Backup all .cpp and .hpp files, preserving their relative paths
+    find . -type f \( -name "*.cpp" -o -name "*.hpp" \) -print0 |
+        while IFS= read -r -d '' src; do
+            mkdir -p "$BACKUP_DIR/$(dirname "$src")"
+            cp "$src" "$BACKUP_DIR/$src"
+        done
+
+    # Join LOG statements using the helper script
+    SCRIPT_PATH="${ORIGINAL_DIR}/../../eng/scripts/join_logs_for_coverage.py"
+    if [[ ! -f "$SCRIPT_PATH" ]]; then
+        echo "[WARNING] join_logs_for_coverage.py not found at $SCRIPT_PATH"
+        echo "[WARNING] Continuing with original source (LOG filtering may be incomplete)"
+    elif python3 "$SCRIPT_PATH" "$ORIGINAL_DIR"; then
+        echo "[SUCCESS] LOG statements joined for coverage build"
+    else
+        echo "[ERROR] Failed to join LOG statements for coverage build" >&2
+        exit 1
+    fi
 fi
 
 # Get Python version from active interpreter