From c9427a2f566a8b14160dd04b9ccb220cdf4a372d Mon Sep 17 00:00:00 2001
From: Gaurav Sharma
Date: Thu, 14 May 2026 20:08:20 +0530
Subject: [PATCH] CHORE: Exclude vendored simdutf sources from coverage report

PR #526 added simdutf as a FetchContent dependency. CMake places its
sources at mssql_python/pybind/build/_deps/simdutf-src/ at build time,
and clang's coverage instrumentation picks them up because they get
linked into ddbc_bindings.so. The current ignore-filename-regex in
generate_codecov.sh excludes Python headers, pybind11, and system
includes, but not _deps/.

This adds two layers of filtering:

1. Primary: extend the llvm-cov ignore-filename-regex with build/_deps/
   so simdutf (and any future FetchContent dependency) is dropped at
   the C++ export step.

2. Defense-in-depth: lcov --remove '*/build/_deps/*' after the
   Python + C++ merge, to catch anything that slips through the primary
   filter in future dependencies.

Verified locally in an ubuntu:22.04 container matching the CI image:

- Before: 79 C++ sources in coverage report, 70 of them simdutf
- After: 9 C++ sources, all owned by mssql-python

mssql-python's own coverage numbers are unchanged. The "Files Needing
Attention" list no longer surfaces simdutf SIMD dispatch variants that
inherently run at 0% on a single CI runner (haswell, westmere, arm64,
etc. are all built into the same .so, but only one variant executes
per CPU).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- generate_codecov.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/generate_codecov.sh b/generate_codecov.sh index f24dd78d5..6c2554f51 100644 --- a/generate_codecov.sh +++ b/generate_codecov.sh @@ -77,7 +77,7 @@ echo "[INFO] Using pybind module: $PYBIND_SO" # Export C++ coverage, excluding Python headers, pybind11, and system includes llvm-cov export "$PYBIND_SO" \ -instr-profile=default.profdata \ - -ignore-filename-regex='(python3\.[0-9]+|cpython|pybind11|/usr/include/|/usr/lib/)' \ + -ignore-filename-regex='(python3\.[0-9]+|cpython|pybind11|/usr/include/|/usr/lib/|build/_deps/)' \ --skip-functions \ -format=lcov > cpp-coverage.info @@ -95,6 +95,13 @@ echo "[ACTION] Merging Python and C++ coverage" lcov -a python-coverage.info -a cpp-coverage.info -o total.info \ --ignore-errors inconsistent,corrupt +# Defense-in-depth: drop any vendored third-party sources pulled in via CMake +# FetchContent (e.g. simdutf). The llvm-cov ignore-filename-regex above is the +# primary filter; this catches anything that slips through future deps. +echo "[ACTION] Removing vendored third-party sources from merged coverage" +lcov --remove total.info '*/build/_deps/*' -o total.info \ + --ignore-errors inconsistent,unused + # Normalize paths so everything starts from mssql_python/ echo "[ACTION] Normalizing paths in LCOV report" sed -i "s|$(pwd)/||g" total.info