diff --git a/.github/workflows/static-analysis-pr.yml b/.github/workflows/static-analysis-pr.yml index ce4083ee..de6bfa1d 100644 --- a/.github/workflows/static-analysis-pr.yml +++ b/.github/workflows/static-analysis-pr.yml @@ -11,6 +11,7 @@ on: - '**/*.cmake' - '**/.clang-tidy' - '.github/workflows/static-analysis-pr.yml' + - 'scripts/check_task_backend_apis.py' concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} @@ -162,3 +163,17 @@ jobs: done echo "No linter suppression markers found in changed files." + task-backend-api-check: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - name: Check task backend API usage + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.sha }} + run: | + set -euo pipefail + + python3 scripts/check_task_backend_apis.py --base "$BASE_SHA" --head "$HEAD_SHA" diff --git a/docs/locale/ru/LC_MESSAGES/user_guide/ci.po b/docs/locale/ru/LC_MESSAGES/user_guide/ci.po index 30cc598c..359f65d0 100644 --- a/docs/locale/ru/LC_MESSAGES/user_guide/ci.po +++ b/docs/locale/ru/LC_MESSAGES/user_guide/ci.po @@ -106,11 +106,12 @@ msgstr "" #: ../../../../docs/user_guide/ci.rst:23 msgid "" "Security and static analysis — clang‑tidy on PRs (avoid ``NOLINT``/``IWYU" -" pragma``), scheduled CodeQL (C++/Python) and OpenSSF Scorecard." +" pragma``), task backend API checks, scheduled CodeQL (C++/Python) and " +"OpenSSF Scorecard." msgstr "" "Безопасность и статический анализ — clang‑tidy в PR (избегайте " -"``NOLINT``/``IWYU pragma``), плановые CodeQL (C++/Python) и OpenSSF " -"Scorecard." +"``NOLINT``/``IWYU pragma``), проверки API для backend'ов задач, плановые " +"CodeQL (C++/Python) и OpenSSF Scorecard." #: ../../../../docs/user_guide/ci.rst:26 msgid "Diagram" @@ -259,11 +260,13 @@ msgstr "" #: ../../../../docs/user_guide/ci.rst:89 msgid "" -"Static analysis (clang-tidy) fails: address comments; do not use " -"``NOLINT``/``IWYU pragma`` in task code." +"Static analysis fails: address clang-tidy comments; do not use " +"``NOLINT``/``IWYU pragma`` in task code; keep OpenMP/TBB/MPI/std::thread APIs " +"in their matching task backend directories." msgstr "" -"Падает статический анализ (clang-tidy): поправьте замечания; не " -"используйте ``NOLINT``/``IWYU pragma`` в коде задач." +"Падает статический анализ: поправьте замечания clang-tidy; не используйте " +"``NOLINT``/``IWYU pragma`` в коде задач; держите API OpenMP/TBB/MPI/" +"std::thread в соответствующих backend-директориях задач." #: ../../../../docs/user_guide/ci.rst:90 msgid "" diff --git a/docs/user_guide/ci.rst b/docs/user_guide/ci.rst index 12bc54a2..7351fca2 100644 --- a/docs/user_guide/ci.rst +++ b/docs/user_guide/ci.rst @@ -20,7 +20,7 @@ High‑level pipeline - Pages (docs and scoreboard) — builds Doxygen XML and Sphinx (EN+RU) + scoreboard; on ``master`` deploys with coverage to GitHub Pages. -- Security and static analysis — clang‑tidy on PRs (avoid ``NOLINT``/``IWYU pragma``), scheduled CodeQL (C++/Python) and OpenSSF Scorecard. +- Security and static analysis — clang‑tidy on PRs (avoid ``NOLINT``/``IWYU pragma``), task backend API checks, scheduled CodeQL (C++/Python) and OpenSSF Scorecard. Diagram ------- @@ -86,7 +86,7 @@ Docs and scoreboard artifacts Troubleshooting --------------- - Pre-commit fails: run ``pre-commit run -a`` locally (install with ``pre-commit install``) and commit fixes. -- Static analysis (clang-tidy) fails: address comments; do not use ``NOLINT``/``IWYU pragma`` in task code. +- Static analysis fails: address clang-tidy comments; do not use ``NOLINT``/``IWYU pragma`` in task code; keep OpenMP/TBB/MPI/std::thread APIs in their matching task backend directories. - Tests not found/not running: verify ``settings.json`` enables required technologies and tests exist; see :doc:`submit_work`. - Time limits exceeded: reduce data sizes; prefer env vars (:doc:`environment_variables`) like ``PPC_TASK_MAX_TIME``/``PPC_PERF_MAX_TIME``; avoid sleeps/randomness. - MPI runs fail locally: set ``PPC_NUM_PROC`` and try ``--additional-mpi-args=\"--oversubscribe\"``. diff --git a/scripts/check_task_backend_apis.py b/scripts/check_task_backend_apis.py new file mode 100644 index 00000000..278168e0 --- /dev/null +++ b/scripts/check_task_backend_apis.py @@ -0,0 +1,445 @@ +import argparse +import fnmatch +import re +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + +SOURCE_EXTENSIONS = { + ".c", + ".cpp", + ".h", + ".hpp", +} + + +@dataclass(frozen=True) +class ApiPattern: + category: str + regex: re.Pattern + strip_literals: bool + + +@dataclass(frozen=True) +class Violation: + path: Path + line_number: int + category: str + matched_api: str + backend: str + + +def join_regex_patterns(patterns: tuple[str, ...]) -> str: + return "|".join(patterns) + + +def compile_include_regex(headers: tuple[str, ...]) -> re.Pattern: + escaped_headers = tuple(re.escape(header) for header in headers) + return re.compile( + r"^\s*#\s*include\s*[<\"](?:" + join_regex_patterns(escaped_headers) + r")[>\"]" + ) + + +CPP_THREAD_HEADERS = ( + "thread", + "future", + "mutex", + "shared_mutex", + "condition_variable", + "execution", + "barrier", + "latch", + "semaphore", + "stop_token", +) + +CPP_THREAD_STD_SYMBOL_PATTERNS = ( + r"j?thread", + "this_thread", + "async", + "future", + "shared_future", + "promise", + "packaged_task", + "mutex", + "recursive_mutex", + "timed_mutex", + "recursive_timed_mutex", + "shared_mutex", + "shared_timed_mutex", + "unique_lock", + "shared_lock", + "lock_guard", + "scoped_lock", + r"condition_variable(?:_any)?", + r"memory_order(?:_[a-z_]+)?", + "barrier", + "latch", + "counting_semaphore", + "binary_semaphore", + "stop_source", + "stop_token", + "stop_callback", + "execution", +) + + +API_PATTERNS = { + "openmp": [ + ApiPattern("OpenMP", re.compile(r"^\s*#\s*include\s*[<\"]omp\.h[>\"]"), False), + ApiPattern("OpenMP", re.compile(r"^\s*#\s*pragma\s+omp\b"), False), + ApiPattern("OpenMP", re.compile(r"\bomp_[A-Za-z0-9_]*\b"), True), + ApiPattern("OpenMP", re.compile(r"\b_OPENMP\b"), True), + ], + "tbb": [ + ApiPattern( + "TBB", + re.compile(r"^\s*#\s*include\s*[<\"](?:oneapi/)?tbb/[^>\"]+[>\"]"), + False, + ), + ApiPattern("TBB", re.compile(r"\boneapi::tbb::"), True), + ApiPattern("TBB", re.compile(r"\btbb::"), True), + ApiPattern("TBB", re.compile(r"\bTBB_[A-Za-z0-9_]*\b"), True), + ], + "mpi": [ + ApiPattern("MPI", re.compile(r"^\s*#\s*include\s*[<\"]mpi\.h[>\"]"), False), + ApiPattern("MPI", re.compile(r"\bMPI_[A-Za-z0-9_]*\b"), True), + ApiPattern("MPI", re.compile(r"\bMPI::"), True), + ], + "cpp_thread": [ + ApiPattern( + "C++ thread API", + compile_include_regex(CPP_THREAD_HEADERS), + False, + ), + ApiPattern( + "C++ thread API", + re.compile( + r"\bstd::(?:" + + join_regex_patterns(CPP_THREAD_STD_SYMBOL_PATTERNS) + + r")\b" + ), + True, + ), + ApiPattern( + "POSIX thread API", + re.compile(r"^\s*#\s*include\s*[<\"]pthread\.h[>\"]"), + False, + ), + ApiPattern("POSIX thread API", re.compile(r"\bpthread_[A-Za-z0-9_]*\b"), True), + ], +} + + +API_ORDER = ("openmp", "tbb", "mpi", "cpp_thread") + + +BACKEND_RULES = { + "seq": {"openmp", "tbb", "mpi", "cpp_thread"}, + "omp": {"tbb", "mpi", "cpp_thread"}, + "tbb": {"openmp", "mpi", "cpp_thread"}, + "mpi": {"openmp", "tbb", "cpp_thread"}, + "stl": {"openmp", "tbb", "mpi"}, +} + + +def strip_comments(text: str) -> str: + result = [] + index = 0 + state = "normal" + + while index < len(text): + current = text[index] + next_char = text[index + 1] if index + 1 < len(text) else "" + + if state == "normal": + if current == "R" and next_char == '"': + raw_end = consume_raw_string(text, index) + if raw_end is not None: + result.extend(text[index:raw_end]) + index = raw_end + continue + if current in {"'", '"'}: + quote = current + result.append(current) + index += 1 + escaped = False + while index < len(text): + char = text[index] + result.append(char) + index += 1 + if char == "\n": + break + if escaped: + escaped = False + continue + if char == "\\": + escaped = True + continue + if char == quote: + break + continue + if current == "/" and next_char == "/": + result.extend(" ") + index += 2 + state = "line_comment" + continue + if current == "/" and next_char == "*": + result.extend(" ") + index += 2 + state = "block_comment" + continue + result.append(current) + index += 1 + continue + + if state == "line_comment": + if current == "\n": + result.append(current) + state = "normal" + else: + result.append(" ") + index += 1 + continue + + if current == "\n": + result.append(current) + index += 1 + continue + if current == "*" and next_char == "/": + result.extend(" ") + index += 2 + state = "normal" + continue + result.append(" ") + index += 1 + + return "".join(result) + + +def consume_raw_string(text: str, start: int) -> int | None: + delimiter_start = start + 2 + delimiter_end = delimiter_start + + while delimiter_end < len(text) and text[delimiter_end] != "(": + if text[delimiter_end] in "\\ \t\r\n": + return None + delimiter_end += 1 + + if delimiter_end >= len(text): + return None + + delimiter = text[delimiter_start:delimiter_end] + closing = f'){delimiter}"' + closing_start = text.find(closing, delimiter_end + 1) + if closing_start == -1: + return None + return closing_start + len(closing) + + +def strip_literals(text: str) -> str: + result = [] + index = 0 + + while index < len(text): + current = text[index] + next_char = text[index + 1] if index + 1 < len(text) else "" + + if current == "R" and next_char == '"': + raw_end = consume_raw_string(text, index) + if raw_end is not None: + result.extend( + "\n" if char == "\n" else " " for char in text[index:raw_end] + ) + index = raw_end + continue + + if current not in {"'", '"'}: + result.append(current) + index += 1 + continue + + quote = current + result.append(" ") + index += 1 + escaped = False + while index < len(text): + char = text[index] + if char == "\n": + result.append(char) + index += 1 + break + result.append(" ") + index += 1 + if escaped: + escaped = False + continue + if char == "\\": + escaped = True + continue + if char == quote: + break + + return "".join(result) + + +def get_backend(path: Path) -> str | None: + parts = path.parts + try: + tasks_index = parts.index("tasks") + 1 + except ValueError: + return None + + for part in parts[tasks_index:]: + if part in BACKEND_RULES: + return part + return None + + +def is_source_file(path: Path) -> bool: + return path.suffix in SOURCE_EXTENSIONS + + +def read_changed_files(base: str, head: str) -> list[Path]: + command = [ + "git", + "diff", + "--name-only", + "--diff-filter=ACMRT", + f"{base}...{head}", + "--", + "tasks/", + ] + result = subprocess.run(command, check=True, stdout=subprocess.PIPE, text=True) + return [Path(line) for line in result.stdout.splitlines() if line] + + +def read_all_task_files() -> list[Path]: + return [path for path in Path("tasks").rglob("*") if path.is_file()] + + +def is_mpi_openmp_allowed(path: Path, allow_mpi_openmp: list[str]) -> bool: + path_name = path.as_posix() + return any(fnmatch.fnmatch(path_name, pattern) for pattern in allow_mpi_openmp) + + +def find_violations(path: Path, allow_mpi_openmp: list[str]) -> list[Violation]: + backend = get_backend(path) + if backend is None or not is_source_file(path) or not path.exists(): + return [] + + forbidden_apis = set(BACKEND_RULES[backend]) + if backend == "mpi" and is_mpi_openmp_allowed(path, allow_mpi_openmp): + forbidden_apis.discard("openmp") + + if not forbidden_apis: + return [] + + text = path.read_text(encoding="utf-8") + without_comments = strip_comments(text) + without_literals = strip_literals(without_comments) + lines = { + False: without_comments.splitlines(), + True: without_literals.splitlines(), + } + + violations = [] + seen = set() + for api in API_ORDER: + if api not in forbidden_apis: + continue + for pattern in API_PATTERNS[api]: + for line_number, line in enumerate(lines[pattern.strip_literals], start=1): + match = pattern.regex.search(line) + if match is None: + continue + key = (path, line_number, pattern.category) + if key in seen: + continue + seen.add(key) + violations.append( + Violation( + path, + line_number, + pattern.category, + match.group(0).strip(), + backend, + ) + ) + + return violations + + +def github_escape(value: str) -> str: + return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + + +def print_violation(violation: Violation) -> None: + message = ( + f"{violation.category} usage ({violation.matched_api}) is forbidden in " + f"tasks/{violation.backend} implementation files." + ) + print( + f"::error file={github_escape(str(violation.path))},line={violation.line_number}," + f"title=Forbidden backend API::{github_escape(message)}" + ) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Check task backend source files for forbidden parallel API usage." + ) + source_group = parser.add_mutually_exclusive_group(required=True) + source_group.add_argument( + "--all", action="store_true", help="check all task source files" + ) + source_group.add_argument("--base", help="base commit for changed-file mode") + parser.add_argument("--head", help="head commit for changed-file mode") + parser.add_argument( + "--allow-mpi-openmp", + action="append", + default=[], + metavar="GLOB", + help="allow OpenMP in matching tasks/*/mpi source files", + ) + args = parser.parse_args() + + if args.base and not args.head: + parser.error("--head is required when --base is used") + if args.head and not args.base: + parser.error("--base is required when --head is used") + + paths = ( + read_all_task_files() if args.all else read_changed_files(args.base, args.head) + ) + checked_paths = [ + path + for path in paths + if get_backend(path) is not None and is_source_file(path) and path.exists() + ] + + if not checked_paths: + print("No changed backend source files to check.") + return 0 + + violations = [] + for path in checked_paths: + violations.extend(find_violations(path, args.allow_mpi_openmp)) + + for violation in violations: + print_violation(violation) + + if violations: + print( + f"Found {len(violations)} forbidden backend API usage issue(s).", + file=sys.stderr, + ) + return 1 + + print(f"Task backend API check passed for {len(checked_paths)} file(s).") + return 0 + + +if __name__ == "__main__": + sys.exit(main())