Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5fa4030
Add automated security audit workflow
PascalThuet May 2, 2026
db64121
Address security audit review feedback
PascalThuet May 5, 2026
672a7f7
Add security workflow regression tests
PascalThuet May 5, 2026
480f4e3
Address follow-up security workflow review
PascalThuet May 5, 2026
233f777
Use compile for security audit requirements
PascalThuet May 5, 2026
18cba37
Address latest security workflow review
PascalThuet May 5, 2026
7668a25
Address latest security audit review
PascalThuet May 5, 2026
c110353
Harden security-sensitive repository surfaces
PascalThuet May 5, 2026
ae7144a
Address remaining security review feedback
PascalThuet May 6, 2026
59b3161
ci(security): tighten PR checks for security regressions
PascalThuet May 14, 2026
093884d
ci(security): address review feedback
PascalThuet May 15, 2026
6883e2e
ci(security): tidy follow-up details
PascalThuet May 15, 2026
bc42bff
ci(security): apply self-review follow-ups
PascalThuet May 15, 2026
d603978
ci(security): apply review #2 follow-ups
PascalThuet May 15, 2026
6214a22
ci(security): apply review #3 follow-ups
PascalThuet May 15, 2026
fa5c64c
test(upgrade): polish TestBoundedRead readability
PascalThuet May 16, 2026
3a827b9
ci(security): address Copilot review #4300554119
PascalThuet May 16, 2026
158f737
ci(security): refresh audit baselines
PascalThuet May 16, 2026
6b397df
fix: address copilot security review follow-up
PascalThuet May 21, 2026
3d588f4
fix: wrap unsafe zip extraction errors
PascalThuet May 27, 2026
932def4
fix: redact secrets baseline hash logs
PascalThuet May 29, 2026
6054501
fix: keep secrets baseline hashes out of repr
PascalThuet May 29, 2026
6330af5
fix: address Copilot review on bounded reads and redirect-safety
PascalThuet May 30, 2026
282b6df
fix: address follow-up Copilot review (error typing, docs, tests)
PascalThuet May 30, 2026
339dcf3
fix(security): bound inline ZIP manifest read; guard ADO token redirects
PascalThuet Jun 6, 2026
9a2571a
fix(security): pin tight read bounds on JSON responses; cap actual ZI…
PascalThuet Jun 8, 2026
6817285
fix: align checkout pins and centralize loopback predicate
PascalThuet Jun 10, 2026
6d11a78
fix: pre-empt review feedback on pins, predicate reuse, and baseline …
PascalThuet Jun 10, 2026
d33b000
fix: error messages and docstring name the exact loopback hosts
PascalThuet Jun 11, 2026
095cad1
docs(http): clarify redirect scheme guard is unconditional
PascalThuet Jun 11, 2026
f869b22
harden: reject hostless URLs in is_https_or_localhost_http
PascalThuet Jun 12, 2026
840043d
fix(workflows): reject hostless catalog URLs during fetch
PascalThuet Jun 16, 2026
1f8b508
docs(cli): clarify host requirement for URL validation
PascalThuet Jun 17, 2026
bba929d
fix: stabilize security rebase follow-ups
PascalThuet Jun 17, 2026
23723bf
fix: address security audit follow-ups
PascalThuet Jun 18, 2026
701b2c7
fix: enforce strict redirects for catalog downloads
PascalThuet Jun 18, 2026
4e5bc51
fix(security): refresh audit baseline after rebase
PascalThuet Jun 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/bandit-baseline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"results": [
{
"code": "168 return opener.open(req, timeout=timeout)\n169 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n",
"col_offset": 11,
"end_col_offset": 55,
"filename": "src/specify_cli/authentication/http.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 22,
"link": "https://cwe.mitre.org/data/definitions/22.html"
},
"issue_severity": "MEDIUM",
"issue_text": "Audit url open for permitted schemes. Allowing use of file:/ or custom schemes is often unexpected.",
"line_number": 169,
"line_range": [
169
],
"more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen",
"test_id": "B310",
"test_name": "blacklist"
},
{
"code": "34 run_cmd,\n35 shell=True,\n36 capture_output=True,\n37 text=True,\n38 cwd=cwd,\n39 timeout=300,\n40 )\n41 output = {\n42 \"exit_code\": proc.returncode,\n43 \"stdout\": proc.stdout,\n",
"col_offset": 19,
"end_col_offset": 13,
"filename": "src/specify_cli/workflows/steps/shell/__init__.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 78,
"link": "https://cwe.mitre.org/data/definitions/78.html"
},
"issue_severity": "HIGH",
"issue_text": "subprocess call with shell=True identified, security issue.",
"line_number": 35,
"line_range": [
33,
34,
35,
36,
37,
38,
39,
40
],
"more_info": "https://bandit.readthedocs.io/en/1.9.4/plugins/b602_subprocess_popen_with_shell_equals_true.html",
"test_id": "B602",
"test_name": "subprocess_popen_with_shell_equals_true"
}
]
}
213 changes: 213 additions & 0 deletions .github/scripts/check_bandit_baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""Fail if new entries appear in the Bandit baseline without acknowledgement.

The bandit baseline whitelists known findings so they don't fail CI. If a
contributor adds a new entry, silent whitelisting becomes invisible in
review. This script compares the set of result *identities* in the
baseline at the PR head against the baseline at its base; if any new
identity appears, the PR must carry the label ``security-baseline-change``
to confirm the addition is intentional.

We compare identities (filename + line + test_id + issue_severity +
issue_confidence + hash-of-code-snippet) rather than raw counts so a PR
cannot remove one existing entry and add a different new one to keep the
count constant — which would silently whitelist a new finding.

When the baseline file does not exist at the base ref, this is the PR
that introduces it; we treat all entries as the starting baseline and
do not require the label.

For the head side we read the working tree directly (the CI runner is
checked out at the PR head, so the working-tree file IS the head state).
Reading via ``git show <head_ref>:`` would fail-open on unfetched refs
or detached checkouts — for a security gate we want fail-closed.

Required environment variables:
- ``BANDIT_BASELINE_BASE``: git ref of the PR base
- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels

Outside of PR events, all inputs may be empty and the script no-ops.
"""

from __future__ import annotations

import hashlib
import json
import os
import re
import subprocess
import sys
from pathlib import Path

# Repository root. This script lives at .github/scripts/, so the root is two
# directories above the directory that contains the script.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Baseline location relative to REPO_ROOT. It is used both as the path half of
# ``<ref>:<path>`` git object names and to locate the working-tree file.
BASELINE_PATH = ".github/bandit-baseline.json"
# PR label that acknowledges newly added baseline entries as intentional.
ACK_LABEL = "security-baseline-change"


def _git_ok(*args: str) -> bool:
    """Run ``git <args>`` from the repository root and report success.

    Both output streams are sent to ``DEVNULL``; only the exit status is
    inspected.

    Args:
        *args: Arguments appended after ``git`` on the command line.

    Returns:
        ``True`` when the command exits with status 0, ``False`` otherwise.
    """
    completed = subprocess.run(
        ["git", *args],
        cwd=REPO_ROOT,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,  # a non-zero status is an answer here, not an error
    )
    return completed.returncode == 0


def _read_baseline_at(ref: str) -> tuple[dict, bool]:
    """Return ``(baseline_json, file_existed_at_ref)`` for the base side.

    The head side reads the working tree instead, to avoid silently
    fail-opening on an unfetched/invalid head ref.

    Only a missing *path* at a resolvable ref counts as "did not exist";
    an unresolvable ref or a failing ``git show`` aborts instead, so a
    transient git failure cannot silently disable the gate.

    Args:
        ref: Git ref (or SHA) of the PR base. An empty string yields an
            empty baseline flagged as non-existent.

    Returns:
        The parsed baseline object and whether the file exists at ``ref``.
        A file that exists but cannot be used (invalid JSON, or valid JSON
        that is not an object) is reported as an empty baseline with
        ``True``.

    Raises:
        SystemExit: If ``ref`` cannot be resolved to a commit, or if
            ``git show`` fails for a path that ``git cat-file -e`` reported
            as present.
    """
    if not ref:
        return {"results": []}, False
    if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"):
        raise SystemExit(
            f"Base ref {ref!r} cannot be resolved (unfetched or invalid). "
            f"Refusing to fail-open on a security gate."
        )
    if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"):
        return {"results": []}, False
    try:
        blob = subprocess.run(
            ["git", "show", f"{ref}:{BASELINE_PATH}"],
            check=True,
            cwd=REPO_ROOT,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Decode as UTF-8 like the working-tree reader does, rather than
            # with the runner's locale encoding. Undecodable bytes are
            # replaced instead of raising, so a damaged base blob follows
            # the same "treat as empty or different" path as invalid JSON.
            encoding="utf-8",
            errors="replace",
        ).stdout
    except subprocess.CalledProcessError as exc:
        raise SystemExit(
            f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. "
            f"Refusing to fail-open on a security gate."
        ) from exc
    try:
        parsed = json.loads(blob)
    except json.JSONDecodeError:
        print(f"Could not parse baseline at {ref}; treating as empty.", file=sys.stderr)
        return {"results": []}, True
    if not isinstance(parsed, dict):
        # Valid JSON of the wrong shape (list, string, ...) would otherwise
        # crash the caller on ``.get``. An empty base makes every head entry
        # count as new, so the acknowledgement label is still required.
        print(
            f"Baseline at {ref} is not a JSON object; treating as empty.",
            file=sys.stderr,
        )
        return {"results": []}, True
    return parsed, True


def _read_baseline_from_worktree() -> tuple[dict, bool]:
    """Return ``(baseline_json, file_exists_on_disk)`` for the head side.

    The CI runner is checked out at the PR head, so the working-tree
    file IS the head state. Reading it directly sidesteps spurious
    ``git show`` failures that would otherwise let an unreadable head
    silently pass the gate.

    Asymmetric with the base reader: an unusable file on disk is the
    proposed PR state, so we fail-closed there rather than treating it
    as an empty baseline (which would silently drop the gate).

    Returns:
        The parsed baseline object and ``True``, or an empty baseline and
        ``False`` when the file is absent from the working tree.

    Raises:
        SystemExit: If the path exists but cannot be read or decoded as
            UTF-8, is not valid JSON, or is valid JSON that is not an
            object.
    """
    path = REPO_ROOT / BASELINE_PATH
    if not path.exists():
        return {"results": []}, False
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        raise SystemExit(
            f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. "
            f"Refusing to fail-open on a security gate."
        ) from exc
    except (OSError, UnicodeDecodeError) as exc:
        # E.g. the path is a directory, permissions are wrong, or the bytes
        # are not UTF-8. Report it like corruption instead of a traceback.
        raise SystemExit(
            f"Working-tree baseline at {BASELINE_PATH} is unreadable: {exc}. "
            f"Refusing to fail-open on a security gate."
        ) from exc
    if not isinstance(parsed, dict):
        # The caller reads ``.get("results")``; anything but an object is
        # not a usable baseline.
        raise SystemExit(
            f"Working-tree baseline at {BASELINE_PATH} is corrupt: expected a "
            f"JSON object, got {type(parsed).__name__}. "
            f"Refusing to fail-open on a security gate."
        )
    return parsed, True


_WHITESPACE_RE = re.compile(r"\s+")


def _identity(result: dict) -> str:
    """Build the comparison key for one baseline ``results`` entry.

    The key is the ``|``-joined sequence of filename, line number, test
    id, severity, confidence, and the first 16 hex digits of the SHA-256
    of the code snippet. Runs of whitespace in the snippet are collapsed
    to single spaces and the ends trimmed before hashing, so a snippet
    that differs only in whitespace produces the same key, while a
    different finding at the same line does not.

    Args:
        result: One entry from the baseline's ``results`` list. Missing
            fields contribute an empty string; a missing or falsy
            ``code`` is hashed as the empty string.

    Returns:
        The identity string for ``result``.
    """
    snippet = result.get("code", "") or ""
    collapsed = _WHITESPACE_RE.sub(" ", snippet).strip()
    digest = hashlib.sha256(collapsed.encode("utf-8")).hexdigest()[:16]

    field_names = (
        "filename",
        "line_number",
        "test_id",
        "issue_severity",
        "issue_confidence",
    )
    parts = [str(result.get(name, "")) for name in field_names]
    parts.append(digest)
    return "|".join(parts)


def main() -> int:
    """Run the baseline gate and return the process exit status.

    Reads ``BANDIT_BASELINE_BASE`` and, only when new identities are
    found, ``BANDIT_BASELINE_LABELS`` from the environment.

    Returns:
        ``0`` when the check is skipped, when this PR introduces the
        baseline, when no new identities appear, or when new identities
        are acknowledged by the label. ``1`` when the baseline vanished
        from the working tree or unacknowledged identities were added.
    """
    base_ref = os.environ.get("BANDIT_BASELINE_BASE", "").strip()

    # Nothing to diff against: the ref is empty or made up solely of "0"
    # characters (presumably the all-zero placeholder SHA; confirm against
    # the workflow that sets the variable).
    if not base_ref or set(base_ref) <= {"0"}:
        print("No PR base ref; baseline diff check skipped.")
        return 0

    base_baseline, base_existed = _read_baseline_at(base_ref)
    head_baseline, head_existed = _read_baseline_from_worktree()

    if not base_existed:
        print(
            "Baseline file not present at base ref; treating this PR as the "
            "introduction of the baseline. No acknowledgement required."
        )
        return 0

    if not head_existed:
        # Fail-closed: the file existed at base but is missing in the
        # working tree. Either the PR deleted it (suspicious, since the gate
        # would no longer protect anything) or the workspace is incomplete.
        print(
            f"Baseline file {BASELINE_PATH} existed at the base ref but is "
            "missing in the working tree. Refusing to fail-open on a "
            "security gate.",
            file=sys.stderr,
        )
        return 1

    known = {_identity(entry) for entry in base_baseline.get("results", [])}
    proposed = {_identity(entry) for entry in head_baseline.get("results", [])}
    added = proposed - known

    if not added:
        print(
            f"Bandit baseline entries: {len(known)} -> {len(proposed)} "
            "(no new identities)."
        )
        return 0

    # Labels are only consulted once there is something to acknowledge.
    raw_labels = os.environ.get("BANDIT_BASELINE_LABELS", "")
    labels = {
        name for name in (piece.strip() for piece in raw_labels.split(",")) if name
    }

    if ACK_LABEL in labels:
        print(
            f"Bandit baseline gained {len(added)} new identities; "
            f"acknowledged via label '{ACK_LABEL}'."
        )
        return 0

    print(
        f"Bandit baseline gained {len(added)} new identities. "
        f"Add label '{ACK_LABEL}' to the PR to acknowledge that the new "
        "whitelist entries are intentional.",
        file=sys.stderr,
    )
    for identity in sorted(added):
        print(f"  + {identity}", file=sys.stderr)
    return 1


# Script entry point: the gate's return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Loading