diff --git a/extensions/EXTENSION-PUBLISHING-GUIDE.md b/extensions/EXTENSION-PUBLISHING-GUIDE.md index be5b375241..13fd08b79c 100644 --- a/extensions/EXTENSION-PUBLISHING-GUIDE.md +++ b/extensions/EXTENSION-PUBLISHING-GUIDE.md @@ -320,6 +320,7 @@ A: Extensions should be free and open-source. Commercial support/services are al "author": "string (required)", "version": "string (required, semver)", "download_url": "string (required, valid URL)", + "sha256": "string (optional, SHA-256 hex digest of the archive at download_url; verified before install)", "repository": "string (required, valid URL)", "homepage": "string (optional, valid URL)", "documentation": "string (optional, valid URL)", diff --git a/presets/PUBLISHING.md b/presets/PUBLISHING.md index 661614e5c0..f823a6ef15 100644 --- a/presets/PUBLISHING.md +++ b/presets/PUBLISHING.md @@ -185,6 +185,7 @@ Edit `presets/catalog.community.json` and add your preset. "author": "Your Name", "version": "1.0.0", "download_url": "https://github.com/your-org/spec-kit-preset-your-preset/archive/refs/tags/v1.0.0.zip", + "sha256": "OPTIONAL: SHA-256 hex digest of the archive above; verified before install", "repository": "https://github.com/your-org/spec-kit-preset-your-preset", "license": "MIT", "requires": { diff --git a/src/specify_cli/extensions/__init__.py b/src/specify_cli/extensions/__init__.py index 19cc0f0910..98b4c048d3 100644 --- a/src/specify_cli/extensions/__init__.py +++ b/src/specify_cli/extensions/__init__.py @@ -31,6 +31,7 @@ from .._utils import dump_frontmatter, relative_extension_path_violation from ..catalogs import CatalogEntry as BaseCatalogEntry from ..catalogs import CatalogStackBase +from ..shared_infra import verify_archive_sha256 _FALLBACK_CORE_COMMAND_NAMES = frozenset( { @@ -2617,6 +2618,10 @@ def download_extension( ) as response: zip_data = response.read() + verify_archive_sha256( + zip_data, ext_info.get("sha256"), extension_id, ExtensionError + ) + zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/presets/__init__.py b/src/specify_cli/presets/__init__.py index 66f1bbc5e5..07e31185ec 100644 --- a/src/specify_cli/presets/__init__.py +++ b/src/specify_cli/presets/__init__.py @@ -31,6 +31,7 @@ from .._init_options import is_ai_skills_enabled from ..integrations.base import IntegrationBase from .._utils import dump_frontmatter +from ..shared_infra import verify_archive_sha256 def _substitute_core_template( @@ -2505,6 +2506,10 @@ def download_pack( with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response: zip_data = response.read() + verify_archive_sha256( + zip_data, pack_info.get("sha256"), pack_id, PresetError + ) + zip_path.write_bytes(zip_data) return zip_path diff --git a/src/specify_cli/shared_infra.py b/src/specify_cli/shared_infra.py index 0db8687058..a812323704 100644 --- a/src/specify_cli/shared_infra.py +++ b/src/specify_cli/shared_infra.py @@ -2,6 +2,9 @@ from __future__ import annotations +import hashlib +import hmac +import logging import os import re import tempfile @@ -11,6 +14,71 @@ from .integrations.base import IntegrationBase from .integrations.manifest import IntegrationManifest +logger = logging.getLogger(__name__) + +# A SHA-256 digest is exactly 64 lowercase hexadecimal characters. +_SHA256_HEX_RE = re.compile(r"^[0-9a-f]{64}$") + + +def verify_archive_sha256( + data: bytes, + expected: str | None, + name: str, + error_cls: type[Exception], +) -> None: + """Verify downloaded archive bytes against a catalog-declared SHA-256. + + Catalog entries may pin the expected digest of their release archive in a + ``sha256`` field (optionally prefixed with ``"sha256:"``). When present, the + downloaded bytes must match before they are written to disk and installed, + so a corrupted or tampered archive is rejected even though the transport was + HTTPS. Entries without a declared digest are accepted unchanged, keeping the + check backwards compatible. + + Args: + data: The raw downloaded archive bytes. + expected: The catalog-declared SHA-256 hex digest, or ``None``. + name: The extension/preset id, used in the error message. + error_cls: Exception type to raise on mismatch (e.g. ``ExtensionError``). + + Raises: + error_cls: If ``expected`` is provided and is not a well-formed + SHA-256 hex digest, or does not match ``data``. + """ + # Skip only when no digest is declared at all (``None``). A declared but + # empty/blank value (e.g. ``sha256: ""``) is an authoring error, not an + # opt-out: let it fall through to the format check below so it is rejected + # rather than silently disabling verification. + if expected is None: + logger.debug( + "No sha256 declared for %r; archive integrity was not verified.", + name, + ) + return + # Strip *only* a literal ``sha256:`` algorithm prefix (case-insensitive). + # Any other prefix is part of the value and must not be silently dropped, + # otherwise a malformed or wrong-algorithm digest (e.g. ``md5:...``) would + # be quietly accepted as if it were a valid SHA-256. + raw = str(expected).strip() + if raw[:7].lower() == "sha256:": + raw = raw[7:].strip() + expected_hex = raw.lower() + if not _SHA256_HEX_RE.match(expected_hex): + raise error_cls( + f"Invalid sha256 declared for {name!r}: expected 64 hexadecimal " + f"characters (optionally prefixed with 'sha256:'), got " + f"{expected!r}." + ) + actual_hex = hashlib.sha256(data).hexdigest() + # Constant-time comparison: both sides are fixed-length hex digests, so use + # ``hmac.compare_digest`` to avoid leaking information through timing. + if not hmac.compare_digest(actual_hex, expected_hex): + raise error_cls( + f"Integrity check failed for {name!r}: the catalog declares " + f"sha256 {expected_hex}, but the downloaded archive is " + f"{actual_hex}. The archive may be corrupted or tampered with." + ) + class SymlinkedSharedPathError(ValueError): """Raised when a shared infrastructure path or ancestor is a symlink. diff --git a/tests/test_extensions.py b/tests/test_extensions.py index c60a7e430f..7d31938b88 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -3716,6 +3716,89 @@ def fake_open(req, timeout=None): assert captured[1].get_header("Authorization") == "Bearer ghp_testtoken" assert captured[1].get_header("Accept") == "application/octet-stream" + def _make_zip_bytes(self): + """Build a minimal valid extension ZIP in memory for download tests.""" + import zipfile + import io + + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("extension.yml", "id: test-ext\nname: Test\nversion: 1.0.0\n") + return buf.getvalue() + + def _mock_response(self, data): + """Build a context-manager mock HTTP response returning ``data``.""" + from unittest.mock import MagicMock + + resp = MagicMock() + resp.read.return_value = data + # Configure the context-manager protocol explicitly so `with resp` + # yields `resp` itself, independent of how the protocol is invoked. + resp.__enter__.return_value = resp + resp.__exit__.return_value = False + return resp + + def test_download_extension_accepts_matching_sha256(self, temp_dir): + """A catalog ``sha256`` that matches the archive is accepted.""" + import hashlib + from unittest.mock import patch + + catalog = self._make_catalog(temp_dir) + zip_bytes = self._make_zip_bytes() + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=self._mock_response(zip_bytes)): + zip_path = catalog.download_extension("test-ext", target_dir=temp_dir) + + assert zip_path.read_bytes() == zip_bytes + + def test_download_extension_rejects_sha256_mismatch(self, temp_dir): + """A catalog ``sha256`` that does not match the downloaded archive + aborts the install — a tampered or swapped archive is rejected. + """ + from unittest.mock import patch + + catalog = self._make_catalog(temp_dir) + zip_bytes = self._make_zip_bytes() + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + "sha256": "0" * 64, # deliberately wrong + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=self._mock_response(zip_bytes)): + with pytest.raises(ExtensionError, match="[Ii]ntegrity"): + catalog.download_extension("test-ext", target_dir=temp_dir) + + def test_download_extension_without_sha256_still_succeeds(self, temp_dir): + """Entries without ``sha256`` keep working (backwards compatible).""" + from unittest.mock import patch + + catalog = self._make_catalog(temp_dir) + zip_bytes = self._make_zip_bytes() + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://example.com/test-ext.zip", + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch.object(catalog, "_open_url", return_value=self._mock_response(zip_bytes)): + zip_path = catalog.download_extension("test-ext", target_dir=temp_dir) + + assert zip_path.read_bytes() == zip_bytes + def test_download_extension_accepts_direct_github_rest_asset_url(self, temp_dir, monkeypatch): """download_extension can use a GitHub REST release asset URL directly.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_presets.py b/tests/test_presets.py index 58574bbc9c..f6e66ebab5 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -2019,6 +2019,66 @@ def fake_open(req, timeout=None): assert captured[1].get_header("Authorization") == "Bearer ghp_testtoken" assert captured[1].get_header("Accept") == "application/octet-stream" + def _pack_zip_and_response(self): + """Build a minimal preset ZIP and a context-manager mock response.""" + from unittest.mock import MagicMock + import io + + zip_buf = io.BytesIO() + with zipfile.ZipFile(zip_buf, "w") as zf: + zf.writestr("preset.yml", "id: test-pack\nname: Test\nversion: 1.0.0\n") + zip_bytes = zip_buf.getvalue() + + resp = MagicMock() + resp.read.return_value = zip_bytes + # Configure the context-manager protocol explicitly so `with resp` + # yields `resp` itself, independent of how the protocol is invoked. + resp.__enter__.return_value = resp + resp.__exit__.return_value = False + return zip_bytes, resp + + def test_download_pack_accepts_matching_sha256(self, project_dir): + """A catalog ``sha256`` that matches the preset archive is accepted.""" + import hashlib + from unittest.mock import patch + + catalog = PresetCatalog(project_dir) + zip_bytes, resp = self._pack_zip_and_response() + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": hashlib.sha256(zip_bytes).hexdigest(), + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=resp): + zip_path = catalog.download_pack("test-pack", target_dir=project_dir) + + assert zip_path.read_bytes() == zip_bytes + + def test_download_pack_rejects_sha256_mismatch(self, project_dir): + """A catalog ``sha256`` that does not match the archive aborts install.""" + from unittest.mock import patch + + catalog = PresetCatalog(project_dir) + _zip_bytes, resp = self._pack_zip_and_response() + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://example.com/test-pack.zip", + "sha256": "0" * 64, # deliberately wrong + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch.object(catalog, "_open_url", return_value=resp): + with pytest.raises(PresetError, match="[Ii]ntegrity"): + catalog.download_pack("test-pack", target_dir=project_dir) + def test_download_pack_accepts_direct_github_rest_asset_url(self, project_dir, monkeypatch): """download_pack can use a GitHub REST release asset URL directly.""" from unittest.mock import patch, MagicMock diff --git a/tests/test_shared_infra_integrity.py b/tests/test_shared_infra_integrity.py new file mode 100644 index 0000000000..548d2d5f0b --- /dev/null +++ b/tests/test_shared_infra_integrity.py @@ -0,0 +1,101 @@ +"""Unit tests for the shared archive-integrity helper. + +These exercise ``verify_archive_sha256`` directly (independently of the +extension/preset download paths that call it) so the digest-matching, +mismatch, normalisation and "no digest declared" behaviours are pinned in +one place. +""" + +from __future__ import annotations + +import hashlib +import logging + +import pytest + +from specify_cli.shared_infra import verify_archive_sha256 + + +class _BoomError(Exception): + """Sentinel error type used to assert the helper raises ``error_cls``.""" + + +def test_matching_digest_passes(): + """A digest that matches the data returns without raising.""" + data = b"hello-archive" + digest = hashlib.sha256(data).hexdigest() + verify_archive_sha256(data, digest, "thing", _BoomError) + + +def test_mismatch_raises_error_cls(): + """A non-matching digest raises the caller-supplied error type.""" + with pytest.raises(_BoomError, match="[Ii]ntegrity"): + verify_archive_sha256(b"data", "0" * 64, "thing", _BoomError) + + +def test_sha256_prefix_is_accepted(): + """A ``sha256:`` prefix on the expected digest is tolerated.""" + data = b"prefixed" + digest = hashlib.sha256(data).hexdigest() + verify_archive_sha256(data, f"sha256:{digest}", "thing", _BoomError) + + +def test_comparison_is_case_insensitive(): + """An upper-cased expected digest still matches the lower-case actual.""" + data = b"casing" + digest = hashlib.sha256(data).hexdigest().upper() + verify_archive_sha256(data, digest, "thing", _BoomError) + + +def test_malformed_digest_is_rejected(): + """A declared digest that is not 64 hex chars is rejected up front. + + A too-short, too-long, or non-hex value is an authoring/catalog error and + must surface clearly instead of being treated as a digest that simply does + not match the archive. + """ + for bad in ("deadbeef", "z" * 64, "0" * 63, "0" * 65): + with pytest.raises(_BoomError, match="[Ii]nvalid sha256"): + verify_archive_sha256(b"data", bad, "thing", _BoomError) + + +def test_non_sha256_prefix_is_not_silently_stripped(): + """Only a literal ``sha256:`` prefix is stripped. + + A different algorithm prefix (e.g. ``md5:``) must not be silently dropped + and accepted as if the remaining characters were a valid SHA-256 digest; + the value is rejected as malformed. + """ + data = b"prefixed" + digest = hashlib.sha256(data).hexdigest() + with pytest.raises(_BoomError, match="[Ii]nvalid sha256"): + verify_archive_sha256(data, f"md5:{digest}", "thing", _BoomError) + + +def test_absent_digest_skips_and_logs_debug(caplog): + """When no digest is declared the helper returns and logs at DEBUG. + + Installs stay backwards compatible (no error, no user-facing warning), + but the unverified download leaves an audit trail for operators who opt + into debug logging. + """ + with caplog.at_level(logging.DEBUG, logger="specify_cli.shared_infra"): + verify_archive_sha256(b"data", None, "thing", _BoomError) + assert any( + "not verified" in r.getMessage() and "thing" in r.getMessage() + for r in caplog.records + ) + + +def test_blank_declared_digest_is_rejected(): + """A present-but-empty ``sha256`` is an authoring error, not an opt-out. + + Catalog entries reach the helper via ``...get("sha256")``; a blank value + (``""``, whitespace, or a bare ``sha256:`` prefix) means the digest was + declared but left empty. It must surface as a malformed digest rather than + silently disabling the integrity check, which a bare ``if not expected`` + guard would have done. + """ + for blank in ("", " ", "sha256:"): + with pytest.raises(_BoomError, match="[Ii]nvalid sha256"): + verify_archive_sha256(b"data", blank, "thing", _BoomError)