From 81d2ed206aa55c6f556ab50274f1282e36f741c4 Mon Sep 17 00:00:00 2001 From: lselvar Date: Tue, 23 Jun 2026 17:36:04 -0400 Subject: [PATCH 1/3] fix: resolve GitHub release asset API URL for private repo bundle downloads For private/SSO-protected GitHub repos, browser release download URLs (https://github.com///releases/download//) redirect to an HTML/SSO page instead of delivering the asset, causing bundle manifest downloads to fail. Extends the pattern from #2855 (presets/workflows) to cover the bundle manifest download path in _download_remote_manifest: - Resolves browser release URLs to GitHub REST API asset URLs via resolve_github_release_asset_api_url before downloading - Direct REST API asset URLs (api.github.com/repos/.../releases/assets/) are passed through directly - Both cases use Accept: application/octet-stream so the API returns the binary payload rather than JSON metadata - The original catalog URL is used to determine artifact format (.zip vs YAML) since the resolved API URL does not carry the file extension Adds two CLI-level contract tests: - bundle info resolves browser release URL via GitHub tags API - bundle info passes direct API asset URL through with octet-stream Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 25 +++- tests/contract/test_bundle_cli.py | 120 ++++++++++++++++++++ 2 files changed, 143 insertions(+), 2 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 185e00acf6..8816f03e3e 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -794,22 +794,43 @@ def _download_remote_manifest(entry_id: str, url: str): import tempfile from ...authentication.http import open_url + from ..._github_http import resolve_github_release_asset_api_url def _validate_redirect(old_url: str, new_url: str) -> None: _require_https(f"bundle '{entry_id}'", new_url) _require_https(f"bundle '{entry_id}'", url) + + # For private/SSO-protected GitHub repos, browser release download URLs + # (https://github.com///releases/download//) + # redirect to an HTML/SSO page instead of delivering the asset. Resolve + # such URLs to the GitHub REST API asset URL so the authenticated client + # can download the actual file. + extra_headers = None + effective_url = url + resolved = resolve_github_release_asset_api_url(url, open_url, timeout=30) + if resolved: + effective_url = resolved + extra_headers = {"Accept": "application/octet-stream"} + try: - with open_url(url, timeout=30, redirect_validator=_validate_redirect) as resp: + with open_url( + effective_url, + timeout=30, + redirect_validator=_validate_redirect, + extra_headers=extra_headers, + ) as resp: _require_https(f"bundle '{entry_id}'", resp.geturl()) raw = resp.read() except BundlerError: raise except Exception as exc: # noqa: BLE001 - raise BundlerError(f"Failed to download bundle '{entry_id}' from {url}: {exc}") from exc + raise BundlerError(f"Failed to download bundle '{entry_id}' from {effective_url}: {exc}") from exc # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. + # Use the original catalog URL (``url``) to determine the artifact format + # since the resolved API URL does not carry the file extension. if url.lower().endswith(".zip"): with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index 018b2bbec1..d5867acba7 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -8,6 +8,7 @@ import json from pathlib import Path +from unittest.mock import patch import pytest import yaml @@ -389,3 +390,122 @@ def test_install_integration_override_cannot_bypass_clash_guard(project: Path): ) assert result.exit_code == 1 assert "claude" in result.output and "copilot" in result.output + + +# ===== Private GitHub release asset URL resolution ===== + + +class FakeBundleResponse: + """Minimal context-manager response stub for open_url fakes.""" + + def __init__(self, data: bytes, url: str = "https://api.github.com/repos/org/repo/releases/assets/99"): + self._data = data + self._url = url + + def read(self) -> bytes: + return self._data + + def geturl(self) -> str: + return self._url + + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + +def _make_catalog_config(catalog_path: Path, project: Path) -> None: + """Write a bundle-catalogs.yml pointing at *catalog_path* in *project*.""" + config = { + "schema_version": "1.0", + "catalogs": [ + { + "id": "test", + "url": str(catalog_path), + "priority": 1, + "install_policy": "install-allowed", + } + ], + } + (project / ".specify" / "bundle-catalogs.yml").write_text( + yaml.safe_dump(config), encoding="utf-8" + ) + + +def test_bundle_info_resolves_github_browser_release_url(project: Path): + """bundle info resolves a private-repo browser release URL via the GitHub API.""" + browser_url = "https://github.com/org/repo/releases/download/v1.0/bundle.yml" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/99" + + captured = [] + manifest_yaml = yaml.safe_dump(valid_manifest_dict()).encode() + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "releases/tags/" in url: + # GitHub API release-tags lookup — return asset list + return FakeBundleResponse( + json.dumps({ + "assets": [{"name": "bundle.yml", "url": api_asset_url}] + }).encode(), + url=url, + ) + # Actual asset download + return FakeBundleResponse(manifest_yaml, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # The browser release URL must have been resolved via the GitHub tags API + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1, f"Expected exactly one tags API call; got {captured}" + assert "releases/tags/v1.0" in tag_calls[0] + + # The actual download must use the resolved API asset URL with octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) >= 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + +def test_bundle_info_passes_through_api_asset_url(project: Path): + """bundle info passes a direct GitHub API asset URL through with octet-stream.""" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/77" + + captured = [] + manifest_yaml = yaml.safe_dump(valid_manifest_dict()).encode() + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + return FakeBundleResponse(manifest_yaml, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=api_asset_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # No tags API call — URL was already a REST asset URL + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 0 + + # Exactly one download call to the asset URL with octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) == 1 + assert asset_calls[0][0] == api_asset_url + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} From 29e043f3bcaade657e05ab432b2a4791682d145d Mon Sep 17 00:00:00 2001 From: lselvar Date: Thu, 25 Jun 2026 22:16:53 -0400 Subject: [PATCH 2/3] fix: detect ZIP payload by magic bytes; add zip and API-asset tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot review feedback on PR #3136: 1. Detect ZIP payloads by magic bytes (PK\x03\x04) in addition to the '.zip' URL suffix so that direct GitHub REST asset URLs — which carry no file extension — are correctly routed through the ZIP extraction path when the asset is a ZIP bundle artifact. 2. Add two new contract tests: - test_bundle_info_resolves_github_browser_release_url_zip: exercises the '.zip' browser release URL path end-to-end, verifying the tags API lookup fires, octet-stream header is used, and bundle.yml is successfully extracted from the ZIP payload. - test_bundle_info_api_asset_url_zip_detected_by_magic_bytes: verifies that a direct REST asset URL returning ZIP bytes is detected by magic and parsed correctly without a tags API call. Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 8 +- tests/contract/test_bundle_cli.py | 99 +++++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index 8816f03e3e..e752daf9b2 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -829,9 +829,11 @@ def _validate_redirect(old_url: str, new_url: str) -> None: # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. - # Use the original catalog URL (``url``) to determine the artifact format - # since the resolved API URL does not carry the file extension. - if url.lower().endswith(".zip"): + # Detection uses the original catalog URL's extension when available (browser + # release URLs carry the filename), and falls back to the ZIP magic bytes for + # direct REST API asset URLs which have no file extension in their path. + _ZIP_MAGIC = b"PK\x03\x04" + if url.lower().endswith(".zip") or raw[:4] == _ZIP_MAGIC: with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index d5867acba7..c955b45034 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -509,3 +509,102 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None assert len(asset_calls) == 1 assert asset_calls[0][0] == api_asset_url assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + +def test_bundle_info_resolves_github_browser_release_url_zip(project: Path, tmp_path: Path): + """bundle info resolves a browser release URL for a .zip artifact and extracts bundle.yml.""" + import io + import zipfile + + browser_url = "https://github.com/org/repo/releases/download/v2.0/bundle.zip" + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/88" + + # Build a minimal in-memory ZIP containing bundle.yml + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("bundle.yml", yaml.safe_dump(valid_manifest_dict())) + zip_bytes = buf.getvalue() + + captured = [] + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + if "releases/tags/" in url: + return FakeBundleResponse( + json.dumps({ + "assets": [{"name": "bundle.zip", "url": api_asset_url}] + }).encode(), + url=url, + ) + return FakeBundleResponse(zip_bytes, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=browser_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # tags API lookup must have fired + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 1 + assert "releases/tags/v2.0" in tag_calls[0] + + # Asset download uses octet-stream + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) >= 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + # Manifest was successfully parsed from the ZIP + payload = json.loads(result.output) + assert payload["id"] == "demo-bundle" + + +def test_bundle_info_api_asset_url_zip_detected_by_magic_bytes(project: Path): + """bundle info correctly handles a direct API asset URL that serves ZIP bytes.""" + import io + import zipfile + + api_asset_url = "https://api.github.com/repos/org/repo/releases/assets/55" + + # Build a minimal in-memory ZIP containing bundle.yml + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w") as zf: + zf.writestr("bundle.yml", yaml.safe_dump(valid_manifest_dict())) + zip_bytes = buf.getvalue() + + captured = [] + + def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None): + captured.append((url, extra_headers)) + return FakeBundleResponse(zip_bytes, url=api_asset_url) + + catalog = project / "catalog.json" + write_catalog_file( + catalog, + {"demo-bundle": catalog_entry_dict("demo-bundle", download_url=api_asset_url)}, + ) + _make_catalog_config(catalog, project) + + with patch("specify_cli.authentication.http.open_url", side_effect=fake_open_url): + result = runner.invoke(app, ["bundle", "info", "demo-bundle", "--json"]) + + assert result.exit_code == 0, result.output + + # No tags API call — URL was already a REST asset URL + tag_calls = [url for url, _ in captured if "releases/tags/" in url] + assert len(tag_calls) == 0 + + # Download used octet-stream header + asset_calls = [(url, h) for url, h in captured if "releases/assets/" in url] + assert len(asset_calls) == 1 + assert asset_calls[0][1] == {"Accept": "application/octet-stream"} + + # ZIP bytes were detected by magic and bundle.yml extracted correctly + payload = json.loads(result.output) + assert payload["id"] == "demo-bundle" From 684893dd23e749e0ff15b6419b18c1eca9127b06 Mon Sep 17 00:00:00 2001 From: lselvar Date: Fri, 26 Jun 2026 14:57:49 -0400 Subject: [PATCH 3/3] fix: improve error message, broaden ZIP magic, drop unused tmp_path Address second-round Copilot review feedback on PR #3136: - Error message: when the download fails, report the original catalog download_url so the user knows which entry to fix; include the resolved REST API URL when it differs for easier debugging. - ZIP detection: broaden the magic-bytes check from PK\x03\x04 to raw[:2] == b"PK", covering all valid ZIP variants (local-file header PK\x03\x04, empty-archive PK\x05\x06, spanned/split PK\x07\x08). - Tests: remove the unused tmp_path parameter from test_bundle_info_resolves_github_browser_release_url_zip. Co-Authored-By: Claude Sonnet 4.6 --- src/specify_cli/commands/bundle/__init__.py | 16 +++++++++++----- tests/contract/test_bundle_cli.py | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/specify_cli/commands/bundle/__init__.py b/src/specify_cli/commands/bundle/__init__.py index e752daf9b2..fc6c22eebb 100644 --- a/src/specify_cli/commands/bundle/__init__.py +++ b/src/specify_cli/commands/bundle/__init__.py @@ -825,15 +825,21 @@ def _validate_redirect(old_url: str, new_url: str) -> None: except BundlerError: raise except Exception as exc: # noqa: BLE001 - raise BundlerError(f"Failed to download bundle '{entry_id}' from {effective_url}: {exc}") from exc + # Report the original catalog URL so users know which entry to fix, + # and include the resolved URL when it differs for easier debugging. + if effective_url != url: + msg = f"Failed to download bundle '{entry_id}' from {url} (resolved to {effective_url}): {exc}" + else: + msg = f"Failed to download bundle '{entry_id}' from {url}: {exc}" + raise BundlerError(msg) from exc # A .zip artifact is written to a temp file and parsed via the local-source # path (which extracts bundle.yml); any other payload is treated as YAML. # Detection uses the original catalog URL's extension when available (browser - # release URLs carry the filename), and falls back to the ZIP magic bytes for - # direct REST API asset URLs which have no file extension in their path. - _ZIP_MAGIC = b"PK\x03\x04" - if url.lower().endswith(".zip") or raw[:4] == _ZIP_MAGIC: + # release URLs carry the filename), and falls back to the ZIP magic bytes + # (``PK`` prefix) for direct REST API asset URLs which have no file extension. + # All valid ZIP variants start with ``PK`` (local-file, empty-archive, split). + if url.lower().endswith(".zip") or raw[:2] == b"PK": with tempfile.TemporaryDirectory() as tmp: artifact = Path(tmp) / "bundle.zip" artifact.write_bytes(raw) diff --git a/tests/contract/test_bundle_cli.py b/tests/contract/test_bundle_cli.py index c955b45034..83a80496da 100644 --- a/tests/contract/test_bundle_cli.py +++ b/tests/contract/test_bundle_cli.py @@ -511,7 +511,7 @@ def fake_open_url(url, timeout=None, extra_headers=None, redirect_validator=None assert asset_calls[0][1] == {"Accept": "application/octet-stream"} -def test_bundle_info_resolves_github_browser_release_url_zip(project: Path, tmp_path: Path): +def test_bundle_info_resolves_github_browser_release_url_zip(project: Path): """bundle info resolves a browser release URL for a .zip artifact and extracts bundle.yml.""" import io import zipfile