From 3beeb691a4b91794bcbeeb691c7c57e7c3fd41b4 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:04:03 -0500 Subject: [PATCH 01/14] fix: validate command 'file' field against path traversal in registrar CommandRegistrar.register_commands() read each command body from source_dir / cmd_file without validating the manifest 'file' field, unlike the parallel skill and preset readers which already reject absolute paths and '..' traversal. A malicious extension/preset/bundle manifest with file: ../../../etc/passwd (or an absolute path) could read arbitrary host files verbatim into a generated agent command at a predictable path (GHSA-w5fv-7w9x-7fc5, CWE-22). Add the same containment guard at the command read site and reject a traversal/absolute 'file' at manifest-load time in ExtensionManifest._validate() for defense-in-depth, plus regression tests for both the read path and the manifest validator. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 17 ++++++- src/specify_cli/extensions.py | 16 +++++++ tests/test_extensions.py | 20 ++++++++ tests/test_registrar_path_traversal.py | 66 ++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 3c06418014..a85135f7c3 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -546,7 +546,22 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - source_file = source_dir / cmd_file + # Guard against path traversal: reject absolute paths and ensure + # the resolved file stays within the source directory. Mirrors the + # containment checks already applied on the skill, preset, and + # restore paths (see extensions.py and presets/__init__.py) so a + # malicious manifest ``file`` field (e.g. ``../../../etc/passwd``) + # cannot read arbitrary host files into a generated command. + cmd_path = Path(cmd_file) + if cmd_path.is_absolute(): + continue + try: + source_root = source_dir.resolve() + source_file = (source_root / cmd_path).resolve() + source_file.relative_to(source_root) # raises ValueError if outside + except (OSError, ValueError): + continue + if not source_file.exists(): continue diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 42ba2fe888..8cf0bca37b 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -290,6 +290,22 @@ def _validate(self): if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") + # Validate the 'file' field for path traversal at manifest-load + # time. This is defense-in-depth: the command/skill/preset readers + # also contain the resolved path, but rejecting a traversal here + # surfaces a clear error instead of silently skipping the command. + cmd_file = cmd["file"] + if not isinstance(cmd_file, str) or not cmd_file: + raise ValidationError( + f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" + ) + cmd_file_path = Path(cmd_file) + if cmd_file_path.is_absolute() or ".." in cmd_file_path.parts: + raise ValidationError( + f"Invalid command 'file' '{cmd_file}': must be a relative path " + "within the extension directory (no absolute paths or '..' segments)" + ) + # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): corrected = self._try_correct_command_name(cmd["name"], ext["id"]) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index e063571b14..5c885c3a3f 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -377,6 +377,26 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): with pytest.raises(ValidationError, match="Invalid command name"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + ["../../../etc/passwd", "../escape.md", "a/../../escape.md", "/etc/passwd"], + ) + def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): + """Manifest 'file' field with traversal/absolute path raises ValidationError. + + Defense-in-depth for GHSA-w5fv-7w9x-7fc5. + """ + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, 'w') as f: + yaml.dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="Invalid command 'file'"): + ExtensionManifest(manifest_path) + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index fc423b4056..3de794743e 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,6 +135,72 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) +FILE_FIELD_PAYLOADS = [ + "../secret.txt", + "../../secret.txt", + "commands/../../secret.txt", + "/etc/passwd", +] + + +class TestCommandFileTraversal: + """The manifest ``file`` field must not read host files outside source_dir. + + Regression for GHSA-w5fv-7w9x-7fc5: ``register_commands`` read + ``source_dir / cmd_file`` with no containment check, so a manifest with + ``file: ../../../etc/passwd`` (or an absolute path) leaked arbitrary host + files verbatim into the generated agent command. + """ + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".claude" / "skills").mkdir(parents=True) + + secret = tmp_path / "secret.txt" + secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "claude", + [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + leaked = [ + p for p in (project).rglob("*") + if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Secret leaked into generated command: {leaked}" + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + secret = tmp_path / "secret.txt" + secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + leaked = [ + p for p in (project).rglob("*") + if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Secret leaked into generated command: {leaked}" + + class TestSafeRegistration: """Positive regression — well-formed names continue to register.""" From b2e71f5b091ecd411fe2d59d00dead13e9c2f434 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:23:31 -0500 Subject: [PATCH 02/14] =?UTF-8?q?test/fix:=20address=20review=20=E2=80=94?= =?UTF-8?q?=20robust=20absolute-path=20test=20and=20tolerant=20reads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - register_commands(): use is_file() instead of exists() and skip the command if read_text() raises (directory or non-UTF8 file), aligning with the other command/skill readers. - Traversal tests: point the absolute-path payload at the real temp secret.txt (guaranteed to exist on all platforms) instead of /etc/passwd, so the absolute-path guard is genuinely exercised and the test fails if it regresses, rather than passing because the target happens not to exist (e.g. on Windows runners). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 7 +++++-- tests/test_registrar_path_traversal.py | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index a85135f7c3..67972080fe 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -562,10 +562,13 @@ def register_commands( except (OSError, ValueError): continue - if not source_file.exists(): + if not source_file.is_file(): continue - content = source_file.read_text(encoding="utf-8") + try: + content = source_file.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + continue frontmatter, body = self.parse_frontmatter(content) if frontmatter.get("strategy") == "wrap": diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 3de794743e..9a9bae3aea 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,14 +135,27 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) +ABS_SECRET = "__ABS_SECRET__" + FILE_FIELD_PAYLOADS = [ "../secret.txt", "../../secret.txt", "commands/../../secret.txt", - "/etc/passwd", + ABS_SECRET, ] +def _resolve_payload(bad_file: str, secret: Path) -> str: + """Map the absolute-path sentinel to the real, existing secret file. + + Using the temp secret's own absolute path (instead of ``/etc/passwd``) + guarantees the file exists on every platform — so the test fails if the + absolute-path guard regresses, rather than passing because the target + happens not to exist (e.g. on Windows runners). + """ + return str(secret) if bad_file == ABS_SECRET else bad_file + + class TestCommandFileTraversal: """The manifest ``file`` field must not read host files outside source_dir. @@ -163,7 +176,7 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): registrar = CommandRegistrar() registered = registrar.register_commands( "claude", - [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], "myext", ext_dir, project, @@ -187,7 +200,7 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): registrar = CommandRegistrar() registered = registrar.register_commands( "gemini", - [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], "myext", ext_dir, project, From 639150f79e251da84763f641b4c1bcbbaa0b63c4 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:28:45 -0500 Subject: [PATCH 03/14] test: rename traversal fixtures to avoid CodeQL secret-storage false positive The regression fixtures named an out-of-tree file secret.txt with TOP-SECRET-CREDENTIAL content. CodeQL's clear-text-storage heuristic treated that read content as sensitive and followed the static path into the pre-existing write_text sinks in _write_registered_output, raising false 'clear-text storage of sensitive information' alerts on PR 3088. Rename the fixtures to neutral outside.txt / OUTSIDE-FILE-MARKER and drop /etc/passwd payloads; the test semantics (a file outside source_dir must never be read into a generated command) are unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 2 +- tests/test_extensions.py | 2 +- tests/test_registrar_path_traversal.py | 44 +++++++++++++------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 67972080fe..b6484ab278 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -550,7 +550,7 @@ def register_commands( # the resolved file stays within the source directory. Mirrors the # containment checks already applied on the skill, preset, and # restore paths (see extensions.py and presets/__init__.py) so a - # malicious manifest ``file`` field (e.g. ``../../../etc/passwd``) + # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) # cannot read arbitrary host files into a generated command. cmd_path = Path(cmd_file) if cmd_path.is_absolute(): diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 5c885c3a3f..101dfc6f61 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -379,7 +379,7 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): @pytest.mark.parametrize( "bad_file", - ["../../../etc/passwd", "../escape.md", "a/../../escape.md", "/etc/passwd"], + ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md"], ) def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): """Manifest 'file' field with traversal/absolute path raises ValidationError. diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 9a9bae3aea..777e8ea899 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,34 +135,34 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) -ABS_SECRET = "__ABS_SECRET__" +ABS_OUTSIDE = "__ABS_OUTSIDE__" FILE_FIELD_PAYLOADS = [ - "../secret.txt", - "../../secret.txt", - "commands/../../secret.txt", - ABS_SECRET, + "../outside.txt", + "../../outside.txt", + "commands/../../outside.txt", + ABS_OUTSIDE, ] -def _resolve_payload(bad_file: str, secret: Path) -> str: - """Map the absolute-path sentinel to the real, existing secret file. +def _resolve_payload(bad_file: str, outside_file: Path) -> str: + """Map the absolute-path sentinel to the real, existing outside file. - Using the temp secret's own absolute path (instead of ``/etc/passwd``) + Using the temp file's own absolute path (instead of ``/etc/passwd``) guarantees the file exists on every platform — so the test fails if the absolute-path guard regresses, rather than passing because the target happens not to exist (e.g. on Windows runners). """ - return str(secret) if bad_file == ABS_SECRET else bad_file + return str(outside_file) if bad_file == ABS_OUTSIDE else bad_file class TestCommandFileTraversal: - """The manifest ``file`` field must not read host files outside source_dir. + """The manifest ``file`` field must not read files outside source_dir. Regression for GHSA-w5fv-7w9x-7fc5: ``register_commands`` read ``source_dir / cmd_file`` with no containment check, so a manifest with - ``file: ../../../etc/passwd`` (or an absolute path) leaked arbitrary host - files verbatim into the generated agent command. + a traversal (``file: ../../../outside.txt``) or an absolute path read an + arbitrary host file verbatim into the generated agent command. """ @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) @@ -170,13 +170,13 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): project, ext_dir = _project_and_source(tmp_path) (project / ".claude" / "skills").mkdir(parents=True) - secret = tmp_path / "secret.txt" - secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") registrar = CommandRegistrar() registered = registrar.register_commands( "claude", - [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], "myext", ext_dir, project, @@ -185,22 +185,22 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): assert registered == [] leaked = [ p for p in (project).rglob("*") - if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") ] - assert leaked == [], f"Secret leaked into generated command: {leaked}" + assert leaked == [], f"Outside file leaked into generated command: {leaked}" @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): project, ext_dir = _project_and_source(tmp_path) (project / ".gemini" / "commands").mkdir(parents=True) - secret = tmp_path / "secret.txt" - secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") registrar = CommandRegistrar() registered = registrar.register_commands( "gemini", - [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], "myext", ext_dir, project, @@ -209,9 +209,9 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): assert registered == [] leaked = [ p for p in (project).rglob("*") - if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") ] - assert leaked == [], f"Secret leaked into generated command: {leaked}" + assert leaked == [], f"Outside file leaked into generated command: {leaked}" class TestSafeRegistration: From 3d5f039084afdea4dd3931e48cc491f0fb64ee2a Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:35:16 -0500 Subject: [PATCH 04/14] fix: reject Windows drive-relative 'file' values in traversal guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit is_absolute() is False for Windows drive-relative paths like C:outside.txt, which contain no '..' yet resolve against the process CWD on that drive — bypassing the containment guard on Windows. Evaluate the 'file' value under PureWindowsPath as well so both the registrar runtime guard and the manifest-load validator reject drive letters (and backslash '..' segments) cross-platform. Extend the regression tests with drive-relative cases. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 20 ++++++++++++-------- src/specify_cli/extensions.py | 21 +++++++++++++++++---- tests/test_extensions.py | 2 +- tests/test_registrar_path_traversal.py | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index b6484ab278..4366cce2a7 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -10,7 +10,7 @@ import platform import re from copy import deepcopy -from pathlib import Path +from pathlib import Path, PureWindowsPath from typing import Any, Dict, List, Optional import yaml @@ -546,14 +546,18 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - # Guard against path traversal: reject absolute paths and ensure - # the resolved file stays within the source directory. Mirrors the - # containment checks already applied on the skill, preset, and - # restore paths (see extensions.py and presets/__init__.py) so a - # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) - # cannot read arbitrary host files into a generated command. + # Guard against path traversal: reject absolute paths (including + # Windows drive-relative paths like ``C:foo``, which are not + # ``is_absolute()`` but resolve against the CWD on their drive) and + # ensure the resolved file stays within the source directory. + # Mirrors the containment checks already applied on the skill, + # preset, and restore paths (see extensions.py and + # presets/__init__.py) so a malicious manifest ``file`` field + # (e.g. ``../../../outside.txt``) cannot read arbitrary host files + # into a generated command. cmd_path = Path(cmd_file) - if cmd_path.is_absolute(): + win_path = PureWindowsPath(cmd_file) + if cmd_path.is_absolute() or win_path.is_absolute() or win_path.drive: continue try: source_root = source_dir.resolve() diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 8cf0bca37b..1cb741e755 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -18,7 +18,7 @@ import zipfile from dataclasses import dataclass from datetime import datetime, timezone -from pathlib import Path +from pathlib import Path, PureWindowsPath from typing import Any, Callable, Dict, List, Optional, Set import pathspec @@ -299,11 +299,24 @@ def _validate(self): raise ValidationError( f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" ) - cmd_file_path = Path(cmd_file) - if cmd_file_path.is_absolute() or ".." in cmd_file_path.parts: + # Evaluate the value under both POSIX and Windows path semantics so + # the check is platform-independent: reject absolute paths, Windows + # drive-relative paths (e.g. ``C:foo``, which are not is_absolute() + # but resolve against the CWD on their drive), and ``..`` segments + # written with either separator. + posix_path = Path(cmd_file) + win_path = PureWindowsPath(cmd_file) + if ( + posix_path.is_absolute() + or win_path.is_absolute() + or win_path.drive + or ".." in posix_path.parts + or ".." in win_path.parts + ): raise ValidationError( f"Invalid command 'file' '{cmd_file}': must be a relative path " - "within the extension directory (no absolute paths or '..' segments)" + "within the extension directory (no absolute paths, drive " + "letters, or '..' segments)" ) # Validate command name format diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 101dfc6f61..43a7050adf 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -379,7 +379,7 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): @pytest.mark.parametrize( "bad_file", - ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md"], + ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md", "C:escape.md", "C:\\Windows\\x.md", "..\\..\\escape.md"], ) def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): """Manifest 'file' field with traversal/absolute path raises ValidationError. diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 777e8ea899..7d1fd54e1d 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -141,6 +141,7 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): "../outside.txt", "../../outside.txt", "commands/../../outside.txt", + "C:outside.txt", ABS_OUTSIDE, ] From 8041cddb3a2039dd4d81229e0888e51d6813487c Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:44:09 -0500 Subject: [PATCH 05/14] fix: use anchor under both path flavors so POSIX-absolute is rejected on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a Windows runner WindowsPath('/abs/outside.md').is_absolute() is False (no drive), so the prior native-Path check let a leading-slash 'file' value through and the manifest validator did not raise. Evaluate the value under both PurePosixPath and PureWindowsPath and reject any non-empty anchor — covering POSIX-absolute, Windows drive-relative, Windows absolute, and rooted-without-drive — in both the registrar guard and the manifest validator. The registrar join now uses the raw 'file' string so native separators are handled by the resolve()/relative_to() containment check. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 27 +++++++++++++-------------- src/specify_cli/extensions.py | 19 ++++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 4366cce2a7..c211a69d6c 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -10,7 +10,7 @@ import platform import re from copy import deepcopy -from pathlib import Path, PureWindowsPath +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Dict, List, Optional import yaml @@ -546,22 +546,21 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - # Guard against path traversal: reject absolute paths (including - # Windows drive-relative paths like ``C:foo``, which are not - # ``is_absolute()`` but resolve against the CWD on their drive) and - # ensure the resolved file stays within the source directory. - # Mirrors the containment checks already applied on the skill, - # preset, and restore paths (see extensions.py and - # presets/__init__.py) so a malicious manifest ``file`` field - # (e.g. ``../../../outside.txt``) cannot read arbitrary host files - # into a generated command. - cmd_path = Path(cmd_file) - win_path = PureWindowsPath(cmd_file) - if cmd_path.is_absolute() or win_path.is_absolute() or win_path.drive: + # Guard against path traversal: reject any anchored value under + # either POSIX or Windows semantics — POSIX-absolute (``/abs``), + # Windows drive-relative (``C:foo``, not ``is_absolute()`` but + # resolved against the CWD on its drive), Windows absolute + # (``C:\foo``), and rooted-without-drive (``\foo``) — then ensure + # the resolved file stays within the source directory. Mirrors the + # containment checks already applied on the skill, preset, and + # restore paths (see extensions.py and presets/__init__.py) so a + # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) + # cannot read arbitrary host files into a generated command. + if PurePosixPath(cmd_file).anchor or PureWindowsPath(cmd_file).anchor: continue try: source_root = source_dir.resolve() - source_file = (source_root / cmd_path).resolve() + source_file = (source_root / cmd_file).resolve() source_file.relative_to(source_root) # raises ValueError if outside except (OSError, ValueError): continue diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 1cb741e755..34a4774a85 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -18,7 +18,7 @@ import zipfile from dataclasses import dataclass from datetime import datetime, timezone -from pathlib import Path, PureWindowsPath +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Callable, Dict, List, Optional, Set import pathspec @@ -300,16 +300,17 @@ def _validate(self): f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" ) # Evaluate the value under both POSIX and Windows path semantics so - # the check is platform-independent: reject absolute paths, Windows - # drive-relative paths (e.g. ``C:foo``, which are not is_absolute() - # but resolve against the CWD on their drive), and ``..`` segments - # written with either separator. - posix_path = Path(cmd_file) + # the check is platform-independent. Reject any non-empty anchor — + # which covers POSIX-absolute (``/abs``), Windows drive-relative + # (``C:foo``), Windows absolute (``C:\foo``), and rooted-without- + # drive (``\foo``) — plus ``..`` segments written with either + # separator. Native ``Path`` alone is insufficient: on Windows + # ``WindowsPath('/abs').is_absolute()`` is False (no drive). + posix_path = PurePosixPath(cmd_file) win_path = PureWindowsPath(cmd_file) if ( - posix_path.is_absolute() - or win_path.is_absolute() - or win_path.drive + posix_path.anchor + or win_path.anchor or ".." in posix_path.parts or ".." in win_path.parts ): From 364535c86909dbfce34923bac16c5123af424892 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:04:03 -0500 Subject: [PATCH 06/14] fix: validate command 'file' field against path traversal in registrar CommandRegistrar.register_commands() read each command body from source_dir / cmd_file without validating the manifest 'file' field, unlike the parallel skill and preset readers which already reject absolute paths and '..' traversal. A malicious extension/preset/bundle manifest with file: ../../../etc/passwd (or an absolute path) could read arbitrary host files verbatim into a generated agent command at a predictable path (GHSA-w5fv-7w9x-7fc5, CWE-22). Add the same containment guard at the command read site and reject a traversal/absolute 'file' at manifest-load time in ExtensionManifest._validate() for defense-in-depth, plus regression tests for both the read path and the manifest validator. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 17 ++++++- src/specify_cli/extensions.py | 16 +++++++ tests/test_extensions.py | 20 ++++++++ tests/test_registrar_path_traversal.py | 66 ++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 3c06418014..a85135f7c3 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -546,7 +546,22 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - source_file = source_dir / cmd_file + # Guard against path traversal: reject absolute paths and ensure + # the resolved file stays within the source directory. Mirrors the + # containment checks already applied on the skill, preset, and + # restore paths (see extensions.py and presets/__init__.py) so a + # malicious manifest ``file`` field (e.g. ``../../../etc/passwd``) + # cannot read arbitrary host files into a generated command. + cmd_path = Path(cmd_file) + if cmd_path.is_absolute(): + continue + try: + source_root = source_dir.resolve() + source_file = (source_root / cmd_path).resolve() + source_file.relative_to(source_root) # raises ValueError if outside + except (OSError, ValueError): + continue + if not source_file.exists(): continue diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 42ba2fe888..8cf0bca37b 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -290,6 +290,22 @@ def _validate(self): if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") + # Validate the 'file' field for path traversal at manifest-load + # time. This is defense-in-depth: the command/skill/preset readers + # also contain the resolved path, but rejecting a traversal here + # surfaces a clear error instead of silently skipping the command. + cmd_file = cmd["file"] + if not isinstance(cmd_file, str) or not cmd_file: + raise ValidationError( + f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" + ) + cmd_file_path = Path(cmd_file) + if cmd_file_path.is_absolute() or ".." in cmd_file_path.parts: + raise ValidationError( + f"Invalid command 'file' '{cmd_file}': must be a relative path " + "within the extension directory (no absolute paths or '..' segments)" + ) + # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): corrected = self._try_correct_command_name(cmd["name"], ext["id"]) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index e063571b14..5c885c3a3f 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -377,6 +377,26 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): with pytest.raises(ValidationError, match="Invalid command name"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize( + "bad_file", + ["../../../etc/passwd", "../escape.md", "a/../../escape.md", "/etc/passwd"], + ) + def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): + """Manifest 'file' field with traversal/absolute path raises ValidationError. + + Defense-in-depth for GHSA-w5fv-7w9x-7fc5. + """ + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, 'w') as f: + yaml.dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="Invalid command 'file'"): + ExtensionManifest(manifest_path) + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index fc423b4056..3de794743e 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,6 +135,72 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) +FILE_FIELD_PAYLOADS = [ + "../secret.txt", + "../../secret.txt", + "commands/../../secret.txt", + "/etc/passwd", +] + + +class TestCommandFileTraversal: + """The manifest ``file`` field must not read host files outside source_dir. + + Regression for GHSA-w5fv-7w9x-7fc5: ``register_commands`` read + ``source_dir / cmd_file`` with no containment check, so a manifest with + ``file: ../../../etc/passwd`` (or an absolute path) leaked arbitrary host + files verbatim into the generated agent command. + """ + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".claude" / "skills").mkdir(parents=True) + + secret = tmp_path / "secret.txt" + secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "claude", + [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + leaked = [ + p for p in (project).rglob("*") + if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Secret leaked into generated command: {leaked}" + + @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) + def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + secret = tmp_path / "secret.txt" + secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + leaked = [ + p for p in (project).rglob("*") + if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Secret leaked into generated command: {leaked}" + + class TestSafeRegistration: """Positive regression — well-formed names continue to register.""" From e6acae8b2c03f61e706156716cf6ba697cfe6f0a Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:23:31 -0500 Subject: [PATCH 07/14] =?UTF-8?q?test/fix:=20address=20review=20=E2=80=94?= =?UTF-8?q?=20robust=20absolute-path=20test=20and=20tolerant=20reads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - register_commands(): use is_file() instead of exists() and skip the command if read_text() raises (directory or non-UTF8 file), aligning with the other command/skill readers. - Traversal tests: point the absolute-path payload at the real temp secret.txt (guaranteed to exist on all platforms) instead of /etc/passwd, so the absolute-path guard is genuinely exercised and the test fails if it regresses, rather than passing because the target happens not to exist (e.g. on Windows runners). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 7 +++++-- tests/test_registrar_path_traversal.py | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index a85135f7c3..67972080fe 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -562,10 +562,13 @@ def register_commands( except (OSError, ValueError): continue - if not source_file.exists(): + if not source_file.is_file(): continue - content = source_file.read_text(encoding="utf-8") + try: + content = source_file.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + continue frontmatter, body = self.parse_frontmatter(content) if frontmatter.get("strategy") == "wrap": diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 3de794743e..9a9bae3aea 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,14 +135,27 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) +ABS_SECRET = "__ABS_SECRET__" + FILE_FIELD_PAYLOADS = [ "../secret.txt", "../../secret.txt", "commands/../../secret.txt", - "/etc/passwd", + ABS_SECRET, ] +def _resolve_payload(bad_file: str, secret: Path) -> str: + """Map the absolute-path sentinel to the real, existing secret file. + + Using the temp secret's own absolute path (instead of ``/etc/passwd``) + guarantees the file exists on every platform — so the test fails if the + absolute-path guard regresses, rather than passing because the target + happens not to exist (e.g. on Windows runners). + """ + return str(secret) if bad_file == ABS_SECRET else bad_file + + class TestCommandFileTraversal: """The manifest ``file`` field must not read host files outside source_dir. @@ -163,7 +176,7 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): registrar = CommandRegistrar() registered = registrar.register_commands( "claude", - [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], "myext", ext_dir, project, @@ -187,7 +200,7 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): registrar = CommandRegistrar() registered = registrar.register_commands( "gemini", - [{"name": "speckit.myext.hello", "file": bad_file, "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], "myext", ext_dir, project, From 1e9956b69de5060bfbb13220dd4d0da5bb22e0e7 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:28:45 -0500 Subject: [PATCH 08/14] test: rename traversal fixtures to avoid CodeQL secret-storage false positive The regression fixtures named an out-of-tree file secret.txt with TOP-SECRET-CREDENTIAL content. CodeQL's clear-text-storage heuristic treated that read content as sensitive and followed the static path into the pre-existing write_text sinks in _write_registered_output, raising false 'clear-text storage of sensitive information' alerts on PR 3088. Rename the fixtures to neutral outside.txt / OUTSIDE-FILE-MARKER and drop /etc/passwd payloads; the test semantics (a file outside source_dir must never be read into a generated command) are unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 2 +- tests/test_extensions.py | 2 +- tests/test_registrar_path_traversal.py | 44 +++++++++++++------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 67972080fe..b6484ab278 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -550,7 +550,7 @@ def register_commands( # the resolved file stays within the source directory. Mirrors the # containment checks already applied on the skill, preset, and # restore paths (see extensions.py and presets/__init__.py) so a - # malicious manifest ``file`` field (e.g. ``../../../etc/passwd``) + # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) # cannot read arbitrary host files into a generated command. cmd_path = Path(cmd_file) if cmd_path.is_absolute(): diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 5c885c3a3f..101dfc6f61 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -379,7 +379,7 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): @pytest.mark.parametrize( "bad_file", - ["../../../etc/passwd", "../escape.md", "a/../../escape.md", "/etc/passwd"], + ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md"], ) def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): """Manifest 'file' field with traversal/absolute path raises ValidationError. diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 9a9bae3aea..777e8ea899 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -135,34 +135,34 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): _assert_no_stray_files(tmp_path, Path(bad_name).name.replace("/", "")) -ABS_SECRET = "__ABS_SECRET__" +ABS_OUTSIDE = "__ABS_OUTSIDE__" FILE_FIELD_PAYLOADS = [ - "../secret.txt", - "../../secret.txt", - "commands/../../secret.txt", - ABS_SECRET, + "../outside.txt", + "../../outside.txt", + "commands/../../outside.txt", + ABS_OUTSIDE, ] -def _resolve_payload(bad_file: str, secret: Path) -> str: - """Map the absolute-path sentinel to the real, existing secret file. +def _resolve_payload(bad_file: str, outside_file: Path) -> str: + """Map the absolute-path sentinel to the real, existing outside file. - Using the temp secret's own absolute path (instead of ``/etc/passwd``) + Using the temp file's own absolute path (instead of ``/etc/passwd``) guarantees the file exists on every platform — so the test fails if the absolute-path guard regresses, rather than passing because the target happens not to exist (e.g. on Windows runners). """ - return str(secret) if bad_file == ABS_SECRET else bad_file + return str(outside_file) if bad_file == ABS_OUTSIDE else bad_file class TestCommandFileTraversal: - """The manifest ``file`` field must not read host files outside source_dir. + """The manifest ``file`` field must not read files outside source_dir. Regression for GHSA-w5fv-7w9x-7fc5: ``register_commands`` read ``source_dir / cmd_file`` with no containment check, so a manifest with - ``file: ../../../etc/passwd`` (or an absolute path) leaked arbitrary host - files verbatim into the generated agent command. + a traversal (``file: ../../../outside.txt``) or an absolute path read an + arbitrary host file verbatim into the generated agent command. """ @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) @@ -170,13 +170,13 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): project, ext_dir = _project_and_source(tmp_path) (project / ".claude" / "skills").mkdir(parents=True) - secret = tmp_path / "secret.txt" - secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") registrar = CommandRegistrar() registered = registrar.register_commands( "claude", - [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], "myext", ext_dir, project, @@ -185,22 +185,22 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): assert registered == [] leaked = [ p for p in (project).rglob("*") - if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") ] - assert leaked == [], f"Secret leaked into generated command: {leaked}" + assert leaked == [], f"Outside file leaked into generated command: {leaked}" @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): project, ext_dir = _project_and_source(tmp_path) (project / ".gemini" / "commands").mkdir(parents=True) - secret = tmp_path / "secret.txt" - secret.write_text("TOP-SECRET-CREDENTIAL", encoding="utf-8") + outside_file = tmp_path / "outside.txt" + outside_file.write_text("OUTSIDE-FILE-MARKER", encoding="utf-8") registrar = CommandRegistrar() registered = registrar.register_commands( "gemini", - [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, secret), "aliases": []}], + [{"name": "speckit.myext.hello", "file": _resolve_payload(bad_file, outside_file), "aliases": []}], "myext", ext_dir, project, @@ -209,9 +209,9 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): assert registered == [] leaked = [ p for p in (project).rglob("*") - if p.is_file() and "TOP-SECRET-CREDENTIAL" in p.read_text(encoding="utf-8", errors="ignore") + if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") ] - assert leaked == [], f"Secret leaked into generated command: {leaked}" + assert leaked == [], f"Outside file leaked into generated command: {leaked}" class TestSafeRegistration: From b84ea663336593e62883c68dc9607fd72a642051 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:35:16 -0500 Subject: [PATCH 09/14] fix: reject Windows drive-relative 'file' values in traversal guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit is_absolute() is False for Windows drive-relative paths like C:outside.txt, which contain no '..' yet resolve against the process CWD on that drive — bypassing the containment guard on Windows. Evaluate the 'file' value under PureWindowsPath as well so both the registrar runtime guard and the manifest-load validator reject drive letters (and backslash '..' segments) cross-platform. Extend the regression tests with drive-relative cases. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 20 ++++++++++++-------- src/specify_cli/extensions.py | 21 +++++++++++++++++---- tests/test_extensions.py | 2 +- tests/test_registrar_path_traversal.py | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index b6484ab278..4366cce2a7 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -10,7 +10,7 @@ import platform import re from copy import deepcopy -from pathlib import Path +from pathlib import Path, PureWindowsPath from typing import Any, Dict, List, Optional import yaml @@ -546,14 +546,18 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - # Guard against path traversal: reject absolute paths and ensure - # the resolved file stays within the source directory. Mirrors the - # containment checks already applied on the skill, preset, and - # restore paths (see extensions.py and presets/__init__.py) so a - # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) - # cannot read arbitrary host files into a generated command. + # Guard against path traversal: reject absolute paths (including + # Windows drive-relative paths like ``C:foo``, which are not + # ``is_absolute()`` but resolve against the CWD on their drive) and + # ensure the resolved file stays within the source directory. + # Mirrors the containment checks already applied on the skill, + # preset, and restore paths (see extensions.py and + # presets/__init__.py) so a malicious manifest ``file`` field + # (e.g. ``../../../outside.txt``) cannot read arbitrary host files + # into a generated command. cmd_path = Path(cmd_file) - if cmd_path.is_absolute(): + win_path = PureWindowsPath(cmd_file) + if cmd_path.is_absolute() or win_path.is_absolute() or win_path.drive: continue try: source_root = source_dir.resolve() diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 8cf0bca37b..1cb741e755 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -18,7 +18,7 @@ import zipfile from dataclasses import dataclass from datetime import datetime, timezone -from pathlib import Path +from pathlib import Path, PureWindowsPath from typing import Any, Callable, Dict, List, Optional, Set import pathspec @@ -299,11 +299,24 @@ def _validate(self): raise ValidationError( f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" ) - cmd_file_path = Path(cmd_file) - if cmd_file_path.is_absolute() or ".." in cmd_file_path.parts: + # Evaluate the value under both POSIX and Windows path semantics so + # the check is platform-independent: reject absolute paths, Windows + # drive-relative paths (e.g. ``C:foo``, which are not is_absolute() + # but resolve against the CWD on their drive), and ``..`` segments + # written with either separator. + posix_path = Path(cmd_file) + win_path = PureWindowsPath(cmd_file) + if ( + posix_path.is_absolute() + or win_path.is_absolute() + or win_path.drive + or ".." in posix_path.parts + or ".." in win_path.parts + ): raise ValidationError( f"Invalid command 'file' '{cmd_file}': must be a relative path " - "within the extension directory (no absolute paths or '..' segments)" + "within the extension directory (no absolute paths, drive " + "letters, or '..' segments)" ) # Validate command name format diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 101dfc6f61..43a7050adf 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -379,7 +379,7 @@ def test_invalid_command_name(self, temp_dir, valid_manifest_data): @pytest.mark.parametrize( "bad_file", - ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md"], + ["../../../outside.md", "../escape.md", "a/../../escape.md", "/abs/outside.md", "C:escape.md", "C:\\Windows\\x.md", "..\\..\\escape.md"], ) def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, bad_file): """Manifest 'file' field with traversal/absolute path raises ValidationError. diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 777e8ea899..7d1fd54e1d 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -141,6 +141,7 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): "../outside.txt", "../../outside.txt", "commands/../../outside.txt", + "C:outside.txt", ABS_OUTSIDE, ] From 92daad6f63c16d579974df03e7104e3572762b3a Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:44:09 -0500 Subject: [PATCH 10/14] fix: use anchor under both path flavors so POSIX-absolute is rejected on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a Windows runner WindowsPath('/abs/outside.md').is_absolute() is False (no drive), so the prior native-Path check let a leading-slash 'file' value through and the manifest validator did not raise. Evaluate the value under both PurePosixPath and PureWindowsPath and reject any non-empty anchor — covering POSIX-absolute, Windows drive-relative, Windows absolute, and rooted-without-drive — in both the registrar guard and the manifest validator. The registrar join now uses the raw 'file' string so native separators are handled by the resolve()/relative_to() containment check. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 27 +++++++++++++-------------- src/specify_cli/extensions.py | 19 ++++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 4366cce2a7..c211a69d6c 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -10,7 +10,7 @@ import platform import re from copy import deepcopy -from pathlib import Path, PureWindowsPath +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Dict, List, Optional import yaml @@ -546,22 +546,21 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - # Guard against path traversal: reject absolute paths (including - # Windows drive-relative paths like ``C:foo``, which are not - # ``is_absolute()`` but resolve against the CWD on their drive) and - # ensure the resolved file stays within the source directory. - # Mirrors the containment checks already applied on the skill, - # preset, and restore paths (see extensions.py and - # presets/__init__.py) so a malicious manifest ``file`` field - # (e.g. ``../../../outside.txt``) cannot read arbitrary host files - # into a generated command. - cmd_path = Path(cmd_file) - win_path = PureWindowsPath(cmd_file) - if cmd_path.is_absolute() or win_path.is_absolute() or win_path.drive: + # Guard against path traversal: reject any anchored value under + # either POSIX or Windows semantics — POSIX-absolute (``/abs``), + # Windows drive-relative (``C:foo``, not ``is_absolute()`` but + # resolved against the CWD on its drive), Windows absolute + # (``C:\foo``), and rooted-without-drive (``\foo``) — then ensure + # the resolved file stays within the source directory. Mirrors the + # containment checks already applied on the skill, preset, and + # restore paths (see extensions.py and presets/__init__.py) so a + # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) + # cannot read arbitrary host files into a generated command. + if PurePosixPath(cmd_file).anchor or PureWindowsPath(cmd_file).anchor: continue try: source_root = source_dir.resolve() - source_file = (source_root / cmd_path).resolve() + source_file = (source_root / cmd_file).resolve() source_file.relative_to(source_root) # raises ValueError if outside except (OSError, ValueError): continue diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 1cb741e755..34a4774a85 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -18,7 +18,7 @@ import zipfile from dataclasses import dataclass from datetime import datetime, timezone -from pathlib import Path, PureWindowsPath +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Callable, Dict, List, Optional, Set import pathspec @@ -300,16 +300,17 @@ def _validate(self): f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" ) # Evaluate the value under both POSIX and Windows path semantics so - # the check is platform-independent: reject absolute paths, Windows - # drive-relative paths (e.g. ``C:foo``, which are not is_absolute() - # but resolve against the CWD on their drive), and ``..`` segments - # written with either separator. - posix_path = Path(cmd_file) + # the check is platform-independent. Reject any non-empty anchor — + # which covers POSIX-absolute (``/abs``), Windows drive-relative + # (``C:foo``), Windows absolute (``C:\foo``), and rooted-without- + # drive (``\foo``) — plus ``..`` segments written with either + # separator. Native ``Path`` alone is insufficient: on Windows + # ``WindowsPath('/abs').is_absolute()`` is False (no drive). + posix_path = PurePosixPath(cmd_file) win_path = PureWindowsPath(cmd_file) if ( - posix_path.is_absolute() - or win_path.is_absolute() - or win_path.drive + posix_path.anchor + or win_path.anchor or ".." in posix_path.parts or ".." in win_path.parts ): From c4855b4215eb2f77de6730211116b739997c1578 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:56:51 -0500 Subject: [PATCH 11/14] refactor: harden register_commands inputs and tighten manifest 'file' validation Address review feedback on #3088: - register_commands(): skip non-string/empty 'file' values instead of raising TypeError, and hoist source_dir.resolve() out of the per-command loop. - ExtensionManifest._validate(): reject 'file' values with leading/trailing whitespace with a clear ValidationError instead of a confusing missing-file failure later. - tests: add non-string 'file' and whitespace cases; use yaml.safe_dump with explicit utf-8 encoding in the manifest validation test. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 7 ++++++- src/specify_cli/extensions.py | 5 +++++ tests/test_extensions.py | 18 ++++++++++++++++-- tests/test_registrar_path_traversal.py | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index c211a69d6c..1cb09b56d3 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -540,12 +540,18 @@ def register_commands( registered = [] is_cline_ext = agent_name == "cline" and source_id != "core" + source_root = source_dir.resolve() for cmd_info in commands: cmd_name = cmd_info["name"] aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] + # Skip malformed entries: a non-string/empty ``file`` cannot be a + # valid command body and would otherwise raise when used as a path. + if not isinstance(cmd_file, str) or not cmd_file: + continue + # Guard against path traversal: reject any anchored value under # either POSIX or Windows semantics — POSIX-absolute (``/abs``), # Windows drive-relative (``C:foo``, not ``is_absolute()`` but @@ -559,7 +565,6 @@ def register_commands( if PurePosixPath(cmd_file).anchor or PureWindowsPath(cmd_file).anchor: continue try: - source_root = source_dir.resolve() source_file = (source_root / cmd_file).resolve() source_file.relative_to(source_root) # raises ValueError if outside except (OSError, ValueError): diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 34a4774a85..5e82aba897 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -299,6 +299,11 @@ def _validate(self): raise ValidationError( f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" ) + if cmd_file.strip() != cmd_file: + raise ValidationError( + f"Invalid command 'file' '{cmd_file}': must not have leading or " + "trailing whitespace" + ) # Evaluate the value under both POSIX and Windows path semantics so # the check is platform-independent. Reject any non-empty anchor — # which covers POSIX-absolute (``/abs``), Windows drive-relative diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 43a7050adf..9cf0167ce5 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -391,12 +391,26 @@ def test_command_file_traversal_rejected(self, temp_dir, valid_manifest_data, ba valid_manifest_data["provides"]["commands"][0]["file"] = bad_file manifest_path = temp_dir / "extension.yml" - with open(manifest_path, 'w') as f: - yaml.dump(valid_manifest_data, f) + with open(manifest_path, 'w', encoding='utf-8') as f: + yaml.safe_dump(valid_manifest_data, f) with pytest.raises(ValidationError, match="Invalid command 'file'"): ExtensionManifest(manifest_path) + @pytest.mark.parametrize("bad_file", [" commands/hello.md", "commands/hello.md ", "\tcommands/hello.md"]) + def test_command_file_whitespace_rejected(self, temp_dir, valid_manifest_data, bad_file): + """Manifest 'file' with leading/trailing whitespace raises ValidationError.""" + import yaml + + valid_manifest_data["provides"]["commands"][0]["file"] = bad_file + + manifest_path = temp_dir / "extension.yml" + with open(manifest_path, 'w', encoding='utf-8') as f: + yaml.safe_dump(valid_manifest_data, f) + + with pytest.raises(ValidationError, match="leading or trailing whitespace"): + ExtensionManifest(manifest_path) + def test_command_name_autocorrect_speckit_prefix(self, temp_dir, valid_manifest_data): """Test that 'speckit.command' is auto-corrected to 'speckit.{ext_id}.command'.""" import yaml diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 7d1fd54e1d..b2ba44363f 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -214,6 +214,23 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): ] assert leaked == [], f"Outside file leaked into generated command: {leaked}" + @pytest.mark.parametrize("bad_value", [None, 123, "", ["x"]]) + def test_non_string_file_is_skipped(self, tmp_path, bad_value): + """A non-string/empty ``file`` must be skipped, not raise TypeError.""" + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": bad_value, "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + class TestSafeRegistration: """Positive regression — well-formed names continue to register.""" From 7b9a11308122c4b66877541eff4e2e061f4d3e22 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 09:10:00 -0500 Subject: [PATCH 12/14] refactor: align runtime '..' policy, correct comment, dedupe test helper Address review feedback on #3088: - register_commands(): also reject '..' segments under both POSIX and Windows semantics, keeping runtime policy consistent with ExtensionManifest._validate() and the skill/preset readers (not just relying on the resolve()/relative_to() containment backstop). - Replace the version-dependent is_absolute() claim in the extensions.py comment with the actual portability rationale (native Path is OS- dependent; C:foo is anchored but not absolute). - Extract the duplicated leak-detection assertion into _assert_no_marker_leak() and add an in-bounds '..' payload that exercises the new runtime '..' rejection. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/agents.py | 30 +++++++++++++++++--------- src/specify_cli/extensions.py | 13 +++++------ tests/test_registrar_path_traversal.py | 22 ++++++++++--------- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 1cb09b56d3..885293952c 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -552,17 +552,27 @@ def register_commands( if not isinstance(cmd_file, str) or not cmd_file: continue - # Guard against path traversal: reject any anchored value under - # either POSIX or Windows semantics — POSIX-absolute (``/abs``), - # Windows drive-relative (``C:foo``, not ``is_absolute()`` but + # Guard against path traversal, keeping runtime policy aligned with + # ExtensionManifest._validate() and the skill/preset/restore readers. + # Evaluate the value under both POSIX and Windows path flavors + # because a native ``Path`` is OS-dependent (a ``PurePosixPath`` on + # POSIX does not interpret Windows drive/UNC forms). Reject any + # non-empty anchor — which covers POSIX-absolute (``/abs``), Windows + # drive-relative (``C:foo``, anchored but not ``is_absolute()`` yet # resolved against the CWD on its drive), Windows absolute - # (``C:\foo``), and rooted-without-drive (``\foo``) — then ensure - # the resolved file stays within the source directory. Mirrors the - # containment checks already applied on the skill, preset, and - # restore paths (see extensions.py and presets/__init__.py) so a - # malicious manifest ``file`` field (e.g. ``../../../outside.txt``) - # cannot read arbitrary host files into a generated command. - if PurePosixPath(cmd_file).anchor or PureWindowsPath(cmd_file).anchor: + # (``C:\foo``), and UNC roots — and any ``..`` segment in either + # separator style, so a malicious manifest ``file`` field + # (e.g. ``../../../outside.txt``) cannot read arbitrary host files + # into a generated command. The resolve()/relative_to() check below + # is the final containment backstop. + posix_path = PurePosixPath(cmd_file) + win_path = PureWindowsPath(cmd_file) + if ( + posix_path.anchor + or win_path.anchor + or ".." in posix_path.parts + or ".." in win_path.parts + ): continue try: source_file = (source_root / cmd_file).resolve() diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 5e82aba897..f502133eb6 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -305,12 +305,13 @@ def _validate(self): "trailing whitespace" ) # Evaluate the value under both POSIX and Windows path semantics so - # the check is platform-independent. Reject any non-empty anchor — - # which covers POSIX-absolute (``/abs``), Windows drive-relative - # (``C:foo``), Windows absolute (``C:\foo``), and rooted-without- - # drive (``\foo``) — plus ``..`` segments written with either - # separator. Native ``Path`` alone is insufficient: on Windows - # ``WindowsPath('/abs').is_absolute()`` is False (no drive). + # the check is platform-independent. A native ``Path`` is OS- + # dependent — a ``PurePosixPath`` on POSIX won't interpret Windows + # drive/UNC forms, and ``C:foo`` is anchored but not + # ``is_absolute()``. Reject any non-empty anchor — which covers + # POSIX-absolute (``/abs``), Windows drive-relative (``C:foo``), + # Windows absolute (``C:\foo``), and UNC/rooted forms — plus ``..`` + # segments written with either separator. posix_path = PurePosixPath(cmd_file) win_path = PureWindowsPath(cmd_file) if ( diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index b2ba44363f..346e96621c 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -141,6 +141,7 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): "../outside.txt", "../../outside.txt", "commands/../../outside.txt", + "commands/../cmd.md", # in-bounds after resolve; only the '..' check rejects it "C:outside.txt", ABS_OUTSIDE, ] @@ -157,6 +158,15 @@ def _resolve_payload(bad_file: str, outside_file: Path) -> str: return str(outside_file) if bad_file == ABS_OUTSIDE else bad_file +def _assert_no_marker_leak(project: Path, marker: str) -> None: + """Fail if ``marker`` content was written into any file under ``project``.""" + leaked = [ + p for p in project.rglob("*") + if p.is_file() and marker in p.read_text(encoding="utf-8", errors="ignore") + ] + assert leaked == [], f"Outside file leaked into generated command: {leaked}" + + class TestCommandFileTraversal: """The manifest ``file`` field must not read files outside source_dir. @@ -184,11 +194,7 @@ def test_claude_skips_traversal_in_file_field(self, tmp_path, bad_file): ) assert registered == [] - leaked = [ - p for p in (project).rglob("*") - if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") - ] - assert leaked == [], f"Outside file leaked into generated command: {leaked}" + _assert_no_marker_leak(project, "OUTSIDE-FILE-MARKER") @pytest.mark.parametrize("bad_file", FILE_FIELD_PAYLOADS) def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): @@ -208,11 +214,7 @@ def test_gemini_skips_traversal_in_file_field(self, tmp_path, bad_file): ) assert registered == [] - leaked = [ - p for p in (project).rglob("*") - if p.is_file() and "OUTSIDE-FILE-MARKER" in p.read_text(encoding="utf-8", errors="ignore") - ] - assert leaked == [], f"Outside file leaked into generated command: {leaked}" + _assert_no_marker_leak(project, "OUTSIDE-FILE-MARKER") @pytest.mark.parametrize("bad_value", [None, 123, "", ["x"]]) def test_non_string_file_is_skipped(self, tmp_path, bad_value): From 85246ccf44faccec48f3019fdab7ceac8a18c076 Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 09:29:40 -0500 Subject: [PATCH 13/14] Extract shared path-safety policy and warn on unreadable command files Introduce relative_extension_path_violation() in _utils.py as the single source of truth for the extension-relative `file` path-safety policy, and use it from both the runtime registrar guard (agents.py) and the manifest-load validator (extensions.py) so the two cannot drift. Warn (instead of silently skipping) when an in-bounds command file exists but cannot be read/decoded, surfacing misconfigured extensions. Add unit tests for the shared helper, a read-skip warning test, and make the in-bounds `..` test create its target file so the skip is attributable to the `..` rejection rather than file absence. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/specify_cli/_utils.py | 40 +++++++++++- src/specify_cli/agents.py | 46 ++++++-------- src/specify_cli/extensions.py | 48 ++++----------- tests/test_registrar_path_traversal.py | 85 +++++++++++++++++++++++++- 4 files changed, 151 insertions(+), 68 deletions(-) diff --git a/src/specify_cli/_utils.py b/src/specify_cli/_utils.py index 30c59f553a..d921e591d9 100644 --- a/src/specify_cli/_utils.py +++ b/src/specify_cli/_utils.py @@ -9,7 +9,7 @@ import subprocess import tempfile import yaml -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any from ._console import console @@ -17,6 +17,44 @@ CLAUDE_NPM_LOCAL_PATH = Path.home() / ".claude" / "local" / "node_modules" / ".bin" / "claude" +def relative_extension_path_violation(value: Any) -> str | None: + """Return why ``value`` is unsafe as an extension-relative ``file`` path. + + Single source of truth for the path-safety policy shared by + ``ExtensionManifest._validate()`` (manifest-load validation) and + ``CommandRegistrar.register_commands()`` (runtime guard), so the two cannot + drift. Returns a human-readable reason string when ``value`` is unsafe, or + ``None`` when it is an acceptable relative path within the extension + directory. + + Policy: the value must be a non-empty string with no leading/trailing + whitespace, no absolute/anchored form, and no ``..`` traversal. The value is + evaluated under both POSIX and Windows path semantics because a native + ``Path`` is OS-dependent (a ``PurePosixPath`` on POSIX does not interpret + Windows drive/UNC forms, and ``C:foo`` is anchored but not ``is_absolute()`` + yet resolves against the CWD on its drive). Rejecting any non-empty anchor + covers POSIX-absolute (``/abs``), Windows drive-relative (``C:foo``), Windows + absolute (``C:\\foo``), and UNC/rooted forms. + """ + if not isinstance(value, str) or not value: + return "must be a non-empty string" + if value.strip() != value: + return "must not have leading or trailing whitespace" + posix_path = PurePosixPath(value) + win_path = PureWindowsPath(value) + if ( + posix_path.anchor + or win_path.anchor + or ".." in posix_path.parts + or ".." in win_path.parts + ): + return ( + "must be a relative path within the extension directory " + "(no absolute paths, drive letters, or '..' segments)" + ) + return None + + def dump_frontmatter(data: dict[str, Any]) -> str: """Serialize skill/command frontmatter to a YAML string. diff --git a/src/specify_cli/agents.py b/src/specify_cli/agents.py index 885293952c..881517b427 100644 --- a/src/specify_cli/agents.py +++ b/src/specify_cli/agents.py @@ -10,12 +10,13 @@ import platform import re from copy import deepcopy -from pathlib import Path, PurePosixPath, PureWindowsPath +from pathlib import Path from typing import Any, Dict, List, Optional import yaml from ._init_options import is_ai_skills_enabled, load_init_options +from ._utils import relative_extension_path_violation def _build_agent_configs() -> dict[str, Any]: @@ -547,32 +548,14 @@ def register_commands( aliases = cmd_info.get("aliases", []) cmd_file = cmd_info["file"] - # Skip malformed entries: a non-string/empty ``file`` cannot be a - # valid command body and would otherwise raise when used as a path. - if not isinstance(cmd_file, str) or not cmd_file: - continue - - # Guard against path traversal, keeping runtime policy aligned with - # ExtensionManifest._validate() and the skill/preset/restore readers. - # Evaluate the value under both POSIX and Windows path flavors - # because a native ``Path`` is OS-dependent (a ``PurePosixPath`` on - # POSIX does not interpret Windows drive/UNC forms). Reject any - # non-empty anchor — which covers POSIX-absolute (``/abs``), Windows - # drive-relative (``C:foo``, anchored but not ``is_absolute()`` yet - # resolved against the CWD on its drive), Windows absolute - # (``C:\foo``), and UNC roots — and any ``..`` segment in either - # separator style, so a malicious manifest ``file`` field - # (e.g. ``../../../outside.txt``) cannot read arbitrary host files - # into a generated command. The resolve()/relative_to() check below - # is the final containment backstop. - posix_path = PurePosixPath(cmd_file) - win_path = PureWindowsPath(cmd_file) - if ( - posix_path.anchor - or win_path.anchor - or ".." in posix_path.parts - or ".." in win_path.parts - ): + # Guard against path traversal using the single shared policy in + # relative_extension_path_violation(), so the runtime guard stays + # aligned with ExtensionManifest._validate() and the skill/preset + # readers. Skip a malformed/unsafe ``file`` (non-string, empty, + # whitespace, absolute/anchored, or ``..`` traversal); the + # resolve()/relative_to() check below is the final containment + # backstop. + if relative_extension_path_violation(cmd_file): continue try: source_file = (source_root / cmd_file).resolve() @@ -585,7 +568,14 @@ def register_commands( try: content = source_file.read_text(encoding="utf-8") - except (OSError, UnicodeDecodeError): + except (OSError, UnicodeDecodeError) as exc: + import warnings + + warnings.warn( + f"Skipping command '{cmd_name}': could not read source file " + f"'{cmd_file}' ({exc.__class__.__name__}: {exc}).", + stacklevel=2, + ) continue frontmatter, body = self.parse_frontmatter(content) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index f502133eb6..03bfa6c4b7 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -18,7 +18,7 @@ import zipfile from dataclasses import dataclass from datetime import datetime, timezone -from pathlib import Path, PurePosixPath, PureWindowsPath +from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Set import pathspec @@ -28,7 +28,7 @@ from ._init_options import is_ai_skills_enabled from ._invocation_style import is_slash_skills_agent -from ._utils import dump_frontmatter +from ._utils import dump_frontmatter, relative_extension_path_violation from .catalogs import CatalogEntry as BaseCatalogEntry from .catalogs import CatalogStackBase @@ -290,41 +290,17 @@ def _validate(self): if "name" not in cmd or "file" not in cmd: raise ValidationError("Command missing 'name' or 'file'") - # Validate the 'file' field for path traversal at manifest-load - # time. This is defense-in-depth: the command/skill/preset readers - # also contain the resolved path, but rejecting a traversal here - # surfaces a clear error instead of silently skipping the command. + # Validate the 'file' field at manifest-load time using the single + # shared policy in relative_extension_path_violation(), so manifest + # validation cannot drift from the runtime registrar guard. This is + # defense-in-depth: the command/skill/preset readers also contain + # the resolved path, but rejecting an unsafe value here surfaces a + # clear error instead of silently skipping the command. cmd_file = cmd["file"] - if not isinstance(cmd_file, str) or not cmd_file: - raise ValidationError( - f"Command 'file' for '{cmd.get('name')}' must be a non-empty string" - ) - if cmd_file.strip() != cmd_file: - raise ValidationError( - f"Invalid command 'file' '{cmd_file}': must not have leading or " - "trailing whitespace" - ) - # Evaluate the value under both POSIX and Windows path semantics so - # the check is platform-independent. A native ``Path`` is OS- - # dependent — a ``PurePosixPath`` on POSIX won't interpret Windows - # drive/UNC forms, and ``C:foo`` is anchored but not - # ``is_absolute()``. Reject any non-empty anchor — which covers - # POSIX-absolute (``/abs``), Windows drive-relative (``C:foo``), - # Windows absolute (``C:\foo``), and UNC/rooted forms — plus ``..`` - # segments written with either separator. - posix_path = PurePosixPath(cmd_file) - win_path = PureWindowsPath(cmd_file) - if ( - posix_path.anchor - or win_path.anchor - or ".." in posix_path.parts - or ".." in win_path.parts - ): - raise ValidationError( - f"Invalid command 'file' '{cmd_file}': must be a relative path " - "within the extension directory (no absolute paths, drive " - "letters, or '..' segments)" - ) + reason = relative_extension_path_violation(cmd_file) + if reason: + label = repr(cmd_file) if isinstance(cmd_file, str) else f"for command '{cmd.get('name')}'" + raise ValidationError(f"Invalid command 'file' {label}: {reason}") # Validate command name format if not EXTENSION_COMMAND_NAME_PATTERN.match(cmd["name"]): diff --git a/tests/test_registrar_path_traversal.py b/tests/test_registrar_path_traversal.py index 346e96621c..1f97a98d87 100644 --- a/tests/test_registrar_path_traversal.py +++ b/tests/test_registrar_path_traversal.py @@ -6,6 +6,7 @@ import pytest from specify_cli.agents import CommandRegistrar +from specify_cli._utils import relative_extension_path_violation TRAVERSAL_PAYLOADS = [ @@ -141,7 +142,6 @@ def test_rejects_traversal_names(self, tmp_path, bad_name): "../outside.txt", "../../outside.txt", "commands/../../outside.txt", - "commands/../cmd.md", # in-bounds after resolve; only the '..' check rejects it "C:outside.txt", ABS_OUTSIDE, ] @@ -233,9 +233,88 @@ def test_non_string_file_is_skipped(self, tmp_path, bad_value): assert registered == [] + def test_dotdot_rejected_even_when_target_is_in_bounds(self, tmp_path): + """An in-bounds ``..`` payload is rejected by the ``..`` check itself. -class TestSafeRegistration: - """Positive regression — well-formed names continue to register.""" + ``commands/../cmd.md`` resolves to ``ext_dir/cmd.md`` — inside + source_dir — so the resolve()/relative_to() containment backstop would + allow it. Creating that target file ensures the command is skipped + because of the ``..`` rejection, not merely because the file is absent. + """ + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + (ext_dir / "cmd.md").write_text( + "---\ndescription: test\n---\n\nbody\n", encoding="utf-8" + ) + + registrar = CommandRegistrar() + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": "commands/../cmd.md", "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] + + +class TestRelativeExtensionPathPolicy: + """Unit tests for the shared ``relative_extension_path_violation`` policy.""" + + @pytest.mark.parametrize( + "value", + [ + "commands/hello.md", + "hello.md", + "a/b/c/hello.md", + ], + ) + def test_safe_relative_paths_have_no_violation(self, value): + assert relative_extension_path_violation(value) is None + + @pytest.mark.parametrize( + "value", + [ + None, + 123, + ["x"], + "", + " ", + " hello.md", + "hello.md ", + "/abs/outside.md", + "/etc/passwd", + "C:foo.md", + "C:\\Windows\\system32", + "\\\\server\\share\\x.md", + "../escape.md", + "commands/../../escape.md", + ], + ) + def test_unsafe_values_report_violation(self, value): + assert relative_extension_path_violation(value) is not None + + +class TestReadSkipWarning: + """Unregisterable but in-bounds files warn instead of failing silently.""" + + def test_unreadable_target_warns_and_skips(self, tmp_path): + project, ext_dir = _project_and_source(tmp_path) + (project / ".gemini" / "commands").mkdir(parents=True) + (ext_dir / "cmd.md").write_bytes(b"\xff\xfe\x00\x80bad") + + registrar = CommandRegistrar() + with pytest.warns(UserWarning): + registered = registrar.register_commands( + "gemini", + [{"name": "speckit.myext.hello", "file": "cmd.md", "aliases": []}], + "myext", + ext_dir, + project, + ) + + assert registered == [] def test_symlinked_subdir_under_commands_dir_is_preserved(self, tmp_path): """Lexical check must not block legitimately symlinked sub-directories. From 46aaeac16e3bb470ab9e3565cbf3377bb121f0eb Mon Sep 17 00:00:00 2001 From: Manfred Riem <15701806+mnriem@users.noreply.github.com> Date: Mon, 22 Jun 2026 10:08:46 -0500 Subject: [PATCH 14/14] Retrigger CI Empty commit to re-trigger code scanning / CodeQL analysis on the PR merge ref. Assisted-by: GitHub Copilot CLI (model: Claude Opus 4.8, autonomous) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>