Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,916 changes: 2,504 additions & 2,412 deletions src/specify_cli/__init__.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions src/specify_cli/workflows/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ class StepContext:
#: Current run ID.
run_id: str | None = None

#: When ``True``, every step implementation must short-circuit and
#: return a synthetic ``StepResult`` carrying a preview of what would
#: have been dispatched — no subprocess, no CLI call, no network I/O.
#: Step implementations publish the preview on ``output["message"]``
#: (the original, so ``{{ steps.<id>.output.message }}`` keeps
#: resolving) and ``output["dry_run_message"]`` (the rendered
#: ``[DRY RUN] ...`` body, consumed by the CLI's preview loop).
#: Persisted on the ``RunState`` so :meth:`WorkflowEngine.resume` can
#: restore it after a process restart — an interrupted dry-run must
#: not silently turn into a real run.
Comment on lines +77 to +86

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks — addressed in b2ea446: b2ea446

Replaced the every step will short-circuit / no subprocess, no CLI call, no network I/O claim in StepContext.dry_run (base.py) with an accurate description: the flag is honored by CommandStep, PromptStep, and GateStep; other step types (e.g. init, shell) may still perform their normal work until they adopt the flag; the engine itself does not enforce a dry-run contract.

dry_run: bool = False


@dataclass
class StepResult:
Expand Down
60 changes: 59 additions & 1 deletion src/specify_cli/workflows/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def __init__(
run_id: str | None = None,
workflow_id: str = "",
project_root: Path | None = None,
dry_run: bool = False,
) -> None:
# ``run_id is None`` (omitted) → auto-generate. An explicit empty
# string is *not* the same as "omitted" and must be validated like
Expand All @@ -334,6 +335,13 @@ def __init__(
self.created_at = datetime.now(timezone.utc).isoformat()
self.updated_at = self.created_at
self.log_entries: list[dict[str, Any]] = []
# Persisted into ``state.json`` (and restored by :meth:`load`) so
# :meth:`WorkflowEngine.resume` can rebuild the ``StepContext``
# with the same ``dry_run`` flag. Without persistence, an
# interrupted dry-run would silently turn into a real run after
# a process restart, which is the exact bug the CLI's
# ``--dry-run`` promise is meant to prevent.
self.dry_run = dry_run

@property
def runs_dir(self) -> Path:
Expand All @@ -354,6 +362,12 @@ def save(self) -> None:
"step_results": self.step_results,
"created_at": self.created_at,
"updated_at": self.updated_at,
# Persisted with a default of ``False`` so state files written
# by older releases (before ``dry_run`` was added) still load
# — ``get(..., False)`` makes the older format behave
# identically to a fresh non-dry-run run. Older runs were
# never dry-runs, so the default is safe.
"dry_run": self.dry_run,
}
with open(runs_dir / "state.json", "w", encoding="utf-8") as f:
json.dump(state_data, f, indent=2)
Expand Down Expand Up @@ -398,6 +412,11 @@ def load(cls, run_id: str, project_root: Path) -> RunState:
state.step_results = state_data.get("step_results", {})
state.created_at = state_data.get("created_at", "")
state.updated_at = state_data.get("updated_at", "")
# ``dry_run`` is the load-bearing field for ``resume()``'s
# ``StepContext`` rebuild — without restoring it, a resumed
# dry-run would step right past every short-circuit and invoke
# the real CLI. See ``test_resume_restores_dry_run``.
state.dry_run = state_data.get("dry_run", False)

inputs_path = runs_dir / "inputs.json"
if inputs_path.exists():
Expand Down Expand Up @@ -478,6 +497,7 @@ def execute(
definition: WorkflowDefinition,
inputs: dict[str, Any] | None = None,
run_id: str | None = None,
dry_run: bool = False,
) -> RunState:
"""Execute a workflow definition.

Expand All @@ -489,6 +509,16 @@ def execute(
User-provided input values.
run_id:
Optional run ID (uses SPECKIT_WORKFLOW_RUN_ID when set, otherwise auto-generated).
dry_run:
Preview-only mode. When ``True``, step implementations skip
side-effecting work (AI invocations, file writes) and emit a
synthetic ``dry_run_message`` instead. ``dry_run`` propagates
into each step's ``StepContext`` and is persisted on the
resulting ``RunState`` so ``resume()`` keeps the run in
preview mode across restarts. Step ``output`` shape is
Comment on lines +512 to +518

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks — addressed in b2ea446: b2ea446

Same fix as above but for the WorkflowEngine.execute() docstring (engine.py). The skipping file writes claim is softened to a step-by-step contract: the engine propagates dry_run into each StepContext and step types that honor the flag (CommandStep, PromptStep, GateStep) skip side-effecting work; init/shell may still perform their normal work in dry-run mode.

unchanged; downstream ``switch``/``do-while`` gates coerce
any dry-run-only fields (e.g. ``output.choice``) so the
preview branch is deterministic.

Returns
-------
Expand All @@ -506,6 +536,7 @@ def execute(
run_id=effective_run_id,
workflow_id=definition.id,
project_root=self.project_root,
dry_run=dry_run,
)

# Persist a copy of the workflow definition so resume can
Expand All @@ -531,6 +562,15 @@ def execute(
default_options=definition.default_options,
project_root=str(self.project_root),
run_id=state.run_id,
# The ``dry_run`` flag travels into ``StepContext`` so each
# step implementation can short-circuit independently — see
# ``CommandStep.execute`` / ``PromptStep.execute`` /
# ``GateStep.execute``. Keeping it on the per-call context
# (rather than reading it off ``RunState`` inside each
# step) means library consumers that build a ``StepContext``
# directly (e.g. tests, custom runners) get the same
# contract without needing a ``RunState``.
dry_run=dry_run,
)

# Execute steps
Expand All @@ -545,6 +585,13 @@ def execute(
state.status = RunStatus.FAILED
state.append_log({"event": "workflow_failed", "error": str(exc)})
state.save()
# Attach the partially-populated state to the exception so
# callers (most notably the CLI's exception handler) can
# surface any dry-run previews already resolved by earlier
# steps — the most useful debug signal a dry-run failure
# can offer. The state has been saved to disk by this point
# so consumers can also reload it via ``RunState.load()``.
exc.partial_state = state # type: ignore[attr-defined]
raise
Comment thread
mnriem marked this conversation as resolved.

if state.status == RunStatus.RUNNING:
Expand Down Expand Up @@ -587,7 +634,11 @@ def resume(
merged = {**state.inputs, **inputs}
state.inputs = self._resolve_inputs(definition, merged)

# Restore context
# Restore context — ``dry_run`` is read from the persisted
# ``RunState`` (not from the caller's flag, since ``resume`` has
# no caller-supplied dry-run flag) so a paused dry-run stays a
# dry-run after a process restart. Mirrors the same plumbing in
# :meth:`execute`.
context = StepContext(
inputs=state.inputs,
steps=state.step_results,
Expand All @@ -596,6 +647,7 @@ def resume(
default_options=definition.default_options,
project_root=str(self.project_root),
run_id=state.run_id,
dry_run=state.dry_run,
)

from . import STEP_REGISTRY
Expand All @@ -622,6 +674,12 @@ def resume(
state.status = RunStatus.FAILED
state.append_log({"event": "resume_failed", "error": str(exc)})
state.save()
# Attach the partially-populated state to the exception so
# callers (e.g. the CLI's exception handler for ``workflow
# resume``) can surface any dry-run previews already
# resolved by earlier resumed steps. Mirrors the same
# contract used by :meth:`execute`.
exc.partial_state = state # type: ignore[attr-defined]
raise

if state.status == RunStatus.RUNNING:
Expand Down
69 changes: 68 additions & 1 deletion src/specify_cli/workflows/steps/command/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,69 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
if step_options:
options.update(step_options)

# Attempt CLI dispatch
args_str = str(resolved_input.get("args", ""))

# Dry-run: never invoke the integration CLI. Synthesize a
# ``COMPLETED`` result carrying the rendered dispatch preview so
# ``specify workflow run --dry-run`` can show what would have
# been sent. ``message`` stays user-facing (``DRY RUN: ...``) so
# downstream ``{{ steps.<id>.output.message }}`` resolves; the
# same body is duplicated on ``dry_run_message`` for the CLI's
# preview loop. Mirrors the same contract used by ``PromptStep``
# and ``GateStep`` so the CLI never has to special-case step
# types. See ``test_dry_run_returns_completed_without_dispatch``.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks — test_dry_run_returns_completed_without_dispatch already exists in tests/test_workflows.py (it was restored in commit bca1879 "test(workflows): restore 9 dry_run tests lost during rebase onto main"). Verified PASSED in the current run:

tests/test_workflows.py::TestWorkflowRunDryRun::test_dry_run_returns_completed_without_dispatch PASSED

The full -k dry_run filter is green (9 passed, 300 deselected in 0.39s). The docstring comment in command/__init__.py line ~66 refers to that test, which is present on this branch.

if context.dry_run:
# Try to build the integration-specific invocation string
# (e.g. ``/speckit.specify ...`` for markdown agents,
# ``/speckit-specify ...`` for skills agents) so the dry-run
# preview matches what a real run would dispatch. Falls back
# to the bare ``command`` name when no integration is
# configured or the integration is not registered (e.g.,
# dry-run on a brand-new project before setup).
preview_invocation: str | None = None
if isinstance(integration, str) and integration:
try:
from specify_cli.integrations import get_integration
impl = get_integration(integration)
if impl is not None:
preview_invocation = impl.build_command_invocation(
command, args_str
)
except Exception:
preview_invocation = None
Comment on lines +77 to +85

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks — addressed in b2ea446: b2ea446

Narrowed except Exceptionexcept (ValueError, TypeError, AttributeError, KeyError) in the dry-run preview helper. Unexpected runtime errors (e.g. bugs in third-party integration subclasses) now propagate instead of being silently swallowed and degraded to the bare-command fallback preview.

Comment was about the narrow-except goal; verified all 536 tests in tests/test_workflows.py + tests/test_extensions.py pass.

if preview_invocation:
preview = f"DRY RUN: would invoke {preview_invocation!r}"
else:
preview = (
f"DRY RUN: would invoke {command!r} "
f"(integration={integration!r}, model={model!r}, "
f"args={args_str!r})"
)
return StepResult(
status=StepStatus.COMPLETED,
output={
"command": command,
"integration": integration,
"model": model,
"options": options,
"input": resolved_input,
# Dispatch contract: a real run would have invoked
# the CLI; in dry-run mode we did not, so both
# ``dispatched`` and ``executed`` are ``False``.
# ``invoke_command`` preserves the command name so
# the CLI's preview loop can label the preview
# block even when the integration is unknown.
"dispatched": False,
"executed": False,
"exit_code": 0,
"invoke_command": command,
"dry_run": True,
"message": preview,
"dry_run_message": preview,
},
)

# Attempt CLI dispatch
dispatch_result = self._try_dispatch(
command, integration, model, args_str, context
)
Expand All @@ -72,6 +133,12 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
output["stdout"] = dispatch_result["stdout"]
output["stderr"] = dispatch_result["stderr"]
output["dispatched"] = True
# Real runs must set ``executed=True`` so downstream
# ``{{ steps.<id>.output.executed }}`` resolves to truthy —
# the boolean is the documented signal that a real
# invocation occurred. The dry-run branch above is the only
# path that sets ``executed=False`` for this step type.
output["executed"] = True
if dispatch_result["exit_code"] != 0:
return StepResult(
status=StepStatus.FAILED,
Expand Down
104 changes: 103 additions & 1 deletion src/specify_cli/workflows/steps/gate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,57 @@
#: contents and the path itself — so neither can inject ANSI/terminal escapes.
_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0a-\x1f\x7f-\x9f]")

#: Choices that, if returned by the gate, abort or skip the run. The
#: dry-run branch must not pre-select one of these (see
#: ``_first_non_sentinel``) so a downstream ``if`` doesn't accidentally
#: follow a sentinel that was chosen on the user's behalf.
_REJECT_SENTINELS = frozenset({"reject", "abort"})


def _coerce_options(raw: object) -> list[str]:
"""Strictly normalize ``options`` to a ``list[str]`` of valid choices.

Only a list/tuple of strings is treated as valid input — every other
shape (``None``, string, dict, scalar, sequence of non-strings) is
coerced to an empty list so the gate can fail loudly instead of
silently inheriting an author's typo. ``list[bool]`` and
``list[int]`` are rejected on purpose: a workflow that bypassed
validation must not pass through with ``['True', 'False']`` as
option labels, since the rendered prompt would expose Python's
``repr`` strings instead of author intent.

An empty list is the documented signal that the gate has no
choices — the dry-run path leaves ``choice`` as ``None`` and the
non-dry-run interactive path emits a clear ``FAILURE`` instead of
indexing into ``[]`` and crashing with a confusing
``Choose [1-0]`` prompt.
"""
if isinstance(raw, (list, tuple)):
coerced: list[str] = []
for item in raw:
if isinstance(item, str) and item:
coerced.append(item)
return coerced
# Anything else — ``None``, ``str``, ``dict``, ``int``, ``bool`` —
# is invalid. The validation layer already rejects these shapes
# for the real-run path; this defensive coercion ensures a workflow
# that bypassed validation does not crash the prompt loop.
return []


def _first_non_sentinel(options: list[str]) -> str | None:
"""Return the first ``options`` entry that is not a reject/abort sentinel.

Used by the dry-run branch so a synthetic ``choice`` doesn't
unintentionally steer downstream branching into a reject path. If
every option is a sentinel — an authoring mistake, but one the gate
must not crash on — return ``None`` (neutral default).
"""
for option in options:
if isinstance(option, str) and option.lower() not in _REJECT_SENTINELS:
return option
return None


class GateStep(StepBase):
"""Interactive review gate.
Expand All @@ -40,7 +91,13 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
if isinstance(message, str) and "{{" in message:
message = evaluate_expression(message, context)

options = config.get("options", ["approve", "reject"])
# ``options`` is normalized defensively even on the non-dry-run
# path: a workflow that bypassed validation can pass strings,
# dicts, scalars, or non-string Sequences. Coercing here keeps
# the interactive branch honest (no IndexError on a string)
# without changing the validated happy path. Mirrors the same
# normalization used by the dry-run branch below.
options = _coerce_options(config.get("options", ["approve", "reject"]))
on_reject = config.get("on_reject", "abort")

show_file = config.get("show_file")
Expand All @@ -61,10 +118,55 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
"choice": None,
}

# Dry-run: never pause for interactive input. The original
# ``message`` is preserved verbatim on ``output["message"]`` so
# downstream ``{{ steps.<id>.output.message }}`` references
# resolve to the gate prompt unchanged; the ``[DRY RUN]`` body
# is duplicated on ``output["dry_run_message"]`` for the CLI's
# preview loop. ``choice`` is the first non-reject/abort option
# so a downstream ``if`` doesn't accidentally follow a reject
# sentinel; if every option is a sentinel we leave ``choice``
# ``None`` so a downstream gate defaults to neutral.
# See ``test_dry_run_skips_interactive_gate``,
# ``test_dry_run_accepts_tuple_options``, and
# ``test_dry_run_skips_reject_sentinels_for_choice``.
if context.dry_run:
choice = _first_non_sentinel(options)
preview = f"[DRY RUN] Gate: {message}"
return StepResult(
status=StepStatus.COMPLETED,
output={
"message": message,
"options": options,
"on_reject": on_reject,
"show_file": show_file,
"choice": choice,
"dry_run": True,
"dry_run_message": preview,
},
)

# Non-interactive: pause for later resume (the file is not read here)
if not sys.stdin.isatty():
return StepResult(status=StepStatus.PAUSED, output=output)

# Interactive with no options: a workflow that bypassed validation
# can hand us ``options=None`` (normalized to ``[]``). Indexing
# into an empty list would emit ``Choose [1-0]:`` and crash the
# gate on the first ``int(raw)``. Fail loudly instead so the
# operator sees the authoring mistake on stdout, not a stack
# trace. ``output`` carries the empty ``options`` so the engine
# can still record what the gate saw.
if not options:
return StepResult(
status=StepStatus.FAILED,
output=output,
error=(
f"Gate {config.get('id', '?')!r} has no options — "
"interactive review requires at least one choice."
),
)

# Interactive: prompt the user. ``show_file`` contents are folded
# into the displayed message so the operator can review the
# referenced material before choosing. Composing the prompt text
Expand Down
Loading