github · fuleinist · Jun 17, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 21, 2026
@@ -74,6 +74,18 @@ class StepContext:
     #: Current run ID.
     run_id: str | None = None
 
+    #: When ``True``, every step implementation must short-circuit and
+    #: return a synthetic ``StepResult`` carrying a preview of what would
+    #: have been dispatched — no subprocess, no CLI call, no network I/O.
+    #: Step implementations publish the preview on ``output["message"]``
+    #: (the original, so ``{{ steps.<id>.output.message }}`` keeps
+    #: resolving) and ``output["dry_run_message"]`` (the rendered
+    #: ``[DRY RUN] ...`` body, consumed by the CLI's preview loop).
+    #: Persisted on the ``RunState`` so :meth:`WorkflowEngine.resume` can
+    #: restore it after a process restart — an interrupted dry-run must
+    #: not silently turn into a real run.
+    dry_run: bool = False
+
 
 @dataclass
 class StepResult:

@@ -313,6 +313,7 @@ def __init__(
         run_id: str | None = None,
         workflow_id: str = "",
         project_root: Path | None = None,
+        dry_run: bool = False,
     ) -> None:
         # ``run_id is None`` (omitted) → auto-generate. An explicit empty
         # string is *not* the same as "omitted" and must be validated like
@@ -334,6 +335,13 @@ def __init__(
         self.created_at = datetime.now(timezone.utc).isoformat()
         self.updated_at = self.created_at
         self.log_entries: list[dict[str, Any]] = []
+        # Persisted into ``state.json`` (and restored by :meth:`load`) so
+        # :meth:`WorkflowEngine.resume` can rebuild the ``StepContext``
+        # with the same ``dry_run`` flag. Without persistence, an
+        # interrupted dry-run would silently turn into a real run after
+        # a process restart, which is the exact bug the CLI's
+        # ``--dry-run`` promise is meant to prevent.
+        self.dry_run = dry_run
 
     @property
     def runs_dir(self) -> Path:
@@ -354,6 +362,12 @@ def save(self) -> None:
             "step_results": self.step_results,
             "created_at": self.created_at,
             "updated_at": self.updated_at,
+            # Persisted with a default of ``False`` so state files written
+            # by older releases (before ``dry_run`` was added) still load
+            # — ``get(..., False)`` makes the older format behave
+            # identically to a fresh non-dry-run run. Older runs were
+            # never dry-runs, so the default is safe.
+            "dry_run": self.dry_run,
         }
         with open(runs_dir / "state.json", "w", encoding="utf-8") as f:
             json.dump(state_data, f, indent=2)
@@ -398,6 +412,11 @@ def load(cls, run_id: str, project_root: Path) -> RunState:
         state.step_results = state_data.get("step_results", {})
         state.created_at = state_data.get("created_at", "")
         state.updated_at = state_data.get("updated_at", "")
+        # ``dry_run`` is the load-bearing field for ``resume()``'s
+        # ``StepContext`` rebuild — without restoring it, a resumed
+        # dry-run would step right past every short-circuit and invoke
+        # the real CLI. See ``test_resume_restores_dry_run``.
+        state.dry_run = state_data.get("dry_run", False)
 
         inputs_path = runs_dir / "inputs.json"
         if inputs_path.exists():
@@ -478,6 +497,7 @@ def execute(
         definition: WorkflowDefinition,
         inputs: dict[str, Any] | None = None,
         run_id: str | None = None,
+        dry_run: bool = False,
     ) -> RunState:
         """Execute a workflow definition.
 
@@ -489,6 +509,16 @@ def execute(
             User-provided input values.
         run_id:
             Optional run ID (uses SPECKIT_WORKFLOW_RUN_ID when set, otherwise auto-generated).
+        dry_run:
+            Preview-only mode. When ``True``, step implementations skip
+            side-effecting work (AI invocations, file writes) and emit a
+            synthetic ``dry_run_message`` instead. ``dry_run`` propagates
+            into each step's ``StepContext`` and is persisted on the
+            resulting ``RunState`` so ``resume()`` keeps the run in
+            preview mode across restarts. Step ``output`` shape is
+            unchanged; downstream ``switch``/``do-while`` gates coerce
+            any dry-run-only fields (e.g. ``output.choice``) so the
+            preview branch is deterministic.
 
         Returns
         -------
@@ -506,6 +536,7 @@ def execute(
             run_id=effective_run_id,
             workflow_id=definition.id,
             project_root=self.project_root,
+            dry_run=dry_run,
         )
 
         # Persist a copy of the workflow definition so resume can
@@ -531,6 +562,15 @@ def execute(
             default_options=definition.default_options,
             project_root=str(self.project_root),
             run_id=state.run_id,
+            # The ``dry_run`` flag travels into ``StepContext`` so each
+            # step implementation can short-circuit independently — see
+            # ``CommandStep.execute`` / ``PromptStep.execute`` /
+            # ``GateStep.execute``. Keeping it on the per-call context
+            # (rather than reading it off ``RunState`` inside each
+            # step) means library consumers that build a ``StepContext``
+            # directly (e.g. tests, custom runners) get the same
+            # contract without needing a ``RunState``.
+            dry_run=dry_run,
         )
 
         # Execute steps
@@ -545,6 +585,13 @@ def execute(
             state.status = RunStatus.FAILED
             state.append_log({"event": "workflow_failed", "error": str(exc)})
             state.save()
+            # Attach the partially-populated state to the exception so
+            # callers (most notably the CLI's exception handler) can
+            # surface any dry-run previews already resolved by earlier
+            # steps — the most useful debug signal a dry-run failure
+            # can offer. The state has been saved to disk by this point
+            # so consumers can also reload it via ``RunState.load()``.
+            exc.partial_state = state  # type: ignore[attr-defined]
             raise
 
         if state.status == RunStatus.RUNNING:
@@ -587,7 +634,11 @@ def resume(
             merged = {**state.inputs, **inputs}
             state.inputs = self._resolve_inputs(definition, merged)
 
-        # Restore context
+        # Restore context — ``dry_run`` is read from the persisted
+        # ``RunState`` (not from the caller's flag, since ``resume`` has
+        # no caller-supplied dry-run flag) so a paused dry-run stays a
+        # dry-run after a process restart. Mirrors the same plumbing in
+        # :meth:`execute`.
         context = StepContext(
             inputs=state.inputs,
             steps=state.step_results,
@@ -596,6 +647,7 @@ def resume(
             default_options=definition.default_options,
             project_root=str(self.project_root),
             run_id=state.run_id,
+            dry_run=state.dry_run,
         )
 
         from . import STEP_REGISTRY
@@ -622,6 +674,12 @@ def resume(
             state.status = RunStatus.FAILED
             state.append_log({"event": "resume_failed", "error": str(exc)})
             state.save()
+            # Attach the partially-populated state to the exception so
+            # callers (e.g. the CLI's exception handler for ``workflow
+            # resume``) can surface any dry-run previews already
+            # resolved by earlier resumed steps. Mirrors the same
+            # contract used by :meth:`execute`.
+            exc.partial_state = state  # type: ignore[attr-defined]
             raise
 
         if state.status == RunStatus.RUNNING:

@@ -53,8 +53,69 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
         if step_options:
             options.update(step_options)
 
-        # Attempt CLI dispatch
         args_str = str(resolved_input.get("args", ""))
+
+        # Dry-run: never invoke the integration CLI. Synthesize a
+        # ``COMPLETED`` result carrying the rendered dispatch preview so
+        # ``specify workflow run --dry-run`` can show what would have
+        # been sent. ``message`` stays user-facing (``DRY RUN: ...``) so
+        # downstream ``{{ steps.<id>.output.message }}`` resolves; the
+        # same body is duplicated on ``dry_run_message`` for the CLI's
+        # preview loop. Mirrors the same contract used by ``PromptStep``
+        # and ``GateStep`` so the CLI never has to special-case step
+        # types. See ``test_dry_run_returns_completed_without_dispatch``.
+        if context.dry_run:
+            # Try to build the integration-specific invocation string
+            # (e.g. ``/speckit.specify ...`` for markdown agents,
+            # ``/speckit-specify ...`` for skills agents) so the dry-run
+            # preview matches what a real run would dispatch. Falls back
+            # to the bare ``command`` name when no integration is
+            # configured or the integration is not registered (e.g.,
+            # dry-run on a brand-new project before setup).
+            preview_invocation: str | None = None
+            if isinstance(integration, str) and integration:
+                try:
+                    from specify_cli.integrations import get_integration
+                    impl = get_integration(integration)
+                    if impl is not None:
+                        preview_invocation = impl.build_command_invocation(
+                            command, args_str
+                        )
+                except Exception:
+                    preview_invocation = None
+            if preview_invocation:
+                preview = f"DRY RUN: would invoke {preview_invocation!r}"
+            else:
+                preview = (
+                    f"DRY RUN: would invoke {command!r} "
+                    f"(integration={integration!r}, model={model!r}, "
+                    f"args={args_str!r})"
+                )
+            return StepResult(
+                status=StepStatus.COMPLETED,
+                output={
+                    "command": command,
+                    "integration": integration,
+                    "model": model,
+                    "options": options,
+                    "input": resolved_input,
+                    # Dispatch contract: a real run would have invoked
+                    # the CLI; in dry-run mode we did not, so both
+                    # ``dispatched`` and ``executed`` are ``False``.
+                    # ``invoke_command`` preserves the command name so
+                    # the CLI's preview loop can label the preview
+                    # block even when the integration is unknown.
+                    "dispatched": False,
+                    "executed": False,
+                    "exit_code": 0,
+                    "invoke_command": command,
+                    "dry_run": True,
+                    "message": preview,
+                    "dry_run_message": preview,
+                },
+            )
+
+        # Attempt CLI dispatch
         dispatch_result = self._try_dispatch(
             command, integration, model, args_str, context
         )
@@ -72,6 +133,12 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
             output["stdout"] = dispatch_result["stdout"]
             output["stderr"] = dispatch_result["stderr"]
             output["dispatched"] = True
+            # Real runs must set ``executed=True`` so downstream
+            # ``{{ steps.<id>.output.executed }}`` resolves to truthy —
+            # the boolean is the documented signal that a real
+            # invocation occurred. The dry-run branch above is the only
+            # path that sets ``executed=False`` for this step type.
+            output["executed"] = True
             if dispatch_result["exit_code"] != 0:
                 return StepResult(
                     status=StepStatus.FAILED,

@@ -16,6 +16,57 @@
 #: contents and the path itself — so neither can inject ANSI/terminal escapes.
 _CONTROL_CHARS = re.compile(r"[\x00-\x08\x0a-\x1f\x7f-\x9f]")
 
+#: Choices that, if returned by the gate, abort or skip the run. The
+#: dry-run branch must not pre-select one of these (see
+#: ``_first_non_sentinel``) so a downstream ``if`` doesn't accidentally
+#: follow a sentinel that was chosen on the user's behalf.
+_REJECT_SENTINELS = frozenset({"reject", "abort"})
+
+
+def _coerce_options(raw: object) -> list[str]:
+    """Strictly normalize ``options`` to a ``list[str]`` of valid choices.
+
+    Only a list/tuple of strings is treated as valid input — every other
+    shape (``None``, string, dict, scalar, sequence of non-strings) is
+    coerced to an empty list so the gate can fail loudly instead of
+    silently inheriting an author's typo. ``list[bool]`` and
+    ``list[int]`` are rejected on purpose: a workflow that bypassed
+    validation must not pass through with ``['True', 'False']`` as
+    option labels, since the rendered prompt would expose Python's
+    ``repr`` strings instead of author intent.
+
+    An empty list is the documented signal that the gate has no
+    choices — the dry-run path leaves ``choice`` as ``None`` and the
+    non-dry-run interactive path emits a clear ``FAILURE`` instead of
+    indexing into ``[]`` and crashing with a confusing
+    ``Choose [1-0]`` prompt.
+    """
+    if isinstance(raw, (list, tuple)):
+        coerced: list[str] = []
+        for item in raw:
+            if isinstance(item, str) and item:
+                coerced.append(item)
+        return coerced
+    # Anything else — ``None``, ``str``, ``dict``, ``int``, ``bool`` —
+    # is invalid. The validation layer already rejects these shapes
+    # for the real-run path; this defensive coercion ensures a workflow
+    # that bypassed validation does not crash the prompt loop.
+    return []
+
+
+def _first_non_sentinel(options: list[str]) -> str | None:
+    """Return the first ``options`` entry that is not a reject/abort sentinel.
+
+    Used by the dry-run branch so a synthetic ``choice`` doesn't
+    unintentionally steer downstream branching into a reject path. If
+    every option is a sentinel — an authoring mistake, but one the gate
+    must not crash on — return ``None`` (neutral default).
+    """
+    for option in options:
+        if isinstance(option, str) and option.lower() not in _REJECT_SENTINELS:
+            return option
+    return None
+
 
 class GateStep(StepBase):
     """Interactive review gate.
@@ -40,7 +91,13 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
         if isinstance(message, str) and "{{" in message:
             message = evaluate_expression(message, context)
 
-        options = config.get("options", ["approve", "reject"])
+        # ``options`` is normalized defensively even on the non-dry-run
+        # path: a workflow that bypassed validation can pass strings,
+        # dicts, scalars, or non-string Sequences. Coercing here keeps
+        # the interactive branch honest (no IndexError on a string)
+        # without changing the validated happy path. Mirrors the same
+        # normalization used by the dry-run branch below.
+        options = _coerce_options(config.get("options", ["approve", "reject"]))
         on_reject = config.get("on_reject", "abort")
 
         show_file = config.get("show_file")
@@ -61,10 +118,55 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
             "choice": None,
         }
 
+        # Dry-run: never pause for interactive input. The original
+        # ``message`` is preserved verbatim on ``output["message"]`` so
+        # downstream ``{{ steps.<id>.output.message }}`` references
+        # resolve to the gate prompt unchanged; the ``[DRY RUN]`` body
+        # is duplicated on ``output["dry_run_message"]`` for the CLI's
+        # preview loop. ``choice`` is the first non-reject/abort option
+        # so a downstream ``if`` doesn't accidentally follow a reject
+        # sentinel; if every option is a sentinel we leave ``choice``
+        # ``None`` so a downstream gate defaults to neutral.
+        # See ``test_dry_run_skips_interactive_gate``,
+        # ``test_dry_run_accepts_tuple_options``, and
+        # ``test_dry_run_skips_reject_sentinels_for_choice``.
+        if context.dry_run:
+            choice = _first_non_sentinel(options)
+            preview = f"[DRY RUN] Gate: {message}"
+            return StepResult(
+                status=StepStatus.COMPLETED,
+                output={
+                    "message": message,
+                    "options": options,
+                    "on_reject": on_reject,
+                    "show_file": show_file,
+                    "choice": choice,
+                    "dry_run": True,
+                    "dry_run_message": preview,
+                },
+            )
+
         # Non-interactive: pause for later resume (the file is not read here)
         if not sys.stdin.isatty():
             return StepResult(status=StepStatus.PAUSED, output=output)
 
+        # Interactive with no options: a workflow that bypassed validation
+        # can hand us ``options=None`` (normalized to ``[]``). Indexing
+        # into an empty list would emit ``Choose [1-0]:`` and crash the
+        # gate on the first ``int(raw)``. Fail loudly instead so the
+        # operator sees the authoring mistake on stdout, not a stack
+        # trace. ``output`` carries the empty ``options`` so the engine
+        # can still record what the gate saw.
+        if not options:
+            return StepResult(
+                status=StepStatus.FAILED,
+                output=output,
+                error=(
+                    f"Gate {config.get('id', '?')!r} has no options — "
+                    "interactive review requires at least one choice."
+                ),
+            )
+
         # Interactive: prompt the user. ``show_file`` contents are folded
         # into the displayed message so the operator can review the
         # referenced material before choosing. Composing the prompt text