-
Notifications
You must be signed in to change notification settings - Fork 10.2k
feat(workflows): add --dry-run flag to specify workflow run #2704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ba8c9e7
475c50e
b2bd611
eef7f0c
aa514bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -313,6 +313,7 @@ def __init__( | |
| run_id: str | None = None, | ||
| workflow_id: str = "", | ||
| project_root: Path | None = None, | ||
| dry_run: bool = False, | ||
| ) -> None: | ||
| # ``run_id is None`` (omitted) → auto-generate. An explicit empty | ||
| # string is *not* the same as "omitted" and must be validated like | ||
|
|
@@ -334,6 +335,13 @@ def __init__( | |
| self.created_at = datetime.now(timezone.utc).isoformat() | ||
| self.updated_at = self.created_at | ||
| self.log_entries: list[dict[str, Any]] = [] | ||
| # Persisted into ``state.json`` (and restored by :meth:`load`) so | ||
| # :meth:`WorkflowEngine.resume` can rebuild the ``StepContext`` | ||
| # with the same ``dry_run`` flag. Without persistence, an | ||
| # interrupted dry-run would silently turn into a real run after | ||
| # a process restart, which is the exact bug the CLI's | ||
| # ``--dry-run`` promise is meant to prevent. | ||
| self.dry_run = dry_run | ||
|
|
||
| @property | ||
| def runs_dir(self) -> Path: | ||
|
|
@@ -354,6 +362,12 @@ def save(self) -> None: | |
| "step_results": self.step_results, | ||
| "created_at": self.created_at, | ||
| "updated_at": self.updated_at, | ||
| # Persisted with a default of ``False`` so state files written | ||
| # by older releases (before ``dry_run`` was added) still load | ||
| # — ``get(..., False)`` makes the older format behave | ||
| # identically to a fresh non-dry-run run. Older runs were | ||
| # never dry-runs, so the default is safe. | ||
| "dry_run": self.dry_run, | ||
| } | ||
| with open(runs_dir / "state.json", "w", encoding="utf-8") as f: | ||
| json.dump(state_data, f, indent=2) | ||
|
|
@@ -398,6 +412,11 @@ def load(cls, run_id: str, project_root: Path) -> RunState: | |
| state.step_results = state_data.get("step_results", {}) | ||
| state.created_at = state_data.get("created_at", "") | ||
| state.updated_at = state_data.get("updated_at", "") | ||
| # ``dry_run`` is the load-bearing field for ``resume()``'s | ||
| # ``StepContext`` rebuild — without restoring it, a resumed | ||
| # dry-run would step right past every short-circuit and invoke | ||
| # the real CLI. See ``test_resume_restores_dry_run``. | ||
| state.dry_run = state_data.get("dry_run", False) | ||
|
|
||
| inputs_path = runs_dir / "inputs.json" | ||
| if inputs_path.exists(): | ||
|
|
@@ -478,6 +497,7 @@ def execute( | |
| definition: WorkflowDefinition, | ||
| inputs: dict[str, Any] | None = None, | ||
| run_id: str | None = None, | ||
| dry_run: bool = False, | ||
| ) -> RunState: | ||
| """Execute a workflow definition. | ||
|
|
||
|
|
@@ -489,6 +509,16 @@ def execute( | |
| User-provided input values. | ||
| run_id: | ||
| Optional run ID (uses SPECKIT_WORKFLOW_RUN_ID when set, otherwise auto-generated). | ||
| dry_run: | ||
| Preview-only mode. When ``True``, step implementations skip | ||
| side-effecting work (AI invocations, file writes) and emit a | ||
| synthetic ``dry_run_message`` instead. ``dry_run`` propagates | ||
| into each step's ``StepContext`` and is persisted on the | ||
| resulting ``RunState`` so ``resume()`` keeps the run in | ||
| preview mode across restarts. Step ``output`` shape is | ||
|
Comment on lines
+512
to
+518
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks — addressed in b2ea446: b2ea446 Same fix as above but for the |
||
| unchanged; downstream ``switch``/``do-while`` gates coerce | ||
| any dry-run-only fields (e.g. ``output.choice``) so the | ||
| preview branch is deterministic. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -506,6 +536,7 @@ def execute( | |
| run_id=effective_run_id, | ||
| workflow_id=definition.id, | ||
| project_root=self.project_root, | ||
| dry_run=dry_run, | ||
| ) | ||
|
|
||
| # Persist a copy of the workflow definition so resume can | ||
|
|
@@ -531,6 +562,15 @@ def execute( | |
| default_options=definition.default_options, | ||
| project_root=str(self.project_root), | ||
| run_id=state.run_id, | ||
| # The ``dry_run`` flag travels into ``StepContext`` so each | ||
| # step implementation can short-circuit independently — see | ||
| # ``CommandStep.execute`` / ``PromptStep.execute`` / | ||
| # ``GateStep.execute``. Keeping it on the per-call context | ||
| # (rather than reading it off ``RunState`` inside each | ||
| # step) means library consumers that build a ``StepContext`` | ||
| # directly (e.g. tests, custom runners) get the same | ||
| # contract without needing a ``RunState``. | ||
| dry_run=dry_run, | ||
| ) | ||
|
|
||
| # Execute steps | ||
|
|
@@ -545,6 +585,13 @@ def execute( | |
| state.status = RunStatus.FAILED | ||
| state.append_log({"event": "workflow_failed", "error": str(exc)}) | ||
| state.save() | ||
| # Attach the partially-populated state to the exception so | ||
| # callers (most notably the CLI's exception handler) can | ||
| # surface any dry-run previews already resolved by earlier | ||
| # steps — the most useful debug signal a dry-run failure | ||
| # can offer. The state has been saved to disk by this point | ||
| # so consumers can also reload it via ``RunState.load()``. | ||
| exc.partial_state = state # type: ignore[attr-defined] | ||
| raise | ||
|
mnriem marked this conversation as resolved.
|
||
|
|
||
| if state.status == RunStatus.RUNNING: | ||
|
|
@@ -587,7 +634,11 @@ def resume( | |
| merged = {**state.inputs, **inputs} | ||
| state.inputs = self._resolve_inputs(definition, merged) | ||
|
|
||
| # Restore context | ||
| # Restore context — ``dry_run`` is read from the persisted | ||
| # ``RunState`` (not from the caller's flag, since ``resume`` has | ||
| # no caller-supplied dry-run flag) so a paused dry-run stays a | ||
| # dry-run after a process restart. Mirrors the same plumbing in | ||
| # :meth:`execute`. | ||
| context = StepContext( | ||
| inputs=state.inputs, | ||
| steps=state.step_results, | ||
|
|
@@ -596,6 +647,7 @@ def resume( | |
| default_options=definition.default_options, | ||
| project_root=str(self.project_root), | ||
| run_id=state.run_id, | ||
| dry_run=state.dry_run, | ||
| ) | ||
|
|
||
| from . import STEP_REGISTRY | ||
|
|
@@ -622,6 +674,12 @@ def resume( | |
| state.status = RunStatus.FAILED | ||
| state.append_log({"event": "resume_failed", "error": str(exc)}) | ||
| state.save() | ||
| # Attach the partially-populated state to the exception so | ||
| # callers (e.g. the CLI's exception handler for ``workflow | ||
| # resume``) can surface any dry-run previews already | ||
| # resolved by earlier resumed steps. Mirrors the same | ||
| # contract used by :meth:`execute`. | ||
| exc.partial_state = state # type: ignore[attr-defined] | ||
| raise | ||
|
|
||
| if state.status == RunStatus.RUNNING: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,8 +53,69 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: | |
| if step_options: | ||
| options.update(step_options) | ||
|
|
||
| # Attempt CLI dispatch | ||
| args_str = str(resolved_input.get("args", "")) | ||
|
|
||
| # Dry-run: never invoke the integration CLI. Synthesize a | ||
| # ``COMPLETED`` result carrying the rendered dispatch preview so | ||
| # ``specify workflow run --dry-run`` can show what would have | ||
| # been sent. ``message`` stays user-facing (``DRY RUN: ...``) so | ||
| # downstream ``{{ steps.<id>.output.message }}`` resolves; the | ||
| # same body is duplicated on ``dry_run_message`` for the CLI's | ||
| # preview loop. Mirrors the same contract used by ``PromptStep`` | ||
| # and ``GateStep`` so the CLI never has to special-case step | ||
| # types. See ``test_dry_run_returns_completed_without_dispatch``. | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks — The full |
||
| if context.dry_run: | ||
| # Try to build the integration-specific invocation string | ||
| # (e.g. ``/speckit.specify ...`` for markdown agents, | ||
| # ``/speckit-specify ...`` for skills agents) so the dry-run | ||
| # preview matches what a real run would dispatch. Falls back | ||
| # to the bare ``command`` name when no integration is | ||
| # configured or the integration is not registered (e.g., | ||
| # dry-run on a brand-new project before setup). | ||
| preview_invocation: str | None = None | ||
| if isinstance(integration, str) and integration: | ||
| try: | ||
| from specify_cli.integrations import get_integration | ||
| impl = get_integration(integration) | ||
| if impl is not None: | ||
| preview_invocation = impl.build_command_invocation( | ||
| command, args_str | ||
| ) | ||
| except Exception: | ||
| preview_invocation = None | ||
|
Comment on lines
+77
to
+85
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks — addressed in b2ea446: b2ea446 Narrowed Comment was about the narrow-except goal; verified all 536 tests in tests/test_workflows.py + tests/test_extensions.py pass. |
||
| if preview_invocation: | ||
| preview = f"DRY RUN: would invoke {preview_invocation!r}" | ||
| else: | ||
| preview = ( | ||
| f"DRY RUN: would invoke {command!r} " | ||
| f"(integration={integration!r}, model={model!r}, " | ||
| f"args={args_str!r})" | ||
| ) | ||
| return StepResult( | ||
| status=StepStatus.COMPLETED, | ||
| output={ | ||
| "command": command, | ||
| "integration": integration, | ||
| "model": model, | ||
| "options": options, | ||
| "input": resolved_input, | ||
| # Dispatch contract: a real run would have invoked | ||
| # the CLI; in dry-run mode we did not, so both | ||
| # ``dispatched`` and ``executed`` are ``False``. | ||
| # ``invoke_command`` preserves the command name so | ||
| # the CLI's preview loop can label the preview | ||
| # block even when the integration is unknown. | ||
| "dispatched": False, | ||
| "executed": False, | ||
| "exit_code": 0, | ||
| "invoke_command": command, | ||
| "dry_run": True, | ||
| "message": preview, | ||
| "dry_run_message": preview, | ||
| }, | ||
| ) | ||
|
|
||
| # Attempt CLI dispatch | ||
| dispatch_result = self._try_dispatch( | ||
| command, integration, model, args_str, context | ||
| ) | ||
|
|
@@ -72,6 +133,12 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult: | |
| output["stdout"] = dispatch_result["stdout"] | ||
| output["stderr"] = dispatch_result["stderr"] | ||
| output["dispatched"] = True | ||
| # Real runs must set ``executed=True`` so downstream | ||
| # ``{{ steps.<id>.output.executed }}`` resolves to truthy — | ||
| # the boolean is the documented signal that a real | ||
| # invocation occurred. The dry-run branch above is the only | ||
| # path that sets ``executed=False`` for this step type. | ||
| output["executed"] = True | ||
| if dispatch_result["exit_code"] != 0: | ||
| return StepResult( | ||
| status=StepStatus.FAILED, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks — addressed in b2ea446: b2ea446
Replaced the
every step will short-circuit/no subprocess, no CLI call, no network I/Oclaim inStepContext.dry_run(base.py) with an accurate description: the flag is honored byCommandStep,PromptStep, andGateStep; other step types (e.g.init,shell) may still perform their normal work until they adopt the flag; the engine itself does not enforce a dry-run contract.