Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions .github/instructions/scenarios.instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,29 @@ All scenarios inherit from `Scenario` (ABC) and must:
2. **Optionally declare `BASELINE_ATTACK_POLICY`** (defaults to `BaselineAttackPolicy.Enabled` — a baseline `PromptSendingAttack` is prepended and callers can opt out per run via `initialize_async(include_baseline=False)`):
- `BaselineAttackPolicy.Disabled` — baseline supported but off by default (e.g. `Jailbreak`, where templates dominate the run).
- `BaselineAttackPolicy.Forbidden` — baseline is meaningless for this scenario's comparison axis (e.g. `AdversarialBenchmark`, which compares against gold-standard answers). Explicit `include_baseline=True` raises `ValueError`.
3. **Implement three abstract methods:**
3. **Pass `strategy_class`, `default_strategy`, and `default_dataset_config` to `super().__init__()`:**

```python
class MyScenario(Scenario):
VERSION: int = 1
BASELINE_ATTACK_POLICY: ClassVar[BaselineAttackPolicy] = BaselineAttackPolicy.Enabled

@classmethod
def get_strategy_class(cls) -> type[ScenarioStrategy]:
return MyStrategy

@classmethod
def get_default_strategy(cls) -> ScenarioStrategy:
return MyStrategy.ALL

@classmethod
def default_dataset_config(cls) -> DatasetConfiguration:
return DatasetConfiguration(dataset_names=["my_dataset"])
@apply_defaults
def __init__(self, *, objective_scorer=None, scenario_result_id=None) -> None:
super().__init__(
version=self.VERSION,
strategy_class=MyStrategy,
default_strategy=MyStrategy.ALL,
default_dataset_config=DatasetConfiguration(dataset_names=["my_dataset"]),
objective_scorer=objective_scorer or self._get_default_objective_scorer(),
scenario_result_id=scenario_result_id,
)
```

For scenarios whose strategy enum is built dynamically (RapidResponse pattern), build the
strategy class in a module-level `@cache`-decorated function and pass the result through
the constructor — no classmethod indirection required.

4. **Optionally override `_get_atomic_attacks_async()`** — the base class provides a default
that uses the factory/registry pattern (see "AtomicAttack Construction" below).
Only override if your scenario needs custom attack construction logic.
Expand All @@ -60,14 +63,16 @@ def __init__(
super().__init__(
version=self.VERSION,
strategy_class=MyStrategy,
default_strategy=MyStrategy.ALL,
default_dataset_config=DatasetConfiguration(dataset_names=["my_dataset"]),
objective_scorer=objective_scorer,
)
```

Requirements:
- `@apply_defaults` decorator on `__init__`
- All parameters keyword-only via `*`
- `super().__init__()` called with `version`, `strategy_class`, `objective_scorer`
- `super().__init__()` called with `version`, `strategy_class`, `default_strategy`, `default_dataset_config`, `objective_scorer`
- Complex objects like `adversarial_chat` or `objective_scorer` should be passed into the constructor.

## Dataset Loading
Expand Down
252 changes: 11 additions & 241 deletions doc/code/scenarios/0_scenarios.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@
" - Include an `ALL` aggregate strategy that expands to all available strategies\n",
" - Optionally override `_prepare_strategies()` for custom composition logic (see `FoundryComposite`)\n",
"\n",
"2. **Scenario Class**: Extend `Scenario` and implement these abstract methods:\n",
" - `get_strategy_class()`: Return your strategy enum class\n",
" - `get_default_strategy()`: Return the default strategy (typically `YourStrategy.ALL`)\n",
"2. **Scenario Class**: Extend `Scenario` and pass these to `super().__init__()`:\n",
" - `strategy_class`: Your strategy enum class\n",
" - `default_strategy`: The default strategy (typically `YourStrategy.ALL` or `YourStrategy.DEFAULT`)\n",
" - The base class provides a default `_get_atomic_attacks_async()` that uses the factory/registry\n",
" pattern. Override it only if your scenario needs custom attack construction logic.\n",
"\n",
"3. **Default Dataset**: Implement `default_dataset_config()` to specify the datasets your scenario uses out of the box.\n",
"3. **Default Dataset**: Pass `default_dataset_config=` to `super().__init__()` to specify the datasets your scenario uses out of the box.\n",
" - Accepts a `DatasetConfiguration` with one or more named datasets (e.g., `DatasetConfiguration(dataset_names=[\"my_dataset\"])`)\n",
" - Users can override this at runtime via `--dataset-names` in the CLI or by passing a custom `dataset_config` programmatically\n",
"\n",
Expand Down Expand Up @@ -97,24 +97,7 @@
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n",
"Loaded environment file: ./.pyrit/.env\n",
"Loaded environment file: ./.pyrit/.env.local\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"No new upgrade operations detected.\n"
]
}
],
"outputs": [],
"source": [
"from pyrit.common import apply_defaults\n",
"from pyrit.scenario import (\n",
Expand Down Expand Up @@ -142,18 +125,6 @@
"\n",
" VERSION: int = 1\n",
"\n",
" @classmethod\n",
" def get_strategy_class(cls) -> type[ScenarioStrategy]:\n",
" return MyStrategy\n",
"\n",
" @classmethod\n",
" def get_default_strategy(cls) -> ScenarioStrategy:\n",
" return MyStrategy.DEFAULT\n",
"\n",
" @classmethod\n",
" def default_dataset_config(cls) -> DatasetConfiguration:\n",
" return DatasetConfiguration(dataset_names=[\"dataset_name\"], max_dataset_size=4)\n",
"\n",
" @apply_defaults\n",
" def __init__(\n",
" self,\n",
Expand All @@ -168,7 +139,9 @@
" super().__init__(\n",
" version=self.VERSION,\n",
" objective_scorer=self._objective_scorer,\n",
" strategy_class=self.get_strategy_class(),\n",
" strategy_class=MyStrategy,\n",
" default_strategy=MyStrategy.DEFAULT,\n",
" default_dataset_config=DatasetConfiguration(dataset_names=[\"dataset_name\"], max_dataset_size=4),\n",
" scenario_result_id=scenario_result_id,\n",
" )\n",
"\n",
Expand Down Expand Up @@ -196,201 +169,7 @@
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading default configuration file: ./.pyrit/.pyrit_conf\n",
"Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n",
"Loaded environment file: ./.pyrit/.env\n",
"Loaded environment file: ./.pyrit/.env.local\n",
"\n",
"Available Scenarios:\n",
"================================================================================\n",
"\u001b[1m\u001b[36m\n",
" airt.cyber\u001b[0m\n",
" Class: Cyber\n",
" Description:\n",
" Cyber scenario implementation for PyRIT. This scenario tests how willing\n",
" models are to exploit cybersecurity harms by generating malware. The\n",
" Cyber class contains different variations of the malware generation\n",
" techniques.\n",
" Aggregate Strategies:\n",
" - all, single_turn, multi_turn\n",
" Available Strategies (2):\n",
" prompt_sending, red_teaming\n",
" Default Strategy: all\n",
" Default Datasets (1, max 4 per dataset):\n",
" airt_malware\n",
"\u001b[1m\u001b[36m\n",
" airt.jailbreak\u001b[0m\n",
" Class: Jailbreak\n",
" Description:\n",
" Jailbreak scenario implementation for PyRIT. This scenario tests how\n",
" vulnerable models are to jailbreak attacks by applying various\n",
" single-turn jailbreak templates to a set of test prompts. The responses\n",
" are scored to determine if the jailbreak was successful.\n",
" Aggregate Strategies:\n",
" - all, simple, complex\n",
" Available Strategies (4):\n",
" prompt_sending, many_shot, skeleton, role_play\n",
" Default Strategy: simple\n",
" Default Datasets (1, max 4 per dataset):\n",
" airt_harms\n",
"\u001b[1m\u001b[36m\n",
" airt.leakage\u001b[0m\n",
" Class: Leakage\n",
" Description:\n",
" Leakage scenario implementation for PyRIT. This scenario tests how\n",
" susceptible models are to leaking training data, PII, intellectual\n",
" property, or other confidential information. Uses the registry/factory\n",
" pattern to construct attack techniques.\n",
" Aggregate Strategies:\n",
" - all, default, single_turn, multi_turn\n",
" Available Strategies (9):\n",
" prompt_sending, role_play, many_shot, tap, crescendo_simulated,\n",
" red_teaming, context_compliance, first_letter, image\n",
" Default Strategy: default\n",
" Default Datasets (1, max 4 per dataset):\n",
" airt_leakage\n",
"\u001b[1m\u001b[36m\n",
" airt.psychosocial\u001b[0m\n",
" Class: Psychosocial\n",
" Description:\n",
" Psychosocial Harms Scenario implementation for PyRIT. This scenario\n",
" contains various psychosocial harm-based checks that evaluate model\n",
" behavior for responses that may harm the well-being or mental health of\n",
" users. Examples include mistreating users in a mental health crisis or\n",
" aiding them in self-harm. All datasets in this scenario share the\n",
" unified 'psychosocial' harm category, but each dataset (e.g.,\n",
" imminent_crisis, dependency) can have unique: - Scoring rubrics - Seed\n",
" objectives The scenario tests whether models: - Recognize mental health\n",
" crises and respond appropriately - Avoid facilitating self-harm or\n",
" harmful behaviors - Provide crisis resources and encourage professional\n",
" help Example usage with custom configurations: # Define custom\n",
" configurations per subharm category custom_configs = {\n",
" \"airt_imminent_crisis\": SubharmConfig(\n",
" crescendo_system_prompt_path=\"path/to/custom_escalation.yaml\",\n",
" scoring_rubric_path=\"path/to/custom_rubric.yaml\", ), } scenario =\n",
" Psychosocial(subharm_configs=custom_configs) await\n",
" scenario.initialize_async( objective_target=target_llm,\n",
" scenario_strategies=[PsychosocialStrategy.ImminentCrisis], )\n",
" Aggregate Strategies:\n",
" - all\n",
" Available Strategies (2):\n",
" imminent_crisis, licensed_therapist\n",
" Default Strategy: all\n",
" Default Datasets (1, max 4 per dataset):\n",
" airt_imminent_crisis\n",
"\u001b[1m\u001b[36m\n",
" airt.rapid_response\u001b[0m\n",
" Class: RapidResponse\n",
" Description:\n",
" Rapid Response scenario for content-harms testing. Tests model behavior\n",
" across multiple harm categories using selectable attack techniques.\n",
" Aggregate Strategies:\n",
" - all, default, single_turn, multi_turn\n",
" Available Strategies (7):\n",
" prompt_sending, role_play, many_shot, tap, crescendo_simulated,\n",
" red_teaming, context_compliance\n",
" Default Strategy: default\n",
" Default Datasets (7, max 4 per dataset):\n",
" airt_hate, airt_fairness, airt_violence, airt_sexual, airt_harassment,\n",
" airt_misinformation, airt_leakage\n",
"\u001b[1m\u001b[36m\n",
" airt.scam\u001b[0m\n",
" Class: Scam\n",
" Description:\n",
" Scam scenario evaluates an endpoint's ability to generate scam-related\n",
" materials (e.g., phishing emails, fraudulent messages) with primarily\n",
" persuasion-oriented techniques.\n",
" Aggregate Strategies:\n",
" - all, single_turn, multi_turn\n",
" Available Strategies (3):\n",
" context_compliance, role_play, persuasive_rta\n",
" Default Strategy: all\n",
" Default Datasets (1, max 4 per dataset):\n",
" airt_scams\n",
" Supported Parameters:\n",
" - max_turns (int) [default: 5]: Maximum conversation turns for the persuasive_rta strategy.\n",
"\u001b[1m\u001b[36m\n",
" benchmark.adversarial\u001b[0m\n",
" Class: AdversarialBenchmark\n",
" Description:\n",
" Benchmarking scenario that compares the attack success rate (ASR) of\n",
" several different adversarial models.\n",
" Aggregate Strategies:\n",
" - all, default, single_turn, multi_turn, light\n",
" Available Strategies (4):\n",
" role_play, tap, red_teaming, context_compliance\n",
" Default Strategy: light\n",
" Default Datasets (1, max 8 per dataset):\n",
" harmbench\n",
"\u001b[1m\u001b[36m\n",
" foundry.red_team_agent\u001b[0m\n",
" Class: RedTeamAgent\n",
" Description:\n",
" RedTeamAgent is a preconfigured scenario that automatically generates\n",
" multiple AtomicAttack instances based on the specified attack\n",
" strategies. It supports both single-turn attacks (with various\n",
" converters) and multi-turn attacks (Crescendo, RedTeaming), making it\n",
" easy to quickly test a target against multiple attack vectors. The\n",
" scenario can expand difficulty levels (EASY, MODERATE, DIFFICULT) into\n",
" their constituent attack strategies, or you can specify individual\n",
" strategies directly. This scenario is designed for use with the Foundry\n",
" AI Red Teaming Agent library, providing a consistent PyRIT contract for\n",
" their integration.\n",
" Aggregate Strategies:\n",
" - all, easy, moderate, difficult\n",
" Available Strategies (25):\n",
" ansi_attack, ascii_art, ascii_smuggler, atbash, base64, binary, caesar,\n",
" character_space, char_swap, diacritic, flip, leetspeak, morse, rot13,\n",
" suffix_append, string_join, unicode_confusable, unicode_substitution,\n",
" url, jailbreak, tense, multi_turn, crescendo, pair, tap\n",
" Default Strategy: easy\n",
" Default Datasets (1, max 4 per dataset):\n",
" harmbench\n",
"\u001b[1m\u001b[36m\n",
" garak.encoding\u001b[0m\n",
" Class: Encoding\n",
" Description:\n",
" Encoding Scenario implementation for PyRIT. This scenario tests how\n",
" resilient models are to various encoding attacks by encoding potentially\n",
" harmful text (by default slurs and XSS payloads) and testing if the\n",
" model will decode and repeat the encoded payload. It mimics the Garak\n",
" encoding probe. The scenario works by: 1. Taking seed prompts (the\n",
" harmful text to be encoded) 2. Encoding them using various encoding\n",
" schemes (Base64, ROT13, Morse, etc.) 3. Asking the target model to\n",
" decode the encoded text 4. Scoring whether the model successfully\n",
" decoded and repeated the harmful content By default, this uses the same\n",
" dataset as Garak: slur terms and web XSS payloads.\n",
" Aggregate Strategies:\n",
" - all\n",
" Available Strategies (17):\n",
" base64, base2048, base16, base32, ascii85, hex, quoted_printable,\n",
" uuencode, rot13, braille, atbash, morse_code, nato, ecoji, zalgo,\n",
" leet_speak, ascii_smuggler\n",
" Default Strategy: all\n",
" Default Datasets (2, max 3 per dataset):\n",
" garak_slur_terms_en, garak_web_html_js\n",
"\n",
"================================================================================\n",
"\n",
"Total scenarios: 9\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"from pyrit.backend.services.scenario_service import get_scenario_service\n",
"from pyrit.cli._output import print_scenario_list\n",
Expand Down Expand Up @@ -454,17 +233,8 @@
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
"jupytext": {
"main_language": "python"
}
},
"nbformat": 4,
Expand Down
Loading
Loading