From 76262f7466c52de8bb89f850bc2f7aa5e157d31f Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 22 May 2026 05:28:20 -0700 Subject: [PATCH 1/3] Backfill class-level metadata for remote seed datasets Adds class-level `tags`, `size`, and `modalities` to all remote seed dataset loaders so they participate in `SeedDatasetFilter` discovery. Pins a recommended tag vocabulary and the 5-condition rule for the special `default` tag in `seed_metadata.py` as a soft contract, and enforces it via a new parametrized coverage test in `test_seed_dataset_provider.py`. Also renames `_SGXSTestDataset`'s non-canonical `multilingual_culture` tag to `multilingual` and drops `default` (the dataset is gated), and gates `_ORBenchBaseDataset` from auto-registration since it is not a usable loader on its own. No runtime behavior or public API changes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../remote/aegis_ai_content_safety_dataset.py | 5 ++ .../remote/aya_redteaming_dataset.py | 5 ++ .../remote/babelscape_alert_dataset.py | 5 ++ .../remote/beaver_tails_dataset.py | 5 ++ .../seed_datasets/remote/cbt_bench_dataset.py | 5 ++ .../remote/ccp_sensitive_prompts_dataset.py | 5 ++ .../seed_datasets/remote/darkbench_dataset.py | 5 ++ .../remote/equitymedqa_dataset.py | 5 ++ .../remote/forbidden_questions_dataset.py | 5 ++ .../remote/harmbench_multimodal_dataset.py | 5 ++ .../remote/harmful_qa_dataset.py | 5 ++ .../remote/jbb_behaviors_dataset.py | 5 ++ .../remote/librai_do_not_answer_dataset.py | 5 ++ ...llm_latent_adversarial_training_dataset.py | 5 ++ .../remote/medsafetybench_dataset.py | 5 ++ .../remote/mlcommons_ailuminate_dataset.py | 5 ++ .../multilingual_vulnerability_dataset.py | 5 ++ .../seed_datasets/remote/or_bench_dataset.py | 12 ++++ .../remote/pku_safe_rlhf_dataset.py | 5 ++ .../remote/promptintel_dataset.py | 5 ++ .../remote/red_team_social_bias_dataset.py | 5 ++ .../remote/salad_bench_dataset.py | 5 ++ .../seed_datasets/remote/sgxstest_dataset.py | 2 +- .../remote/simple_safety_tests_dataset.py | 5 ++ .../remote/sorry_bench_dataset.py | 5 ++ .../seed_datasets/remote/sosbench_dataset.py | 5 ++ .../remote/tdc23_redteaming_dataset.py | 5 ++ .../remote/toxic_chat_dataset.py | 5 ++ .../remote/transphobia_awareness_dataset.py | 5 ++ .../seed_datasets/remote/vlguard_dataset.py | 5 ++ .../remote/vlsu_multimodal_dataset.py | 5 ++ .../seed_datasets/remote/xstest_dataset.py | 5 ++ pyrit/datasets/seed_datasets/seed_metadata.py | 41 +++++++++++ .../datasets/test_seed_dataset_provider.py | 68 +++++++++++++++++++ 34 files changed, 272 insertions(+), 1 deletion(-) diff --git a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py index 895ce87902..393b2acd0e 100644 --- a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py @@ -60,6 +60,11 @@ class _AegisContentSafetyDataset(_RemoteDatasetLoader): "Violence", ] + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # 33,416 annotated human-LLM interactions + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py index 3056c35e25..d462062584 100644 --- a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py @@ -39,6 +39,11 @@ class _AyaRedteamingDataset(_RemoteDatasetLoader): "Tagalog": "tgl", } + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~300-1000 prompts per language + tags: frozenset[str] = frozenset({"safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py index 386d4190e6..f1ed35884d 100644 --- a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py @@ -23,6 +23,11 @@ class _BabelscapeAlertDataset(_RemoteDatasetLoader): Reference: [@tedeschi2024alert] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # 15k 'alert' + 30k 'alert_adversarial' prompts + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py index 0405d85db2..ba1562c7bf 100644 --- a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py @@ -31,6 +31,11 @@ class _BeaverTailsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "PKU-Alignment/BeaverTails" + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # 330k+ annotated prompt-response entries + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py index eee6c2d0dd..e896920370 100644 --- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py @@ -28,6 +28,11 @@ class _CBTBenchDataset(_RemoteDatasetLoader): - [@zhang2024cbtbench] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~146 core_fine_seed therapy seeds (default config) + tags: frozenset[str] = frozenset({"safety", "medical"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py index 752a1f4c66..e7cdcf43d3 100644 --- a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py @@ -21,6 +21,11 @@ class _CCPSensitivePromptsDataset(_RemoteDatasetLoader): Reference: [@promptfoo2025ccp] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "small" # ~100 censorship-sensitive prompts (single-language Mandarin) + tags: frozenset[str] = frozenset({"safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py index 97c5bbf5da..7828ae37f9 100644 --- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py @@ -23,6 +23,11 @@ class _DarkBenchDataset(_RemoteDatasetLoader): - https://openreview.net/forum?id=odjMSBSWRt """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # 660 prompts across 6 dark-pattern categories + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py index 1c3377286a..babcef0ad8 100644 --- a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py @@ -24,6 +24,11 @@ class _EquityMedQADataset(_RemoteDatasetLoader): Reference: [@pfohl2024equitymedqa] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~300 prompts across 11 medical-bias subsets + tags: frozenset[str] = frozenset({"safety", "bias", "medical"}) + DATA_SUBSETS: list[str] = [ "cc_llm", "cc_manual", diff --git a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py index d884293eb1..86b53bb925 100644 --- a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py @@ -25,6 +25,11 @@ class _ForbiddenQuestionsDataset(_RemoteDatasetLoader): Website: https://jailbreak-llms.xinyueshen.me/ """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # 390 questions (13 scenarios x 30 questions) + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py index 6e063c6242..e07dc353d6 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py @@ -41,6 +41,11 @@ class _HarmBenchMultimodalDataset(_RemoteDatasetLoader): Paper: [@mazeika2024harmbench] """ + # Metadata + modalities: tuple[str, ...] = ("text", "image") + size: str = "small" # 110 harmful multimodal behaviors + tags: frozenset[str] = frozenset({"safety", "jailbreak", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py index a2a8451a98..15a3cab3b1 100644 --- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py @@ -28,6 +28,11 @@ class _HarmfulQADataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "declare-lab/HarmfulQA" + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # ~2k harmful questions by academic topic + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py index e638b5144f..621e01ff48 100644 --- a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py @@ -24,6 +24,11 @@ class _JBBBehaviorsDataset(_RemoteDatasetLoader): before using these prompts against production LLMs. """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "small" # 100 harmful behaviors across 10 categories + tags: frozenset[str] = frozenset({"safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py index 9f007aa349..4805a64123 100644 --- a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py @@ -22,6 +22,11 @@ class _LibrAIDoNotAnswerDataset(_RemoteDatasetLoader): GitHub: https://github.com/libr-ai/do-not-answer """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~939 prompts across multiple risk areas + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py index f881fe0cb9..90487d9a0b 100644 --- a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py @@ -21,6 +21,11 @@ class _LLMLatentAdversarialTrainingDataset(_RemoteDatasetLoader): Reference: [@sheshadri2024lat] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # ~4.95k harmful prompts + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py index ab23907230..5a44f72fa6 100644 --- a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py @@ -23,6 +23,11 @@ class _MedSafetyBenchDataset(_RemoteDatasetLoader): Paper: [@han2024medsafetybench] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # ~1.8k medical-safety prompts (combined train+test+generated) + tags: frozenset[str] = frozenset({"safety", "medical"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py index f954bc9948..bed826fd69 100644 --- a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py @@ -23,6 +23,11 @@ class _MLCommonsAILuminateDataset(_RemoteDatasetLoader): Paper: [@vidgen2024ailuminate] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # 1,200 hazard-category prompts + tags: frozenset[str] = frozenset({"default", "safety"}) + HAZARD_CATEGORIES_MAP = { "vcr": "Violent Crimes", "src": "Sex-Related Crimes", diff --git a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py index 4f4db77cd6..02a230c436 100644 --- a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py @@ -22,6 +22,11 @@ class _MultilingualVulnerabilityDataset(_RemoteDatasetLoader): GitHub: https://github.com/CarsonDon/Multilingual-Vuln-LLMs """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~400 multilingual vulnerability prompts + tags: frozenset[str] = frozenset({"default", "safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py index b4aa647d49..fd6cfd049f 100644 --- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py @@ -30,6 +30,12 @@ class _ORBenchBaseDataset(_RemoteDatasetLoader): CONFIG: str DESCRIPTION: str + should_register = False # abstract base — subclasses register themselves + + # Metadata shared across all OR-Bench subclasses; subclasses override `size`. + modalities: tuple[str, ...] = ("text",) + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__(self, *, split: str = "train") -> None: """ Initialize the OR-Bench dataset loader. @@ -99,6 +105,8 @@ class _ORBench80KDataset(_ORBenchBaseDataset): "OR-Bench 80K contains ~80k over-refusal prompts categorized into 10 rejection " "categories. This is the main comprehensive benchmark for evaluating LLM over-refusal." ) + size: str = "huge" # ~80k over-refusal prompts + should_register = True @property def dataset_name(self) -> str: @@ -119,6 +127,8 @@ class _ORBenchHardDataset(_ORBenchBaseDataset): "OR-Bench Hard-1K contains ~1k challenging safe prompts that commonly trigger " "over-refusal in language models. These prompts should be answerable without refusing." ) + size: str = "large" # ~1k challenging safe prompts + should_register = True @property def dataset_name(self) -> str: @@ -140,6 +150,8 @@ class _ORBenchToxicDataset(_ORBenchBaseDataset): "OR-Bench Toxic contains toxic prompts that language models should correctly refuse. " "Used as a contrast set to evaluate refusal calibration." ) + size: str = "large" # ~600 toxic prompts for refusal calibration + should_register = True @property def dataset_name(self) -> str: diff --git a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py index 2921bb032e..908f0fae80 100644 --- a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py @@ -23,6 +23,11 @@ class _PKUSafeRLHFDataset(_RemoteDatasetLoader): Paper: [@ji2024pkusaferlhf] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # ~297k prompt-response pairs across 19 harm categories + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py index 7af64c4fe4..7f1afdd94c 100644 --- a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py @@ -64,6 +64,11 @@ class _PromptIntelDataset(_RemoteDatasetLoader): Use responsibly and consult your legal department before using for testing. """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~150-400 indicators (varies as registry grows) + tags: frozenset[str] = frozenset({"safety", "jailbreak", "cybersecurity"}) + API_BASE_URL = "https://api.promptintel.novahunting.ai/api/v1" PROMPT_WEB_URL = "https://promptintel.novahunting.ai/prompt" MAX_PAGE_LIMIT = 100 diff --git a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py index 75b4fe1390..3666b5ec80 100644 --- a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py @@ -24,6 +24,11 @@ class _RedTeamSocialBiasDataset(_RemoteDatasetLoader): Reference: [@vantaylor2024socialbias] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "small" # ~60 aggregated social-bias prompts + tags: frozenset[str] = frozenset({"safety", "bias", "multiturn"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py index 413dbd9155..d65435348c 100644 --- a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py @@ -32,6 +32,11 @@ class _SaladBenchDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "walledai/SaladBench" + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # ~30k harmful questions across 6 domains, 16 tasks, 65+ categories + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py index eb6a8520f7..81af9c247c 100644 --- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py @@ -68,7 +68,7 @@ class _SGXSTestDataset(_RemoteDatasetLoader): ] modalities: list[str] = ["text"] size: str = "medium" # 200 prompts - tags: set[str] = {"default", "safety", "multilingual_culture"} + tags: set[str] = {"safety", "multilingual"} def __init__( self, diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py index d92007c521..f11e279efc 100644 --- a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py @@ -28,6 +28,11 @@ class _SimpleSafetyTestsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "Bertievidgen/SimpleSafetyTests" + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "small" # 100 critical safety test prompts + tags: frozenset[str] = frozenset({"safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py index 407fc8810d..03fb3ae34b 100644 --- a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py @@ -23,6 +23,11 @@ class _SorryBenchDataset(_RemoteDatasetLoader): Reference: [@xie2024sorrybench] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # 440 base prompts + 9,240 with style mutations + tags: frozenset[str] = frozenset({"safety", "jailbreak", "synthetic"}) + VALID_CATEGORIES = [ "Personal Insulting Words", "Social-group Insulting Words", diff --git a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py index 61b215c4f5..943a36ab40 100644 --- a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py @@ -22,6 +22,11 @@ class _SOSBenchDataset(_RemoteDatasetLoader): Reference: [@jiang2025sosbench] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "large" # 3,000 hazard-focused scientific prompts across 6 domains + tags: frozenset[str] = frozenset({"safety", "medical", "cybersecurity"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py index 104be4d106..82cdcbf04c 100644 --- a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py @@ -22,6 +22,11 @@ class _TDC23RedteamingDataset(_RemoteDatasetLoader): Reference: [@mazeika2023tdc] """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "small" # 100 narrative-style harmful prompts + tags: frozenset[str] = frozenset({"safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py index fcfa682971..e94fec44aa 100644 --- a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py @@ -32,6 +32,11 @@ class _ToxicChatDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "lmsys/toxic-chat" + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "huge" # ~10k real user-chatbot conversations from Chatbot Arena + tags: frozenset[str] = frozenset({"default", "safety", "multiturn"}) + OPENAI_MODERATION_THRESHOLD: float = 0.8 def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py index 73b8a8e5ef..21619ddd8f 100644 --- a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py @@ -34,6 +34,11 @@ class _TransphobiaAwarenessDataset(_RemoteDatasetLoader): ACM Transactions on Computer-Human Interaction (2018). """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # ~140 Quora questions on transgender / non-binary topics + tags: frozenset[str] = frozenset({"default", "safety", "bias"}) + RATINGS_URL = "https://zenodo.org/records/15482694/files/Ratings.xlsx?download=1" QUESTION_RESPONSE_PAIRS_URL = "https://zenodo.org/records/15482694/files/Question_Response_Pairs.xlsx?download=1" QUORA_QUESTION_LIST_URL = "https://zenodo.org/records/15482694/files/Quora%20Question%20List.xlsx?download=1" diff --git a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py index 1ab86cfe7e..46fa0e2e17 100644 --- a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py @@ -93,6 +93,11 @@ class _VLGuardDataset(_RemoteDatasetLoader): Paper: Safety Fine-Tuning at (Almost) No Cost: A Baseline for Vision Large Language Models (ICML 2024) """ + # Metadata + modalities: tuple[str, ...] = ("text", "image") + size: str = "large" # ~1.9k image-instruction pairs across 4 categories + tags: frozenset[str] = frozenset({"safety", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py index 3be8bd64be..36ffa44180 100644 --- a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py @@ -50,6 +50,11 @@ class _VLSUMultimodalDataset(_RemoteDatasetLoader): Reference: [@palaskar2025vlsu] """ + # Metadata + modalities: tuple[str, ...] = ("text", "image") + size: str = "large" # ~1.7k image-text safety annotations + tags: frozenset[str] = frozenset({"default", "safety", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py index ea50eecf19..f09cf0cd96 100644 --- a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py @@ -21,6 +21,11 @@ class _XSTestDataset(_RemoteDatasetLoader): Reference: https://github.com/paul-rottger/exaggerated-safety """ + # Metadata + modalities: tuple[str, ...] = ("text",) + size: str = "medium" # 450 safe + unsafe contrast prompts + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/seed_metadata.py b/pyrit/datasets/seed_datasets/seed_metadata.py index 33a4c8ead8..8396a51899 100644 --- a/pyrit/datasets/seed_datasets/seed_metadata.py +++ b/pyrit/datasets/seed_datasets/seed_metadata.py @@ -26,6 +26,47 @@ SeedDatasetSourceType = Literal["remote", "local"] +# Canonical, *advisory* tag vocabulary for SeedDatasetMetadata.tags. +# +# Loaders may set custom tags, but contributors should reuse this vocabulary +# whenever it fits to keep `SeedDatasetFilter` discovery consistent across +# datasets. The metadata parser does NOT enforce this list — it is enforced as +# a soft contract via tests/unit/datasets/test_seed_dataset_provider.py. +# +# Rule for the special "default" tag (applied when authoring/registering a +# loader, not enforced at runtime): a dataset should be tagged "default" only +# when ALL of the following hold: +# +# 1. Ungated — no HuggingFace token, API key, auth, or signup required. +# 2. Citable — peer-reviewed paper or established benchmark, not a personal +# collection or unpublished registry. +# 3. Single-call — `await loader.fetch_dataset_async()` works with no +# manual download or setup (auto-cached per-asset fetches +# are fine). +# 4. Size >= medium (>=100 prompts). +# 5. Broadly applicable — not narrowly scoped to a vertical-specific domain +# (e.g., medical, legal, cybersecurity). Cross-cutting +# axes like `privacy`, `bias`, `multimodal`, +# `multilingual`, `refusal`, and `jailbreak` DO count +# as broadly applicable. +RECOMMENDED_TAGS: frozenset[str] = frozenset( + { + "default", # see rule above + "safety", # universal for any harm/red-team content + "multimodal", # image+text, audio+text, etc. + "multilingual", # multiple natural languages + "privacy", # privacy / PII content + "jailbreak", # adversarial / jailbreak templates + "bias", # fairness / bias-focused + "medical", # medical / health domain + "legal", # legal domain + "cybersecurity", # cybersecurity / malware domain + "refusal", # refusal-evaluation oriented (over-refusal, false-refusal, etc.) + "synthetic", # programmatically generated content + "multiturn", # multi-turn conversations + } +) + class SeedDatasetLoadTime(Enum): """Approximate time to load a dataset. Used to skip slow datasets in fast runs.""" diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index 4eb058f6ad..b452b1c0d0 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import textwrap +import typing import warnings from dataclasses import fields as dc_fields from pathlib import Path @@ -10,14 +11,18 @@ import pytest import yaml +import pyrit.datasets.seed_datasets.remote # noqa: F401 triggers loader registration from pyrit.datasets import SeedDatasetProvider from pyrit.datasets.seed_datasets.local.local_dataset_loader import _LocalDatasetLoader from pyrit.datasets.seed_datasets.remote.darkbench_dataset import _DarkBenchDataset from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset +from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import _RemoteDatasetLoader from pyrit.datasets.seed_datasets.seed_metadata import ( + RECOMMENDED_TAGS, SeedDatasetFilter, SeedDatasetLoadTime, SeedDatasetMetadata, + SeedDatasetSizeCategory, ) from pyrit.models import SeedDataset, SeedObjective, SeedPrompt @@ -452,6 +457,69 @@ async def test_no_metadata(self): assert names == [] +def _all_registered_remote_loaders() -> list[type]: + """Return every concrete remote loader currently registered. + + Imports the remote subpackage to trigger __init_subclass__ registration, + then filters _RemoteDatasetLoader subclasses out of SeedDatasetProvider's + registry. Excludes test fixtures (defined under tests/) so the coverage + contract only applies to shipped loaders. + """ + # Re-import to be defensive when collected in isolation; importlib caches it. + import pyrit.datasets.seed_datasets.remote # noqa: F401 + + return [ + cls + for cls in SeedDatasetProvider._registry.values() + if issubclass(cls, _RemoteDatasetLoader) + and cls is not _RemoteDatasetLoader + and cls.__module__.startswith("pyrit.") + ] + + +_VALID_SIZES = set(typing.get_args(SeedDatasetSizeCategory)) + + +class TestRemoteLoaderMetadataCoverage: + """Enforce that every remote loader declares class-level metadata. + + Walks the auto-registered remote loaders and asserts each one exposes the + fields that drive ``SeedDatasetFilter`` discovery (tags, size, modalities) + so new loaders can't silently regress the catalog. + """ + + @pytest.mark.parametrize( + "loader_cls", + _all_registered_remote_loaders(), + ids=lambda cls: cls.__name__, + ) + async def test_loader_declares_complete_metadata(self, loader_cls): + """Every concrete remote loader must declare tags, size, and modalities.""" + loader = loader_cls() + metadata = await loader._parse_metadata() + + assert metadata is not None, ( + f"{loader_cls.__name__} has no class-level metadata. Declare `tags`, " + "`size`, and `modalities` per pyrit/datasets/seed_datasets/seed_metadata.py." + ) + + assert metadata.tags, f"{loader_cls.__name__} is missing class-level `tags`." + assert metadata.size, f"{loader_cls.__name__} is missing class-level `size`." + assert metadata.modalities, f"{loader_cls.__name__} is missing class-level `modalities`." + + unknown_tags = metadata.tags - RECOMMENDED_TAGS + assert not unknown_tags, ( + f"{loader_cls.__name__} declares non-canonical tag(s) {sorted(unknown_tags)}. " + f"Use the vocabulary in seed_metadata.RECOMMENDED_TAGS or extend it deliberately." + ) + + invalid_sizes = metadata.size - _VALID_SIZES + assert not invalid_sizes, ( + f"{loader_cls.__name__} declares invalid size(s) {sorted(invalid_sizes)}. " + f"Valid sizes: {sorted(_VALID_SIZES)}." + ) + + class TestStrictMatchFiltering: """Test strict_match behavior in SeedDatasetFilter.""" From c24fae9d19e9897eba6a791cd72503fa1074ccc8 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 22 May 2026 13:20:57 -0700 Subject: [PATCH 2/3] Replace estimated seed counts with exact loaded counts and fix size buckets Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../seed_datasets/remote/aegis_ai_content_safety_dataset.py | 2 +- .../datasets/seed_datasets/remote/aya_redteaming_dataset.py | 2 +- .../seed_datasets/remote/babelscape_alert_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py | 2 +- .../seed_datasets/remote/ccp_sensitive_prompts_dataset.py | 2 +- .../seed_datasets/remote/comic_jailbreak_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/darkbench_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/harmbench_dataset.py | 2 +- .../seed_datasets/remote/harmbench_multimodal_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py | 2 +- .../seed_datasets/remote/librai_do_not_answer_dataset.py | 2 +- .../remote/llm_latent_adversarial_training_dataset.py | 2 +- .../datasets/seed_datasets/remote/medsafetybench_dataset.py | 2 +- .../remote/multilingual_vulnerability_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/or_bench_dataset.py | 6 +++--- .../datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/promptintel_dataset.py | 2 +- .../seed_datasets/remote/red_team_social_bias_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py | 2 +- .../seed_datasets/remote/transphobia_awareness_dataset.py | 2 +- .../seed_datasets/remote/visual_leak_bench_dataset.py | 2 +- pyrit/datasets/seed_datasets/remote/vlguard_dataset.py | 2 +- .../seed_datasets/remote/vlsu_multimodal_dataset.py | 2 +- tests/unit/datasets/test_seed_dataset_provider.py | 2 +- 29 files changed, 31 insertions(+), 31 deletions(-) diff --git a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py index 393b2acd0e..c34a2e0746 100644 --- a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py @@ -62,7 +62,7 @@ class _AegisContentSafetyDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # 33,416 annotated human-LLM interactions + size: str = "huge" # 19093 annotated human-LLM interactions tags: frozenset[str] = frozenset({"default", "safety"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py index d462062584..5f76449d62 100644 --- a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py @@ -41,7 +41,7 @@ class _AyaRedteamingDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~300-1000 prompts per language + size: str = "medium" # 987 prompts across multiple languages tags: frozenset[str] = frozenset({"safety", "multilingual"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py index f1ed35884d..9b98dfd24d 100644 --- a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py @@ -25,7 +25,7 @@ class _BabelscapeAlertDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # 15k 'alert' + 30k 'alert_adversarial' prompts + size: str = "huge" # 30968 prompts (default config) tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py index ba1562c7bf..aceda89b57 100644 --- a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py @@ -33,7 +33,7 @@ class _BeaverTailsDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # 330k+ annotated prompt-response entries + size: str = "huge" # 166382 annotated prompt-response entries (default config) tags: frozenset[str] = frozenset({"default", "safety"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py index e896920370..a879a71500 100644 --- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py @@ -30,7 +30,7 @@ class _CBTBenchDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~146 core_fine_seed therapy seeds (default config) + size: str = "small" # 20 core_fine_seed therapy seeds (default config) tags: frozenset[str] = frozenset({"safety", "medical"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py index e7cdcf43d3..03cf077775 100644 --- a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py @@ -23,7 +23,7 @@ class _CCPSensitivePromptsDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "small" # ~100 censorship-sensitive prompts (single-language Mandarin) + size: str = "large" # 1360 censorship-sensitive prompts (single-language Mandarin) tags: frozenset[str] = frozenset({"safety", "multilingual"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py index 59a57b5cf4..9765456114 100644 --- a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py @@ -92,7 +92,7 @@ class _ComicJailbreakDataset(_RemoteDatasetLoader): "privacy", ) modalities: tuple[str, ...] = ("text", "image") - size: str = "large" # 300 goals × 5 templates + size: str = "large" # 3501 image-text jailbreak prompts tags: frozenset[str] = frozenset({"safety", "multimodal"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py index 7828ae37f9..163c3430c5 100644 --- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py @@ -25,7 +25,7 @@ class _DarkBenchDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # 660 prompts across 6 dark-pattern categories + size: str = "large" # 660 prompts across 6 dark-pattern categories tags: frozenset[str] = frozenset({"default", "safety"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py index babcef0ad8..f04677cc2f 100644 --- a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py @@ -26,7 +26,7 @@ class _EquityMedQADataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~300 prompts across 11 medical-bias subsets + size: str = "huge" # 5565 prompts across 11 medical-bias subsets tags: frozenset[str] = frozenset({"safety", "bias", "medical"}) DATA_SUBSETS: list[str] = [ diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py index 4afcdb2c15..e9a874ad8b 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py @@ -22,7 +22,7 @@ class _HarmBenchDataset(_RemoteDatasetLoader): # Metadata harm_categories: list[str] = ["cybercrime", "illegal", "harmful", "chemical_biological", "harassment"] modalities: list[str] = ["text"] - size: str = "large" # 504 seeds + size: str = "medium" # 400 harmful behaviors tags: set[str] = {"default", "safety"} def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py index e07dc353d6..378671a38e 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py @@ -43,7 +43,7 @@ class _HarmBenchMultimodalDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text", "image") - size: str = "small" # 110 harmful multimodal behaviors + size: str = "medium" # 220 harmful multimodal behaviors tags: frozenset[str] = frozenset({"safety", "jailbreak", "multimodal"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py index 15a3cab3b1..098a9f30bf 100644 --- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py @@ -30,7 +30,7 @@ class _HarmfulQADataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "large" # ~2k harmful questions by academic topic + size: str = "large" # 1960 harmful questions by academic topic tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py index 4805a64123..a03ee85659 100644 --- a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py @@ -24,7 +24,7 @@ class _LibrAIDoNotAnswerDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~939 prompts across multiple risk areas + size: str = "large" # 939 prompts across multiple risk areas tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py index 90487d9a0b..d875f45d40 100644 --- a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py @@ -23,7 +23,7 @@ class _LLMLatentAdversarialTrainingDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "large" # ~4.95k harmful prompts + size: str = "large" # 4948 harmful prompts tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py index 5a44f72fa6..eed23b6428 100644 --- a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py @@ -25,7 +25,7 @@ class _MedSafetyBenchDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "large" # ~1.8k medical-safety prompts (combined train+test+generated) + size: str = "huge" # 76174 medical-safety prompts (combined train+test+generated) tags: frozenset[str] = frozenset({"safety", "medical"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py index 02a230c436..b35c5b76f5 100644 --- a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py @@ -24,7 +24,7 @@ class _MultilingualVulnerabilityDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~400 multilingual vulnerability prompts + size: str = "small" # 70 multilingual vulnerability prompts tags: frozenset[str] = frozenset({"default", "safety", "multilingual"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py index fd6cfd049f..28a70473ce 100644 --- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py @@ -105,7 +105,7 @@ class _ORBench80KDataset(_ORBenchBaseDataset): "OR-Bench 80K contains ~80k over-refusal prompts categorized into 10 rejection " "categories. This is the main comprehensive benchmark for evaluating LLM over-refusal." ) - size: str = "huge" # ~80k over-refusal prompts + size: str = "huge" # 80359 over-refusal prompts should_register = True @property @@ -127,7 +127,7 @@ class _ORBenchHardDataset(_ORBenchBaseDataset): "OR-Bench Hard-1K contains ~1k challenging safe prompts that commonly trigger " "over-refusal in language models. These prompts should be answerable without refusing." ) - size: str = "large" # ~1k challenging safe prompts + size: str = "large" # 1319 challenging safe prompts should_register = True @property @@ -150,7 +150,7 @@ class _ORBenchToxicDataset(_ORBenchBaseDataset): "OR-Bench Toxic contains toxic prompts that language models should correctly refuse. " "Used as a contrast set to evaluate refusal calibration." ) - size: str = "large" # ~600 toxic prompts for refusal calibration + size: str = "large" # 655 toxic prompts for refusal calibration should_register = True @property diff --git a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py index 908f0fae80..9a58d94eb8 100644 --- a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py @@ -25,7 +25,7 @@ class _PKUSafeRLHFDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # ~297k prompt-response pairs across 19 harm categories + size: str = "huge" # 73907 prompt-response pairs across 19 harm categories tags: frozenset[str] = frozenset({"default", "safety"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py index 7f1afdd94c..e419188f08 100644 --- a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py @@ -66,7 +66,7 @@ class _PromptIntelDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~150-400 indicators (varies as registry grows) + size: str = "medium" # indicator count varies with registry contents; gated by API key tags: frozenset[str] = frozenset({"safety", "jailbreak", "cybersecurity"}) API_BASE_URL = "https://api.promptintel.novahunting.ai/api/v1" diff --git a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py index 3666b5ec80..961be1ff8a 100644 --- a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py @@ -26,7 +26,7 @@ class _RedTeamSocialBiasDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "small" # ~60 aggregated social-bias prompts + size: str = "huge" # 40750 social-bias prompts (multi-turn expansion of source rows) tags: frozenset[str] = frozenset({"safety", "bias", "multiturn"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py index d65435348c..1b1564b889 100644 --- a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py @@ -34,7 +34,7 @@ class _SaladBenchDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # ~30k harmful questions across 6 domains, 16 tasks, 65+ categories + size: str = "huge" # 21318 harmful questions across 6 domains, 16 tasks, 65+ categories tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py index 81af9c247c..82a2d30837 100644 --- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py @@ -67,7 +67,7 @@ class _SGXSTestDataset(_RemoteDatasetLoader): "safe targets", ] modalities: list[str] = ["text"] - size: str = "medium" # 200 prompts + size: str = "small" # 100 prompts tags: set[str] = {"safety", "multilingual"} def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py index 03fb3ae34b..8bbb2a4343 100644 --- a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py @@ -25,7 +25,7 @@ class _SorryBenchDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "large" # 440 base prompts + 9,240 with style mutations + size: str = "medium" # 440 base prompts (style mutations not loaded by default) tags: frozenset[str] = frozenset({"safety", "jailbreak", "synthetic"}) VALID_CATEGORIES = [ diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py index e94fec44aa..871cfce638 100644 --- a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py @@ -34,7 +34,7 @@ class _ToxicChatDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "huge" # ~10k real user-chatbot conversations from Chatbot Arena + size: str = "huge" # 5082 real user-chatbot conversations from Chatbot Arena tags: frozenset[str] = frozenset({"default", "safety", "multiturn"}) OPENAI_MODERATION_THRESHOLD: float = 0.8 diff --git a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py index 21619ddd8f..3d53179b0a 100644 --- a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py @@ -36,7 +36,7 @@ class _TransphobiaAwarenessDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text",) - size: str = "medium" # ~140 Quora questions on transgender / non-binary topics + size: str = "medium" # 300 Quora questions on transgender / non-binary topics tags: frozenset[str] = frozenset({"default", "safety", "bias"}) RATINGS_URL = "https://zenodo.org/records/15482694/files/Ratings.xlsx?download=1" diff --git a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py index 0028554bec..1e276296db 100644 --- a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py @@ -71,7 +71,7 @@ class _VisualLeakBenchDataset(_RemoteDatasetLoader): ) tags: frozenset[str] = frozenset({"default", "safety", "privacy"}) - size: str = "large" + size: str = "large" # 2000 image-text PII / OCR-injection prompts modalities: tuple[str, ...] = ("image", "text") harm_categories: tuple[str, ...] = ("privacy", "pii_leakage", "ocr_injection") diff --git a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py index 46fa0e2e17..2684f6bcf3 100644 --- a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py @@ -95,7 +95,7 @@ class _VLGuardDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text", "image") - size: str = "large" # ~1.9k image-instruction pairs across 4 categories + size: str = "large" # 884 image-instruction pairs across 4 categories tags: frozenset[str] = frozenset({"safety", "multimodal"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py index 36ffa44180..0983e73106 100644 --- a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py @@ -52,7 +52,7 @@ class _VLSUMultimodalDataset(_RemoteDatasetLoader): # Metadata modalities: tuple[str, ...] = ("text", "image") - size: str = "large" # ~1.7k image-text safety annotations + size: str = "huge" # 11074 image-text safety annotations tags: frozenset[str] = frozenset({"default", "safety", "multimodal"}) def __init__( diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index b452b1c0d0..e644c32102 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -355,7 +355,7 @@ async def test_parse_metadata_from_class_attrs(self): metadata = await loader._parse_metadata() assert metadata is not None assert metadata.tags == {"default", "safety"} - assert metadata.size == {"large"} + assert metadata.size == {"medium"} assert metadata.modalities == {"text"} assert metadata.harm_categories == {"cybercrime", "illegal", "harmful", "chemical_biological", "harassment"} # source_type is not declared as a class attribute on HarmBench; From ef8b29433f5bf8a2014af624db5b12318a933dcb Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Fri, 22 May 2026 21:59:16 -0700 Subject: [PATCH 3/3] Introduce Modality enum and migrate seed dataset loaders Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../remote/aegis_ai_content_safety_dataset.py | 4 +-- .../remote/aya_redteaming_dataset.py | 4 +-- .../remote/babelscape_alert_dataset.py | 4 +-- .../remote/beaver_tails_dataset.py | 4 +-- .../seed_datasets/remote/cbt_bench_dataset.py | 4 +-- .../remote/ccp_sensitive_prompts_dataset.py | 4 +-- .../remote/comic_jailbreak_dataset.py | 4 +-- .../seed_datasets/remote/darkbench_dataset.py | 4 +-- .../remote/equitymedqa_dataset.py | 4 +-- .../remote/forbidden_questions_dataset.py | 4 +-- .../seed_datasets/remote/harmbench_dataset.py | 4 +-- .../remote/harmbench_multimodal_dataset.py | 4 +-- .../remote/harmful_qa_dataset.py | 4 +-- .../remote/jbb_behaviors_dataset.py | 4 +-- .../remote/librai_do_not_answer_dataset.py | 4 +-- ...llm_latent_adversarial_training_dataset.py | 4 +-- .../remote/medsafetybench_dataset.py | 4 +-- .../remote/mlcommons_ailuminate_dataset.py | 4 +-- .../multilingual_vulnerability_dataset.py | 4 +-- .../seed_datasets/remote/or_bench_dataset.py | 4 +-- .../remote/pku_safe_rlhf_dataset.py | 4 +-- .../remote/promptintel_dataset.py | 4 +-- .../remote/red_team_social_bias_dataset.py | 4 +-- .../remote/salad_bench_dataset.py | 4 +-- .../seed_datasets/remote/sgxstest_dataset.py | 4 +-- .../remote/simple_safety_tests_dataset.py | 4 +-- .../remote/sorry_bench_dataset.py | 4 +-- .../seed_datasets/remote/sosbench_dataset.py | 4 +-- .../remote/tdc23_redteaming_dataset.py | 4 +-- .../remote/toxic_chat_dataset.py | 4 +-- .../remote/transphobia_awareness_dataset.py | 4 +-- .../remote/visual_leak_bench_dataset.py | 4 +-- .../seed_datasets/remote/vlguard_dataset.py | 4 +-- .../remote/vlsu_multimodal_dataset.py | 4 +-- .../seed_datasets/remote/xstest_dataset.py | 4 +-- pyrit/models/__init__.py | 3 ++- pyrit/models/literals.py | 25 +++++++++++++++++++ .../datasets/test_seed_dataset_provider.py | 9 ++++++- 38 files changed, 105 insertions(+), 72 deletions(-) diff --git a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py index c34a2e0746..87e0d47b3c 100644 --- a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py @@ -9,7 +9,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -61,7 +61,7 @@ class _AegisContentSafetyDataset(_RemoteDatasetLoader): ] # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 19093 annotated human-LLM interactions tags: frozenset[str] = frozenset({"default", "safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py index 5f76449d62..05a905c172 100644 --- a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ class _AyaRedteamingDataset(_RemoteDatasetLoader): } # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 987 prompts across multiple languages tags: frozenset[str] = frozenset({"safety", "multilingual"}) diff --git a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py index 9b98dfd24d..22e1044111 100644 --- a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class _BabelscapeAlertDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 30968 prompts (default config) tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py index aceda89b57..78d647da98 100644 --- a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ class _BeaverTailsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "PKU-Alignment/BeaverTails" # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 166382 annotated prompt-response entries (default config) tags: frozenset[str] = frozenset({"default", "safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py index a879a71500..450b8c3660 100644 --- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ class _CBTBenchDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 20 core_fine_seed therapy seeds (default config) tags: frozenset[str] = frozenset({"safety", "medical"}) diff --git a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py index 03cf077775..e73b187a60 100644 --- a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class _CCPSensitivePromptsDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 1360 censorship-sensitive prompts (single-language Mandarin) tags: frozenset[str] = frozenset({"safety", "multilingual"}) diff --git a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py index 9765456114..afff26de93 100644 --- a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py @@ -11,7 +11,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import Seed, SeedDataset, SeedObjective, SeedPrompt, data_serializer_factory +from pyrit.models import Modality, Seed, SeedDataset, SeedObjective, SeedPrompt, data_serializer_factory logger = logging.getLogger(__name__) @@ -91,7 +91,7 @@ class _ComicJailbreakDataset(_RemoteDatasetLoader): "sexual", "privacy", ) - modalities: tuple[str, ...] = ("text", "image") + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) size: str = "large" # 3501 image-text jailbreak prompts tags: frozenset[str] = frozenset({"safety", "multimodal"}) diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py index 163c3430c5..93393242fd 100644 --- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py @@ -4,7 +4,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt class _DarkBenchDataset(_RemoteDatasetLoader): @@ -24,7 +24,7 @@ class _DarkBenchDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 660 prompts across 6 dark-pattern categories tags: frozenset[str] = frozenset({"default", "safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py index f04677cc2f..647d1b9ebf 100644 --- a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ class _EquityMedQADataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 5565 prompts across 11 medical-bias subsets tags: frozenset[str] = frozenset({"safety", "bias", "medical"}) diff --git a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py index 86b53bb925..6ac3870516 100644 --- a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -26,7 +26,7 @@ class _ForbiddenQuestionsDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 390 questions (13 scenarios x 30 questions) tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py index e9a874ad8b..ca05447df8 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedObjective +from pyrit.models import Modality, SeedDataset, SeedObjective class _HarmBenchDataset(_RemoteDatasetLoader): @@ -21,7 +21,7 @@ class _HarmBenchDataset(_RemoteDatasetLoader): # Metadata harm_categories: list[str] = ["cybercrime", "illegal", "harmful", "chemical_biological", "harassment"] - modalities: list[str] = ["text"] + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 400 harmful behaviors tags: set[str] = {"default", "safety"} diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py index 378671a38e..ea79da2189 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py @@ -10,7 +10,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt, data_serializer_factory +from pyrit.models import Modality, SeedDataset, SeedPrompt, data_serializer_factory logger = logging.getLogger(__name__) @@ -42,7 +42,7 @@ class _HarmBenchMultimodalDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text", "image") + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) size: str = "medium" # 220 harmful multimodal behaviors tags: frozenset[str] = frozenset({"safety", "jailbreak", "multimodal"}) diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py index 098a9f30bf..9d0a806e63 100644 --- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ class _HarmfulQADataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "declare-lab/HarmfulQA" # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 1960 harmful questions by academic topic tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py index 621e01ff48..8c1ac3de89 100644 --- a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ class _JBBBehaviorsDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 100 harmful behaviors across 10 categories tags: frozenset[str] = frozenset({"safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py index a03ee85659..7029753b61 100644 --- a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class _LibrAIDoNotAnswerDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 939 prompts across multiple risk areas tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) diff --git a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py index d875f45d40..77fb22db41 100644 --- a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class _LLMLatentAdversarialTrainingDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 4948 harmful prompts tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py index eed23b6428..e99f11d834 100644 --- a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class _MedSafetyBenchDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 76174 medical-safety prompts (combined train+test+generated) tags: frozenset[str] = frozenset({"safety", "medical"}) diff --git a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py index bed826fd69..32698ca1ad 100644 --- a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class _MLCommonsAILuminateDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 1,200 hazard-category prompts tags: frozenset[str] = frozenset({"default", "safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py index b35c5b76f5..5c94eb1bfd 100644 --- a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class _MultilingualVulnerabilityDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 70 multilingual vulnerability prompts tags: frozenset[str] = frozenset({"default", "safety", "multilingual"}) diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py index 28a70473ce..4bd3ae674b 100644 --- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ class _ORBenchBaseDataset(_RemoteDatasetLoader): should_register = False # abstract base — subclasses register themselves # Metadata shared across all OR-Bench subclasses; subclasses override `size`. - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) def __init__(self, *, split: str = "train") -> None: diff --git a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py index 9a58d94eb8..4733cf1af7 100644 --- a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class _PKUSafeRLHFDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 73907 prompt-response pairs across 19 harm categories tags: frozenset[str] = frozenset({"default", "safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py index e419188f08..b2b1845c4b 100644 --- a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -65,7 +65,7 @@ class _PromptIntelDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # indicator count varies with registry contents; gated by API key tags: frozenset[str] = frozenset({"safety", "jailbreak", "cybersecurity"}) diff --git a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py index 961be1ff8a..12b4072b77 100644 --- a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ class _RedTeamSocialBiasDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 40750 social-bias prompts (multi-turn expansion of source rows) tags: frozenset[str] = frozenset({"safety", "bias", "multiturn"}) diff --git a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py index 1b1564b889..9a179ee647 100644 --- a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ class _SaladBenchDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "walledai/SaladBench" # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 21318 harmful questions across 6 domains, 16 tasks, 65+ categories tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py index 82a2d30837..0871f71610 100644 --- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedObjective +from pyrit.models import Modality, SeedDataset, SeedObjective logger = logging.getLogger(__name__) @@ -66,7 +66,7 @@ class _SGXSTestDataset(_RemoteDatasetLoader): "safe contexts", "safe targets", ] - modalities: list[str] = ["text"] + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 100 prompts tags: set[str] = {"safety", "multilingual"} diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py index f11e279efc..71e05b41ca 100644 --- a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ class _SimpleSafetyTestsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "Bertievidgen/SimpleSafetyTests" # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 100 critical safety test prompts tags: frozenset[str] = frozenset({"safety"}) diff --git a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py index 8bbb2a4343..735e8baf20 100644 --- a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ class _SorryBenchDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 440 base prompts (style mutations not loaded by default) tags: frozenset[str] = frozenset({"safety", "jailbreak", "synthetic"}) diff --git a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py index 943a36ab40..d2f9088495 100644 --- a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class _SOSBenchDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "large" # 3,000 hazard-focused scientific prompts across 6 domains tags: frozenset[str] = frozenset({"safety", "medical", "cybersecurity"}) diff --git a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py index 82cdcbf04c..07561aff9f 100644 --- a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class _TDC23RedteamingDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "small" # 100 narrative-style harmful prompts tags: frozenset[str] = frozenset({"safety", "jailbreak"}) diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py index 871cfce638..61e6a4ac95 100644 --- a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ class _ToxicChatDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "lmsys/toxic-chat" # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "huge" # 5082 real user-chatbot conversations from Chatbot Arena tags: frozenset[str] = frozenset({"default", "safety", "multiturn"}) diff --git a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py index 3d53179b0a..c64c1172d5 100644 --- a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py @@ -9,7 +9,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -35,7 +35,7 @@ class _TransphobiaAwarenessDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 300 Quora questions on transgender / non-binary topics tags: frozenset[str] = frozenset({"default", "safety", "bias"}) diff --git a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py index 1e276296db..d0cb9268a3 100644 --- a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py @@ -10,7 +10,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt, data_serializer_factory +from pyrit.models import Modality, SeedDataset, SeedPrompt, data_serializer_factory logger = logging.getLogger(__name__) @@ -72,7 +72,7 @@ class _VisualLeakBenchDataset(_RemoteDatasetLoader): tags: frozenset[str] = frozenset({"default", "safety", "privacy"}) size: str = "large" # 2000 image-text PII / OCR-injection prompts - modalities: tuple[str, ...] = ("image", "text") + modalities: tuple[Modality, ...] = (Modality.IMAGE, Modality.TEXT) harm_categories: tuple[str, ...] = ("privacy", "pii_leakage", "ocr_injection") def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py index 2684f6bcf3..c9e8e13565 100644 --- a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py @@ -15,7 +15,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -94,7 +94,7 @@ class _VLGuardDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text", "image") + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) size: str = "large" # 884 image-instruction pairs across 4 categories tags: frozenset[str] = frozenset({"safety", "multimodal"}) diff --git a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py index 0983e73106..28275d787c 100644 --- a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py @@ -10,7 +10,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt, data_serializer_factory +from pyrit.models import Modality, SeedDataset, SeedPrompt, data_serializer_factory logger = logging.getLogger(__name__) @@ -51,7 +51,7 @@ class _VLSUMultimodalDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text", "image") + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) size: str = "huge" # 11074 image-text safety annotations tags: frozenset[str] = frozenset({"default", "safety", "multimodal"}) diff --git a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py index f09cf0cd96..3b84780f97 100644 --- a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class _XSTestDataset(_RemoteDatasetLoader): """ # Metadata - modalities: tuple[str, ...] = ("text",) + modalities: tuple[Modality, ...] = (Modality.TEXT,) size: str = "medium" # 450 safe + unsafe contrast prompts tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) diff --git a/pyrit/models/__init__.py b/pyrit/models/__init__.py index 1093e1da02..0380dca23d 100644 --- a/pyrit/models/__init__.py +++ b/pyrit/models/__init__.py @@ -24,7 +24,7 @@ ) from pyrit.models.embeddings import EmbeddingData, EmbeddingResponse, EmbeddingSupport, EmbeddingUsageInformation from pyrit.models.harm_definition import HarmDefinition, ScaleDescription, get_all_harm_definitions -from pyrit.models.literals import ChatMessageRole, PromptDataType, PromptResponseError, SeedType +from pyrit.models.literals import ChatMessageRole, Modality, PromptDataType, PromptResponseError, SeedType from pyrit.models.message import ( Message, construct_response_from_request, @@ -88,6 +88,7 @@ "ImagePathDataTypeSerializer", "Message", "MessagePiece", + "Modality", "NextMessageSystemPromptPaths", "PromptDataType", "PromptResponseError", diff --git a/pyrit/models/literals.py b/pyrit/models/literals.py index 8c488eebe1..a61b65a09c 100644 --- a/pyrit/models/literals.py +++ b/pyrit/models/literals.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +from enum import Enum from typing import Literal PromptDataType = Literal[ @@ -29,3 +30,27 @@ ChatMessageRole = Literal["system", "user", "assistant", "simulated_assistant", "tool", "developer"] SeedType = Literal["prompt", "objective", "simulated_conversation"] + + +class Modality(str, Enum): + """ + Canonical high-level modalities for dataset content and target capabilities. + + Inherits from ``str`` so members compare equal to their string values + (e.g. ``Modality.TEXT == "text"``) and JSON-serialize as plain strings. + This keeps the enum interoperable with existing string-typed APIs while + giving authors autocomplete and lint-time typo protection. + + Used by: + * ``SeedDatasetMetadata.modalities`` and dataset loader class attributes + to describe what content a dataset produces. + + Not yet used by ``TargetCapabilities`` (which still uses ``PromptDataType`` + with finer-grained ``image_path``/``audio_path``/``video_path`` storage + tokens). Unifying the two is tracked as a follow-up — see PR #1780. + """ + + TEXT = "text" + IMAGE = "image" + AUDIO = "audio" + VIDEO = "video" diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index e644c32102..26ed168018 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -24,7 +24,7 @@ SeedDatasetMetadata, SeedDatasetSizeCategory, ) -from pyrit.models import SeedDataset, SeedObjective, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedObjective, SeedPrompt @pytest.fixture @@ -478,6 +478,7 @@ def _all_registered_remote_loaders() -> list[type]: _VALID_SIZES = set(typing.get_args(SeedDatasetSizeCategory)) +_VALID_MODALITIES = {m.value for m in Modality} class TestRemoteLoaderMetadataCoverage: @@ -519,6 +520,12 @@ async def test_loader_declares_complete_metadata(self, loader_cls): f"Valid sizes: {sorted(_VALID_SIZES)}." ) + invalid_modalities = metadata.modalities - _VALID_MODALITIES + assert not invalid_modalities, ( + f"{loader_cls.__name__} declares invalid modality(ies) {sorted(invalid_modalities)}. " + f"Use pyrit.models.Modality members. Valid values: {sorted(_VALID_MODALITIES)}." + ) + class TestStrictMatchFiltering: """Test strict_match behavior in SeedDatasetFilter."""