diff --git a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py index 895ce8790..87e0d47b3 100644 --- a/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aegis_ai_content_safety_dataset.py @@ -9,7 +9,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -60,6 +60,11 @@ class _AegisContentSafetyDataset(_RemoteDatasetLoader): "Violence", ] + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 19093 annotated human-LLM interactions + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py index 3056c35e2..05a905c17 100644 --- a/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/aya_redteaming_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -39,6 +39,11 @@ class _AyaRedteamingDataset(_RemoteDatasetLoader): "Tagalog": "tgl", } + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "medium" # 987 prompts across multiple languages + tags: frozenset[str] = frozenset({"safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py index 386d4190e..22e104411 100644 --- a/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/babelscape_alert_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,6 +23,11 @@ class _BabelscapeAlertDataset(_RemoteDatasetLoader): Reference: [@tedeschi2024alert] """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 30968 prompts (default config) + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py index 0405d85db..78d647da9 100644 --- a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -31,6 +31,11 @@ class _BeaverTailsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "PKU-Alignment/BeaverTails" + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "huge" # 166382 annotated prompt-response entries (default config) + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py index eee6c2d0d..450b8c366 100644 --- a/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/cbt_bench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -28,6 +28,11 @@ class _CBTBenchDataset(_RemoteDatasetLoader): - [@zhang2024cbtbench] """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "small" # 20 core_fine_seed therapy seeds (default config) + tags: frozenset[str] = frozenset({"safety", "medical"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py index 752a1f4c6..e73b187a6 100644 --- a/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/ccp_sensitive_prompts_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -21,6 +21,11 @@ class _CCPSensitivePromptsDataset(_RemoteDatasetLoader): Reference: [@promptfoo2025ccp] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "large" # 1360 censorship-sensitive prompts (single-language Mandarin) + tags: frozenset[str] = frozenset({"safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py index b4d43bca0..0ec8c216e 100644 --- a/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/comic_jailbreak_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import Seed, SeedDataset, SeedObjective, SeedPrompt +from pyrit.models import Modality, Seed, SeedDataset, SeedObjective, SeedPrompt logger = logging.getLogger(__name__) @@ -92,8 +92,8 @@ class _ComicJailbreakDataset(_RemoteDatasetLoader): "sexual", "privacy", ) - modalities: tuple[str, ...] = ("text", "image") - size: str = "large" # 300 goals × 5 templates + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) + size: str = "large" # 3501 image-text jailbreak prompts tags: frozenset[str] = frozenset({"safety", "multimodal"}) def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py index 97c5bbf5d..93393242f 100644 --- a/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/darkbench_dataset.py @@ -4,7 +4,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt class _DarkBenchDataset(_RemoteDatasetLoader): @@ -23,6 +23,11 @@ class _DarkBenchDataset(_RemoteDatasetLoader): - https://openreview.net/forum?id=odjMSBSWRt """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "large" # 660 prompts across 6 dark-pattern categories + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py index 1c3377286..647d1b9eb 100644 --- a/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/equitymedqa_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,6 +24,11 @@ class _EquityMedQADataset(_RemoteDatasetLoader): Reference: [@pfohl2024equitymedqa] """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 5565 prompts across 11 medical-bias subsets + tags: frozenset[str] = frozenset({"safety", "bias", "medical"}) + DATA_SUBSETS: list[str] = [ "cc_llm", "cc_manual", diff --git a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py index d884293eb..6ac387051 100644 --- a/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/forbidden_questions_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -25,6 +25,11 @@ class _ForbiddenQuestionsDataset(_RemoteDatasetLoader): Website: https://jailbreak-llms.xinyueshen.me/ """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "medium" # 390 questions (13 scenarios x 30 questions) + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py index 4afcdb2c1..ca05447df 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedObjective +from pyrit.models import Modality, SeedDataset, SeedObjective class _HarmBenchDataset(_RemoteDatasetLoader): @@ -21,8 +21,8 @@ class _HarmBenchDataset(_RemoteDatasetLoader): # Metadata harm_categories: list[str] = ["cybercrime", "illegal", "harmful", "chemical_biological", "harassment"] - modalities: list[str] = ["text"] - size: str = "large" # 504 seeds + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "medium" # 400 harmful behaviors tags: set[str] = {"default", "safety"} def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py index 514c5a1cb..d77767d93 100644 --- a/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmbench_multimodal_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -43,6 +43,11 @@ class _HarmBenchMultimodalDataset(_RemoteDatasetLoader): Paper: [@mazeika2024harmbench] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT, Modality.IMAGE) + size: str = "medium" # 220 harmful multimodal behaviors + tags: frozenset[str] = frozenset({"safety", "jailbreak", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py index a2a8451a9..9d0a806e6 100644 --- a/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/harmful_qa_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -28,6 +28,11 @@ class _HarmfulQADataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "declare-lab/HarmfulQA" + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "large" # 1960 harmful questions by academic topic + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py index e638b5144..8c1ac3de8 100644 --- a/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/jbb_behaviors_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,6 +24,11 @@ class _JBBBehaviorsDataset(_RemoteDatasetLoader): before using these prompts against production LLMs. """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "small" # 100 harmful behaviors across 10 categories + tags: frozenset[str] = frozenset({"safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py index 9f007aa34..7029753b6 100644 --- a/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/librai_do_not_answer_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,6 +22,11 @@ class _LibrAIDoNotAnswerDataset(_RemoteDatasetLoader): GitHub: https://github.com/libr-ai/do-not-answer """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "large" # 939 prompts across multiple risk areas + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py index f881fe0cb..77fb22db4 100644 --- a/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/llm_latent_adversarial_training_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -21,6 +21,11 @@ class _LLMLatentAdversarialTrainingDataset(_RemoteDatasetLoader): Reference: [@sheshadri2024lat] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "large" # 4948 harmful prompts + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py index ab2390723..e99f11d83 100644 --- a/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/medsafetybench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,6 +23,11 @@ class _MedSafetyBenchDataset(_RemoteDatasetLoader): Paper: [@han2024medsafetybench] """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 76174 medical-safety prompts (combined train+test+generated) + tags: frozenset[str] = frozenset({"safety", "medical"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py index f954bc994..32698ca1a 100644 --- a/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/mlcommons_ailuminate_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,6 +23,11 @@ class _MLCommonsAILuminateDataset(_RemoteDatasetLoader): Paper: [@vidgen2024ailuminate] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "large" # 1200 hazard-category prompts + tags: frozenset[str] = frozenset({"default", "safety"}) + HAZARD_CATEGORIES_MAP = { "vcr": "Violent Crimes", "src": "Sex-Related Crimes", diff --git a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py index 4f4db77cd..5c94eb1bf 100644 --- a/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/multilingual_vulnerability_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,6 +22,11 @@ class _MultilingualVulnerabilityDataset(_RemoteDatasetLoader): GitHub: https://github.com/CarsonDon/Multilingual-Vuln-LLMs """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "small" # 70 multilingual vulnerability prompts + tags: frozenset[str] = frozenset({"default", "safety", "multilingual"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py index b4aa647d4..4bd3ae674 100644 --- a/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/or_bench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -30,6 +30,12 @@ class _ORBenchBaseDataset(_RemoteDatasetLoader): CONFIG: str DESCRIPTION: str + should_register = False # abstract base — subclasses register themselves + + # Metadata shared across all OR-Bench subclasses; subclasses override `size`. + modalities: tuple[Modality, ...] = (Modality.TEXT,) + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__(self, *, split: str = "train") -> None: """ Initialize the OR-Bench dataset loader. @@ -99,6 +105,8 @@ class _ORBench80KDataset(_ORBenchBaseDataset): "OR-Bench 80K contains ~80k over-refusal prompts categorized into 10 rejection " "categories. This is the main comprehensive benchmark for evaluating LLM over-refusal." ) + size: str = "huge" # 80359 over-refusal prompts + should_register = True @property def dataset_name(self) -> str: @@ -119,6 +127,8 @@ class _ORBenchHardDataset(_ORBenchBaseDataset): "OR-Bench Hard-1K contains ~1k challenging safe prompts that commonly trigger " "over-refusal in language models. These prompts should be answerable without refusing." 
) + size: str = "large" # 1319 challenging safe prompts + should_register = True @property def dataset_name(self) -> str: @@ -140,6 +150,8 @@ class _ORBenchToxicDataset(_ORBenchBaseDataset): "OR-Bench Toxic contains toxic prompts that language models should correctly refuse. " "Used as a contrast set to evaluate refusal calibration." ) + size: str = "large" # 655 toxic prompts for refusal calibration + should_register = True @property def dataset_name(self) -> str: diff --git a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py index 2921bb032..4733cf1af 100644 --- a/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/pku_safe_rlhf_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,6 +23,11 @@ class _PKUSafeRLHFDataset(_RemoteDatasetLoader): Paper: [@ji2024pkusaferlhf] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "huge" # 73907 prompt-response pairs across 19 harm categories + tags: frozenset[str] = frozenset({"default", "safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py index 7af64c4fe..b2b1845c4 100644 --- a/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/promptintel_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -64,6 +64,11 @@ class _PromptIntelDataset(_RemoteDatasetLoader): Use responsibly and consult your legal department before using for testing. """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "medium" # indicator count varies with registry contents; gated by API key + tags: frozenset[str] = frozenset({"safety", "jailbreak", "cybersecurity"}) + API_BASE_URL = "https://api.promptintel.novahunting.ai/api/v1" PROMPT_WEB_URL = "https://promptintel.novahunting.ai/prompt" MAX_PAGE_LIMIT = 100 diff --git a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py index 75b4fe139..12b4072b7 100644 --- a/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/red_team_social_bias_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -24,6 +24,11 @@ class _RedTeamSocialBiasDataset(_RemoteDatasetLoader): Reference: [@vantaylor2024socialbias] """ + # Metadata + modalities: 
tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 40750 social-bias prompts (multi-turn expansion of source rows) + tags: frozenset[str] = frozenset({"safety", "bias", "multiturn"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py index 413dbd915..9a179ee64 100644 --- a/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/salad_bench_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -32,6 +32,11 @@ class _SaladBenchDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "walledai/SaladBench" + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 21318 harmful questions across 6 domains, 16 tasks, 65+ categories + tags: frozenset[str] = frozenset({"default", "safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py index eb6a8520f..0871f7161 100644 --- a/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sgxstest_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedObjective +from pyrit.models import Modality, SeedDataset, SeedObjective logger = logging.getLogger(__name__) @@ -66,9 +66,9 @@ class _SGXSTestDataset(_RemoteDatasetLoader): "safe contexts", "safe targets", ] - modalities: list[str] = ["text"] - size: str = "medium" # 200 prompts - tags: set[str] = {"default", "safety", "multilingual_culture"} + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "small" # 100 prompts + tags: set[str] = {"safety", "multilingual"} def __init__( self, diff --git a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py index d92007c52..71e05b41c 100644 --- a/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/simple_safety_tests_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -28,6 +28,11 @@ class _SimpleSafetyTestsDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "Bertievidgen/SimpleSafetyTests" + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "small" # 100 critical safety test prompts + tags: frozenset[str] = frozenset({"safety"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py index 407fc8810..735e8baf2 100644 --- a/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sorry_bench_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -23,6 +23,11 @@ class _SorryBenchDataset(_RemoteDatasetLoader): Reference: [@xie2024sorrybench] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "medium" # 440 base prompts (style mutations not loaded by default) + tags: frozenset[str] = frozenset({"safety", "jailbreak", "synthetic"}) + VALID_CATEGORIES = [ "Personal Insulting Words", "Social-group Insulting Words", diff --git a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py index 61b215c4f..d2f908849 100644 --- a/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/sosbench_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,6 +22,11 @@ class _SOSBenchDataset(_RemoteDatasetLoader): Reference: [@jiang2025sosbench] """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "large" # 3000 hazard-focused scientific prompts across 6 domains + tags: frozenset[str] = frozenset({"safety", "medical", "cybersecurity"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py index 104be4d10..07561aff9 100644 --- a/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/tdc23_redteaming_dataset.py @@ -6,7 +6,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -22,6 +22,11 @@ class _TDC23RedteamingDataset(_RemoteDatasetLoader): Reference: [@mazeika2023tdc] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "small" # 100 narrative-style harmful prompts + tags: frozenset[str] = frozenset({"safety", "jailbreak"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py index fcfa68297..61e6a4ac9 100644 --- a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py @@ -8,7 +8,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -32,6 +32,11 @@ class _ToxicChatDataset(_RemoteDatasetLoader): HF_DATASET_NAME: str = "lmsys/toxic-chat" + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "huge" # 5082 real user-chatbot conversations from Chatbot Arena + tags: frozenset[str] = frozenset({"default", "safety", "multiturn"}) + OPENAI_MODERATION_THRESHOLD: float = 0.8 def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py index 73b8a8e5e..c64c1172d 100644 --- a/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/transphobia_awareness_dataset.py @@ -9,7 +9,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -34,6 +34,11 @@ class _TransphobiaAwarenessDataset(_RemoteDatasetLoader): ACM Transactions on Computer-Human Interaction (2018). """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT,) + size: str = "medium" # 300 Quora questions on transgender / non-binary topics + tags: frozenset[str] = frozenset({"default", "safety", "bias"}) + RATINGS_URL = "https://zenodo.org/records/15482694/files/Ratings.xlsx?download=1" QUESTION_RESPONSE_PAIRS_URL = "https://zenodo.org/records/15482694/files/Question_Response_Pairs.xlsx?download=1" QUORA_QUESTION_LIST_URL = "https://zenodo.org/records/15482694/files/Quora%20Question%20List.xlsx?download=1" diff --git a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py index 2edd921eb..09f47de4c 100644 --- a/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/visual_leak_bench_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -73,8 +73,8 @@ class _VisualLeakBenchDataset(_RemoteDatasetLoader): ) tags: frozenset[str] = frozenset({"default", "safety", "privacy"}) - size: str = "large" - modalities: tuple[str, ...] = ("image", "text") + size: str = "large" # 2000 image-text PII / OCR-injection prompts + modalities: tuple[Modality, ...] = (Modality.IMAGE, Modality.TEXT) harm_categories: tuple[str, ...] 
= ("privacy", "pii_leakage", "ocr_injection") def __init__( diff --git a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py index 1ab86cfe7..c9e8e1356 100644 --- a/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlguard_dataset.py @@ -15,7 +15,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -93,6 +93,11 @@ class _VLGuardDataset(_RemoteDatasetLoader): Paper: Safety Fine-Tuning at (Almost) No Cost: A Baseline for Vision Large Language Models (ICML 2024) """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT, Modality.IMAGE) + size: str = "large" # 884 image-instruction pairs across 4 categories + tags: frozenset[str] = frozenset({"safety", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py index 74ccf9434..26a2460af 100644 --- a/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/vlsu_multimodal_dataset.py @@ -12,7 +12,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -52,6 +52,11 @@ class _VLSUMultimodalDataset(_RemoteDatasetLoader): Reference: [@palaskar2025vlsu] """ + # Metadata + modalities: tuple[Modality, ...] 
= (Modality.TEXT, Modality.IMAGE) + size: str = "huge" # 11074 image-text safety annotations + tags: frozenset[str] = frozenset({"default", "safety", "multimodal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py index ea50eecf1..3b84780f9 100644 --- a/pyrit/datasets/seed_datasets/remote/xstest_dataset.py +++ b/pyrit/datasets/seed_datasets/remote/xstest_dataset.py @@ -7,7 +7,7 @@ from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import ( _RemoteDatasetLoader, ) -from pyrit.models import SeedDataset, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedPrompt logger = logging.getLogger(__name__) @@ -21,6 +21,11 @@ class _XSTestDataset(_RemoteDatasetLoader): Reference: https://github.com/paul-rottger/exaggerated-safety """ + # Metadata + modalities: tuple[Modality, ...] = (Modality.TEXT,) + size: str = "medium" # 450 safe + unsafe contrast prompts + tags: frozenset[str] = frozenset({"default", "safety", "refusal"}) + def __init__( self, *, diff --git a/pyrit/datasets/seed_datasets/seed_metadata.py b/pyrit/datasets/seed_datasets/seed_metadata.py index 33a4c8ead..8396a5189 100644 --- a/pyrit/datasets/seed_datasets/seed_metadata.py +++ b/pyrit/datasets/seed_datasets/seed_metadata.py @@ -26,6 +26,47 @@ SeedDatasetSourceType = Literal["remote", "local"] +# Canonical, *advisory* tag vocabulary for SeedDatasetMetadata.tags. +# +# Loaders may set custom tags, but contributors should reuse this vocabulary +# whenever it fits to keep `SeedDatasetFilter` discovery consistent across +# datasets. The metadata parser does NOT enforce this list — for loaders shipped +# under `pyrit.` it is enforced by tests/unit/datasets/test_seed_dataset_provider.py. +# +# Rule for the special "default" tag (applied when authoring/registering a +# loader, not enforced at runtime): a dataset should be tagged "default" only +# when ALL of the following hold: +# +# 1. 
Ungated — no HuggingFace token, API key, auth, or signup required. +# 2. Citable — peer-reviewed paper or established benchmark, not a personal +# collection or unpublished registry. +# 3. Single-call — `await loader.fetch_dataset_async()` works with no +# manual download or setup (auto-cached per-asset fetches +# are fine). +# 4. Size >= medium (>=100 prompts). +# 5. Broadly applicable — not narrowly scoped to a vertical-specific domain +# (e.g., medical, legal, cybersecurity). Cross-cutting +# axes like `privacy`, `bias`, `multimodal`, +# `multilingual`, `refusal`, and `jailbreak` DO count +# as broadly applicable. +RECOMMENDED_TAGS: frozenset[str] = frozenset( + { + "default", # see rule above + "safety", # universal for any harm/red-team content + "multimodal", # image+text, audio+text, etc. + "multilingual", # multiple natural languages + "privacy", # privacy / PII content + "jailbreak", # adversarial / jailbreak templates + "bias", # fairness / bias-focused + "medical", # medical / health domain + "legal", # legal domain + "cybersecurity", # cybersecurity / malware domain + "refusal", # refusal-evaluation oriented (over-refusal, false-refusal, etc.) + "synthetic", # programmatically generated content + "multiturn", # multi-turn conversations + } +) + class SeedDatasetLoadTime(Enum): """Approximate time to load a dataset. 
Used to skip slow datasets in fast runs.""" diff --git a/pyrit/models/__init__.py b/pyrit/models/__init__.py index 1093e1da0..0380dca23 100644 --- a/pyrit/models/__init__.py +++ b/pyrit/models/__init__.py @@ -24,7 +24,7 @@ ) from pyrit.models.embeddings import EmbeddingData, EmbeddingResponse, EmbeddingSupport, EmbeddingUsageInformation from pyrit.models.harm_definition import HarmDefinition, ScaleDescription, get_all_harm_definitions -from pyrit.models.literals import ChatMessageRole, PromptDataType, PromptResponseError, SeedType +from pyrit.models.literals import ChatMessageRole, Modality, PromptDataType, PromptResponseError, SeedType from pyrit.models.message import ( Message, construct_response_from_request, @@ -88,6 +88,7 @@ "ImagePathDataTypeSerializer", "Message", "MessagePiece", + "Modality", "NextMessageSystemPromptPaths", "PromptDataType", "PromptResponseError", diff --git a/pyrit/models/literals.py b/pyrit/models/literals.py index 8c488eebe..a61b65a09 100644 --- a/pyrit/models/literals.py +++ b/pyrit/models/literals.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +from enum import Enum from typing import Literal PromptDataType = Literal[ @@ -29,3 +30,27 @@ ChatMessageRole = Literal["system", "user", "assistant", "simulated_assistant", "tool", "developer"] SeedType = Literal["prompt", "objective", "simulated_conversation"] + + +class Modality(str, Enum): + """ + Canonical high-level modalities for dataset content and target capabilities. + + Inherits from ``str`` so members compare equal to their string values + (e.g. ``Modality.TEXT == "text"``) and JSON-serialize as plain strings. + This keeps the enum interoperable with existing string-typed APIs while + giving authors autocomplete and lint-time typo protection. + + Used by: + * ``SeedDatasetMetadata.modalities`` and dataset loader class attributes + to describe what content a dataset produces. 
+ + Not yet used by ``TargetCapabilities`` (which still uses ``PromptDataType`` + with finer-grained ``image_path``/``audio_path``/``video_path`` storage + tokens). Unifying the two is tracked as a follow-up — see PR #1780. + """ + + TEXT = "text" + IMAGE = "image" + AUDIO = "audio" + VIDEO = "video" diff --git a/tests/unit/datasets/test_seed_dataset_provider.py b/tests/unit/datasets/test_seed_dataset_provider.py index 4eb058f6a..26ed16801 100644 --- a/tests/unit/datasets/test_seed_dataset_provider.py +++ b/tests/unit/datasets/test_seed_dataset_provider.py @@ -2,6 +2,7 @@ # Licensed under the MIT license. import textwrap +import typing import warnings from dataclasses import fields as dc_fields from pathlib import Path @@ -10,16 +11,20 @@ import pytest import yaml +import pyrit.datasets.seed_datasets.remote # noqa: F401 triggers loader registration from pyrit.datasets import SeedDatasetProvider from pyrit.datasets.seed_datasets.local.local_dataset_loader import _LocalDatasetLoader from pyrit.datasets.seed_datasets.remote.darkbench_dataset import _DarkBenchDataset from pyrit.datasets.seed_datasets.remote.harmbench_dataset import _HarmBenchDataset +from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import _RemoteDatasetLoader from pyrit.datasets.seed_datasets.seed_metadata import ( + RECOMMENDED_TAGS, SeedDatasetFilter, SeedDatasetLoadTime, SeedDatasetMetadata, + SeedDatasetSizeCategory, ) -from pyrit.models import SeedDataset, SeedObjective, SeedPrompt +from pyrit.models import Modality, SeedDataset, SeedObjective, SeedPrompt @pytest.fixture @@ -350,7 +355,7 @@ async def test_parse_metadata_from_class_attrs(self): metadata = await loader._parse_metadata() assert metadata is not None assert metadata.tags == {"default", "safety"} - assert metadata.size == {"large"} + assert metadata.size == {"medium"} assert metadata.modalities == {"text"} assert metadata.harm_categories == {"cybercrime", "illegal", "harmful", "chemical_biological", "harassment"} # 
source_type is not declared as a class attribute on HarmBench; @@ -452,6 +457,76 @@ async def test_no_metadata(self): assert names == [] +def _all_registered_remote_loaders() -> list[type]: + """Return every concrete remote loader currently registered. + + Imports the remote subpackage to trigger __init_subclass__ registration, + then selects the _RemoteDatasetLoader subclasses from SeedDatasetProvider's + registry. Keeps only classes whose module starts with ``pyrit.``, which drops + test fixtures so the coverage contract only applies to shipped loaders. + """ + # Re-import to be defensive when collected in isolation; sys.modules caches it. + import pyrit.datasets.seed_datasets.remote # noqa: F401 + + return [ + cls + for cls in SeedDatasetProvider._registry.values() + if issubclass(cls, _RemoteDatasetLoader) + and cls is not _RemoteDatasetLoader + and cls.__module__.startswith("pyrit.") + ] + + +_VALID_SIZES = set(typing.get_args(SeedDatasetSizeCategory)) +_VALID_MODALITIES = {m.value for m in Modality} + + +class TestRemoteLoaderMetadataCoverage: + """Enforce that every remote loader declares class-level metadata. + + Walks the auto-registered remote loaders and asserts each one exposes the + fields that drive ``SeedDatasetFilter`` discovery (tags, size, modalities) + so new loaders can't silently regress the catalog. + """ + + @pytest.mark.parametrize( + "loader_cls", + _all_registered_remote_loaders(), + ids=lambda cls: cls.__name__, + ) + async def test_loader_declares_complete_metadata(self, loader_cls): + """Every concrete remote loader must declare tags, size, and modalities.""" + loader = loader_cls() + metadata = await loader._parse_metadata() + + assert metadata is not None, ( + f"{loader_cls.__name__} has no class-level metadata. Declare `tags`, " + "`size`, and `modalities` per pyrit/datasets/seed_datasets/seed_metadata.py." + ) + + assert metadata.tags, f"{loader_cls.__name__} is missing class-level `tags`." + assert metadata.size, f"{loader_cls.__name__} is missing class-level `size`." 
+ assert metadata.modalities, f"{loader_cls.__name__} is missing class-level `modalities`." + + unknown_tags = metadata.tags - RECOMMENDED_TAGS + assert not unknown_tags, ( + f"{loader_cls.__name__} declares non-canonical tag(s) {sorted(unknown_tags)}. " + f"Use the vocabulary in seed_metadata.RECOMMENDED_TAGS or extend it deliberately." + ) + + invalid_sizes = metadata.size - _VALID_SIZES + assert not invalid_sizes, ( + f"{loader_cls.__name__} declares invalid size(s) {sorted(invalid_sizes)}. " + f"Valid sizes: {sorted(_VALID_SIZES)}." + ) + + invalid_modalities = metadata.modalities - _VALID_MODALITIES + assert not invalid_modalities, ( + f"{loader_cls.__name__} declares invalid modality(ies) {sorted(invalid_modalities)}. " + f"Use pyrit.models.Modality members. Valid values: {sorted(_VALID_MODALITIES)}." + ) + + class TestStrictMatchFiltering: """Test strict_match behavior in SeedDatasetFilter."""