microsoft · romanlutz · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/doc/bibliography.md b/doc/bibliography.md
@@ -5,6 +5,6 @@ All academic papers, research blogs, and technical reports referenced throughout
 :::{dropdown} Citation Keys
 :class: hidden-citations
 
-[@aakanksha2024multilingual; @adversaai2023universal; @andriushchenko2024tense; @anthropic2024manyshot; @aqrawi2024singleturncrescendo; @bethany2024mathprompt; @bhardwaj2023harmfulqa; @bhardwaj2024homer; @bryan2025agentictaxonomy; @bullwinkel2025airtlessons; @bullwinkel2025repeng; @bullwinkel2026trigger; @chao2023pair; @chao2024jailbreakbench; @cui2024orbench; @darkbench2025; @derczynski2024garak; @ding2023wolf; @embracethered2024unicode; @embracethered2025sneakybits; @ghosh2025aegis; @gupta2024walledeval; @haider2024phi3safety; @han2024medsafetybench; @hines2024spotlighting; @ji2023beavertails; @ji2024pkusaferlhf; @jiang2025sosbench; @jones2025computeruse; @kingma2014adam; @li2024saladbench; @li2024wmdp; @lin2023toxicchat; @liu2024flipattack; @lopez2024pyrit; @lv2024codechameleon; @mazeika2023tdc; @mazeika2024harmbench; @mckee2024transparency; @mehrotra2023tap; @microsoft2024skeletonkey; @palaskar2025vlsu; @pfohl2024equitymedqa; @promptfoo2025ccp; @robustintelligence2024bypass; @roccia2024promptintel; @rottger2023xstest; @rottger2025msts; @russinovich2024crescendo; @russinovich2025price; @scheuerman2025transphobia; @shaikh2022second; @shayegani2025computeruse; @shen2023donotanything; @sheshadri2024lat; @stok2023ansi; @tan2026comicjailbreak; @tang2025multilingual; @tedeschi2024alert; @vantaylor2024socialbias; @vidgen2023simplesafetytests; @vidgen2024ailuminate; @wang2023decodingtrust; @wang2023donotanswer; @wei2023jailbroken; @xie2024sorrybench; @yu2023gptfuzzer; @yuan2023cipherchat; @zeng2024persuasion; @zhang2024cbtbench; @zou2023gcg]
+[@aakanksha2024multilingual; @adversaai2023universal; @andriushchenko2024tense; @anthropic2024manyshot; @aqrawi2024singleturncrescendo; @bethany2024mathprompt; @bhardwaj2023harmfulqa; @bhardwaj2024homer; @bryan2025agentictaxonomy; @bullwinkel2025airtlessons; @bullwinkel2025repeng; @bullwinkel2026trigger; @chao2023pair; @chao2024jailbreakbench; @cui2024orbench; @darkbench2025; @derczynski2024garak; @ding2023wolf; @embracethered2024unicode; @embracethered2025sneakybits; @ghosh2025aegis; @gupta2024walledeval; @haider2024phi3safety; @han2024medsafetybench; @hines2024spotlighting; @ji2023beavertails; @ji2024pkusaferlhf; @jiang2025sosbench; @jones2025computeruse; @kingma2014adam; @li2024saladbench; @li2024wmdp; @lin2023toxicchat; @liu2024flipattack; @liu2024mmsafetybench; @lopez2024pyrit; @lv2024codechameleon; @mazeika2023tdc; @mazeika2024harmbench; @mckee2024transparency; @mehrotra2023tap; @microsoft2024skeletonkey; @palaskar2025vlsu; @pfohl2024equitymedqa; @promptfoo2025ccp; @robustintelligence2024bypass; @roccia2024promptintel; @rottger2023xstest; @rottger2025msts; @russinovich2024crescendo; @russinovich2025price; @scheuerman2025transphobia; @shaikh2022second; @shayegani2025computeruse; @shen2023donotanything; @sheshadri2024lat; @stok2023ansi; @tan2026comicjailbreak; @tang2025multilingual; @tedeschi2024alert; @vantaylor2024socialbias; @vidgen2023simplesafetytests; @vidgen2024ailuminate; @wang2023decodingtrust; @wang2023donotanswer; @wei2023jailbroken; @xie2024sorrybench; @yu2023gptfuzzer; @yuan2023cipherchat; @zeng2024persuasion; @zhang2024cbtbench; @zou2023gcg]
 
 :::
diff --git a/doc/code/datasets/1_loading_datasets.ipynb b/doc/code/datasets/1_loading_datasets.ipynb
@@ -28,6 +28,7 @@
     "JailbreakBench [@chao2024jailbreakbench],\n",
     "LLM-LAT [@sheshadri2024lat],\n",
     "MedSafetyBench [@han2024medsafetybench],\n",
+    "MM-SafetyBench [@liu2024mmsafetybench],\n",
     "Multilingual Alignment Prism [@aakanksha2024multilingual],\n",
     "Multilingual Vulnerabilities [@tang2025multilingual],\n",
     "OR-Bench [@cui2024orbench],\n",
@@ -98,6 +99,7 @@
        " 'mental_health_crisis_multiturn_example',\n",
        " 'ml_vlsu',\n",
        " 'mlcommons_ailuminate',\n",
+       " 'mm_safetybench',\n",
        " 'msts',\n",
        " 'multilingual_vulnerability',\n",
        " 'or_bench_80k',\n",

diff --git a/doc/code/datasets/1_loading_datasets.py b/doc/code/datasets/1_loading_datasets.py
@@ -32,6 +32,7 @@
 # JailbreakBench [@chao2024jailbreakbench],
 # LLM-LAT [@sheshadri2024lat],
 # MedSafetyBench [@han2024medsafetybench],
+# MM-SafetyBench [@liu2024mmsafetybench],
 # Multilingual Alignment Prism [@aakanksha2024multilingual],
 # Multilingual Vulnerabilities [@tang2025multilingual],
 # OR-Bench [@cui2024orbench],

diff --git a/doc/references.bib b/doc/references.bib
@@ -610,3 +610,11 @@ @misc{embracethered2025sneakybits
   url       = {https://embracethered.com/blog/posts/2025/sneaky-bits-and-ascii-smuggler/},
   note      = {Embrace The Red Blog},
 }
+
+@inproceedings{liu2024mmsafetybench,
+  title     = {{MM-SafetyBench}: A Benchmark for Safety Evaluation of Multimodal Large Language Models},
+  author    = {Xin Liu and Yichen Zhu and Jindong Gu and Yunshi Lan and Chao Yang and Yu Qiao},
+  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
+  year      = {2024},
+  url       = {https://arxiv.org/abs/2311.17600},
+}
diff --git a/pyrit/datasets/seed_datasets/remote/__init__.py b/pyrit/datasets/seed_datasets/remote/__init__.py
@@ -73,6 +73,11 @@
 from pyrit.datasets.seed_datasets.remote.mlcommons_ailuminate_dataset import (
     _MLCommonsAILuminateDataset,
 )  # noqa: F401
+from pyrit.datasets.seed_datasets.remote.mm_safetybench_dataset import (
+    MMSafetyBenchCategory,
+    MMSafetyBenchVariant,
+    _MMSafetyBenchDataset,
+)  # noqa: F401
 from pyrit.datasets.seed_datasets.remote.msts_dataset import (
     _MSTSDataset,
 )  # noqa: F401
@@ -143,6 +148,8 @@
 
 __all__ = [
     "HiXSTestLanguage",
+    "MMSafetyBenchCategory",
+    "MMSafetyBenchVariant",
     "PromptIntelCategory",
     "PromptIntelSeverity",
     "SGXSTestLabel",
@@ -172,6 +179,7 @@
     "_LLMLatentAdversarialTrainingDataset",
     "_MedSafetyBenchDataset",
     "_MLCommonsAILuminateDataset",
+    "_MMSafetyBenchDataset",
     "_MSTSDataset",
     "_MultilingualVulnerabilityDataset",
     "_ORBench80KDataset",