microsoft · romanlutz · May 22, 2026 · May 23, 2026 · May 23, 2026
diff --git a/doc/bibliography.md b/doc/bibliography.md
@@ -5,6 +5,6 @@ All academic papers, research blogs, and technical reports referenced throughout
 :::{dropdown} Citation Keys
 :class: hidden-citations
 
-[@aakanksha2024multilingual; @adversaai2023universal; @andriushchenko2024tense; @anthropic2024manyshot; @aqrawi2024singleturncrescendo; @bethany2024mathprompt; @bhardwaj2023harmfulqa; @bhardwaj2024homer; @bryan2025agentictaxonomy; @bullwinkel2025airtlessons; @bullwinkel2025repeng; @bullwinkel2026trigger; @chao2023pair; @chao2024jailbreakbench; @cui2024orbench; @darkbench2025; @derczynski2024garak; @ding2023wolf; @embracethered2024unicode; @embracethered2025sneakybits; @ghosh2025aegis; @gupta2024walledeval; @haider2024phi3safety; @han2024medsafetybench; @hines2024spotlighting; @ji2023beavertails; @ji2024pkusaferlhf; @jiang2025sosbench; @jones2025computeruse; @kingma2014adam; @li2024saladbench; @li2024wmdp; @lin2023toxicchat; @liu2024flipattack; @lopez2024pyrit; @lv2024codechameleon; @mazeika2023tdc; @mazeika2024harmbench; @mckee2024transparency; @mehrotra2023tap; @microsoft2024skeletonkey; @palaskar2025vlsu; @pfohl2024equitymedqa; @promptfoo2025ccp; @robustintelligence2024bypass; @roccia2024promptintel; @rottger2023xstest; @rottger2025msts; @russinovich2024crescendo; @russinovich2025price; @scheuerman2025transphobia; @shaikh2022second; @shayegani2025computeruse; @shen2023donotanything; @sheshadri2024lat; @stok2023ansi; @tan2026comicjailbreak; @tang2025multilingual; @tedeschi2024alert; @vantaylor2024socialbias; @vidgen2023simplesafetytests; @vidgen2024ailuminate; @wang2023decodingtrust; @wang2023donotanswer; @wei2023jailbroken; @xie2024sorrybench; @yu2023gptfuzzer; @yuan2023cipherchat; @zeng2024persuasion; @zhang2024cbtbench; @zou2023gcg]
+[@aakanksha2024multilingual; @adversaai2023universal; @andriushchenko2024tense; @anthropic2024manyshot; @aqrawi2024singleturncrescendo; @bethany2024mathprompt; @bhardwaj2023harmfulqa; @bhardwaj2024homer; @bryan2025agentictaxonomy; @bullwinkel2025airtlessons; @bullwinkel2025repeng; @bullwinkel2026trigger; @chao2023pair; @chao2024jailbreakbench; @cui2024orbench; @darkbench2025; @derczynski2024garak; @ding2023wolf; @embracethered2024unicode; @embracethered2025sneakybits; @ghosh2025aegis; @gong2025figstep; @gupta2024walledeval; @haider2024phi3safety; @han2024medsafetybench; @hines2024spotlighting; @ji2023beavertails; @ji2024pkusaferlhf; @jiang2025sosbench; @jones2025computeruse; @kingma2014adam; @li2024saladbench; @li2024wmdp; @lin2023toxicchat; @liu2024flipattack; @lopez2024pyrit; @lv2024codechameleon; @mazeika2023tdc; @mazeika2024harmbench; @mckee2024transparency; @mehrotra2023tap; @microsoft2024skeletonkey; @palaskar2025vlsu; @pfohl2024equitymedqa; @promptfoo2025ccp; @robustintelligence2024bypass; @roccia2024promptintel; @rottger2023xstest; @rottger2025msts; @russinovich2024crescendo; @russinovich2025price; @scheuerman2025transphobia; @shaikh2022second; @shayegani2025computeruse; @shen2023donotanything; @sheshadri2024lat; @stok2023ansi; @tan2026comicjailbreak; @tang2025multilingual; @tedeschi2024alert; @vantaylor2024socialbias; @vidgen2023simplesafetytests; @vidgen2024ailuminate; @wang2023decodingtrust; @wang2023donotanswer; @wei2023jailbroken; @xie2024sorrybench; @yu2023gptfuzzer; @yuan2023cipherchat; @zeng2024persuasion; @zhang2024cbtbench; @zou2023gcg]
 
 :::
diff --git a/doc/code/datasets/1_loading_datasets.ipynb b/doc/code/datasets/1_loading_datasets.ipynb
@@ -23,6 +23,7 @@
     "Do Anything Now [@shen2023donotanything],\n",
     "Do-Not-Answer [@wang2023donotanswer],\n",
     "EquityMedQA [@pfohl2024equitymedqa],\n",
+    "FigStep [@gong2025figstep],\n",
     "HarmBench [@mazeika2024harmbench],\n",
     "HarmfulQA [@bhardwaj2023harmfulqa],\n",
     "JailbreakBench [@chao2024jailbreakbench],\n",

diff --git a/doc/code/datasets/1_loading_datasets.py b/doc/code/datasets/1_loading_datasets.py
@@ -27,6 +27,7 @@
 # Do Anything Now [@shen2023donotanything],
 # Do-Not-Answer [@wang2023donotanswer],
 # EquityMedQA [@pfohl2024equitymedqa],
+# FigStep [@gong2025figstep],
 # HarmBench [@mazeika2024harmbench],
 # HarmfulQA [@bhardwaj2023harmfulqa],
 # JailbreakBench [@chao2024jailbreakbench],

diff --git a/doc/references.bib b/doc/references.bib
@@ -96,6 +96,18 @@ @article{bhardwaj2023harmfulqa
   note      = {Introduces the {HarmfulQA} dataset},
 }
 
+@inproceedings{gong2025figstep,
+  title     = {{FigStep}: Jailbreaking Large Vision-Language Models via Typographic Visual Prompts},
+  author    = {Yichen Gong and Delong Ran and Jinyuan Liu and Conglei Wang and Tianshuo Cong and Anyu Wang and Sisi Duan and Xiaoyun Wang},
+  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
+  volume    = {39},
+  number    = {22},
+  pages     = {23951--23959},
+  year      = {2025},
+  url       = {https://arxiv.org/abs/2311.05608},
+  note      = {Introduces the {SafeBench} typographic-image jailbreak benchmark (AAAI 2025 Oral)},
+}
+
 @article{gupta2024walledeval,
   title     = {{WalledEval}: A Comprehensive Safety Evaluation Toolkit for Large Language Models},
   author    = {Prannaya Gupta and Le Qi Yau and Hao Han Low and I-Shiang Lee and Hugo Maximus Lim and Yu Xin Teoh and Jia Hng Koh and Dar Win Liew and Rishabh Bhardwaj and Rajat Bhardwaj and Soujanya Poria},

diff --git a/pyrit/datasets/seed_datasets/remote/__init__.py b/pyrit/datasets/seed_datasets/remote/__init__.py
@@ -42,6 +42,11 @@
 from pyrit.datasets.seed_datasets.remote.equitymedqa_dataset import (
     _EquityMedQADataset,
 )  # noqa: F401
+from pyrit.datasets.seed_datasets.remote.figstep_dataset import (
+    FigStepCategory,
+    FigStepVariant,
+    _FigStepDataset,
+)  # noqa: F401
 from pyrit.datasets.seed_datasets.remote.forbidden_questions_dataset import (
     _ForbiddenQuestionsDataset,
 )  # noqa: F401
@@ -142,6 +147,8 @@
 )  # noqa: F401
 
 __all__ = [
+    "FigStepCategory",
+    "FigStepVariant",
     "HiXSTestLanguage",
     "PromptIntelCategory",
     "PromptIntelSeverity",
@@ -162,6 +169,7 @@
     "_DangerousQADataset",
     "_DarkBenchDataset",
     "_EquityMedQADataset",
+    "_FigStepDataset",
     "_ForbiddenQuestionsDataset",
     "_HarmBenchDataset",
     "_HarmBenchMultimodalDataset",