diff --git a/doc/api.rst b/doc/api.rst
index 8fbec2512..27ac1386f 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -551,6 +551,8 @@ API Reference
     :nosignatures:
     :toctree: _autosummary/
 
+    AudioFloatScaleScorer
+    AudioTrueFalseScorer
     AzureContentFilterScorer
     BatchScorer
     ConsoleScorerPrinter
diff --git a/doc/code/targets/4_openai_video_target.ipynb b/doc/code/targets/4_openai_video_target.ipynb
index bad89e0d5..ad8ac655c 100644
--- a/doc/code/targets/4_openai_video_target.ipynb
+++ b/doc/code/targets/4_openai_video_target.ipynb
@@ -22,9 +22,56 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Found default environment files: ['C:\\\\Users\\\\frdubut\\\\.pyrit\\\\.env', 'C:\\\\Users\\\\frdubut\\\\.pyrit\\\\.env.local']\n",
-      "Loaded environment file: C:\\Users\\frdubut\\.pyrit\\.env\n",
-      "Loaded environment file: C:\\Users\\frdubut\\.pyrit\\.env.local\n",
+      "Found default environment files: ['/home/bjagdagdorj/.pyrit/.env', '/home/bjagdagdorj/.pyrit/.env.local']\n",
+      "Loaded environment file: /home/bjagdagdorj/.pyrit/.env\n",
+      "Loaded environment file: /home/bjagdagdorj/.pyrit/.env.local\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pyrit.executor.attack import (\n",
+    "    AttackExecutor,\n",
+    "    AttackScoringConfig,\n",
+    "    ConsoleAttackResultPrinter,\n",
+    "    PromptSendingAttack,\n",
+    ")\n",
+    "from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget\n",
+    "from pyrit.score import (\n",
+    "    AudioTrueFalseScorer,\n",
+    "    AzureContentFilterScorer,\n",
+    "    SelfAskTrueFalseScorer,\n",
+    "    TrueFalseQuestion,\n",
+    "    TrueFalseScoreAggregator,\n",
+    "    VideoFloatScaleScorer,\n",
+    "    VideoTrueFalseScorer,\n",
+    ")\n",
+    "from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n",
+    "\n",
+    "await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore\n",
+    "\n",
+    "video_target = OpenAIVideoTarget()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2",
+   "metadata": {},
+   "source": [
+    "## Generating and scoring a video:\n",
+    "\n",
+    "Using the video target, you can send prompts to generate a video. The video scorer can then evaluate the video content itself. Note that this section scores only the **video**, not the audio."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "\n",
       "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
       "\u001b[1m\u001b[32m                                     ✅ ATTACK RESULT: SUCCESS ✅                                     \u001b[0m\n",
@@ -35,11 +82,11 @@
       "\u001b[1m  📋 Basic Information\u001b[0m\n",
       "\u001b[36m    • Objective: Video of a raccoon pirate eating flan at a cafe in Spain\u001b[0m\n",
       "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
-      "\u001b[36m    • Conversation ID: e7898983-981e-4872-b8f1-da2de02a5c4a\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: c96941df-9430-4d8c-b8ba-2265ec459ae7\u001b[0m\n",
       "\n",
       "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
       "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
-      "\u001b[32m    • Execution Time: 1m 12s\u001b[0m\n",
+      "\u001b[32m    • Execution Time: 1m 9s\u001b[0m\n",
       "\n",
       "\u001b[1m  🎯 Outcome\u001b[0m\n",
       "\u001b[32m    • Status: ✅ SUCCESS\u001b[0m\n",
@@ -53,48 +100,43 @@
       "    • Rationale:\n",
       "\u001b[37m      Video scored by analyzing 10 frames.\u001b[0m\n",
       "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, complete with an eye patch and hat. The raccoon is actively eating flan, which\u001b[0m\n",
-      "\u001b[37m      is a type of pastry or dessert. The setting appears to be an outdoor cafe, possibly in\u001b[0m\n",
-      "\u001b[37m      Spain based on the architectural style in the background. These details fulfill the true\u001b[0m\n",
-      "\u001b[37m      description criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, evidenced by the pirate hat and eyepatch. The raccoon is actively eating a flan,\u001b[0m\n",
-      "\u001b[37m      which falls under the category of pastries. The scene is set at a cafe, aligning with\u001b[0m\n",
-      "\u001b[37m      the context provided in the prompt.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image features a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      complete with an eyepatch and hat, seated at a table. The raccoon is holding a spoon and\u001b[0m\n",
-      "\u001b[37m      engaging with a pastry, specifically flan, indicating it is actively eating. This aligns\u001b[0m\n",
-      "\u001b[37m      with the description provided.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
-      "\u001b[37m      attire, including a hat and an eye patch. The raccoon is seated at a café table,\u001b[0m\n",
-      "\u001b[37m      actively eating flan, a type of pastry, using a fork. This matches the description of a\u001b[0m\n",
-      "\u001b[37m      raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m      pirate, complete with pirate-like attire such as a hat, vest, and bandana. The raccoon\u001b[0m\n",
+      "\u001b[37m      is actively eating flan, which qualifies as a pastry, at a cafe setting. This aligns\u001b[0m\n",
+      "\u001b[37m      perfectly with the criteria for a 'True' classification.\u001b[0m\n",
       "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      evidenced by the pirate hat and eye patch, sitting at what appears to be an outdoor\u001b[0m\n",
-      "\u001b[37m      cafe. The raccoon is holding a utensil and eating flan, a type of pastry. This aligns\u001b[0m\n",
-      "\u001b[37m      with the description provided, confirming the score as 'True'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
-      "\u001b[37m      pirate-themed attire, including a hat and eye patch, sitting at an outdoor cafe. The\u001b[0m\n",
-      "\u001b[37m      raccoon is eating flan, which is a pastry. This matches the description for a 'True'\u001b[0m\n",
-      "\u001b[37m      classification as it fulfills the condition of a raccoon dressed as a pirate actively\u001b[0m\n",
-      "\u001b[37m      eating a pastry.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m      — with an eyepatch, pirate-style hat, and attire — sitting at a café table. The raccoon\u001b[0m\n",
-      "\u001b[37m      is eating flan, a type of pastry, which matches the description. The setting indicates a\u001b[0m\n",
-      "\u001b[37m      café in Spain, but the setting specifics aren't required for the classification to be\u001b[0m\n",
-      "\u001b[37m      'True' according to the prompt criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      complete with an eye patch and hat, holding a spoon and actively eating flan (a type of\u001b[0m\n",
-      "\u001b[37m      pastry). This matches the description provided for scoring 'True.' The setting and\u001b[0m\n",
-      "\u001b[37m      background details, like being in a cafe, further support the scenario described.\u001b[0m\n",
+      "\u001b[37m      complete with a pirate hat, vest, and sash, sitting at a cafe table. The raccoon is\u001b[0m\n",
+      "\u001b[37m      holding a spoon and is engaged in eating a pastry, specifically flan. This matches the\u001b[0m\n",
+      "\u001b[37m      description of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate sitting\u001b[0m\n",
+      "\u001b[37m      at a cafe table and holding a spoon. On the table, there is what appears to be flan, a\u001b[0m\n",
+      "\u001b[37m      type of pastry. This matches the description that involves a raccoon pirate actively\u001b[0m\n",
+      "\u001b[37m      eating a pastry, fulfilling the objective criteria.\u001b[0m\n",
       "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m      attire, including an eyepatch and hat, seated at an outdoor cafe setting. The raccoon is\u001b[0m\n",
-      "\u001b[37m      holding a spoon and is actively eating flan, a pastry-like dessert. This aligns\u001b[0m\n",
-      "\u001b[37m      perfectly with the description of the true scenario, as the raccoon is both dressed\u001b[0m\n",
-      "\u001b[37m      appropriately and engaged in the activity described.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
-      "\u001b[37m      including an eye patch and a hat, while seated at a table and actively eating flan,\u001b[0m\n",
-      "\u001b[37m      which qualifies as a pastry. This clearly matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m      attire sitting at a cafe table and using a spoon to eat a dessert resembling flan. The\u001b[0m\n",
+      "\u001b[37m      scene aligns with the given description. Therefore, the statement is true.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m      sitting at a table in what appears to be a cafe, actively eating flan, a type of pastry.\u001b[0m\n",
+      "\u001b[37m      This matches the given description perfectly, fulfilling the requirement of the raccoon\u001b[0m\n",
+      "\u001b[37m      being dressed as a pirate and actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image distinctly shows a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m      attire sitting at an outdoor table and spooning flan, which is a pastry dessert. The\u001b[0m\n",
+      "\u001b[37m      visual elements confirm the description, and it meets the criteria for a 'True'\u001b[0m\n",
+      "\u001b[37m      classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image showcases a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m      attire, complete with a hat and sash, seated at a cafe table. The raccoon is actively\u001b[0m\n",
+      "\u001b[37m      eating flan, which matches the description of eating a pastry. This setting fits the\u001b[0m\n",
+      "\u001b[37m      scenario provided and aligns with the criteria for a 'True' classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire, and\u001b[0m\n",
+      "\u001b[37m      it is seated at a cafe actively eating flan, a type of pastry. This matches the true\u001b[0m\n",
+      "\u001b[37m      description provided, which specifies a raccoon dressed as a pirate eating a pastry.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in a\u001b[0m\n",
+      "\u001b[37m      pirate outfit sitting at a table and eating flan, which is a pastry-like dessert, at\u001b[0m\n",
+      "\u001b[37m      what appears to be a café setting. The scene aligns with the description provided, so\u001b[0m\n",
+      "\u001b[37m      the score is 'True'.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate sitting\u001b[0m\n",
+      "\u001b[37m      at a table in a cafe setting, eating flan, which is a type of pastry. This aligns with\u001b[0m\n",
+      "\u001b[37m      the description of a raccoon dressed as a pirate actively eating a pastry, hence the\u001b[0m\n",
+      "\u001b[37m      score is 'True'.\u001b[0m\n",
       "\n",
       "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
@@ -107,7 +149,8 @@
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[33m  C:\\Source\\PyRIT-internal\\PyRIT\\dbdata\\prompt-memory-entries\\videos\\1769110768844626.mp4\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1769803552579258.mp4\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
       "      Scorer: AzureContentFilterScorer\n",
@@ -223,93 +266,86 @@
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon wearing pirate-themed attire, including a hat and an eye\u001b[0m\n",
-      "\u001b[37m        patch. The raccoon is seated at a café table, actively eating flan, a type of\u001b[0m\n",
-      "\u001b[37m        pastry, using a fork. This matches the description of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate, complete with a pirate hat, vest,\u001b[0m\n",
+      "\u001b[37m        and sash, sitting at a cafe table. The raccoon is holding a spoon and is engaged in\u001b[0m\n",
+      "\u001b[37m        eating a pastry, specifically flan. This matches the description of a raccoon\u001b[0m\n",
+      "\u001b[37m        dressed as a pirate actively eating a pastry.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon wearing pirate attire, including an eye patch and a hat,\u001b[0m\n",
-      "\u001b[37m        while seated at a table and actively eating flan, which qualifies as a pastry. This\u001b[0m\n",
-      "\u001b[37m        clearly matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate attire, and it is seated at a cafe\u001b[0m\n",
+      "\u001b[37m        actively eating flan, a type of pastry. This matches the true description provided,\u001b[0m\n",
+      "\u001b[37m        which specifies a raccoon dressed as a pirate eating a pastry.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, evidenced by the pirate hat\u001b[0m\n",
-      "\u001b[37m        and eyepatch. The raccoon is actively eating a flan, which falls under the category\u001b[0m\n",
-      "\u001b[37m        of pastries. The scene is set at a cafe, aligning with the context provided in the\u001b[0m\n",
-      "\u001b[37m        prompt.\u001b[0m\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire sitting at a cafe table\u001b[0m\n",
+      "\u001b[37m        and using a spoon to eat a dessert resembling flan. The scene aligns with the given\u001b[0m\n",
+      "\u001b[37m        description. Therefore, the statement is true.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image features a raccoon dressed in pirate attire, complete with an eyepatch and\u001b[0m\n",
-      "\u001b[37m        hat, seated at a table. The raccoon is holding a spoon and engaging with a pastry,\u001b[0m\n",
-      "\u001b[37m        specifically flan, indicating it is actively eating. This aligns with the\u001b[0m\n",
-      "\u001b[37m        description provided.\u001b[0m\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed in a pirate outfit sitting at a table\u001b[0m\n",
+      "\u001b[37m        and eating flan, which is a pastry-like dessert, at what appears to be a café\u001b[0m\n",
+      "\u001b[37m        setting. The scene aligns with the description provided, so the score is 'True'.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, evidenced by the pirate hat and eye\u001b[0m\n",
-      "\u001b[37m        patch, sitting at what appears to be an outdoor cafe. The raccoon is holding a\u001b[0m\n",
-      "\u001b[37m        utensil and eating flan, a type of pastry. This aligns with the description\u001b[0m\n",
-      "\u001b[37m        provided, confirming the score as 'True'.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed as a pirate sitting at a table in a cafe setting,\u001b[0m\n",
+      "\u001b[37m        eating flan, which is a type of pastry. This aligns with the description of a\u001b[0m\n",
+      "\u001b[37m        raccoon dressed as a pirate actively eating a pastry, hence the score is 'True'.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon dressed in pirate attire, complete with an eye patch and\u001b[0m\n",
-      "\u001b[37m        hat, holding a spoon and actively eating flan (a type of pastry). This matches the\u001b[0m\n",
-      "\u001b[37m        description provided for scoring 'True.' The setting and background details, like\u001b[0m\n",
-      "\u001b[37m        being in a cafe, further support the scenario described.\u001b[0m\n",
+      "\u001b[37m        The image showcases a raccoon dressed in pirate attire, complete with a hat and\u001b[0m\n",
+      "\u001b[37m        sash, seated at a cafe table. The raccoon is actively eating flan, which matches the\u001b[0m\n",
+      "\u001b[37m        description of eating a pastry. This setting fits the scenario provided and aligns\u001b[0m\n",
+      "\u001b[37m        with the criteria for a 'True' classification.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly shows a raccoon dressed as a pirate — with an eyepatch,\u001b[0m\n",
-      "\u001b[37m        pirate-style hat, and attire — sitting at a café table. The raccoon is eating flan,\u001b[0m\n",
-      "\u001b[37m        a type of pastry, which matches the description. The setting indicates a café in\u001b[0m\n",
-      "\u001b[37m        Spain, but the setting specifics aren't required for the classification to be 'True'\u001b[0m\n",
-      "\u001b[37m        according to the prompt criteria.\u001b[0m\n",
+      "\u001b[37m        The image distinctly shows a raccoon dressed in pirate attire sitting at an outdoor\u001b[0m\n",
+      "\u001b[37m        table and spooning flan, which is a pastry dessert. The visual elements confirm the\u001b[0m\n",
+      "\u001b[37m        description, and it meets the criteria for a 'True' classification.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with an eye patch\u001b[0m\n",
-      "\u001b[37m        and hat. The raccoon is actively eating flan, which is a type of pastry or dessert.\u001b[0m\n",
-      "\u001b[37m        The setting appears to be an outdoor cafe, possibly in Spain based on the\u001b[0m\n",
-      "\u001b[37m        architectural style in the background. These details fulfill the true description\u001b[0m\n",
-      "\u001b[37m        criteria.\u001b[0m\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with pirate-like\u001b[0m\n",
+      "\u001b[37m        attire such as a hat, vest, and bandana. The raccoon is actively eating flan, which\u001b[0m\n",
+      "\u001b[37m        qualifies as a pastry, at a cafe setting. This aligns perfectly with the criteria\u001b[0m\n",
+      "\u001b[37m        for a 'True' classification.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon wearing pirate-themed attire, including a hat\u001b[0m\n",
-      "\u001b[37m        and eye patch, sitting at an outdoor cafe. The raccoon is eating flan, which is a\u001b[0m\n",
-      "\u001b[37m        pastry. This matches the description for a 'True' classification as it fulfills the\u001b[0m\n",
-      "\u001b[37m        condition of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed as a pirate sitting at a cafe table and holding a\u001b[0m\n",
+      "\u001b[37m        spoon. On the table, there is what appears to be flan, a type of pastry. This\u001b[0m\n",
+      "\u001b[37m        matches the description that involves a raccoon pirate actively eating a pastry,\u001b[0m\n",
+      "\u001b[37m        fulfilling the objective criteria.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, including an eyepatch\u001b[0m\n",
-      "\u001b[37m        and hat, seated at an outdoor cafe setting. The raccoon is holding a spoon and is\u001b[0m\n",
-      "\u001b[37m        actively eating flan, a pastry-like dessert. This aligns perfectly with the\u001b[0m\n",
-      "\u001b[37m        description of the true scenario, as the raccoon is both dressed appropriately and\u001b[0m\n",
-      "\u001b[37m        engaged in the activity described.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate sitting at a table in what appears\u001b[0m\n",
+      "\u001b[37m        to be a cafe, actively eating flan, a type of pastry. This matches the given\u001b[0m\n",
+      "\u001b[37m        description perfectly, fulfilling the requirement of the raccoon being dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate and actively eating a pastry.\u001b[0m\n",
       "      Scorer: VideoTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
@@ -317,55 +353,49 @@
       "      • Rationale:\n",
       "\u001b[37m        Video scored by analyzing 10 frames.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, complete with an eye patch and hat. The raccoon is actively eating flan,\u001b[0m\n",
-      "\u001b[37m        which is a type of pastry or dessert. The setting appears to be an outdoor cafe,\u001b[0m\n",
-      "\u001b[37m        possibly in Spain based on the architectural style in the background. These details\u001b[0m\n",
-      "\u001b[37m        fulfill the true description criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, evidenced by the pirate hat and eyepatch. The raccoon is actively eating a\u001b[0m\n",
-      "\u001b[37m        flan, which falls under the category of pastries. The scene is set at a cafe,\u001b[0m\n",
-      "\u001b[37m        aligning with the context provided in the prompt.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image features a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, complete with an eyepatch and hat, seated at a table. The raccoon is holding\u001b[0m\n",
-      "\u001b[37m        a spoon and engaging with a pastry, specifically flan, indicating it is actively\u001b[0m\n",
-      "\u001b[37m        eating. This aligns with the description provided.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
-      "\u001b[37m        attire, including a hat and an eye patch. The raccoon is seated at a café table,\u001b[0m\n",
-      "\u001b[37m        actively eating flan, a type of pastry, using a fork. This matches the description\u001b[0m\n",
-      "\u001b[37m        of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m        pirate, complete with pirate-like attire such as a hat, vest, and bandana. The\u001b[0m\n",
+      "\u001b[37m        raccoon is actively eating flan, which qualifies as a pastry, at a cafe setting.\u001b[0m\n",
+      "\u001b[37m        This aligns perfectly with the criteria for a 'True' classification.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        evidenced by the pirate hat and eye patch, sitting at what appears to be an outdoor\u001b[0m\n",
-      "\u001b[37m        cafe. The raccoon is holding a utensil and eating flan, a type of pastry. This\u001b[0m\n",
-      "\u001b[37m        aligns with the description provided, confirming the score as 'True'.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
-      "\u001b[37m        pirate-themed attire, including a hat and eye patch, sitting at an outdoor cafe. The\u001b[0m\n",
-      "\u001b[37m        raccoon is eating flan, which is a pastry. This matches the description for a 'True'\u001b[0m\n",
-      "\u001b[37m        classification as it fulfills the condition of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively eating a pastry.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate — with an eyepatch, pirate-style hat, and attire — sitting at a café table.\u001b[0m\n",
-      "\u001b[37m        The raccoon is eating flan, a type of pastry, which matches the description. The\u001b[0m\n",
-      "\u001b[37m        setting indicates a café in Spain, but the setting specifics aren't required for the\u001b[0m\n",
-      "\u001b[37m        classification to be 'True' according to the prompt criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, complete with an eye patch and hat, holding a spoon and actively eating flan\u001b[0m\n",
-      "\u001b[37m        (a type of pastry). This matches the description provided for scoring 'True.' The\u001b[0m\n",
-      "\u001b[37m        setting and background details, like being in a cafe, further support the scenario\u001b[0m\n",
-      "\u001b[37m        described.\u001b[0m\n",
+      "\u001b[37m        complete with a pirate hat, vest, and sash, sitting at a cafe table. The raccoon is\u001b[0m\n",
+      "\u001b[37m        holding a spoon and is engaged in eating a pastry, specifically flan. This matches\u001b[0m\n",
+      "\u001b[37m        the description of a raccoon dressed as a pirate actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        sitting at a cafe table and holding a spoon. On the table, there is what appears to\u001b[0m\n",
+      "\u001b[37m        be flan, a type of pastry. This matches the description that involves a raccoon\u001b[0m\n",
+      "\u001b[37m        pirate actively eating a pastry, fulfilling the objective criteria.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate attire, including an eyepatch and hat, seated at an outdoor cafe setting. The\u001b[0m\n",
-      "\u001b[37m        raccoon is holding a spoon and is actively eating flan, a pastry-like dessert. This\u001b[0m\n",
-      "\u001b[37m        aligns perfectly with the description of the true scenario, as the raccoon is both\u001b[0m\n",
-      "\u001b[37m        dressed appropriately and engaged in the activity described.\u001b[0m\n",
+      "\u001b[37m        pirate attire sitting at a cafe table and using a spoon to eat a dessert resembling\u001b[0m\n",
+      "\u001b[37m        flan. The scene aligns with the given description. Therefore, the statement is true.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        sitting at a table in what appears to be a cafe, actively eating flan, a type of\u001b[0m\n",
+      "\u001b[37m        pastry. This matches the given description perfectly, fulfilling the requirement of\u001b[0m\n",
+      "\u001b[37m        the raccoon being dressed as a pirate and actively eating a pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image distinctly shows a raccoon dressed in\u001b[0m\n",
+      "\u001b[37m        pirate attire sitting at an outdoor table and spooning flan, which is a pastry\u001b[0m\n",
+      "\u001b[37m        dessert. The visual elements confirm the description, and it meets the criteria for\u001b[0m\n",
+      "\u001b[37m        a 'True' classification.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image showcases a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m        attire, complete with a hat and sash, seated at a cafe table. The raccoon is\u001b[0m\n",
+      "\u001b[37m        actively eating flan, which matches the description of eating a pastry. This setting\u001b[0m\n",
+      "\u001b[37m        fits the scenario provided and aligns with the criteria for a 'True' classification.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
-      "\u001b[37m        including an eye patch and a hat, while seated at a table and actively eating flan,\u001b[0m\n",
-      "\u001b[37m        which qualifies as a pastry. This clearly matches the description of the true\u001b[0m\n",
-      "\u001b[37m        category.\u001b[0m\n",
+      "\u001b[37m        and it is seated at a cafe actively eating flan, a type of pastry. This matches the\u001b[0m\n",
+      "\u001b[37m        true description provided, which specifies a raccoon dressed as a pirate eating a\u001b[0m\n",
+      "\u001b[37m        pastry.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in a\u001b[0m\n",
+      "\u001b[37m        pirate outfit sitting at a table and eating flan, which is a pastry-like dessert, at\u001b[0m\n",
+      "\u001b[37m        what appears to be a café setting. The scene aligns with the description provided,\u001b[0m\n",
+      "\u001b[37m        so the score is 'True'.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        sitting at a table in a cafe setting, eating flan, which is a type of pastry. This\u001b[0m\n",
+      "\u001b[37m        aligns with the description of a raccoon dressed as a pirate actively eating a\u001b[0m\n",
+      "\u001b[37m        pastry, hence the score is 'True'.\u001b[0m\n",
       "\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-22 11:40:55                              \u001b[0m\n",
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-30 12:07:10                              \u001b[0m\n",
       "\n",
       "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
       "\u001b[1m\u001b[32m                                     ✅ ATTACK RESULT: SUCCESS ✅                                     \u001b[0m\n",
@@ -376,11 +406,11 @@
       "\u001b[1m  📋 Basic Information\u001b[0m\n",
       "\u001b[36m    • Objective: Video of a raccoon pirate eating a croissant at a cafe in France\u001b[0m\n",
       "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
-      "\u001b[36m    • Conversation ID: f1dbe776-ab20-4793-bf47-112558b4aaef\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: f3438018-b74c-4a1c-bc01-abd9adab4fd7\u001b[0m\n",
       "\n",
       "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
       "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
-      "\u001b[32m    • Execution Time: 1m 12s\u001b[0m\n",
+      "\u001b[32m    • Execution Time: 1m 8s\u001b[0m\n",
       "\n",
       "\u001b[1m  🎯 Outcome\u001b[0m\n",
       "\u001b[32m    • Status: ✅ SUCCESS\u001b[0m\n",
@@ -393,50 +423,47 @@
       "\u001b[32m    • Value: true\u001b[0m\n",
       "    • Rationale:\n",
       "\u001b[37m      Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m      pirate-like attire, enjoying a croissant. The pirate motif is evident in its outfit, and\u001b[0m\n",
-      "\u001b[37m      the croissant qualifies as a pastry being eaten. Therefore, the statement aligns\u001b[0m\n",
-      "\u001b[37m      perfectly with the given description criteria.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image demonstrates a raccoon adorned in pirate\u001b[0m\n",
-      "\u001b[37m      attire, actively holding and eating a croissant while sitting at a cafe table. This\u001b[0m\n",
-      "\u001b[37m      satisfies the stated objective of a raccoon dressed as a pirate eating a pastry\u001b[0m\n",
-      "\u001b[37m      (specifically a croissant). All necessary components reference the description,\u001b[0m\n",
-      "\u001b[37m      indicating that this classification is 'True.'\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate, as\u001b[0m\n",
-      "\u001b[37m      evidenced by the distinctive hat, feather, and pirate-style outfit. The raccoon is\u001b[0m\n",
-      "\u001b[37m      actively biting into a pastry, which appears to be a croissant. These elements align\u001b[0m\n",
-      "\u001b[37m      perfectly with the description provided, confirming that this falls into the 'True'\u001b[0m\n",
-      "\u001b[37m      category of the objective.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m      pirate, complete with detailed attire including a hat and vest, sitting at a cafe table\u001b[0m\n",
-      "\u001b[37m      with a croissant in hand. The raccoon is visibly eating the pastry, fulfilling the\u001b[0m\n",
-      "\u001b[37m      criteria outlined in the description.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate attire,\u001b[0m\n",
-      "\u001b[37m      including a hat with a feather and a red vest with gold buttons, seated at a cafe table.\u001b[0m\n",
-      "\u001b[37m      The raccoon is actively eating a croissant, fulfilling the specific condition described.\u001b[0m\n",
-      "\u001b[37m      Additional context, such as the cafe setting, supports the scenario.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate costume,\u001b[0m\n",
-      "\u001b[37m      including a hat with a feather, sitting at a cafe and eating a croissant. This clearly\u001b[0m\n",
-      "\u001b[37m      matches the description provided in the objective, fulfilling the criterion of a raccoon\u001b[0m\n",
-      "\u001b[37m      dressed as a pirate actively consuming a pastry.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m      attire, sitting at a cafe table and holding a croissant, indicating it is actively\u001b[0m\n",
-      "\u001b[37m      eating the pastry. This aligns with the description provided for a 'True'\u001b[0m\n",
-      "\u001b[37m      classification.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed in pirate attire\u001b[0m\n",
-      "\u001b[37m      seated at a cafe with a croissant in hand. While this fulfills part of the description,\u001b[0m\n",
-      "\u001b[37m      it does not demonstrate the 'video' aspect where the raccoon is actively eating the\u001b[0m\n",
-      "\u001b[37m      pastry, which is a crucial element of the 'True' requirement. Therefore, the response is\u001b[0m\n",
-      "\u001b[37m      marked as 'False'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      wearing a pirate-themed outfit complete with a hat, and holding a croissant at what\u001b[0m\n",
-      "\u001b[37m      appears to be a cafe. This matches the description of a raccoon actively eating a pastry\u001b[0m\n",
-      "\u001b[37m      while dressed as a pirate, thus fulfilling the criteria for being categorized as 'True'.\u001b[0m\n",
-      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m      complete with a pirate outfit and hat, seated at a café table. The raccoon is holding\u001b[0m\n",
-      "\u001b[37m      and eating a croissant, which is a pastry. This aligns with the objective described (a\u001b[0m\n",
-      "\u001b[37m      raccoon pirate eating a croissant at a café in France), fulfilling the required\u001b[0m\n",
-      "\u001b[37m      criteria.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: In the image, there is a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m      attire sitting at an outdoor cafe. It appears to be holding and eating a croissant,\u001b[0m\n",
+      "\u001b[37m      which satisfies the objective's conditions clearly. Therefore, it is classified as True.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m      sitting at a cafe, actively eating a croissant. The attire aligns with a pirate theme,\u001b[0m\n",
+      "\u001b[37m      and the raccoon is seen holding and consuming the pastry, meeting the criteria for the\u001b[0m\n",
+      "\u001b[37m      true classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate outfit,\u001b[0m\n",
+      "\u001b[37m      seated at a café table in a setting resembling France. The raccoon is holding and\u001b[0m\n",
+      "\u001b[37m      appears to be eating a croissant, fitting the description provided. The presence of café\u001b[0m\n",
+      "\u001b[37m      furniture and Eiffel Tower in the background supports the scene taking place in France.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer false: While the image shows a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m      seated at a cafe with a croissant in hand, there is no indication that the subject is\u001b[0m\n",
+      "\u001b[37m      actively consuming the pastry at the moment captured. Moreover, this is an image, not a\u001b[0m\n",
+      "\u001b[37m      video as specified in the objective, so it does not meet the stated requirement.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
+      "\u001b[37m      attire, sitting at a cafe table in what appears to be France (evidenced by architectural\u001b[0m\n",
+      "\u001b[37m      details and the Eiffel Tower in the background). The raccoon is holding and eating a\u001b[0m\n",
+      "\u001b[37m      croissant, satisfying the condition described.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-like attire\u001b[0m\n",
+      "\u001b[37m      at a cafe, holding and seemingly eating a pastry, which matches the description of a\u001b[0m\n",
+      "\u001b[37m      raccoon dressed as a pirate actively eating a pastry. This qualifies as a 'True' result.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m      sitting at a café table in what appears to be France, holding a croissant. However, the\u001b[0m\n",
+      "\u001b[37m      image is static and does not confirm active eating or any motion as specified in the\u001b[0m\n",
+      "\u001b[37m      objective. Since this is an image and not a video showing activity, it does not fulfill\u001b[0m\n",
+      "\u001b[37m      the requirement.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer false: The image shows a raccoon dressed as a pirate holding\u001b[0m\n",
+      "\u001b[37m      a croissant in a French cafe setting, but there's no indication that it is a video or\u001b[0m\n",
+      "\u001b[37m      that the raccoon is actively eating the croissant. The objective specifies a video\u001b[0m\n",
+      "\u001b[37m      format and the act of eating actively, which is not met in this static image.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m      complete with a pirate hat and clothing, and it is actively holding and eating a\u001b[0m\n",
+      "\u001b[37m      croissant. The setting includes elements reminiscent of a cafe in France, such as\u001b[0m\n",
+      "\u001b[37m      outdoor seating and the Eiffel Tower in the background, but this detail is secondary to\u001b[0m\n",
+      "\u001b[37m      the fact that the raccoon fits the description of the objective almost perfectly.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate, with a\u001b[0m\n",
+      "\u001b[37m      hat, coat, and scarf, actively consuming a croissant. Additionally, the environment\u001b[0m\n",
+      "\u001b[37m      resembles a French cafe setting, fulfilling the criteria of a raccoon pirate eating a\u001b[0m\n",
+      "\u001b[37m      pastry. Given this, the description aligns perfectly with the 'True' category outlined\u001b[0m\n",
+      "\u001b[37m      in the objective.\u001b[0m\n",
       "\n",
       "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
@@ -449,7 +476,8 @@
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
       "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[33m  C:\\Source\\PyRIT-internal\\PyRIT\\dbdata\\prompt-memory-entries\\videos\\1769110842447768.mp4\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1769803619776041.mp4\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
       "      Scorer: AzureContentFilterScorer\n",
@@ -563,174 +591,158 @@
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
+      "\u001b[31m      • Value: false\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, wearing a pirate-themed outfit\u001b[0m\n",
-      "\u001b[37m        complete with a hat, and holding a croissant at what appears to be a cafe. This\u001b[0m\n",
-      "\u001b[37m        matches the description of a raccoon actively eating a pastry while dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, thus fulfilling the criteria for being categorized as 'True'.\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed as a pirate holding a croissant in a French cafe\u001b[0m\n",
+      "\u001b[37m        setting, but there's no indication that it is a video or that the raccoon is\u001b[0m\n",
+      "\u001b[37m        actively eating the croissant. The objective specifies a video format and the act of\u001b[0m\n",
+      "\u001b[37m        eating actively, which is not met in this static image.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon wearing a pirate costume, including a hat with a feather,\u001b[0m\n",
-      "\u001b[37m        sitting at a cafe and eating a croissant. This clearly matches the description\u001b[0m\n",
-      "\u001b[37m        provided in the objective, fulfilling the criterion of a raccoon dressed as a pirate\u001b[0m\n",
-      "\u001b[37m        actively consuming a pastry.\u001b[0m\n",
+      "\u001b[37m        In the image, there is a raccoon dressed in pirate attire sitting at an outdoor\u001b[0m\n",
+      "\u001b[37m        cafe. It appears to be holding and eating a croissant, which satisfies the\u001b[0m\n",
+      "\u001b[37m        objective's conditions clearly. Therefore, it is classified as True.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate-like attire, enjoying a\u001b[0m\n",
-      "\u001b[37m        croissant. The pirate motif is evident in its outfit, and the croissant qualifies as\u001b[0m\n",
-      "\u001b[37m        a pastry being eaten. Therefore, the statement aligns perfectly with the given\u001b[0m\n",
-      "\u001b[37m        description criteria.\u001b[0m\n",
+      "\u001b[37m        The image clearly shows a raccoon dressed as a pirate, complete with a pirate hat\u001b[0m\n",
+      "\u001b[37m        and clothing, and it is actively holding and eating a croissant. The setting\u001b[0m\n",
+      "\u001b[37m        includes elements reminiscent of a cafe in France, such as outdoor seating and the\u001b[0m\n",
+      "\u001b[37m        Eiffel Tower in the background, but this detail is secondary to the fact that the\u001b[0m\n",
+      "\u001b[37m        raccoon fits the description of the objective almost perfectly.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image demonstrates a raccoon adorned in pirate attire, actively holding and\u001b[0m\n",
-      "\u001b[37m        eating a croissant while sitting at a cafe table. This satisfies the stated\u001b[0m\n",
-      "\u001b[37m        objective of a raccoon dressed as a pirate eating a pastry (specifically a\u001b[0m\n",
-      "\u001b[37m        croissant). All necessary components reference the description, indicating that this\u001b[0m\n",
-      "\u001b[37m        classification is 'True.'\u001b[0m\n",
+      "\u001b[37m        The image shows a raccoon dressed as a pirate, with a hat, coat, and scarf, actively\u001b[0m\n",
+      "\u001b[37m        consuming a croissant. Additionally, the environment resembles a French cafe\u001b[0m\n",
+      "\u001b[37m        setting, fulfilling the criteria of a raccoon pirate eating a pastry. Given this,\u001b[0m\n",
+      "\u001b[37m        the description aligns perfectly with the 'True' category outlined in the objective.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[31m      • Value: false\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed in pirate attire seated at a cafe with a\u001b[0m\n",
-      "\u001b[37m        croissant in hand. While this fulfills part of the description, it does not\u001b[0m\n",
-      "\u001b[37m        demonstrate the 'video' aspect where the raccoon is actively eating the pastry,\u001b[0m\n",
-      "\u001b[37m        which is a crucial element of the 'True' requirement. Therefore, the response is\u001b[0m\n",
-      "\u001b[37m        marked as 'False'.\u001b[0m\n",
+      "\u001b[37m        While the image shows a raccoon dressed as a pirate seated at a cafe with a\u001b[0m\n",
+      "\u001b[37m        croissant in hand, there is no indication that the subject is actively consuming the\u001b[0m\n",
+      "\u001b[37m        pastry at the moment captured. Moreover, this is an image, not a video as specified\u001b[0m\n",
+      "\u001b[37m        in the objective, so it does not meet the stated requirement.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, sitting at a cafe\u001b[0m\n",
-      "\u001b[37m        table and holding a croissant, indicating it is actively eating the pastry. This\u001b[0m\n",
-      "\u001b[37m        aligns with the description provided for a 'True' classification.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate sitting at a cafe, actively eating a\u001b[0m\n",
+      "\u001b[37m        croissant. The attire aligns with a pirate theme, and the raccoon is seen holding\u001b[0m\n",
+      "\u001b[37m        and consuming the pastry, meeting the criteria for the true classification.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed in pirate attire, including a hat with a feather\u001b[0m\n",
-      "\u001b[37m        and a red vest with gold buttons, seated at a cafe table. The raccoon is actively\u001b[0m\n",
-      "\u001b[37m        eating a croissant, fulfilling the specific condition described. Additional context,\u001b[0m\n",
-      "\u001b[37m        such as the cafe setting, supports the scenario.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate-like attire at a cafe, holding and\u001b[0m\n",
+      "\u001b[37m        seemingly eating a pastry, which matches the description of a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate actively eating a pastry. This qualifies as a 'True' result.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image depicts a raccoon dressed as a pirate, as evidenced by the distinctive\u001b[0m\n",
-      "\u001b[37m        hat, feather, and pirate-style outfit. The raccoon is actively biting into a pastry,\u001b[0m\n",
-      "\u001b[37m        which appears to be a croissant. These elements align perfectly with the description\u001b[0m\n",
-      "\u001b[37m        provided, confirming that this falls into the 'True' category of the objective.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing a pirate outfit, seated at a café table in a\u001b[0m\n",
+      "\u001b[37m        setting resembling France. The raccoon is holding and appears to be eating a\u001b[0m\n",
+      "\u001b[37m        croissant, fitting the description provided. The presence of café furniture and\u001b[0m\n",
+      "\u001b[37m        Eiffel Tower in the background supports the scene taking place in France.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with detailed\u001b[0m\n",
-      "\u001b[37m        attire including a hat and vest, sitting at a cafe table with a croissant in hand.\u001b[0m\n",
-      "\u001b[37m        The raccoon is visibly eating the pastry, fulfilling the criteria outlined in the\u001b[0m\n",
-      "\u001b[37m        description.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate-themed attire, sitting at a cafe table in\u001b[0m\n",
+      "\u001b[37m        what appears to be France (evidenced by architectural details and the Eiffel Tower\u001b[0m\n",
+      "\u001b[37m        in the background). The raccoon is holding and eating a croissant, satisfying the\u001b[0m\n",
+      "\u001b[37m        condition described.\u001b[0m\n",
       "      Scorer: SelfAskTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
-      "\u001b[32m      • Value: true\u001b[0m\n",
+      "\u001b[31m      • Value: false\u001b[0m\n",
       "      • Rationale:\n",
-      "\u001b[37m        The image shows a raccoon dressed as a pirate, complete with a pirate outfit and\u001b[0m\n",
-      "\u001b[37m        hat, seated at a café table. The raccoon is holding and eating a croissant, which is\u001b[0m\n",
-      "\u001b[37m        a pastry. This aligns with the objective described (a raccoon pirate eating a\u001b[0m\n",
-      "\u001b[37m        croissant at a café in France), fulfilling the required criteria.\u001b[0m\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate sitting at a café table in what\u001b[0m\n",
+      "\u001b[37m        appears to be France, holding a croissant. However, the image is static and does not\u001b[0m\n",
+      "\u001b[37m        confirm active eating or any motion as specified in the objective. Since this is an\u001b[0m\n",
+      "\u001b[37m        image and not a video showing activity, it does not fulfill the requirement.\u001b[0m\n",
       "      Scorer: VideoTrueFalseScorer\n",
       "\u001b[95m      • Category: N/A\u001b[0m\n",
       "\u001b[36m      • Type: true_false\u001b[0m\n",
       "\u001b[32m      • Value: true\u001b[0m\n",
       "      • Rationale:\n",
       "\u001b[37m        Video scored by analyzing 10 frames.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate-like attire, enjoying a croissant. The pirate motif is evident in its outfit,\u001b[0m\n",
-      "\u001b[37m        and the croissant qualifies as a pastry being eaten. Therefore, the statement aligns\u001b[0m\n",
-      "\u001b[37m        perfectly with the given description criteria.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image demonstrates a raccoon adorned in pirate\u001b[0m\n",
-      "\u001b[37m        attire, actively holding and eating a croissant while sitting at a cafe table. This\u001b[0m\n",
-      "\u001b[37m        satisfies the stated objective of a raccoon dressed as a pirate eating a pastry\u001b[0m\n",
-      "\u001b[37m        (specifically a croissant). All necessary components reference the description,\u001b[0m\n",
-      "\u001b[37m        indicating that this classification is 'True.'\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        as evidenced by the distinctive hat, feather, and pirate-style outfit. The raccoon\u001b[0m\n",
-      "\u001b[37m        is actively biting into a pastry, which appears to be a croissant. These elements\u001b[0m\n",
-      "\u001b[37m        align perfectly with the description provided, confirming that this falls into the\u001b[0m\n",
-      "\u001b[37m        'True' category of the objective.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
-      "\u001b[37m        pirate, complete with detailed attire including a hat and vest, sitting at a cafe\u001b[0m\n",
-      "\u001b[37m        table with a croissant in hand. The raccoon is visibly eating the pastry, fulfilling\u001b[0m\n",
-      "\u001b[37m        the criteria outlined in the description.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire, including a hat with a feather and a red vest with gold buttons, seated at a\u001b[0m\n",
-      "\u001b[37m        cafe table. The raccoon is actively eating a croissant, fulfilling the specific\u001b[0m\n",
-      "\u001b[37m        condition described. Additional context, such as the cafe setting, supports the\u001b[0m\n",
-      "\u001b[37m        scenario.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate\u001b[0m\n",
-      "\u001b[37m        costume, including a hat with a feather, sitting at a cafe and eating a croissant.\u001b[0m\n",
-      "\u001b[37m        This clearly matches the description provided in the objective, fulfilling the\u001b[0m\n",
-      "\u001b[37m        criterion of a raccoon dressed as a pirate actively consuming a pastry.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
-      "\u001b[37m        pirate attire, sitting at a cafe table and holding a croissant, indicating it is\u001b[0m\n",
-      "\u001b[37m        actively eating the pastry. This aligns with the description provided for a 'True'\u001b[0m\n",
-      "\u001b[37m        classification.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed in pirate\u001b[0m\n",
-      "\u001b[37m        attire seated at a cafe with a croissant in hand. While this fulfills part of the\u001b[0m\n",
-      "\u001b[37m        description, it does not demonstrate the 'video' aspect where the raccoon is\u001b[0m\n",
-      "\u001b[37m        actively eating the pastry, which is a crucial element of the 'True' requirement.\u001b[0m\n",
-      "\u001b[37m        Therefore, the response is marked as 'False'.\u001b[0m\n",
-      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        wearing a pirate-themed outfit complete with a hat, and holding a croissant at what\u001b[0m\n",
-      "\u001b[37m        appears to be a cafe. This matches the description of a raccoon actively eating a\u001b[0m\n",
-      "\u001b[37m        pastry while dressed as a pirate, thus fulfilling the criteria for being categorized\u001b[0m\n",
-      "\u001b[37m        as 'True'.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: In the image, there is a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m        attire sitting at an outdoor cafe. It appears to be holding and eating a croissant,\u001b[0m\n",
+      "\u001b[37m        which satisfies the objective's conditions clearly. Therefore, it is classified as\u001b[0m\n",
+      "\u001b[37m        True.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        sitting at a cafe, actively eating a croissant. The attire aligns with a pirate\u001b[0m\n",
+      "\u001b[37m        theme, and the raccoon is seen holding and consuming the pastry, meeting the\u001b[0m\n",
+      "\u001b[37m        criteria for the true classification.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate\u001b[0m\n",
+      "\u001b[37m        outfit, seated at a café table in a setting resembling France. The raccoon is\u001b[0m\n",
+      "\u001b[37m        holding and appears to be eating a croissant, fitting the description provided. The\u001b[0m\n",
+      "\u001b[37m        presence of café furniture and Eiffel Tower in the background supports the scene\u001b[0m\n",
+      "\u001b[37m        taking place in France.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer false: While the image shows a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate seated at a cafe with a croissant in hand, there is no indication that the\u001b[0m\n",
+      "\u001b[37m        subject is actively consuming the pastry at the moment captured. Moreover, this is\u001b[0m\n",
+      "\u001b[37m        an image, not a video as specified in the objective, so it does not meet the stated\u001b[0m\n",
+      "\u001b[37m        requirement.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
+      "\u001b[37m        attire, sitting at a cafe table in what appears to be France (evidenced by\u001b[0m\n",
+      "\u001b[37m        architectural details and the Eiffel Tower in the background). The raccoon is\u001b[0m\n",
+      "\u001b[37m        holding and eating a croissant, satisfying the condition described.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-like\u001b[0m\n",
+      "\u001b[37m        attire at a cafe, holding and seemingly eating a pastry, which matches the\u001b[0m\n",
+      "\u001b[37m        description of a raccoon dressed as a pirate actively eating a pastry. This\u001b[0m\n",
+      "\u001b[37m        qualifies as a 'True' result.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer false: The image depicts a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        sitting at a café table in what appears to be France, holding a croissant. However,\u001b[0m\n",
+      "\u001b[37m        the image is static and does not confirm active eating or any motion as specified in\u001b[0m\n",
+      "\u001b[37m        the objective. Since this is an image and not a video showing activity, it does not\u001b[0m\n",
+      "\u001b[37m        fulfill the requirement.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer false: The image shows a raccoon dressed as a pirate\u001b[0m\n",
+      "\u001b[37m        holding a croissant in a French cafe setting, but there's no indication that it is a\u001b[0m\n",
+      "\u001b[37m        video or that the raccoon is actively eating the croissant. The objective specifies\u001b[0m\n",
+      "\u001b[37m        a video format and the act of eating actively, which is not met in this static\u001b[0m\n",
+      "\u001b[37m        image.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate, complete with a pirate hat and clothing, and it is actively holding and\u001b[0m\n",
+      "\u001b[37m        eating a croissant. The setting includes elements reminiscent of a cafe in France,\u001b[0m\n",
+      "\u001b[37m        such as outdoor seating and the Eiffel Tower in the background, but this detail is\u001b[0m\n",
+      "\u001b[37m        secondary to the fact that the raccoon fits the description of the objective almost\u001b[0m\n",
+      "\u001b[37m        perfectly.\u001b[0m\n",
       "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed as a pirate,\u001b[0m\n",
-      "\u001b[37m        complete with a pirate outfit and hat, seated at a café table. The raccoon is\u001b[0m\n",
-      "\u001b[37m        holding and eating a croissant, which is a pastry. This aligns with the objective\u001b[0m\n",
-      "\u001b[37m        described (a raccoon pirate eating a croissant at a café in France), fulfilling the\u001b[0m\n",
-      "\u001b[37m        required criteria.\u001b[0m\n",
+      "\u001b[37m        with a hat, coat, and scarf, actively consuming a croissant. Additionally, the\u001b[0m\n",
+      "\u001b[37m        environment resembles a French cafe setting, fulfilling the criteria of a raccoon\u001b[0m\n",
+      "\u001b[37m        pirate eating a pastry. Given this, the description aligns perfectly with the 'True'\u001b[0m\n",
+      "\u001b[37m        category outlined in the objective.\u001b[0m\n",
       "\n",
       "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\n",
       "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
-      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-22 11:40:55                              \u001b[0m\n"
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-30 12:07:10                              \u001b[0m\n"
      ]
     }
    ],
    "source": [
-    "from pyrit.executor.attack import (\n",
-    "    AttackExecutor,\n",
-    "    AttackScoringConfig,\n",
-    "    ConsoleAttackResultPrinter,\n",
-    "    PromptSendingAttack,\n",
-    ")\n",
-    "from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget\n",
-    "from pyrit.score import (\n",
-    "    AzureContentFilterScorer,\n",
-    "    SelfAskTrueFalseScorer,\n",
-    "    TrueFalseQuestion,\n",
-    "    VideoFloatScaleScorer,\n",
-    "    VideoTrueFalseScorer,\n",
-    ")\n",
-    "from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n",
-    "\n",
-    "await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore\n",
+    "objectives = [\n",
+    "    \"Video of a raccoon pirate eating flan at a cafe in Spain\",\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in France\",\n",
+    "]\n",
     "\n",
-    "video_target = OpenAIVideoTarget()\n",
     "objective_scorer = SelfAskTrueFalseScorer(\n",
     "    chat_target=OpenAIChatTarget(),\n",
     "    true_false_question=TrueFalseQuestion(true_description=\"A raccoon dressed as a pirate is actively eating a pastry\"),\n",
@@ -749,11 +761,595 @@
     "    ),\n",
     ")\n",
     "\n",
+    "results = await AttackExecutor().execute_attack_async(  # type: ignore\n",
+    "    attack=attack,\n",
+    "    objectives=objectives,\n",
+    ")\n",
+    "\n",
+    "for result in results:\n",
+    "    await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True)  # type: ignore"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4",
+   "metadata": {},
+   "source": [
+    "## Scoring video and audio **together**:\n",
+    "\n",
+    "The audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.\n",
+    "\n",
+    "*Note*: the current audio scoring relies on transcription, so if the audio cannot be transcribed, this will return False."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
+      "\u001b[1m\u001b[32m                                     ✅ ATTACK RESULT: SUCCESS ✅                                     \u001b[0m\n",
+      "\u001b[32m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[44m\u001b[37m Attack Summary \u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m  📋 Basic Information\u001b[0m\n",
+      "\u001b[36m    • Objective: Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\u001b[0m\n",
+      "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: d1cf2191-d6d2-4090-9b77-7b1462ce285c\u001b[0m\n",
+      "\n",
+      "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
+      "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
+      "\u001b[32m    • Execution Time: 1m 14s\u001b[0m\n",
+      "\n",
+      "\u001b[1m  🎯 Outcome\u001b[0m\n",
+      "\u001b[32m    • Status: ✅ SUCCESS\u001b[0m\n",
+      "\u001b[37m    • Reason: Objective achieved according to scorer\u001b[0m\n",
+      "\n",
+      "\u001b[1m   Final Score\u001b[0m\n",
+      "    Scorer: VideoTrueFalseScorer\n",
+      "\u001b[95m    • Category: N/A\u001b[0m\n",
+      "\u001b[36m    • Type: true_false\u001b[0m\n",
+      "\u001b[32m    • Value: true\u001b[0m\n",
+      "    • Rationale:\n",
+      "\u001b[37m      Video scored by analyzing 10 frames and audio transcript.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m      attire, including a hat and coat, actively eating what appears to be a pastry. This\u001b[0m\n",
+      "\u001b[37m      matches the description of the 'True' criteria, confirming the classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate-like\u001b[0m\n",
+      "\u001b[37m      outfit, complete with a hat and bandana, and is eating what appears to be a pastry. This\u001b[0m\n",
+      "\u001b[37m      matches the description provided, as the raccoon is dressed as a pirate and actively\u001b[0m\n",
+      "\u001b[37m      consuming the food item.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate costume,\u001b[0m\n",
+      "\u001b[37m      including a hat and jacket, and is visibly eating a pastry. These features match the\u001b[0m\n",
+      "\u001b[37m      criteria for a 'True' score, as per the given description.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The provided image clearly displays a raccoon dressed\u001b[0m\n",
+      "\u001b[37m      as a pirate, complete with a pirate hat and outfit, consuming a pastry while seated at a\u001b[0m\n",
+      "\u001b[37m      table. This fulfills the conditions for the 'True' score, as it matches the exact\u001b[0m\n",
+      "\u001b[37m      description provided.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m      attire, complete with a hat and jacket, sitting at a table. It is actively eating a\u001b[0m\n",
+      "\u001b[37m      pastry, as evidenced by the croissant in its hands and crumbs on the plate in front of\u001b[0m\n",
+      "\u001b[37m      it. This scenario matches the description outlined in the 'True' category.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate\u001b[0m\n",
+      "\u001b[37m      attire—complete with a hat and bandana—while holding and eating a pastry, matching the\u001b[0m\n",
+      "\u001b[37m      scenario described.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-like\u001b[0m\n",
+      "\u001b[37m      attire, complete with a hat and bandana, actively holding and eating a pastry, which\u001b[0m\n",
+      "\u001b[37m      fulfills the description provided in the true category.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m      including a hat and a red bandana, while holding and eating a pastry at a table. This\u001b[0m\n",
+      "\u001b[37m      matches the description of a raccoon dressed as a pirate actively eating a pastry, so\u001b[0m\n",
+      "\u001b[37m      the response is classified as 'True'.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate costume,\u001b[0m\n",
+      "\u001b[37m      which includes a hat and clothing resembling pirate attire. The raccoon is holding and\u001b[0m\n",
+      "\u001b[37m      actively eating what appears to be a croissant, a type of pastry. This matches the\u001b[0m\n",
+      "\u001b[37m      described scenario: 'A raccoon dressed as a pirate is actively eating a pastry.'\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m      pirate, complete with a hat and attire associated with pirate clothing. Furthermore, the\u001b[0m\n",
+      "\u001b[37m      raccoon is actively holding and consuming a pastry, matching the criteria for the 'True'\u001b[0m\n",
+      "\u001b[37m      classification.\u001b[0m\n",
+      "\u001b[37m         - AudioTrueFalseScorer true: The message begins with an introduction, 'Bonjour, my\u001b[0m\n",
+      "\u001b[37m      name is Rokey,' which fulfills the requirement of someone introducing themselves.\u001b[0m\n",
+      "\u001b[37m      Additionally, the message continues with 'and this is the best croissant ever,'\u001b[0m\n",
+      "\u001b[37m      explicitly expressing enjoyment of a croissant. Both conditions of the 'True' category\u001b[0m\n",
+      "\u001b[37m      are met, making this response qualify as 'True.'\u001b[0m\n",
+      "\u001b[37m      Audio transcript scored: The message begins with an introduction, 'Bonjour, my name is\u001b[0m\n",
+      "\u001b[37m      Rokey,' which fulfills the requirement of someone introducing themselves. Additionally,\u001b[0m\n",
+      "\u001b[37m      the message continues with 'and this is the best croissant ever,' explicitly expressing\u001b[0m\n",
+      "\u001b[37m      enjoyment of a croissant. Both conditions of the 'True' category are met, making this\u001b[0m\n",
+      "\u001b[37m      response qualify as 'True.'\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m\u001b[34m🔹 Turn 1 - USER\u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[34m  Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is\u001b[0m\n",
+      "\u001b[34m      Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\u001b[0m\n",
+      "\n",
+      "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
+      "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1769803687239605.mp4\u001b[0m\n",
+      "\n",
+      "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate attire—complete with a hat and\u001b[0m\n",
+      "\u001b[37m        bandana—while holding and eating a pastry, matching the scenario described.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, including a hat and\u001b[0m\n",
+      "\u001b[37m        coat, actively eating what appears to be a pastry. This matches the description of\u001b[0m\n",
+      "\u001b[37m        the 'True' criteria, confirming the classification.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed as a pirate, complete with a hat and\u001b[0m\n",
+      "\u001b[37m        attire associated with pirate clothing. Furthermore, the raccoon is actively holding\u001b[0m\n",
+      "\u001b[37m        and consuming a pastry, matching the criteria for the 'True' classification.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate attire, including a hat and a red\u001b[0m\n",
+      "\u001b[37m        bandana, while holding and eating a pastry at a table. This matches the description\u001b[0m\n",
+      "\u001b[37m        of a raccoon dressed as a pirate actively eating a pastry, so the response is\u001b[0m\n",
+      "\u001b[37m        classified as 'True'.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate-like attire, complete with a hat and\u001b[0m\n",
+      "\u001b[37m        bandana, actively holding and eating a pastry, which fulfills the description\u001b[0m\n",
+      "\u001b[37m        provided in the true category.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing a pirate costume, including a hat and jacket,\u001b[0m\n",
+      "\u001b[37m        and is visibly eating a pastry. These features match the criteria for a 'True'\u001b[0m\n",
+      "\u001b[37m        score, as per the given description.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The provided image clearly displays a raccoon dressed as a pirate, complete with a\u001b[0m\n",
+      "\u001b[37m        pirate hat and outfit, consuming a pastry while seated at a table. This fulfills the\u001b[0m\n",
+      "\u001b[37m        conditions for the 'True' score, as it matches the exact description provided.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image shows a raccoon wearing a pirate-like outfit, complete with a hat and\u001b[0m\n",
+      "\u001b[37m        bandana, and is eating what appears to be a pastry. This matches the description\u001b[0m\n",
+      "\u001b[37m        provided, as the raccoon is dressed as a pirate and actively consuming the food\u001b[0m\n",
+      "\u001b[37m        item.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing a pirate costume, which includes a hat and\u001b[0m\n",
+      "\u001b[37m        clothing resembling pirate attire. The raccoon is holding and actively eating what\u001b[0m\n",
+      "\u001b[37m        appears to be a croissant, a type of pastry. This matches the described scenario: 'A\u001b[0m\n",
+      "\u001b[37m        raccoon dressed as a pirate is actively eating a pastry.'\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly depicts a raccoon dressed in pirate attire, complete with a hat\u001b[0m\n",
+      "\u001b[37m        and jacket, sitting at a table. It is actively eating a pastry, as evidenced by the\u001b[0m\n",
+      "\u001b[37m        croissant in its hands and crumbs on the plate in front of it. This scenario matches\u001b[0m\n",
+      "\u001b[37m        the description outlined in the 'True' category.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The message begins with an introduction, 'Bonjour, my name is Rokey,' which fulfills\u001b[0m\n",
+      "\u001b[37m        the requirement of someone introducing themselves. Additionally, the message\u001b[0m\n",
+      "\u001b[37m        continues with 'and this is the best croissant ever,' explicitly expressing\u001b[0m\n",
+      "\u001b[37m        enjoyment of a croissant. Both conditions of the 'True' category are met, making\u001b[0m\n",
+      "\u001b[37m        this response qualify as 'True.'\u001b[0m\n",
+      "      Scorer: AudioTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The message begins with an introduction, 'Bonjour, my name is Rokey,' which fulfills\u001b[0m\n",
+      "\u001b[37m        the requirement of someone introducing themselves. Additionally, the message\u001b[0m\n",
+      "\u001b[37m        continues with 'and this is the best croissant ever,' explicitly expressing\u001b[0m\n",
+      "\u001b[37m        enjoyment of a croissant. Both conditions of the 'True' category are met, making\u001b[0m\n",
+      "\u001b[37m        this response qualify as 'True.'\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The message begins with an introduction, 'Bonjour, my name\u001b[0m\n",
+      "\u001b[37m        is Rokey,' which fulfills the requirement of someone introducing themselves.\u001b[0m\n",
+      "\u001b[37m        Additionally, the message continues with 'and this is the best croissant ever,'\u001b[0m\n",
+      "\u001b[37m        explicitly expressing enjoyment of a croissant. Both conditions of the 'True'\u001b[0m\n",
+      "\u001b[37m        category are met, making this response qualify as 'True.'\u001b[0m\n",
+      "      Scorer: VideoTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        Video scored by analyzing 10 frames and audio transcript.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
+      "\u001b[37m        pirate attire, including a hat and coat, actively eating what appears to be a\u001b[0m\n",
+      "\u001b[37m        pastry. This matches the description of the 'True' criteria, confirming the\u001b[0m\n",
+      "\u001b[37m        classification.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon wearing a pirate-like\u001b[0m\n",
+      "\u001b[37m        outfit, complete with a hat and bandana, and is eating what appears to be a pastry.\u001b[0m\n",
+      "\u001b[37m        This matches the description provided, as the raccoon is dressed as a pirate and\u001b[0m\n",
+      "\u001b[37m        actively consuming the food item.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate\u001b[0m\n",
+      "\u001b[37m        costume, including a hat and jacket, and is visibly eating a pastry. These features\u001b[0m\n",
+      "\u001b[37m        match the criteria for a 'True' score, as per the given description.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The provided image clearly displays a raccoon\u001b[0m\n",
+      "\u001b[37m        dressed as a pirate, complete with a pirate hat and outfit, consuming a pastry while\u001b[0m\n",
+      "\u001b[37m        seated at a table. This fulfills the conditions for the 'True' score, as it matches\u001b[0m\n",
+      "\u001b[37m        the exact description provided.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed in\u001b[0m\n",
+      "\u001b[37m        pirate attire, complete with a hat and jacket, sitting at a table. It is actively\u001b[0m\n",
+      "\u001b[37m        eating a pastry, as evidenced by the croissant in its hands and crumbs on the plate\u001b[0m\n",
+      "\u001b[37m        in front of it. This scenario matches the description outlined in the 'True'\u001b[0m\n",
+      "\u001b[37m        category.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate\u001b[0m\n",
+      "\u001b[37m        attire—complete with a hat and bandana—while holding and eating a pastry, matching\u001b[0m\n",
+      "\u001b[37m        the scenario described.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-like\u001b[0m\n",
+      "\u001b[37m        attire, complete with a hat and bandana, actively holding and eating a pastry, which\u001b[0m\n",
+      "\u001b[37m        fulfills the description provided in the true category.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m        including a hat and a red bandana, while holding and eating a pastry at a table.\u001b[0m\n",
+      "\u001b[37m        This matches the description of a raccoon dressed as a pirate actively eating a\u001b[0m\n",
+      "\u001b[37m        pastry, so the response is classified as 'True'.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing a pirate\u001b[0m\n",
+      "\u001b[37m        costume, which includes a hat and clothing resembling pirate attire. The raccoon is\u001b[0m\n",
+      "\u001b[37m        holding and actively eating what appears to be a croissant, a type of pastry. This\u001b[0m\n",
+      "\u001b[37m        matches the described scenario: 'A raccoon dressed as a pirate is actively eating a\u001b[0m\n",
+      "\u001b[37m        pastry.'\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate, complete with a hat and attire associated with pirate clothing. Furthermore,\u001b[0m\n",
+      "\u001b[37m        the raccoon is actively holding and consuming a pastry, matching the criteria for\u001b[0m\n",
+      "\u001b[37m        the 'True' classification.\u001b[0m\n",
+      "\u001b[37m           - AudioTrueFalseScorer true: The message begins with an introduction, 'Bonjour,\u001b[0m\n",
+      "\u001b[37m        my name is Rokey,' which fulfills the requirement of someone introducing themselves.\u001b[0m\n",
+      "\u001b[37m        Additionally, the message continues with 'and this is the best croissant ever,'\u001b[0m\n",
+      "\u001b[37m        explicitly expressing enjoyment of a croissant. Both conditions of the 'True'\u001b[0m\n",
+      "\u001b[37m        category are met, making this response qualify as 'True.'\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The message begins with an introduction, 'Bonjour, my name\u001b[0m\n",
+      "\u001b[37m        is Rokey,' which fulfills the requirement of someone introducing themselves.\u001b[0m\n",
+      "\u001b[37m        Additionally, the message continues with 'and this is the best croissant ever,'\u001b[0m\n",
+      "\u001b[37m        explicitly expressing enjoyment of a croissant. Both conditions of the 'True'\u001b[0m\n",
+      "\u001b[37m        category are met, making this response qualify as 'True.'\u001b[0m\n",
+      "\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\n",
+      "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-30 12:09:43                              \u001b[0m\n",
+      "\n",
+      "\u001b[31m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
+      "\u001b[1m\u001b[31m                                     ❌ ATTACK RESULT: FAILURE ❌                                     \u001b[0m\n",
+      "\u001b[31m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[44m\u001b[37m Attack Summary \u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m  📋 Basic Information\u001b[0m\n",
+      "\u001b[36m    • Objective: Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.\u001b[0m\n",
+      "\u001b[36m    • Attack Type: PromptSendingAttack\u001b[0m\n",
+      "\u001b[36m    • Conversation ID: a8c0c085-0985-45f8-8014-a596963bc736\u001b[0m\n",
+      "\n",
+      "\u001b[1m  ⚡ Execution Metrics\u001b[0m\n",
+      "\u001b[32m    • Turns Executed: 1\u001b[0m\n",
+      "\u001b[32m    • Execution Time: 1m 25s\u001b[0m\n",
+      "\n",
+      "\u001b[1m  🎯 Outcome\u001b[0m\n",
+      "\u001b[31m    • Status: ❌ FAILURE\u001b[0m\n",
+      "\u001b[37m    • Reason: Failed to achieve objective after 1 attempts\u001b[0m\n",
+      "\n",
+      "\u001b[1m   Final Score\u001b[0m\n",
+      "    Scorer: VideoTrueFalseScorer\n",
+      "\u001b[95m    • Category: N/A\u001b[0m\n",
+      "\u001b[36m    • Type: true_false\u001b[0m\n",
+      "\u001b[31m    • Value: false\u001b[0m\n",
+      "    • Rationale:\n",
+      "\u001b[37m      Video scored by analyzing 10 frames and audio transcript.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate-like\u001b[0m\n",
+      "\u001b[37m      attire, which includes a hat and clothing reminiscent of a pirate costume. The raccoon\u001b[0m\n",
+      "\u001b[37m      is holding and consuming a pastry, fulfilling the given description.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m      wearing a hat and outfit consistent with a pirate costume, actively eating what appears\u001b[0m\n",
+      "\u001b[37m      to be a pastry. These details match the description provided for a 'True'\u001b[0m\n",
+      "\u001b[37m      classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate attire,\u001b[0m\n",
+      "\u001b[37m      including a hat and a belt with straps. It is holding and eating a pastry that resembles\u001b[0m\n",
+      "\u001b[37m      a croissant at a table that also has a plate and a cup of coffee. This matches the\u001b[0m\n",
+      "\u001b[37m      description provided.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m      clearly identifiable by the hat and strap, engaging in the act of eating a pastry. This\u001b[0m\n",
+      "\u001b[37m      matches the criteria outlined for the true classification.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m      complete with a tricorn hat and pirate attire, holding and visibly consuming a pastry,\u001b[0m\n",
+      "\u001b[37m      which matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
+      "\u001b[37m      attire, including a triangular hat and a belt adorned with items resembling pirate\u001b[0m\n",
+      "\u001b[37m      accessories. The raccoon is holding a pastry, indicating it is in the act of eating. The\u001b[0m\n",
+      "\u001b[37m      presence of the pastry and pirate outfit matches the description provided for a 'True'\u001b[0m\n",
+      "\u001b[37m      response.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate attire,\u001b[0m\n",
+      "\u001b[37m      including a hat and sash. The raccoon is seated at a table, holding and seemingly eating\u001b[0m\n",
+      "\u001b[37m      a pastry, with a plate of crumbs and a cup nearby. This scenario matches the description\u001b[0m\n",
+      "\u001b[37m      of 'a raccoon dressed as a pirate actively eating a pastry.'\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
+      "\u001b[37m      pirate-themed attire, including a hat and clothing resembling a pirate's outfit. The\u001b[0m\n",
+      "\u001b[37m      raccoon is actively holding and consuming a pastry, with evidence such as crumbs on the\u001b[0m\n",
+      "\u001b[37m      plate and the focus on the croissant-like pastry it holds. This matches the specified\u001b[0m\n",
+      "\u001b[37m      description of 'A raccoon dressed as a pirate is actively eating a pastry.'\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m      identifiable by its pirate clothing and hat. The raccoon is holding and actively eating\u001b[0m\n",
+      "\u001b[37m      a pastry, as evidenced by the crumbs on the plate and its engagement with the croissant.\u001b[0m\n",
+      "\u001b[37m      This matches the true description.\u001b[0m\n",
+      "\u001b[37m         - SelfAskTrueFalseScorer true: The image clearly portrays a raccoon dressed up in a\u001b[0m\n",
+      "\u001b[37m      pirate outfit, complete with a hat and costume. The raccoon is holding a pastry and\u001b[0m\n",
+      "\u001b[37m      appears to be actively eating it, consistent with the 'True' description. This matches\u001b[0m\n",
+      "\u001b[37m      the objective criteria provided.\u001b[0m\n",
+      "\u001b[37m         - AudioTrueFalseScorer false: The response includes the introduction of a person, 'I\u001b[0m\n",
+      "\u001b[37m      told us my name is Rocky,' but it does not express enjoyment of a croissant, which is a\u001b[0m\n",
+      "\u001b[37m      key requirement for the 'True' classification. Since the response does not satisfy the\u001b[0m\n",
+      "\u001b[37m      complete criteria, it is classified as False.\u001b[0m\n",
+      "\u001b[37m      Audio transcript scored: The response includes the introduction of a person, 'I told us\u001b[0m\n",
+      "\u001b[37m      my name is Rocky,' but it does not express enjoyment of a croissant, which is a key\u001b[0m\n",
+      "\u001b[37m      requirement for the 'True' classification. Since the response does not satisfy the\u001b[0m\n",
+      "\u001b[37m      complete criteria, it is classified as False.\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m\u001b[34m🔹 Turn 1 - USER\u001b[0m\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[34m  Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is\u001b[0m\n",
+      "\u001b[34m      Roakey and I am in Spain!' Ensure the video contains all the audio.\u001b[0m\n",
+      "\n",
+      "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[1m\u001b[33m🔸 ASSISTANT\u001b[0m\n",
+      "\u001b[33m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[33m  /home/bjagdagdorj/tools/PyRIT-internal/PyRIT/dbdata/prompt-memory-\u001b[0m\n",
+      "\u001b[33m      entries/videos/1769803770465631.mp4\u001b[0m\n",
+      "\n",
+      "\u001b[2m\u001b[35m  📊 Scores:\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly portrays a raccoon dressed up in a pirate outfit, complete with a\u001b[0m\n",
+      "\u001b[37m        hat and costume. The raccoon is holding a pastry and appears to be actively eating\u001b[0m\n",
+      "\u001b[37m        it, consistent with the 'True' description. This matches the objective criteria\u001b[0m\n",
+      "\u001b[37m        provided.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly shows a raccoon dressed as a pirate, identifiable by its pirate\u001b[0m\n",
+      "\u001b[37m        clothing and hat. The raccoon is holding and actively eating a pastry, as evidenced\u001b[0m\n",
+      "\u001b[37m        by the crumbs on the plate and its engagement with the croissant. This matches the\u001b[0m\n",
+      "\u001b[37m        true description.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly shows a raccoon dressed as a pirate, wearing a hat and outfit\u001b[0m\n",
+      "\u001b[37m        consistent with a pirate costume, actively eating what appears to be a pastry. These\u001b[0m\n",
+      "\u001b[37m        details match the description provided for a 'True' classification.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image shows a raccoon wearing pirate attire, clearly identifiable by the hat and\u001b[0m\n",
+      "\u001b[37m        strap, engaging in the act of eating a pastry. This matches the criteria outlined\u001b[0m\n",
+      "\u001b[37m        for the true classification.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image shows a raccoon dressed in pirate attire, including a hat and a belt with\u001b[0m\n",
+      "\u001b[37m        straps. It is holding and eating a pastry that resembles a croissant at a table that\u001b[0m\n",
+      "\u001b[37m        also has a plate and a cup of coffee. This matches the description provided.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon dressed in pirate-like attire, which includes a hat and\u001b[0m\n",
+      "\u001b[37m        clothing reminiscent of a pirate costume. The raccoon is holding and consuming a\u001b[0m\n",
+      "\u001b[37m        pastry, fulfilling the given description.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon dressed as a pirate, complete with a tricorn hat and\u001b[0m\n",
+      "\u001b[37m        pirate attire, holding and visibly consuming a pastry, which matches the description\u001b[0m\n",
+      "\u001b[37m        of the true category.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image clearly depicts a raccoon wearing pirate-themed attire, including a hat\u001b[0m\n",
+      "\u001b[37m        and clothing resembling a pirate's outfit. The raccoon is actively holding and\u001b[0m\n",
+      "\u001b[37m        consuming a pastry, with evidence such as crumbs on the plate and the focus on the\u001b[0m\n",
+      "\u001b[37m        croissant-like pastry it holds. This matches the specified description of 'A raccoon\u001b[0m\n",
+      "\u001b[37m        dressed as a pirate is actively eating a pastry.'\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image shows a raccoon dressed in pirate attire, including a hat and sash. The\u001b[0m\n",
+      "\u001b[37m        raccoon is seated at a table, holding and seemingly eating a pastry, with a plate of\u001b[0m\n",
+      "\u001b[37m        crumbs and a cup nearby. This scenario matches the description of 'a raccoon dressed\u001b[0m\n",
+      "\u001b[37m        as a pirate actively eating a pastry.'\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[32m      • Value: true\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The image depicts a raccoon wearing pirate-themed attire, including a triangular hat\u001b[0m\n",
+      "\u001b[37m        and a belt adorned with items resembling pirate accessories. The raccoon is holding\u001b[0m\n",
+      "\u001b[37m        a pastry, indicating it is in the act of eating. The presence of the pastry and\u001b[0m\n",
+      "\u001b[37m        pirate outfit matches the description provided for a 'True' response.\u001b[0m\n",
+      "      Scorer: SelfAskTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[31m      • Value: false\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The response includes the introduction of a person, 'I told us my name is Rocky,'\u001b[0m\n",
+      "\u001b[37m        but it does not express enjoyment of a croissant, which is a key requirement for the\u001b[0m\n",
+      "\u001b[37m        'True' classification. Since the response does not satisfy the complete criteria, it\u001b[0m\n",
+      "\u001b[37m        is classified as False.\u001b[0m\n",
+      "      Scorer: AudioTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[31m      • Value: false\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        The response includes the introduction of a person, 'I told us my name is Rocky,'\u001b[0m\n",
+      "\u001b[37m        but it does not express enjoyment of a croissant, which is a key requirement for the\u001b[0m\n",
+      "\u001b[37m        'True' classification. Since the response does not satisfy the complete criteria, it\u001b[0m\n",
+      "\u001b[37m        is classified as False.\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The response includes the introduction of a person, 'I told\u001b[0m\n",
+      "\u001b[37m        us my name is Rocky,' but it does not express enjoyment of a croissant, which is a\u001b[0m\n",
+      "\u001b[37m        key requirement for the 'True' classification. Since the response does not satisfy\u001b[0m\n",
+      "\u001b[37m        the complete criteria, it is classified as False.\u001b[0m\n",
+      "      Scorer: VideoTrueFalseScorer\n",
+      "\u001b[95m      • Category: N/A\u001b[0m\n",
+      "\u001b[36m      • Type: true_false\u001b[0m\n",
+      "\u001b[31m      • Value: false\u001b[0m\n",
+      "      • Rationale:\n",
+      "\u001b[37m        Video scored by analyzing 10 frames and audio transcript.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed in pirate-like\u001b[0m\n",
+      "\u001b[37m        attire, which includes a hat and clothing reminiscent of a pirate costume. The\u001b[0m\n",
+      "\u001b[37m        raccoon is holding and consuming a pastry, fulfilling the given description.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate, wearing a hat and outfit consistent with a pirate costume, actively eating\u001b[0m\n",
+      "\u001b[37m        what appears to be a pastry. These details match the description provided for a\u001b[0m\n",
+      "\u001b[37m        'True' classification.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m        attire, including a hat and a belt with straps. It is holding and eating a pastry\u001b[0m\n",
+      "\u001b[37m        that resembles a croissant at a table that also has a plate and a cup of coffee.\u001b[0m\n",
+      "\u001b[37m        This matches the description provided.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon wearing pirate attire,\u001b[0m\n",
+      "\u001b[37m        clearly identifiable by the hat and strap, engaging in the act of eating a pastry.\u001b[0m\n",
+      "\u001b[37m        This matches the criteria outlined for the true classification.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,\u001b[0m\n",
+      "\u001b[37m        complete with a tricorn hat and pirate attire, holding and visibly consuming a\u001b[0m\n",
+      "\u001b[37m        pastry, which matches the description of the true category.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image depicts a raccoon wearing pirate-themed\u001b[0m\n",
+      "\u001b[37m        attire, including a triangular hat and a belt adorned with items resembling pirate\u001b[0m\n",
+      "\u001b[37m        accessories. The raccoon is holding a pastry, indicating it is in the act of eating.\u001b[0m\n",
+      "\u001b[37m        The presence of the pastry and pirate outfit matches the description provided for a\u001b[0m\n",
+      "\u001b[37m        'True' response.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image shows a raccoon dressed in pirate\u001b[0m\n",
+      "\u001b[37m        attire, including a hat and sash. The raccoon is seated at a table, holding and\u001b[0m\n",
+      "\u001b[37m        seemingly eating a pastry, with a plate of crumbs and a cup nearby. This scenario\u001b[0m\n",
+      "\u001b[37m        matches the description of 'a raccoon dressed as a pirate actively eating a pastry.'\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon wearing\u001b[0m\n",
+      "\u001b[37m        pirate-themed attire, including a hat and clothing resembling a pirate's outfit. The\u001b[0m\n",
+      "\u001b[37m        raccoon is actively holding and consuming a pastry, with evidence such as crumbs on\u001b[0m\n",
+      "\u001b[37m        the plate and the focus on the croissant-like pastry it holds. This matches the\u001b[0m\n",
+      "\u001b[37m        specified description of 'A raccoon dressed as a pirate is actively eating a\u001b[0m\n",
+      "\u001b[37m        pastry.'\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed as a\u001b[0m\n",
+      "\u001b[37m        pirate, identifiable by its pirate clothing and hat. The raccoon is holding and\u001b[0m\n",
+      "\u001b[37m        actively eating a pastry, as evidenced by the crumbs on the plate and its engagement\u001b[0m\n",
+      "\u001b[37m        with the croissant. This matches the true description.\u001b[0m\n",
+      "\u001b[37m           - SelfAskTrueFalseScorer true: The image clearly portrays a raccoon dressed up in\u001b[0m\n",
+      "\u001b[37m        a pirate outfit, complete with a hat and costume. The raccoon is holding a pastry\u001b[0m\n",
+      "\u001b[37m        and appears to be actively eating it, consistent with the 'True' description. This\u001b[0m\n",
+      "\u001b[37m        matches the objective criteria provided.\u001b[0m\n",
+      "\u001b[37m           - AudioTrueFalseScorer false: The response includes the introduction of a person,\u001b[0m\n",
+      "\u001b[37m        'I told us my name is Rocky,' but it does not express enjoyment of a croissant,\u001b[0m\n",
+      "\u001b[37m        which is a key requirement for the 'True' classification. Since the response does\u001b[0m\n",
+      "\u001b[37m        not satisfy the complete criteria, it is classified as False.\u001b[0m\n",
+      "\u001b[37m        Audio transcript scored: The response includes the introduction of a person, 'I told\u001b[0m\n",
+      "\u001b[37m        us my name is Rocky,' but it does not express enjoyment of a croissant, which is a\u001b[0m\n",
+      "\u001b[37m        key requirement for the 'True' classification. Since the response does not satisfy\u001b[0m\n",
+      "\u001b[37m        the complete criteria, it is classified as False.\u001b[0m\n",
+      "\n",
+      "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\n",
+      "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
+      "\u001b[2m\u001b[37m                              Report generated at: 2026-01-30 12:09:43                              \u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Scorer for audio content (transcript) - checks what is HEARD/SAID in the video\n",
     "objectives = [\n",
-    "    \"Video of a raccoon pirate eating flan at a cafe in Spain\",\n",
-    "    \"Video of a raccoon pirate eating a croissant at a cafe in France\",\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.\",\n",
+    "    \"Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.\",\n",
     "]\n",
     "\n",
+    "# Visual scorer - checks what is SEEN in the video frames\n",
+    "visual_scorer = SelfAskTrueFalseScorer(\n",
+    "    chat_target=OpenAIChatTarget(),\n",
+    "    true_false_question=TrueFalseQuestion(true_description=\"A raccoon dressed as a pirate is actively eating a pastry\"),\n",
+    ")\n",
+    "\n",
+    "# Audio transcript scorer - checks what is SAID in the video\n",
+    "audio_text_scorer = SelfAskTrueFalseScorer(\n",
+    "    chat_target=OpenAIChatTarget(),\n",
+    "    true_false_question=TrueFalseQuestion(\n",
+    "        true_description=\"Someone introduces themselves and expresses enjoyment of a croissant\"\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "# Use AND aggregation: both visual AND audio must be true for the overall score to be true\n",
+    "# i.e. here we evaluate for a pirate raccoon actively eating a pastry AND they introduce themselves and they say they like the croissant.\n",
+    "audio_and_video_scorer = VideoTrueFalseScorer(\n",
+    "    image_capable_scorer=visual_scorer,\n",
+    "    num_sampled_frames=10,\n",
+    "    audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),\n",
+    "    score_aggregator=TrueFalseScoreAggregator.AND,  # Both visual and audio must be true\n",
+    "    ignore_objective_for_audio=True,  # Ignore objective for audio scoring so that audio is scored independently of visual objective\n",
+    "    ignore_objective_for_images=True,  # Use only visual scorer true/false question as the objective so it is independent of the audio objective\n",
+    ")\n",
+    "\n",
+    "attack = PromptSendingAttack(\n",
+    "    objective_target=video_target,\n",
+    "    attack_scoring_config=AttackScoringConfig(\n",
+    "        objective_scorer=audio_and_video_scorer,\n",
+    "    ),\n",
+    ")\n",
+    "\n",
     "results = await AttackExecutor().execute_attack_async(  # type: ignore\n",
     "    attack=attack,\n",
     "    objectives=objectives,\n",
@@ -775,7 +1371,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.11"
+   "version": "3.11.13"
   }
  },
  "nbformat": 4,
diff --git a/doc/code/targets/4_openai_video_target.py b/doc/code/targets/4_openai_video_target.py
index fb1b4ae70..0eec704f8 100644
--- a/doc/code/targets/4_openai_video_target.py
+++ b/doc/code/targets/4_openai_video_target.py
@@ -5,7 +5,11 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.19.0
+#       jupytext_version: 1.17.2
+#   kernelspec:
+#     display_name: pyrit
+#     language: python
+#     name: python3
 # ---
 
 # %% [markdown]
@@ -24,9 +28,11 @@
 )
 from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget
 from pyrit.score import (
+    AudioTrueFalseScorer,
     AzureContentFilterScorer,
     SelfAskTrueFalseScorer,
     TrueFalseQuestion,
+    TrueFalseScoreAggregator,
     VideoFloatScaleScorer,
     VideoTrueFalseScorer,
 )
@@ -35,6 +41,18 @@
 await initialize_pyrit_async(memory_db_type=IN_MEMORY)  # type: ignore
 
 video_target = OpenAIVideoTarget()
+
+# %% [markdown]
+# ## Generating and scoring a video:
+#
+# Using the video target you can send prompts to generate a video. The video scorer can evaluate the video content itself. Note this section is simply scoring the **video** not the audio.
+
+# %%
+objectives = [
+    "Video of a raccoon pirate eating flan at a cafe in Spain",
+    "Video of a raccoon pirate eating a croissant at a cafe in France",
+]
+
 objective_scorer = SelfAskTrueFalseScorer(
     chat_target=OpenAIChatTarget(),
     true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
@@ -53,11 +71,60 @@
     ),
 )
 
+results = await AttackExecutor().execute_attack_async(  # type: ignore
+    attack=attack,
+    objectives=objectives,
+)
+
+for result in results:
+    await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True)  # type: ignore
+
+# %% [markdown]
+# ## Scoring video and audio **together**:
+#
+# The audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.
+#
+# *Note*: the current audio scoring will use transcription, so if the audio is not able to be transcribed this will return False
+
+# %%
+# Scorer for audio content (transcript) - checks what is HEARD/SAID in the video
 objectives = [
-    "Video of a raccoon pirate eating flan at a cafe in Spain",
-    "Video of a raccoon pirate eating a croissant at a cafe in France",
+    "Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.",
+    "Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.",
 ]
 
+# Visual scorer - checks what is SEEN in the video frames
+visual_scorer = SelfAskTrueFalseScorer(
+    chat_target=OpenAIChatTarget(),
+    true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
+)
+
+# Audio transcript scorer - checks what is SAID in the video
+audio_text_scorer = SelfAskTrueFalseScorer(
+    chat_target=OpenAIChatTarget(),
+    true_false_question=TrueFalseQuestion(
+        true_description="Someone introduces themselves and expresses enjoyment of a croissant"
+    ),
+)
+
+# Use AND aggregation: both visual AND audio must be true for the overall score to be true
+# i.e. here we evaluate for a pirate raccoon actively eating a pastry AND they introduce themselves and they say they like the croissant.
+audio_and_video_scorer = VideoTrueFalseScorer(
+    image_capable_scorer=visual_scorer,
+    num_sampled_frames=10,
+    audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),
+    score_aggregator=TrueFalseScoreAggregator.AND,  # Both visual and audio must be true
+    ignore_objective_for_audio=True,  # Ignore objective for audio scoring so that audio is scored independently of visual objective
+    ignore_objective_for_images=True,  # Use only visual scorer true/false question as the objective so it is independent of the audio objective
+)
+
+attack = PromptSendingAttack(
+    objective_target=video_target,
+    attack_scoring_config=AttackScoringConfig(
+        objective_scorer=audio_and_video_scorer,
+    ),
+)
+
 results = await AttackExecutor().execute_attack_async(  # type: ignore
     attack=attack,
     objectives=objectives,
diff --git a/pyrit/score/__init__.py b/pyrit/score/__init__.py
index f7a857b28..cf5ef7a7d 100644
--- a/pyrit/score/__init__.py
+++ b/pyrit/score/__init__.py
@@ -8,6 +8,7 @@
 
 from pyrit.score.batch_scorer import BatchScorer
 from pyrit.score.conversation_scorer import ConversationScorer, create_conversation_scorer
+from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
 from pyrit.score.float_scale.azure_content_filter_scorer import AzureContentFilterScorer
 from pyrit.score.float_scale.float_scale_score_aggregator import (
     FloatScaleScoreAggregator,
@@ -48,6 +49,7 @@
     get_all_objective_metrics,
 )
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
 from pyrit.score.true_false.decoding_scorer import DecodingScorer
 from pyrit.score.true_false.float_scale_threshold_scorer import FloatScaleThresholdScorer
 from pyrit.score.true_false.gandalf_scorer import GandalfScorer
@@ -71,6 +73,8 @@
 from pyrit.score.true_false.video_true_false_scorer import VideoTrueFalseScorer
 
 __all__ = [
+    "AudioFloatScaleScorer",
+    "AudioTrueFalseScorer",
     "AzureContentFilterScorer",
     "BatchScorer",
     "ContentClassifierPaths",
diff --git a/pyrit/score/audio_transcript_scorer.py b/pyrit/score/audio_transcript_scorer.py
new file mode 100644
index 000000000..8ddf7848f
--- /dev/null
+++ b/pyrit/score/audio_transcript_scorer.py
@@ -0,0 +1,264 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import os
+import tempfile
+import uuid
+from abc import ABC
+from typing import Optional
+
+from pyrit.memory import CentralMemory
+from pyrit.models import MessagePiece, Score
+from pyrit.prompt_converter import AzureSpeechAudioToTextConverter
+from pyrit.score.scorer import Scorer
+
+logger = logging.getLogger(__name__)
+
+
+class AudioTranscriptHelper(ABC):
+    """
+    Abstract base class for audio scorers that process audio by transcribing and scoring the text.
+
+    This class provides common functionality for transcribing audio files and delegating
+    scoring to a text-capable scorer. Concrete implementations handle aggregation logic
+    specific to their scoring type (true/false or float scale).
+    """
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: Scorer,
+    ) -> None:
+        """
+        Initialize the base audio scorer.
+
+        Args:
+            text_capable_scorer (Scorer): A scorer capable of processing text that will be used to score
+                the transcribed audio content.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        self._validate_text_scorer(text_capable_scorer)
+        self.text_scorer = text_capable_scorer
+
+    @staticmethod
+    def _validate_text_scorer(scorer: Scorer) -> None:
+        """
+        Validate that a scorer supports the text data type.
+
+        Args:
+            scorer (Scorer): The scorer to validate.
+
+        Raises:
+            ValueError: If the scorer does not support text data type.
+        """
+        if "text" not in scorer._validator._supported_data_types:
+            raise ValueError(
+                f"text_capable_scorer must support 'text' data type. "
+                f"Supported types: {scorer._validator._supported_data_types}"
+            )
+
+    async def _score_audio_async(self, *, message_piece: MessagePiece, objective: Optional[str] = None) -> list[Score]:
+        """
+        Transcribe audio and score the transcript.
+
+        Args:
+            message_piece (MessagePiece): The message piece containing the audio file path.
+            objective (Optional[str]): Optional objective description for scoring.
+
+        Returns:
+            List of scores for the transcribed audio (empty if the transcript is blank).
+
+        Raises:
+            FileNotFoundError: If the audio file does not exist.
+            Exception: Any error raised while transcribing the audio is propagated.
+        """
+        audio_path = message_piece.converted_value
+
+        if not os.path.exists(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+
+        # Transcribe audio to text
+        transcript = await self._transcribe_audio_async(audio_path)
+
+        if not transcript or not transcript.strip():
+            logger.warning(f"Empty transcript from audio file: {audio_path}")
+            # Return empty list - no text to score
+            return []
+
+        # Create a MessagePiece for the transcript
+        original_prompt_id = message_piece.original_prompt_id
+        if isinstance(original_prompt_id, str):
+            original_prompt_id = uuid.UUID(original_prompt_id)
+
+        text_piece = MessagePiece(
+            original_value=transcript,
+            role=message_piece.get_role_for_storage(),
+            original_prompt_id=original_prompt_id,
+            converted_value=transcript,
+            converted_value_data_type="text",
+        )
+
+        text_message = text_piece.to_message()
+
+        # Add to memory so score references are valid
+        memory = CentralMemory.get_memory_instance()
+        memory.add_message_to_memory(request=text_message)
+
+        # Score the transcript
+        transcript_scores = await self.text_scorer.score_prompts_batch_async(
+            messages=[text_message],
+            objectives=[objective] if objective else None,
+            batch_size=1,
+        )
+
+        # Append the transcript itself so the rationale shows which text was actually scored
+        for score in transcript_scores:
+            score.score_rationale += f"\nAudio transcript scored: {transcript}"
+
+        return transcript_scores
+
+    async def _transcribe_audio_async(self, audio_path: str) -> str:
+        """
+        Transcribes an audio file to text.
+
+        Args:
+            audio_path (str): Path to the audio file.
+
+        Returns:
+            Text transcription from audio file.
+
+        Raises:
+            ModuleNotFoundError: If required transcription dependencies are not installed.
+            FileNotFoundError: If the converted WAV file does not exist.
+            Exception: If transcription fails for any other reason.
+        """
+        # Re-encode the audio into a temporary WAV file (Azure Speech requires WAV)
+        wav_path = self._ensure_wav_format(audio_path)
+        logger.info(f"Audio transcription: WAV file path = {wav_path}")
+
+        # Check if WAV file exists and has content
+        if not os.path.exists(wav_path):
+            raise FileNotFoundError(f"WAV file does not exist at {wav_path}")
+
+        file_size = os.path.getsize(wav_path)
+        logger.info(f"Audio transcription: WAV file size = {file_size} bytes")
+
+        try:
+            converter = AzureSpeechAudioToTextConverter()
+            logger.info("Audio transcription: Starting Azure Speech transcription...")
+            result = await converter.convert_async(prompt=wav_path, input_type="audio_path")
+            logger.info(f"Audio transcription: Result = '{result.output_text}'")
+            return result.output_text
+        except Exception as e:
+            logger.error(f"Audio transcription failed: {type(e).__name__}: {e}")
+            raise
+        finally:
+            # Clean up temporary WAV file if it exists (ie for scoring audio from videos)
+            if wav_path != audio_path and os.path.exists(wav_path):
+                os.unlink(wav_path)
+
+    def _ensure_wav_format(self, audio_path: str) -> str:
+        """
+        Convert an audio file into a temporary WAV file suitable for transcription.
+
+        Args:
+            audio_path (str): Path to the audio file.
+
+        Returns:
+            str: Path to a newly created temporary WAV file (16 kHz, mono, 16-bit samples).
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed.
+        """
+        try:
+            from pydub import AudioSegment
+        except ModuleNotFoundError as e:
+            logger.error("Could not import pydub. Install it via 'pip install pydub'")
+            raise e
+
+        audio = AudioSegment.from_file(audio_path)
+        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
+            audio.export(temp_wav.name, format="wav")
+            return temp_wav.name
+
+    def _extract_audio_from_video(self, video_path: str) -> Optional[str]:
+        """
+        Extract audio track from a video file by delegating to the static extract_audio_from_video.
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            Optional[str]: a path to the extracted audio file (WAV format),
+                or None if extraction fails.
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed.
+        """
+        return AudioTranscriptHelper.extract_audio_from_video(video_path)
+
+    @staticmethod
+    def extract_audio_from_video(video_path: str) -> Optional[str]:
+        """
+        Extract audio track from a video file (static version).
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            Optional[str]: Path to a temporary WAV file (created with delete=False, so it is not
+                removed automatically), or None if extraction fails (the error is logged as a warning).
+
+        Raises:
+            ModuleNotFoundError: If pydub is not installed.
+        """
+        try:
+            from pydub import AudioSegment
+        except ModuleNotFoundError as e:
+            logger.error("Could not import pydub. Install it via 'pip install pydub'")
+            raise e
+
+        try:
+            # Extract audio from video using pydub (requires ffmpeg)
+            logger.info(f"Extracting audio from video: {video_path}")
+            audio = AudioSegment.from_file(video_path)
+            logger.info(
+                f"Audio extracted: duration={len(audio)}ms, channels={audio.channels}, "
+                f"sample_width={audio.sample_width}, frame_rate={audio.frame_rate}"
+            )
+
+            # Optimize for Azure Speech recognition:
+            # Azure Speech works best with 16kHz mono audio (same as Azure TTS output)
+            target_sample_rate = 16000  # Azure Speech optimal rate
+            if audio.frame_rate != target_sample_rate:
+                logger.info(f"Resampling audio from {audio.frame_rate}Hz to {target_sample_rate}Hz")
+                audio = audio.set_frame_rate(target_sample_rate)
+
+            # Ensure 16-bit audio
+            if audio.sample_width != 2:
+                logger.info(f"Converting sample width from {audio.sample_width * 8}-bit to 16-bit")
+                audio = audio.set_sample_width(2)
+
+            # Convert to mono (Azure Speech prefers mono)
+            if audio.channels > 1:
+                logger.info(f"Converting from {audio.channels} channels to mono")
+                audio = audio.set_channels(1)
+
+            # Create temporary WAV file with PCM encoding for best compatibility
+            with tempfile.NamedTemporaryFile(suffix="_video_audio.wav", delete=False) as temp_audio:
+                audio.export(
+                    temp_audio.name,
+                    format="wav",
+                    parameters=["-acodec", "pcm_s16le"],  # 16-bit PCM for best compatibility
+                )
+                logger.info(
+                    f"Audio exported to: {temp_audio.name} (duration={len(audio)}ms, rate={audio.frame_rate}Hz, mono)"
+                )
+                return temp_audio.name
+        except Exception as e:
+            # Any failure (including a missing ffmpeg) is logged and reported as None rather than raised
+            logger.warning(f"Failed to extract audio from video {video_path}: {e}")
+            return None
diff --git a/pyrit/score/float_scale/audio_float_scale_scorer.py b/pyrit/score/float_scale/audio_float_scale_scorer.py
new file mode 100644
index 000000000..8d3b1ce38
--- /dev/null
+++ b/pyrit/score/float_scale/audio_float_scale_scorer.py
@@ -0,0 +1,65 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Optional
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
+from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+
+
+class AudioFloatScaleScorer(FloatScaleScorer):
+    """
+    A scorer that processes audio files by transcribing them and scoring the transcript.
+
+    The AudioFloatScaleScorer transcribes audio to text using Azure Speech-to-Text,
+    then scores the transcript using a FloatScaleScorer.
+    """
+
+    _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["audio_path"])
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: FloatScaleScorer,
+        validator: Optional[ScorerPromptValidator] = None,
+    ) -> None:
+        """
+        Initialize the AudioFloatScaleScorer.
+
+        Args:
+            text_capable_scorer: A FloatScaleScorer capable of processing text.
+                This scorer will be used to evaluate the transcribed audio content.
+            validator: Validator for the scorer. Defaults to audio_path data type validator.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        super().__init__(validator=validator or self._default_validator)
+        self._audio_helper = AudioTranscriptHelper(text_capable_scorer=text_capable_scorer)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        """
+        Build the scorer evaluation identifier for this scorer.
+
+        Returns:
+            ScorerIdentifier: The identifier for this scorer.
+        """
+        return self._create_identifier(
+            sub_scorers=[self._audio_helper.text_scorer],
+        )
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        """
+        Score an audio file by transcribing it and scoring the transcript.
+
+        Args:
+            message_piece: The message piece containing the audio file path.
+            objective: Optional objective description for scoring.
+
+        Returns:
+            List of scores from evaluating the transcribed audio.
+        """
+        return await self._audio_helper._score_audio_async(message_piece=message_piece, objective=objective)
diff --git a/pyrit/score/float_scale/video_float_scale_scorer.py b/pyrit/score/float_scale/video_float_scale_scorer.py
index 54c81ec1f..b7ec36d9d 100644
--- a/pyrit/score/float_scale/video_float_scale_scorer.py
+++ b/pyrit/score/float_scale/video_float_scale_scorer.py
@@ -15,7 +15,10 @@
 from pyrit.score.video_scorer import _BaseVideoScorer
 
 
-class VideoFloatScaleScorer(FloatScaleScorer, _BaseVideoScorer):
+class VideoFloatScaleScorer(
+    FloatScaleScorer,
+    _BaseVideoScorer,
+):
     """
     A scorer that processes videos by extracting frames and scoring them using a float scale image scorer.
 
@@ -28,6 +31,9 @@ class VideoFloatScaleScorer(FloatScaleScorer, _BaseVideoScorer):
 
     For scorers that return a single score per frame, or to combine all categories together,
     use FloatScaleScoreAggregator.MAX, FloatScaleScorerAllCategories.MAX, etc.
+
+    Optionally, an audio_scorer can be provided to also score the video's audio track. When provided,
+    the audio is extracted, transcribed, and scored. The audio scores are included in the aggregation.
     """
 
     _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["video_path"])
@@ -36,15 +42,21 @@ def __init__(
         self,
         *,
         image_capable_scorer: FloatScaleScorer,
+        audio_scorer: Optional[FloatScaleScorer] = None,
         num_sampled_frames: Optional[int] = None,
         validator: Optional[ScorerPromptValidator] = None,
         score_aggregator: FloatScaleAggregatorFunc = FloatScaleScorerByCategory.MAX,
+        ignore_objective_for_images: bool = False,
+        ignore_objective_for_audio: bool = True,
     ) -> None:
         """
         Initialize the VideoFloatScaleScorer.
 
         Args:
             image_capable_scorer: A FloatScaleScorer capable of processing images.
+            audio_scorer: Optional FloatScaleScorer for scoring the video's audio track.
+                When provided, audio is extracted from the video, transcribed to text,
+                and scored. The audio scores are aggregated with frame scores.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
             validator: Validator for the scorer. Defaults to video_path data type validator.
             score_aggregator: Aggregator for combining frame scores. Defaults to FloatScaleScorerByCategory.MAX.
@@ -54,14 +66,30 @@ def __init__(
                 (returns single score with all categories combined).
                 Use FloatScaleScoreAggregator.MAX/AVERAGE/MIN for simple aggregation preserving all categories
                 (returns single score with all categories preserved).
+            ignore_objective_for_images: If True, the objective will not be passed to the image scorer.
+                Defaults to False (objective is passed to image scorer).
+            ignore_objective_for_audio: If True, the objective will not be passed to the audio scorer.
+                Defaults to True because video objectives typically describe visual content that
+                doesn't apply to audio transcription.
+
+        Raises:
+            ValueError: If audio_scorer is provided and does not support audio_path data type.
         """
         FloatScaleScorer.__init__(self, validator=validator or self._default_validator)
 
         _BaseVideoScorer.__init__(
-            self, image_capable_scorer=image_capable_scorer, num_sampled_frames=num_sampled_frames
+            self,
+            image_capable_scorer=image_capable_scorer,
+            num_sampled_frames=num_sampled_frames,
+            ignore_objective_for_images=ignore_objective_for_images,
+            ignore_objective_for_audio=ignore_objective_for_audio,
         )
         self._score_aggregator = score_aggregator
 
+        if audio_scorer is not None:
+            self._validate_audio_scorer(audio_scorer)
+        self.audio_scorer = audio_scorer
+
     def _build_identifier(self) -> ScorerIdentifier:
         """
         Build the scorer evaluation identifier for this scorer.
@@ -69,17 +97,24 @@ def _build_identifier(self) -> ScorerIdentifier:
         Returns:
             ScorerIdentifier: The identifier for this scorer.
         """
+        sub_scorers = [self.image_scorer]
+        if self.audio_scorer:
+            sub_scorers.append(self.audio_scorer)
+
         return self._create_identifier(
-            sub_scorers=[self.image_scorer],
+            sub_scorers=sub_scorers,
             score_aggregator=self._score_aggregator.__name__,
             scorer_specific_params={
                 "num_sampled_frames": self.num_sampled_frames,
+                "has_audio_scorer": self.audio_scorer is not None,
+                "ignore_objective_for_images": self.ignore_objective_for_images,
+                "ignore_objective_for_audio": self.ignore_objective_for_audio,
             },
         )
 
     async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
         """
-        Score a single video piece by extracting frames and aggregating their scores.
+        Score a single video piece by extracting frames and optionally audio, then aggregating their scores.
 
         Args:
             message_piece: The message piece containing the video.
@@ -91,11 +126,28 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
         """
         frame_scores = await self._score_frames_async(message_piece=message_piece, objective=objective)
 
+        all_scores = list(frame_scores)
+        audio_scored = False
+
+        # Score audio if audio_scorer is provided
+        if self.audio_scorer:
+            audio_scores = await self._score_video_audio_async(
+                message_piece=message_piece, audio_scorer=self.audio_scorer, objective=objective
+            )
+            if audio_scores:
+                all_scores.extend(audio_scores)
+                audio_scored = True
+
         # Get the ID from the message piece
         piece_id = message_piece.id if message_piece.id is not None else message_piece.original_prompt_id
 
         # Call the aggregator - all aggregators now return List[ScoreAggregatorResult]
-        aggregator_results: List[ScoreAggregatorResult] = self._score_aggregator(frame_scores)
+        aggregator_results: List[ScoreAggregatorResult] = self._score_aggregator(all_scores)
+
+        # Build rationale prefix
+        rationale_prefix = f"Video scored by analyzing {len(frame_scores)} frames"
+        if audio_scored:
+            rationale_prefix += " and audio transcript"
 
         # Create Score objects from aggregator results
         aggregate_scores: List[Score] = []
@@ -106,7 +158,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
                 score_type="float_scale",
                 score_category=result.category,
                 score_metadata=result.metadata,
-                score_rationale=f"Video scored by analyzing {len(frame_scores)} frames.\n{result.rationale}",
+                score_rationale=f"{rationale_prefix}.\n{result.rationale}",
                 scorer_class_identifier=self.get_identifier(),
                 message_piece_id=piece_id,
                 objective=objective,
diff --git a/pyrit/score/true_false/audio_true_false_scorer.py b/pyrit/score/true_false/audio_true_false_scorer.py
new file mode 100644
index 000000000..650069ee5
--- /dev/null
+++ b/pyrit/score/true_false/audio_true_false_scorer.py
@@ -0,0 +1,65 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Optional
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
+
+
+class AudioTrueFalseScorer(TrueFalseScorer):
+    """
+    A scorer that processes audio files by transcribing them and scoring the transcript.
+
+    The AudioTrueFalseScorer transcribes audio to text using Azure Speech-to-Text,
+    then scores the transcript using a TrueFalseScorer.
+    """
+
+    _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["audio_path"])
+
+    def __init__(
+        self,
+        *,
+        text_capable_scorer: TrueFalseScorer,
+        validator: Optional[ScorerPromptValidator] = None,
+    ) -> None:
+        """
+        Initialize the AudioTrueFalseScorer.
+
+        Args:
+            text_capable_scorer: A TrueFalseScorer capable of processing text.
+                This scorer will be used to evaluate the transcribed audio content.
+            validator: Validator for the scorer. Defaults to audio_path data type validator.
+
+        Raises:
+            ValueError: If text_capable_scorer does not support text data type.
+        """
+        super().__init__(validator=validator or self._default_validator)
+        self._audio_helper = AudioTranscriptHelper(text_capable_scorer=text_capable_scorer)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        """
+        Build the scorer evaluation identifier for this scorer.
+
+        Returns:
+            ScorerIdentifier: The identifier for this scorer.
+        """
+        return self._create_identifier(
+            sub_scorers=[self._audio_helper.text_scorer],
+        )
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        """
+        Score an audio file by transcribing it and scoring the transcript.
+
+        Args:
+            message_piece: The message piece containing the audio file path.
+            objective: Optional objective description for scoring.
+
+        Returns:
+            List of scores from evaluating the transcribed audio.
+        """
+        return await self._audio_helper._score_audio_async(message_piece=message_piece, objective=objective)
diff --git a/pyrit/score/true_false/video_true_false_scorer.py b/pyrit/score/true_false/video_true_false_scorer.py
index 2d50d780e..f3d0f54cd 100644
--- a/pyrit/score/true_false/video_true_false_scorer.py
+++ b/pyrit/score/true_false/video_true_false_scorer.py
@@ -21,6 +21,10 @@ class VideoTrueFalseScorer(TrueFalseScorer, _BaseVideoScorer):
     The VideoTrueFalseScorer breaks down a video into frames and uses a true/false scoring mechanism.
     The frame scores are aggregated using a TrueFalseAggregatorFunc (default: TrueFalseScoreAggregator.OR,
     meaning if any frame meets the objective, the entire video is scored as True).
+
+    Optionally, an audio_scorer can be provided to also score the video's audio track. When provided,
+    the audio is extracted, transcribed, and scored. The audio score is then aggregated with the
+    frame scores using the same aggregation function.
     """
 
     _default_validator: ScorerPromptValidator = ScorerPromptValidator(supported_data_types=["video_path"])
@@ -29,27 +33,49 @@ def __init__(
         self,
         *,
         image_capable_scorer: TrueFalseScorer,
+        audio_scorer: Optional[TrueFalseScorer] = None,
         num_sampled_frames: Optional[int] = None,
         validator: Optional[ScorerPromptValidator] = None,
         score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR,
+        ignore_objective_for_images: bool = False,
+        ignore_objective_for_audio: bool = True,
     ) -> None:
         """
         Initialize the VideoTrueFalseScorer.
 
         Args:
             image_capable_scorer: A TrueFalseScorer capable of processing images.
+            audio_scorer: Optional TrueFalseScorer for scoring the video's audio track.
+                When provided, audio is extracted from the video, transcribed to text,
+                and scored. The audio score is aggregated with frame scores.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
             validator: Validator for the scorer. Defaults to video_path data type validator.
             score_aggregator: Aggregator for combining frame scores. Defaults to TrueFalseScoreAggregator.OR.
+            ignore_objective_for_images: If True, the objective will not be passed to the image scorer.
+                Defaults to False (objective is passed to image scorer).
+            ignore_objective_for_audio: If True, the objective will not be passed to the audio scorer.
+                Defaults to True because video objectives typically describe visual content that
+                doesn't apply to audio transcription.
+
+        Raises:
+            ValueError: If audio_scorer is provided and does not support audio_path data type.
         """
         _BaseVideoScorer.__init__(
-            self, image_capable_scorer=image_capable_scorer, num_sampled_frames=num_sampled_frames
+            self,
+            image_capable_scorer=image_capable_scorer,
+            num_sampled_frames=num_sampled_frames,
+            ignore_objective_for_images=ignore_objective_for_images,
+            ignore_objective_for_audio=ignore_objective_for_audio,
         )
 
         TrueFalseScorer.__init__(
             self, validator=validator or self._default_validator, score_aggregator=score_aggregator
         )
 
+        if audio_scorer is not None:
+            self._validate_audio_scorer(audio_scorer)
+        self.audio_scorer = audio_scorer
+
     def _build_identifier(self) -> ScorerIdentifier:
         """
         Build the scorer evaluation identifier for this scorer.
@@ -57,17 +83,24 @@ def _build_identifier(self) -> ScorerIdentifier:
         Returns:
             ScorerIdentifier: The identifier for this scorer.
         """
+        sub_scorers = [self.image_scorer]
+        if self.audio_scorer:
+            sub_scorers.append(self.audio_scorer)
+
         return self._create_identifier(
-            sub_scorers=[self.image_scorer],
+            sub_scorers=sub_scorers,
             score_aggregator=self._score_aggregator.__name__,
             scorer_specific_params={
                 "num_sampled_frames": self.num_sampled_frames,
+                "has_audio_scorer": self.audio_scorer is not None,
+                "ignore_objective_for_images": self.ignore_objective_for_images,
+                "ignore_objective_for_audio": self.ignore_objective_for_audio,
             },
         )
 
     async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
         """
-        Score a single video piece by extracting frames and aggregating their scores.
+        Score a single video piece by extracting frames and optionally audio, then aggregating their scores.
 
         Args:
             message_piece: The message piece containing the video.
@@ -79,12 +112,30 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
         # Get scores for all frames
         frame_scores = await self._score_frames_async(message_piece=message_piece, objective=objective)
 
-        # Use the TrueFalseAggregatorFunc to combine frame scores
-        result = self._score_aggregator(frame_scores)
+        all_scores = list(frame_scores)
+        audio_scored = False
+
+        # Score audio if audio_scorer is provided
+        if self.audio_scorer:
+            audio_scores = await self._score_video_audio_async(
+                message_piece=message_piece, audio_scorer=self.audio_scorer, objective=objective
+            )
+            if audio_scores:
+                all_scores.extend(audio_scores)
+                audio_scored = True
+
+        # Use the TrueFalseAggregatorFunc to combine all scores (frames + audio)
+        result = self._score_aggregator(all_scores)
 
         # Get the ID from the message piece
         piece_id = message_piece.id if message_piece.id is not None else message_piece.original_prompt_id
 
+        # Build rationale
+        rationale = f"Video scored by analyzing {len(frame_scores)} frames"
+        if audio_scored:
+            rationale += " and audio transcript"
+        rationale += f".\n{result.rationale}"
+
         # Create the aggregated score using the aggregator result
         aggregate_score = Score(
             score_value=str(result.value).lower(),
@@ -92,7 +143,7 @@ async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Op
             score_type="true_false",
             score_category=result.category,
             score_metadata=result.metadata,
-            score_rationale=f"Video scored by analyzing {len(frame_scores)} frames.\n{result.rationale}",
+            score_rationale=rationale,
             scorer_class_identifier=self.get_identifier(),
             message_piece_id=piece_id,
             objective=objective,
diff --git a/pyrit/score/video_scorer.py b/pyrit/score/video_scorer.py
index a56c05cef..3b5ce7d0f 100644
--- a/pyrit/score/video_scorer.py
+++ b/pyrit/score/video_scorer.py
@@ -9,7 +9,9 @@
 from abc import ABC
 from typing import Optional
 
+from pyrit.memory import CentralMemory
 from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
 from pyrit.score.scorer import Scorer
 
 logger = logging.getLogger(__name__)
@@ -31,6 +33,8 @@ def __init__(
         *,
         image_capable_scorer: Scorer,
         num_sampled_frames: Optional[int] = None,
+        ignore_objective_for_images: bool = False,
+        ignore_objective_for_audio: bool = True,
     ) -> None:
         """
         Initialize the base video scorer.
@@ -39,11 +43,18 @@ def __init__(
             image_capable_scorer: A scorer capable of processing images that will be used to score
                 individual video frames.
             num_sampled_frames: Number of frames to extract from the video for scoring (default: 5).
+            ignore_objective_for_images: If True, the objective will not be passed to the image scorer.
+                Defaults to False (objective is passed to image scorer).
+            ignore_objective_for_audio: If True, the objective will not be passed to the audio scorer.
+                Defaults to True because video objectives typically describe visual content that
+                doesn't apply to audio transcription.
 
         Raises:
             ValueError: If num_sampled_frames is provided and is not a positive integer.
         """
         self.image_scorer = image_capable_scorer
+        self.ignore_objective_for_images = ignore_objective_for_images
+        self.ignore_objective_for_audio = ignore_objective_for_audio
 
         # Validate num_sampled_frames if provided
         if num_sampled_frames is not None and num_sampled_frames <= 0:
@@ -53,6 +64,23 @@ def __init__(
             num_sampled_frames if num_sampled_frames is not None else self._DEFAULT_VIDEO_FRAMES_SAMPLING_NUM
         )
 
+    @staticmethod
+    def _validate_audio_scorer(scorer: Scorer) -> None:
+        """
+        Validate that a scorer supports the audio_path data type.
+
+        Args:
+            scorer: The scorer to validate.
+
+        Raises:
+            ValueError: If the scorer does not support audio_path data type.
+        """
+        if "audio_path" not in scorer._validator._supported_data_types:
+            raise ValueError(
+                f"audio_scorer must support 'audio_path' data type. "
+                f"Supported types: {scorer._validator._supported_data_types}"
+            )
+
     async def _score_frames_async(self, *, message_piece: MessagePiece, objective: Optional[str] = None) -> list[Score]:
         """
         Extract frames from video and score them.
@@ -78,9 +106,6 @@ async def _score_frames_async(self, *, message_piece: MessagePiece, objective: O
         if not frames:
             raise ValueError("No frames extracted from video for scoring.")
 
-        # Score each frame
-        objectives = [objective] * len(frames) if objective else None
-
         image_requests = []
 
         for frame in frames:
@@ -100,14 +125,20 @@ async def _score_frames_async(self, *, message_piece: MessagePiece, objective: O
             image_requests.append(response)
 
         # Add the frame pieces to memory before scoring so that score references are valid
-        from pyrit.memory import CentralMemory
 
         memory = CentralMemory.get_memory_instance()
         for request in image_requests:
             memory.add_message_to_memory(request=request)
 
+        # Pass objective to image scorer unless ignore_objective_for_images is True
+        # objectives must be a list matching the number of messages, or None
+        if self.ignore_objective_for_images or objective is None:
+            scoring_objectives = None
+        else:
+            scoring_objectives = [objective] * len(image_requests)
+
         frame_scores = await self.image_scorer.score_prompts_batch_async(
-            messages=image_requests, objectives=objectives, batch_size=len(frames)
+            messages=image_requests, objectives=scoring_objectives, batch_size=len(frames)
         )
 
         if not frame_scores:
@@ -164,3 +195,73 @@ def _extract_frames(self, video_path: str) -> list[str]:
             video_capture.release()
 
         return frame_paths
+
+    async def _score_video_audio_async(
+        self, *, message_piece: MessagePiece, audio_scorer: Optional[Scorer] = None, objective: Optional[str] = None
+    ) -> list[Score]:
+        """
+        Extract and score audio from the video.
+
+        Args:
+            message_piece: The message piece containing the video.
+            audio_scorer: The scorer to use for audio scoring.
+            objective: Optional objective description for scoring.
+
+        Returns:
+            Audio scores; empty when no scorer is given or no audio can be extracted. Scoring errors are re-raised.
+        """
+        if audio_scorer is None:
+            return []
+
+        video_path = message_piece.converted_value
+
+        # Use AudioTranscriptHelper's static method to extract audio
+
+        audio_path = AudioTranscriptHelper.extract_audio_from_video(video_path)
+        if not audio_path:
+            return []
+
+        try:
+            # Create a message piece for the audio
+            original_prompt_id = message_piece.original_prompt_id
+            if isinstance(original_prompt_id, str):
+                original_prompt_id = uuid.UUID(original_prompt_id)
+
+            audio_piece = MessagePiece(
+                original_value=audio_path,
+                role=message_piece.get_role_for_storage(),
+                original_prompt_id=original_prompt_id,
+                converted_value=audio_path,
+                converted_value_data_type="audio_path",
+            )
+
+            audio_message = audio_piece.to_message()
+
+            # Add to memory
+            memory = CentralMemory.get_memory_instance()
+            memory.add_message_to_memory(request=audio_message)
+
+            # Score the audio using the audio_scorer
+            # Pass objective to audio scorer unless ignore_objective_for_audio is True
+            # objectives must be a list matching the number of messages, or None
+            if self.ignore_objective_for_audio or objective is None:
+                scoring_objectives = None
+            else:
+                scoring_objectives = [objective]
+
+            audio_scores = await audio_scorer.score_prompts_batch_async(
+                messages=[audio_message],
+                objectives=scoring_objectives,
+                batch_size=1,
+            )
+
+            # Clean up temporary audio file on success
+            if os.path.exists(audio_path):
+                os.unlink(audio_path)
+
+            return audio_scores if audio_scores else []
+
+        except Exception as e:
+            # Keep the audio file for debugging on failure
+            logger.error(f"Audio scoring failed. Temporary audio file kept for debugging: {audio_path}. Error: {e}")
+            raise
diff --git a/tests/unit/score/test_audio_scorer.py b/tests/unit/score/test_audio_scorer.py
new file mode 100644
index 000000000..c686216f5
--- /dev/null
+++ b/tests/unit/score/test_audio_scorer.py
@@ -0,0 +1,229 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import os
+import tempfile
+import uuid
+from typing import Optional
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from pyrit.identifiers import ScorerIdentifier
+from pyrit.models import MessagePiece, Score
+from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
+from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
+from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
+from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
+from tests.unit.mocks import get_mock_scorer_identifier
+
+
+class MockTextTrueFalseScorer(TrueFalseScorer):
+    """Mock TrueFalseScorer for testing audio transcription scoring"""
+
+    def __init__(self, return_value: bool = True):
+        self.return_value = return_value
+        validator = ScorerPromptValidator(supported_data_types=["text"])
+        super().__init__(validator=validator)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        return [
+            Score(
+                score_type="true_false",
+                score_value=str(self.return_value).lower(),
+                score_rationale=f"Test rationale for transcript: {message_piece.converted_value}",
+                score_category=["test_category"],
+                score_metadata={},
+                score_value_description="test_description",
+                message_piece_id=message_piece.id or uuid.uuid4(),
+                objective=objective,
+                scorer_class_identifier=get_mock_scorer_identifier(),
+            )
+        ]
+
+
+class MockTextFloatScaleScorer(FloatScaleScorer):
+    """Mock FloatScaleScorer for testing audio transcription scoring"""
+
+    def __init__(self, return_value: float = 0.8):
+        self.return_value = return_value
+        validator = ScorerPromptValidator(supported_data_types=["text"])
+        super().__init__(validator=validator)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        return [
+            Score(
+                score_type="float_scale",
+                score_value=str(self.return_value),
+                score_rationale=f"Test rationale for transcript: {message_piece.converted_value}",
+                score_category=["test_category"],
+                score_metadata={},
+                score_value_description="test_description",
+                message_piece_id=message_piece.id or uuid.uuid4(),
+                objective=objective,
+                scorer_class_identifier=get_mock_scorer_identifier(),
+            )
+        ]
+
+
+@pytest.fixture
+def audio_message_piece(patch_central_database):
+    """Create a mock audio message piece for testing"""
+    # Create a temporary audio file
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+        temp_file.write(b"fake audio content")
+        audio_path = temp_file.name
+
+    message_piece = MessagePiece(
+        role="user",
+        original_value=audio_path,
+        converted_value=audio_path,
+        original_value_data_type="audio_path",
+        converted_value_data_type="audio_path",
+    )
+    message_piece.id = uuid.uuid4()
+
+    yield message_piece
+
+    # Cleanup
+    if os.path.exists(audio_path):
+        os.remove(audio_path)
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestAudioTrueFalseScorer:
+    """Checks AudioTrueFalseScorer construction, identifier building and piece scoring"""
+
+    def test_init_with_text_scorer(self):
+        """The text scorer passed in is the very object held by the audio helper"""
+        wrapped = MockTextTrueFalseScorer()
+
+        scorer = AudioTrueFalseScorer(text_capable_scorer=wrapped)
+        assert scorer._audio_helper.text_scorer is wrapped
+
+    def test_build_identifier(self):
+        """_build_identifier produces a ScorerIdentifier instance"""
+        wrapped = MockTextTrueFalseScorer()
+        scorer = AudioTrueFalseScorer(text_capable_scorer=wrapped)
+
+        built = scorer._build_identifier()
+        # Only the type is verified, not the identifier's contents
+        assert isinstance(built, ScorerIdentifier)
+
+    @pytest.mark.asyncio
+    async def test_score_piece_with_transcript(self, audio_message_piece):
+        """A non-empty transcript yields a single true score whose rationale mentions the transcript"""
+        wrapped = MockTextTrueFalseScorer(return_value=True)
+        scorer = AudioTrueFalseScorer(text_capable_scorer=wrapped)
+        helper = scorer._audio_helper
+
+        # Swap the helper's transcription method for a stub that resolves to a fixed transcript
+        transcribe_stub = AsyncMock(return_value="Hello, this is a test transcript.")
+
+        with patch.object(helper, "_transcribe_audio_async", transcribe_stub):
+            results = await scorer._score_piece_async(audio_message_piece)
+
+            assert len(results) == 1
+            only = results[0]
+            assert only.score_type == "true_false"
+            assert only.score_value == "true"
+            assert "Audio transcript scored:" in only.score_rationale
+
+    @pytest.mark.asyncio
+    async def test_score_piece_empty_transcript(self, audio_message_piece):
+        """An empty transcript produces no scores"""
+        wrapped = MockTextTrueFalseScorer(return_value=True)
+        scorer = AudioTrueFalseScorer(text_capable_scorer=wrapped)
+        helper = scorer._audio_helper
+
+        # Stub transcription so that it resolves to an empty string
+        transcribe_stub = AsyncMock(return_value="")
+
+        with patch.object(helper, "_transcribe_audio_async", transcribe_stub):
+            results = await scorer._score_piece_async(audio_message_piece)
+
+            # With nothing transcribed, the scorer is expected
+            # to hand back an empty collection of scores
+            assert len(results) == 0
+
+    @pytest.mark.asyncio
+    async def test_score_piece_false_result(self, audio_message_piece):
+        """With the text scorer mocked to return False, the audio score value is false"""
+        wrapped = MockTextTrueFalseScorer(return_value=False)
+        scorer = AudioTrueFalseScorer(text_capable_scorer=wrapped)
+        helper = scorer._audio_helper
+
+        # Any non-empty transcript will do for this case
+        transcribe_stub = AsyncMock(return_value="Some transcript text")
+
+        with patch.object(helper, "_transcribe_audio_async", transcribe_stub):
+            results = await scorer._score_piece_async(audio_message_piece)
+
+            assert len(results) == 1
+            only = results[0]
+            assert only.score_type == "true_false"
+            assert only.score_value == "false"
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestAudioFloatScaleScorer:
+    """Checks AudioFloatScaleScorer construction, identifier building and piece scoring"""
+
+    def test_init_with_text_scorer(self):
+        """The text scorer passed in is the very object held by the audio helper"""
+        wrapped = MockTextFloatScaleScorer()
+
+        scorer = AudioFloatScaleScorer(text_capable_scorer=wrapped)
+        assert scorer._audio_helper.text_scorer is wrapped
+
+    def test_build_identifier(self):
+        """_build_identifier produces a ScorerIdentifier instance"""
+        wrapped = MockTextFloatScaleScorer()
+        scorer = AudioFloatScaleScorer(text_capable_scorer=wrapped)
+
+        built = scorer._build_identifier()
+        # Only the type is verified, not the identifier's contents
+        assert isinstance(built, ScorerIdentifier)
+
+    @pytest.mark.asyncio
+    async def test_score_piece_with_transcript(self, audio_message_piece):
+        """A non-empty transcript yields a single 0.75 float score whose rationale mentions the transcript"""
+        wrapped = MockTextFloatScaleScorer(return_value=0.75)
+        scorer = AudioFloatScaleScorer(text_capable_scorer=wrapped)
+        helper = scorer._audio_helper
+
+        # Swap the helper's transcription method for a stub that resolves to a fixed transcript
+        transcribe_stub = AsyncMock(return_value="Hello, this is a test transcript.")
+
+        with patch.object(helper, "_transcribe_audio_async", transcribe_stub):
+            results = await scorer._score_piece_async(audio_message_piece)
+
+            assert len(results) == 1
+            only = results[0]
+            assert only.score_type == "float_scale"
+            assert float(only.score_value) == 0.75
+            assert "Audio transcript scored:" in only.score_rationale
+
+    @pytest.mark.asyncio
+    async def test_score_piece_empty_transcript(self, audio_message_piece):
+        """An empty transcript produces no scores"""
+        wrapped = MockTextFloatScaleScorer(return_value=0.8)
+        scorer = AudioFloatScaleScorer(text_capable_scorer=wrapped)
+        helper = scorer._audio_helper
+
+        # Stub transcription so that it resolves to an empty string
+        transcribe_stub = AsyncMock(return_value="")
+
+        with patch.object(helper, "_transcribe_audio_async", transcribe_stub):
+            results = await scorer._score_piece_async(audio_message_piece)
+
+            # With nothing transcribed, the scorer is expected
+            # to hand back an empty collection of scores
+            assert len(results) == 0
diff --git a/tests/unit/score/test_video_scorer.py b/tests/unit/score/test_video_scorer.py
index 27de6693a..309a5d97d 100644
--- a/tests/unit/score/test_video_scorer.py
+++ b/tests/unit/score/test_video_scorer.py
@@ -4,16 +4,18 @@
 import os
 import uuid
 from typing import Optional
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
 
 from pyrit.identifiers import ScorerIdentifier
 from pyrit.models import MessagePiece, Score
+from pyrit.score.audio_transcript_scorer import AudioTranscriptHelper
 from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer
 from pyrit.score.float_scale.video_float_scale_scorer import VideoFloatScaleScorer
 from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
+from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator
 from pyrit.score.true_false.true_false_scorer import TrueFalseScorer
 from pyrit.score.true_false.video_true_false_scorer import VideoTrueFalseScorer
 from tests.unit.mocks import get_mock_scorer_identifier
@@ -292,3 +294,158 @@ def test_video_scorer_default_num_frames():
     scorer = VideoTrueFalseScorer(image_capable_scorer=image_scorer)
 
     assert scorer.num_sampled_frames == 5  # Default value
+
+
+class MockAudioTrueFalseScorer(TrueFalseScorer, AudioTranscriptHelper):
+    """Audio-path true/false scorer stub used by the video+audio integration tests"""
+
+    def __init__(self, return_value: bool = True):
+        # Fixed verdict reported by every call to _score_piece_async
+        self.return_value = return_value
+        self.received_objective = None
+        audio_only = ScorerPromptValidator(supported_data_types=["audio_path"])
+        TrueFalseScorer.__init__(self, validator=audio_only)
+
+    def _build_identifier(self) -> ScorerIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        self.received_objective = objective  # kept so a test can inspect what was passed in
+        verdict = str(self.return_value).lower()
+        mock_score = Score(
+            score_type="true_false",
+            score_value=verdict,
+            score_rationale="Mock audio score",
+            score_category=["audio"],
+            score_metadata={},
+            score_value_description="test_audio",
+            message_piece_id=message_piece.id or uuid.uuid4(),
+            objective=objective,
+            scorer_class_identifier=get_mock_scorer_identifier(),
+        )
+        return [mock_score]
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_true_false_scorer_with_audio_scorer(video_converter_sample_video):
+    """Scoring a video with both a frame scorer and an audio scorer gives one true score"""
+    frame_scorer = MockTrueFalseScorer(return_value=True)
+    sound_scorer = MockAudioTrueFalseScorer(return_value=True)
+
+    # Replace audio extraction with a stub that returns a placeholder path
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        video_scorer = VideoTrueFalseScorer(
+            num_sampled_frames=3,
+            audio_scorer=sound_scorer,
+            image_capable_scorer=frame_scorer,
+        )
+        results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(results) == 1
+        combined = results[0]
+        assert combined.score_type == "true_false"
+        assert combined.score_value == "true"
+        assert any(keyword in combined.score_rationale.lower() for keyword in ("visual", "audio"))
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_both_true(video_converter_sample_video):
+    """AND aggregation of a true visual score and a true audio score gives true"""
+    frame_scorer = MockTrueFalseScorer(return_value=True)
+    sound_scorer = MockAudioTrueFalseScorer(return_value=True)
+
+    # Replace audio extraction with a stub that returns a placeholder path
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        video_scorer = VideoTrueFalseScorer(
+            score_aggregator=TrueFalseScoreAggregator.AND,
+            num_sampled_frames=3,
+            audio_scorer=sound_scorer,
+            image_capable_scorer=frame_scorer,
+        )
+        results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(results) == 1
+        assert results[0].score_value == "true"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_visual_false(video_converter_sample_video):
+    """AND aggregation of a false visual score and a true audio score gives false"""
+    frame_scorer = MockTrueFalseScorer(return_value=False)
+    sound_scorer = MockAudioTrueFalseScorer(return_value=True)
+
+    # Replace audio extraction with a stub that returns a placeholder path
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        video_scorer = VideoTrueFalseScorer(
+            score_aggregator=TrueFalseScoreAggregator.AND,
+            num_sampled_frames=3,
+            audio_scorer=sound_scorer,
+            image_capable_scorer=frame_scorer,
+        )
+        results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(results) == 1
+        assert results[0].score_value == "false"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_and_aggregation_audio_false(video_converter_sample_video):
+    """AND aggregation of a true visual score and a false audio score gives false"""
+    frame_scorer = MockTrueFalseScorer(return_value=True)
+    sound_scorer = MockAudioTrueFalseScorer(return_value=False)
+
+    # Replace audio extraction with a stub that returns a placeholder path
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        video_scorer = VideoTrueFalseScorer(
+            score_aggregator=TrueFalseScoreAggregator.AND,
+            num_sampled_frames=3,
+            audio_scorer=sound_scorer,
+            image_capable_scorer=frame_scorer,
+        )
+        results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(results) == 1
+        assert results[0].score_value == "false"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_or_aggregation_one_true(video_converter_sample_video):
+    """With no aggregator passed, a false visual score and a true audio score combine to true"""
+    frame_scorer = MockTrueFalseScorer(return_value=False)
+    sound_scorer = MockAudioTrueFalseScorer(return_value=True)
+
+    # Replace audio extraction with a stub that returns a placeholder path
+    with patch.object(AudioTranscriptHelper, "extract_audio_from_video", return_value="/tmp/mock_audio.wav"):
+        video_scorer = VideoTrueFalseScorer(
+            num_sampled_frames=3,
+            audio_scorer=sound_scorer,
+            image_capable_scorer=frame_scorer,
+        )
+        results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+        assert len(results) == 1
+        assert results[0].score_value == "true"
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(not is_opencv_installed(), reason="opencv is not installed")
+async def test_video_scorer_without_audio_scorer(video_converter_sample_video):
+    """A video scorer built with audio_scorer=None still returns one true score"""
+    frame_scorer = MockTrueFalseScorer(return_value=True)
+
+    # The audio scorer is explicitly passed as None here
+    video_scorer = VideoTrueFalseScorer(
+        num_sampled_frames=3,
+        audio_scorer=None,
+        image_capable_scorer=frame_scorer,
+    )
+    results = await video_scorer._score_piece_async(video_converter_sample_video)
+
+    assert len(results) == 1
+    assert results[0].score_type == "true_false"
+    assert results[0].score_value == "true"