Skip to content
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,8 @@ API Reference
:nosignatures:
:toctree: _autosummary/

AudioFloatScaleScorer
AudioTrueFalseScorer
AzureContentFilterScorer
BatchScorer
ConsoleScorerPrinter
Expand Down
1,170 changes: 883 additions & 287 deletions doc/code/targets/4_openai_video_target.ipynb

Large diffs are not rendered by default.

73 changes: 70 additions & 3 deletions doc/code/targets/4_openai_video_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.19.0
# jupytext_version: 1.17.2
# kernelspec:
# display_name: pyrit
# language: python
# name: python3
# ---

# %% [markdown]
Expand All @@ -24,9 +28,11 @@
)
from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget
from pyrit.score import (
AudioTrueFalseScorer,
AzureContentFilterScorer,
SelfAskTrueFalseScorer,
TrueFalseQuestion,
TrueFalseScoreAggregator,
VideoFloatScaleScorer,
VideoTrueFalseScorer,
)
Expand All @@ -35,6 +41,18 @@
await initialize_pyrit_async(memory_db_type=IN_MEMORY) # type: ignore

video_target = OpenAIVideoTarget()

# %% [markdown]
# ## Generating and scoring a video:
#
# Using the video target, you can send prompts to generate a video. The video scorer can evaluate the video content itself. Note that this section scores only the **video**, not the audio.

# %%
objectives = [
"Video of a raccoon pirate eating flan at a cafe in Spain",
"Video of a raccoon pirate eating a croissant at a cafe in France",
]

objective_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
Expand All @@ -53,11 +71,60 @@
),
)

results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
)

for result in results:
await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True) # type: ignore

# %% [markdown]
# ## Scoring video and audio **together**:
#
# The audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.
#
# *Note*: the current audio scoring uses transcription, so if the audio cannot be transcribed, this will return False.

# %%
# Objectives - the first two describe visuals only; the last two also ask for spoken lines, i.e. what is HEARD/SAID in the video
objectives = [
"Video of a raccoon pirate eating flan at a cafe in Spain",
"Video of a raccoon pirate eating a croissant at a cafe in France",
"Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.",
"Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.",
]

# Visual scorer - checks what is SEEN in the video frames
visual_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
)

# Audio transcript scorer - checks what is SAID in the video
audio_text_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(
true_description="Someone introduces themselves and expresses enjoyment of a croissant"
),
)

# Use AND aggregation: both visual AND audio must be true for the overall score to be true
# i.e., here we check that a pirate raccoon is actively eating a pastry AND that someone introduces themselves and says they like the croissant.
audio_and_video_scorer = VideoTrueFalseScorer(
image_capable_scorer=visual_scorer,
num_sampled_frames=10,
audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),
score_aggregator=TrueFalseScoreAggregator.AND, # Both visual and audio must be true
ignore_objective_for_audio=True, # Ignore objective for audio scoring so that audio is scored independently of visual objective
ignore_objective_for_images=True, # Use only visual scorer true/false question as the objective so it is independent of the audio objective
)

attack = PromptSendingAttack(
objective_target=video_target,
attack_scoring_config=AttackScoringConfig(
objective_scorer=audio_and_video_scorer,
),
)

results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
Expand Down
4 changes: 4 additions & 0 deletions pyrit/score/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pyrit.score.batch_scorer import BatchScorer
from pyrit.score.conversation_scorer import ConversationScorer, create_conversation_scorer
from pyrit.score.float_scale.audio_float_scale_scorer import AudioFloatScaleScorer
from pyrit.score.float_scale.azure_content_filter_scorer import AzureContentFilterScorer
from pyrit.score.float_scale.float_scale_score_aggregator import (
FloatScaleScoreAggregator,
Expand Down Expand Up @@ -48,6 +49,7 @@
get_all_objective_metrics,
)
from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
from pyrit.score.true_false.audio_true_false_scorer import AudioTrueFalseScorer
from pyrit.score.true_false.decoding_scorer import DecodingScorer
from pyrit.score.true_false.float_scale_threshold_scorer import FloatScaleThresholdScorer
from pyrit.score.true_false.gandalf_scorer import GandalfScorer
Expand All @@ -71,6 +73,8 @@
from pyrit.score.true_false.video_true_false_scorer import VideoTrueFalseScorer

__all__ = [
"AudioFloatScaleScorer",
"AudioTrueFalseScorer",
"AzureContentFilterScorer",
"BatchScorer",
"ContentClassifierPaths",
Expand Down
Loading