Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
**/__pycache__/
**/*.pyc
**/*.pyo

# Vally
vally-results/
9 changes: 4 additions & 5 deletions .vally.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
paths:
evals: evals
evals: evals/

suites:
pr:
description: Run all plugin skill evals for pull requests
evals:
- evals/linting/eval.yaml
- evals/security/eval.yaml
description: Run p0 plugin skill evals for pull requests
filter:
priority: p0
5 changes: 5 additions & 0 deletions evals/linting/eval.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
name: linting skills evals
version: 1
description: Evaluates the linting plugin skills against representative file fixes
tags:
priority: p0

defaults:
runs: 1
timeout: 5m

scoring:
threshold: 1.0

stimuli:
- name: check-spelling-fixes-typos-and-updates-dictionary
prompt: |
Expand Down
5 changes: 5 additions & 0 deletions evals/security/eval.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
name: security skills evals
version: 1
description: Evaluates the security plugin skills against representative workflow hardening tasks
tags:
priority: p0

defaults:
runs: 1
timeout: 5m

scoring:
threshold: 1.0

stimuli:
- name: pin-github-actions-to-shas
prompt: |
Expand Down