Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7617,11 +7617,14 @@ components:
RL.SampleBody:
type: object
required:
- prompt
- prompts
properties:
prompt:
$ref: '#/components/schemas/RL.ModelInput'
description: Input prompt as tokenized chunks
prompts:
description: Input prompts as tokenized chunks
type: array
items:
type: object
$ref: '#/components/schemas/RL.ModelInput'
sampling_params:
$ref: '#/components/schemas/RL.SamplingParams'
description: Optional sampling parameters
Expand All @@ -7630,7 +7633,7 @@ components:
format: int64
example: 1
default: "1"
description: Number of completions to generate for this prompt
description: Number of completions to generate per prompt
RL.ForwardBackwardBody:
type: object
required:
Expand Down Expand Up @@ -7714,6 +7717,10 @@ components:
type: boolean
example: true
description: Whether more items exist beyond this page
next_cursor:
type: string
example: 123e4567-e89b-12d3-a456-426614174000
description: Cursor to use as the 'after' parameter for the next page. Empty when has_more is false.
RL.EncodedText:
type: object
properties:
Expand Down Expand Up @@ -7943,14 +7950,23 @@ components:
format: uint64
example: 100
# Sampling result: completions grouped by prompt, one RL.SampleRollout per
# entry of `rollouts`.
RL.SampleResult:
  type: object
  properties:
    rollouts:
      type: array
      items:
        # Bare $ref on purpose: RL.SampleRollout already declares
        # `type: object`, and sibling keys next to $ref are ignored by
        # OpenAPI 3.0 tooling.
        $ref: '#/components/schemas/RL.SampleRollout'
      description: Completions grouped by prompt
# One rollout: the completions generated for a single prompt.
RL.SampleRollout:
  type: object
  properties:
    sequences:
      type: array
      items:
        # Bare $ref on purpose: RL.SampleSequence already declares
        # `type: object`, and sibling keys next to $ref are ignored by
        # OpenAPI 3.0 tooling.
        $ref: '#/components/schemas/RL.SampleSequence'
      # Single description key; a second `description` on the same mapping
      # would be a duplicate key (last one wins in most parsers).
      description: Completions generated for one prompt
RL.SampleSequence:
type: object
properties:
Expand Down Expand Up @@ -7998,11 +8014,15 @@ components:
properties:
loss:
type: number
format: double
example: 2.345
description: Loss value
metrics:
type: object
description: Loss-specific metrics (e.g., KL divergence, clip fraction for GRPO)
example:
loss/clip/high_fraction: 0.1
loss/kl_ref/mean: 0.05
additionalProperties:
type: number
format: double
Expand Down
Loading