Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added tests/__init__.py
Empty file.
112 changes: 112 additions & 0 deletions tests/test_json_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Tests for skillopt.utils.json_utils."""
from __future__ import annotations

import pytest

from skillopt.utils.json_utils import extract_json, extract_json_array


class TestExtractJson:
    """Behavioural checks for ``extract_json`` on LLM-style response text."""

    def test_code_fence_json(self) -> None:
        """An object inside a json code fence surrounded by prose is returned."""
        response = 'Some text\n```json\n{"key": "value", "num": 42}\n```\nmore text'
        expected = {"key": "value", "num": 42}
        assert extract_json(response) == expected

    def test_bare_json_object(self) -> None:
        """An unfenced object embedded in a sentence is returned."""
        response = 'The result is {"answer": "yes", "score": 0.95}.'
        expected = {"answer": "yes", "score": 0.95}
        assert extract_json(response) == expected

    def test_code_fence_takes_precedence(self) -> None:
        """A parseable fenced object wins over a later bare object."""
        response = (
            '```json\n{"source": "fence"}\n```\n'
            'Then also {"source": "bare"}'
        )
        result = extract_json(response)
        assert result == {"source": "fence"}

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """Unparseable fence content yields the bare ``{...}`` object instead."""
        # The fence body deliberately contains no braces: per the original
        # author's note, a greedy bare-object regex would otherwise swallow
        # the valid object that follows.
        response = (
            '```json\nnot json at all\n```\n'
            'Answer: {"fallback": "yes"}'
        )
        result = extract_json(response)
        assert result == {"fallback": "yes"}

    def test_nested_json(self) -> None:
        """Nested objects and arrays inside a fence are parsed intact."""
        response = '```json\n{"outer": {"inner": [1, 2, 3]}}\n```'
        expected = {"outer": {"inner": [1, 2, 3]}}
        assert extract_json(response) == expected

    def test_no_json_returns_none(self) -> None:
        """Plain prose with no JSON yields ``None``."""
        result = extract_json("Just plain text without JSON.")
        assert result is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string yields ``None``."""
        result = extract_json("")
        assert result is None

    def test_malformed_json_returns_none(self) -> None:
        """An unterminated object yields ``None``."""
        result = extract_json("{broken")
        assert result is None

    def test_empty_json_object(self) -> None:
        """An object whose only value is an empty object is returned as-is."""
        expected = {"empty": {}}
        assert extract_json('{"empty": {}}') == expected

    def test_json_with_escaped_chars(self) -> None:
        """A JSON ``\\n`` escape is decoded to a real newline in the value."""
        response = '{"message": "hello\\nworld"}'
        expected = {"message": "hello\nworld"}
        assert extract_json(response) == expected

    def test_only_fence_with_no_json_syntax(self) -> None:
        """A code fence holding non-JSON text yields ``None``."""
        response = "```\nplain code block\n```"
        result = extract_json(response)
        assert result is None


class TestExtractJsonArray:
    """Behavioural checks for ``extract_json_array`` on LLM-style response text."""

    def test_code_fence_array(self) -> None:
        """An array inside a json code fence is returned."""
        response = '```json\n["a", "b", "c"]\n```'
        expected = ["a", "b", "c"]
        assert extract_json_array(response) == expected

    def test_bare_array(self) -> None:
        """An unfenced array embedded in a sentence is returned."""
        response = "The items are [1, 2, 3]."
        expected = [1, 2, 3]
        assert extract_json_array(response) == expected

    def test_code_fence_takes_precedence(self) -> None:
        """A parseable fenced array wins over a later bare array."""
        response = (
            '```json\n["from_fence"]\n```\n'
            'also ["from_bare"]'
        )
        result = extract_json_array(response)
        assert result == ["from_fence"]

    def test_broken_fence_falls_back_to_bare(self) -> None:
        """Unparseable fence content yields the bare ``[...]`` array instead."""
        response = (
            '```json\nnot json at all\n```\n'
            'values: [42]'
        )
        result = extract_json_array(response)
        assert result == [42]

    def test_nested_array(self) -> None:
        """Arrays of arrays inside a fence are parsed intact."""
        response = '```json\n[[1, 2], [3, 4]]\n```'
        expected = [[1, 2], [3, 4]]
        assert extract_json_array(response) == expected

    def test_no_array_returns_none(self) -> None:
        """Text without brackets yields ``None``."""
        result = extract_json_array("no brackets here")
        assert result is None

    def test_empty_string_returns_none(self) -> None:
        """The empty string yields ``None``."""
        result = extract_json_array("")
        assert result is None

    def test_malformed_array_returns_none(self) -> None:
        """An unterminated array yields ``None``."""
        result = extract_json_array("[1, 2, ")
        assert result is None

    def test_empty_json_array(self) -> None:
        """A bare empty array is returned as an empty list, not ``None``."""
        result = extract_json_array("[]")
        assert result == []

    def test_array_of_objects(self) -> None:
        """An array whose elements are objects is parsed intact."""
        response = '[{"x": 1}, {"x": 2}]'
        expected = [{"x": 1}, {"x": 2}]
        assert extract_json_array(response) == expected

    def test_object_not_confused_with_array(self) -> None:
        """A bare JSON object must not be returned by the array extractor."""
        response = '{"this is an object": true}'
        result = extract_json_array(response)
        assert result is None
106 changes: 106 additions & 0 deletions tests/test_scoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Tests for skillopt.utils.scoring."""
from __future__ import annotations

import pytest

from skillopt.utils.scoring import compute_score, skill_hash


class _ResultObject:
    """Minimal object with hard/soft attrs (duck-typing path).

    Stands in for an attribute-style episode result, as opposed to the
    dict-style results used elsewhere in these tests.
    """

    def __init__(self, hard: float, soft: float) -> None:
        """Store the two scores as plain instance attributes."""
        self.hard = hard
        self.soft = soft

    def __repr__(self) -> str:
        """Show both scores so failing assertions print readable values."""
        return f"{type(self).__name__}(hard={self.hard!r}, soft={self.soft!r})"


class TestComputeScore:
    """compute_score — hard/soft accuracy from a list of episode results.

    Averaged values are compared with ``pytest.approx`` throughout: the
    expected numbers are floating-point means, and exact equality would tie
    the tests to one particular summation order inside the implementation.
    """

    def test_empty_list_returns_zeros(self) -> None:
        """No results yields a (0.0, 0.0) pair."""
        assert compute_score([]) == (0.0, 0.0)

    def test_dict_results_happy_path(self) -> None:
        """Dict results are averaged per key."""
        results = [
            {"hard": 1, "soft": 0.8},
            {"hard": 0, "soft": 0.5},
            {"hard": 1, "soft": 0.9},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(2 / 3)
        assert soft == pytest.approx((0.8 + 0.5 + 0.9) / 3)

    def test_object_results(self) -> None:
        """Objects exposing ``hard``/``soft`` attributes are averaged too."""
        results = [
            _ResultObject(1.0, 0.75),
            _ResultObject(0.0, 0.25),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_mixed_dict_and_object_results(self) -> None:
        """Dict and attribute-style results may be mixed in one list."""
        results = [
            {"hard": 1, "soft": 1.0},
            _ResultObject(0, 0.0),
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)

    def test_missing_keys_default_to_zero(self) -> None:
        """Absent ``hard``/``soft`` keys count as zero in the average."""
        results = [
            {"hard": 1},
            {},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.0)

    def test_single_result(self) -> None:
        """A single result is returned unchanged as the average."""
        results = [{"hard": 1, "soft": 0.95}]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(1.0)
        assert soft == pytest.approx(0.95)

    def test_continuous_hard_values(self) -> None:
        """Hard may be continuous 0.0-1.0 when using smoothed reward."""
        results = [
            {"hard": 0.75, "soft": 0.6},
            {"hard": 0.25, "soft": 0.4},
        ]
        hard, soft = compute_score(results)
        assert hard == pytest.approx(0.5)
        assert soft == pytest.approx(0.5)


class TestSkillHash:
    """skill_hash — a short, deterministic hash of skill content."""

    # Digest length these tests expect for every input.
    HASH_LENGTH = 16

    def test_deterministic(self) -> None:
        """Hashing the same content twice gives the same digest."""
        assert skill_hash("hello") == skill_hash("hello")

    def test_different_input_produces_different_hash(self) -> None:
        """Distinct content gives distinct digests."""
        assert skill_hash("hello") != skill_hash("world")

    def test_empty_string(self) -> None:
        """The empty string still hashes to a full-length string digest."""
        h = skill_hash("")
        assert isinstance(h, str)
        assert len(h) == self.HASH_LENGTH

    def test_output_length(self) -> None:
        """Ordinary content hashes to the expected digest length."""
        h = skill_hash("some skill content here")
        assert len(h) == self.HASH_LENGTH

    def test_hex_characters(self) -> None:
        """The digest consists solely of lowercase hexadecimal characters."""
        h = skill_hash("any content")
        assert all(c in "0123456789abcdef" for c in h)

    def test_unicode_content(self) -> None:
        """Non-ASCII content hashes deterministically to a full-length digest."""
        # Written as an escape so the accented character survives any
        # copy/paste or re-encoding of this file; a plain-ASCII "cafe" would
        # not exercise the non-ASCII path at all.
        content = "caf\u00e9"
        h1 = skill_hash(content)
        h2 = skill_hash(content)
        assert h1 == h2
        assert isinstance(h1, str)
        assert len(h1) == self.HASH_LENGTH
        # The accent must contribute to the digest rather than being dropped.
        assert h1 != skill_hash("cafe")

    def test_multiline_content(self) -> None:
        """Content containing newlines hashes to a full-length string digest."""
        content = "line1\nline2\nline3"
        h = skill_hash(content)
        assert len(h) == self.HASH_LENGTH
        assert isinstance(h, str)
Loading