Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 42 additions & 23 deletions src/app/endpoints/rlsapi_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,39 +122,58 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str:
return f"{base_prompt}\n\nUser's system: {system_context}"


def _get_default_model_id() -> str:
"""Get the default model ID from configuration.
async def _get_default_model_id() -> str:
"""Get the default model ID from configuration or auto-discovery.

Returns the model identifier in Llama Stack format (provider/model).
Model selection precedence:
1. If default model and provider are configured, use them.
2. Otherwise, query Llama Stack for available LLM models and select the first one.

Returns:
The model identifier string.
The model identifier string in "provider/model" format.

Raises:
HTTPException: If no model can be determined from configuration.
HTTPException: If no model can be determined from configuration or discovery.
"""
if configuration.inference is None:
msg = "No inference configuration available"
# 1. Try configured defaults
if configuration.inference is not None:
model_id = configuration.inference.default_model
provider_id = configuration.inference.default_provider

if model_id and provider_id:
return f"{provider_id}/{model_id}"

# 2. Auto-discover from Llama Stack
client = AsyncLlamaStackClientHolder().get_client()
try:
models = await client.models.list()
except APIConnectionError as e:
error_response = ServiceUnavailableResponse(
backend_name="Llama Stack",
cause=str(e),
)
raise HTTPException(**error_response.model_dump()) from e
except APIStatusError as e:
error_response = InternalServerErrorResponse.generic()
raise HTTPException(**error_response.model_dump()) from e

Comment on lines +147 to +159
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle uninitialized Llama Stack client in _get_default_model_id().

Line 147 calls AsyncLlamaStackClientHolder().get_client() outside the try. If the client was not loaded, RuntimeError escapes as an unstructured failure instead of a controlled API response.

💡 Proposed fix
-    client = AsyncLlamaStackClientHolder().get_client()
     try:
+        client = AsyncLlamaStackClientHolder().get_client()
         models = await client.models.list()
+    except RuntimeError as e:
+        error_response = ServiceUnavailableResponse(
+            backend_name="Llama Stack",
+            cause=str(e),
+        )
+        raise HTTPException(**error_response.model_dump()) from e
     except APIConnectionError as e:
         error_response = ServiceUnavailableResponse(
             backend_name="Llama Stack",
             cause=str(e),
         )
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/app/endpoints/rlsapi_v1.py` around lines 147 - 159, The call to
AsyncLlamaStackClientHolder().get_client() in _get_default_model_id can raise
RuntimeError when the client is uninitialized and currently sits outside the try
block; move the get_client() call inside the try (or add an except RuntimeError
branch) so uninitialized-client errors are handled like other backend failures.
Specifically, update _get_default_model_id to either: 1) call client =
AsyncLlamaStackClientHolder().get_client() inside the existing try and let
APIConnectionError/APIStatusError handling proceed, or 2) add except
RuntimeError as e to produce a ServiceUnavailableResponse (backend_name="Llama
Stack", cause=str(e)) and raise HTTPException with that model_dump(), ensuring
AsyncLlamaStackClientHolder().get_client and the existing exception handling
cover this case.

llm_models = [
m
for m in models
if m.custom_metadata and m.custom_metadata.get("model_type") == "llm"
]
if not llm_models:
msg = "No LLM model found in available models"
logger.error(msg)
error_response = ServiceUnavailableResponse(
backend_name="inference service (configuration)",
backend_name="inference service",
cause=msg,
)
raise HTTPException(**error_response.model_dump())

model_id = configuration.inference.default_model
provider_id = configuration.inference.default_provider

if model_id and provider_id:
return f"{provider_id}/{model_id}"

msg = "No default model configured for rlsapi v1 inference"
logger.error(msg)
error_response = ServiceUnavailableResponse(
backend_name="inference service (configuration)",
cause=msg,
)
raise HTTPException(**error_response.model_dump())
model = llm_models[0]
logger.info("Auto-discovered LLM model for rlsapi v1: %s", model.id)
return model.id


async def retrieve_simple_response(
Expand All @@ -178,7 +197,7 @@ async def retrieve_simple_response(
HTTPException: 503 if no model is configured.
"""
client = AsyncLlamaStackClientHolder().get_client()
model_id = _get_default_model_id()
model_id = await _get_default_model_id()

logger.debug("Using model %s for rlsapi v1 inference", model_id)

Expand Down Expand Up @@ -306,7 +325,7 @@ async def infer_endpoint(

input_source = infer_request.get_input_source()
instructions = _build_instructions(infer_request.context.systeminfo)
model_id = _get_default_model_id()
model_id = await _get_default_model_id()
mcp_tools = await get_mcp_tools()
logger.debug(
"Request %s: Combined input source length: %d", request_id, len(input_source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ conversation_cache:

authentication:
module: "noop-with-token"
inference:
default_provider: openai
default_model: gpt-4o-mini
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ authorization:
- "get_tools"
- "info"
- "model_override"
- "rlsapi_v1_infer"
# Viewer role can only read (no mutations)
- role: "viewer"
actions:
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/configuration/library-mode/lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ user_data_collection:
transcripts_storage: "/tmp/data/transcripts"
authentication:
module: "noop"
inference:
default_provider: openai
default_model: gpt-4o-mini
mcp_servers:
# Mock server with client-provided auth - should appear in mcp-auth/client-options response
- name: "github-api"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ conversation_cache:

authentication:
module: "noop-with-token"
inference:
default_provider: openai
default_model: gpt-4o-mini
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ authorization:
- "get_tools"
- "info"
- "model_override"
- "rlsapi_v1_infer"
# Viewer role can only read (no mutations)
- role: "viewer"
actions:
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/configuration/server-mode/lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ user_data_collection:
transcripts_storage: "/tmp/data/transcripts"
authentication:
module: "noop"
inference:
default_provider: openai
default_model: gpt-4o-mini
mcp_servers:
# Mock server with client-provided auth - should appear in mcp-auth/client-options response
- name: "github-api"
Expand Down
89 changes: 89 additions & 0 deletions tests/e2e/features/rlsapi_v1.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# E2E API tests for the rlsapi v1 /infer endpoint.
# Covers happy-path inference, authentication failures (401),
# input validation (422), and request_id uniqueness across calls.
# NOTE(review): step text below is matched verbatim by the behave step
# definitions — do not reword steps without updating the steps module.
@Authorized
Feature: rlsapi v1 /infer endpoint API tests

Background:
Given The service is started locally
And REST API service prefix is /v1

# NOTE(review): the bearer value appears to be a truncated JWT; the
# noop-with-token auth module presumably accepts any non-empty token — confirm.
Scenario: Basic inference with minimal request (question only)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files in Linux?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

Scenario: Inference with full context (systeminfo populated)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "How do I configure SELinux?", "context": {"systeminfo": {"os": "RHEL", "version": "9.3", "arch": "x86_64"}}}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

Scenario: Request without authorization returns 401
Given The system is in default state
When I use "infer" to ask question
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 401
And The body of the response is the following
"""
{
"detail": {
"response": "Missing or invalid credentials provided by client",
"cause": "No Authorization header found"
}
}
"""

Scenario: Request with empty bearer token returns 401
Given The system is in default state
And I set the Authorization header to Bearer
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 401
And The body of the response contains No token found in Authorization header

Scenario: Empty/whitespace question returns 422
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": " "}
"""
Then The status code of the response is 422
And The body of the response contains Question cannot be empty

Scenario: Response contains valid structure (data.text, data.request_id)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "What is RHEL?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

# Issues two sequential requests and compares their request_ids to
# verify per-request ID generation.
Scenario: Multiple requests generate unique request_ids
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "First question"}
"""
Then The status code of the response is 200
And I store the rlsapi request_id
When I use "infer" to ask question with authorization header
"""
{"question": "Second question"}
"""
Then The status code of the response is 200
And The rlsapi request_id should be different from the stored one
49 changes: 49 additions & 0 deletions tests/e2e/features/rlsapi_v1_errors.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Error-path E2E tests for the rlsapi v1 /infer endpoint: RBAC denials
# (403) and backend unavailability (503).
@RBAC
Feature: rlsapi v1 /infer endpoint error response tests

Tests for error conditions on the rlsapi v1 /infer endpoint including
authorization failures (403) and service unavailability (503).

Background:
Given The service is started locally
And REST API service prefix is /v1

# ============================================
# Authorization - 403 Forbidden
# ============================================

# NOTE(review): presumes the "viewer" role is configured without the
# rlsapi_v1_infer action — confirm against the RBAC configuration.
Scenario: User without rlsapi_v1_infer permission returns 403
Given The system is in default state
And I authenticate as "viewer" user
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 403
And The body of the response contains does not have permission

Scenario: User with rlsapi_v1_infer permission can access endpoint
Given The system is in default state
And I authenticate as "user" user
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

# ============================================
# Service Unavailable - 503
# ============================================

# Skipped in library mode: there is no separate llama-stack server
# process whose connection could be disrupted.
@skip-in-library-mode
Scenario: Returns 503 when llama-stack connection is broken
Given The system is in default state
And I authenticate as "user" user
And The llama-stack connection is disrupted
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 503
And The body of the response contains Llama Stack
65 changes: 65 additions & 0 deletions tests/e2e/features/steps/rlsapi_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""rlsapi v1 endpoint test steps."""

from behave import then, step # pyright: ignore[reportAttributeAccessIssue]
from behave.runner import Context


@then("The rlsapi response should have valid structure")
def check_rlsapi_response_structure(context: Context) -> None:
    """Validate the overall shape of an rlsapi v1 response.

    Ensures the JSON payload carries a ``data`` object whose ``text``
    and ``request_id`` fields are both non-empty strings.
    """
    assert context.response is not None, "Request needs to be performed first"
    payload = context.response.json()

    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]

    # Both required fields share the same validation contract.
    for field in ("text", "request_id"):
        assert field in data, f"Response data missing '{field}' field"
        value = data[field]
        assert isinstance(value, str), f"data.{field} must be a string"
        assert len(value) > 0, f"data.{field} must not be empty"


@step("I store the rlsapi request_id")
def store_rlsapi_request_id(context: Context) -> None:
    """Remember the request_id of the current rlsapi response.

    Validates the field, then saves it on the behave context so a later
    step can compare a subsequent response against it.
    """
    assert context.response is not None, "Request needs to be performed first"
    payload = context.response.json()

    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]
    assert "request_id" in data, "Response data missing 'request_id'"

    request_id = data["request_id"]
    assert isinstance(request_id, str), "data.request_id must be a string"
    assert len(request_id) > 0, "data.request_id must not be empty"

    # Stash for the uniqueness-comparison step.
    context.stored_request_id = request_id


@then("The rlsapi request_id should be different from the stored one")
def check_rlsapi_request_id_different(context: Context) -> None:
    """Assert the latest request_id differs from the previously stored one."""
    assert context.response is not None, "Request needs to be performed first"
    assert hasattr(context, "stored_request_id"), "No request_id was stored previously"

    payload = context.response.json()
    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]
    assert "request_id" in data, "Response data missing 'request_id'"

    current = data["request_id"]
    assert isinstance(current, str), "data.request_id must be a string"
    assert len(current) > 0, "data.request_id must not be empty"

    assert (
        current != context.stored_request_id
    ), f"request_id should be unique, but got same value: {current}"
2 changes: 2 additions & 0 deletions tests/e2e/test_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ features/feedback.feature
features/health.feature
features/info.feature
features/query.feature
features/rlsapi_v1.feature
features/rlsapi_v1_errors.feature
features/streaming_query.feature
features/rest_api.feature
features/models.feature
Loading
Loading