Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 42 additions & 23 deletions src/app/endpoints/rlsapi_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,39 +122,58 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str:
return f"{base_prompt}\n\nUser's system: {system_context}"


def _get_default_model_id() -> str:
"""Get the default model ID from configuration.
async def _get_default_model_id() -> str:
"""Get the default model ID from configuration or auto-discovery.

Returns the model identifier in Llama Stack format (provider/model).
Model selection precedence:
1. If default model and provider are configured, use them.
2. Otherwise, query Llama Stack for available LLM models and select the first one.

Returns:
The model identifier string.
The model identifier string in "provider/model" format.

Raises:
HTTPException: If no model can be determined from configuration.
HTTPException: If no model can be determined from configuration or discovery.
"""
if configuration.inference is None:
msg = "No inference configuration available"
# 1. Try configured defaults
if configuration.inference is not None:
model_id = configuration.inference.default_model
provider_id = configuration.inference.default_provider

if model_id and provider_id:
return f"{provider_id}/{model_id}"

# 2. Auto-discover from Llama Stack
client = AsyncLlamaStackClientHolder().get_client()
try:
models = await client.models.list()
except APIConnectionError as e:
error_response = ServiceUnavailableResponse(
backend_name="Llama Stack",
cause=str(e),
)
raise HTTPException(**error_response.model_dump()) from e
except APIStatusError as e:
error_response = InternalServerErrorResponse.generic()
raise HTTPException(**error_response.model_dump()) from e

Comment on lines +147 to +159
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle uninitialized Llama Stack client in _get_default_model_id().

Line 147 calls AsyncLlamaStackClientHolder().get_client() outside the try. If the client was not loaded, RuntimeError escapes as an unstructured failure instead of a controlled API response.

💡 Proposed fix
-    client = AsyncLlamaStackClientHolder().get_client()
     try:
+        client = AsyncLlamaStackClientHolder().get_client()
         models = await client.models.list()
+    except RuntimeError as e:
+        error_response = ServiceUnavailableResponse(
+            backend_name="Llama Stack",
+            cause=str(e),
+        )
+        raise HTTPException(**error_response.model_dump()) from e
     except APIConnectionError as e:
         error_response = ServiceUnavailableResponse(
             backend_name="Llama Stack",
             cause=str(e),
         )
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/app/endpoints/rlsapi_v1.py` around lines 147 - 159, The call to
AsyncLlamaStackClientHolder().get_client() in _get_default_model_id can raise
RuntimeError when the client is uninitialized and currently sits outside the try
block; move the get_client() call inside the try (or add an except RuntimeError
branch) so uninitialized-client errors are handled like other backend failures.
Specifically, update _get_default_model_id to either: 1) call client =
AsyncLlamaStackClientHolder().get_client() inside the existing try and let
APIConnectionError/APIStatusError handling proceed, or 2) add except
RuntimeError as e to produce a ServiceUnavailableResponse (backend_name="Llama
Stack", cause=str(e)) and raise HTTPException with that model_dump(), ensuring
AsyncLlamaStackClientHolder().get_client and the existing exception handling
cover this case.

llm_models = [
m
for m in models
if m.custom_metadata and m.custom_metadata.get("model_type") == "llm"
]
if not llm_models:
msg = "No LLM model found in available models"
logger.error(msg)
error_response = ServiceUnavailableResponse(
backend_name="inference service (configuration)",
backend_name="inference service",
cause=msg,
)
raise HTTPException(**error_response.model_dump())

model_id = configuration.inference.default_model
provider_id = configuration.inference.default_provider

if model_id and provider_id:
return f"{provider_id}/{model_id}"

msg = "No default model configured for rlsapi v1 inference"
logger.error(msg)
error_response = ServiceUnavailableResponse(
backend_name="inference service (configuration)",
cause=msg,
)
raise HTTPException(**error_response.model_dump())
model = llm_models[0]
logger.info("Auto-discovered LLM model for rlsapi v1: %s", model.id)
return model.id


async def retrieve_simple_response(
Expand All @@ -178,7 +197,7 @@ async def retrieve_simple_response(
HTTPException: 503 if no model is configured.
"""
client = AsyncLlamaStackClientHolder().get_client()
model_id = _get_default_model_id()
model_id = await _get_default_model_id()

logger.debug("Using model %s for rlsapi v1 inference", model_id)

Expand Down Expand Up @@ -306,7 +325,7 @@ async def infer_endpoint(

input_source = infer_request.get_input_source()
instructions = _build_instructions(infer_request.context.systeminfo)
model_id = _get_default_model_id()
model_id = await _get_default_model_id()
mcp_tools = await get_mcp_tools()
logger.debug(
"Request %s: Combined input source length: %d", request_id, len(input_source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ conversation_cache:

authentication:
module: "noop-with-token"
inference:
default_provider: openai
default_model: gpt-4o-mini
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ authorization:
- "get_tools"
- "info"
- "model_override"
- "rlsapi_v1_infer"
# Viewer role can only read (no mutations)
- role: "viewer"
actions:
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/configuration/library-mode/lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ user_data_collection:
transcripts_storage: "/tmp/data/transcripts"
authentication:
module: "noop"
inference:
default_provider: openai
default_model: gpt-4o-mini
mcp_servers:
# Mock server with client-provided auth - should appear in mcp-auth/client-options response
- name: "github-api"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ conversation_cache:

authentication:
module: "noop-with-token"
inference:
default_provider: openai
default_model: gpt-4o-mini
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ authorization:
- "get_tools"
- "info"
- "model_override"
- "rlsapi_v1_infer"
# Viewer role can only read (no mutations)
- role: "viewer"
actions:
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/configuration/server-mode/lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ user_data_collection:
transcripts_storage: "/tmp/data/transcripts"
authentication:
module: "noop"
inference:
default_provider: openai
default_model: gpt-4o-mini
mcp_servers:
# Mock server with client-provided auth - should appear in mcp-auth/client-options response
- name: "github-api"
Expand Down
89 changes: 89 additions & 0 deletions tests/e2e/features/rlsapi_v1.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# E2E API tests for the rlsapi v1 /infer endpoint.
# Covers happy-path inference, authentication failures (401),
# input validation (422), and request_id uniqueness across calls.
# NOTE(review): step text below is matched verbatim by the behave step
# definitions — do not reword steps without updating the steps module.
@Authorized
Feature: rlsapi v1 /infer endpoint API tests

Background:
Given The service is started locally
And REST API service prefix is /v1

# NOTE(review): the bearer value appears to be a truncated JWT; the
# noop-with-token auth module presumably accepts any non-empty token — confirm.
Scenario: Basic inference with minimal request (question only)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files in Linux?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

Scenario: Inference with full context (systeminfo populated)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "How do I configure SELinux?", "context": {"systeminfo": {"os": "RHEL", "version": "9.3", "arch": "x86_64"}}}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

Scenario: Request without authorization returns 401
Given The system is in default state
When I use "infer" to ask question
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 401
And The body of the response is the following
"""
{
"detail": {
"response": "Missing or invalid credentials provided by client",
"cause": "No Authorization header found"
}
}
"""

Scenario: Request with empty bearer token returns 401
Given The system is in default state
And I set the Authorization header to Bearer
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 401
And The body of the response contains No token found in Authorization header

Scenario: Empty/whitespace question returns 422
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": " "}
"""
Then The status code of the response is 422
And The body of the response contains Question cannot be empty

Scenario: Response contains valid structure (data.text, data.request_id)
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "What is RHEL?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

# Issues two sequential requests and compares their request_ids to
# verify per-request ID generation.
Scenario: Multiple requests generate unique request_ids
Given The system is in default state
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
When I use "infer" to ask question with authorization header
"""
{"question": "First question"}
"""
Then The status code of the response is 200
And I store the rlsapi request_id
When I use "infer" to ask question with authorization header
"""
{"question": "Second question"}
"""
Then The status code of the response is 200
And The rlsapi request_id should be different from the stored one
49 changes: 49 additions & 0 deletions tests/e2e/features/rlsapi_v1_errors.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Error-path E2E tests for the rlsapi v1 /infer endpoint: RBAC denials
# (403) and backend unavailability (503).
@RBAC
Feature: rlsapi v1 /infer endpoint error response tests

Tests for error conditions on the rlsapi v1 /infer endpoint including
authorization failures (403) and service unavailability (503).

Background:
Given The service is started locally
And REST API service prefix is /v1

# ============================================
# Authorization - 403 Forbidden
# ============================================

# NOTE(review): presumes the "viewer" role is configured without the
# rlsapi_v1_infer action — confirm against the RBAC configuration.
Scenario: User without rlsapi_v1_infer permission returns 403
Given The system is in default state
And I authenticate as "viewer" user
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 403
And The body of the response contains does not have permission

Scenario: User with rlsapi_v1_infer permission can access endpoint
Given The system is in default state
And I authenticate as "user" user
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 200
And The rlsapi response should have valid structure

# ============================================
# Service Unavailable - 503
# ============================================

# Skipped in library mode: there is no separate llama-stack server
# process whose connection could be disrupted.
@skip-in-library-mode
Scenario: Returns 503 when llama-stack connection is broken
Given The system is in default state
And I authenticate as "user" user
And The llama-stack connection is disrupted
When I use "infer" to ask question with authorization header
"""
{"question": "How do I list files?"}
"""
Then The status code of the response is 503
And The body of the response contains Llama Stack
65 changes: 65 additions & 0 deletions tests/e2e/features/steps/rlsapi_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""rlsapi v1 endpoint test steps."""

from behave import then, step # pyright: ignore[reportAttributeAccessIssue]
from behave.runner import Context


@then("The rlsapi response should have valid structure")
def check_rlsapi_response_structure(context: Context) -> None:
    """Validate the overall shape of an rlsapi v1 response.

    Ensures the JSON payload carries a ``data`` object whose ``text``
    and ``request_id`` fields are both non-empty strings.
    """
    assert context.response is not None, "Request needs to be performed first"
    payload = context.response.json()

    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]

    # Both required fields share the same validation contract.
    for field in ("text", "request_id"):
        assert field in data, f"Response data missing '{field}' field"
        value = data[field]
        assert isinstance(value, str), f"data.{field} must be a string"
        assert len(value) > 0, f"data.{field} must not be empty"


@step("I store the rlsapi request_id")
def store_rlsapi_request_id(context: Context) -> None:
    """Remember the request_id of the current rlsapi response.

    Validates the field, then saves it on the behave context so a later
    step can compare a subsequent response against it.
    """
    assert context.response is not None, "Request needs to be performed first"
    payload = context.response.json()

    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]
    assert "request_id" in data, "Response data missing 'request_id'"

    request_id = data["request_id"]
    assert isinstance(request_id, str), "data.request_id must be a string"
    assert len(request_id) > 0, "data.request_id must not be empty"

    # Stash for the uniqueness-comparison step.
    context.stored_request_id = request_id


@then("The rlsapi request_id should be different from the stored one")
def check_rlsapi_request_id_different(context: Context) -> None:
    """Assert the latest request_id differs from the previously stored one."""
    assert context.response is not None, "Request needs to be performed first"
    assert hasattr(context, "stored_request_id"), "No request_id was stored previously"

    payload = context.response.json()
    assert "data" in payload, "Response missing 'data' field"
    data = payload["data"]
    assert "request_id" in data, "Response data missing 'request_id'"

    current = data["request_id"]
    assert isinstance(current, str), "data.request_id must be a string"
    assert len(current) > 0, "data.request_id must not be empty"

    assert (
        current != context.stored_request_id
    ), f"request_id should be unique, but got same value: {current}"
2 changes: 2 additions & 0 deletions tests/e2e/test_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ features/feedback.feature
features/health.feature
features/info.feature
features/query.feature
features/rlsapi_v1.feature
features/rlsapi_v1_errors.feature
features/streaming_query.feature
features/rest_api.feature
features/models.feature
Loading
Loading