diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py index 6528bfce4..49d45a0a1 100644 --- a/src/app/endpoints/rlsapi_v1.py +++ b/src/app/endpoints/rlsapi_v1.py @@ -122,39 +122,58 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str: return f"{base_prompt}\n\nUser's system: {system_context}" -def _get_default_model_id() -> str: - """Get the default model ID from configuration. +async def _get_default_model_id() -> str: + """Get the default model ID from configuration or auto-discovery. - Returns the model identifier in Llama Stack format (provider/model). + Model selection precedence: + 1. If default model and provider are configured, use them. + 2. Otherwise, query Llama Stack for available LLM models and select the first one. Returns: - The model identifier string. + The model identifier string in "provider/model" format. Raises: - HTTPException: If no model can be determined from configuration. + HTTPException: If no model can be determined from configuration or discovery. """ - if configuration.inference is None: - msg = "No inference configuration available" + # 1. Try configured defaults + if configuration.inference is not None: + model_id = configuration.inference.default_model + provider_id = configuration.inference.default_provider + + if model_id and provider_id: + return f"{provider_id}/{model_id}" + + # 2. Auto-discover from Llama Stack + client = AsyncLlamaStackClientHolder().get_client() + try: + models = await client.models.list() + except APIConnectionError as e: + error_response = ServiceUnavailableResponse( + backend_name="Llama Stack", + cause=str(e), + ) + raise HTTPException(**error_response.model_dump()) from e + except APIStatusError as e: + error_response = InternalServerErrorResponse.generic() + raise HTTPException(**error_response.model_dump()) from e + + llm_models = [ + m + for m in models + if m.custom_metadata and m.custom_metadata.get("model_type") == "llm" + ] + if not llm_models: + msg = "No LLM model found in available models" logger.error(msg) error_response = ServiceUnavailableResponse( - backend_name="inference service (configuration)", + backend_name="inference service", cause=msg, ) raise HTTPException(**error_response.model_dump()) - model_id = configuration.inference.default_model - provider_id = configuration.inference.default_provider - - if model_id and provider_id: - return f"{provider_id}/{model_id}" - - msg = "No default model configured for rlsapi v1 inference" - logger.error(msg) - error_response = ServiceUnavailableResponse( - backend_name="inference service (configuration)", - cause=msg, - ) - raise HTTPException(**error_response.model_dump()) + model = llm_models[0] + logger.info("Auto-discovered LLM model for rlsapi v1: %s", model.id) + return model.id async def retrieve_simple_response( @@ -178,7 +197,7 @@ async def retrieve_simple_response( HTTPException: 503 if no model is configured. """ client = AsyncLlamaStackClientHolder().get_client() - model_id = _get_default_model_id() + model_id = await _get_default_model_id() logger.debug("Using model %s for rlsapi v1 inference", model_id) @@ -306,7 +325,7 @@ async def infer_endpoint( input_source = infer_request.get_input_source() instructions = _build_instructions(infer_request.context.systeminfo) - model_id = _get_default_model_id() + model_id = await _get_default_model_id() mcp_tools = await get_mcp_tools() logger.debug( "Request %s: Combined input source length: %d", request_id, len(input_source) diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml index 4ab62b2b5..0391b8ac6 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml @@ -23,3 +23,6 @@ conversation_cache: authentication: module: "noop-with-token" +inference: + default_provider: openai + default_model: gpt-4o-mini diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml index b8aacaf16..ab03e7904 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml @@ -74,6 +74,7 @@ authorization: - "get_tools" - "info" - "model_override" + - "rlsapi_v1_infer" # Viewer role can only read (no mutations) - role: "viewer" actions: diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 118b917c5..bc5694578 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -17,6 +17,9 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini mcp_servers: # Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml index 960919eda..642624020 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml @@ -29,3 +29,6 @@ conversation_cache: authentication: module: "noop-with-token" +inference: + default_provider: openai + default_model: gpt-4o-mini diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml index 7fd953952..21e4505e2 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml @@ -75,6 +75,7 @@ authorization: - "get_tools" - "info" - "model_override" + - "rlsapi_v1_infer" # Viewer role can only read (no mutations) - role: "viewer" actions: diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml index 1dbef61cf..026c551de 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -18,6 +18,9 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini mcp_servers: # Mock server with client-provided auth - should appear in mcp-auth/client-options response - name: "github-api" diff --git a/tests/e2e/features/rlsapi_v1.feature b/tests/e2e/features/rlsapi_v1.feature new file mode 100644 index 000000000..3b7d41afb --- /dev/null +++ b/tests/e2e/features/rlsapi_v1.feature @@ -0,0 +1,89 @@ +@Authorized +Feature: rlsapi v1 /infer endpoint API tests + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + Scenario: Basic inference with minimal request (question only) + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "infer" to ask question with authorization header + """ + {"question": "How do I list files in Linux?"} + """ + Then The status code of the response is 200 + And The rlsapi response should have valid structure + + Scenario: Inference with full context (systeminfo populated) + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "infer" to ask question with authorization header + """ + {"question": "How do I configure SELinux?", "context": {"systeminfo": {"os": "RHEL", "version": "9.3", "arch": "x86_64"}}} + """ + Then The status code of the response is 200 + And The rlsapi response should have valid structure + + Scenario: Request without authorization returns 401 + Given The system is in default state + When I use "infer" to ask question + """ + {"question": "How do I list files?"} + """ + Then The status code of the response is 401 + And The body of the response is the following + """ + { + "detail": { + "response": "Missing or invalid credentials provided by client", + "cause": "No Authorization header found" + } + } + """ + + Scenario: Request with empty bearer token returns 401 + Given The system is in default state + And I set the Authorization header to Bearer + When I use "infer" to ask question with authorization header + """ + {"question": "How do I list files?"} + """ + Then The status code of the response is 401 + And The body of the response contains No token found in Authorization header + + Scenario: Empty/whitespace question returns 422 + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "infer" to ask question with authorization header + """ + {"question": " "} + """ + Then The status code of the response is 422 + And The body of the response contains Question cannot be empty + + Scenario: Response contains valid structure (data.text, data.request_id) + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "infer" to ask question with authorization header + """ + {"question": "What is RHEL?"} + """ + Then The status code of the response is 200 + And The rlsapi response should have valid structure + + Scenario: Multiple requests generate unique request_ids + Given The system is in default state + And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva + When I use "infer" to ask question with authorization header + """ + {"question": "First question"} + """ + Then The status code of the response is 200 + And I store the rlsapi request_id + When I use "infer" to ask question with authorization header + """ + {"question": "Second question"} + """ + Then The status code of the response is 200 + And The rlsapi request_id should be different from the stored one diff --git a/tests/e2e/features/rlsapi_v1_errors.feature b/tests/e2e/features/rlsapi_v1_errors.feature new file mode 100644 index 000000000..89668b006 --- /dev/null +++ b/tests/e2e/features/rlsapi_v1_errors.feature @@ -0,0 +1,49 @@ +@RBAC +Feature: rlsapi v1 /infer endpoint error response tests + + Tests for error conditions on the rlsapi v1 /infer endpoint including + authorization failures (403) and service unavailability (503). + + Background: + Given The service is started locally + And REST API service prefix is /v1 + + # ============================================ + # Authorization - 403 Forbidden + # ============================================ + + Scenario: User without rlsapi_v1_infer permission returns 403 + Given The system is in default state + And I authenticate as "viewer" user + When I use "infer" to ask question with authorization header + """ + {"question": "How do I list files?"} + """ + Then The status code of the response is 403 + And The body of the response contains does not have permission + + Scenario: User with rlsapi_v1_infer permission can access endpoint + Given The system is in default state + And I authenticate as "user" user + When I use "infer" to ask question with authorization header + """ + {"question": "How do I list files?"} + """ + Then The status code of the response is 200 + And The rlsapi response should have valid structure + + # ============================================ + # Service Unavailable - 503 + # ============================================ + + @skip-in-library-mode + Scenario: Returns 503 when llama-stack connection is broken + Given The system is in default state + And I authenticate as "user" user + And The llama-stack connection is disrupted + When I use "infer" to ask question with authorization header + """ + {"question": "How do I list files?"} + """ + Then The status code of the response is 503 + And The body of the response contains Llama Stack diff --git a/tests/e2e/features/steps/rlsapi_v1.py b/tests/e2e/features/steps/rlsapi_v1.py new file mode 100644 index 000000000..3444acb37 --- /dev/null +++ b/tests/e2e/features/steps/rlsapi_v1.py @@ -0,0 +1,65 @@ +"""rlsapi v1 endpoint test steps.""" + +from behave import then, step # pyright: ignore[reportAttributeAccessIssue] +from behave.runner import Context + + +@then("The rlsapi response should have valid structure") +def check_rlsapi_response_structure(context: Context) -> None: + """Check that rlsapi v1 response has valid structure. + + Validates that the response contains: + - data.text (non-empty string) + - data.request_id (non-empty string) + """ + assert context.response is not None, "Request needs to be performed first" + response_json = context.response.json() + + assert "data" in response_json, "Response missing 'data' field" + data = response_json["data"] + + assert "text" in data, "Response data missing 'text' field" + assert isinstance(data["text"], str), "data.text must be a string" + assert len(data["text"]) > 0, "data.text must not be empty" + + assert "request_id" in data, "Response data missing 'request_id' field" + assert isinstance(data["request_id"], str), "data.request_id must be a string" + assert len(data["request_id"]) > 0, "data.request_id must not be empty" + + +@step("I store the rlsapi request_id") +def store_rlsapi_request_id(context: Context) -> None: + """Store the request_id from rlsapi response for later comparison.""" + assert context.response is not None, "Request needs to be performed first" + response_json = context.response.json() + + assert "data" in response_json, "Response missing 'data' field" + assert "request_id" in response_json["data"], "Response data missing 'request_id'" + assert isinstance( + response_json["data"]["request_id"], str + ), "data.request_id must be a string" + assert ( + len(response_json["data"]["request_id"]) > 0 + ), "data.request_id must not be empty" + + context.stored_request_id = response_json["data"]["request_id"] + + +@then("The rlsapi request_id should be different from the stored one") +def check_rlsapi_request_id_different(context: Context) -> None: + """Verify that the current request_id differs from the stored one.""" + assert context.response is not None, "Request needs to be performed first" + assert hasattr(context, "stored_request_id"), "No request_id was stored previously" + + response_json = context.response.json() + assert "data" in response_json, "Response missing 'data' field" + assert "request_id" in response_json["data"], "Response data missing 'request_id'" + + current_request_id = response_json["data"]["request_id"] + assert isinstance(current_request_id, str), "data.request_id must be a string" + assert len(current_request_id) > 0, "data.request_id must not be empty" + stored_request_id = context.stored_request_id + + assert ( + current_request_id != stored_request_id + ), f"request_id should be unique, but got same value: {current_request_id}" diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 398d824c2..69e277c51 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -10,6 +10,8 @@ features/feedback.feature features/health.feature features/info.feature features/query.feature +features/rlsapi_v1.feature +features/rlsapi_v1_errors.feature features/streaming_query.feature features/rest_api.feature features/models.feature diff --git a/tests/unit/app/endpoints/test_rlsapi_v1.py b/tests/unit/app/endpoints/test_rlsapi_v1.py index 52196b597..0273bb0a8 100644 --- a/tests/unit/app/endpoints/test_rlsapi_v1.py +++ b/tests/unit/app/endpoints/test_rlsapi_v1.py @@ -229,69 +229,88 @@ def test_build_instructions_no_customization(mocker: MockerFixture) -> None: # --- Test _get_default_model_id --- -def test_get_default_model_id_success(mock_configuration: AppConfig) -> None: +@pytest.mark.asyncio +async def test_get_default_model_id_success(mock_configuration: AppConfig) -> None: """Test _get_default_model_id returns properly formatted model ID.""" - model_id = _get_default_model_id() + model_id = await _get_default_model_id() assert model_id == "openai/gpt-4-turbo" @pytest.mark.parametrize( - ("config_setup", "expected_cause"), + "failure_mode", [ - pytest.param( - "missing_model", - "No default model configured", - id="missing_model_config", - ), - pytest.param( - "none_inference", - "No inference configuration available", - id="none_inference_config", - ), + pytest.param("no_llm_models", id="no_llm_models_found"), + pytest.param("connection_error", id="connection_error"), ], ) -def test_get_default_model_id_errors( +@pytest.mark.asyncio +async def test_get_default_model_id_errors( mocker: MockerFixture, minimal_config: AppConfig, - config_setup: str, - expected_cause: str, + failure_mode: str, ) -> None: - """Test _get_default_model_id raises HTTPException with ServiceUnavailableResponse shape.""" - if config_setup == "missing_model": - # Config exists but no model/provider defaults - mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config) + """Test _get_default_model_id fallback failures raise 503 responses.""" + mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config) + + mock_embedding_model = mocker.Mock() + mock_embedding_model.custom_metadata = {"model_type": "embedding"} + mock_embedding_model.id = "sentence-transformers/all-mpnet-base-v2" + + mock_client = mocker.Mock() + mock_client.models = mocker.Mock() + + if failure_mode == "no_llm_models": + mock_client.models.list = mocker.AsyncMock(return_value=[mock_embedding_model]) else: - # inference is None - mock_config = mocker.Mock() - mock_config.inference = None - mocker.patch("app.endpoints.rlsapi_v1.configuration", mock_config) + mock_client.models.list = mocker.AsyncMock( + side_effect=APIConnectionError(request=mocker.Mock()) + ) + + mock_client_holder = mocker.Mock() + mock_client_holder.get_client.return_value = mock_client + mocker.patch( + "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder", + return_value=mock_client_holder, + ) with pytest.raises(HTTPException) as exc_info: - _get_default_model_id() + await _get_default_model_id() assert exc_info.value.status_code == 503 - assert expected_cause in str(exc_info.value.detail) - # Verify ServiceUnavailableResponse produces dict with response+cause keys detail: dict[str, str] = exc_info.value.detail # type: ignore[assignment] assert set(detail.keys()) == {"response", "cause"} -def test_config_error_503_matches_llm_error_503_shape( +@pytest.mark.asyncio +async def test_config_error_503_matches_llm_error_503_shape( mocker: MockerFixture, + minimal_config: AppConfig, ) -> None: - """Test that configuration error 503s have the same shape as LLM error 503s. + """Test that auto-discovery 503s have the same shape as LLM error 503s. - Both _get_default_model_id() configuration errors and APIConnectionError + Both _get_default_model_id() no-LLM auto-discovery errors and APIConnectionError handlers use ServiceUnavailableResponse, producing identical detail shapes with 'response' and 'cause' keys. """ - # Trigger a configuration error 503 - mock_config = mocker.Mock() - mock_config.inference = None - mocker.patch("app.endpoints.rlsapi_v1.configuration", mock_config) + mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config) + + mock_embedding_model = mocker.Mock() + mock_embedding_model.custom_metadata = {"model_type": "embedding"} + mock_embedding_model.id = "sentence-transformers/all-mpnet-base-v2" + + mock_client = mocker.Mock() + mock_client.models = mocker.Mock() + mock_client.models.list = mocker.AsyncMock(return_value=[mock_embedding_model]) + + mock_client_holder = mocker.Mock() + mock_client_holder.get_client.return_value = mock_client + mocker.patch( + "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder", + return_value=mock_client_holder, + ) with pytest.raises(HTTPException) as config_exc: - _get_default_model_id() + await _get_default_model_id() # Build an LLM connection error 503 using the same response model llm_response = ServiceUnavailableResponse( @@ -306,6 +325,39 @@ def test_config_error_503_matches_llm_error_503_shape( assert set(config_detail.keys()) == set(llm_detail.keys()) == {"response", "cause"} +@pytest.mark.asyncio +async def test_get_default_model_id_auto_discovery_success( + mocker: MockerFixture, minimal_config: AppConfig +) -> None: + """Test _get_default_model_id returns first discovered LLM model ID.""" + mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config) + + mock_llm_model = mocker.Mock() + mock_llm_model.custom_metadata = {"model_type": "llm"} + mock_llm_model.id = "openai/gpt-4o-mini" + + mock_embedding_model = mocker.Mock() + mock_embedding_model.custom_metadata = {"model_type": "embedding"} + mock_embedding_model.id = "sentence-transformers/all-mpnet-base-v2" + + mock_client = mocker.Mock() + mock_client.models = mocker.Mock() + mock_client.models.list = mocker.AsyncMock( + return_value=[mock_embedding_model, mock_llm_model] + ) + + mock_client_holder = mocker.Mock() + mock_client_holder.get_client.return_value = mock_client + mocker.patch( + "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder", + return_value=mock_client_holder, + ) + + model_id = await _get_default_model_id() + + assert model_id == "openai/gpt-4o-mini" + + # --- Test retrieve_simple_response ---