diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py
index 6528bfce4..49d45a0a1 100644
--- a/src/app/endpoints/rlsapi_v1.py
+++ b/src/app/endpoints/rlsapi_v1.py
@@ -122,39 +122,58 @@ def _build_instructions(systeminfo: RlsapiV1SystemInfo) -> str:
     return f"{base_prompt}\n\nUser's system: {system_context}"
 
 
-def _get_default_model_id() -> str:
-    """Get the default model ID from configuration.
+async def _get_default_model_id() -> str:
+    """Get the default model ID from configuration or auto-discovery.
 
-    Returns the model identifier in Llama Stack format (provider/model).
+    Model selection precedence:
+    1. If both default model and default provider are configured, use them.
+    2. Otherwise, list Llama Stack models and return the first with model_type "llm".
 
     Returns:
-        The model identifier string.
+        "provider/model" when configured, otherwise the discovered model's id.
 
     Raises:
-        HTTPException: If no model can be determined from configuration.
+        HTTPException: If Llama Stack cannot be queried or lists no LLM model.
     """
-    if configuration.inference is None:
-        msg = "No inference configuration available"
+    # 1. Configured defaults win, but only when both provider and model are set.
+    if configuration.inference is not None:
+        model_id = configuration.inference.default_model
+        provider_id = configuration.inference.default_provider
+
+        if model_id and provider_id:
+            return f"{provider_id}/{model_id}"
+
+    # 2. Otherwise ask Llama Stack which models it serves and pick the first LLM.
+    client = AsyncLlamaStackClientHolder().get_client()
+    try:
+        models = await client.models.list()
+    except APIConnectionError as e:
+        error_response = ServiceUnavailableResponse(
+            backend_name="Llama Stack",
+            cause=str(e),
+        )
+        raise HTTPException(**error_response.model_dump()) from e
+    except APIStatusError as e:
+        error_response = InternalServerErrorResponse.generic()
+        raise HTTPException(**error_response.model_dump()) from e
+
+    llm_models = [
+        m
+        for m in models
+        if m.custom_metadata and m.custom_metadata.get("model_type") == "llm"
+    ]
+    if not llm_models:
+        msg = "No LLM model found in available models"
         logger.error(msg)
         error_response = ServiceUnavailableResponse(
-            backend_name="inference service (configuration)",
+            backend_name="inference service",
             cause=msg,
         )
         raise HTTPException(**error_response.model_dump())
 
-    model_id = configuration.inference.default_model
-    provider_id = configuration.inference.default_provider
-
-    if model_id and provider_id:
-        return f"{provider_id}/{model_id}"
-
-    msg = "No default model configured for rlsapi v1 inference"
-    logger.error(msg)
-    error_response = ServiceUnavailableResponse(
-        backend_name="inference service (configuration)",
-        cause=msg,
-    )
-    raise HTTPException(**error_response.model_dump())
+    model = llm_models[0]
+    logger.info("Auto-discovered LLM model for rlsapi v1: %s", model.id)
+    return model.id
 
 
 async def retrieve_simple_response(
@@ -178,7 +197,7 @@ async def retrieve_simple_response(
         HTTPException: 503 if no model is configured.
     """
     client = AsyncLlamaStackClientHolder().get_client()
-    model_id = _get_default_model_id()
+    model_id = await _get_default_model_id()
 
     logger.debug("Using model %s for rlsapi v1 inference", model_id)
 
@@ -306,7 +325,7 @@ async def infer_endpoint(
 
     input_source = infer_request.get_input_source()
     instructions = _build_instructions(infer_request.context.systeminfo)
-    model_id = _get_default_model_id()
+    model_id = await _get_default_model_id()
     mcp_tools = await get_mcp_tools()
     logger.debug(
         "Request %s: Combined input source length: %d", request_id, len(input_source)
diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml
index 4ab62b2b5..0391b8ac6 100644
--- a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml
+++ b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml
@@ -23,3 +23,6 @@ conversation_cache:
 
 authentication:
   module: "noop-with-token"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml
index b8aacaf16..ab03e7904 100644
--- a/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml
+++ b/tests/e2e/configuration/library-mode/lightspeed-stack-rbac.yaml
@@ -74,6 +74,7 @@ authorization:
         - "get_tools"
         - "info"
         - "model_override"
+        - "rlsapi_v1_infer"
     # Viewer role can only read (no mutations)
     - role: "viewer"
       actions:
diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml
index 118b917c5..bc5694578 100644
--- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml
+++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml
@@ -17,6 +17,9 @@ user_data_collection:
   transcripts_storage: "/tmp/data/transcripts"
 authentication:
   module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
 mcp_servers:
   # Mock server with client-provided auth - should appear in mcp-auth/client-options response
   - name: "github-api"
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml
index 960919eda..642624020 100644
--- a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml
@@ -29,3 +29,6 @@ conversation_cache:
 
 authentication:
   module: "noop-with-token"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml
index 7fd953952..21e4505e2 100644
--- a/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack-rbac.yaml
@@ -75,6 +75,7 @@ authorization:
         - "get_tools"
         - "info"
         - "model_override"
+        - "rlsapi_v1_infer"
     # Viewer role can only read (no mutations)
     - role: "viewer"
       actions:
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml
index 1dbef61cf..026c551de 100644
--- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml
@@ -18,6 +18,9 @@ user_data_collection:
   transcripts_storage: "/tmp/data/transcripts"
 authentication:
   module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
 mcp_servers:
   # Mock server with client-provided auth - should appear in mcp-auth/client-options response
   - name: "github-api"
diff --git a/tests/e2e/features/rlsapi_v1.feature b/tests/e2e/features/rlsapi_v1.feature
new file mode 100644
index 000000000..3b7d41afb
--- /dev/null
+++ b/tests/e2e/features/rlsapi_v1.feature
@@ -0,0 +1,89 @@
+@Authorized
+Feature: rlsapi v1 /infer endpoint API tests
+
+  Background:
+    Given The service is started locally
+      And REST API service prefix is /v1
+
+  Scenario: Basic inference with minimal request (question only)
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "How do I list files in Linux?"}
+    """
+    Then The status code of the response is 200
+    And The rlsapi response should have valid structure
+
+  Scenario: Inference with full context (systeminfo populated)
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "How do I configure SELinux?", "context": {"systeminfo": {"os": "RHEL", "version": "9.3", "arch": "x86_64"}}}
+    """
+    Then The status code of the response is 200
+    And The rlsapi response should have valid structure
+
+  Scenario: Request without authorization returns 401
+    Given The system is in default state
+    When I use "infer" to ask question
+    """
+    {"question": "How do I list files?"}
+    """
+    Then The status code of the response is 401
+    And The body of the response is the following
+    """
+    {
+      "detail": {
+        "response": "Missing or invalid credentials provided by client",
+        "cause": "No Authorization header found"
+      }
+    }
+    """
+
+  Scenario: Request with empty bearer token returns 401
+    Given The system is in default state
+    And I set the Authorization header to Bearer
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "How do I list files?"}
+    """
+    Then The status code of the response is 401
+    And The body of the response contains No token found in Authorization header
+
+  Scenario: Empty/whitespace question returns 422
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "   "}
+    """
+    Then The status code of the response is 422
+    And The body of the response contains Question cannot be empty
+
+  Scenario: Response contains valid structure (data.text, data.request_id)
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "What is RHEL?"}
+    """
+    Then The status code of the response is 200
+    And The rlsapi response should have valid structure
+
+  Scenario: Multiple requests generate unique request_ids
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "First question"}
+    """
+    Then The status code of the response is 200
+    And I store the rlsapi request_id
+    When I use "infer" to ask question with authorization header
+    """
+    {"question": "Second question"}
+    """
+    Then The status code of the response is 200
+    And The rlsapi request_id should be different from the stored one
diff --git a/tests/e2e/features/rlsapi_v1_errors.feature b/tests/e2e/features/rlsapi_v1_errors.feature
new file mode 100644
index 000000000..89668b006
--- /dev/null
+++ b/tests/e2e/features/rlsapi_v1_errors.feature
@@ -0,0 +1,49 @@
+@RBAC
+Feature: rlsapi v1 /infer endpoint error response tests
+
+  Tests for error conditions on the rlsapi v1 /infer endpoint including
+  authorization failures (403) and service unavailability (503).
+
+  Background:
+    Given The service is started locally
+      And REST API service prefix is /v1
+
+  # ============================================
+  # Authorization - 403 Forbidden
+  # ============================================
+
+  Scenario: User without rlsapi_v1_infer permission returns 403
+    Given The system is in default state
+      And I authenticate as "viewer" user
+     When I use "infer" to ask question with authorization header
+      """
+      {"question": "How do I list files?"}
+      """
+     Then The status code of the response is 403
+      And The body of the response contains does not have permission
+
+  Scenario: User with rlsapi_v1_infer permission can access endpoint
+    Given The system is in default state
+      And I authenticate as "user" user
+     When I use "infer" to ask question with authorization header
+      """
+      {"question": "How do I list files?"}
+      """
+     Then The status code of the response is 200
+      And The rlsapi response should have valid structure
+
+  # ============================================
+  # Service Unavailable - 503
+  # ============================================
+
+  @skip-in-library-mode
+  Scenario: Returns 503 when llama-stack connection is broken
+    Given The system is in default state
+      And I authenticate as "user" user
+      And The llama-stack connection is disrupted
+     When I use "infer" to ask question with authorization header
+      """
+      {"question": "How do I list files?"}
+      """
+     Then The status code of the response is 503
+      And The body of the response contains Llama Stack
diff --git a/tests/e2e/features/steps/rlsapi_v1.py b/tests/e2e/features/steps/rlsapi_v1.py
new file mode 100644
index 000000000..3444acb37
--- /dev/null
+++ b/tests/e2e/features/steps/rlsapi_v1.py
@@ -0,0 +1,65 @@
+"""rlsapi v1 endpoint test steps."""
+
+from behave import then, step  # pyright: ignore[reportAttributeAccessIssue]
+from behave.runner import Context
+
+
+@then("The rlsapi response should have valid structure")
+def check_rlsapi_response_structure(context: Context) -> None:
+    """Assert that the last rlsapi v1 response has a well-formed payload.
+
+    The JSON body must contain a ``data`` object in which:
+    - ``text`` is present and is a non-empty string
+    - ``request_id`` is present and is a non-empty string
+    """
+    assert context.response is not None, "Request needs to be performed first"
+    body = context.response.json()
+
+    assert "data" in body, "Response missing 'data' field"
+    payload = body["data"]
+
+    # Both required fields get the same three checks, in a fixed order:
+    # presence, then type, then non-emptiness.
+    for field in ("text", "request_id"):
+        assert field in payload, f"Response data missing '{field}' field"
+        value = payload[field]
+        assert isinstance(value, str), f"data.{field} must be a string"
+        assert len(value) > 0, f"data.{field} must not be empty"
+
+
+@step("I store the rlsapi request_id")
+def store_rlsapi_request_id(context: Context) -> None:
+    """Save the request_id of the latest rlsapi response on the context.
+
+    The id must be a non-empty string; it is kept as ``context.stored_request_id``.
+    """
+    assert context.response is not None, "Request needs to be performed first"
+    body = context.response.json()
+
+    assert "data" in body, "Response missing 'data' field"
+    payload = body["data"]
+    assert "request_id" in payload, "Response data missing 'request_id'"
+    request_id = payload["request_id"]
+    assert isinstance(request_id, str), "data.request_id must be a string"
+    assert len(request_id) > 0, "data.request_id must not be empty"
+    context.stored_request_id = request_id
+
+
+@then("The rlsapi request_id should be different from the stored one")
+def check_rlsapi_request_id_different(context: Context) -> None:
+    """Assert the latest response's request_id differs from the stored one."""
+    assert context.response is not None, "Request needs to be performed first"
+    assert hasattr(context, "stored_request_id"), "No request_id was stored previously"
+
+    body = context.response.json()
+    assert "data" in body, "Response missing 'data' field"
+    payload = body["data"]
+    assert "request_id" in payload, "Response data missing 'request_id'"
+
+    latest = payload["request_id"]
+    assert isinstance(latest, str), "data.request_id must be a string"
+    assert len(latest) > 0, "data.request_id must not be empty"
+
+    previous = context.stored_request_id
+    is_unique = latest != previous
+    assert is_unique, f"request_id should be unique, but got same value: {latest}"
diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt
index 398d824c2..69e277c51 100644
--- a/tests/e2e/test_list.txt
+++ b/tests/e2e/test_list.txt
@@ -10,6 +10,8 @@ features/feedback.feature
 features/health.feature
 features/info.feature
 features/query.feature
+features/rlsapi_v1.feature
+features/rlsapi_v1_errors.feature
 features/streaming_query.feature
 features/rest_api.feature
 features/models.feature
diff --git a/tests/unit/app/endpoints/test_rlsapi_v1.py b/tests/unit/app/endpoints/test_rlsapi_v1.py
index 52196b597..0273bb0a8 100644
--- a/tests/unit/app/endpoints/test_rlsapi_v1.py
+++ b/tests/unit/app/endpoints/test_rlsapi_v1.py
@@ -229,69 +229,88 @@ def test_build_instructions_no_customization(mocker: MockerFixture) -> None:
 # --- Test _get_default_model_id ---
 
 
-def test_get_default_model_id_success(mock_configuration: AppConfig) -> None:
+@pytest.mark.asyncio
+async def test_get_default_model_id_success(mock_configuration: AppConfig) -> None:
     """Test _get_default_model_id returns properly formatted model ID."""
-    model_id = _get_default_model_id()
+    model_id = await _get_default_model_id()
     assert model_id == "openai/gpt-4-turbo"
 
 
 @pytest.mark.parametrize(
-    ("config_setup", "expected_cause"),
+    "failure_mode",
     [
-        pytest.param(
-            "missing_model",
-            "No default model configured",
-            id="missing_model_config",
-        ),
-        pytest.param(
-            "none_inference",
-            "No inference configuration available",
-            id="none_inference_config",
-        ),
+        pytest.param("no_llm_models", id="no_llm_models_found"),
+        pytest.param("connection_error", id="connection_error"),
     ],
 )
-def test_get_default_model_id_errors(
+@pytest.mark.asyncio
+async def test_get_default_model_id_errors(
     mocker: MockerFixture,
     minimal_config: AppConfig,
-    config_setup: str,
-    expected_cause: str,
+    failure_mode: str,
 ) -> None:
-    """Test _get_default_model_id raises HTTPException with ServiceUnavailableResponse shape."""
-    if config_setup == "missing_model":
-        # Config exists but no model/provider defaults
-        mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config)
+    """Test _get_default_model_id raises 503 when model auto-discovery fails."""
+    mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config)
+    # Non-LLM entry for the "no_llm_models" case; the model_type filter must skip it.
+    mock_embedding_model = mocker.Mock()
+    mock_embedding_model.custom_metadata = {"model_type": "embedding"}
+    mock_embedding_model.id = "sentence-transformers/all-mpnet-base-v2"
+
+    mock_client = mocker.Mock()
+    mock_client.models = mocker.Mock()
+
+    if failure_mode == "no_llm_models":
+        mock_client.models.list = mocker.AsyncMock(return_value=[mock_embedding_model])
     else:
-        # inference is None
-        mock_config = mocker.Mock()
-        mock_config.inference = None
-        mocker.patch("app.endpoints.rlsapi_v1.configuration", mock_config)
+        mock_client.models.list = mocker.AsyncMock(
+            side_effect=APIConnectionError(request=mocker.Mock())
+        )
+
+    mock_client_holder = mocker.Mock()
+    mock_client_holder.get_client.return_value = mock_client
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder",
+        return_value=mock_client_holder,
+    )
 
     with pytest.raises(HTTPException) as exc_info:
-        _get_default_model_id()
+        await _get_default_model_id()
 
     assert exc_info.value.status_code == 503
-    assert expected_cause in str(exc_info.value.detail)
-    # Verify ServiceUnavailableResponse produces dict with response+cause keys
     detail: dict[str, str] = exc_info.value.detail  # type: ignore[assignment]
     assert set(detail.keys()) == {"response", "cause"}
 
 
-def test_config_error_503_matches_llm_error_503_shape(
+@pytest.mark.asyncio
+async def test_config_error_503_matches_llm_error_503_shape(
     mocker: MockerFixture,
+    minimal_config: AppConfig,
 ) -> None:
-    """Test that configuration error 503s have the same shape as LLM error 503s.
+    """Test that auto-discovery 503s have the same shape as LLM error 503s.
 
-    Both _get_default_model_id() configuration errors and APIConnectionError
+    Both _get_default_model_id() no-LLM auto-discovery errors and APIConnectionError
     handlers use ServiceUnavailableResponse, producing identical detail shapes
     with 'response' and 'cause' keys.
     """
-    # Trigger a configuration error 503
-    mock_config = mocker.Mock()
-    mock_config.inference = None
-    mocker.patch("app.endpoints.rlsapi_v1.configuration", mock_config)
+    mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config)
+
+    mock_embedding_model = mocker.Mock()
+    mock_embedding_model.custom_metadata = {"model_type": "embedding"}
+    mock_embedding_model.id = "sentence-transformers/all-mpnet-base-v2"
+
+    mock_client = mocker.Mock()
+    mock_client.models = mocker.Mock()
+    mock_client.models.list = mocker.AsyncMock(return_value=[mock_embedding_model])
+
+    mock_client_holder = mocker.Mock()
+    mock_client_holder.get_client.return_value = mock_client
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder",
+        return_value=mock_client_holder,
+    )
 
     with pytest.raises(HTTPException) as config_exc:
-        _get_default_model_id()
+        await _get_default_model_id()
 
     # Build an LLM connection error 503 using the same response model
     llm_response = ServiceUnavailableResponse(
@@ -306,6 +325,39 @@ def test_config_error_503_matches_llm_error_503_shape(
     assert set(config_detail.keys()) == set(llm_detail.keys()) == {"response", "cause"}
 
 
+@pytest.mark.asyncio
+async def test_get_default_model_id_auto_discovery_success(
+    mocker: MockerFixture, minimal_config: AppConfig
+) -> None:
+    """Test _get_default_model_id picks the first LLM that Llama Stack lists."""
+    mocker.patch("app.endpoints.rlsapi_v1.configuration", minimal_config)
+
+    # The embedding model is listed ahead of the LLM so that the model_type
+    # filter, not list position alone, decides the result.
+    embedding_entry = mocker.Mock(custom_metadata={"model_type": "embedding"})
+    embedding_entry.id = "sentence-transformers/all-mpnet-base-v2"
+    llm_entry = mocker.Mock(custom_metadata={"model_type": "llm"})
+    llm_entry.id = "openai/gpt-4o-mini"
+
+    llama_client = mocker.Mock()
+    llama_client.models = mocker.Mock()
+    llama_client.models.list = mocker.AsyncMock(
+        return_value=[embedding_entry, llm_entry]
+    )
+
+    # Route AsyncLlamaStackClientHolder().get_client() to the stub client.
+    holder = mocker.Mock()
+    holder.get_client.return_value = llama_client
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.AsyncLlamaStackClientHolder",
+        return_value=holder,
+    )
+
+    discovered = await _get_default_model_id()
+
+    assert discovered == "openai/gpt-4o-mini"
+
+
 # --- Test retrieve_simple_response ---