From 89a2ddd5a05ddda9a19b30ea1e311e0399d30157 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Thu, 28 May 2026 13:38:27 +0100
Subject: [PATCH 1/5] Improve AI timeout handling and error messages

---
 admin/server/server.go     |  2 +-
 runtime/ai/ai.go           | 10 +++++++++-
 runtime/ai/router_agent.go | 18 +++++++++++++++---
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/admin/server/server.go b/admin/server/server.go
index 24066a0e22b..6b8fed0555c 100644
--- a/admin/server/server.go
+++ b/admin/server/server.go
@@ -411,7 +411,7 @@ func (s *Server) jwtAttributesForService(ctx context.Context, serviceID string,
 
 func timeoutSelector(fullMethodName string) time.Duration {
 	if strings.HasPrefix(fullMethodName, "/rill.admin.v1.AIService") {
-		return time.Minute * 2
+		return time.Minute * 10
 	}
 	if fullMethodName == "/rill.admin.v1.AdminService/DeleteProject" {
 		return time.Minute * 4
diff --git a/runtime/ai/ai.go b/runtime/ai/ai.go
index 00c27a5b425..a401608776c 100644
--- a/runtime/ai/ai.go
+++ b/runtime/ai/ai.go
@@ -27,6 +27,8 @@ import (
 	semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
 	"go.opentelemetry.io/otel/trace"
 	"go.uber.org/zap"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
 	"google.golang.org/protobuf/types/known/structpb"
 )
 
@@ -1278,9 +1280,15 @@ func (s *Session) Complete(ctx context.Context, name string, out any, opts *Comp
 
 			// Handle LLM completion error
 			if err != nil {
-				if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) {
+				if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) { // Timeout from local ctx.
 					return nil, fmt.Errorf("LLM request timed out after %s: %w", llmRequestTimeout, err)
 				}
+				if status.Code(err) == codes.DeadlineExceeded { // Timeout from admin service.
+					return nil, fmt.Errorf("LLM request timed out: %w", err)
+				}
+				if errors.Is(err, ctx.Err()) {
+					return nil, ctx.Err()
+				}
 				return nil, fmt.Errorf("completion failed: %w (stack: %s)", err, string(debug.Stack()))
 			}
 
diff --git a/runtime/ai/router_agent.go b/runtime/ai/router_agent.go
index 590c9a158ae..2ef9f50161c 100644
--- a/runtime/ai/router_agent.go
+++ b/runtime/ai/router_agent.go
@@ -2,6 +2,7 @@ package ai
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"regexp"
 	"slices"
@@ -165,7 +166,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout
 			Args: analystAgentArgs,
 		})
 		if err != nil {
-			return nil, err
+			return nil, mapAgentErr(err)
 		}
 		return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil
 
@@ -184,7 +185,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout
 			Args: developerAgentArgs,
 		})
 		if err != nil {
-			return nil, err
+			return nil, mapAgentErr(err)
 		}
 		return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil
 
@@ -197,7 +198,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout
 			Args: args.FeedbackAgentArgs,
 		})
 		if err != nil {
-			return nil, err
+			return nil, mapAgentErr(err)
 		}
 		return &RouterAgentResult{Response: res.Response, Agent: FeedbackAgentName}, nil
 	}
@@ -245,6 +246,17 @@ func promptToTitle(message string) string {
 	return title
 }
 
+// mapAgentErr maps common agent errors to more user-friendly messages.
+func mapAgentErr(err error) error {
+	if errors.Is(err, context.Canceled) {
+		return fmt.Errorf("agent canceled")
+	}
+	if errors.Is(err, context.DeadlineExceeded) {
+		return fmt.Errorf("agent timed out")
+	}
+	return fmt.Errorf("agent error: %w", err)
+}
+
 func must[T any](t T, ok bool) T {
 	if !ok {
 		panic("expected value to be present")

From 5ea042b82d9e8f858ceb4f068e0f2aa7b86e868c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Thu, 28 May 2026 14:05:05 +0100
Subject: [PATCH 2/5] Add comment explaining admin AI service timeout

---
 admin/server/server.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/admin/server/server.go b/admin/server/server.go
index 6b8fed0555c..a1ff73bd9ff 100644
--- a/admin/server/server.go
+++ b/admin/server/server.go
@@ -411,6 +411,7 @@ func (s *Server) jwtAttributesForService(ctx context.Context, serviceID string,
 
 func timeoutSelector(fullMethodName string) time.Duration {
 	if strings.HasPrefix(fullMethodName, "/rill.admin.v1.AIService") {
+		// NOTE: The runtime usually sets a lower timeout through its AILLMTimeoutSeconds config, so this is more of a hard upper bound.
 		return time.Minute * 10
 	}
 	if fullMethodName == "/rill.admin.v1.AdminService/DeleteProject" {

From c432b9a60eaf1365dd9cc0e9d984d8cda4c9e72e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Thu, 28 May 2026 14:06:13 +0100
Subject: [PATCH 3/5] Document why mapAgentErr drops underlying context errors

---
 runtime/ai/router_agent.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/runtime/ai/router_agent.go b/runtime/ai/router_agent.go
index 2ef9f50161c..1224fe8693f 100644
--- a/runtime/ai/router_agent.go
+++ b/runtime/ai/router_agent.go
@@ -247,6 +247,9 @@ func promptToTitle(message string) string {
 }
 
 // mapAgentErr maps common agent errors to more user-friendly messages.
+//
+// NOTE: For context errors, it deliberately does not wrap the underlying error (so errors.Is will no longer match it) to keep messages clean.
+// The actual error is still available in the message containing the sub-agent's result.
 func mapAgentErr(err error) error {
 	if errors.Is(err, context.Canceled) {
 		return fmt.Errorf("agent canceled")

From 986bea14ac55b8ed0229413f48d3715b88d1d3bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Fri, 29 May 2026 08:45:44 +0100
Subject: [PATCH 4/5] Document 10-minute admin cap on rill.ai.llm_timeout_seconds

---
 runtime/drivers/registry.go                | 1 +
 runtime/parser/schema/rillyaml.schema.yaml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/runtime/drivers/registry.go b/runtime/drivers/registry.go
index 6ae29b2026f..3af2802ce29 100644
--- a/runtime/drivers/registry.go
+++ b/runtime/drivers/registry.go
@@ -120,6 +120,7 @@ type InstanceConfig struct {
 	// AICompletionTimeoutSeconds is the maximum duration of a full AI completion request, which may include multiple LLM requests and tool calls.
 	AICompletionTimeoutSeconds uint32 `mapstructure:"rill.ai.completion_timeout_seconds"`
 	// AILLMTimeoutSeconds is the maximum duration of a single LLM completion request.
+	// Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect.
 	AILLMTimeoutSeconds uint32 `mapstructure:"rill.ai.llm_timeout_seconds"`
 	// AIDefaultQueryLimit is the default row limit applied to AI tool queries when no limit is specified.
 	AIDefaultQueryLimit int64 `mapstructure:"rill.ai.default_query_limit"`
diff --git a/runtime/parser/schema/rillyaml.schema.yaml b/runtime/parser/schema/rillyaml.schema.yaml
index f1f8ba77863..73c99642f3f 100644
--- a/runtime/parser/schema/rillyaml.schema.yaml
+++ b/runtime/parser/schema/rillyaml.schema.yaml
@@ -171,7 +171,7 @@ allOf:
             description: "Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300."
           rill.ai.llm_timeout_seconds:
             type: integer
-            description: "Maximum duration of a single LLM completion request, in seconds. Default: 180."
+            description: "Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect."
           rill.ai.default_query_limit:
             type: integer
             description: "Default row limit applied to AI tool queries when no limit is specified. Default: 25."

From 8e382a04db7bce6137265c32f651dc6c5d6c005b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Fri, 29 May 2026 10:51:09 +0100
Subject: [PATCH 5/5] Add llm_timeout_seconds cap note to rill.yaml reference docs

---
 docs/docs/reference/project-files/rill-yaml.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/reference/project-files/rill-yaml.md b/docs/docs/reference/project-files/rill-yaml.md
index 6a86977b7f6..87758c0c139 100644
--- a/docs/docs/reference/project-files/rill-yaml.md
+++ b/docs/docs/reference/project-files/rill-yaml.md
@@ -164,7 +164,7 @@ _[object]_ - A map of key-value pairs for setting variables on your project. It
 
   - **`rill.ai.completion_timeout_seconds`** - _[integer]_ - Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300. 
 
-  - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. 
+  - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect. 
 
   - **`rill.ai.default_query_limit`** - _[integer]_ - Default row limit applied to AI tool queries when no limit is specified. Default: 25.