From 89a2ddd5a05ddda9a19b30ea1e311e0399d30157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Thu, 28 May 2026 13:38:27 +0100 Subject: [PATCH 1/5] Improve AI timeout handling and error messages --- admin/server/server.go | 2 +- runtime/ai/ai.go | 10 +++++++++- runtime/ai/router_agent.go | 18 +++++++++++++++--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/admin/server/server.go b/admin/server/server.go index 24066a0e22b..6b8fed0555c 100644 --- a/admin/server/server.go +++ b/admin/server/server.go @@ -411,7 +411,7 @@ func (s *Server) jwtAttributesForService(ctx context.Context, serviceID string, func timeoutSelector(fullMethodName string) time.Duration { if strings.HasPrefix(fullMethodName, "/rill.admin.v1.AIService") { - return time.Minute * 2 + return time.Minute * 10 } if fullMethodName == "/rill.admin.v1.AdminService/DeleteProject" { return time.Minute * 4 diff --git a/runtime/ai/ai.go b/runtime/ai/ai.go index 00c27a5b425..a401608776c 100644 --- a/runtime/ai/ai.go +++ b/runtime/ai/ai.go @@ -27,6 +27,8 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.17.0" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/structpb" ) @@ -1278,9 +1280,15 @@ func (s *Session) Complete(ctx context.Context, name string, out any, opts *Comp // Handle LLM completion error if err != nil { - if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) { + if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) { // Timeout from local ctx. return nil, fmt.Errorf("LLM request timed out after %s: %w", llmRequestTimeout, err) } + if status.Code(err) == codes.DeadlineExceeded { // Timeout from admin service. 
+ return nil, fmt.Errorf("LLM request timed out: %w", err) + } + if errors.Is(err, ctx.Err()) { + return nil, ctx.Err() + } return nil, fmt.Errorf("completion failed: %w (stack: %s)", err, string(debug.Stack())) } diff --git a/runtime/ai/router_agent.go b/runtime/ai/router_agent.go index 590c9a158ae..2ef9f50161c 100644 --- a/runtime/ai/router_agent.go +++ b/runtime/ai/router_agent.go @@ -2,6 +2,7 @@ package ai import ( "context" + "errors" "fmt" "regexp" "slices" @@ -165,7 +166,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout Args: analystAgentArgs, }) if err != nil { - return nil, err + return nil, mapAgentErr(err) } return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil @@ -184,7 +185,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout Args: developerAgentArgs, }) if err != nil { - return nil, err + return nil, mapAgentErr(err) } return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil @@ -197,7 +198,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout Args: args.FeedbackAgentArgs, }) if err != nil { - return nil, err + return nil, mapAgentErr(err) } return &RouterAgentResult{Response: res.Response, Agent: FeedbackAgentName}, nil } @@ -245,6 +246,17 @@ func promptToTitle(message string) string { return title } +// mapAgentErr maps common agent errors to more user-friendly messages. 
+func mapAgentErr(err error) error { + if errors.Is(err, context.Canceled) { + return fmt.Errorf("agent canceled") + } + if errors.Is(err, context.DeadlineExceeded) { + return fmt.Errorf("agent timed out") + } + return fmt.Errorf("agent error: %w", err) +} + func must[T any](t T, ok bool) T { if !ok { panic("expected value to be present") From 5ea042b82d9e8f858ceb4f068e0f2aa7b86e868c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Thu, 28 May 2026 14:05:05 +0100 Subject: [PATCH 2/5] Add comment explaining admin AI service timeout --- admin/server/server.go | 1 + 1 file changed, 1 insertion(+) diff --git a/admin/server/server.go b/admin/server/server.go index 6b8fed0555c..a1ff73bd9ff 100644 --- a/admin/server/server.go +++ b/admin/server/server.go @@ -411,6 +411,7 @@ func (s *Server) jwtAttributesForService(ctx context.Context, serviceID string, func timeoutSelector(fullMethodName string) time.Duration { if strings.HasPrefix(fullMethodName, "/rill.admin.v1.AIService") { + // NOTE: The runtime usually sets a lower timeout through its AILLMTimeoutSeconds config, so this is more of a hard upper bound. return time.Minute * 10 } if fullMethodName == "/rill.admin.v1.AdminService/DeleteProject" { From c432b9a60eaf1365dd9cc0e9d984d8cda4c9e72e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Thu, 28 May 2026 14:06:13 +0100 Subject: [PATCH 3/5] Document why mapAgentErr drops underlying context errors --- runtime/ai/router_agent.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtime/ai/router_agent.go b/runtime/ai/router_agent.go index 2ef9f50161c..1224fe8693f 100644 --- a/runtime/ai/router_agent.go +++ b/runtime/ai/router_agent.go @@ -247,6 +247,9 @@ func promptToTitle(message string) string { } // mapAgentErr maps common agent errors to more user-friendly messages. +// +// NOTE: For context errors, it does not include the underlying error to keep messages clean. 
+// The actual error is still available in the message containing the sub-agent's result. func mapAgentErr(err error) error { if errors.Is(err, context.Canceled) { return fmt.Errorf("agent canceled") From 986bea14ac55b8ed0229413f48d3715b88d1d3bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Fri, 29 May 2026 08:45:44 +0100 Subject: [PATCH 4/5] Docs --- runtime/drivers/registry.go | 1 + runtime/parser/schema/rillyaml.schema.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/drivers/registry.go b/runtime/drivers/registry.go index 6ae29b2026f..3af2802ce29 100644 --- a/runtime/drivers/registry.go +++ b/runtime/drivers/registry.go @@ -120,6 +120,7 @@ type InstanceConfig struct { // AICompletionTimeoutSeconds is the maximum duration of a full AI completion request, which may include multiple LLM requests and tool calls. AICompletionTimeoutSeconds uint32 `mapstructure:"rill.ai.completion_timeout_seconds"` // AILLMTimeoutSeconds is the maximum duration of a single LLM completion request. + // Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect. AILLMTimeoutSeconds uint32 `mapstructure:"rill.ai.llm_timeout_seconds"` // AIDefaultQueryLimit is the default row limit applied to AI tool queries when no limit is specified. AIDefaultQueryLimit int64 `mapstructure:"rill.ai.default_query_limit"` diff --git a/runtime/parser/schema/rillyaml.schema.yaml b/runtime/parser/schema/rillyaml.schema.yaml index f1f8ba77863..73c99642f3f 100644 --- a/runtime/parser/schema/rillyaml.schema.yaml +++ b/runtime/parser/schema/rillyaml.schema.yaml @@ -171,7 +171,7 @@ allOf: description: "Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300." 
rill.ai.llm_timeout_seconds: type: integer - description: "Maximum duration of a single LLM completion request, in seconds. Default: 180." + description: "Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes (600 seconds), so values above 600 have no effect." rill.ai.default_query_limit: type: integer description: "Default row limit applied to AI tool queries when no limit is specified. Default: 25." From 8e382a04db7bce6137265c32f651dc6c5d6c005b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= Date: Fri, 29 May 2026 10:51:09 +0100 Subject: [PATCH 5/5] Docs --- docs/docs/reference/project-files/rill-yaml.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/reference/project-files/rill-yaml.md b/docs/docs/reference/project-files/rill-yaml.md index 6a86977b7f6..87758c0c139 100644 --- a/docs/docs/reference/project-files/rill-yaml.md +++ b/docs/docs/reference/project-files/rill-yaml.md @@ -164,7 +164,7 @@ _[object]_ - A map of key-value pairs for setting variables on your project. It - **`rill.ai.completion_timeout_seconds`** - _[integer]_ - Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300. - - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. + - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes (600 seconds), so values above 600 have no effect. - **`rill.ai.default_query_limit`** - _[integer]_ - Default row limit applied to AI tool queries when no limit is specified. Default: 25.