From cedb27219b7a7afdcee6d0dad0b9bed07a442fb2 Mon Sep 17 00:00:00 2001 From: Timothy Zelinsky Date: Sat, 21 Mar 2026 17:40:38 +1100 Subject: [PATCH] Add skill support plus refine the demo flow to show how skills can be used with tooling. Demo uses HealthKit with the health coach. --- .../Shared/AgentDemoView+ChatSections.swift | 144 +++++- .../Shared/AgentDemoView.swift | 1 + .../Shared/AgentDemoViewModel+Messaging.swift | 281 ++++++++-- .../Shared/AgentDemoViewModel+Tools.swift | 226 ++++---- .../Shared/AgentDemoViewModel.swift | 116 ++++- DemoApp/README.md | 9 +- README.md | 104 +++- .../Runtime/AgentDefinitionSource.swift | 180 +++++++ Sources/CodexKit/Runtime/AgentModels.swift | 82 ++- Sources/CodexKit/Runtime/AgentPersona.swift | 88 +++- Sources/CodexKit/Runtime/AgentRuntime.swift | 398 +++++++++++++- Sources/CodexKit/Runtime/AgentSkill.swift | 59 +++ .../AgentDefinitionSourceLoaderTests.swift | 116 +++++ Tests/CodexKitTests/AgentRuntimeTests.swift | 484 ++++++++++++++++++ 14 files changed, 2116 insertions(+), 172 deletions(-) create mode 100644 Sources/CodexKit/Runtime/AgentDefinitionSource.swift create mode 100644 Sources/CodexKit/Runtime/AgentSkill.swift create mode 100644 Tests/CodexKitTests/AgentDefinitionSourceLoaderTests.swift diff --git a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView+ChatSections.swift b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView+ChatSections.swift index 8a04379..a55f7df 100644 --- a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView+ChatSections.swift +++ b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView+ChatSections.swift @@ -136,16 +136,89 @@ extension AgentDemoView { var personaExamples: some View { if viewModel.session != nil { VStack(alignment: .leading, spacing: 10) { - Text("Persona Demo") + Text("Personas And Skills") .font(.headline) Text( viewModel.activeThreadPersonaSummary.map { "Active persona: \($0)" } - ?? 
"Create a support-persona thread, swap it to planner, or send a one-turn reviewer override." + ?? "Run the quick test to compare normal behavior vs a skill-restricted thread." ) .font(.subheadline) .foregroundStyle(.secondary) + Button( + viewModel.isRunningSkillPolicyProbe + ? "Running Quick Skill Test..." + : "Run Quick Skill Test" + ) { + Task { + await viewModel.runSkillPolicyProbe() + } + } + .buttonStyle(.borderedProminent) + .disabled(viewModel.isRunningSkillPolicyProbe) + + if let skillPolicyProbeResult = viewModel.skillPolicyProbeResult { + VStack(alignment: .leading, spacing: 8) { + Text( + skillPolicyProbeResult.passed + ? "Quick Skill Test Passed" + : "Quick Skill Test Inconclusive" + ) + .font(.subheadline.weight(.semibold)) + .foregroundStyle(skillPolicyProbeResult.passed ? .green : .orange) + + Text("Prompt: \(skillPolicyProbeResult.prompt)") + .font(.caption) + .foregroundStyle(.secondary) + + Text("\(skillPolicyProbeResult.normalThreadTitle): \(skillPolicyProbeResult.normalSummary)") + .font(.caption) + .frame(maxWidth: .infinity, alignment: .leading) + + if let normalAssistantReply = skillPolicyProbeResult.normalAssistantReply, + !normalAssistantReply.isEmpty { + Text("Normal reply: \(normalAssistantReply)") + .font(.caption) + .foregroundStyle(.secondary) + .frame(maxWidth: .infinity, alignment: .leading) + } + + Text("\(skillPolicyProbeResult.skillThreadTitle): \(skillPolicyProbeResult.skillSummary)") + .font(.caption) + .frame(maxWidth: .infinity, alignment: .leading) + + if let skillAssistantReply = skillPolicyProbeResult.skillAssistantReply, + !skillAssistantReply.isEmpty { + Text("Skill reply: \(skillAssistantReply)") + .font(.caption) + .foregroundStyle(.secondary) + .frame(maxWidth: .infinity, alignment: .leading) + } + + HStack(spacing: 8) { + Button("Open Normal Thread") { + Task { + await viewModel.activateThread(id: skillPolicyProbeResult.normalThreadID) + } + } + .buttonStyle(.bordered) + + Button("Open Skill Thread") { + Task { + 
await viewModel.activateThread(id: skillPolicyProbeResult.skillThreadID) + } + } + .buttonStyle(.bordered) + } + } + .padding(12) + .background( + RoundedRectangle(cornerRadius: 12) + .fill(Color.secondary.opacity(0.10)) + ) + } + ScrollView(.horizontal, showsIndicators: false) { HStack(spacing: 12) { Button("Create Support Thread") { @@ -168,7 +241,72 @@ extension AgentDemoView { await viewModel.sendReviewerOverrideExample() } } - .buttonStyle(.borderedProminent) + .buttonStyle(.bordered) + + Button("Create Health Coach Skill") { + Task { + await viewModel.createHealthCoachSkillThread() + } + } + .buttonStyle(.bordered) + + Button("Create Travel Planner Skill") { + Task { + await viewModel.createTravelPlannerSkillThread() + } + } + .buttonStyle(.bordered) + } + } + } + .frame(maxWidth: .infinity, alignment: .leading) + } + } + + @ViewBuilder + var instructionsDebugPanel: some View { + if viewModel.session != nil { + VStack(alignment: .leading, spacing: 10) { + Toggle( + "Show Resolved Instructions", + isOn: Binding( + get: { viewModel.showResolvedInstructionsDebug }, + set: { isEnabled in + viewModel.showResolvedInstructionsDebug = isEnabled + if !isEnabled { + viewModel.lastResolvedInstructions = nil + viewModel.lastResolvedInstructionsThreadTitle = nil + } + } + ) + ) + .toggleStyle(.switch) + + if viewModel.showResolvedInstructionsDebug { + VStack(alignment: .leading, spacing: 8) { + Text( + viewModel.lastResolvedInstructionsThreadTitle.map { + "Latest for thread: \($0)" + } ?? "Send a message to capture resolved instructions." + ) + .font(.caption) + .foregroundStyle(.secondary) + + ScrollView { + Text( + viewModel.lastResolvedInstructions + ?? "No captured instructions yet." 
+ ) + .font(.system(.caption, design: .monospaced)) + .textSelection(.enabled) + .frame(maxWidth: .infinity, alignment: .leading) + } + .frame(maxHeight: 240) + .padding(10) + .background( + RoundedRectangle(cornerRadius: 12) + .fill(Color.secondary.opacity(0.10)) + ) } } } diff --git a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView.swift b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView.swift index 68dab6f..b78ab8c 100644 --- a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView.swift +++ b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoView.swift @@ -21,6 +21,7 @@ struct AgentDemoView: View { VStack(spacing: 16) { header personaExamples + instructionsDebugPanel threadStrip messageTranscript } diff --git a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Messaging.swift b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Messaging.swift index 63a274e..ee3cf6c 100644 --- a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Messaging.swift +++ b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Messaging.swift @@ -3,18 +3,29 @@ import Foundation @MainActor extension AgentDemoViewModel { + private struct SendDiagnostics { + var sawToolCall = false + var sawSuccessfulToolResult = false + var sawFailedToolResult = false + var firstFailureMessage: String? + var turnFailedCode: String? + var assistantReply: String? + } + func createThreadInternal( title: String?, - personaStack: AgentPersonaStack? 
+ personaStack: AgentPersonaStack?, + skillIDs: [String] = [] ) async { do { let thread = try await runtime.createThread( title: title, - personaStack: personaStack + personaStack: personaStack, + skillIDs: skillIDs ) threads = await runtime.threads() activeThreadID = thread.id - messages = await runtime.messages(for: thread.id) + setMessages(await runtime.messages(for: thread.id)) } catch { lastError = error.localizedDescription } @@ -40,74 +51,238 @@ extension AgentDemoViewModel { return } - streamingText = "" + let request = UserMessageRequest( + text: trimmedText, + images: images, + personaOverride: personaOverride + ) + + do { + _ = try await sendRequest( + request, + in: activeThreadID, + captureResolvedInstructions: showResolvedInstructionsDebug, + renderInActiveTranscript: true + ) + } catch { + lastError = error.localizedDescription + } + } + + func runSkillPolicyProbe() async { + guard session != nil else { + lastError = "Sign in before running the skill policy probe." + return + } + guard !isRunningSkillPolicyProbe else { + return + } + + isRunningSkillPolicyProbe = true + lastError = nil + skillPolicyProbeResult = nil + showResolvedInstructionsDebug = true + defer { + isRunningSkillPolicyProbe = false + } + + let probePrompt = """ + Give me a practical plan for the rest of today. 
+ """ do { - let stream = try await runtime.sendMessage( - UserMessageRequest( - text: trimmedText, - images: images, - personaOverride: personaOverride - ), - in: activeThreadID + let normalThread = try await runtime.createThread( + title: "Skill Policy Probe: Normal" + ) + let skillThread = try await runtime.createThread( + title: "Skill Policy Probe: Health Coach", + skillIDs: [Self.healthCoachSkill.id] + ) + threads = await runtime.threads() + + let normalDiagnostics = try await sendRequest( + UserMessageRequest(text: probePrompt), + in: normalThread.id, + captureResolvedInstructions: true, + renderInActiveTranscript: false + ) + + let skillDiagnostics = try await sendRequest( + UserMessageRequest(text: probePrompt), + in: skillThread.id, + captureResolvedInstructions: true, + renderInActiveTranscript: false + ) + + threads = await runtime.threads() + streamingText = "" + activeThreadID = skillThread.id + setMessages(await runtime.messages(for: skillThread.id)) + + let normalSummary = diagnosticsSummary( + normalDiagnostics, + fallback: "No tool call was emitted by the model in the normal thread." + ) + let skillSummary = diagnosticsSummary( + skillDiagnostics, + fallback: "No tool call was emitted by the model in the skill thread." ) - messages = await runtime.messages(for: activeThreadID) - - for try await event in stream { - switch event { - case let .threadStarted(thread): - threads = [thread] + threads.filter { $0.id != thread.id } - - case let .threadStatusChanged(threadID, status): - threads = threads.map { thread in - guard thread.id == threadID else { - return thread - } - - var updated = thread - updated.status = status - updated.updatedAt = Date() - return updated + + skillPolicyProbeResult = SkillPolicyProbeResult( + prompt: probePrompt, + normalThreadID: normalThread.id, + normalThreadTitle: normalThread.title ?? "Skill Policy Probe: Normal", + skillThreadID: skillThread.id, + skillThreadTitle: skillThread.title ?? 
"Skill Policy Probe: Health Coach", + normalSummary: normalSummary, + skillSummary: skillSummary, + normalAssistantReply: normalDiagnostics.assistantReply, + skillAssistantReply: skillDiagnostics.assistantReply, + skillToolSucceeded: skillDiagnostics.sawSuccessfulToolResult + ) + + if skillPolicyProbeResult?.passed == false { + lastError = "Probe completed, but result was inconclusive. Review the two thread summaries." + } + } catch { + lastError = error.localizedDescription + } + } + + private func diagnosticsSummary( + _ diagnostics: SendDiagnostics, + fallback: String + ) -> String { + if diagnostics.sawSuccessfulToolResult { + return "Tool executed successfully." + } + if let failureMessage = diagnostics.firstFailureMessage, + !failureMessage.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return "Tool blocked: \(failureMessage)" + } + if let turnFailedCode = diagnostics.turnFailedCode { + return "Turn failed: \(turnFailedCode)" + } + if diagnostics.sawToolCall { + return "Tool call was requested, but no final tool result was captured." + } + return fallback + } + + private func sendRequest( + _ request: UserMessageRequest, + in threadID: String, + captureResolvedInstructions: Bool, + renderInActiveTranscript: Bool + ) async throws -> SendDiagnostics { + if captureResolvedInstructions { + do { + lastResolvedInstructions = try await runtime.resolvedInstructionsPreview( + for: threadID, + request: request + ) + let threadTitle = threads.first(where: { $0.id == threadID })?.title + lastResolvedInstructionsThreadTitle = threadTitle ?? 
"Untitled Thread" + } catch { + lastResolvedInstructions = nil + lastResolvedInstructionsThreadTitle = nil + throw error + } + } else { + lastResolvedInstructions = nil + lastResolvedInstructionsThreadTitle = nil + } + + var diagnostics = SendDiagnostics() + if renderInActiveTranscript { + streamingText = "" + } + + let stream = try await runtime.sendMessage( + request, + in: threadID + ) + if renderInActiveTranscript { + setMessages(await runtime.messages(for: threadID)) + } + + for try await event in stream { + switch event { + case let .threadStarted(thread): + threads = [thread] + threads.filter { $0.id != thread.id } + + case let .threadStatusChanged(threadID, status): + threads = threads.map { thread in + guard thread.id == threadID else { + return thread } - case .turnStarted: - break + var updated = thread + updated.status = status + updated.updatedAt = Date() + return updated + } + + case .turnStarted: + break - case let .assistantMessageDelta(_, _, delta): + case let .assistantMessageDelta(_, _, delta): + if renderInActiveTranscript { streamingText.append(delta) + } - case let .messageCommitted(message): - messages.append(message) + case let .messageCommitted(message): + if message.role == .assistant { + let reply = message.text.trimmingCharacters(in: .whitespacesAndNewlines) + if !reply.isEmpty { + diagnostics.assistantReply = reply + } + } + if renderInActiveTranscript { + upsertMessage(message) if message.role == .assistant { streamingText = "" } + } - case .approvalRequested: - break - - case .approvalResolved: - break + case .approvalRequested: + break - case let .toolCallStarted(invocation): - Self.logger.info( - "Tool call requested: \(invocation.toolName, privacy: .public) with arguments: \(String(describing: invocation.arguments), privacy: .public)" - ) + case .approvalResolved: + break - case let .toolCallFinished(result): - Self.logger.info( - "Tool call finished: \(result.toolName, privacy: .public) success=\(result.success, privacy: .public) 
output=\(result.primaryText ?? "", privacy: .public)" - ) + case let .toolCallStarted(invocation): + diagnostics.sawToolCall = true + Self.logger.info( + "Tool call requested: \(invocation.toolName, privacy: .public) with arguments: \(String(describing: invocation.arguments), privacy: .public)" + ) - case .turnCompleted: - messages = await runtime.messages(for: activeThreadID) - threads = await runtime.threads() + case let .toolCallFinished(result): + diagnostics.sawToolCall = true + if result.success { + diagnostics.sawSuccessfulToolResult = true + } else { + diagnostics.sawFailedToolResult = true + if diagnostics.firstFailureMessage == nil { + diagnostics.firstFailureMessage = result.primaryText ?? result.errorMessage + } + } + Self.logger.info( + "Tool call finished: \(result.toolName, privacy: .public) success=\(result.success, privacy: .public) output=\(result.primaryText ?? "", privacy: .public)" + ) - case let .turnFailed(error): - lastError = error.message + case .turnCompleted: + if renderInActiveTranscript { + setMessages(await runtime.messages(for: threadID)) } + threads = await runtime.threads() + + case let .turnFailed(error): + diagnostics.turnFailedCode = error.code + lastError = error.message } - } catch { - lastError = error.localizedDescription } + + return diagnostics } } diff --git a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Tools.swift b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Tools.swift index ad39109..c078a91 100644 --- a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Tools.swift +++ b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel+Tools.swift @@ -1,124 +1,170 @@ import CodexKit import Foundation +private struct HealthCoachToolSnapshot: Sendable { + let stepsToday: Int + let dailyGoal: Int + let remainingSteps: Int + let hoursLeftToday: Int + let healthKitAuthorized: Bool +} + @MainActor extension AgentDemoViewModel { + func registerDemoSkills() async { + do { + try await 
runtime.replaceSkill(Self.healthCoachSkill) + try await runtime.replaceSkill(Self.travelPlannerSkill) + } catch { + lastError = error.localizedDescription + } + } + func registerDemoTool() async { - let definition = ToolDefinition( - name: "demo_calculate_shipping_quote", - description: "Calculate a deterministic demo shipping quote, including price and estimated delivery days.", - inputSchema: .object([ - "type": .string("object"), - "properties": .object([ - "destination_zone": .object([ - "type": .string("string"), - "description": .string("Destination zone: A, B, C, or D."), - ]), - "weight_kg": .object([ - "type": .string("number"), - "description": .string("Package weight in kilograms."), - ]), - "speed": .object([ - "type": .string("string"), - "description": .string("Shipping speed: standard, express, or priority."), + do { + let healthCoachDefinition = ToolDefinition( + name: Self.healthCoachToolName, + description: "Fetch a live health-coach progress snapshot from HealthKit-aware app state.", + inputSchema: .object([ + "type": .string("object"), + "properties": .object([:]), + ]) + ) + + let travelPlannerDefinition = ToolDefinition( + name: Self.travelPlannerToolName, + description: "Build a compact deterministic day-by-day travel plan.", + inputSchema: .object([ + "type": .string("object"), + "properties": .object([ + "destination": .object([ + "type": .string("string"), + "description": .string("Trip destination."), + ]), + "trip_days": .object([ + "type": .string("number"), + "description": .string("Number of trip days."), + ]), + "budget_level": .object([ + "type": .string("string"), + "description": .string("Budget level: low, medium, or high."), + ]), + "companions": .object([ + "type": .string("string"), + "description": .string("Who is traveling, for example solo, couple, or family."), + ]), ]), - "signature_required": .object([ - "type": .string("boolean"), - "description": .string("Whether signature on delivery is required."), + "required": 
.array([ + .string("destination"), ]), - ]), - ]), - approvalPolicy: .requiresApproval, - approvalMessage: "Allow the demo app to calculate a shipping quote?" - ) + ]) + ) - do { - try await runtime.replaceTool(definition, executor: AnyToolExecutor { invocation, _ in - Self.logger.info( - "Executing tool \(invocation.toolName, privacy: .public) with arguments: \(String(describing: invocation.arguments), privacy: .public)" - ) - let result = Self.makeShippingQuote(invocation: invocation) - Self.logger.info( - "Tool \(invocation.toolName, privacy: .public) returned: \(result.primaryText ?? "", privacy: .public)" + try await registerTool(healthCoachDefinition) { [weak self] invocation, _ in + guard let self else { + return .failure(invocation: invocation, message: "Health coach context is unavailable.") + } + let snapshot = await self.captureHealthCoachToolSnapshot() + return Self.makeHealthCoachProgress( + invocation: invocation, + snapshot: snapshot ) - return result - }) + } + try await registerTool(travelPlannerDefinition) { invocation, _ in + Self.makeTravelDayPlan(invocation: invocation) + } } catch { lastError = error.localizedDescription } } - nonisolated static func makeShippingQuote(invocation: ToolInvocation) -> ToolResultEnvelope { + private func registerTool( + _ definition: ToolDefinition, + execute: @escaping @Sendable (ToolInvocation, ToolExecutionContext) async throws -> ToolResultEnvelope + ) async throws { + try await runtime.replaceTool(definition, executor: AnyToolExecutor { invocation, context in + Self.logger.info( + "Executing tool \(invocation.toolName, privacy: .public) with arguments: \(String(describing: invocation.arguments), privacy: .public)" + ) + let result = try await execute(invocation, context) + Self.logger.info( + "Tool \(invocation.toolName, privacy: .public) returned: \(result.primaryText ?? 
"", privacy: .public)" + ) + return result + }) + } + + private func captureHealthCoachToolSnapshot() async -> HealthCoachToolSnapshot { + var stepsToday = todayStepCount +#if os(iOS) + if healthKitAuthorized, + let refreshedStepCount = try? await fetchTodayStepCount() { + stepsToday = refreshedStepCount + todayStepCount = refreshedStepCount + healthLastUpdatedAt = Date() + } +#endif + let safeGoal = max(dailyStepGoal, 1_000) + let remainingSteps = max(safeGoal - stepsToday, 0) + let endOfDay = Calendar.current.startOfDay(for: Date()).addingTimeInterval(86_400) + let hoursLeftToday = max(Int(ceil(endOfDay.timeIntervalSinceNow / 3600)), 1) + + return HealthCoachToolSnapshot( + stepsToday: stepsToday, + dailyGoal: safeGoal, + remainingSteps: remainingSteps, + hoursLeftToday: hoursLeftToday, + healthKitAuthorized: healthKitAuthorized + ) + } + + private nonisolated static func makeHealthCoachProgress( + invocation: ToolInvocation, + snapshot: HealthCoachToolSnapshot + ) -> ToolResultEnvelope { + let freshness = snapshot.healthKitAuthorized ? "live_or_cached_healthkit" : "app_cached_only" + + return .success( + invocation: invocation, + text: """ + health_progress[stepsToday=\(snapshot.stepsToday), dailyGoal=\(snapshot.dailyGoal), remainingSteps=\(snapshot.remainingSteps), hoursLeftToday=\(snapshot.hoursLeftToday), healthKitAuthorized=\(snapshot.healthKitAuthorized), freshness=\(freshness)] + """ + ) + } + + nonisolated static func makeTravelDayPlan(invocation: ToolInvocation) -> ToolResultEnvelope { guard case let .object(arguments) = invocation.arguments else { return .failure( invocation: invocation, - message: "The shipping quote tool expected object arguments." + message: "The travel planner tool expected object arguments." ) } - let destinationZone = arguments["destination_zone"]?.stringValue? + let destination = arguments["destination"]?.stringValue? .trimmingCharacters(in: .whitespacesAndNewlines) - .uppercased() ?? 
"" - let speed = arguments["speed"]?.stringValue? + let tripDays = max(Int(arguments["trip_days"]?.numberValue ?? 3), 1) + let budget = arguments["budget_level"]?.stringValue? .trimmingCharacters(in: .whitespacesAndNewlines) - .lowercased() ?? "standard" - let weightKilograms = arguments["weight_kg"]?.numberValue ?? 0 - let signatureRequired = arguments["signature_required"]?.boolValue ?? false - - let basePriceByZone: [String: Double] = [ - "A": 4.0, - "B": 6.5, - "C": 9.0, - "D": 12.5, - ] - let speedMultipliers: [String: Double] = [ - "standard": 1.0, - "express": 1.6, - "priority": 2.1, - ] - let deliveryDaysBySpeedAndZone: [String: [String: Int]] = [ - "standard": ["A": 2, "B": 4, "C": 6, "D": 8], - "express": ["A": 1, "B": 2, "C": 3, "D": 4], - "priority": ["A": 1, "B": 1, "C": 2, "D": 3], - ] - - guard let zoneBasePrice = basePriceByZone[destinationZone] else { - return .failure( - invocation: invocation, - message: "Unknown destination zone. Use A, B, C, or D." - ) - } + .lowercased() ?? "medium" + let companions = arguments["companions"]?.stringValue? + .trimmingCharacters(in: .whitespacesAndNewlines) + .lowercased() ?? "solo" - guard let speedMultiplier = speedMultipliers[speed] else { - return .failure( - invocation: invocation, - message: "Unknown shipping speed. Use standard, express, or priority." - ) + guard let destination, !destination.isEmpty else { + return .failure(invocation: invocation, message: "destination is required.") } - guard weightKilograms > 0 else { - return .failure( - invocation: invocation, - message: "Weight must be greater than zero kilograms." - ) + let planLines = (1 ... min(tripDays, 10)).map { day in + "day\(day):arrival_walk=\(budget == "high" ? "taxi+priority-pass" : "public-transit"),focus=\(companions == "family" ? "kid-friendly highlight + early dinner" : "local highlight + flexible dinner")" } - let signatureSurcharge = signatureRequired ? 
2.5 : 0 - let subtotal = (zoneBasePrice + (weightKilograms * 1.75)) * speedMultiplier - let total = round((subtotal + signatureSurcharge) * 100) / 100 - let deliveryDays = deliveryDaysBySpeedAndZone[speed]?[destinationZone] ?? 0 - return .success( invocation: invocation, text: """ - quote[zone=\(destinationZone), weightKg=\(Self.formattedDecimal(weightKilograms)), speed=\(speed), signatureRequired=\(signatureRequired ? "yes" : "no"), totalUSD=\(Self.formattedDecimal(total)), estimatedDeliveryDays=\(deliveryDays), reference=DEMO-\(destinationZone)-\(speed.uppercased())] + travel_day_plan[destination=\(destination), tripDays=\(tripDays), budget=\(budget), companions=\(companions), plan=\(planLines.joined(separator: " | "))] """ ) } - - nonisolated static func formattedDecimal(_ value: Double) -> String { - String(format: "%.2f", value) - } } private extension JSONValue { @@ -129,10 +175,4 @@ private extension JSONValue { return value } - var boolValue: Bool? { - guard case let .bool(value) = self else { - return nil - } - return value - } } diff --git a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel.swift b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel.swift index ee18427..51bfe3d 100644 --- a/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel.swift +++ b/DemoApp/AssistantRuntimeDemoApp/Shared/AgentDemoViewModel.swift @@ -8,6 +8,23 @@ import HealthKit import UserNotifications #endif +struct SkillPolicyProbeResult: Sendable { + let prompt: String + let normalThreadID: String + let normalThreadTitle: String + let skillThreadID: String + let skillThreadTitle: String + let normalSummary: String + let skillSummary: String + let normalAssistantReply: String? + let skillAssistantReply: String? 
+ let skillToolSucceeded: Bool + + var passed: Bool { + skillToolSucceeded + } +} + @MainActor @Observable final class AgentDemoViewModel: @unchecked Sendable { @@ -37,12 +54,38 @@ final class AgentDemoViewModel: @unchecked Sendable { instructions: "For this reply only, act as a strict reviewer and call out risks first." ), ]) + nonisolated static let healthCoachToolName = "health_coach_fetch_progress" + nonisolated static let travelPlannerToolName = "travel_planner_build_day_plan" + nonisolated static let healthCoachSkill = AgentSkill( + id: "health_coach", + name: "Health Coach", + instructions: "You are a health coach focused on daily step goals and execution. For every user turn, call the \(healthCoachToolName) tool exactly once before your final reply, then provide one practical walking plan and one accountability line.", + executionPolicy: .init( + allowedToolNames: [healthCoachToolName], + requiredToolNames: [healthCoachToolName], + maxToolCalls: 1 + ) + ) + nonisolated static let travelPlannerSkill = AgentSkill( + id: "travel_planner", + name: "Travel Planner", + instructions: "You are a travel planning assistant for mobile users. Provide concise day-by-day itineraries, practical logistics, and a compact packing checklist.", + executionPolicy: .init( + allowedToolNames: [travelPlannerToolName], + maxToolCalls: 1 + ) + ) var session: ChatGPTSession? var threads: [AgentThread] = [] var messages: [AgentMessage] = [] var streamingText = "" var lastError: String? + var showResolvedInstructionsDebug = false + var lastResolvedInstructions: String? + var lastResolvedInstructionsThreadTitle: String? + var isRunningSkillPolicyProbe = false + var skillPolicyProbeResult: SkillPolicyProbeResult? 
var isAuthenticating = false var pendingComposerImages: [AgentImageAttachment] = [] var composerText = "" @@ -126,6 +169,7 @@ final class AgentDemoViewModel: @unchecked Sendable { do { _ = try await runtime.restore() await registerDemoTool() + await registerDemoSkills() await refreshSnapshot() } catch { lastError = error.localizedDescription @@ -156,6 +200,7 @@ final class AgentDemoViewModel: @unchecked Sendable { do { _ = try await runtime.restore() await registerDemoTool() + await registerDemoSkills() session = try await runtime.signIn() await refreshSnapshot() if healthCoachInitialized { @@ -210,18 +255,40 @@ final class AgentDemoViewModel: @unchecked Sendable { ) } + func createHealthCoachSkillThread() async { + await createThreadInternal( + title: "Skill Demo: Health Coach", + personaStack: nil, + skillIDs: [Self.healthCoachSkill.id] + ) + } + + func createTravelPlannerSkillThread() async { + await createThreadInternal( + title: "Skill Demo: Travel Planner", + personaStack: nil, + skillIDs: [Self.travelPlannerSkill.id] + ) + } + func personaSummary(for thread: AgentThread?) -> String? 
{ - guard let layers = thread?.personaStack?.layers, - !layers.isEmpty else { + guard let thread else { return nil } - - return layers.map(\.name).joined(separator: ", ") + var sections: [String] = [] + if let layers = thread.personaStack?.layers, !layers.isEmpty { + sections.append("persona: \(layers.map(\.name).joined(separator: ", "))") + } + if !thread.skillIDs.isEmpty { + sections.append("skills: \(thread.skillIDs.joined(separator: ", "))") + } + guard !sections.isEmpty else { return nil } + return sections.joined(separator: " | ") } func activateThread(id: String) async { activeThreadID = id - messages = await runtime.messages(for: id) + setMessages(await runtime.messages(for: id)) streamingText = "" } @@ -283,6 +350,10 @@ final class AgentDemoViewModel: @unchecked Sendable { streamingText = "" composerText = "" pendingComposerImages = [] + lastResolvedInstructions = nil + lastResolvedInstructionsThreadTitle = nil + isRunningSkillPolicyProbe = false + skillPolicyProbeResult = nil activeThreadID = nil healthCoachThreadID = nil healthCoachFeedback = "Set a step goal, then start moving." 
@@ -313,13 +384,13 @@ final class AgentDemoViewModel: @unchecked Sendable { let selectedThreadID = activeThreadID if let selectedThreadID, threads.contains(where: { $0.id == selectedThreadID }) { - messages = await runtime.messages(for: selectedThreadID) + setMessages(await runtime.messages(for: selectedThreadID)) return } if let firstThread = threads.first { activeThreadID = firstThread.id - messages = await runtime.messages(for: firstThread.id) + setMessages(await runtime.messages(for: firstThread.id)) } else { activeThreadID = nil messages = [] @@ -331,6 +402,37 @@ final class AgentDemoViewModel: @unchecked Sendable { messages = [] streamingText = "" pendingComposerImages = [] + lastResolvedInstructions = nil + lastResolvedInstructionsThreadTitle = nil + isRunningSkillPolicyProbe = false + skillPolicyProbeResult = nil activeThreadID = nil } + + func setMessages(_ incoming: [AgentMessage]) { + messages = deduplicatedMessages(incoming) + } + + func upsertMessage(_ message: AgentMessage) { + if let existingIndex = messages.firstIndex(where: { $0.id == message.id }) { + messages[existingIndex] = message + return + } + messages.append(message) + } + + private func deduplicatedMessages(_ incoming: [AgentMessage]) -> [AgentMessage] { + var seen = Set() + var reversedUnique: [AgentMessage] = [] + reversedUnique.reserveCapacity(incoming.count) + + for message in incoming.reversed() { + guard seen.insert(message.id).inserted else { + continue + } + reversedUnique.append(message) + } + + return reversedUnique.reversed() + } } diff --git a/DemoApp/README.md b/DemoApp/README.md index c4b7a21..e11f578 100644 --- a/DemoApp/README.md +++ b/DemoApp/README.md @@ -22,14 +22,19 @@ The Xcode project is the source of truth for the demo app. 
Edit it directly in X - lets you attach a photo from the library and send it with or without text - renders attached user images in the transcript - streams assistant output into the UI -- shows approval prompts before running a host-defined demo tool +- supports approval prompts for host-defined tools that opt into `requiresApproval` - demonstrates thread-pinned personas and one-turn persona overrides +- includes first-class framework skill examples for `health_coach` and `travel_planner` +- demonstrates skill execution policy with skill-specific tool constraints +- includes a one-tap `Run Quick Skill Test` action that runs the same tool-focused prompt in normal vs skill threads +- showcases runtime APIs that can load persona/skill definitions from local or remote files +- includes a `Show Resolved Instructions` debug toggle so you can inspect per-turn compiled instructions - enables Responses web search in the checked-in demo configuration - reads HealthKit step totals (with permission), tracks a daily goal, and schedules local reminder notifications - supports switchable coaching tone (`Hardcore Personal` or `Firm Coach`) - proactively generates AI coach feedback in a dedicated persona-pinned thread as steps, goal, or tone change -The checked-in demo tool is a deterministic shipping quote tool, and the Xcode console logs when the tool is requested, executed, and completed so you can verify tool usage during a run. +The checked-in demo registers deterministic skill-specific tools (`health_coach_fetch_progress` and `travel_planner_build_day_plan`), and the Xcode console logs when each tool is requested, executed, and completed so you can verify tool usage during a run. The demo currently focuses on text plus photo input flows. Built-in image generation is not enabled in the checked-in app configuration. 
diff --git a/README.md b/README.md index 2e9b0f1..8ddfd99 100644 --- a/README.md +++ b/README.md @@ -186,12 +186,114 @@ The demo app exercises: - device-code and browser-based ChatGPT sign-in - streamed assistant output and resumable threads -- approval-gated host tools with a shipping quote example +- host tools with skill-specific examples for health coaching and travel planning - image messages from the photo library through the composer - Responses web search in checked-in configuration - thread-pinned personas and one-turn overrides +- a one-tap skill policy probe that compares tool behavior in normal vs skill-constrained threads - a Health Coach tab with HealthKit steps, AI-generated coaching, local reminders, and tone switching +## Skill Examples + +`CodexKit` skills are behavior modules, not just tone layers. They can carry both instructions and execution policy (tool allow/require/sequence/call limits). + +```swift +let healthCoachSkill = AgentSkill( + id: "health_coach", + name: "Health Coach", + instructions: "You are a health coach focused on daily step goals and execution. For every user turn, call the health_coach_fetch_progress tool exactly once before your final reply.", + executionPolicy: .init( + allowedToolNames: ["health_coach_fetch_progress"], + requiredToolNames: ["health_coach_fetch_progress"], + maxToolCalls: 1 + ) +) + +let travelPlannerSkill = AgentSkill( + id: "travel_planner", + name: "Travel Planner", + instructions: "You are a travel planning assistant for mobile users. 
Provide concise day-by-day itineraries, practical logistics, and a compact packing checklist.", + executionPolicy: .init( + allowedToolNames: ["lookup_flights", "lookup_hotels"], + requiredToolNames: ["lookup_flights"], + toolSequence: ["lookup_flights", "lookup_hotels"], + maxToolCalls: 3 + ) +) + +let runtime = try AgentRuntime(configuration: .init( + authProvider: authProvider, + secureStore: secureStore, + backend: backend, + approvalPresenter: approvalPresenter, + stateStore: stateStore, + skills: [healthCoachSkill, travelPlannerSkill] +)) + +let healthThread = try await runtime.createThread( + title: "Skill Demo: Health Coach", + skillIDs: ["health_coach"] +) + +let tripThread = try await runtime.createThread( + title: "Skill Demo: Travel Planner", + skillIDs: ["travel_planner"] +) + +let stream = try await runtime.sendMessage( + UserMessageRequest( + text: "Review this plan with extra travel rigor.", + skillOverrideIDs: ["travel_planner"] + ), + in: healthThread.id +) +``` + +## Dynamic Persona And Skill Sources + +You can load persona/skill instructions from local files or remote URLs at runtime. + +```swift +let localPersonaURL = URL(fileURLWithPath: "/path/to/persona.txt") +let thread = try await runtime.createThread( + title: "Dynamic Persona Thread", + personaSource: .file(localPersonaURL) +) +``` + +```swift +let remoteSkillURL = URL(string: "https://example.com/skills/shipping_support.json")! +let skill = try await runtime.registerSkill( + from: .remote(remoteSkillURL) +) + +try await runtime.setSkillIDs([skill.id], for: thread.id) +``` + +For persona sources: + +- plain text creates a single-layer persona stack +- JSON can be a full `AgentPersonaStack` + +For skill sources: + +- JSON supports `{ "id": "...", "name": "...", "instructions": "...", "executionPolicy": { ... 
} }` +- plain text is supported when you pass `id` (and optionally `name`, which defaults to the ID) in `registerSkill(from:id:name:)` + +## Debugging Instruction Resolution + +You can preview the exact compiled instructions for a specific send before starting a turn. + +```swift +let preview = try await runtime.resolvedInstructionsPreview( + for: thread.id, + request: UserMessageRequest( + text: "Give me a strict step plan." + ) +) +print(preview) +``` + ## Production Checklist - Store sessions in keychain (`KeychainSessionSecureStore`) diff --git a/Sources/CodexKit/Runtime/AgentDefinitionSource.swift b/Sources/CodexKit/Runtime/AgentDefinitionSource.swift new file mode 100644 index 0000000..361e9a3 --- /dev/null +++ b/Sources/CodexKit/Runtime/AgentDefinitionSource.swift @@ -0,0 +1,180 @@ +import Foundation + +public enum AgentDefinitionSource: Hashable, Sendable { + case file(URL) + case remote(URL) +} + +public struct AgentDefinitionSourceError: Error, LocalizedError, Equatable, Sendable { + public let code: String + public let message: String + + public init(code: String, message: String) { + self.code = code + self.message = message + } + + public var errorDescription: String? { + message + } + + public static func unsupportedRemoteResponse(_ statusCode: Int) -> AgentDefinitionSourceError { + AgentDefinitionSourceError( + code: "unsupported_remote_response", + message: "Remote definition request failed with status code \(statusCode)." + ) + } + + public static func unreadableContent() -> AgentDefinitionSourceError { + AgentDefinitionSourceError( + code: "unreadable_content", + message: "The definition content could not be decoded as UTF-8 text." + ) + } + + public static func emptyInstructions() -> AgentDefinitionSourceError { + AgentDefinitionSourceError( + code: "empty_instructions", + message: "The definition did not contain any usable instructions." 
+ ) + } + + public static func missingSkillIdentity() -> AgentDefinitionSourceError { + AgentDefinitionSourceError( + code: "missing_skill_identity", + message: "A skill loaded from this source must include an id and name, or they must be provided by the caller." + ) + } + + public static func invalidSkillID(_ skillID: String) -> AgentDefinitionSourceError { + AgentDefinitionSourceError( + code: "invalid_skill_id", + message: "The skill ID \(skillID) is invalid. Skill IDs must match ^[a-zA-Z0-9_-]+$." + ) + } +} + +public actor AgentDefinitionSourceLoader { + private struct SkillDocument: Codable { + var id: String? + var name: String? + var instructions: String + var executionPolicy: AgentSkillExecutionPolicy? + } + + private let urlSession: URLSession + private let decoder = JSONDecoder() + + public init(urlSession: URLSession = .shared) { + self.urlSession = urlSession + } + + public func loadPersonaStack( + from source: AgentDefinitionSource, + defaultLayerName: String = "dynamic_persona" + ) async throws -> AgentPersonaStack { + let text = try await loadText(from: source) + if let jsonData = text.data(using: .utf8), + let stack = try? decoder.decode(AgentPersonaStack.self, from: jsonData), + !stack.layers.isEmpty { + return stack + } + + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { + throw AgentDefinitionSourceError.emptyInstructions() + } + + return AgentPersonaStack(layers: [ + .init(name: defaultLayerName, instructions: trimmed), + ]) + } + + public func loadSkill( + from source: AgentDefinitionSource, + id: String? = nil, + name: String? = nil + ) async throws -> AgentSkill { + let text = try await loadText(from: source) + let decodedDocument = decodeSkillDocument(from: text) + + let resolvedInstructions = (decodedDocument?.instructions ?? 
text) + .trimmingCharacters(in: .whitespacesAndNewlines) + guard !resolvedInstructions.isEmpty else { + throw AgentDefinitionSourceError.emptyInstructions() + } + + let resolvedID = (id ?? decodedDocument?.id)?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedName = (name ?? decodedDocument?.name)?.trimmingCharacters(in: .whitespacesAndNewlines) + + guard let finalID = resolvedID, !finalID.isEmpty else { + throw AgentDefinitionSourceError.missingSkillIdentity() + } + guard AgentSkill.isValidID(finalID) else { + throw AgentDefinitionSourceError.invalidSkillID(finalID) + } + + let finalName = (resolvedName?.isEmpty == false) ? resolvedName! : finalID + + return AgentSkill( + id: finalID, + name: finalName, + instructions: resolvedInstructions, + executionPolicy: decodedDocument?.executionPolicy + ) + } + + public func loadText(from source: AgentDefinitionSource) async throws -> String { + let data: Data + switch source { + case let .file(url): + data = try Data(contentsOf: url) + case let .remote(url): + let (responseData, response) = try await urlSession.data(from: url) + if let httpResponse = response as? HTTPURLResponse, + !(200 ... 299).contains(httpResponse.statusCode) { + throw AgentDefinitionSourceError.unsupportedRemoteResponse(httpResponse.statusCode) + } + data = responseData + } + + guard let text = String(data: data, encoding: .utf8) else { + throw AgentDefinitionSourceError.unreadableContent() + } + + return text + } + + private func decodeSkillDocument(from text: String) -> SkillDocument? { + guard let data = text.data(using: .utf8) else { + return nil + } + + if let decoded = try? decoder.decode(SkillDocument.self, from: data) { + return decoded + } + + guard let object = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] else { + return nil + } + + guard let instructions = object["instructions"] as? String else { + return nil + } + + let executionPolicy: AgentSkillExecutionPolicy? 
= if let policyObject = object["executionPolicy"], + JSONSerialization.isValidJSONObject(policyObject), + let policyData = try? JSONSerialization.data(withJSONObject: policyObject) { + try? decoder.decode(AgentSkillExecutionPolicy.self, from: policyData) + } else { + nil + } + + return SkillDocument( + id: object["id"] as? String, + name: object["name"] as? String, + instructions: instructions, + executionPolicy: executionPolicy + ) + } +} diff --git a/Sources/CodexKit/Runtime/AgentModels.swift b/Sources/CodexKit/Runtime/AgentModels.swift index d05c8fa..c436c7a 100644 --- a/Sources/CodexKit/Runtime/AgentModels.swift +++ b/Sources/CodexKit/Runtime/AgentModels.swift @@ -34,6 +34,76 @@ public struct AgentRuntimeError: Error, LocalizedError, Equatable, Sendable { message: "A user message must include text or at least one image attachment." ) } + + public static func invalidSkillID(_ skillID: String) -> AgentRuntimeError { + AgentRuntimeError( + code: "invalid_skill_id", + message: "The skill ID \(skillID) is invalid. Skill IDs must match ^[a-zA-Z0-9_-]+$." + ) + } + + public static func duplicateSkill(_ skillID: String) -> AgentRuntimeError { + AgentRuntimeError( + code: "duplicate_skill", + message: "A skill with ID \(skillID) is already registered." + ) + } + + public static func skillsNotFound(_ skillIDs: [String]) -> AgentRuntimeError { + let joined = skillIDs.sorted().joined(separator: ", ") + return AgentRuntimeError( + code: "skills_not_found", + message: "The following skills are not registered: \(joined)." + ) + } + + public static func invalidSkillToolName( + skillID: String, + toolName: String + ) -> AgentRuntimeError { + AgentRuntimeError( + code: "invalid_skill_tool_name", + message: "Skill \(skillID) references invalid tool name \(toolName). Tool names must match ^[a-zA-Z0-9_-]+$." 
+ ) + } + + public static func invalidSkillMaxToolCalls(skillID: String) -> AgentRuntimeError { + AgentRuntimeError( + code: "invalid_skill_max_tool_calls", + message: "Skill \(skillID) has invalid maxToolCalls. It must be 0 or greater." + ) + } + + public static func skillToolNotAllowed(_ toolName: String) -> AgentRuntimeError { + AgentRuntimeError( + code: "skill_tool_not_allowed", + message: "Tool \(toolName) is not allowed by the active skill policy." + ) + } + + public static func skillToolSequenceViolation( + expected: String, + actual: String + ) -> AgentRuntimeError { + AgentRuntimeError( + code: "skill_tool_sequence_violation", + message: "Tool \(actual) was requested out of sequence. Expected \(expected)." + ) + } + + public static func skillToolCallLimitExceeded(_ maxCalls: Int) -> AgentRuntimeError { + AgentRuntimeError( + code: "skill_tool_call_limit_exceeded", + message: "The active skill policy allows at most \(maxCalls) tool call(s) per turn." + ) + } + + public static func skillRequiredToolsMissing(_ toolNames: [String]) -> AgentRuntimeError { + AgentRuntimeError( + code: "skill_required_tools_missing", + message: "The active skill policy requires tool calls that did not occur: \(toolNames.sorted().joined(separator: ", "))." + ) + } } public enum AgentRole: String, Codable, Hashable, Sendable { @@ -141,15 +211,18 @@ public struct UserMessageRequest: Codable, Hashable, Sendable { public var text: String public var images: [AgentImageAttachment] public var personaOverride: AgentPersonaStack? + public var skillOverrideIDs: [String]? public init( text: String, images: [AgentImageAttachment] = [], - personaOverride: AgentPersonaStack? = nil + personaOverride: AgentPersonaStack? = nil, + skillOverrideIDs: [String]? 
= nil ) { self.text = text self.images = images self.personaOverride = personaOverride + self.skillOverrideIDs = skillOverrideIDs } public var hasContent: Bool { @@ -160,6 +233,7 @@ public struct UserMessageRequest: Codable, Hashable, Sendable { case text case images case personaOverride + case skillOverrideIDs } public init(from decoder: Decoder) throws { @@ -167,6 +241,7 @@ public struct UserMessageRequest: Codable, Hashable, Sendable { text = try container.decode(String.self, forKey: .text) images = try container.decodeIfPresent([AgentImageAttachment].self, forKey: .images) ?? [] personaOverride = try container.decodeIfPresent(AgentPersonaStack.self, forKey: .personaOverride) + skillOverrideIDs = try container.decodeIfPresent([String].self, forKey: .skillOverrideIDs) } } @@ -174,6 +249,7 @@ public struct AgentThread: Identifiable, Codable, Hashable, Sendable { public var id: String public var title: String? public var personaStack: AgentPersonaStack? + public var skillIDs: [String] public var createdAt: Date public var updatedAt: Date public var status: AgentThreadStatus @@ -182,6 +258,7 @@ public struct AgentThread: Identifiable, Codable, Hashable, Sendable { id: String, title: String? = nil, personaStack: AgentPersonaStack? 
= nil, + skillIDs: [String] = [], createdAt: Date = Date(), updatedAt: Date = Date(), status: AgentThreadStatus = .idle @@ -189,6 +266,7 @@ public struct AgentThread: Identifiable, Codable, Hashable, Sendable { self.id = id self.title = title self.personaStack = personaStack + self.skillIDs = skillIDs self.createdAt = createdAt self.updatedAt = updatedAt self.status = status @@ -198,6 +276,7 @@ public struct AgentThread: Identifiable, Codable, Hashable, Sendable { case id case title case personaStack + case skillIDs case createdAt case updatedAt case status @@ -208,6 +287,7 @@ public struct AgentThread: Identifiable, Codable, Hashable, Sendable { id = try container.decode(String.self, forKey: .id) title = try container.decodeIfPresent(String.self, forKey: .title) personaStack = try container.decodeIfPresent(AgentPersonaStack.self, forKey: .personaStack) + skillIDs = try container.decodeIfPresent([String].self, forKey: .skillIDs) ?? [] createdAt = try container.decodeIfPresent(Date.self, forKey: .createdAt) ?? Date() updatedAt = try container.decodeIfPresent(Date.self, forKey: .updatedAt) ?? createdAt status = try container.decodeIfPresent(AgentThreadStatus.self, forKey: .status) ?? .idle diff --git a/Sources/CodexKit/Runtime/AgentPersona.swift b/Sources/CodexKit/Runtime/AgentPersona.swift index be36dc8..895769a 100644 --- a/Sources/CodexKit/Runtime/AgentPersona.swift +++ b/Sources/CodexKit/Runtime/AgentPersona.swift @@ -26,7 +26,9 @@ enum AgentInstructionCompiler { static func compile( baseInstructions: String?, threadPersonaStack: AgentPersonaStack?, - turnPersonaOverride: AgentPersonaStack? 
+ threadSkills: [AgentSkill], + turnPersonaOverride: AgentPersonaStack?, + turnSkills: [AgentSkill] ) -> String { var sections: [String] = [] @@ -43,6 +45,13 @@ enum AgentInstructionCompiler { sections.append(compiledThreadLayers) } + if let compiledThreadSkills = compile( + title: "Thread Skills", + skills: threadSkills + ) { + sections.append(compiledThreadSkills) + } + if let turnPersonaOverride, let compiledOverrideLayers = compile( title: "Turn Persona Override", @@ -51,6 +60,13 @@ enum AgentInstructionCompiler { sections.append(compiledOverrideLayers) } + if let compiledTurnSkills = compile( + title: "Turn Skill Override", + skills: turnSkills + ) { + sections.append(compiledTurnSkills) + } + return sections.joined(separator: "\n\n") } @@ -79,4 +95,74 @@ enum AgentInstructionCompiler { \(renderedLayers.joined(separator: "\n\n")) """ } + + private static func compile( + title: String, + skills: [AgentSkill] + ) -> String? { + let renderedSkills = skills.compactMap { skill -> String? in + let trimmedInstructions = skill.instructions.trimmingCharacters(in: .whitespacesAndNewlines) + let policyLines = compilePolicyLines(skill.executionPolicy) + guard !trimmedInstructions.isEmpty || !policyLines.isEmpty else { + return nil + } + + var sections: [String] = [] + if !trimmedInstructions.isEmpty { + sections.append(trimmedInstructions) + } + if !policyLines.isEmpty { + sections.append( + """ + Execution Policy: + \(policyLines.joined(separator: "\n")) + """ + ) + } + + return """ + [\(skill.id): \(skill.name)] + \(sections.joined(separator: "\n\n")) + """ + } + + guard !renderedSkills.isEmpty else { + return nil + } + + return """ + \(title): + \(renderedSkills.joined(separator: "\n\n")) + """ + } + + private static func compilePolicyLines( + _ policy: AgentSkillExecutionPolicy? 
+ ) -> [String] { + guard let policy else { + return [] + } + + var lines: [String] = [] + + if let allowedToolNames = policy.allowedToolNames, + !allowedToolNames.isEmpty { + lines.append("- allowed tools: \(allowedToolNames.joined(separator: ", "))") + } + + if !policy.requiredToolNames.isEmpty { + lines.append("- required tools this turn: \(policy.requiredToolNames.joined(separator: ", "))") + } + + if let toolSequence = policy.toolSequence, + !toolSequence.isEmpty { + lines.append("- required tool sequence: \(toolSequence.joined(separator: " -> "))") + } + + if let maxToolCalls = policy.maxToolCalls { + lines.append("- max tool calls this turn: \(maxToolCalls)") + } + + return lines + } } diff --git a/Sources/CodexKit/Runtime/AgentRuntime.swift b/Sources/CodexKit/Runtime/AgentRuntime.swift index e133c28..de9c208 100644 --- a/Sources/CodexKit/Runtime/AgentRuntime.swift +++ b/Sources/CodexKit/Runtime/AgentRuntime.swift @@ -22,6 +22,8 @@ public actor AgentRuntime { public let stateStore: any RuntimeStateStoring public let baseInstructions: String? public let tools: [ToolRegistration] + public let skills: [AgentSkill] + public let definitionSourceLoader: AgentDefinitionSourceLoader public init( authProvider: any ChatGPTAuthProviding, @@ -30,7 +32,9 @@ public actor AgentRuntime { approvalPresenter: any ApprovalPresenting, stateStore: any RuntimeStateStoring, baseInstructions: String? 
= nil, - tools: [ToolRegistration] = [] + tools: [ToolRegistration] = [], + skills: [AgentSkill] = [], + definitionSourceLoader: AgentDefinitionSourceLoader = AgentDefinitionSourceLoader() ) { self.authProvider = authProvider self.secureStore = secureStore @@ -39,6 +43,8 @@ public actor AgentRuntime { self.stateStore = stateStore self.baseInstructions = baseInstructions self.tools = tools + self.skills = skills + self.definitionSourceLoader = definitionSourceLoader } } @@ -48,9 +54,94 @@ public actor AgentRuntime { private let toolRegistry: ToolRegistry private let approvalCoordinator: ApprovalCoordinator private let baseInstructions: String? + private let definitionSourceLoader: AgentDefinitionSourceLoader + private var skillsByID: [String: AgentSkill] private var state: StoredRuntimeState = .empty + private struct ResolvedTurnSkills { + let threadSkills: [AgentSkill] + let turnSkills: [AgentSkill] + let compiledToolPolicy: CompiledSkillToolPolicy + } + + private struct CompiledSkillToolPolicy { + let allowedToolNames: Set? + let requiredToolNames: Set + let toolSequence: [String]? + let maxToolCalls: Int? + + var hasConstraints: Bool { + allowedToolNames != nil || + !requiredToolNames.isEmpty || + (toolSequence?.isEmpty == false) || + maxToolCalls != nil + } + } + + private final class TurnSkillPolicyTracker: @unchecked Sendable { + private let policy: CompiledSkillToolPolicy + private var toolCallsCount = 0 + private var usedToolNames: Set = [] + private var nextSequenceIndex = 0 + + init(policy: CompiledSkillToolPolicy) { + self.policy = policy + } + + func validate(toolName: String) -> AgentRuntimeError? 
{ + if let maxToolCalls = policy.maxToolCalls, + toolCallsCount >= maxToolCalls { + return AgentRuntimeError.skillToolCallLimitExceeded(maxToolCalls) + } + + if let allowedToolNames = policy.allowedToolNames, + !allowedToolNames.contains(toolName) { + return AgentRuntimeError.skillToolNotAllowed(toolName) + } + + if let toolSequence = policy.toolSequence, + nextSequenceIndex < toolSequence.count { + let expectedToolName = toolSequence[nextSequenceIndex] + if toolName != expectedToolName { + return AgentRuntimeError.skillToolSequenceViolation( + expected: expectedToolName, + actual: toolName + ) + } + } + + return nil + } + + func recordAccepted(toolName: String) { + toolCallsCount += 1 + usedToolNames.insert(toolName) + + if let toolSequence = policy.toolSequence, + nextSequenceIndex < toolSequence.count, + toolSequence[nextSequenceIndex] == toolName { + nextSequenceIndex += 1 + } + } + + func completionError() -> AgentRuntimeError? { + var missingTools = policy.requiredToolNames.subtracting(usedToolNames) + + if let toolSequence = policy.toolSequence, + nextSequenceIndex < toolSequence.count { + let remainingSequenceTools = toolSequence[nextSequenceIndex...] + missingTools.formUnion(remainingSequenceTools) + } + + guard !missingTools.isEmpty else { + return nil + } + + return AgentRuntimeError.skillRequiredToolsMissing(Array(missingTools).sorted()) + } + } + public init(configuration: Configuration) throws { self.backend = configuration.backend self.stateStore = configuration.stateStore @@ -63,6 +154,8 @@ public actor AgentRuntime { presenter: configuration.approvalPresenter ) self.baseInstructions = configuration.baseInstructions ?? 
configuration.backend.baseInstructions + self.definitionSourceLoader = configuration.definitionSourceLoader + self.skillsByID = try Self.validatedSkills(from: configuration.skills) } @discardableResult @@ -107,17 +200,89 @@ public actor AgentRuntime { try await toolRegistry.replace(definition, executor: executor) } + public func skills() -> [AgentSkill] { + skillsByID.values.sorted { $0.id < $1.id } + } + + public func skill(for skillID: String) -> AgentSkill? { + skillsByID[skillID] + } + + public func registerSkill(_ skill: AgentSkill) throws { + guard AgentSkill.isValidID(skill.id) else { + throw AgentRuntimeError.invalidSkillID(skill.id) + } + try Self.validateSkillExecutionPolicy(skill) + guard skillsByID[skill.id] == nil else { + throw AgentRuntimeError.duplicateSkill(skill.id) + } + + skillsByID[skill.id] = skill + } + + public func replaceSkill(_ skill: AgentSkill) throws { + guard AgentSkill.isValidID(skill.id) else { + throw AgentRuntimeError.invalidSkillID(skill.id) + } + try Self.validateSkillExecutionPolicy(skill) + + skillsByID[skill.id] = skill + } + + @discardableResult + public func registerSkill( + from source: AgentDefinitionSource, + id: String? = nil, + name: String? = nil + ) async throws -> AgentSkill { + let skill = try await definitionSourceLoader.loadSkill( + from: source, + id: id, + name: name + ) + try registerSkill(skill) + return skill + } + + @discardableResult + public func replaceSkill( + from source: AgentDefinitionSource, + id: String? = nil, + name: String? = nil + ) async throws -> AgentSkill { + let skill = try await definitionSourceLoader.loadSkill( + from: source, + id: id, + name: name + ) + try replaceSkill(skill) + return skill + } + @discardableResult public func createThread( title: String? = nil, - personaStack: AgentPersonaStack? = nil + personaStack: AgentPersonaStack? = nil, + personaSource: AgentDefinitionSource? 
= nil, + skillIDs: [String] = [] ) async throws -> AgentThread { + try assertSkillsExist(skillIDs) + let resolvedPersonaStack: AgentPersonaStack? + if let personaStack { + resolvedPersonaStack = personaStack + } else if let personaSource { + resolvedPersonaStack = try await definitionSourceLoader.loadPersonaStack(from: personaSource) + } else { + resolvedPersonaStack = nil + } + let session = try await sessionManager.requireSession() var thread = try await backend.createThread(session: session) if let title { thread.title = title } - thread.personaStack = personaStack + thread.personaStack = resolvedPersonaStack + thread.skillIDs = skillIDs try await upsertThread(thread) return thread } @@ -150,10 +315,15 @@ public actor AgentRuntime { images: request.images ) let priorMessages = state.messagesByThread[threadID] ?? [] - let resolvedInstructions = resolveInstructions( + let resolvedTurnSkills = try resolveTurnSkills( thread: thread, message: request ) + let resolvedInstructions = resolveInstructions( + thread: thread, + message: request, + resolvedTurnSkills: resolvedTurnSkills + ) try await appendMessage(userMessage) try await setThreadStatus(.streaming, for: threadID) @@ -177,18 +347,46 @@ public actor AgentRuntime { turnStream, for: threadID, session: session, + resolvedTurnSkills: resolvedTurnSkills, continuation: continuation ) } } } + public func resolvedInstructionsPreview( + for threadID: String, + request: UserMessageRequest + ) throws -> String { + guard let thread = thread(for: threadID) else { + throw AgentRuntimeError.threadNotFound(threadID) + } + + let resolvedTurnSkills = try resolveTurnSkills( + thread: thread, + message: request + ) + + return resolveInstructions( + thread: thread, + message: request, + resolvedTurnSkills: resolvedTurnSkills + ) + } + private func consumeTurnStream( _ turnStream: any AgentTurnStreaming, for threadID: String, session: ChatGPTSession, + resolvedTurnSkills: ResolvedTurnSkills, continuation: 
AsyncThrowingStream.Continuation ) async { + let policyTracker: TurnSkillPolicyTracker? = if resolvedTurnSkills.compiledToolPolicy.hasConstraints { + TurnSkillPolicyTracker(policy: resolvedTurnSkills.compiledToolPolicy) + } else { + nil + } + do { for try await backendEvent in turnStream.events { switch backendEvent { @@ -211,11 +409,22 @@ public actor AgentRuntime { case let .toolCallRequested(invocation): continuation.yield(.toolCallStarted(invocation)) - let result = try await resolveToolInvocation( - invocation, - session: session, - continuation: continuation - ) + let result: ToolResultEnvelope + if let policyTracker, + let validationError = policyTracker.validate(toolName: invocation.toolName) { + result = .failure( + invocation: invocation, + message: validationError.message + ) + } else { + let resolvedResult = try await resolveToolInvocation( + invocation, + session: session, + continuation: continuation + ) + result = resolvedResult + policyTracker?.recordAccepted(toolName: invocation.toolName) + } try await turnStream.submitToolResult(result, for: invocation.id) continuation.yield(.toolCallFinished(result)) @@ -223,6 +432,14 @@ public actor AgentRuntime { continuation.yield(.threadStatusChanged(threadID: threadID, status: .streaming)) case let .turnCompleted(summary): + if let completionError = policyTracker?.completionError() { + try await setThreadStatus(.failed, for: threadID) + continuation.yield(.threadStatusChanged(threadID: threadID, status: .failed)) + continuation.yield(.turnFailed(completionError)) + continuation.finish(throwing: completionError) + return + } + try await setThreadStatus(.idle, for: threadID) continuation.yield(.threadStatusChanged(threadID: threadID, status: .idle)) continuation.yield(.turnCompleted(summary)) @@ -321,6 +538,42 @@ public actor AgentRuntime { try await persistState() } + @discardableResult + public func setPersonaStack( + from source: AgentDefinitionSource, + for threadID: String, + defaultLayerName: String = 
"dynamic_persona" + ) async throws -> AgentPersonaStack { + let personaStack = try await definitionSourceLoader.loadPersonaStack( + from: source, + defaultLayerName: defaultLayerName + ) + try await setPersonaStack(personaStack, for: threadID) + return personaStack + } + + public func skillIDs(for threadID: String) throws -> [String] { + guard let thread = thread(for: threadID) else { + throw AgentRuntimeError.threadNotFound(threadID) + } + + return thread.skillIDs + } + + public func setSkillIDs( + _ skillIDs: [String], + for threadID: String + ) async throws { + guard let index = state.threads.firstIndex(where: { $0.id == threadID }) else { + throw AgentRuntimeError.threadNotFound(threadID) + } + try assertSkillsExist(skillIDs) + + state.threads[index].skillIDs = skillIDs + state.threads[index].updatedAt = Date() + try await persistState() + } + private func upsertThread(_ thread: AgentThread) async throws { if let index = state.threads.firstIndex(where: { $0.id == thread.id }) { var mergedThread = thread @@ -330,6 +583,9 @@ public actor AgentRuntime { if mergedThread.personaStack == nil { mergedThread.personaStack = state.threads[index].personaStack } + if mergedThread.skillIDs.isEmpty { + mergedThread.skillIDs = state.threads[index].skillIDs + } state.threads[index] = mergedThread } else { state.threads.append(thread) @@ -375,12 +631,132 @@ public actor AgentRuntime { private func resolveInstructions( thread: AgentThread, - message: UserMessageRequest + message: UserMessageRequest, + resolvedTurnSkills: ResolvedTurnSkills ) -> String { AgentInstructionCompiler.compile( baseInstructions: baseInstructions, threadPersonaStack: thread.personaStack, - turnPersonaOverride: message.personaOverride + threadSkills: resolvedTurnSkills.threadSkills, + turnPersonaOverride: message.personaOverride, + turnSkills: resolvedTurnSkills.turnSkills + ) + } + + private func resolveTurnSkills( + thread: AgentThread, + message: UserMessageRequest + ) throws -> ResolvedTurnSkills { + 
if let skillOverrideIDs = message.skillOverrideIDs { + try assertSkillsExist(skillOverrideIDs) + } + + let threadSkills = resolveSkills(for: thread.skillIDs) + let turnSkills = resolveSkills(for: message.skillOverrideIDs ?? []) + let allSkills = threadSkills + turnSkills + + return ResolvedTurnSkills( + threadSkills: threadSkills, + turnSkills: turnSkills, + compiledToolPolicy: compileToolPolicy(from: allSkills) ) } + + private func compileToolPolicy(from skills: [AgentSkill]) -> CompiledSkillToolPolicy { + var allowedToolNames: Set? + var requiredToolNames: Set = [] + var toolSequence: [String]? + var maxToolCalls: Int? + + for skill in skills { + guard let executionPolicy = skill.executionPolicy else { + continue + } + + if let allowed = executionPolicy.allowedToolNames, + !allowed.isEmpty { + let allowedSet = Set(allowed) + if let existingAllowed = allowedToolNames { + allowedToolNames = existingAllowed.intersection(allowedSet) + } else { + allowedToolNames = allowedSet + } + } + + if !executionPolicy.requiredToolNames.isEmpty { + requiredToolNames.formUnion(executionPolicy.requiredToolNames) + } + + if let sequence = executionPolicy.toolSequence, + !sequence.isEmpty { + toolSequence = sequence + } + + if let maxCalls = executionPolicy.maxToolCalls { + if let existingMaxCalls = maxToolCalls { + maxToolCalls = min(existingMaxCalls, maxCalls) + } else { + maxToolCalls = maxCalls + } + } + } + + return CompiledSkillToolPolicy( + allowedToolNames: allowedToolNames, + requiredToolNames: requiredToolNames, + toolSequence: toolSequence, + maxToolCalls: maxToolCalls + ) + } + + private func resolveSkills(for skillIDs: [String]) -> [AgentSkill] { + skillIDs.compactMap { skillsByID[$0] } + } + + private func assertSkillsExist(_ skillIDs: [String]) throws { + let missing = Array(Set(skillIDs.filter { skillsByID[$0] == nil })).sorted() + guard missing.isEmpty else { + throw AgentRuntimeError.skillsNotFound(missing) + } + } + + private static func validatedSkills(from 
skills: [AgentSkill]) throws -> [String: AgentSkill] { + var dictionary: [String: AgentSkill] = [:] + for skill in skills { + guard AgentSkill.isValidID(skill.id) else { + throw AgentRuntimeError.invalidSkillID(skill.id) + } + try validateSkillExecutionPolicy(skill) + guard dictionary[skill.id] == nil else { + throw AgentRuntimeError.duplicateSkill(skill.id) + } + dictionary[skill.id] = skill + } + return dictionary + } + + private static func validateSkillExecutionPolicy(_ skill: AgentSkill) throws { + guard let executionPolicy = skill.executionPolicy else { + return + } + + if let maxToolCalls = executionPolicy.maxToolCalls, + maxToolCalls < 0 { + throw AgentRuntimeError.invalidSkillMaxToolCalls(skillID: skill.id) + } + + let policyToolNames: [String] = + (executionPolicy.allowedToolNames ?? []) + + executionPolicy.requiredToolNames + + (executionPolicy.toolSequence ?? []) + + for toolName in policyToolNames { + guard ToolDefinition.isValidName(toolName) else { + throw AgentRuntimeError.invalidSkillToolName( + skillID: skill.id, + toolName: toolName + ) + } + } + } } diff --git a/Sources/CodexKit/Runtime/AgentSkill.swift b/Sources/CodexKit/Runtime/AgentSkill.swift new file mode 100644 index 0000000..a7d3144 --- /dev/null +++ b/Sources/CodexKit/Runtime/AgentSkill.swift @@ -0,0 +1,59 @@ +import Foundation + +public struct AgentSkillExecutionPolicy: Codable, Hashable, Sendable { + public var allowedToolNames: [String]? = nil + public var requiredToolNames: [String] = [] + public var toolSequence: [String]? = nil + public var maxToolCalls: Int? = nil + + public init( + allowedToolNames: [String]? = nil, + requiredToolNames: [String] = [], + toolSequence: [String]? = nil, + maxToolCalls: Int? 
= nil + ) { + self.allowedToolNames = allowedToolNames + self.requiredToolNames = requiredToolNames + self.toolSequence = toolSequence + self.maxToolCalls = maxToolCalls + } + + enum CodingKeys: String, CodingKey { + case allowedToolNames + case requiredToolNames + case toolSequence + case maxToolCalls + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + allowedToolNames = try container.decodeIfPresent([String].self, forKey: .allowedToolNames) + requiredToolNames = try container.decodeIfPresent([String].self, forKey: .requiredToolNames) ?? [] + toolSequence = try container.decodeIfPresent([String].self, forKey: .toolSequence) + maxToolCalls = try container.decodeIfPresent(Int.self, forKey: .maxToolCalls) + } +} + +public struct AgentSkill: Codable, Hashable, Sendable, Identifiable { + public let id: String + public var name: String + public var instructions: String + public var executionPolicy: AgentSkillExecutionPolicy? + + public init( + id: String, + name: String, + instructions: String, + executionPolicy: AgentSkillExecutionPolicy? = nil + ) { + self.id = id + self.name = name + self.instructions = instructions + self.executionPolicy = executionPolicy + } + + public static func isValidID(_ id: String) -> Bool { + let pattern = "^[a-zA-Z0-9_-]+$" + return id.range(of: pattern, options: .regularExpression) != nil + } +} diff --git a/Tests/CodexKitTests/AgentDefinitionSourceLoaderTests.swift b/Tests/CodexKitTests/AgentDefinitionSourceLoaderTests.swift new file mode 100644 index 0000000..4c5ede5 --- /dev/null +++ b/Tests/CodexKitTests/AgentDefinitionSourceLoaderTests.swift @@ -0,0 +1,116 @@ +import CodexKit +import Foundation +import XCTest + +private final class StubURLProtocol: URLProtocol, @unchecked Sendable { + nonisolated(unsafe) static var requestHandler: (@Sendable (URLRequest) throws -> (HTTPURLResponse, Data))? 
+ + override class func canInit(with _: URLRequest) -> Bool { + true + } + + override class func canonicalRequest(for request: URLRequest) -> URLRequest { + request + } + + override func startLoading() { + guard let handler = Self.requestHandler else { + client?.urlProtocol(self, didFailWithError: URLError(.badServerResponse)) + return + } + + do { + let (response, data) = try handler(request) + client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed) + client?.urlProtocol(self, didLoad: data) + client?.urlProtocolDidFinishLoading(self) + } catch { + client?.urlProtocol(self, didFailWithError: error) + } + } + + override func stopLoading() {} +} + +final class AgentDefinitionSourceLoaderTests: XCTestCase { + func testLoaderBuildsPersonaStackFromPlainTextFile() async throws { + let text = "You are a direct planning assistant focused on tradeoffs." + let fileURL = try temporaryFile(with: text) + let loader = AgentDefinitionSourceLoader() + + let stack = try await loader.loadPersonaStack( + from: .file(fileURL), + defaultLayerName: "file_persona" + ) + + XCTAssertEqual(stack.layers.count, 1) + XCTAssertEqual(stack.layers[0].name, "file_persona") + XCTAssertEqual(stack.layers[0].instructions, text) + } + + func testLoaderBuildsSkillFromJSONFile() async throws { + let json = """ + { + "id": "travel_planner", + "name": "Travel Planner", + "instructions": "Build practical itineraries with logistics.", + "executionPolicy": { + "maxToolCalls": 0 + } + } + """ + let fileURL = try temporaryFile(with: json) + let loader = AgentDefinitionSourceLoader() + + let skill = try await loader.loadSkill(from: .file(fileURL)) + + XCTAssertEqual(skill.id, "travel_planner") + XCTAssertEqual(skill.name, "Travel Planner") + XCTAssertTrue(skill.instructions.contains("itineraries")) + XCTAssertEqual(skill.executionPolicy?.maxToolCalls, 0) + } + + func testLoaderBuildsSkillFromRemoteSource() async throws { + let config = URLSessionConfiguration.ephemeral + 
config.protocolClasses = [StubURLProtocol.self] + let session = URLSession(configuration: config) + let loader = AgentDefinitionSourceLoader(urlSession: session) + let url = URL(string: "https://example.com/skills/health.json")! + + StubURLProtocol.requestHandler = { request in + XCTAssertEqual(request.url, url) + let body = """ + { + "id": "health_coach", + "name": "Health Coach", + "instructions": "Drive daily step execution." + } + """ + return ( + HTTPURLResponse( + url: url, + statusCode: 200, + httpVersion: nil, + headerFields: nil + )!, + Data(body.utf8) + ) + } + defer { + StubURLProtocol.requestHandler = nil + } + + let skill = try await loader.loadSkill(from: .remote(url)) + + XCTAssertEqual(skill.id, "health_coach") + XCTAssertEqual(skill.name, "Health Coach") + } + + private func temporaryFile(with content: String) throws -> URL { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension("txt") + try Data(content.utf8).write(to: url, options: .atomic) + return url + } +} diff --git a/Tests/CodexKitTests/AgentRuntimeTests.swift b/Tests/CodexKitTests/AgentRuntimeTests.swift index 42d69d4..8474ab3 100644 --- a/Tests/CodexKitTests/AgentRuntimeTests.swift +++ b/Tests/CodexKitTests/AgentRuntimeTests.swift @@ -9,6 +9,154 @@ private struct AutoApprovalPresenter: ApprovalPresenting { } final class AgentRuntimeTests: XCTestCase { + func testThreadSkillsAreResolvedIntoInstructions() async throws { + let backend = InMemoryAgentBackend( + baseInstructions: "Base host instructions." 
+ ) + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: backend, + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "health_coach", + name: "Health Coach", + instructions: "Coach users toward their daily step goals." + ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let thread = try await runtime.createThread( + title: "Health", + skillIDs: ["health_coach"] + ) + let stream = try await runtime.sendMessage( + UserMessageRequest(text: "Give me my plan."), + in: thread.id + ) + for try await _ in stream {} + + let instructions = await backend.receivedInstructions() + let resolvedInstructions = try XCTUnwrap(instructions.last) + XCTAssertTrue(resolvedInstructions.contains("Thread Skills:")) + XCTAssertTrue(resolvedInstructions.contains("[health_coach: Health Coach]")) + } + + func testTurnSkillOverrideAppliesOnlyToCurrentTurn() async throws { + let backend = InMemoryAgentBackend( + baseInstructions: "Base host instructions." + ) + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: backend, + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "travel_planner", + name: "Travel Planner", + instructions: "Plan practical itineraries." 
+ ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let thread = try await runtime.createThread() + + let firstStream = try await runtime.sendMessage( + UserMessageRequest( + text: "Plan my trip.", + skillOverrideIDs: ["travel_planner"] + ), + in: thread.id + ) + for try await _ in firstStream {} + + let secondStream = try await runtime.sendMessage( + UserMessageRequest(text: "Now answer normally."), + in: thread.id + ) + for try await _ in secondStream {} + + let instructions = await backend.receivedInstructions() + XCTAssertEqual(instructions.count, 2) + XCTAssertTrue(instructions[0].contains("Turn Skill Override:")) + XCTAssertTrue(instructions[0].contains("[travel_planner: Travel Planner]")) + XCTAssertFalse(instructions[1].contains("Turn Skill Override:")) + XCTAssertFalse(instructions[1].contains("[travel_planner: Travel Planner]")) + } + + func testSetSkillIDsAffectsFutureTurnsOnly() async throws { + let backend = InMemoryAgentBackend( + baseInstructions: "Base host instructions." + ) + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: backend, + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "health_coach", + name: "Health Coach", + instructions: "Coach users toward step goals." + ), + .init( + id: "travel_planner", + name: "Travel Planner", + instructions: "Plan practical itineraries." 
+ ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let thread = try await runtime.createThread( + title: "Skills", + skillIDs: ["health_coach"] + ) + + let firstStream = try await runtime.sendMessage( + UserMessageRequest(text: "What should I walk today?"), + in: thread.id + ) + for try await _ in firstStream {} + + try await runtime.setSkillIDs(["travel_planner"], for: thread.id) + + let secondStream = try await runtime.sendMessage( + UserMessageRequest(text: "Plan a weekend trip."), + in: thread.id + ) + for try await _ in secondStream {} + + let instructions = await backend.receivedInstructions() + XCTAssertEqual(instructions.count, 2) + XCTAssertTrue(instructions[0].contains("[health_coach: Health Coach]")) + XCTAssertFalse(instructions[0].contains("[travel_planner: Travel Planner]")) + XCTAssertTrue(instructions[1].contains("[travel_planner: Travel Planner]")) + XCTAssertFalse(instructions[1].contains("[health_coach: Health Coach]")) + } + func testThreadPersonaUsesBackendBaseInstructionsWhenRuntimeBaseIsUnset() async throws { let backend = InMemoryAgentBackend( baseInstructions: "Base host instructions." @@ -234,6 +382,331 @@ final class AgentRuntimeTests: XCTestCase { } } + func testSetSkillIDsThrowsWhenSkillIsNotRegistered() async throws { + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore() + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + let thread = try await runtime.createThread() + + await XCTAssertThrowsErrorAsync( + try await runtime.setSkillIDs(["travel_planner"], for: thread.id) + ) { error in + XCTAssertEqual( + error as? 
AgentRuntimeError, + .skillsNotFound(["travel_planner"]) + ) + } + } + + func testSkillPolicyBlocksDisallowedToolCalls() async throws { + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "strict_support", + name: "Strict Support", + instructions: "Answer directly.", + executionPolicy: .init( + allowedToolNames: ["allowed_tool"] + ) + ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + try await runtime.registerTool( + ToolDefinition( + name: "demo_lookup_profile", + description: "Lookup profile", + inputSchema: .object([:]), + approvalPolicy: .automatic + ), + executor: AnyToolExecutor { invocation, _ in + .success(invocation: invocation, text: "profile-ok") + } + ) + + let thread = try await runtime.createThread( + title: "Strict Tool Policy", + skillIDs: ["strict_support"] + ) + + let stream = try await runtime.sendMessage( + UserMessageRequest(text: "please use the tool"), + in: thread.id + ) + for try await _ in stream {} + + let messages = await runtime.messages(for: thread.id) + let assistantText = messages + .filter { $0.role == .assistant } + .map(\.text) + .joined(separator: "\n") + XCTAssertTrue(assistantText.contains("not allowed by the active skill policy")) + } + + func testSkillPolicyFailsTurnWhenRequiredToolIsMissing() async throws { + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: 
"requires_tool", + name: "Requires Tool", + instructions: "Use the required tool.", + executionPolicy: .init( + requiredToolNames: ["demo_lookup_profile"] + ) + ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let thread = try await runtime.createThread( + title: "Required Tool", + skillIDs: ["requires_tool"] + ) + + let stream = try await runtime.sendMessage( + UserMessageRequest(text: "hello without tool"), + in: thread.id + ) + + var sawTurnFailed = false + var failureError: AgentRuntimeError? + + do { + for try await event in stream { + if case let .turnFailed(error) = event { + sawTurnFailed = true + failureError = error + } + } + XCTFail("Expected turn stream to throw when required tools are missing.") + } catch { + XCTAssertEqual((error as? AgentRuntimeError)?.code, "skill_required_tools_missing") + } + + XCTAssertTrue(sawTurnFailed) + XCTAssertEqual(failureError?.code, "skill_required_tools_missing") + } + + func testRuntimeRejectsSkillWithInvalidPolicyToolName() async throws { + XCTAssertThrowsError( + try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "invalid_policy", + name: "Invalid Policy", + instructions: "Invalid tool name policy.", + executionPolicy: .init( + requiredToolNames: ["bad tool name"] + ) + ), + ] + )) + ) { error in + XCTAssertEqual((error as? 
AgentRuntimeError)?.code, "invalid_skill_tool_name") + } + } + + func testResolvedInstructionsPreviewIncludesThreadPersonaAndSkills() async throws { + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(baseInstructions: "Base host instructions."), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore(), + skills: [ + .init( + id: "health_coach", + name: "Health Coach", + instructions: "Coach users toward their daily step goals." + ), + ] + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let thread = try await runtime.createThread( + title: "Preview", + personaStack: AgentPersonaStack(layers: [ + .init(name: "planner", instructions: "Act as a planning specialist.") + ]), + skillIDs: ["health_coach"] + ) + + let preview = try await runtime.resolvedInstructionsPreview( + for: thread.id, + request: UserMessageRequest(text: "Give me a plan.") + ) + + XCTAssertTrue(preview.contains("Base host instructions.")) + XCTAssertTrue(preview.contains("Thread Persona Layers:")) + XCTAssertTrue(preview.contains("[planner]")) + XCTAssertTrue(preview.contains("Thread Skills:")) + XCTAssertTrue(preview.contains("[health_coach: Health Coach]")) + } + + func testResolvedInstructionsPreviewThrowsForMissingThread() async throws { + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: InMemoryAgentBackend(), + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore() + )) + + _ = try await runtime.restore() + + await XCTAssertThrowsErrorAsync( + try await runtime.resolvedInstructionsPreview( + for: "missing-thread", + request: 
UserMessageRequest(text: "hello") + ) + ) { error in + XCTAssertEqual(error as? AgentRuntimeError, .threadNotFound("missing-thread")) + } + } + + func testCreateThreadLoadsPersonaFromFileSource() async throws { + let backend = InMemoryAgentBackend( + baseInstructions: "Base host instructions." + ) + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: backend, + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore() + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let personaText = "Act as a migration planning assistant focused on sequencing." + let personaFile = try temporaryFile( + with: personaText, + pathExtension: "txt" + ) + + let thread = try await runtime.createThread( + title: "Dynamic Persona", + personaSource: .file(personaFile) + ) + + let personaStack = try XCTUnwrap(thread.personaStack) + XCTAssertEqual(personaStack.layers.count, 1) + XCTAssertEqual(personaStack.layers[0].instructions, personaText) + + let stream = try await runtime.sendMessage( + UserMessageRequest(text: "Plan this migration."), + in: thread.id + ) + for try await _ in stream {} + + let instructions = await backend.receivedInstructions() + let resolved = try XCTUnwrap(instructions.last) + XCTAssertTrue(resolved.contains("Thread Persona Layers:")) + XCTAssertTrue(resolved.contains(personaText)) + } + + func testRegisterSkillFromFileSourceCanBeUsedInThread() async throws { + let backend = InMemoryAgentBackend( + baseInstructions: "Base host instructions." 
+ ) + let runtime = try AgentRuntime(configuration: .init( + authProvider: DemoChatGPTAuthProvider(), + secureStore: KeychainSessionSecureStore( + service: "CodexKitTests.ChatGPTSession", + account: UUID().uuidString + ), + backend: backend, + approvalPresenter: AutoApprovalPresenter(), + stateStore: InMemoryRuntimeStateStore() + )) + + _ = try await runtime.restore() + _ = try await runtime.signIn() + + let skillJSON = """ + { + "id": "hydration_coach", + "name": "Hydration Coach", + "instructions": "Drive hydration execution with concrete water targets." + } + """ + let skillFile = try temporaryFile( + with: skillJSON, + pathExtension: "json" + ) + + _ = try await runtime.registerSkill(from: .file(skillFile)) + let registeredSkill = await runtime.skill(for: "hydration_coach") + XCTAssertNotNil(registeredSkill) + + let thread = try await runtime.createThread( + title: "Hydration", + skillIDs: ["hydration_coach"] + ) + + let stream = try await runtime.sendMessage( + UserMessageRequest(text: "Give me today's hydration plan."), + in: thread.id + ) + for try await _ in stream {} + + let instructions = await backend.receivedInstructions() + let resolved = try XCTUnwrap(instructions.last) + XCTAssertTrue(resolved.contains("Thread Skills:")) + XCTAssertTrue(resolved.contains("[hydration_coach: Hydration Coach]")) + } + func testImageOnlyMessageIsAcceptedAndPersisted() async throws { let runtime = try AgentRuntime(configuration: .init( authProvider: DemoChatGPTAuthProvider(), @@ -440,6 +913,17 @@ final class AgentRuntimeTests: XCTestCase { XCTAssertTrue(sawToolResult) } + + private func temporaryFile( + with content: String, + pathExtension: String + ) throws -> URL { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + .appendingPathExtension(pathExtension) + try Data(content.utf8).write(to: url, options: .atomic) + return url + } } private actor ImageReplyAgentBackend: AgentBackend {