From b2af17514898f716d43be5ee94e68b210cf43b8b Mon Sep 17 00:00:00 2001 From: Patrick Fuchs Date: Wed, 25 Feb 2026 21:14:46 +0100 Subject: [PATCH] Fix supervisor unstuck vaults added autobalancer callback to find potentially stuck vaults --- .../FlowYieldVaultsAutoBalancers.cdc | 38 +++++- .../FlowYieldVaultsSchedulerRegistry.cdc | 60 +++++++-- .../contracts/FlowYieldVaultsSchedulerV1.cdc | 24 +--- cadence/tests/scheduled_supervisor_test.cdc | 127 ++++++++++++++++++ 4 files changed, 212 insertions(+), 37 deletions(-) diff --git a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc index 5e127d57..7bb0877f 100644 --- a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc +++ b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc @@ -29,6 +29,16 @@ access(all) contract FlowYieldVaultsAutoBalancers { /// The path prefix used for StoragePath & PublicPath derivations access(all) let pathPrefix: String + /// Storage path for the shared execution callback that reports to the registry (one per account) + access(self) let registryReportCallbackStoragePath: StoragePath + + /// Callback resource invoked by each AutoBalancer after execution; calls Registry.reportExecution with the AutoBalancer's id + access(all) resource RegistryReportCallback: DeFiActions.AutoBalancerExecutionCallback { + access(all) fun onExecuted(balancerUUID: UInt64) { + FlowYieldVaultsSchedulerRegistry.reportExecution(yieldVaultID: balancerUUID) + } + } + /* --- PUBLIC METHODS --- */ /// Returns the path (StoragePath or PublicPath) at which an AutoBalancer is stored with the associated @@ -55,7 +65,7 @@ access(all) contract FlowYieldVaultsAutoBalancers { if autoBalancer == nil { return false } - + let txnIDs = autoBalancer!.getScheduledTransactionIDs() for txnID in txnIDs { if autoBalancer!.borrowScheduledTransaction(id: txnID)?.status() == FlowTransactionScheduler.Status.Scheduled { return true } } @@ -79,24 +89,24 @@ access(all) contract FlowYieldVaultsAutoBalancers { if 
autoBalancer == nil { return false } - + // Check if yield vault has recurring config (should be executing periodically) let config = autoBalancer!.getRecurringConfig() if config == nil { return false // Not configured for recurring, can't be "stuck" } - + // Check if there's an active schedule if self.hasActiveSchedule(id: id) { return false // Has active schedule, not stuck } - + // Check if yield vault is overdue let nextExpected = autoBalancer!.calculateNextExecutionTimestampAsConfigured() if nextExpected == nil { return true // Can't calculate next time, likely stuck } - + // If next expected time has passed and no active schedule, yield vault is stuck return nextExpected! < getCurrentBlock().timestamp } @@ -136,6 +146,8 @@ access(all) contract FlowYieldVaultsAutoBalancers { assert(!publishedCap, message: "Published Capability collision found when publishing AutoBalancer for UniqueIdentifier.id \(uniqueID.id) at path \(publicPath)") + let reportCap = self.account.capabilities.storage.issue<&{DeFiActions.AutoBalancerExecutionCallback}>(self.registryReportCallbackStoragePath) + // create & save AutoBalancer with optional recurring config let autoBalancer <- DeFiActions.createAutoBalancer( oracle: oracle, @@ -147,6 +159,7 @@ access(all) contract FlowYieldVaultsAutoBalancers { recurringConfig: recurringConfig, uniqueID: uniqueID ) + autoBalancer.setExecutionCallback(reportCap) self.account.storage.save(<-autoBalancer, to: storagePath) let autoBalancerRef = self._borrowAutoBalancer(uniqueID.id) @@ -210,7 +223,7 @@ access(all) contract FlowYieldVaultsAutoBalancers { let publicPath = self.deriveAutoBalancerPath(id: id, storage: false) as! 
PublicPath // unpublish the public AutoBalancer Capability let _ = self.account.capabilities.unpublish(publicPath) - + // Collect controller IDs first (can't modify during iteration) var controllersToDelete: [UInt64] = [] self.account.capabilities.storage.forEachController(forPath: storagePath, fun(_ controller: &StorageCapabilityController): Bool { @@ -223,13 +236,24 @@ access(all) contract FlowYieldVaultsAutoBalancers { controller.delete() } } - + // load & burn the AutoBalancer (this also handles any pending scheduled transactions via burnCallback) let autoBalancer <-self.account.storage.load<@DeFiActions.AutoBalancer>(from: storagePath) Burner.burn(<-autoBalancer) } + access(self) fun createRegistryReportCallbackImpl(): @RegistryReportCallback { + return <-create RegistryReportCallback() + } + init() { self.pathPrefix = "FlowYieldVaultsAutoBalancer_" + self.registryReportCallbackStoragePath = StoragePath(identifier: "FlowYieldVaultsRegistryReportCallback")! + + // Ensure shared execution callback exists (reports this account's executions to Registry) + if self.account.storage.type(at: self.registryReportCallbackStoragePath) == nil { + self.account.storage.save(<-self.createRegistryReportCallbackImpl(), to: self.registryReportCallbackStoragePath) + } + } } diff --git a/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc b/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc index 645d7e0a..d0f275b6 100644 --- a/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc +++ b/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc @@ -58,6 +58,11 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { /// Stored as a dictionary for O(1) add/remove; iteration gives the pending set access(self) var pendingQueue: {UInt64: Bool} + /// Order for stuck scanning: least recently reported (executed) first. + /// Vaults call reportExecution() on each run (remove id from array, append to end). 
+ /// Supervisor scans only the first MAX_BATCH_SIZE entries for stuck detection. + access(self) var stuckScanOrder: [UInt64] + /* --- ACCOUNT-LEVEL FUNCTIONS --- */ /// Register a YieldVault and store its handler and schedule capabilities (idempotent) @@ -73,9 +78,27 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { self.yieldVaultRegistry[yieldVaultID] = true self.handlerCaps[yieldVaultID] = handlerCap self.scheduleCaps[yieldVaultID] = scheduleCap + self.stuckScanOrder.append(yieldVaultID) emit YieldVaultRegistered(yieldVaultID: yieldVaultID) } + /// Called by the account that holds this contract (e.g. from the wrapper) on every execution. Removes yieldVaultID from stuckScanOrder (if present) + /// and appends it to the end so the Supervisor only scans the first N (least recently executed) for stuck. + access(account) fun reportExecution(yieldVaultID: UInt64) { + if !(self.yieldVaultRegistry[yieldVaultID] ?? false) { + return + } + var i = 0 + while i < self.stuckScanOrder.length { + if self.stuckScanOrder[i] == yieldVaultID { + self.stuckScanOrder.remove(at: i) + break + } + i = i + 1 + } + self.stuckScanOrder.append(yieldVaultID) + } + /// Adds a yield vault to the pending queue for seeding by the Supervisor access(account) fun enqueuePending(yieldVaultID: UInt64) { if self.yieldVaultRegistry[yieldVaultID] == true { @@ -92,12 +115,20 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { } } - /// Unregister a YieldVault (idempotent) - removes from registry, capabilities, and pending queue + /// Unregister a YieldVault (idempotent) - removes from registry, capabilities, pending queue, and stuckScanOrder access(account) fun unregister(yieldVaultID: UInt64) { self.yieldVaultRegistry.remove(key: yieldVaultID) self.handlerCaps.remove(key: yieldVaultID) self.scheduleCaps.remove(key: yieldVaultID) let pending = self.pendingQueue.remove(key: yieldVaultID) + var i = 0 + while i < self.stuckScanOrder.length { + if self.stuckScanOrder[i] == yieldVaultID { + 
self.stuckScanOrder.remove(at: i) + break + } + i = i + 1 + } emit YieldVaultUnregistered(yieldVaultID: yieldVaultID, wasInPendingQueue: pending != nil) } @@ -156,19 +187,19 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { /// Get paginated pending yield vault IDs /// @param page: The page number (0-indexed) /// @param size: The page size (defaults to MAX_BATCH_SIZE if nil) - access(all) view fun getPendingYieldVaultIDsPaginated(page: Int, size: Int?): [UInt64] { - let pageSize = size ?? self.MAX_BATCH_SIZE + access(all) view fun getPendingYieldVaultIDsPaginated(page: Int, size: UInt?): [UInt64] { + let pageSize = size != nil ? Int(size!) : self.MAX_BATCH_SIZE let allPending = self.pendingQueue.keys - let startIndex = page * pageSize - + let startIndex = page * Int(pageSize) + if startIndex >= allPending.length { return [] } - - let endIndex = startIndex + pageSize > allPending.length - ? allPending.length - : startIndex + pageSize - + + let endIndex = startIndex + Int(pageSize) > allPending.length + ? allPending.length + : startIndex + Int(pageSize) + + return allPending.slice(from: startIndex, upTo: endIndex) } @@ -177,6 +208,14 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { return self.pendingQueue.length } + /// Returns the first n yield vault IDs from the stuck-scan order (least recently executed first). + /// Supervisor should only scan these for stuck detection instead of all registered vaults. + /// @param limit: Maximum number of IDs to return (caller typically passes MAX_BATCH_SIZE) + access(all) view fun getStuckScanCandidates(limit: UInt): [UInt64] { + let end = limit > UInt(self.stuckScanOrder.length) ? 
UInt(self.stuckScanOrder.length) : limit + return self.stuckScanOrder.slice(from: 0, upTo: Int(end)) + } + /// Get global Supervisor capability, if set /// NOTE: Access restricted - only used internally by the scheduler access(account) @@ -193,6 +232,7 @@ access(all) contract FlowYieldVaultsSchedulerRegistry { self.handlerCaps = {} self.scheduleCaps = {} self.pendingQueue = {} + self.stuckScanOrder = [] } } diff --git a/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc b/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc index be81a875..4af44109 100644 --- a/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc +++ b/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc @@ -186,24 +186,8 @@ access(all) contract FlowYieldVaultsSchedulerV1 { // STEP 1: State-based detection - scan for stuck yield vaults if scanForStuck { - // TODO: add pagination - this will inevitably fails and at minimum creates inconsistent execution - // effort between runs - let registeredYieldVaults = FlowYieldVaultsSchedulerRegistry.getRegisteredYieldVaultIDs() - var scanned = 0 - for yieldVaultID in registeredYieldVaults { - if scanned >= FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE { - break - } - scanned = scanned + 1 - - // Skip if already in pending queue - // TODO: This is extremely inefficient - accessing from mapping is preferrable to iterating over - // an array - if FlowYieldVaultsSchedulerRegistry.getPendingYieldVaultIDs().contains(yieldVaultID) { - continue - } - - // Check if yield vault is stuck (has recurring config, no active schedule, overdue) + let candidates = FlowYieldVaultsSchedulerRegistry.getStuckScanCandidates(limit: UInt(FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE)) + for yieldVaultID in candidates { if FlowYieldVaultsAutoBalancers.isStuckYieldVault(id: yieldVaultID) { FlowYieldVaultsSchedulerRegistry.enqueuePending(yieldVaultID: yieldVaultID) emit StuckYieldVaultDetected(yieldVaultID: yieldVaultID) @@ -213,7 +197,7 @@ access(all) contract FlowYieldVaultsSchedulerV1 { // STEP 2: 
Process pending yield vaults - recover them via Schedule capability let pendingYieldVaults = FlowYieldVaultsSchedulerRegistry.getPendingYieldVaultIDsPaginated(page: 0, size: nil) - + for yieldVaultID in pendingYieldVaults { // Get Schedule capability for this yield vault let scheduleCap = FlowYieldVaultsSchedulerRegistry.getScheduleCap(yieldVaultID: yieldVaultID) @@ -457,7 +441,7 @@ access(all) contract FlowYieldVaultsSchedulerV1 { // Initialize paths self.SupervisorStoragePath = /storage/FlowYieldVaultsSupervisor - + // Configure Supervisor at deploy time self.ensureSupervisorConfigured() } diff --git a/cadence/tests/scheduled_supervisor_test.cdc b/cadence/tests/scheduled_supervisor_test.cdc index eada6f15..dff90dc6 100644 --- a/cadence/tests/scheduled_supervisor_test.cdc +++ b/cadence/tests/scheduled_supervisor_test.cdc @@ -913,3 +913,130 @@ fun testInsufficientFundsAndRecovery() { log("- All ".concat(activeScheduleCount.toString()).concat(" yield vaults have active schedules")) log("========================================") } + +/// Supervisor batch recovery: 200 stuck vaults, no capacity-probe loop. +/// +/// Flow: create 200 yield vaults, run 2 scheduling rounds, drain FLOW so executions fail, +/// wait for vaults to be marked stuck, refund FLOW, schedule the supervisor, then advance +/// time for ceil(200/MAX_BATCH_SIZE)+10 supervisor ticks. Asserts all 200 vaults are +/// recovered (YieldVaultRecovered events), none still stuck, and all have active schedules. +/// The +10 extra ticks are a buffer so every vault is processed despite scheduler timing. +access(all) +fun testSupervisorHandlesManyStuckVaults() { + let n = 200 + let maxBatchSize = FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE + + if snapshot != getCurrentBlockHeight() { + Test.reset(to: snapshot) + } + + // 1. 
Setup: user, FLOW, and grant + let user = Test.createAccount() + mintFlow(to: user, amount: 100000.0) + grantBeta(flowYieldVaultsAccount, user) + mintFlow(to: flowYieldVaultsAccount, amount: 10000.0) + + // 2. Create n yield vaults in batch (Test.executeTransactions) + var i = 0 + let tx = Test.Transaction( + code: Test.readFile("../transactions/flow-yield-vaults/create_yield_vault.cdc"), + authorizers: [user.address], + signers: [user], + arguments: [strategyIdentifier, flowTokenIdentifier, 5.0] + ) + var txs: [Test.Transaction] = [] + while i < n { + txs.append(tx) + i = i + 1 + } + let results = Test.executeTransactions(txs) + for result in results { + Test.expect(result, Test.beSucceeded()) + } + log("testSupervisorHandlesManyStuckVaults: created \(n.toString()) yield vaults") + + let yieldVaultIDs = getYieldVaultIDs(address: user.address)! + Test.assert(yieldVaultIDs.length == n, message: "expected \(n.toString()) vaults, got \(yieldVaultIDs.length.toString())") + + // 3. Two scheduling rounds so vaults run once + setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: flowTokenIdentifier, price: 1.5) + setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: yieldTokenIdentifier, price: 1.2) + Test.moveTime(by: 60.0 * 10.0 + 10.0) + Test.commitBlock() + Test.moveTime(by: 60.0 * 10.0 + 10.0) + Test.commitBlock() + + // 4. Drain FLOW so subsequent executions fail and vaults become stuck + let balanceBeforeDrain = (executeScript( + "../scripts/flow-yield-vaults/get_flow_balance.cdc", + [flowYieldVaultsAccount.address] + ).returnValue! as! UFix64) + if balanceBeforeDrain > 0.01 { + let drainRes = _executeTransaction( + "../transactions/flow-yield-vaults/drain_flow.cdc", + [balanceBeforeDrain - 0.001], + flowYieldVaultsAccount + ) + Test.expect(drainRes, Test.beSucceeded()) + } + log("testSupervisorHandlesManyStuckVaults: drained FLOW, waiting for vaults to be marked stuck") + + // 5. 
Wait rounds until vaults are marked stuck + var waitRound = 0 + while waitRound < 6 { + Test.moveTime(by: 60.0 * 10.0 + 10.0) + Test.commitBlock() + waitRound = waitRound + 1 + } + + // 6. Refund FLOW and schedule supervisor + mintFlow(to: flowYieldVaultsAccount, amount: 500.0) + Test.commitBlock() + Test.moveTime(by: 1.0) + Test.commitBlock() + + let interval = 60.0 * 10.0 + let schedSupRes = _executeTransaction( + "../transactions/flow-yield-vaults/admin/schedule_supervisor.cdc", + [interval, UInt8(1), UInt64(5000), true], + flowYieldVaultsAccount + ) + Test.expect(schedSupRes, Test.beSucceeded()) + + // 7. Advance time for supervisor ticks (ceil(n/MAX_BATCH_SIZE)+10); each tick processes a batch + let supervisorRunsNeeded = (UInt(n) + UInt(maxBatchSize) - 1) / UInt(maxBatchSize) + var run = 0 as UInt + while run < supervisorRunsNeeded + 10 { + Test.moveTime(by: 60.0 * 10.0 + 10.0) + Test.commitBlock() + run = run + 1 + } + log("testSupervisorHandlesManyStuckVaults: ran \((supervisorRunsNeeded + 10).toString()) supervisor ticks") + + let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()) + Test.assert(recoveredEvents.length >= n, message: "expected at least \(n.toString()) recovered, got \(recoveredEvents.length.toString())") + log("testSupervisorHandlesManyStuckVaults: recovered \(recoveredEvents.length.toString()) vaults") + + // 8. Health check: none stuck, all have active schedules + var stillStuck = 0 + var activeCount = 0 + for yieldVaultID in yieldVaultIDs { + let isStuckRes = executeScript( + "../scripts/flow-yield-vaults/is_stuck_yield_vault.cdc", + [yieldVaultID] + ) + if isStuckRes.returnValue != nil && (isStuckRes.returnValue! as! Bool) { + stillStuck = stillStuck + 1 + } + let hasActiveRes = executeScript( + "../scripts/flow-yield-vaults/has_active_schedule.cdc", + [yieldVaultID] + ) + if hasActiveRes.returnValue != nil && (hasActiveRes.returnValue! as! 
Bool) { + activeCount = activeCount + 1 + } + } + Test.assert(stillStuck == 0, message: "expected 0 stuck, got \(stillStuck.toString())") + Test.assert(activeCount == n, message: "expected \(n.toString()) active, got \(activeCount.toString())") + log("testSupervisorHandlesManyStuckVaults: all \(n.toString()) vaults healthy, active schedules: \(activeCount.toString())") +} \ No newline at end of file