diff --git a/API-INTERNAL.md b/API-INTERNAL.md index b3cfeb90a..54dc85d21 100644 --- a/API-INTERNAL.md +++ b/API-INTERNAL.md @@ -79,12 +79,18 @@ If the requested key is a collection, it will return an object with all the coll

Remove a key from Onyx and update the subscribers

retryOperation()
-

Handles storage operation failures based on the error type:

+

Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer:

broadcastUpdate()
@@ -318,11 +324,17 @@ Remove a key from Onyx and update the subscribers ## retryOperation() -Handles storage operation failures based on the error type: -- Storage capacity errors: evicts data and retries the operation -- Invalid data errors: logs an alert and throws an error -- Non-retriable errors: logs an alert and resolves without retrying -- Other errors: retries the operation +Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer: +- INVALID_DATA: logs an alert and throws (the same data will always fail). +- TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. +- CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. +- UNKNOWN: the provider couldn't classify it — log the full error shape (name + message + + provider) once so it's visible, then bounded retry without eviction. **Kind**: global function diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index c88b2e170..4979b3c59 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -6,8 +6,9 @@ import * as Logger from './Logger'; import type Onyx from './Onyx'; import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; -import * as Str from './Str'; +import StorageCircuitBreaker from './StorageCircuitBreaker'; import Storage from './storage'; +import {StorageErrorClass} from './storage/errors'; import type { CollectionKeyBase, ConnectOptions, @@ -49,26 +50,6 @@ const METHOD = { CLEAR: 'clear', } as const; -// IndexedDB errors that indicate storage capacity issues where eviction can help -const IDB_STORAGE_ERRORS = [ - 'quotaexceedederror', // Browser storage quota exceeded -] as const; - -// SQLite errors that indicate storage capacity issues where eviction can help -const SQLITE_STORAGE_ERRORS = [ - 'database or disk is full', // Device storage is full -] as const; - -const STORAGE_ERRORS = [...IDB_STORAGE_ERRORS, ...SQLITE_STORAGE_ERRORS]; - -// IndexedDB errors where retrying is futile because the underlying connection/store is broken. -// The healing path (separate from retryOperation) is responsible for recovery. -const IDB_NON_RETRIABLE_ERRORS = [ - 'internal error opening backing store', // LevelDB backing store is broken at the filesystem level -] as const; - -const NON_RETRIABLE_ERRORS = [...IDB_NON_RETRIABLE_ERRORS]; - // Max number of retries for failed storage operations const MAX_STORAGE_OPERATION_RETRY_ATTEMPTS = 5; @@ -425,7 +406,9 @@ function multiGet(keys: CollectionKeyBase[]): Promise|OnyxEntry>`, which is not what we want. This preserves the order of the keys provided. */ function tupleGet(keys: Keys): Promise<{[Index in keyof Keys]: OnyxValue}> { - return Promise.all(keys.map((key) => get(key))) as Promise<{[Index in keyof Keys]: OnyxValue}>; + return Promise.all(keys.map((key) => get(key))) as Promise<{ + [Index in keyof Keys]: OnyxValue; + }>; } /** @@ -597,7 +580,10 @@ function keysChanged( } try { - lastConnectionCallbackData.set(subscriber.subscriptionID, {value: cachedCollection, matchedKey: subscriber.key}); + lastConnectionCallbackData.set(subscriber.subscriptionID, { + value: cachedCollection, + matchedKey: subscriber.key, + }); if (subscriber.waitForCollectionCallback) { subscriber.callback(cachedCollection, subscriber.key, partialCollection); @@ -638,7 +624,10 @@ function keysChanged( try { const subscriberCallback = subscriber.callback as DefaultConnectCallback; subscriberCallback(cachedCollection[subscriber.key], subscriber.key as TKey); - lastConnectionCallbackData.set(subscriber.subscriptionID, {value: cachedCollection[subscriber.key], matchedKey: subscriber.key}); + lastConnectionCallbackData.set(subscriber.subscriptionID, { + value: cachedCollection[subscriber.key], + matchedKey: subscriber.key, + }); } catch (error) { Logger.logAlert(`[OnyxUtils.keysChanged] Subscriber callback threw an error for key '${collectionKey}': ${error}`); } @@ -709,15 +698,23 @@ function keyChanged( cachedCollection = getCachedCollection(subscriber.key); cachedCollections[subscriber.key] = cachedCollection; } - lastConnectionCallbackData.set(subscriber.subscriptionID, {value: cachedCollection, matchedKey: subscriber.key}); - subscriber.callback(cachedCollection, subscriber.key, {[key]: value}); + lastConnectionCallbackData.set(subscriber.subscriptionID, { + value: cachedCollection, + matchedKey: subscriber.key, + }); + subscriber.callback(cachedCollection, subscriber.key, { + [key]: value, + }); continue; } const subscriberCallback = subscriber.callback as DefaultConnectCallback; subscriberCallback(value, key); - lastConnectionCallbackData.set(subscriber.subscriptionID, {value, matchedKey: key}); + lastConnectionCallbackData.set(subscriber.subscriptionID, { + value, + matchedKey: key, + }); continue; } catch (error) { Logger.logAlert(`[OnyxUtils.keyChanged] Subscriber callback threw an error for key '${key}': ${error}`); @@ -791,8 +788,11 @@ function remove(key: TKey, isProcessingCollectionUpdate?: function reportStorageQuota(error?: Error): Promise { return Storage.getDatabaseSize() .then(({bytesUsed, bytesRemaining, usageDetails}) => { + // `bytesRemaining` comes from navigator.storage.estimate() and is an ORIGIN-WIDE estimate, + // not headroom for this database. The browser allocates IndexedDB storage dynamically, so a + // QuotaExceededError can legitimately occur even when this number still looks large. Logger.logInfo( - `Storage Quota Check -- bytesUsed: ${bytesUsed} bytesRemaining: ${bytesRemaining}${ + `Storage Quota Check -- bytesUsed: ${bytesUsed} originWideBytesRemaining (estimate, not per-DB headroom): ${bytesRemaining}${ usageDetails ? ` usageDetails: ${JSON.stringify(usageDetails)}` : '' }. Original error: ${error}`, ); @@ -803,11 +803,17 @@ function reportStorageQuota(error?: Error): Promise { } /** - * Handles storage operation failures based on the error type: - * - Storage capacity errors: evicts data and retries the operation - * - Invalid data errors: logs an alert and throws an error - * - Non-retriable errors: logs an alert and resolves without retrying - * - Other errors: retries the operation + * Handles storage operation failures based on the error class (see lib/storage/errors.ts). + * The connection layer (createStore) owns connection/transport recovery; this operation layer owns + * capacity recovery (eviction) so that a given failure is retried by exactly one layer: + * - INVALID_DATA: logs an alert and throws (the same data will always fail). + * - TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + * and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. + * - CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + * circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + * progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. + * - UNKNOWN: the provider couldn't classify it — log the full error shape (name + message + + * provider) once so it's visible, then bounded retry without eviction. */ function retryOperation( error: Error, @@ -818,34 +824,57 @@ function retryOperation( ): Promise { const currentRetryAttempt = retryAttempt ?? 0; const nextRetryAttempt = currentRetryAttempt + 1; + const errorClass = Storage.classifyError(error); - Logger.logInfo(`Failed to save to storage. Error: ${error}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); + // Once the breaker is open, every capacity write is going to fail the same way. Drop it silently — + // the breaker already emitted its single alert, and logging per failed write is exactly the storm + // we are suppressing. (We return before the log line below on purpose.) + if (errorClass === StorageErrorClass.CAPACITY && StorageCircuitBreaker.isTripped()) { + return Promise.resolve(); + } + + Logger.logInfo( + `Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`, + ); - if (error && Str.startsWith(error.message, "Failed to execute 'put' on 'IDBObjectStore'")) { + if (errorClass === StorageErrorClass.INVALID_DATA) { Logger.logAlert(`Attempted to set invalid data set in Onyx. Please ensure all data is serializable. Error: ${error}`); throw error; } - const errorMessage = error?.message?.toLowerCase?.(); - const errorName = error?.name?.toLowerCase?.(); - const isStorageCapacityError = STORAGE_ERRORS.some((storageError) => errorName?.includes(storageError) || errorMessage?.includes(storageError)); - const isNonRetriableError = NON_RETRIABLE_ERRORS.some((nonRetriableError) => errorName?.includes(nonRetriableError) || errorMessage?.includes(nonRetriableError)); - - if (isNonRetriableError) { - Logger.logAlert(`Storage operation skipped retry for non-retriable error. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + if (errorClass === StorageErrorClass.TRANSIENT || errorClass === StorageErrorClass.FATAL) { + Logger.logInfo(`Storage operation skipped retry; ${errorClass} errors are handled by the connection layer. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } if (nextRetryAttempt > MAX_STORAGE_OPERATION_RETRY_ATTEMPTS) { - Logger.logAlert(`Storage operation failed after 5 retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + Logger.logAlert(`Storage operation failed after ${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS} retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } - if (!isStorageCapacityError) { + if (errorClass === StorageErrorClass.UNKNOWN) { + // UNKNOWN is the blind spot: the active provider's classifier did not recognize this error, so + // we cannot route it to a real recovery strategy. Log the full error shape (name + message + + // provider) once per operation so telemetry can reveal what lives in UNKNOWN, letting us promote + // recurring cases into TRANSIENT/CAPACITY/FATAL. Logged on the first attempt only to avoid the + // per-retry amplification this mechanism is trying to kill. Then bounded retry without eviction. + if (currentRetryAttempt === 0) { + Logger.logAlert(`Unclassified storage error. provider: ${Storage.getStorageProvider().name}. name: ${error?.name}. message: ${error?.message}. onyxMethod: ${onyxMethod.name}.`); + } // @ts-expect-error No overload matches this call. return onyxMethod(defaultParams, nextRetryAttempt); } + // CAPACITY: feed the session-level circuit breaker before evicting. The per-operation budget above + // cannot stop a session-wide storm — each evicted key triggers an OnyxDerived recompute that spawns + // a fresh write with its own budget — so the breaker is what actually halts the meltdown. (The + // already-open case returned silently at the top of this function.) + StorageCircuitBreaker.recordCapacityFailure(); + if (StorageCircuitBreaker.isTripped()) { + // This failure tripped the breaker; it already emitted its single alert. Stop here. + return Promise.resolve(); + } + // Find the least recently accessed evictable key that we can remove. Never evict an in-flight // key — its cache value is the merge base this retry depends on, so dropping it would truncate // the write to just the delta and diverge cache from storage. @@ -858,9 +887,11 @@ function retryOperation( return reportStorageQuota(error); } - // Remove the least recently accessed key and retry. + // Remove the least recently accessed key and retry. Tell the breaker we evicted so that, if the + // retry comes back as another capacity failure, it counts as a no-progress cycle. Logger.logInfo(`Out of storage. Evicting least recently accessed key (${keyForRemoval}) and retrying. Error: ${error}`); reportStorageQuota(error); + StorageCircuitBreaker.recordEviction(); // @ts-expect-error No overload matches this call. return remove(keyForRemoval).then(() => onyxMethod(defaultParams, nextRetryAttempt)); @@ -1249,7 +1280,10 @@ function updateSnapshots(data: Array>, me const keysToCopy = new Set([...snapshotExistingKeys, ...allowedNewKeys]); const newValue = typeof value === 'object' && value !== null ? utils.pick(value as Record, [...keysToCopy]) : {}; - updatedData = {...updatedData, [key]: Object.assign(oldValue, newValue)}; + updatedData = { + ...updatedData, + [key]: Object.assign(oldValue, newValue), + }; } // Skip the update if there's no data to be merged @@ -1347,6 +1381,7 @@ function setWithRetry({key, value, options}: SetParams StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, setWithRetry, {key, value: valueWithoutNestedNullValues, options}, retryAttempt)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.SET, key, valueWithoutNestedNullValues); @@ -1387,7 +1422,13 @@ function multiSetWithRetry(data: OnyxMultiSetInput, retryAttempt?: number): Prom // via a single batched keysChanged() call instead of one keyChanged() per member. For each // collection, `partial` holds the new values being set and `previous` holds the cached values // from before the set, which keysChanged() uses to skip subscribers whose value didn't change. - const collectionBatches = new Map>; previous: Record>}>(); + const collectionBatches = new Map< + string, + { + partial: Record>; + previous: Record>; + } + >(); for (const [key, value] of keyValuePairsToSet) { // When we use multiSet to set a key we want to clear the current delta changes from Onyx.merge that were queued @@ -1441,6 +1482,7 @@ function multiSetWithRetry(data: OnyxMultiSetInput, retryAttempt?: number): Prom const inFlightKeys = new Set(keyValuePairsToSet.map(([key]) => key)); return Storage.multiSet(keyValuePairsToStore) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, multiSetWithRetry, newData, retryAttempt, inFlightKeys)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.MULTI_SET, undefined, newData); @@ -1520,6 +1562,7 @@ function setCollectionWithRetry({collectionKey, const inFlightKeys = new Set(keyValuePairs.map(([key]) => key)); return Storage.multiSet(keyValuePairs) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => OnyxUtils.retryOperation(error, setCollectionWithRetry, {collectionKey, collection}, retryAttempt, inFlightKeys)) .then(() => { OnyxUtils.sendActionToDevTools(OnyxUtils.METHOD.SET_COLLECTION, undefined, mutableCollection); @@ -1630,7 +1673,10 @@ function mergeCollectionWithPatches( const keyValuePairsForNewCollection = prepareKeyValuePairsForStorage(newCollection, true); // finalMergedCollection contains all the keys that were merged, without the keys of incompatible updates - const finalMergedCollection = {...existingKeyCollection, ...newCollection}; + const finalMergedCollection = { + ...existingKeyCollection, + ...newCollection, + }; // Pre-warm cache for cache-miss existingKeys so cache.merge() merges the new delta into // the real previous storage value. Fast path (all warm) skips the pre-warm to preserve @@ -1675,11 +1721,17 @@ function mergeCollectionWithPatches( const inFlightKeys = new Set(Object.keys(finalMergedCollection)); return Promise.all(promises) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => retryOperation( error, mergeCollectionWithPatches, - {collectionKey, collection: resultCollection as OnyxMergeCollectionInput, mergeReplaceNullPatches, isProcessingCollectionUpdate}, + { + collectionKey, + collection: resultCollection as OnyxMergeCollectionInput, + mergeReplaceNullPatches, + isProcessingCollectionUpdate, + }, retryAttempt, inFlightKeys, ), @@ -1752,6 +1804,7 @@ function partialSetCollection({collectionKey, co const inFlightKeys = new Set(keyValuePairs.map(([key]) => key)); return Storage.multiSet(keyValuePairs) + .then(() => StorageCircuitBreaker.recordWriteSuccess()) .catch((error) => retryOperation(error, partialSetCollection, {collectionKey, collection}, retryAttempt, inFlightKeys)) .then(() => { sendActionToDevTools(METHOD.SET_COLLECTION, undefined, mutableCollection); diff --git a/lib/StorageCircuitBreaker.ts b/lib/StorageCircuitBreaker.ts new file mode 100644 index 000000000..d7c55677f --- /dev/null +++ b/lib/StorageCircuitBreaker.ts @@ -0,0 +1,120 @@ +import * as Logger from './Logger'; + +/** + * Process-scoped circuit breaker for storage CAPACITY failures. + * + * The per-operation retry budget in `OnyxUtils.retryOperation` cannot stop a session-level storm: + * each evict -> OnyxDerived recompute -> new write starts its own fresh budget, so a full disk or + * exhausted quota can drive tens of thousands of evict+retry cycles that never make progress and + * freeze the app. This breaker is the session-level brake — `retryOperation` consults it before + * every eviction. + * + * It trips when EITHER: + * - capacity failures within {@link ROLLING_WINDOW_MS} exceed {@link FAILURE_THRESHOLD}, or + * - {@link NO_PROGRESS_CAP} consecutive evictions are each immediately followed by another capacity + * failure (the eviction freed nothing the next write could use — a no-progress cycle). This is a + * cheap proxy for `getDatabaseSize()`, which is costly and only reports origin-wide usage. + * + * On trip it emits exactly ONE alert and self-resets once the rolling window clears, so a persistent + * condition produces at most one alert per window instead of one log line per failed write. + */ + +/** Rolling window over which capacity failures are counted, and how long a trip stays open. */ +const ROLLING_WINDOW_MS = 60 * 1000; + +/** Capacity failures within the window above which the breaker trips (storm backstop). */ +const FAILURE_THRESHOLD = 50; + +/** Consecutive no-progress evictions (evict -> still capacity failure) above which the breaker trips. */ +const NO_PROGRESS_CAP = 5; + +let failureTimestamps: number[] = []; +let consecutiveNoProgressEvictions = 0; +let evictionAwaitingResult = false; +let trippedUntil = 0; + +function reset(): void { + failureTimestamps = []; + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + trippedUntil = 0; +} + +/** Whether the breaker is currently open. Self-resets once the window since the trip has cleared. */ +function isTripped(): boolean { + if (trippedUntil === 0) { + return false; + } + if (Date.now() >= trippedUntil) { + reset(); + return false; + } + return true; +} + +function trip(reason: string): void { + trippedUntil = Date.now() + ROLLING_WINDOW_MS; + Logger.logAlert(`Storage circuit breaker tripped: ${reason}. Halting eviction/retry for ${ROLLING_WINDOW_MS / 1000}s to stop a storage failure storm.`); +} + +/** + * Record a CAPACITY failure. Call once per capacity failure in `retryOperation`, BEFORE deciding + * whether to evict; then check {@link isTripped} to decide whether to proceed. + */ +function recordCapacityFailure(): void { + // While open, recording is a no-op: no extra timestamps, no second alert, and nothing to keep the + // window from clearing. `isTripped()` self-resets here once the window has elapsed. + if (isTripped()) { + return; + } + + const now = Date.now(); + failureTimestamps = failureTimestamps.filter((timestamp) => now - timestamp < ROLLING_WINDOW_MS); + + // A fresh storm (nothing left in the window) resets the no-progress tracking so a stale eviction + // from an earlier, unrelated incident can't be miscounted as no-progress for this one. + if (failureTimestamps.length === 0) { + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + } + + // We evicted on the previous cycle and we're back here with another capacity failure, so that + // eviction freed no usable space. + if (evictionAwaitingResult) { + consecutiveNoProgressEvictions += 1; + evictionAwaitingResult = false; + } + + failureTimestamps.push(now); + + if (failureTimestamps.length > FAILURE_THRESHOLD) { + trip(`${failureTimestamps.length} capacity failures within ${ROLLING_WINDOW_MS / 1000}s`); + return; + } + if (consecutiveNoProgressEvictions >= NO_PROGRESS_CAP) { + trip(`${consecutiveNoProgressEvictions} consecutive evictions freed no usable space`); + } +} + +/** Record that `retryOperation` just evicted a key, so the next capacity failure counts as no-progress. */ +function recordEviction(): void { + evictionAwaitingResult = true; +} + +/** + * Record that a storage write SUCCEEDED. If an eviction was awaiting its verdict, the eviction freed + * usable space — so it must NOT later be miscounted as a no-progress cycle by the next capacity + * failure. Clear the pending flag and reset the consecutive no-progress streak (a success breaks the + * streak). No-op when no eviction is pending (the common case), so it's cheap to call on every write. + */ +function recordWriteSuccess(): void { + if (!evictionAwaitingResult) { + return; + } + evictionAwaitingResult = false; + consecutiveNoProgressEvictions = 0; +} + +const StorageCircuitBreaker = {recordCapacityFailure, recordEviction, recordWriteSuccess, isTripped, reset, ROLLING_WINDOW_MS, FAILURE_THRESHOLD, NO_PROGRESS_CAP}; + +export default StorageCircuitBreaker; diff --git a/lib/storage/__mocks__/index.ts b/lib/storage/__mocks__/index.ts index a4456a7e7..90c75fd62 100644 --- a/lib/storage/__mocks__/index.ts +++ b/lib/storage/__mocks__/index.ts @@ -1,11 +1,24 @@ import MemoryOnlyProvider, {mockStore, setMockStore} from '../providers/MemoryOnlyProvider'; +import classifyIDBError from '../providers/IDBKeyValProvider/classifyError'; +import classifySQLiteError from '../providers/classifySQLiteError'; +import {StorageErrorClass} from '../errors'; const init = jest.fn(MemoryOnlyProvider.init); init(); +// Tests exercise retryOperation against both IndexedDB- and SQLite-shaped errors, so the mock facade +// classifies with each engine's real (native-dep-free) classifier in turn. Mirrors how the real facade +// delegates to the active provider; here we cover both engines since one mock stands in for all. +const classifyError = (error: unknown) => { + const idbClass = classifyIDBError(error); + return idbClass === StorageErrorClass.UNKNOWN ? classifySQLiteError(error) : idbClass; +}; + const StorageMock = { init, + classifyError: jest.fn(classifyError), + getStorageProvider: jest.fn(() => MemoryOnlyProvider), getItem: jest.fn(MemoryOnlyProvider.getItem), multiGet: jest.fn(MemoryOnlyProvider.multiGet), setItem: jest.fn(MemoryOnlyProvider.setItem), diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts new file mode 100644 index 000000000..607e19756 --- /dev/null +++ b/lib/storage/errors.ts @@ -0,0 +1,43 @@ +import type {ValueOf} from 'type-fest'; + +/** + * Shared vocabulary for storage write failures. The *classes* are engine-agnostic; the *matching* + * is not — each storage provider knows its own error dialect and owns its classifier (see each + * provider's `classifyError`). This module deliberately holds NO string matchers: it is the common + * taxonomy the two reacting layers agree on, while the per-engine knowledge lives with the engine. + * + * - the connection layer (`createStore`) recovers TRANSIENT and FATAL errors by reopening the DB, and + * - the operation layer (`OnyxUtils.retryOperation`) recovers CAPACITY by eviction and retries UNKNOWN. + * + * This module has no Onyx dependencies (and no engine dependencies) so it can live in the storage + * layer, and be imported by every provider, without creating an import cycle. + */ +const StorageErrorClass = { + /** Connection/transport failure (stale connection). Owner: connection layer — reopen + retry once. */ + TRANSIENT: 'transient', + /** Quota exceeded / disk full. Owner: operation layer — evict and retry. */ + CAPACITY: 'capacity', + /** Non-serializable payload. Never retriable — the same data will always fail. */ + INVALID_DATA: 'invalidData', + /** Backing-store corruption. Owner: connection layer — budgeted heal, then give up. */ + FATAL: 'fatal', + /** Unmatched by the active provider. Owner: operation layer — bounded retry, and log the shape so + * recurring cases can be promoted into one of the classes above. */ + UNKNOWN: 'unknown', +} as const; + +type StorageErrorClassValue = ValueOf; + +/** + * Normalizes any thrown value into a lowercased `{name, message}` pair for matching. Shared by every + * provider's classifier so they all extract the error the same way. + */ +function getErrorParts(error: unknown): {name: string; message: string} { + if (error instanceof Error || error instanceof DOMException) { + return {name: (error.name ?? '').toLowerCase(), message: (error.message ?? '').toLowerCase()}; + } + return {name: '', message: String(error ?? '').toLowerCase()}; +} + +export {StorageErrorClass, getErrorParts}; +export type {StorageErrorClassValue}; diff --git a/lib/storage/index.ts b/lib/storage/index.ts index 6dcd8e0bd..414a0a0ca 100644 --- a/lib/storage/index.ts +++ b/lib/storage/index.ts @@ -49,6 +49,12 @@ const storage: Storage = { return provider; }, + /** + * Classifies a write error using the active provider's own classifier. Synchronous and pure — + * never wrapped in tryOrDegradePerformance. + */ + classifyError: (error) => provider.classifyError(error), + /** * Initializes all providers in the list of storage providers * and enables fallback providers if necessary diff --git a/lib/storage/providers/IDBKeyValProvider/classifyError.ts b/lib/storage/providers/IDBKeyValProvider/classifyError.ts new file mode 100644 index 000000000..9e4834a00 --- /dev/null +++ b/lib/storage/providers/IDBKeyValProvider/classifyError.ts @@ -0,0 +1,45 @@ +import {StorageErrorClass, getErrorParts} from '../../errors'; +import type {StorageErrorClassValue} from '../../errors'; + +/** + * Classifies an IndexedDB write failure into the shared storage taxonomy (lib/storage/errors.ts). + * Matching is done on the lowercased error name and message. This is the IndexedDB engine's own + * dialect — it is NOT shared with other engines. + */ +function classifyIDBError(error: unknown): StorageErrorClassValue { + const {name, message} = getErrorParts(error); + + // Non-serializable data passed to IDBObjectStore.put — retrying is futile. + if (message.includes("failed to execute 'put' on 'idbobjectstore'")) { + return StorageErrorClass.INVALID_DATA; + } + + // Browser quota exceeded. + if (name.includes('quotaexceedederror') || message.includes('quotaexceedederror')) { + return StorageErrorClass.CAPACITY; + } + + // Backing-store corruption (Chromium LevelDB). Recoverable only via a budgeted reopen. + if (message.includes('internal error opening backing store')) { + return StorageErrorClass.FATAL; + } + + // Transient connection/transport failures — the cached connection is stale and a reopen fixes it: + // - InvalidStateError: connection closed between getDB() resolving and db.transaction(). + // - AbortError: write transaction aborted (connection close / versionchange / sibling abort). + // - Safari/WebKit IDB server termination for backgrounded tabs. + if ( + name.includes('invalidstateerror') || + name.includes('aborterror') || + message.includes('connection to indexed database server lost') || + message.includes('connection is closing') || + // This is related to https://github.com/Expensify/react-native-onyx/pull/796 — remove this comment when #796 is merged. + message.includes('idb write transaction aborted without an error') + ) { + return StorageErrorClass.TRANSIENT; + } + + return StorageErrorClass.UNKNOWN; +} + +export default classifyIDBError; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index 4d0304ed4..dc7d0b270 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -1,49 +1,11 @@ import * as IDB from 'idb-keyval'; import type {UseStore} from 'idb-keyval'; import * as Logger from '../../../Logger'; +import {StorageErrorClass} from '../../errors'; +import classifyIDBError from './classifyError'; const HEAL_ATTEMPTS_MAX = 3; -/** - * Detects the Chromium-specific IDB backing store corruption error. - * Fires when LevelDB files backing IndexedDB are corrupted and Chrome's - * internal recovery (RepairDB -> delete -> recreate) also fails. - */ -function isBackingStoreError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).message.includes('Internal error opening backing store'); -} - -/** - * Detects Safari/WebKit IDB connection termination errors. - * Fires when Safari kills the IDB server process for backgrounded tabs. - * WebKit bugs: https://bugs.webkit.org/show_bug.cgi?id=197050, https://bugs.webkit.org/show_bug.cgi?id=201483 - */ -function isConnectionLostError(error: unknown): boolean { - if (!(error instanceof Error || error instanceof DOMException)) return false; - const msg = (error as Error).message.toLowerCase(); - return msg.includes('connection to indexed database server lost') || msg.includes('connection is closing'); -} - -function isInvalidStateError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).name === 'InvalidStateError'; -} - -/** Errors that trigger a budgeted heal-and-retry in store(). */ -function isBudgetedHealError(error: unknown): boolean { - return isBackingStoreError(error) || isConnectionLostError(error); -} - -function getBudgetedHealErrorLabel(error: unknown): string { - if (isBackingStoreError(error)) return 'backing store'; - if (isConnectionLostError(error)) return 'connection lost'; - return 'unknown'; -} - -/** Union of all error types indicating a stale/dead IDB connection. Used by the visibilitychange probe. */ -function isStaleConnectionError(error: unknown): boolean { - return isInvalidStateError(error) || isBackingStoreError(error) || isConnectionLostError(error); -} - // This is a copy of the createStore function from idb-keyval, we need a custom implementation // because we need to create the database manually in order to ensure that the store exists before we use it. // If the store does not exist, idb-keyval will throw an error @@ -67,7 +29,10 @@ function createStore(dbName: string, storeName: string): UseStore { // https://developer.mozilla.org/en-US/docs/Web/API/IDBDatabase/versionchange_event // eslint-disable-next-line no-param-reassign db.onversionchange = () => { - Logger.logInfo('IDB connection closing due to version change', {dbName, storeName}); + Logger.logInfo('IDB connection closing due to version change', { + dbName, + storeName, + }); db.close(); dbp = undefined; }; @@ -146,7 +111,11 @@ function createStore(dbName: string, storeName: string): UseStore { const probePromise = dbp; const dropCacheIfStale = (error: unknown) => { - if (dbp !== probePromise || !isStaleConnectionError(error)) { + // A stale/dead connection surfaces as TRANSIENT (InvalidStateError, Safari connection lost) + // or FATAL (Chromium backing-store corruption) per the shared taxonomy (lib/storage/errors.ts). + const errorClass = classifyIDBError(error); + const isStaleConnection = errorClass === StorageErrorClass.TRANSIENT || errorClass === StorageErrorClass.FATAL; + if (dbp !== probePromise || !isStaleConnection) { return; } Logger.logAlert('IDB visibilitychange probe: stale connection detected, dropping cached connection', { @@ -182,22 +151,27 @@ function createStore(dbName: string, storeName: string): UseStore { }); }); - // Handles three recoverable error classes: - // 1. InvalidStateError — connection closed between getDB() resolving and db.transaction(). - // Retry once with a fresh connection. No budget limit (transient, always worth one reopen). - // 2. Backing store corruption (Chromium UnknownError) — drop cached connection and reopen. - // 3. Connection lost (Safari UnknownError) — IDB server terminated for backgrounded tabs. - // Both 2 and 3 share a heal budget (3 attempts, reset on success). - // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts - // Note: concurrent store() calls share the budget. Under overlapping failures each caller + // The connection layer owns recovery for connection/transport failures. It reacts per the shared + // error taxonomy (see lib/storage/errors.ts): + // - TRANSIENT (InvalidStateError, AbortError, Safari connection lost) — the cached connection is + // stale. Drop it and retry once with a fresh one. Unbudgeted: a single reopen is always worth it + // and is bounded per operation. + // - FATAL (Chromium backing-store corruption) — reopening can recover transient corruption, but + // repeating forever is futile, so the heal is budgeted (3 attempts, reset on success). + // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts + // - CAPACITY / UNKNOWN are NOT the connection layer's responsibility — propagate to the operation + // layer (OnyxUtils.retryOperation) without retrying here, to avoid compounding retries. + // Note: concurrent store() calls share the heal budget. Under overlapping failures each caller // decrements independently, so the budget may drain faster than one-per-incident. This is // acceptable — same as Dexie's approach — and the budget resets on any success. return (txMode, callback) => executeTransaction(txMode, callback) .then(resetHealBudget) .catch((error) => { - if (isInvalidStateError(error)) { - Logger.logInfo('IDB InvalidStateError — dropping cached connection and retrying', { + const errorClass = classifyIDBError(error); + + if (errorClass === StorageErrorClass.TRANSIENT) { + Logger.logInfo('IDB transient error — dropping cached connection and retrying once', { dbName, storeName, txMode, @@ -207,29 +181,33 @@ function createStore(dbName: string, storeName: string): UseStore { return executeTransaction(txMode, callback).then(resetHealBudget); } - if (isBudgetedHealError(error) && healAttemptsRemaining > 0) { + if (errorClass === StorageErrorClass.FATAL && healAttemptsRemaining > 0) { healAttemptsRemaining--; - const label = getBudgetedHealErrorLabel(error); - Logger.logInfo(`IDB heal: ${label} error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { + Logger.logInfo(`IDB heal: backing store error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { dbName, storeName, }); dbp = undefined; return executeTransaction(txMode, callback).then((result) => { - Logger.logInfo(`IDB heal: successfully recovered after ${label} error`, {dbName, storeName}); + Logger.logInfo('IDB heal: successfully recovered after backing store error', {dbName, storeName}); return resetHealBudget(result); }); } - if (isBudgetedHealError(error)) { - Logger.logAlert(`IDB heal: ${getBudgetedHealErrorLabel(error)} error — heal budget exhausted, giving up`, { + if (errorClass === StorageErrorClass.FATAL) { + Logger.logAlert('IDB heal: backing store error — heal budget exhausted, giving up', { dbName, storeName, }); - } else { - Logger.logAlert('IDB error is not recoverable, giving up', { + } else if (errorClass === StorageErrorClass.UNKNOWN) { + // UNKNOWN — unexpected at this layer; record it so it's visible. CAPACITY is the + // expected propagation path (the operation layer owns its logging, and suppresses it + // entirely once the circuit breaker is open), so we do NOT log it here — doing so was a + // per-failed-write line that dominated the storm. + Logger.logInfo('IDB error not recoverable at the connection layer, propagating', { dbName, storeName, + errorClass, errorMessage: error instanceof Error ? error.message : String(error), }); } diff --git a/lib/storage/providers/IDBKeyValProvider/index.ts b/lib/storage/providers/IDBKeyValProvider/index.ts index 68768d1b3..09fcbb1f2 100644 --- a/lib/storage/providers/IDBKeyValProvider/index.ts +++ b/lib/storage/providers/IDBKeyValProvider/index.ts @@ -4,6 +4,7 @@ import utils from '../../../utils'; import type StorageProvider from '../types'; import type {OnyxKey, OnyxValue} from '../../../types'; import createStore from './createStore'; +import classifyIDBError from './classifyError'; import type {StorageKeyValuePair} from '../types'; const DB_NAME = 'OnyxDB'; @@ -29,6 +30,10 @@ const provider: StorageProvider = { * The name of the provider that can be printed to the logs */ name: 'IDBKeyValProvider', + /** + * Classifies an IndexedDB write failure into the shared storage taxonomy. + */ + classifyError: classifyIDBError, /** * Initializes the storage provider */ diff --git a/lib/storage/providers/MemoryOnlyProvider.ts b/lib/storage/providers/MemoryOnlyProvider.ts index 1f92425b6..b5f859c8f 100644 --- a/lib/storage/providers/MemoryOnlyProvider.ts +++ b/lib/storage/providers/MemoryOnlyProvider.ts @@ -1,6 +1,7 @@ import _ from 'underscore'; import type {OnyxKey, OnyxValue} from '../../types'; import utils from '../../utils'; +import {StorageErrorClass} from '../errors'; import type StorageProvider from './types'; import type {StorageKeyValuePair} from './types'; @@ -24,6 +25,11 @@ const provider: StorageProvider = { */ name: 'MemoryOnlyProvider', + /** + * In-memory storage has no quota/connection failure modes, so nothing is classifiable. + */ + classifyError: () => StorageErrorClass.UNKNOWN, + /** * Initializes the storage provider */ diff --git a/lib/storage/providers/NoopProvider.ts b/lib/storage/providers/NoopProvider.ts index 677826cfb..4f3aaea11 100644 --- a/lib/storage/providers/NoopProvider.ts +++ b/lib/storage/providers/NoopProvider.ts @@ -1,4 +1,5 @@ import type {OnyxValue} from '../../types'; +import {StorageErrorClass} from '../errors'; import type StorageProvider from './types'; const provider: StorageProvider = { @@ -9,6 +10,11 @@ const provider: StorageProvider = { */ name: 'NoopProvider', + /** + * The noop provider never throws, so nothing is classifiable. + */ + classifyError: () => StorageErrorClass.UNKNOWN, + /** * Initializes the storage provider */ diff --git a/lib/storage/providers/SQLiteProvider.ts b/lib/storage/providers/SQLiteProvider.ts index d3b6fee67..56fadc588 100644 --- a/lib/storage/providers/SQLiteProvider.ts +++ b/lib/storage/providers/SQLiteProvider.ts @@ -9,6 +9,7 @@ import type {FastMergeReplaceNullPatch} from '../../utils'; import utils from '../../utils'; import type StorageProvider from './types'; import type {StorageKeyList, StorageKeyValuePair} from './types'; +import classifySQLiteError from './classifySQLiteError'; /** * The type of the key-value pair stored in the SQLite database @@ -63,6 +64,10 @@ const provider: StorageProvider = { * The name of the provider that can be printed to the logs */ name: 'SQLiteProvider', + /** + * Classifies a SQLite write failure into the shared storage taxonomy. + */ + classifyError: classifySQLiteError, /** * Initializes the storage provider */ diff --git a/lib/storage/providers/classifySQLiteError.ts b/lib/storage/providers/classifySQLiteError.ts new file mode 100644 index 000000000..3bf2e0250 --- /dev/null +++ b/lib/storage/providers/classifySQLiteError.ts @@ -0,0 +1,24 @@ +import {StorageErrorClass, getErrorParts} from '../errors'; +import type {StorageErrorClassValue} from '../errors'; + +/** + * Classifies a SQLite write failure into the shared storage taxonomy (lib/storage/errors.ts). + * This is the SQLite engine's own dialect — it is NOT shared with other engines. It lives in a + * standalone module (no `react-native-nitro-sqlite` import) so it can be reused without pulling in + * native dependencies. + * + * SQLite surfaces fewer distinct write-failure shapes than IndexedDB. As telemetry from the UNKNOWN + * bucket (see OnyxUtils.retryOperation) reveals recurring native errors, add matchers here. + */ +function classifySQLiteError(error: unknown): StorageErrorClassValue { + const {message} = getErrorParts(error); + + // Device disk full. + if (message.includes('database or disk is full')) { + return StorageErrorClass.CAPACITY; + } + + return StorageErrorClass.UNKNOWN; +} + +export default classifySQLiteError; diff --git a/lib/storage/providers/types.ts b/lib/storage/providers/types.ts index 046b531b0..b8e52b440 100644 --- a/lib/storage/providers/types.ts +++ b/lib/storage/providers/types.ts @@ -1,5 +1,6 @@ import type {OnyxKey, OnyxValue} from '../../types'; import type {FastMergeReplaceNullPatch} from '../../utils'; +import type {StorageErrorClassValue} from '../errors'; type StorageKeyValuePair = [key: OnyxKey, value: OnyxValue, replaceNullPatches?: FastMergeReplaceNullPatch[]]; type StorageKeyList = OnyxKey[]; @@ -87,6 +88,13 @@ type StorageProvider = { */ getDatabaseSize: () => Promise; + /** + * Classifies a write error from THIS engine into the shared {@link StorageErrorClassValue} taxonomy. + * Each provider owns its own matchers (IndexedDB DOMExceptions, SQLite messages, …) so the central + * taxonomy stays engine-agnostic. Anything the provider doesn't recognize must return UNKNOWN. + */ + classifyError: (error: unknown) => StorageErrorClassValue; + /** * @param onStorageKeyChanged Storage synchronization mechanism keeping all opened tabs in sync */ diff --git a/tests/unit/StorageCircuitBreakerTest.ts b/tests/unit/StorageCircuitBreakerTest.ts new file mode 100644 index 000000000..94069e06e --- /dev/null +++ b/tests/unit/StorageCircuitBreakerTest.ts @@ -0,0 +1,142 @@ +import * as Logger from '../../lib/Logger'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; + +describe('StorageCircuitBreaker', () => { + let currentTime = 1_000_000; + let nowSpy: jest.SpyInstance; + + const advance = (ms: number) => { + currentTime += ms; + }; + + beforeEach(() => { + currentTime = 1_000_000; + nowSpy = jest.spyOn(Date, 'now').mockImplementation(() => currentTime); + StorageCircuitBreaker.reset(); + }); + + afterEach(() => { + nowSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should not trip below the failure threshold', () => { + for (let i = 0; i < StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip once capacity failures exceed the threshold within the window', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not trip when failures are spread across multiple windows', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + // Space each failure out so older ones fall out of the rolling window before the count builds up. + advance(2_000); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip after consecutive no-progress evictions', () => { + // Each cycle is a capacity failure followed by an eviction that frees no usable space. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + // The next capacity failure observes that the last eviction made no progress, tipping it over. + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not trip when each eviction makes progress (retry succeeds)', () => { + // Intermittent quota pressure: every cycle is a capacity failure → eviction → SUCCESSFUL retry. + // A successful retry means the eviction freed usable space, so it must never be counted as a + // no-progress cycle by the next failure. Without recordWriteSuccess the stale pending flag made + // each subsequent failure look like no-progress and tripped the breaker after NO_PROGRESS_CAP cycles. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 3; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordWriteSuccess(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should reset the no-progress streak when an eviction finally makes progress', () => { + // A few no-progress evictions build the streak up, but short of the cap. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP - 1; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + + // This eviction succeeds, breaking the consecutive streak. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordWriteSuccess(); + + // Two more no-progress cycles must not trip, because the streak was reset by the success above. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should not count a failure as no-progress when no eviction preceded it', () => { + // Capacity failures with no interleaved evictions must not accumulate no-progress cycles. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 2; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should emit exactly one alert when it trips, even as failures continue', () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + // Further failures while open must not produce more alerts. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(logAlertSpy).toHaveBeenCalledTimes(1); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should self-reset once the rolling window clears', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should reset no-progress tracking after the window clears between storms', () => { + // First storm: some no-progress evictions, but not enough to trip. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP - 1; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + + // Let the window fully clear so the next failure starts a fresh storm. + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS + 1); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); +}); diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 8767dca82..99ae441d8 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -8,6 +8,7 @@ import type GenericCollection from '../utils/GenericCollection'; import OnyxCache from '../../lib/OnyxCache'; import * as Logger from '../../lib/Logger'; import StorageMock from '../../lib/storage'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; import createDeferredTask from '../../lib/createDeferredTask'; import waitForPromisesToResolve from '../utils/waitForPromisesToResolve'; @@ -751,6 +752,9 @@ describe('OnyxUtils', () => { const diskFullError = new Error('database or disk is full'); const nonRetriableIdbError = Object.assign(new Error('Internal error opening backing store for indexedDB.open.'), {name: 'UnknownError'}); + // The circuit breaker is process-scoped, so reset it between tests to avoid state leaking. + beforeEach(() => StorageCircuitBreaker.reset()); + it('should retry only one time if the operation is firstly failed and then passed', async () => { StorageMock.setItem = jest.fn(StorageMock.setItem).mockRejectedValueOnce(genericError).mockImplementation(StorageMock.setItem); @@ -769,6 +773,21 @@ describe('OnyxUtils', () => { expect(retryOperationSpy).toHaveBeenCalledTimes(6); }); + it('should log the full shape of an unclassified (UNKNOWN) error once per operation', async () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + StorageMock.setItem = jest.fn().mockRejectedValue(genericError); + + await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); + + // UNKNOWN is instrumented so we can see what to promote into a real class. The shape (provider + // + name + message) is logged exactly once even though the operation retries 6 times. + const unclassifiedCalls = logAlertSpy.mock.calls.filter((call) => typeof call[0] === 'string' && call[0].startsWith('Unclassified storage error.')); + expect(unclassifiedCalls).toHaveLength(1); + expect(unclassifiedCalls[0][0]).toBe( + `Unclassified storage error. provider: MemoryOnlyProvider. name: ${genericError.name}. message: ${genericError.message}. onyxMethod: setWithRetry.`, + ); + }); + it("should throw error for if operation failed with \"Failed to execute 'put' on 'IDBObjectStore': invalid data\" error", async () => { StorageMock.setItem = jest.fn().mockRejectedValueOnce(invalidDataError); @@ -793,15 +812,19 @@ describe('OnyxUtils', () => { expect(retryOperationSpy).toHaveBeenCalledTimes(1); }); - it('should log a single skip alert for non-retriable errors', async () => { + it('should skip retry quietly (info, not alert) for fatal connection-layer errors', async () => { const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + const logInfoSpy = jest.spyOn(Logger, 'logInfo'); StorageMock.setItem = jest.fn().mockRejectedValue(nonRetriableIdbError); await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); - expect(logAlertSpy).toHaveBeenCalledWith(`Storage operation skipped retry for non-retriable error. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); - // Not paired with the "5 retries exhausted" alert - expect(logAlertSpy).toHaveBeenCalledTimes(1); + // The connection layer (createStore) owns and alerts on fatal errors; the operation layer + // just skips the retry at info level. No alert here, and no "5 retries exhausted" alert. + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`, + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should include the error in logAlert for IDBObjectStore invalid data errors', async () => { @@ -821,7 +844,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logAlertSpy).toHaveBeenCalledWith(`Out of storage. But found no acceptable keys to remove. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); it('should include usageDetails in the storage quota log when available', async () => { @@ -841,7 +866,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith( - `Storage Quota Check -- bytesUsed: 13289269 bytesRemaining: 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, + `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify( + usageDetails, + )}. Original error: ${diskFullError}`, ); }); @@ -856,6 +883,43 @@ describe('OnyxUtils', () => { expect(logAlertSpy).toHaveBeenCalledWith(`Unable to get database size. getDatabaseSize error: ${dbSizeError}. Original error: ${diskFullError}`); }); + it('should trip the circuit breaker and alert once after sustained capacity failures', async () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + // No evictable keys are configured, so each failing write records exactly one capacity + // failure with the breaker (it cannot evict). Enough of them within one window trips it. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + await Onyx.set(ONYXKEYS.TEST_KEY, {test: i}); + } + await waitForPromisesToResolve(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should drop capacity writes silently while the circuit breaker is open', async () => { + // Trip the breaker deterministically so every capacity failure below is observed while open. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + // Clear so we only observe logging caused by the write below, not the trip alert above. + const logInfoSpy = jest.spyOn(Logger, 'logInfo').mockClear(); + const logAlertSpy = jest.spyOn(Logger, 'logAlert').mockClear(); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); + await waitForPromisesToResolve(); + + // The write and any cascading derived writes are dropped without per-write log spam, and + // without re-alerting — the single trip alert is the only signal while open. + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logInfoSpy).not.toHaveBeenCalledWith(expect.stringContaining('Failed to save to storage')); + expect(logAlertSpy).not.toHaveBeenCalled(); + }); + it('should not re-add an evicted key to recentlyAccessedKeys after removal', async () => { // Re-init with evictable keys so getKeyForEviction() has something to return Object.assign(OnyxUtils.getDeferredInitTask(), createDeferredTask()); @@ -1514,7 +1578,9 @@ describe('OnyxUtils', () => { await LocalOnyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith(`Out of storage. Evicting least recently accessed key (${key1}) and retrying. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); it('multiSet — eviction of an UNRELATED key still notifies its subscribers (codex regression guard)', async () => { diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index 437b23c00..16f206747 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -77,7 +77,7 @@ describe('createStore', () => { }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); - expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB InvalidStateError'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB transient error'), expect.anything()); }); it('should not retry on non-InvalidStateError DOMException', async () => { @@ -96,8 +96,14 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Not found'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({ + errorMessage: 'Not found', + errorClass: 'unknown', + }), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should not retry on non-DOMException errors', async () => { @@ -116,8 +122,14 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(TypeError); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Something went wrong'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({ + errorMessage: 'Something went wrong', + errorClass: 'unknown', + }), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should preserve data integrity after a successful retry', async () => { @@ -176,7 +188,7 @@ describe('createStore', () => { return IDB.promisifyRequest(s.transaction); }); - expect(logInfoSpy).toHaveBeenCalledWith('IDB InvalidStateError — dropping cached connection and retrying', { + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', { dbName, storeName: STORE_NAME, txMode: 'readwrite', @@ -308,7 +320,10 @@ describe('createStore', () => { if (openCallCount <= 2) { const req = {} as IDBOpenDBRequest; Promise.resolve().then(() => { - Object.defineProperty(req, 'error', {value: backingStoreError(), configurable: true}); + Object.defineProperty(req, 'error', { + value: backingStoreError(), + configurable: true, + }); req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); }); return req; @@ -332,7 +347,10 @@ describe('createStore', () => { jest.spyOn(indexedDB, 'open').mockImplementation(() => { const req = {} as IDBOpenDBRequest; Promise.resolve().then(() => { - Object.defineProperty(req, 'error', {value: backingStoreError(), configurable: true}); + Object.defineProperty(req, 'error', { + value: backingStoreError(), + configurable: true, + }); req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); }); return req; @@ -412,24 +430,27 @@ describe('createStore', () => { jest.restoreAllMocks(); logAlertSpy = jest.spyOn(Logger, 'logAlert'); + logInfoSpy = jest.spyOn(Logger, 'logInfo'); - // QuotaExceededError + // QuotaExceededError — a CAPACITY error the connection layer does not own; it propagates + // to the operation layer (OnyxUtils.retryOperation) which handles eviction. The connection + // layer stays quiet for capacity (it's the expected path) so it can't spam the storm log. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw new DOMException('Quota exceeded', 'QuotaExceededError'); }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow('Quota exceeded'); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Quota exceeded'})); + expect(logAlertSpy).not.toHaveBeenCalled(); + expect(logInfoSpy).not.toHaveBeenCalledWith('IDB error not recoverable at the connection layer, propagating', expect.objectContaining({errorClass: 'capacity'})); }); }); - describe('connection lost healing', () => { + describe('connection lost recovery (transient, unbudgeted)', () => { function connectionLostError() { return new DOMException('Connection to Indexed Database server lost. Refresh the page to try again', 'UnknownError'); } - it('should heal by dropping cached connection and reopening', async () => { + it('should recover by dropping cached connection and retrying once', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -450,45 +471,39 @@ describe('createStore', () => { const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); expect(result).toBe('value'); expect(callCount).toBe(2); - expect(logInfoSpy).toHaveBeenCalledWith( - expect.stringContaining('connection lost error detected — dropping cached connection and reopening'), - expect.objectContaining({dbName: expect.any(String)}), - ); - expect(logInfoSpy).toHaveBeenCalledWith('IDB heal: successfully recovered after connection lost error', expect.objectContaining({dbName: expect.any(String)})); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); }); - it('should stop healing after budget exhausts', async () => { + it('should not be budgeted — reopens on every call without ever exhausting a budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); // All transaction calls fail permanently with connection lost. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close (same pattern as the backing store budget exhaustion test). + // The transient path clears dbp and calls indexedDB.open() again — mock that to + // also fail so fake-indexeddb doesn't deadlock waiting for the old connection to close. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw connectionLostError(); }); jest.spyOn(indexedDB, 'open').mockImplementation(() => { const req = {} as IDBOpenDBRequest; Promise.resolve().then(() => { - Object.defineProperty(req, 'error', {value: connectionLostError(), configurable: true}); + Object.defineProperty(req, 'error', { + value: connectionLostError(), + configurable: true, + }); req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); }); return req; }); - // Each call drains 1 heal attempt (initial fails, heal retry also fails) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - - // Budget exhausted — 4th call should NOT attempt healing, but should log budget exhausted - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + // Many calls — each attempts a single reopen and propagates. No budget, so it never logs + // "heal budget exhausted" no matter how many times it fails. + for (let i = 0; i < 5; i++) { + await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); + } + expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); }); - it('should also heal "connection is closing" variant', async () => { + it('should also recover the "connection is closing" variant', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -511,57 +526,96 @@ describe('createStore', () => { expect(callCount).toBe(2); }); - it('should share heal budget with backing store errors', async () => { + it('should not consume the backing-store heal budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); - // All transaction calls fail permanently, alternating error types. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close. - const txErrors = [ - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - connectionLostError(), - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - ]; - let txErrorIndex = 0; - jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { - const err = txErrors[Math.min(txErrorIndex, txErrors.length - 1)]; - txErrorIndex++; - throw err; + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); }); - jest.spyOn(indexedDB, 'open').mockImplementation(() => { - const req = {} as IDBOpenDBRequest; - Promise.resolve().then(() => { - Object.defineProperty(req, 'error', { - value: new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - configurable: true, - }); - req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); + + const original = IDBDatabase.prototype.transaction; + const backingStoreError = () => new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'); + + // Connection-lost errors are transient and must NOT decrement the backing-store budget. + // Fail with connection-lost on the first transaction of 4 separate operations (each recovers + // on its single reopen). If they wrongly shared the budget, the backing-store budget (3) + // would be drained; afterwards a backing-store error must still heal. + for (let i = 0; i < 4; i++) { + let callCount = 0; + const spy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw connectionLostError(); + } + spy.mockRestore(); + return original.apply(this, args); }); - return req; + await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + } + + // A backing-store error still heals — proving the budget was untouched by the 4 transient failures. + let backingCallCount = 0; + const backingSpy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + backingCallCount++; + if (backingCallCount === 1) { + throw backingStoreError(); + } + backingSpy.mockRestore(); + return original.apply(this, args); }); + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(logInfoSpy).toHaveBeenCalledWith( + expect.stringContaining('backing store error detected — dropping cached connection and reopening (2 attempts left)'), + expect.objectContaining({dbName: expect.any(String)}), + ); + }); + }); - // 3 calls drain the budget (each call: fail + heal retry fail = 1 budget) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); + describe('AbortError recovery (transient)', () => { + it('should treat a normalized write-abort AbortError as transient and retry once', async () => { + const store = createStore(uniqueDBName(), STORE_NAME); - // Budget exhausted — no more healing for either error type - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); + }); + + const original = IDBDatabase.prototype.transaction; + let callCount = 0; + jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw new DOMException('IDB write transaction aborted without an error', 'AbortError'); + } + return original.apply(this, args); + }); + + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(callCount).toBe(2); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); + expect(logAlertSpy).not.toHaveBeenCalled(); }); }); describe('visibilitychange probe', () => { function simulateVisibilityChange(state: string) { - Object.defineProperty(document, 'visibilityState', {value: state, writable: true, configurable: true}); + Object.defineProperty(document, 'visibilityState', { + value: state, + writable: true, + configurable: true, + }); document.dispatchEvent(new Event('visibilitychange')); } afterEach(() => { - Object.defineProperty(document, 'visibilityState', {value: 'visible', writable: true, configurable: true}); + Object.defineProperty(document, 'visibilityState', { + value: 'visible', + writable: true, + configurable: true, + }); }); it('should drop stale dbp when probe detects connection lost on foreground', async () => { @@ -677,7 +731,13 @@ describe('createStore', () => { jest.spyOn(indexedDB, 'open').mockImplementation(() => { const req = {} as IDBOpenDBRequest; rejectOpen = () => { - Object.defineProperty(req, 'error', {value: new DOMException('probe open failed', 'AbortError'), configurable: true}); + // Use an UNKNOWN-classified error so the connection layer propagates it (AbortError is now + // classified TRANSIENT and would be retried — see lib/storage/errors.ts). The point of this + // test is the probe branch not surfacing an unhandled rejection, not the specific error type. + Object.defineProperty(req, 'error', { + value: new DOMException('probe open failed', 'UnknownError'), + configurable: true, + }); req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); }; return req;