From 1c1263fa20debbf446078e68a4c24b123eb1d0db Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Tue, 24 Feb 2026 01:56:34 -0800 Subject: [PATCH 1/4] Add UnavailableReason for MLXLanguageModel --- .../Models/MLXLanguageModel.swift | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift index 4ffb877..c733b3e 100644 --- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift @@ -74,6 +74,11 @@ import Foundation cache.removeAllObjects() } + /// Returns whether a cached context exists for the key. + func contains(_ key: String) -> Bool { + cache.object(forKey: key as NSString) != nil + } + /// Cancels in-flight work and removes cached data for the key. func removeAndCancel(for key: String) async { let task = removeInFlight(for: key) @@ -132,8 +137,10 @@ import Foundation /// ``` public struct MLXLanguageModel: LanguageModel { /// The reason the model is unavailable. - /// This model is always available. - public typealias UnavailableReason = Never + public enum UnavailableReason: Sendable, Equatable, Hashable { + /// The model has not been loaded into memory yet. + case notLoaded + } /// The model identifier. public let modelId: String @@ -156,6 +163,12 @@ import Foundation self.directory = directory } + /// The current availability of this model in memory. + public var availability: Availability { + let key = directory?.absoluteString ?? modelId + return modelCache.contains(key) ? .available : .unavailable(.notLoaded) + } + /// Removes this model from the shared cache and cancels any in-flight load. /// /// Call this to free memory when the model is no longer needed. From e89d0ed24c4899b5b840ad524ca182cc5046b481 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Tue, 24 Feb 2026 02:39:49 -0800 Subject: [PATCH 2/4] Distinguish between not loaded and load failure --- .../Models/MLXLanguageModel.swift | 41 ++++++++++++++++++- .../MLXLanguageModelTests.swift | 34 +++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift index c733b3e..f9390b2 100644 --- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift @@ -26,8 +26,13 @@ import Foundation /// Coordinates a bounded in-memory cache with structured, coalesced loading. private final class ModelContextCache { + private struct LoadFailure: Sendable { + let description: String + } + private let cache: NSCache private let inFlight = Locked<[String: Task]>([:]) + private let failures = Locked<[String: LoadFailure]>([:]) /// Creates a cache with a count-based eviction limit. init(countLimit: Int) { @@ -56,9 +61,11 @@ import Foundation do { let cached = try await task.value cache.setObject(cached, forKey: cacheKey) + clearFailure(for: key) clearInFlight(for: key) return cached.context } catch { + setFailure(for: key, error: error) clearInFlight(for: key) throw error } @@ -67,11 +74,13 @@ import Foundation /// Removes a cached context for the key. func remove(for key: String) { cache.removeObject(forKey: key as NSString) + clearFailure(for: key) } /// Clears all cached contexts. func removeAll() { cache.removeAllObjects() + clearAllFailures() } /// Returns whether a cached context exists for the key. @@ -79,11 +88,17 @@ import Foundation cache.object(forKey: key as NSString) != nil } + /// Returns a description of the most recent load failure for the key. + func failureDescription(for key: String) -> String? { + failures.withLock { $0[key]?.description } + } + /// Cancels in-flight work and removes cached data for the key. func removeAndCancel(for key: String) async { let task = removeInFlight(for: key) task?.cancel() cache.removeObject(forKey: key as NSString) + clearFailure(for: key) } /// Cancels all in-flight work and clears cached data. @@ -91,6 +106,7 @@ import Foundation let tasks = removeAllInFlight() tasks.forEach { $0.cancel() } cache.removeAllObjects() + clearAllFailures() } private func inFlightTask(for key: String) -> Task? { @@ -120,6 +136,19 @@ import Foundation return tasks } } + + private func setFailure(for key: String, error: any Error) { + let description = String(reflecting: error) + failures.withLock { $0[key] = LoadFailure(description: description) } + } + + private func clearFailure(for key: String) { + failures.withLock { $0[key] = nil } + } + + private func clearAllFailures() { + failures.withLock { $0.removeAll() } + } } /// Shared cache across MLXLanguageModel instances. @@ -140,6 +169,8 @@ import Foundation public enum UnavailableReason: Sendable, Equatable, Hashable { /// The model has not been loaded into memory yet. case notLoaded + /// The model failed to load and includes the underlying error details. + case failedToLoad(String) } /// The model identifier. @@ -166,7 +197,15 @@ import Foundation /// The current availability of this model in memory. public var availability: Availability { let key = directory?.absoluteString ?? modelId - return modelCache.contains(key) ? .available : .unavailable(.notLoaded) + if modelCache.contains(key) { + return .available + } + + if let failureDescription = modelCache.failureDescription(for: key) { + return .unavailable(.failedToLoad(failureDescription)) + } + + return .unavailable(.notLoaded) } /// Removes this model from the shared cache and cancels any in-flight load. diff --git a/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift b/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift index 62827fd..bd43490 100644 --- a/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift +++ b/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift @@ -205,5 +205,39 @@ import Testing ) #expect([Priority.low, Priority.medium, Priority.high].contains(response.content)) } + + @Test func unavailableForNonexistentModel() async { + let model = MLXLanguageModel(modelId: "mlx-community/does-not-exist-anylanguagemodel-test") + #expect(model.availability == .unavailable(.notLoaded)) + #expect(model.isAvailable == false) + + let session = LanguageModelSession(model: model) + await #expect(throws: Error.self) { + _ = try await session.respond(to: "Hello") + } + + switch model.availability { + case .unavailable(.failedToLoad(let description)): + #expect(!description.isEmpty) + default: + Issue.record("Expected model availability to report failedToLoad after failed request") + } + #expect(model.isAvailable == false) + } + + @Test func availabilityBecomesAvailableAfterSuccessfulLoad() async throws { + await MLXLanguageModel.removeAllFromCache() + + let model = MLXLanguageModel(modelId: "mlx-community/Granite-4.0-H-Tiny-4bit-DWQ") + #expect(model.availability == .unavailable(.notLoaded)) + #expect(model.isAvailable == false) + + let session = LanguageModelSession(model: model) + let response = try await session.respond(to: "Say hello") + #expect(!response.content.isEmpty) + + #expect(model.availability == .available) + #expect(model.isAvailable == true) + } } #endif // MLX From 1e2c39cbe3f57f68b38d2b5887ea322115a86e32 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Tue, 24 Feb 2026 03:05:22 -0800 Subject: [PATCH 3/4] Refactor model state / context caching --- .../Models/MLXLanguageModel.swift | 102 ++++++++++-------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift index f9390b2..f4be593 100644 --- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift +++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift @@ -18,25 +18,28 @@ import Foundation import Tokenizers import Hub - /// Wrapper to store ModelContext in NSCache (requires NSObject subclass). - private final class CachedContext: NSObject, @unchecked Sendable { - let context: ModelContext - init(_ context: ModelContext) { self.context = context } + /// Wrapper to store model availability state in NSCache. + private final class CachedModelState: NSObject, @unchecked Sendable { + enum Value { + case loaded(ModelContext) + case failed(String) + } + + let value: Value + + init(_ value: Value) { + self.value = value + } } /// Coordinates a bounded in-memory cache with structured, coalesced loading. private final class ModelContextCache { - private struct LoadFailure: Sendable { - let description: String - } - - private let cache: NSCache - private let inFlight = Locked<[String: Task]>([:]) - private let failures = Locked<[String: LoadFailure]>([:]) + private let cache: NSCache + private let inFlight = Locked<[String: Task]>([:]) /// Creates a cache with a count-based eviction limit. init(countLimit: Int) { - let cache = NSCache() + let cache = NSCache() cache.countLimit = countLimit self.cache = cache } @@ -47,25 +50,45 @@ import Foundation loader: @escaping @Sendable () async throws -> ModelContext ) async throws -> ModelContext { let cacheKey = key as NSString - if let cached = cache.object(forKey: cacheKey) { - return cached.context + if let cached = cache.object(forKey: cacheKey), + case .loaded(let context) = cached.value + { + return context } if let task = inFlightTask(for: key) { - return try await task.value.context + let cached = try await task.value + if case .loaded(let context) = cached.value { + return context + } + throw CancellationError() } - let task = Task { try await CachedContext(loader()) } + let task = Task { + let context = try await loader() + return CachedModelState(.loaded(context)) + } setInFlight(task, for: key) do { let cached = try await task.value cache.setObject(cached, forKey: cacheKey) - clearFailure(for: key) clearInFlight(for: key) - return cached.context + if case .loaded(let context) = cached.value { + return context + } + throw CancellationError() } catch { - setFailure(for: key, error: error) + // Don't treat cancellations as load failures. + if error is CancellationError || Task.isCancelled { + cache.removeObject(forKey: cacheKey) + clearInFlight(for: key) + throw error + } + cache.setObject( + CachedModelState(.failed(String(reflecting: error))), + forKey: cacheKey + ) clearInFlight(for: key) throw error } @@ -74,23 +97,33 @@ import Foundation /// Removes a cached context for the key. func remove(for key: String) { cache.removeObject(forKey: key as NSString) - clearFailure(for: key) } /// Clears all cached contexts. func removeAll() { cache.removeAllObjects() - clearAllFailures() } /// Returns whether a cached context exists for the key. func contains(_ key: String) -> Bool { - cache.object(forKey: key as NSString) != nil + guard let cached = cache.object(forKey: key as NSString) else { + return false + } + if case .loaded = cached.value { + return true + } + return false } /// Returns a description of the most recent load failure for the key. func failureDescription(for key: String) -> String? { - failures.withLock { $0[key]?.description } + guard let cached = cache.object(forKey: key as NSString) else { + return nil + } + if case .failed(let description) = cached.value { + return description + } + return nil } /// Cancels in-flight work and removes cached data for the key. @@ -98,7 +131,6 @@ import Foundation let task = removeInFlight(for: key) task?.cancel() cache.removeObject(forKey: key as NSString) - clearFailure(for: key) } /// Cancels all in-flight work and clears cached data. @@ -106,14 +138,13 @@ import Foundation let tasks = removeAllInFlight() tasks.forEach { $0.cancel() } cache.removeAllObjects() - clearAllFailures() } - private func inFlightTask(for key: String) -> Task? { + private func inFlightTask(for key: String) -> Task? { inFlight.withLock { $0[key] } } - private func setInFlight(_ task: Task, for key: String) { + private func setInFlight(_ task: Task, for key: String) { inFlight.withLock { $0[key] = task } } @@ -121,7 +152,7 @@ import Foundation inFlight.withLock { $0[key] = nil } } - private func removeInFlight(for key: String) -> Task? { + private func removeInFlight(for key: String) -> Task? { inFlight.withLock { let task = $0[key] $0[key] = nil @@ -129,26 +160,13 @@ import Foundation } } - private func removeAllInFlight() -> [Task] { + private func removeAllInFlight() -> [Task] { inFlight.withLock { let tasks = Array($0.values) $0.removeAll() return tasks } } - - private func setFailure(for key: String, error: any Error) { - let description = String(reflecting: error) - failures.withLock { $0[key] = LoadFailure(description: description) } - } - - private func clearFailure(for key: String) { - failures.withLock { $0[key] = nil } - } - - private func clearAllFailures() { - failures.withLock { $0.removeAll() } - } } /// Shared cache across MLXLanguageModel instances. From f5871b2209df51cd0d7b4b39967c765663393ec8 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Tue, 24 Feb 2026 03:19:31 -0800 Subject: [PATCH 4/4] Use removeFromCache to explicitly unload models when checking availability --- .../MLXLanguageModelTests.swift | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift b/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift index bd43490..82c5f85 100644 --- a/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift +++ b/Tests/AnyLanguageModelTests/MLXLanguageModelTests.swift @@ -35,6 +35,20 @@ import Testing let model = MLXLanguageModel(modelId: "mlx-community/Qwen3-0.6B-4bit") let visionModel = MLXLanguageModel(modelId: "mlx-community/Qwen2-VL-2B-Instruct-4bit") + @Test func availabilityBecomesAvailableAfterSuccessfulLoad() async throws { + await model.removeFromCache() + + #expect(model.availability == .unavailable(.notLoaded)) + #expect(model.isAvailable == false) + + let session = LanguageModelSession(model: model) + let response = try await session.respond(to: "Say hello") + #expect(!response.content.isEmpty) + + #expect(model.availability == .available) + #expect(model.isAvailable == true) + } + @Test func basicResponse() async throws { let session = LanguageModelSession(model: model) @@ -208,6 +222,7 @@ import Testing @Test func unavailableForNonexistentModel() async { let model = MLXLanguageModel(modelId: "mlx-community/does-not-exist-anylanguagemodel-test") + await model.removeFromCache() #expect(model.availability == .unavailable(.notLoaded)) #expect(model.isAvailable == false) @@ -224,20 +239,5 @@ import Testing } #expect(model.isAvailable == false) } - - @Test func availabilityBecomesAvailableAfterSuccessfulLoad() async throws { - await MLXLanguageModel.removeAllFromCache() - - let model = MLXLanguageModel(modelId: "mlx-community/Granite-4.0-H-Tiny-4bit-DWQ") - #expect(model.availability == .unavailable(.notLoaded)) - #expect(model.isAvailable == false) - - let session = LanguageModelSession(model: model) - let response = try await session.respond(to: "Say hello") - #expect(!response.content.isEmpty) - - #expect(model.availability == .available) - #expect(model.isAvailable == true) - } } #endif // MLX