
Commit 23ff243

Merge branch 'main' into kv-cache
2 parents 25c8120 + c8c7003

19 files changed: +1087 −19 lines

Applications/MLXChatExample/Services/MLXService.swift

Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@ class MLXService {
             name: "qwen2.5VL:3b", configuration: VLMRegistry.qwen2_5VL3BInstruct4Bit, type: .vlm),
         LMModel(name: "qwen2VL:2b", configuration: VLMRegistry.qwen2VL2BInstruct4Bit, type: .vlm),
         LMModel(name: "smolVLM", configuration: VLMRegistry.smolvlminstruct4bit, type: .vlm),
+        LMModel(name: "acereason:7B", configuration: LLMRegistry.acereason_7b_4bit, type: .llm),
     ]

     /// Cache to store loaded model containers to avoid reloading.
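The cache itself is outside this hunk; purely to illustrate the pattern the comment above describes, here is a minimal sketch (the `ModelContainerCache` type and its method are hypothetical; `ModelContainer` is the MLXLMCommon type):

```swift
import MLXLMCommon

// Hypothetical sketch of a container cache: loaded models are kept in a
// dictionary keyed by name, so a repeated request skips the expensive reload.
actor ModelContainerCache {
    private var containers: [String: ModelContainer] = [:]

    func container(
        named name: String,
        loadIfMissing load: () async throws -> ModelContainer
    ) async throws -> ModelContainer {
        if let cached = containers[name] { return cached }
        let loaded = try await load()
        containers[name] = loaded
        return loaded
    }
}
```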

Libraries/MLXLLM/Documentation.docc/Documentation.md

Lines changed: 17 additions & 0 deletions

@@ -11,8 +11,24 @@ Example implementations of various Large Language Models (LLMs).
 - [MLXVLM](MLXVLM)
 - [StableDiffusion](StableDiffusion)

+## Quick Start
+
+See <doc:evaluation>.
+
+Using LLMs and VLMs is as easy as this:
+
+```swift
+let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")
+let session = ChatSession(model)
+print(try await session.respond(to: "What are two things to see in San Francisco?"))
+print(try await session.respond(to: "How about a great place to eat?"))
+```
+
+More advanced APIs are available for those who need them; see <doc:using-model>.
+
 ## Topics

+- <doc:evaluation>
 - <doc:adding-model>
 - <doc:using-model>

@@ -32,3 +48,4 @@ Example implementations of various Large Language Models (LLMs).
 - ``Starcoder2Model``
 - ``MiMoModel``
 - ``GLM4Model``
+- ``AceReason``
Libraries/MLXLLM/Documentation.docc/evaluation.md

Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@ (new file)

# Evaluation

The simplified LLM/VLM API allows you to load a model and evaluate prompts with only a few lines of code.

For example, this loads a model and asks a question and a follow-on question:

```swift
let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")
let session = ChatSession(model)
print(try await session.respond(to: "What are two things to see in San Francisco?"))
print(try await session.respond(to: "How about a great place to eat?"))
```

The second question refers to information (the location) from the first
question -- this context is maintained inside the ``ChatSession`` object.

If you need a one-shot prompt/response, simply create a ``ChatSession``, evaluate
the prompt, and discard it. Multiple ``ChatSession`` instances can also be used
(at the cost of the memory in the `KVCache`) to handle multiple streams of
context.
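As a quick illustration of the multiple-session pattern just described, two sessions can share one loaded model while keeping independent histories (a sketch using only the `loadModel`/`ChatSession`/`respond(to:)` calls shown above; the prompts are arbitrary):

```swift
let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")

// Two independent conversations over the same loaded model; each
// ChatSession keeps its own context (and its own KVCache memory).
let travel = ChatSession(model)
let cooking = ChatSession(model)

print(try await travel.respond(to: "Plan one day of sightseeing in Kyoto."))
print(try await cooking.respond(to: "How do I make a simple miso soup?"))

// A follow-up resolves against its own session's history only.
print(try await travel.respond(to: "Where should I eat dinner there?"))
```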
## Streaming Output

The previous example produced the entire response in one call. Often
users want to see the text as it is generated -- you can do this with
a stream:

```swift
let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")
let session = ChatSession(model)

for try await item in session.streamResponse(to: "Why is the sky blue?") {
    print(item, terminator: "")
}
print()
```
## VLMs (Vision Language Models)

This same API supports VLMs as well. Simply present the image or video
to the ``ChatSession``:

```swift
let model = try await loadModel(id: "mlx-community/Qwen2.5-VL-3B-Instruct-4bit")
let session = ChatSession(model)

let answer1 = try await session.respond(
    to: "What kind of creature is in the picture?",
    image: .url(URL(fileURLWithPath: "support/test.jpg"))
)
print(answer1)

// we can ask a follow-up question referring back to the previous image
let answer2 = try await session.respond(
    to: "What is behind the dog?"
)
print(answer2)
```
## Advanced Usage

``ChatSession`` takes a number of parameters you can supply when creating it:

- **instructions**: optional instructions for the chat session, e.g. describing what type of responses to give
    - for example, you might instruct the language model to respond in rhyme or
      to talk like a famous character from a movie
    - or that the responses should be very brief
- **generateParameters**: parameters that control the generation of output, e.g. token limits and temperature
    - see ``GenerateParameters``
- **processing**: optional media processing instructions

A sketch combining these parameters appears below.
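A minimal sketch, assuming the argument labels match the parameter names listed above and that ``GenerateParameters`` accepts a `temperature` argument (the values are only examples):

```swift
let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")

// Sketch: labels follow the parameter list above; the temperature value
// is arbitrary.
let session = ChatSession(
    model,
    instructions: "You are terse. Answer in at most two sentences.",
    generateParameters: GenerateParameters(temperature: 0.6)
)
print(try await session.respond(to: "Why is the sky blue?"))
```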

Libraries/MLXLLM/Documentation.docc/using-model.md

Lines changed: 3 additions & 0 deletions

@@ -2,6 +2,9 @@

 Using a model is easy: load the weights, tokenize and evaluate.

+There is a high-level API described in <doc:evaluation>; this document
+describes the lower-level API for when you need more control.
+
 ## Loading a Model

 A model is typically loaded by using a `ModelFactory` and a `ModelConfiguration`:
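The hunk ends at that colon, before the file's original example; as a rough sketch of the pattern it names (the `loadContainer(configuration:)` entry point on the shared factory is an assumption):

```swift
import MLXLLM
import MLXLMCommon

// Sketch: a ModelConfiguration identifies the model; the factory downloads
// the weights and instantiates it. loadContainer(configuration:) is assumed.
let configuration = ModelConfiguration(id: "mlx-community/Qwen3-4B-4bit")
let container = try await LLMModelFactory.shared.loadContainer(
    configuration: configuration
)
```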

Libraries/MLXLLM/LLMModelFactory.swift

Lines changed: 14 additions & 1 deletion

@@ -45,6 +45,7 @@ public class LLMTypeRegistry: ModelTypeRegistry, @unchecked Sendable {
         "granite": create(GraniteConfiguration.self, GraniteModel.init),
         "mimo": create(MiMoConfiguration.self, MiMoModel.init),
         "glm4": create(GLM4Configuration.self, GLM4Model.init),
+        "acereason": create(Qwen2Configuration.self, Qwen2Model.init),
     ]
 }

@@ -211,6 +212,11 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
         defaultPrompt: "Why is the sky blue?"
     )

+    static public let acereason_7b_4bit = ModelConfiguration(
+        id: "mlx-community/AceReason-Nemotron-7B-4bit",
+        defaultPrompt: ""
+    )
+
     private static func all() -> [ModelConfiguration] {
         [
             codeLlama13b4bit,

@@ -240,6 +246,7 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
             smolLM_135M_4bit,
             mimo_7b_sft_4bit,
             glm4_9b_4bit,
+            acereason_7b_4bit,
         ]
     }

@@ -312,7 +319,7 @@ public class LLMModelFactory: ModelFactory {
     public func _load(
         hub: HubApi, configuration: ModelConfiguration,
         progressHandler: @Sendable @escaping (Progress) -> Void
-    ) async throws -> ModelContext {
+    ) async throws -> sending ModelContext {
         // download weights and config
         let modelDirectory = try await downloadModel(
             hub: hub, configuration: configuration, progressHandler: progressHandler)

@@ -361,3 +368,9 @@ public class LLMModelFactory: ModelFactory {
     }
 }
+
+public class TrampolineModelFactory: NSObject, ModelFactoryTrampoline {
+    public static func modelFactory() -> (any MLXLMCommon.ModelFactory)? {
+        LLMModelFactory.shared
+    }
+}
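With the registrations above in place, the new model should load like any other Qwen2-family entry. A sketch (passing the loaded container to `ChatSession` and the `loadContainer(configuration:)` call are assumptions based on APIs named elsewhere in this commit):

```swift
import MLXLLM
import MLXLMCommon

// Sketch: "acereason" maps to Qwen2Configuration/Qwen2Model, so the new
// configuration loads through the same path as other Qwen2-family models.
// Note the registry entry's defaultPrompt is empty, so supply your own.
let container = try await LLMModelFactory.shared.loadContainer(
    configuration: LLMRegistry.acereason_7b_4bit
)
let session = ChatSession(container)
print(try await session.respond(to: "Why is the sky blue?"))
```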

Libraries/MLXLLM/README.md

Lines changed: 17 additions & 0 deletions

@@ -58,9 +58,26 @@ Currently supported model types are:
 - Starcoder2
 - MiMo
 - GLM4
+- AceReason

 See [llm-tool](../../Tools/llm-tool)

+# Quick Start
+
+Using LLMs and VLMs from MLXLMCommon is as easy as:
+
+```swift
+let model = try await loadModel(id: "mlx-community/Qwen3-4B-4bit")
+let session = ChatSession(model)
+print(try await session.respond(to: "What are two things to see in San Francisco?"))
+print(try await session.respond(to: "How about a great place to eat?"))
+```
+
+See [Evaluation](https://swiftpackageindex.com/ml-explore/mlx-swift-examples/main/documentation/mlxlmcommon/evaluation)
+or [Using Models](https://swiftpackageindex.com/ml-explore/mlx-swift-examples/main/documentation/mlxlmcommon/using-model)
+for the more advanced API.
+
 # Adding a Model

 If the model follows the typical LLM pattern:
