Compare commits

...

20 Commits

Author SHA1 Message Date
Eva Ho
7a2306087b wip 2026-03-26 19:55:13 -04:00
Eva Ho
8b8bcf0952 launch: set default model as active selection in vscode copilot chat 2026-03-26 14:50:13 -04:00
Jesse Gross
d1151e18a1 mlx: fix KV cache snapshot memory leak
mlx.Copy shares the backing buffer with its source (via
copy_shared_buffer) rather than allocating independent storage.
When used to snapshot a slice of the KV cache, the snapshot array
holds the entire original cache buffer alive through the shared
data pointer — even after eval detaches the computation graph.

Replace Copy with Contiguous in Snapshot and Split. Contiguous
allocates a compact buffer when the source buffer is significantly
larger than the logical slice (Contiguous::eval checks
buffer_size > nbytes + 16384), which is always the case for KV
cache slices.
2026-03-25 17:26:34 -07:00
rick
ebbce136c7 ggml: force flash attention off for grok 2026-03-25 16:15:49 -07:00
Devon Rifkin
26b9f53f8e api/show: overwrite basename for copilot chat (#15062)
Copilot Chat prefers to use `general.basename` in the built-in Ollama
integration, but this name isn't usually shown directly to users (and
there may be many models that share this name). Instead we pass back
`req.Model`, which for this extension is the value that we return from
`/api/tags`
2026-03-25 14:02:22 -07:00
Eva H
7575438366 cmd: ollama launch vscode (#15060)
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2026-03-25 16:37:02 -04:00
Eva H
7d7c90d702 tui: add left arrow back navigation in model selector (#14940) 2026-03-25 11:53:48 -07:00
Daniel Hiltgen
4fda69809a ci: fix windows cgo compiler error (#15046) 2026-03-24 16:45:36 -07:00
Daniel Hiltgen
c9b5da6b0c integration: improve ability to test individual models (#14948)
* integration: improve ability to test individual models

Add OLLAMA_TEST_MODEL env var to run integration tests against a
single model.

Enhance vision tests: multi-turn chat with cached image tokens, object
counting, spatial reasoning, detail recognition, scene understanding, OCR, and
multi-image comparison.

Add tool calling stress tests with complex agent-style prompts, large
system messages, and multi-turn tool response handling.

* review comments
2026-03-24 14:28:23 -07:00
Patrick Devine
de5cb7311f mlx: add mxfp4/mxfp8/nvfp4 importing (#15015)
This change allows importing bf16 and converting to mxfp4/mxfp8/nvfp4
and also importing fp8 and converting directly to mxfp8.
2026-03-24 13:45:44 -07:00
Jesse Gross
95ee7fbd29 mlxrunner: panic on double unpin 2026-03-23 17:44:19 -07:00
Jesse Gross
ec55536734 mlxrunner: show time since last used in cache dump tree 2026-03-23 17:44:19 -07:00
Jesse Gross
77491439c2 mlxrunner: support partial match on pure transformer caches
Previously, a partial match within a node's edge would truncate the path
to the parent snapshot - effectively making all cache types behave as
recurrent caches. Caches with only transformer layers can rewind to an
arbitrary boundary, so this change restores that capability to improve
cache hits.
2026-03-23 17:44:19 -07:00
Parth Sareen
b166b36cd2 docs: update Claude Code with Telegram guide (#15026) 2026-03-23 16:31:21 -07:00
Daniel Hiltgen
c2b0bb7a52 mlx: update as of 3/23 (#14789)
* mlx: update to HEAD on 3/23

Also fixes a few misc vendoring bugs uncovered with this first update.
This also renames the version files to make them clearer.

* CUDA Fast Gated Delta kernel

* mlx: detect eval errors and panic

On model errors or missing kernels, don't mask the error, bubble it up.
2026-03-23 11:28:44 -07:00
Bruce MacDonald
22c2bdbd8a docs: nemoclaw integration (#14962)
---------

Co-authored-by: ParthSareen <parth.sareen@ollama.com>
2026-03-20 15:27:37 -07:00
Bruce MacDonald
6df6d097d9 launch: skip openclaw gateway health check when no daemon install (#14984) 2026-03-20 15:20:14 -07:00
Jesse Gross
d7c176ab91 llm, mlxrunner: fix done channel value consumed by first receiver
Receiving from a buffered chan error consumes the value, so only the
first caller (WaitUntilRunning, HasExited, or Close) sees the signal.
Subsequent receivers block or take the wrong branch. Replace with a
closed chan struct{} which can be received from any number of times,
and store the error in a separate field.
2026-03-19 17:44:28 -07:00
Jesse Gross
0ff7d724ff mlx: fix subprocess log deadlock
The stderr reader used bufio.Scanner which has a 64KB max line size.
If the subprocess wrote a line exceeding this limit, the scanner would
stop reading, the OS pipe buffer would fill, and the subprocess would
deadlock.

Replace the scanner with a statusWriter that wraps io.Copy. The writer
forwards all stderr to os.Stderr while capturing the last short line
(≤256 bytes) for error reporting, avoiding both the deadlock and the
need to buffer arbitrarily long lines.
2026-03-19 17:44:28 -07:00
Devon Rifkin
46cb7795e1 add ability to turn on debug request logging (#14106)
If `OLLAMA_DEBUG_LOG_REQUESTS` is set, then on server startup a temp
folder will be created. Upon any inference request, the body will be
logged to a file in this folder, as well as a small shell script to
"replay" the request using cURL.

This is just intended for debugging scenarios, not as something to turn
on normally.
2026-03-19 17:08:17 -07:00
73 changed files with 5504 additions and 510 deletions

View File

@@ -64,6 +64,7 @@ jobs:
container: nvidia/cuda:13.0.0-devel-ubuntu22.04
extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
install-go: true
runs-on: linux
container: ${{ matrix.container }}
steps:
@@ -90,6 +91,12 @@ jobs:
fi
env:
DEBIAN_FRONTEND: noninteractive
- if: matrix.install-go
name: Install Go
run: |
GO_VERSION=$(awk '/^go / { print $2 }' go.mod)
curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" | tar xz -C /usr/local
echo "/usr/local/go/bin" >> $GITHUB_PATH
- uses: actions/cache@v4
with:
path: /github/home/.cache/ccache

View File

@@ -157,7 +157,7 @@ COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
COPY x/imagegen/mlx x/imagegen/mlx
COPY go.mod go.sum .
COPY MLX_VERSION MLX_CORE_VERSION .
COPY MLX_VERSION MLX_C_VERSION .
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download

View File

@@ -1 +0,0 @@
v0.30.6

1
MLX_C_VERSION Normal file
View File

@@ -0,0 +1 @@
0726ca922fc902c4c61ef9c27d94132be418e945

View File

@@ -1 +1 @@
v0.5.0
38ad257088fb2193ad47e527cf6534a689f30943

View File

@@ -2065,6 +2065,10 @@ func runLauncherAction(cmd *cobra.Command, action tui.TUIAction, deps launcherDe
if err != nil {
return true, fmt.Errorf("launching %s: %w", action.Integration, err)
}
// VS Code is a GUI app — exit the TUI loop after launching
if action.Integration == "vscode" {
return false, nil
}
return true, nil
default:
return false, fmt.Errorf("unknown launcher action: %d", action.Kind)

View File

@@ -209,6 +209,43 @@ func TestRunLauncherAction_RunModelContinuesAfterCancellation(t *testing.T) {
}
}
func TestRunLauncherAction_VSCodeExitsTUILoop(t *testing.T) {
setCmdTestHome(t, t.TempDir())
cmd := &cobra.Command{}
cmd.SetContext(context.Background())
// VS Code should exit the TUI loop (return false) after a successful launch.
continueLoop, err := runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "vscode"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if continueLoop {
t.Fatal("expected vscode launch to exit the TUI loop (return false)")
}
// Other integrations should continue the TUI loop (return true).
continueLoop, err = runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if !continueLoop {
t.Fatal("expected non-vscode integration to continue the TUI loop (return true)")
}
}
func TestRunLauncherAction_IntegrationContinuesAfterCancellation(t *testing.T) {
setCmdTestHome(t, t.TempDir())

View File

@@ -179,6 +179,7 @@ Supported integrations:
opencode OpenCode
openclaw OpenClaw (aliases: clawdbot, moltbot)
pi Pi
vscode    VS Code (aliases: code)
Examples:
ollama launch
@@ -801,13 +802,6 @@ func cloneAliases(aliases map[string]string) map[string]string {
return cloned
}
func singleModelPrechecked(current string) []string {
if current == "" {
return nil
}
return []string{current}
}
func firstModel(models []string) string {
if len(models) == 0 {
return ""

View File

@@ -80,6 +80,12 @@ func (c *Openclaw) Run(model string, args []string) error {
}
if canInstallDaemon() {
onboardArgs = append(onboardArgs, "--install-daemon")
} else {
// When we can't install a daemon (e.g. no systemd, sudo dropped
// XDG_RUNTIME_DIR, or container environment), skip the gateway
// health check so non-interactive onboarding completes. The
// gateway is started as a foreground child process after onboarding.
onboardArgs = append(onboardArgs, "--skip-health")
}
cmd := exec.Command(bin, onboardArgs...)
cmd.Stdin = os.Stdin

View File

@@ -33,7 +33,7 @@ type IntegrationInfo struct {
Description string
}
var launcherIntegrationOrder = []string{"opencode", "droid", "pi", "cline"}
var launcherIntegrationOrder = []string{"vscode", "opencode", "droid", "pi", "cline"}
var integrationSpecs = []*IntegrationSpec{
{
@@ -131,6 +131,18 @@ var integrationSpecs = []*IntegrationSpec{
Command: []string{"npm", "install", "-g", "@mariozechner/pi-coding-agent"},
},
},
{
Name: "vscode",
Runner: &VSCode{},
Aliases: []string{"code"},
Description: "Microsoft's open-source AI code editor",
Install: IntegrationInstallSpec{
CheckInstalled: func() bool {
return (&VSCode{}).findBinary() != ""
},
URL: "https://code.visualstudio.com",
},
},
}
var integrationSpecsByName map[string]*IntegrationSpec

660
cmd/launch/vscode.go Normal file
View File

@@ -0,0 +1,660 @@
package launch
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
_ "github.com/mattn/go-sqlite3"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/cmd/internal/fileutil"
"github.com/ollama/ollama/envconfig"
)
// VSCode implements Runner and Editor for Visual Studio Code integration.
// The zero value is ready to use; all state lives in VS Code's own files.
type VSCode struct{}

// String returns the human-readable integration name shown to users.
func (v *VSCode) String() string { return "Visual Studio Code" }
// findBinary returns the path/command to launch VS Code, or "" if not found.
// It checks for the "code" CLI on PATH first, then falls back to platform-specific locations.
func (v *VSCode) findBinary() string {
	// Prefer the "code" shell command: it works on every platform when the
	// user has enabled VS Code's CLI integration.
	if _, err := exec.LookPath("code"); err == nil {
		return "code"
	}
	var candidates []string
	switch runtime.GOOS {
	case "darwin":
		// The .app bundle may exist even when the CLI is not on PATH.
		candidates = []string{
			"/Applications/Visual Studio Code.app",
		}
	case "windows":
		// Default per-user install location.
		if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
			candidates = append(candidates, filepath.Join(localAppData, "Programs", "Microsoft VS Code", "bin", "code.cmd"))
		}
	default: // linux
		candidates = []string{
			"/usr/bin/code",
			"/snap/bin/code",
		}
	}
	for _, c := range candidates {
		if _, err := os.Stat(c); err == nil {
			return c
		}
	}
	return ""
}
// IsRunning reports whether VS Code is currently running.
// Each platform uses a pattern specific enough to avoid matching Cursor or
// other VS Code forks.
func (v *VSCode) IsRunning() bool {
	switch runtime.GOOS {
	case "darwin":
		// pgrep -f matches the full command line; the bundle-internal binary
		// path is unique to the genuine VS Code app.
		out, err := exec.Command("pgrep", "-f", "Visual Studio Code.app/Contents/MacOS/Code").Output()
		return err == nil && len(out) > 0
	case "windows":
		// Match VS Code by executable path to avoid matching Cursor or other forks.
		out, err := exec.Command("powershell", "-NoProfile", "-Command",
			`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Select-Object -First 1`).Output()
		return err == nil && len(strings.TrimSpace(string(out))) > 0
	default:
		// Match VS Code specifically by its install path to avoid matching
		// Cursor (/cursor/) or other forks.
		for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
			out, err := exec.Command("pgrep", "-f", pattern).Output()
			if err == nil && len(out) > 0 {
				return true
			}
		}
		return false
	}
}
// Quit gracefully quits VS Code and waits for it to exit so that it flushes
// its in-memory state back to the database.
func (v *VSCode) Quit() {
	if !v.IsRunning() {
		return
	}
	switch runtime.GOOS {
	case "darwin":
		// AppleScript quit is graceful: VS Code gets a chance to save state.
		_ = exec.Command("osascript", "-e", `quit app "Visual Studio Code"`).Run()
	case "windows":
		// Kill VS Code by executable path to avoid killing Cursor or other forks.
		_ = exec.Command("powershell", "-NoProfile", "-Command",
			`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Stop-Process -Force`).Run()
	default:
		// Best-effort kill on both common Linux install paths.
		for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
			_ = exec.Command("pkill", "-f", pattern).Run()
		}
	}
	// Wait for the process to fully exit and flush its state to disk
	// TODO(hoyyeva): update spinner to use bubble tea
	spinnerFrames := []string{"|", "/", "-", "\\"}
	frame := 0
	fmt.Fprintf(os.Stderr, "\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[0])
	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()
	for range 150 { // 150 ticks × 200ms = 30s timeout
		<-ticker.C
		frame++
		fmt.Fprintf(os.Stderr, "\r\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
		if frame%5 == 0 { // check every ~1s
			if !v.IsRunning() {
				fmt.Fprintf(os.Stderr, "\r\033[K")
				// Give VS Code a moment to finish writing its state DB
				time.Sleep(1 * time.Second)
				return
			}
		}
	}
	// Timed out waiting for exit: clear the spinner line and proceed anyway.
	fmt.Fprintf(os.Stderr, "\r\033[K")
}
const (
	// Minimum recommended versions. Older releases may lack the Ollama
	// model-picker behavior the warnings below refer to.
	minCopilotChatVersion = "0.41.0"
	minVSCodeVersion      = "1.113"
)
// Run configures VS Code's Copilot Chat to use the given Ollama model(s),
// then launches or restarts VS Code. The model argument is the default
// model; additional configured models are loaded from the stored
// integration config. args is accepted for interface compatibility and
// unused. Run always returns nil for user-facing launch flows; failures
// along the way are reported as warnings on stderr.
func (v *VSCode) Run(model string, args []string) error {
	v.checkVSCodeVersion()
	v.checkCopilotChatVersion()
	// Get all configured models (saved by the launcher framework before Run is called).
	models := []string{model}
	if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil && len(cfg.Models) > 0 {
		models = cfg.Models
	}
	// One API client serves both registration and the capability check;
	// previously the client was constructed twice back-to-back.
	if client, err := api.ClientFromEnvironment(); err == nil {
		ctx := context.Background()
		// VS Code discovers models from ollama ls. Cloud models that pass Show
		// (the server knows about them) but aren't in ls need to be pulled to
		// register them so VS Code can find them.
		v.ensureModelsRegistered(ctx, client, models)
		// Warn if the default model doesn't support tool calling.
		if resp, err := client.Show(ctx, &api.ShowRequest{Model: models[0]}); err == nil {
			hasTools := false
			for _, c := range resp.Capabilities {
				if c == "tools" {
					hasTools = true
					break
				}
			}
			if !hasTools {
				fmt.Fprintf(os.Stderr, "Note: %s does not support tool calling and may not appear in the Copilot Chat model picker.\n", models[0])
			}
		}
	}
	v.printModelAccessTip()
	if !v.IsRunning() {
		v.applyModelPicker(models)
		return nil
	}
	restart, err := ConfirmPrompt("Restart VS Code?")
	if err != nil {
		// A failed prompt (e.g. non-interactive terminal) means "don't restart".
		restart = false
	}
	if !restart {
		fmt.Fprintf(os.Stderr, "\nTo get the latest model configuration, restart VS Code when you're ready.\n")
		return nil
	}
	v.Quit()
	v.applyModelPicker(models)
	return nil
}

// applyModelPicker updates the Copilot Chat model picker state and brings
// VS Code to the foreground. Picker failures are warnings, not fatal.
func (v *VSCode) applyModelPicker(models []string) {
	if err := v.ShowInModelPicker(models); err != nil {
		fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
	}
	v.FocusVSCode()
}
// ensureModelsRegistered pulls models that the server knows about (Show succeeds)
// but aren't in ollama ls yet. This is needed for cloud models so that VS Code
// can discover them from the Ollama API.
func (v *VSCode) ensureModelsRegistered(ctx context.Context, client *api.Client, models []string) {
	listed, err := client.List(ctx)
	if err != nil {
		// Best-effort: without a listing we can't tell which models are missing.
		return
	}
	registered := make(map[string]bool, len(listed.Models))
	for _, m := range listed.Models {
		registered[m.Name] = true
	}
	for _, model := range models {
		if registered[model] {
			continue
		}
		// Also check without :latest suffix
		if !strings.Contains(model, ":") && registered[model+":latest"] {
			continue
		}
		// Pull failures are surfaced as warnings; remaining models still get tried.
		if err := pullModel(ctx, client, model, false); err != nil {
			fmt.Fprintf(os.Stderr, "%s Warning: could not register model %s: %v%s\n", ansiYellow, model, err, ansiReset)
		}
	}
}
// FocusVSCode brings VS Code to the foreground.
// Focusing is best-effort; all launch errors are deliberately ignored.
func (v *VSCode) FocusVSCode() {
	binary := v.findBinary()
	if binary == "" {
		return
	}
	if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
		// .app bundles must be launched via open(1).
		_ = exec.Command("open", "-a", binary).Run()
	} else {
		// Start without waiting: VS Code keeps running after we exit.
		_ = exec.Command(binary).Start()
	}
}
// printModelAccessTip shows instructions for finding Ollama models in VS Code.
// Written to stderr so it doesn't pollute any machine-readable stdout.
func (v *VSCode) printModelAccessTip() {
	fmt.Fprintf(os.Stderr, "\nTip: To use Ollama models, open Copilot Chat and click the model picker.\n")
	fmt.Fprintf(os.Stderr, " If you don't see your models, click \"Other models\" to find them.\n\n")
}
// Paths returns the VS Code config files this integration manages,
// or nil when the chatLanguageModels.json file does not exist yet.
func (v *VSCode) Paths() []string {
	p := v.chatLanguageModelsPath()
	if !fileExists(p) {
		return nil
	}
	return []string{p}
}
// Edit writes the Ollama vendor entry into VS Code's chatLanguageModels.json,
// preserving entries from other vendors, then cleans up legacy settings.
// An empty model list is a no-op.
func (v *VSCode) Edit(models []string) error {
	if len(models) == 0 {
		return nil
	}
	// Write chatLanguageModels.json with Ollama vendor entry
	clmPath := v.chatLanguageModelsPath()
	if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
		return err
	}
	var entries []map[string]any
	if data, err := os.ReadFile(clmPath); err == nil {
		// Corrupted JSON is tolerated: entries stays empty and the file is rebuilt.
		_ = json.Unmarshal(data, &entries)
	}
	// Remove any existing Ollama entries, preserve others
	filtered := make([]map[string]any, 0, len(entries))
	for _, entry := range entries {
		if vendor, _ := entry["vendor"].(string); vendor != "ollama" {
			filtered = append(filtered, entry)
		}
	}
	// Add new Ollama entry
	filtered = append(filtered, map[string]any{
		"vendor": "ollama",
		"name":   "Ollama",
		"url":    envconfig.Host().String(),
	})
	data, err := json.MarshalIndent(filtered, "", " ")
	if err != nil {
		return err
	}
	if err := fileutil.WriteWithBackup(clmPath, data); err != nil {
		return err
	}
	// Clean up legacy settings from older Ollama integrations
	v.updateSettings()
	return nil
}
// Models returns the models stored in the integration config, but only when
// VS Code's chatLanguageModels.json actually has an Ollama vendor entry.
func (v *VSCode) Models() []string {
	if !v.hasOllamaVendor() {
		return nil
	}
	cfg, err := loadStoredIntegrationConfig("vscode")
	if err != nil {
		return nil
	}
	return cfg.Models
}
// hasOllamaVendor checks if chatLanguageModels.json contains an Ollama vendor entry.
// Missing or unparseable files count as "no entry".
func (v *VSCode) hasOllamaVendor() bool {
	raw, err := os.ReadFile(v.chatLanguageModelsPath())
	if err != nil {
		return false
	}
	var entries []map[string]any
	if json.Unmarshal(raw, &entries) != nil {
		return false
	}
	for _, e := range entries {
		vendor, _ := e["vendor"].(string)
		if vendor == "ollama" {
			return true
		}
	}
	return false
}
// chatLanguageModelsPath returns the path to VS Code's BYOK vendor config file.
func (v *VSCode) chatLanguageModelsPath() string {
	return v.vscodePath("chatLanguageModels.json")
}
// settingsPath returns the path to VS Code's user settings.json.
func (v *VSCode) settingsPath() string {
	return v.vscodePath("settings.json")
}
// updateSettings cleans up legacy settings from older Ollama integrations.
// It is best-effort: any read, parse, or marshal failure leaves the file alone.
func (v *VSCode) updateSettings() {
	path := v.settingsPath()
	raw, err := os.ReadFile(path)
	if err != nil {
		return
	}
	var settings map[string]any
	if json.Unmarshal(raw, &settings) != nil {
		return
	}
	// Keys written by previous versions of the integration.
	legacyKeys := []string{"github.copilot.chat.byok.ollamaEndpoint", "ollama.launch.configured"}
	removed := false
	for _, k := range legacyKeys {
		if _, present := settings[k]; present {
			delete(settings, k)
			removed = true
		}
	}
	if !removed {
		return
	}
	out, err := json.MarshalIndent(settings, "", " ")
	if err != nil {
		return
	}
	_ = fileutil.WriteWithBackup(path, out)
}
// statePath returns the path to VS Code's global state SQLite database.
func (v *VSCode) statePath() string {
	return v.vscodePath("globalStorage", "state.vscdb")
}
// ShowInModelPicker ensures the given models are visible in VS Code's Copilot
// Chat model picker and sets the primary model as the active selection. It sets
// the configured models to true in the picker preferences so they appear in the
// dropdown, and writes the first model as the selected model for both the panel
// and editor chat views. Models use the VS Code identifier format
// "ollama/Ollama/<name>".
func (v *VSCode) ShowInModelPicker(models []string) error {
	if len(models) == 0 {
		return nil
	}
	dbPath := v.statePath()
	needsCreate := !fileExists(dbPath)
	if needsCreate {
		if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
			return fmt.Errorf("creating state directory: %w", err)
		}
	}
	// busy_timeout guards against VS Code briefly holding the DB lock.
	db, err := sql.Open("sqlite3", dbPath+"?_busy_timeout=5000")
	if err != nil {
		return fmt.Errorf("opening state database: %w", err)
	}
	defer db.Close()
	// Create the table if this is a fresh DB. Schema must match what VS Code creates.
	if needsCreate {
		if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
			return fmt.Errorf("initializing state database: %w", err)
		}
	}
	// Read existing preferences
	prefs := make(map[string]bool)
	var prefsJSON string
	if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&prefsJSON); err == nil {
		// Malformed JSON is tolerated; prefs simply starts empty.
		_ = json.Unmarshal([]byte(prefsJSON), &prefs)
	}
	// Build name→ID map from VS Code's cached model list.
	// VS Code uses numeric IDs like "ollama/Ollama/4", not "ollama/Ollama/kimi-k2.5:cloud".
	nameToID := make(map[string]string)
	var cached []map[string]any
	var cacheJSON string
	if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&cacheJSON); err == nil {
		_ = json.Unmarshal([]byte(cacheJSON), &cached)
	}
	cachedNames := make(map[string]bool)
	for _, entry := range cached {
		meta, _ := entry["metadata"].(map[string]any)
		if meta == nil {
			continue
		}
		if vendor, _ := meta["vendor"].(string); vendor == "ollama" {
			name, _ := meta["name"].(string)
			id, _ := entry["identifier"].(string)
			if name != "" && id != "" {
				nameToID[name] = id
			}
			if name != "" {
				cachedNames[name] = true
			}
		}
	}
	// Ollama config is authoritative: always show configured models,
	// hide Ollama models that are no longer in the config.
	configuredIDs := make(map[string]bool)
	for _, m := range models {
		for _, id := range v.modelVSCodeIDs(m, nameToID) {
			prefs[id] = true
			configuredIDs[id] = true
		}
	}
	for id := range prefs {
		if strings.HasPrefix(id, "ollama/") && !configuredIDs[id] {
			prefs[id] = false
		}
	}
	data, _ := json.Marshal(prefs)
	if _, err = db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
		return err
	}
	// Set the primary model as the active selection in Copilot Chat so it
	// doesn't default to "auto" or whatever the user last picked manually.
	primaryID := v.modelVSCodeIDs(models[0], nameToID)[0]
	for _, key := range []string{"chat.currentLanguageModel.panel", "chat.currentLanguageModel.editor"} {
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key, primaryID); err != nil {
			return err
		}
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key+".isDefault", "false"); err != nil {
			return err
		}
	}
	// Ensure configured models exist in the cached model list so VS Code can
	// restore the selection immediately on startup, before extensions load.
	// Without this, a model that was never previously used won't be in the
	// cache, and VS Code falls back to "auto" until the Ollama BYOK provider
	// discovers it via the API (which is slow).
	cacheChanged := false
	for _, m := range models {
		if cachedNames[m] {
			continue
		}
		if !strings.Contains(m, ":") && cachedNames[m+":latest"] {
			continue
		}
		cacheID := m
		if !strings.Contains(m, ":") {
			cacheID = m + ":latest"
		}
		cached = append(cached, map[string]any{
			"identifier": "ollama/Ollama/" + cacheID,
			"metadata": map[string]any{
				"extension":            map[string]any{"value": "github.copilot-chat"},
				"name":                 m,
				"id":                   m,
				"vendor":               "ollama",
				"version":              "1.0.0",
				"family":               m,
				"detail":               "Ollama",
				"maxInputTokens":       4096,
				"maxOutputTokens":      4096,
				"isDefaultForLocation": map[string]any{},
				"isUserSelectable":     true,
				"capabilities":         map[string]any{"toolCalling": true},
			},
		})
		cacheChanged = true
	}
	if cacheChanged {
		cacheData, _ := json.Marshal(cached)
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(cacheData)); err != nil {
			return err
		}
	}
	return nil
}
// modelVSCodeIDs returns all possible VS Code picker IDs for a model name.
// The primary (first) ID should match the live identifier that VS Code assigns
// at runtime via toModelIdentifier(vendor, group, m.id), where m.id comes from
// /api/tags and always includes the tag (e.g. "llama3.2:latest").
// Ordering matters: callers use ids[0] as the active-selection value.
func (v *VSCode) modelVSCodeIDs(model string, nameToID map[string]string) []string {
	var ids []string
	// Prefer the numeric identifier from VS Code's cache when available.
	if id, ok := nameToID[model]; ok {
		ids = append(ids, id)
	} else if !strings.Contains(model, ":") {
		if id, ok := nameToID[model+":latest"]; ok {
			ids = append(ids, id)
		}
	}
	// For untagged models, the live identifier includes :latest
	// (e.g. ollama/Ollama/llama3.2:latest), so prefer that format
	// to avoid a mismatch that causes VS Code to reset to "auto".
	if !strings.Contains(model, ":") {
		ids = append(ids, "ollama/Ollama/"+model+":latest")
	}
	ids = append(ids, "ollama/Ollama/"+model)
	return ids
}
// vscodePath joins the platform-specific VS Code user-config directory with
// the given path components.
func (v *VSCode) vscodePath(parts ...string) string {
	home, _ := os.UserHomeDir()
	var base string
	switch runtime.GOOS {
	case "darwin":
		base = filepath.Join(home, "Library", "Application Support", "Code", "User")
	case "windows":
		base = filepath.Join(os.Getenv("APPDATA"), "Code", "User")
	default:
		base = filepath.Join(home, ".config", "Code", "User")
	}
	elems := make([]string, 0, len(parts)+1)
	elems = append(elems, base)
	elems = append(elems, parts...)
	return filepath.Join(elems...)
}
// checkVSCodeVersion warns if VS Code is older than minVSCodeVersion.
// All failures (no CLI, exec error, empty output) silently skip the check.
func (v *VSCode) checkVSCodeVersion() {
	codeCLI := v.findCodeCLI()
	if codeCLI == "" {
		return
	}
	out, err := exec.Command(codeCLI, "--version").Output()
	if err != nil {
		return
	}
	// "code --version" outputs: version\ncommit\narch
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) == 0 || lines[0] == "" {
		return
	}
	version := strings.TrimSpace(lines[0])
	if compareVersions(version, minVSCodeVersion) < 0 {
		fmt.Fprintf(os.Stderr, "\n%sWarning: VS Code version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minVSCodeVersion, ansiReset)
		fmt.Fprintf(os.Stderr, "Please update VS Code to the latest version.\n\n")
	}
}
// checkCopilotChatVersion warns if the GitHub Copilot Chat extension is
// missing or older than minCopilotChatVersion.
// Failures to locate or run the CLI silently skip the check.
func (v *VSCode) checkCopilotChatVersion() {
	codeCLI := v.findCodeCLI()
	if codeCLI == "" {
		return
	}
	out, err := exec.Command(codeCLI, "--list-extensions", "--show-versions").Output()
	if err != nil {
		return
	}
	installed, version := parseCopilotChatVersion(string(out))
	if !installed {
		fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension is not installed%s\n", ansiYellow, ansiReset)
		fmt.Fprintf(os.Stderr, "Install it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Install\n\n")
		return
	}
	if compareVersions(version, minCopilotChatVersion) < 0 {
		fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minCopilotChatVersion, ansiReset)
		fmt.Fprintf(os.Stderr, "Please update it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Update\n\n")
	}
}
// findCodeCLI returns the path to the VS Code CLI for querying extensions.
// On macOS, findBinary may return an .app bundle which can't run --list-extensions,
// so this resolves to the actual CLI binary inside the bundle.
func (v *VSCode) findCodeCLI() string {
	binary := v.findBinary()
	switch {
	case binary == "":
		return ""
	case runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app"):
		cli := binary + "/Contents/Resources/app/bin/code"
		if _, err := os.Stat(cli); err != nil {
			return ""
		}
		return cli
	default:
		return binary
	}
}
// parseCopilotChatVersion extracts the version of the GitHub Copilot Chat
// extension from "code --list-extensions --show-versions" output.
// The extension ID is matched case-insensitively; the version is whatever
// follows the first "@" on the matching line, trimmed of whitespace.
func parseCopilotChatVersion(output string) (installed bool, version string) {
	const marker = "github.copilot-chat@"
	for _, line := range strings.Split(output, "\n") {
		// Format: github.copilot-chat@0.40.1
		if len(line) < len(marker) || !strings.EqualFold(line[:len(marker)], marker) {
			continue
		}
		return true, strings.TrimSpace(line[len(marker):])
	}
	return false, ""
}
// compareVersions compares two dot-separated version strings.
// Returns -1 if a < b, 0 if a == b, 1 if a > b.
func compareVersions(a, b string) int {
aParts := strings.Split(a, ".")
bParts := strings.Split(b, ".")
maxLen := len(aParts)
if len(bParts) > maxLen {
maxLen = len(bParts)
}
for i := range maxLen {
var aNum, bNum int
if i < len(aParts) {
aNum, _ = strconv.Atoi(aParts[i])
}
if i < len(bParts) {
bNum, _ = strconv.Atoi(bParts[i])
}
if aNum < bNum {
return -1
}
if aNum > bNum {
return 1
}
}
return 0
}
// fileExists reports whether path can be stat'd. Any stat error (including
// permission errors) is treated as "does not exist".
func fileExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

656
cmd/launch/vscode_test.go Normal file
View File

@@ -0,0 +1,656 @@
package launch
import (
"database/sql"
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
_ "github.com/mattn/go-sqlite3"
)
// TestVSCodeIntegration covers the basic contract of the VSCode type:
// its display name and the interfaces it must satisfy at compile time.
func TestVSCodeIntegration(t *testing.T) {
	v := &VSCode{}
	t.Run("String", func(t *testing.T) {
		if got := v.String(); got != "Visual Studio Code" {
			t.Errorf("String() = %q, want %q", got, "Visual Studio Code")
		}
	})
	t.Run("implements Runner", func(t *testing.T) {
		// Compile-time interface satisfaction check.
		var _ Runner = v
	})
	t.Run("implements Editor", func(t *testing.T) {
		var _ Editor = v
	})
}
// TestVSCodeEdit exercises Edit against several initial states of
// chatLanguageModels.json: missing file, other vendors present, an existing
// Ollama entry, an empty model list, and corrupted JSON.
func TestVSCodeEdit(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
	tests := []struct {
		name     string
		setup    string // initial chatLanguageModels.json content, empty means no file
		models   []string
		validate func(t *testing.T, data []byte)
	}{
		{
			name:   "fresh install",
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "preserve other vendor entries",
			setup:  `[{"vendor": "azure", "name": "Azure", "url": "https://example.com"}]`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				var entries []map[string]any
				json.Unmarshal(data, &entries)
				if len(entries) != 2 {
					t.Errorf("expected 2 entries, got %d", len(entries))
				}
				// Check Azure entry preserved
				found := false
				for _, e := range entries {
					if v, _ := e["vendor"].(string); v == "azure" {
						found = true
					}
				}
				if !found {
					t.Error("azure vendor entry was not preserved")
				}
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "update existing ollama entry",
			setup:  `[{"vendor": "ollama", "name": "Ollama", "url": "http://old:11434"}]`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "empty models is no-op",
			setup:  `[{"vendor": "azure", "name": "Azure"}]`,
			models: []string{},
			validate: func(t *testing.T, data []byte) {
				if string(data) != `[{"vendor": "azure", "name": "Azure"}]` {
					t.Error("empty models should not modify file")
				}
			},
		},
		{
			name:   "corrupted JSON treated as empty",
			setup:  `{corrupted json`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				var entries []map[string]any
				if err := json.Unmarshal(data, &entries); err != nil {
					t.Errorf("result is not valid JSON: %v", err)
				}
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Each case starts from a clean config directory.
			os.RemoveAll(filepath.Dir(clmPath))
			if tt.setup != "" {
				os.MkdirAll(filepath.Dir(clmPath), 0o755)
				os.WriteFile(clmPath, []byte(tt.setup), 0o644)
			}
			if err := v.Edit(tt.models); err != nil {
				t.Fatal(err)
			}
			data, _ := os.ReadFile(clmPath)
			tt.validate(t, data)
		})
	}
}
// TestVSCodeEditCleansUpOldSettings verifies that Edit removes the deprecated
// byok endpoint and launch-configured keys from settings.json while leaving
// unrelated user settings untouched.
func TestVSCodeEditCleansUpOldSettings(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	settingsPath := testVSCodePath(t, tmpDir, "settings.json")
	// Create settings.json with the old byok setting alongside an unrelated key.
	// Setup errors are fatal: a failed write would otherwise surface as a
	// confusing assertion failure further down.
	if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(settingsPath, []byte(`{"github.copilot.chat.byok.ollamaEndpoint": "http://old:11434", "ollama.launch.configured": true, "editor.fontSize": 14}`), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := v.Edit([]string{"llama3.2"}); err != nil {
		t.Fatal(err)
	}
	// Verify old settings were removed.
	data, err := os.ReadFile(settingsPath)
	if err != nil {
		t.Fatal(err)
	}
	var settings map[string]any
	if err := json.Unmarshal(data, &settings); err != nil {
		t.Fatalf("settings.json is not valid JSON after Edit: %v", err)
	}
	if _, ok := settings["github.copilot.chat.byok.ollamaEndpoint"]; ok {
		t.Error("github.copilot.chat.byok.ollamaEndpoint should have been removed")
	}
	if _, ok := settings["ollama.launch.configured"]; ok {
		t.Error("ollama.launch.configured should have been removed")
	}
	// JSON numbers decode to float64 in a map[string]any.
	if settings["editor.fontSize"] != float64(14) {
		t.Error("editor.fontSize should have been preserved")
	}
}
// TestVSCodePaths verifies that Paths reports the chatLanguageModels.json
// location only when the file actually exists on disk.
func TestVSCodePaths(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
	t.Run("no file returns nil", func(t *testing.T) {
		os.Remove(clmPath) // best-effort: the file may not exist yet
		if paths := v.Paths(); paths != nil {
			t.Errorf("expected nil, got %v", paths)
		}
	})
	t.Run("existing file returns path", func(t *testing.T) {
		// Setup errors are fatal: a failed write would make the assertion
		// below fail for the wrong reason.
		if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(clmPath, []byte(`[]`), 0o644); err != nil {
			t.Fatal(err)
		}
		if paths := v.Paths(); len(paths) != 1 {
			t.Errorf("expected 1 path, got %d", len(paths))
		}
	})
}
// testVSCodePath returns the expected VS Code config path for the given file in tests.
func testVSCodePath(t *testing.T, tmpDir, filename string) string {
t.Helper()
switch runtime.GOOS {
case "darwin":
return filepath.Join(tmpDir, "Library", "Application Support", "Code", "User", filename)
case "windows":
t.Setenv("APPDATA", tmpDir)
return filepath.Join(tmpDir, "Code", "User", filename)
default:
return filepath.Join(tmpDir, ".config", "Code", "User", filename)
}
}
// assertOllamaVendorConfigured parses data as a list of vendor entries and
// fails the test unless it contains an ollama entry named "Ollama" with a
// non-empty url.
func assertOllamaVendorConfigured(t *testing.T, data []byte) {
	t.Helper()
	var entries []map[string]any
	if err := json.Unmarshal(data, &entries); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	for _, e := range entries {
		vendor, _ := e["vendor"].(string)
		if vendor != "ollama" {
			continue
		}
		if n, _ := e["name"].(string); n != "Ollama" {
			t.Errorf("expected name \"Ollama\", got %q", n)
		}
		if u, _ := e["url"].(string); u == "" {
			t.Error("url not set")
		}
		return
	}
	t.Error("no ollama vendor entry found")
}
// TestShowInModelPicker exercises ShowInModelPicker against VS Code's
// state.vscdb SQLite database: the model-picker preference map, the active
// model selection keys, and the cached language-model list.
func TestShowInModelPicker(t *testing.T) {
	v := &VSCode{}
	// setupDB creates a state DB with optional seed data and returns its path.
	// All seeding errors are fatal: a silently failed INSERT would leave an
	// empty DB and invalidate every subtest built on top of it.
	setupDB := func(t *testing.T, tmpDir string, seedPrefs map[string]bool, seedCache []map[string]any) string {
		t.Helper()
		dbDir := filepath.Join(tmpDir, "globalStorage")
		if err := os.MkdirAll(dbDir, 0o755); err != nil {
			t.Fatal(err)
		}
		dbPath := filepath.Join(dbDir, "state.vscdb")
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
			t.Fatal(err)
		}
		if seedPrefs != nil {
			data, err := json.Marshal(seedPrefs)
			if err != nil {
				t.Fatal(err)
			}
			if _, err := db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
				t.Fatal(err)
			}
		}
		if seedCache != nil {
			data, err := json.Marshal(seedCache)
			if err != nil {
				t.Fatal(err)
			}
			if _, err := db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(data)); err != nil {
				t.Fatal(err)
			}
		}
		return dbPath
	}
	// readPrefs reads the picker preferences back from the DB.
	readPrefs := func(t *testing.T, dbPath string) map[string]bool {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var raw string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&raw); err != nil {
			t.Fatal(err)
		}
		prefs := make(map[string]bool)
		if err := json.Unmarshal([]byte(raw), &prefs); err != nil {
			t.Fatalf("invalid preferences JSON: %v", err)
		}
		return prefs
	}
	t.Run("fresh DB creates table and shows models", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		if runtime.GOOS == "windows" {
			t.Setenv("APPDATA", tmpDir)
		}
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be shown")
		}
		if !prefs["ollama/Ollama/llama3.2:latest"] {
			t.Error("expected llama3.2:latest to be shown")
		}
	})
	t.Run("configured models are shown", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be shown")
		}
		if !prefs["ollama/Ollama/qwen3:8b"] {
			t.Error("expected qwen3:8b to be shown")
		}
	})
	t.Run("removed models are hidden", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"ollama/Ollama/llama3.2":        true,
			"ollama/Ollama/llama3.2:latest": true,
			"ollama/Ollama/mistral":         true,
			"ollama/Ollama/mistral:latest":  true,
		}, nil)
		// Only configure llama3.2 — mistral should get hidden
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to stay shown")
		}
		if prefs["ollama/Ollama/mistral"] {
			t.Error("expected mistral to be hidden")
		}
		if prefs["ollama/Ollama/mistral:latest"] {
			t.Error("expected mistral:latest to be hidden")
		}
	})
	t.Run("non-ollama prefs are preserved", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"copilot/gpt-4o": true,
		}, nil)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["copilot/gpt-4o"] {
			t.Error("expected copilot/gpt-4o to stay shown")
		}
	})
	t.Run("uses cached numeric IDs when available", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/4"] {
			t.Error("expected numeric ID ollama/Ollama/4 to be shown")
		}
		// Name-based fallback should also be set
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected name-based ID to also be shown")
		}
	})
	t.Run("empty models is no-op", func(t *testing.T) {
		err := v.ShowInModelPicker([]string{})
		if err != nil {
			t.Fatal(err)
		}
	})
	// readValue reads a string value from the state DB; it deliberately
	// returns "" for a missing key rather than failing the test.
	readValue := func(t *testing.T, dbPath, key string) string {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var val string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = ?", key).Scan(&val); err != nil {
			return ""
		}
		return val
	}
	t.Run("sets primary model as active selection", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
		if panelModel != "ollama/Ollama/llama3.2:latest" {
			t.Errorf("expected panel model ollama/Ollama/llama3.2:latest, got %q", panelModel)
		}
		editorModel := readValue(t, dbPath, "chat.currentLanguageModel.editor")
		if editorModel != "ollama/Ollama/llama3.2:latest" {
			t.Errorf("expected editor model ollama/Ollama/llama3.2:latest, got %q", editorModel)
		}
		panelDefault := readValue(t, dbPath, "chat.currentLanguageModel.panel.isDefault")
		if panelDefault != "false" {
			t.Errorf("expected panel isDefault false, got %q", panelDefault)
		}
	})
	t.Run("sets cached numeric ID as active selection", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
		if panelModel != "ollama/Ollama/4" {
			t.Errorf("expected panel model to use cached numeric ID ollama/Ollama/4, got %q", panelModel)
		}
	})
	t.Run("previously hidden model is re-shown when configured", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"ollama/Ollama/llama3.2":        false,
			"ollama/Ollama/llama3.2:latest": false,
		}, nil)
		// Ollama config is authoritative — should override the hidden state
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be re-shown")
		}
	})
	// readCache reads and parses the cached models from the state DB; a
	// missing key or unparseable value yields nil (best-effort by design).
	readCache := func(t *testing.T, dbPath string) []map[string]any {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var raw string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&raw); err != nil {
			return nil
		}
		var result []map[string]any
		_ = json.Unmarshal([]byte(raw), &result)
		return result
	}
	t.Run("adds uncached model to cache for instant startup display", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		// No seed cache — model has never been used in VS Code before
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		cache := readCache(t, dbPath)
		if len(cache) != 1 {
			t.Fatalf("expected 1 cached entry, got %d", len(cache))
		}
		entry := cache[0]
		if id, _ := entry["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
			t.Errorf("expected identifier ollama/Ollama/qwen3:8b, got %q", id)
		}
		meta, _ := entry["metadata"].(map[string]any)
		if meta == nil {
			t.Fatal("expected metadata in cache entry")
		}
		// Use a distinct name here: `v` is the *VSCode under test.
		if vendor, _ := meta["vendor"].(string); vendor != "ollama" {
			t.Errorf("expected vendor ollama, got %q", vendor)
		}
		if sel, ok := meta["isUserSelectable"].(bool); !ok || !sel {
			t.Error("expected isUserSelectable to be true")
		}
	})
	t.Run("does not duplicate already-cached model", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
			{
				"identifier": "copilot/copilot/auto",
				"metadata":   map[string]any{"vendor": "copilot", "name": "Auto"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		// Cache should still have exactly 2 entries (no duplicate added)
		result := readCache(t, dbPath)
		if len(result) != 2 {
			t.Errorf("expected 2 cached entries (no duplicate), got %d", len(result))
		}
	})
	t.Run("adds only missing models to existing cache", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		// llama3.2 is cached, qwen3:8b is not
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		result := readCache(t, dbPath)
		if len(result) != 2 {
			t.Fatalf("expected 2 cached entries, got %d", len(result))
		}
		// Second entry should be the newly added qwen3:8b
		if id, _ := result[1]["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
			t.Errorf("expected new entry ollama/Ollama/qwen3:8b, got %q", id)
		}
	})
}
// TestParseCopilotChatVersion checks detection of the github.copilot-chat
// extension and its version in `code --list-extensions` style output.
func TestParseCopilotChatVersion(t *testing.T) {
	cases := []struct {
		name          string
		output        string
		wantInstalled bool
		wantVersion   string
	}{
		{
			name:          "found among other extensions",
			output:        "ms-python.python@2024.1.1\ngithub.copilot-chat@0.40.1\ngithub.copilot@1.200.0\n",
			wantInstalled: true,
			wantVersion:   "0.40.1",
		},
		{
			name:          "only extension",
			output:        "GitHub.copilot-chat@0.41.0\n",
			wantInstalled: true,
			wantVersion:   "0.41.0",
		},
		{
			name:          "not installed",
			output:        "ms-python.python@2024.1.1\ngithub.copilot@1.200.0\n",
			wantInstalled: false,
		},
		{
			name:          "empty output",
			output:        "",
			wantInstalled: false,
		},
		{
			name:          "case insensitive match",
			output:        "GitHub.Copilot-Chat@0.39.0\n",
			wantInstalled: true,
			wantVersion:   "0.39.0",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotInstalled, gotVersion := parseCopilotChatVersion(tc.output)
			if gotInstalled != tc.wantInstalled {
				t.Errorf("installed = %v, want %v", gotInstalled, tc.wantInstalled)
			}
			// The version only matters when the extension was detected.
			if gotInstalled && gotVersion != tc.wantVersion {
				t.Errorf("version = %q, want %q", gotVersion, tc.wantVersion)
			}
		})
	}
}
// TestCompareVersions verifies three-way comparison of dotted version strings,
// including unequal segment counts.
func TestCompareVersions(t *testing.T) {
	cases := []struct {
		a, b string
		want int
	}{
		{"0.40.1", "0.40.1", 0},
		{"0.40.2", "0.40.1", 1},
		{"0.40.0", "0.40.1", -1},
		{"0.41.0", "0.40.1", 1},
		{"0.39.9", "0.40.1", -1},
		{"1.0.0", "0.40.1", 1},
		{"0.40", "0.40.1", -1},
		{"0.40.1.1", "0.40.1", 1},
	}
	for _, tc := range cases {
		name := tc.a + "_vs_" + tc.b
		t.Run(name, func(t *testing.T) {
			if got := compareVersions(tc.a, tc.b); got != tc.want {
				t.Errorf("compareVersions(%q, %q) = %d, want %d", tc.a, tc.b, got, tc.want)
			}
		})
	}
}

View File

@@ -242,6 +242,10 @@ func (m selectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyEnter:
filtered := m.filteredItems()
if len(filtered) > 0 && m.cursor < len(filtered) {
@@ -354,7 +358,7 @@ func (m selectorModel) renderContent() string {
}
s.WriteString("\n")
help := "↑/↓ navigate • enter select • esc cancel"
help := "↑/↓ navigate • enter select • ← back"
if m.helpText != "" {
help = m.helpText
}
@@ -608,6 +612,10 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyTab:
m.multi = !m.multi
@@ -810,7 +818,7 @@ func (m multiSelectorModel) View() string {
s.WriteString("\n")
if !m.multi {
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • esc cancel"))
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • ← back"))
} else {
count := m.selectedCount()
if count == 0 {
@@ -819,7 +827,7 @@ func (m multiSelectorModel) View() string {
s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
}
s.WriteString("\n\n")
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • esc cancel"))
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • ← back"))
}
result := s.String()

View File

@@ -782,6 +782,9 @@ func TestMulti_MultiModeHelpText(t *testing.T) {
if !strings.Contains(content, "tab select single") {
t.Error("multi mode should show 'tab select single' in help")
}
if !strings.Contains(content, "← back") {
t.Error("multi mode should show '← back' in help")
}
}
// --- preChecked initialization order ---
@@ -868,6 +871,46 @@ func TestMulti_UncheckingTopDefaultFallsBackToNearestCheckedBelow(t *testing.T)
}
}
// --- Left arrow back navigation ---
func TestSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
// Key message helpers for testing
type keyType = int

View File

@@ -60,6 +60,9 @@ var mainMenuItems = []menuItem{
{
integration: "openclaw",
},
{
integration: "vscode",
},
}
var othersMenuItem = menuItem{
@@ -139,6 +142,7 @@ func otherIntegrationItems(state *launch.LauncherState) []menuItem {
"claude": true,
"codex": true,
"openclaw": true,
"vscode": true,
}
var items []menuItem

View File

@@ -160,6 +160,12 @@
"group": "More information",
"pages": [
"/cli",
{
"group": "Assistant Sandboxing",
"pages": [
"/integrations/nemoclaw"
]
},
"/modelfile",
"/context-length",
"/linux",

View File

@@ -96,6 +96,18 @@ The `/loop` command runs a prompt or slash command on a recurring schedule insid
/loop 1h Remind me to review the deploy status
```
## Telegram
Chat with Claude Code from Telegram by connecting a bot to your session. Install the [Telegram plugin](https://github.com/anthropics/claude-plugins-official), create a bot via [@BotFather](https://t.me/BotFather), then launch with the channel flag:
```shell
ollama launch claude -- --channels plugin:telegram@claude-plugins-official
```
Claude Code will prompt for permission on most actions. To allow the bot to work autonomously, configure [permission rules](https://code.claude.com/docs/en/permissions) or pass `--dangerously-skip-permissions` in isolated environments.
See the [plugin README](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) for full setup instructions including pairing and access control.
## Manual setup
Claude Code connects to Ollama using the Anthropic-compatible API.

View File

@@ -0,0 +1,67 @@
---
title: NemoClaw
---
NemoClaw is NVIDIA's open source security stack for [OpenClaw](/integrations/openclaw). It wraps OpenClaw with the NVIDIA OpenShell runtime to provide kernel-level sandboxing, network policy controls, and audit trails for AI agents.
## Quick start
Pull a model:
```bash
ollama pull nemotron-3-nano:30b
```
Run the installer:
```bash
curl -fsSL https://www.nvidia.com/nemoclaw.sh | \
NEMOCLAW_NON_INTERACTIVE=1 \
NEMOCLAW_PROVIDER=ollama \
NEMOCLAW_MODEL=nemotron-3-nano:30b \
bash
```
Connect to your sandbox:
```bash
nemoclaw my-assistant connect
```
Open the TUI:
```bash
openclaw tui
```
<Note>Ollama support in NemoClaw is still experimental.</Note>
## Platform support
| Platform | Runtime | Status |
|----------|---------|--------|
| Linux (Ubuntu 22.04+) | Docker | Primary |
| macOS (Apple Silicon) | Colima or Docker Desktop | Supported |
| Windows | WSL2 with Docker Desktop | Supported |
CMD and PowerShell are not supported on Windows — WSL2 is required.
<Note>Ollama must be installed and running before the installer runs. When running inside WSL2 or a container, ensure Ollama is reachable from the sandbox (e.g. `OLLAMA_HOST=0.0.0.0`).</Note>
## System requirements
- CPU: 4 vCPU minimum
- RAM: 8 GB minimum (16 GB recommended)
- Disk: 20 GB free (40 GB recommended for local models)
- Node.js 20+ and npm 10+
- Container runtime (Docker preferred)
## Recommended models
- `nemotron-3-super:cloud` — Strong reasoning and coding
- `qwen3.5:cloud` — 397B; reasoning and code generation
- `nemotron-3-nano:30b` — Recommended local model; fits in 24 GB VRAM
- `qwen3.5:27b` — Fast local reasoning (~18 GB VRAM)
- `glm-4.7-flash` — Reasoning and code generation (~25 GB VRAM)
More models at [ollama.com/search](https://ollama.com/search).

View File

@@ -214,6 +214,8 @@ func LogLevel() slog.Level {
var (
// FlashAttention enables the experimental flash attention feature.
FlashAttention = BoolWithDefault("OLLAMA_FLASH_ATTENTION")
// DebugLogRequests logs inference requests to disk for replay/debugging.
DebugLogRequests = Bool("OLLAMA_DEBUG_LOG_REQUESTS")
// KvCacheType is the quantization type for the K/V cache.
KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
// NoHistory disables readline history.
@@ -302,28 +304,29 @@ type EnvVar struct {
func AsMap() map[string]EnvVar {
ret := map[string]EnvVar{
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_DEBUG_LOG_REQUESTS": {"OLLAMA_DEBUG_LOG_REQUESTS", DebugLogRequests(), "Log inference request bodies and replay curl commands to a temp directory"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
// Informational
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},

View File

@@ -874,7 +874,7 @@ func (f GGML) SupportsFlashAttention() bool {
return true
}
if slices.Contains([]string{"gemma2"}, arch) {
if slices.Contains([]string{"gemma2", "grok"}, arch) {
return false
}

View File

@@ -14,4 +14,15 @@ The integration tests have 2 modes of operating.
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
## Testing a New Model
When implementing new model architecture, use `OLLAMA_TEST_MODEL` to run the
integration suite against your model.
```bash
# Build the binary first
go build .
# Run integration tests against it
OLLAMA_TEST_MODEL=mymodel go test -tags integration -v -count 1 -timeout 15m ./integration/
```

View File

@@ -48,9 +48,7 @@ func TestAPIGenerate(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -151,7 +149,11 @@ func TestAPIGenerate(t *testing.T) {
})
}
// Validate PS while we're at it...
// Validate PS while we're at it — skip for local-only models
// which may lack metadata fields like family, parameter_size, etc.
if testModel != "" {
return
}
resp, err := client.ListRunning(ctx)
if err != nil {
t.Fatalf("list models API error: %s", err)
@@ -208,9 +210,7 @@ func TestAPIChat(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -311,6 +311,9 @@ func TestAPIChat(t *testing.T) {
}
func TestAPIListModels(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -361,6 +364,9 @@ func verifyModelDetails(t *testing.T, details api.ModelDetails) {
}
func TestAPIShowModel(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
modelName := "llama3.2"
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
@@ -400,6 +406,10 @@ func TestAPIShowModel(t *testing.T) {
}
func TestAPIGenerateLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -513,6 +523,10 @@ func TestAPIGenerateLogprobs(t *testing.T) {
}
func TestAPIChatLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()

View File

@@ -35,6 +35,9 @@ func TestBlueSky(t *testing.T) {
}
func TestUnicode(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
skipUnderMinVRAM(t, 6)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancel()
@@ -59,9 +62,7 @@ func TestUnicode(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
slog.Info("loading", "model", req.Model)
err := client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
@@ -81,6 +82,9 @@ func TestUnicode(t *testing.T) {
}
func TestExtendedUnicodeOutput(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
// Set up the test data
@@ -100,9 +104,7 @@ func TestExtendedUnicodeOutput(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
}
@@ -148,15 +150,16 @@ func TestUnicodeModelDir(t *testing.T) {
// TestNumPredict verifies that when num_predict is set, the model generates
// exactly that many tokens. It uses logprobs to count the actual tokens output.
func TestNumPredict(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, "qwen3:0.6b"); err != nil {
t.Fatalf("failed to pull model: %v", err)
}
pullOrSkip(ctx, t, client, "qwen3:0.6b")
req := api.GenerateRequest{
Model: "qwen3:0.6b",

View File

@@ -67,6 +67,9 @@ func TestConcurrentChat(t *testing.T) {
// Stress the scheduler and attempt to load more models than will fit to cause thrashing
// This test will always load at least 2 models even on CPU based systems
func TestMultiModelStress(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded models, not applicable with model override")
}
s := os.Getenv("OLLAMA_MAX_VRAM")
if s == "" {
s = "0"
@@ -114,9 +117,7 @@ func TestMultiModelStress(t *testing.T) {
// Make sure all the models are pulled before we get started
for _, model := range chosenModels {
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, model)
}
// Determine how many models we can load in parallel before we exceed VRAM

View File

@@ -38,9 +38,7 @@ func TestLongInputContext(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("PullIfMissing failed: %v", err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
}
@@ -70,14 +68,15 @@ func TestContextExhaustion(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("PullIfMissing failed: %v", err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
}
// Send multiple generate requests with prior context and ensure the response is coherent and expected
func TestParallelGenerateWithHistory(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
modelName := "gpt-oss:20b"
req, resp := GenerateRequests()
numParallel := 2
@@ -133,6 +132,12 @@ func TestParallelGenerateWithHistory(t *testing.T) {
// Send generate requests with prior context and ensure the response is coherent and expected
func TestGenerateWithHistory(t *testing.T) {
if testModel != "" {
// The Generate API's Context field (token array continuation) is not
// supported by all runners (e.g. MLX). Chat history works; this is
// the only generate-specific continuation path.
t.Skip("generate context continuation not supported by all runners")
}
req := api.GenerateRequest{
Model: smol,
Prompt: rainbowPrompt,
@@ -173,6 +178,9 @@ func TestGenerateWithHistory(t *testing.T) {
// Send multiple chat requests with prior context and ensure the response is coherent and expected
func TestParallelChatWithHistory(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
modelName := "gpt-oss:20b"
req, resp := ChatRequests()
numParallel := 2

View File

@@ -78,8 +78,11 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
testCases := []struct {
a string
b string
@@ -145,6 +148,9 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
}
func TestAllMiniLMEmbeddings(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -175,6 +181,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
}
func TestAllMiniLMEmbed(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -212,6 +221,9 @@ func TestAllMiniLMEmbed(t *testing.T) {
}
func TestAllMiniLMBatchEmbed(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -259,6 +271,9 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
}
func TestAllMiniLMEmbedTruncate(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -397,21 +412,13 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
t.Helper()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
return client.Embeddings(ctx, &req)
}
func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
t.Helper()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
return client.Embed(ctx, &req)
}
@@ -426,9 +433,12 @@ func TestEmbedTruncation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -494,9 +504,12 @@ func TestEmbedLargeInput(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
defer mcancel()
@@ -559,9 +572,12 @@ func TestEmbedStatusCode(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -571,9 +587,7 @@ func TestEmbedStatusCode(t *testing.T) {
defer mcancel()
// Pull the model if needed
if err := PullIfMissing(mctx, client, model); err != nil {
t.Fatal(err)
}
pullOrSkip(mctx, t, client, model)
t.Run("truncation error status code", func(t *testing.T) {
truncFalse := false

View File

@@ -14,6 +14,9 @@ import (
)
func TestImageGeneration(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded models, not applicable with model override")
}
skipUnderMinVRAM(t, 8)
type testCase struct {
@@ -41,12 +44,8 @@ func TestImageGeneration(t *testing.T) {
defer cleanup()
// Pull both models
if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
t.Fatalf("failed to pull image gen model: %v", err)
}
if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
t.Fatalf("failed to pull vision model: %v", err)
}
pullOrSkip(ctx, t, client, tc.imageGenModel)
pullOrSkip(ctx, t, client, tc.visionModel)
// Generate the image
t.Logf("Generating image with prompt: %s", tc.prompt)

View File

@@ -24,15 +24,12 @@ func TestLibraryModelsChat(t *testing.T) {
defer cleanup()
targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
chatModels := libraryChatModels
for _, model := range chatModels {
for _, model := range testModels(libraryChatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if targetArch != "" {
resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
if err != nil {

View File

@@ -13,39 +13,35 @@ import (
func TestVisionModels(t *testing.T) {
skipUnderMinVRAM(t, 6)
type testCase struct {
model string
}
testCases := []testCase{
{
model: "qwen2.5vl",
},
{
model: "llama3.2-vision",
},
{
model: "gemma3",
},
{
model: "qwen3-vl:8b",
},
{
// Qwen 3 VL mixture of experts
model: "qwen3-vl:30b",
},
{
model: "ministral-3",
},
defaultVisionModels := []string{
"qwen2.5vl",
"llama3.2-vision",
"gemma3",
"qwen3-vl:8b",
"qwen3-vl:30b",
"ministral-3",
}
for _, v := range testCases {
t.Run(v.model, func(t *testing.T) {
for _, model := range testModels(defaultVisionModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if testModel != "" {
requireCapability(ctx, t, client, model, "vision")
}
pullOrSkip(ctx, t, client, model)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
t.Fatal(err)
}
req := api.ChatRequest{
Model: v.model,
Model: model,
Messages: []api.Message{
{
Role: "user",
@@ -61,16 +57,7 @@ func TestVisionModels(t *testing.T) {
"temperature": 0.0,
},
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
resp := "the ollam"
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
// Preload to skip if we're less than 80% on GPU to avoid extremely slow tests
err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
@@ -78,13 +65,17 @@ func TestVisionModels(t *testing.T) {
}
skipIfNotGPULoaded(ctx, t, client, req.Model, 80)
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
// llava models on CPU can be quite slow to start
DoChat(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
DoChat(ctx, t, client, req, []string{"the ollam"}, 240*time.Second, 30*time.Second)
})
}
}
func TestIntegrationSplitBatch(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
skipUnderMinVRAM(t, 6)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
@@ -111,9 +102,7 @@ func TestIntegrationSplitBatch(t *testing.T) {
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
// llava models on CPU can be quite slow to start
DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
}

View File

@@ -45,9 +45,7 @@ func TestMaxQueue(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
// Context for the worker threads so we can shut them down
// embedCtx, embedCancel := context.WithCancel(ctx)

View File

@@ -46,14 +46,12 @@ func TestModelsChat(t *testing.T) {
chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
}
for _, model := range chatModels {
for _, model := range testModels(chatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if maxVram > 0 {
resp, err := client.List(ctx)
if err != nil {
@@ -133,14 +131,15 @@ func TestModelsEmbed(t *testing.T) {
t.Fatalf("failed to load test data: %s", err)
}
for model, expected := range testCase {
if testModel != "" && model != testModel {
continue
}
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if maxVram > 0 {
resp, err := client.List(ctx)
if err != nil {

View File

@@ -87,9 +87,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
var maxContext int
resp, err := client.Show(ctx, &api.ShowRequest{Model: model})

View File

@@ -33,9 +33,7 @@ func TestQuantization(t *testing.T) {
defer cleanup()
for _, base := range sourceModels {
if err := PullIfMissing(ctx, client, base); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, base)
for _, quant := range quantizations {
newName := fmt.Sprintf("%s__%s", base, quant)
t.Run(newName, func(t *testing.T) {

View File

@@ -0,0 +1,523 @@
//go:build integration
package integration
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TestAPIToolCallingStress exercises tool calling with complex, agent-style
// prompts: a very large system message, many tool declarations, and multi-turn
// conversations. It is designed to surface cache-corruption and parser bugs
// that simple tool-calling tests never hit.
func TestAPIToolCallingStress(t *testing.T) {
	firstTimeout := 120 * time.Second
	streamDeadline := 120 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	// Minimum VRAM (per skipUnderMinVRAM's units) required by each model;
	// models not listed run on any hardware.
	vramFloor := map[string]uint64{
		"qwen3-vl":      16,
		"gpt-oss:20b":   16,
		"gpt-oss:120b":  70,
		"qwen3":         6,
		"llama3.1":      8,
		"llama3.2":      4,
		"mistral":       6,
		"qwen2.5":       6,
		"qwen2":         6,
		"ministral-3":   20,
		"mistral-nemo":  9,
		"mistral-small": 16,
		"mixtral:8x22b": 80,
		"qwq":           20,
		"granite3.3":    7,
	}

	// Models that don't reliably produce tool calls under the stress prompt
	// (large system prompt plus many tools, simulating coding agents): some
	// are too small, too slow, or simply not built for this use case.
	flaky := map[string]string{
		"lfm2.5-thinking": "returns text instead of tool calls with complex system prompts",
		"qwen3-vl":        "vision model, extremely slow with complex tool prompts",
		"llama3.2":        "3B model too small for reliable multi-tool agent prompts",
		"mistral":         "7B v0.3 returns text instead of tool calls with complex prompts",
		"mixtral:8x22b":   "returns text instead of tool calls with complex prompts",
		"qwen2":           "returns text instead of tool calls with complex prompts",
		"granite3.3":      "returns text instead of tool calls with complex prompts",
	}

	for _, model := range testModels(libraryToolsModels) {
		model := model
		t.Run(model, func(t *testing.T) {
			// Known-bad models are skipped unless the override env var names one explicitly.
			if reason, known := flaky[model]; known && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			if testModel != "" {
				requireCapability(ctx, t, client, model, "tools")
			}
			if floor, ok := vramFloor[model]; ok {
				skipUnderMinVRAM(t, floor)
			}
			pullOrSkip(ctx, t, client, model)

			tools := stressTestTools()
			// ~5000+ token system prompt mimicking real coding agents
			// (opencode, Claude Code, etc.); the size combined with the
			// tool count is what triggers failures in some models.
			systemPrompt := stressTestSystemPrompt()

			// Three single-turn cases: fresh prompt processing, full cache
			// reuse (identical prompt), and a partial cache hit (new user
			// message). Prompts name the tool directly so models don't ask
			// for clarification instead of calling it.
			singleTurn := []struct {
				name   string
				prompt string
			}{
				{"first_request", "Run git diff main to review the code changes on the current branch."},
				{"cached_request", "Run git diff main to review the code changes on the current branch."},
				{"different_user_message", "Read the file at ./go.mod and tell me what dependencies we have."},
			}
			for _, tc := range singleTurn {
				t.Run(tc.name, func(t *testing.T) {
					testToolCall(t, ctx, client, model, systemPrompt, tools,
						tc.prompt, firstTimeout, streamDeadline)
				})
			}

			// Multi-turn conversation including a tool-response round trip.
			t.Run("multi_turn", func(t *testing.T) {
				testToolCallMultiTurn(t, ctx, client, model, systemPrompt, tools,
					firstTimeout, streamDeadline)
			})
		})
	}
}
// newTool assembles a function-type api.Tool from its name, description,
// list of required parameter names, and parameter property definitions.
func newTool(name, description string, required []string, props map[string]api.ToolProperty) api.Tool {
	params := api.ToolFunctionParameters{
		Type:       "object",
		Required:   required,
		Properties: testPropsMap(props),
	}
	fn := api.ToolFunction{
		Name:        name,
		Description: description,
		Parameters:  params,
	}
	return api.Tool{Type: "function", Function: fn}
}
// stressTestTools returns a set of tools matching the scale and verbosity of
// real coding agent tool definitions (opencode, Claude Code, etc.). The tool
// descriptions are intentionally verbose to match real-world prompt sizes.
//
// Returns eleven tools (bash, read, glob, grep, edit, write, question, task,
// webfetch, todowrite, skill), each built via newTool. The long descriptions
// are runtime prompt content, not documentation — do not trim or rewrap them,
// since the test's purpose depends on the total prompt size they produce.
func stressTestTools() []api.Tool {
	return []api.Tool{
		// Shell execution with an optional timeout (defaults to 2 minutes per the description).
		newTool("bash", "Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures. All commands run in the working directory by default. Before executing the command, verify that the parent directory exists. Always quote file paths that contain spaces with double quotes. After ensuring proper quoting, execute the command and capture the output. Avoid using bash with find, grep, cat, head, tail, sed, awk, or echo commands unless explicitly instructed. Instead, always prefer using the dedicated tools for these commands. When issuing multiple commands, if they are independent and can run in parallel, make multiple tool calls in a single message.",
			[]string{"command"},
			map[string]api.ToolProperty{
				"command":     {Type: api.PropertyType{"string"}, Description: "The bash command to execute"},
				"description": {Type: api.PropertyType{"string"}, Description: "Short description of what this command does in 5-10 words"},
				"timeout":     {Type: api.PropertyType{"number"}, Description: "Optional timeout in milliseconds. If not specified, commands will time out after 120000ms (2 minutes)"},
			}),
		// Paged file reader; only "path" is required, offset/limit are optional.
		newTool("read", "Read a file or directory from the local filesystem. If the path does not exist, an error is returned. By default, this tool returns up to 2000 lines from the start of the file. The offset parameter is the line number to start from (1-indexed). To read later sections, call this tool again with a larger offset. Use the grep tool to find specific content in large files or files with long lines. If you are unsure of the correct file path, use the glob tool to look up filenames by glob pattern. Contents are returned with each line prefixed by its line number. Any line longer than 2000 characters is truncated. Call this tool in parallel when you know there are multiple files you want to read. Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window. This tool can read image files and PDFs and return them as file attachments.",
			[]string{"path"},
			map[string]api.ToolProperty{
				"path":   {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to read"},
				"offset": {Type: api.PropertyType{"number"}, Description: "Line number to start reading from (1-indexed)"},
				"limit":  {Type: api.PropertyType{"number"}, Description: "Maximum number of lines to read"},
			}),
		// Filename pattern search.
		newTool("glob", "Fast file pattern matching tool that works with any codebase size. Supports glob patterns like '**/*.js' or 'src/**/*.ts'. Returns matching file paths sorted by modification time. Use this tool when you need to find files by name patterns. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead. You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The glob pattern to match files against"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
			}),
		// File-content regex search.
		newTool("grep", "Fast content search tool that works with any codebase size. Searches file contents using regular expressions. Supports full regex syntax (eg. 'log.*Error', 'function\\s+\\w+'). Filter files by pattern with the include parameter (eg. '*.js', '*.{ts,tsx}'). Returns file paths and line numbers with at least one match sorted by modification time. Use this tool when you need to find files containing specific patterns. If you need to identify or count the number of matches within files, use the bash tool with rg (ripgrep) directly. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The regex pattern to search for in file contents"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
				"include": {Type: api.PropertyType{"string"}, Description: "File pattern to include (eg. '*.js', '*.{ts,tsx}')"},
			}),
		// Exact string replacement; all three parameters are required.
		newTool("edit", "Performs exact string replacements in files. You must use your read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. When editing text from read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears after the line number prefix. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Only use emojis if the user explicitly requests it. The edit will fail if oldString is not found in the file. The edit will fail if oldString is found multiple times in the file. Use replaceAll for replacing and renaming strings across the file.",
			[]string{"path", "old_string", "new_string"},
			map[string]api.ToolProperty{
				"path":       {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to modify"},
				"old_string": {Type: api.PropertyType{"string"}, Description: "The text to replace (must be unique in the file)"},
				"new_string": {Type: api.PropertyType{"string"}, Description: "The replacement text"},
			}),
		// Whole-file write (overwrite semantics).
		newTool("write", "Writes a file to the local filesystem. This tool will overwrite the existing file if there is one at the provided path. If this is an existing file, you must use the read tool first to read the file contents. This tool will fail if you did not read the file first. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Never proactively create documentation files or README files. Only create documentation files if explicitly requested by the user.",
			[]string{"path", "content"},
			map[string]api.ToolProperty{
				"path":    {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to write"},
				"content": {Type: api.PropertyType{"string"}, Description: "The content to write to the file"},
			}),
		// Interactive clarification channel back to the user.
		newTool("question", "Use this tool when you need to ask the user questions during execution. This allows you to gather user preferences or requirements, clarify ambiguous instructions, get decisions on implementation choices as you work, and offer choices to the user about what direction to take. When custom is enabled (default), a 'Type your own answer' option is added automatically. Answers are returned as arrays of labels. Set multiple to true to allow selecting more than one answer. If you recommend a specific option, make that the first option in the list and add '(Recommended)' at the end of the label.",
			[]string{"questions"},
			map[string]api.ToolProperty{
				"questions": {Type: api.PropertyType{"string"}, Description: "The question to ask the user"},
			}),
		// Sub-agent launcher; all three parameters are required.
		newTool("task", "Launch a new agent to handle complex, multistep tasks autonomously. Available agent types: general (general-purpose agent for researching complex questions and executing multi-step tasks, use this to execute multiple units of work in parallel) and explore (fast agent specialized for exploring codebases, use this when you need to quickly find files by patterns, search code for keywords, or answer questions about the codebase). Launch multiple agents concurrently whenever possible to maximize performance. When the agent is done, it will return a single message back to you. Each agent invocation starts with a fresh context unless you provide task_id to resume the same subagent session.",
			[]string{"description", "prompt", "subagent_type"},
			map[string]api.ToolProperty{
				"description":   {Type: api.PropertyType{"string"}, Description: "A short (3-5 word) description of the task"},
				"prompt":        {Type: api.PropertyType{"string"}, Description: "The task for the agent to perform"},
				"subagent_type": {Type: api.PropertyType{"string"}, Description: "The type of specialized agent to use (general or explore)"},
			}),
		// URL fetcher; note both url and format are listed as required here.
		newTool("webfetch", "Fetches content from a specified URL. Takes a URL and optional format as input. Fetches the URL content, converts to requested format (markdown by default). Returns the content in the specified format. Use this tool when you need to retrieve and analyze web content. The URL must be a fully-formed valid URL. HTTP URLs will be automatically upgraded to HTTPS. Format options: markdown (default), text, or html. This tool is read-only and does not modify any files. Results may be summarized if the content is very large.",
			[]string{"url", "format"},
			map[string]api.ToolProperty{
				"url":    {Type: api.PropertyType{"string"}, Description: "The URL to fetch content from"},
				"format": {Type: api.PropertyType{"string"}, Description: "Output format: markdown (default), text, or html"},
			}),
		// Session task-list management; todos is a JSON-encoded array passed as a string.
		newTool("todowrite", "Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. Use this tool proactively when handling complex multistep tasks, non-trivial and complex tasks, when the user explicitly requests a todo list, when the user provides multiple tasks, after receiving new instructions, and after completing a task. Do not use this tool when there is only a single straightforward task, the task is trivial, the task can be completed in less than 3 steps, or the task is purely conversational.",
			[]string{"todos"},
			map[string]api.ToolProperty{
				"todos": {Type: api.PropertyType{"string"}, Description: "JSON array of todo items with id, title, and status fields"},
			}),
		// Named skill loader.
		newTool("skill", "Load a specialized skill that provides domain-specific instructions and workflows. Skills contain curated prompts and tool configurations for specific tasks like code review, testing, deployment, and documentation. Use this tool when the user's request matches an available skill description.",
			[]string{"name"},
			map[string]api.ToolProperty{
				"name": {Type: api.PropertyType{"string"}, Description: "The name of the skill to load"},
			}),
	}
}
// stressTestSystemPrompt returns a system prompt that matches the scale and
// content of real coding agent system prompts (~5000+ tokens). This is based
// on actual prompts captured from opencode sessions. The prompt size combined
// with many tool declarations is what pushes models past their effective
// context handling and triggers tag leakage / broken tool calls.
//
// NOTE: the body is a single raw-string literal (backticks); every line and
// its whitespace is part of the prompt sent to the model — do not reformat.
func stressTestSystemPrompt() string {
	return `You are opencode, an interactive CLI tool that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user.
IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
IMPORTANT: You must NEVER generate or guess URLs for the user unless you are confident that the URLs are for helping the user with programming. You may use URLs provided by the user in their messages or local files.
If the user asks for help or wants to give feedback inform them of the following:
- /help: Get help with using opencode
- To give feedback, users should report the issue at https://github.com/sampleorg/opencode/issues
# Tone and style
You should be concise, direct, and to the point. When you run a non-trivial bash command, you should explain what the command does and why you are running it, to make sure the user understands what you are doing (this is especially important when you are running a command that will make changes to the user's system).
Remember that your output will be displayed on a command line interface. Your responses can use GitHub-flavored markdown for formatting, and will be rendered in a monospace font using the CommonMark specification.
Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like Bash or code comments as means to communicate with the user during the session.
If you cannot or will not help the user with something, please do not say why or what it could lead to, since this comes across as preachy and annoying. Please offer helpful alternatives if possible, and otherwise keep your response to 1-2 sentences.
Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do.
IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to.
IMPORTANT: Keep your responses short, since they will be displayed on a command line interface. You MUST answer concisely with fewer than 4 lines (not including tool use or code generation), unless user asks for detail. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". Here are some examples to demonstrate appropriate verbosity:
user: 2 + 2
assistant: 4
user: what is 2+2?
assistant: 4
user: is 11 a prime number?
assistant: Yes
user: what command should I run to list files in the current directory?
assistant: ls
user: what command should I run to watch files in the current directory?
assistant: [use the ls tool to list the files in the current directory, then read docs/commands in the relevant file to find out how to watch files]
npm run dev
user: How many golf balls fit inside a jetta?
assistant: 150000
user: what files are in the directory src/?
assistant: [runs ls and sees foo.c, bar.c, baz.c]
user: which file contains the implementation of foo?
assistant: src/foo.c
user: write tests for new feature
assistant: [uses grep and glob search tools to find where similar tests are defined, uses concurrent read file tool use blocks in one tool call to read relevant files at the same time, uses edit file tool to write new tests]
# Proactiveness
You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between:
1. Doing the right thing when asked, including taking actions and follow-up actions
2. Not surprising the user with actions you take without asking
For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions.
3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did.
# Following conventions
When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns.
- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language).
- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions.
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.
# Code style
- IMPORTANT: DO NOT ADD ANY COMMENTS unless asked
# Doing tasks
The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended:
- Use the available search tools to understand the codebase and the user's query. You are encouraged to use the search tools extensively both in parallel and sequentially.
- Implement the solution using all tools available to you
- Verify the solution if possible with tests. NEVER assume specific test framework or test script. Check the README or search codebase to determine the testing approach.
- VERY IMPORTANT: When you have completed a task, you MUST run the lint and typecheck commands (e.g. npm run lint, npm run typecheck, ruff, etc.) with Bash if they were provided to you to ensure your code is correct. If you are unable to find the correct command, ask the user for the command to run and if they supply it, proactively suggest writing it to AGENTS.md so that you will know to run it next time.
NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
# Tool usage policy
- When doing file search, prefer to use the Task tool in order to reduce context usage.
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. When making multiple bash tool calls, you MUST send a single message with multiple tools calls to run the calls in parallel.
You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.
# Code References
When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
# Git workflow
When working with git:
- Create descriptive commit messages that explain WHY not just WHAT
- Use conventional commit format: feat:, fix:, refactor:, docs:, test:, chore:
- Check git status before and after operations
- Never force push to main/master
- Review diffs before committing
- NEVER update the git config
- NEVER run destructive/irreversible git commands unless the user explicitly requests them
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- Avoid git commit --amend unless explicitly requested by the user
- NEVER commit changes unless the user explicitly asks you to
# Safety
- Never delete files without confirmation
- Never run destructive commands (rm -rf, DROP TABLE, etc.) without confirmation
- Always validate inputs before using them in shell commands
- Be careful with environment variables and secrets
- Do not expose API keys, passwords, or tokens in code or logs
# Environment
Working directory: /Users/test/code/myproject
Platform: darwin
Shell: zsh
Is directory a git repo: yes
The project uses Go 1.22 with modules. Run tests with 'go test ./...' and build with 'go build ./...'.
The CI pipeline runs golangci-lint, go vet, and go test with race detector enabled.
# User instructions
Never use cd to change into the repo root or any other directory in Bash commands. The working directory is always the repo root — use relative paths directly.
Never use heredoc-style inline bash or python scripts in Bash tool calls. Instead, write the script to an ephemeral file under ./.tmp/ in the repo, then run it as a separate command.`
}
// validStressTools enumerates every tool name declared for the stress
// test; tool calls naming anything else are reported as errors.
var validStressTools = map[string]bool{
	"bash":      true,
	"read":      true,
	"glob":      true,
	"grep":      true,
	"edit":      true,
	"write":     true,
	"question":  true,
	"task":      true,
	"webfetch":  true,
	"todowrite": true,
	"skill":     true,
}
// testToolCall sends a single user message with the given system prompt and
// tool set, streams the response, and verifies it: no leaked special tags in
// content, and either a tool call naming a known stress tool or non-empty
// text. initialTimeout bounds time-to-first-token; streamTimeout bounds the
// gap between consecutive streamed responses.
func testToolCall(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, userMessage string, initialTimeout, streamTimeout time.Duration) {
	t.Helper()
	req := api.ChatRequest{
		Model: model,
		Messages: []api.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: userMessage},
		},
		Tools: tools,
		Options: map[string]any{
			"temperature": 0,
			"num_ctx":     contextLength(16384),
		},
	}
	stallTimer := time.NewTimer(initialTimeout)
	defer stallTimer.Stop()
	var gotToolCall bool
	var lastToolCall api.ToolCall
	var allContent string
	fn := func(response api.ChatResponse) error {
		if len(response.Message.ToolCalls) > 0 {
			gotToolCall = true
			lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1]
		}
		allContent += response.Message.Content
		// Reset reports false when the timer already fired, which means
		// the stream stalled between responses.
		if !stallTimer.Reset(streamTimeout) {
			return fmt.Errorf("stall detected while streaming")
		}
		return nil
	}
	stream := true
	req.Stream = &stream
	// Buffered so the chat goroutine can always deliver its result and
	// exit, even when this function returns early via t.Fatalf on a stall
	// or context cancellation. An unbuffered send would block forever and
	// leak the goroutine in those cases.
	done := make(chan error, 1)
	go func() {
		done <- client.Chat(ctx, &req, fn)
	}()
	select {
	case <-stallTimer.C:
		t.Fatalf("chat stalled after %s", initialTimeout)
	case genErr := <-done:
		if genErr != nil {
			t.Fatalf("chat failed: %v", genErr)
		}
		// Check for leaked special tags in content — these should never
		// appear in user-visible output regardless of model quality.
		checkNoLeakedTags(t, allContent)
		// The model must produce either a tool call or a text response.
		// A text response (e.g. asking for clarification) is legitimate.
		// Empty output with no tool call indicates a parser or model failure
		// (e.g. malformed tool call that gets dropped).
		if !gotToolCall && allContent == "" {
			t.Fatal("model produced neither a tool call nor text content")
		}
		if gotToolCall {
			if !validStressTools[lastToolCall.Function.Name] {
				t.Errorf("unexpected tool: %q", lastToolCall.Function.Name)
			}
			argsJSON, _ := json.Marshal(lastToolCall.Function.Arguments)
			t.Logf("tool call: %s(%s)", lastToolCall.Function.Name, string(argsJSON))
		} else {
			t.Logf("text response (no tool call): %q", truncate(allContent, 200))
		}
	case <-ctx.Done():
		t.Fatal("context cancelled")
	}
}
// testToolCallMultiTurn replays a completed tool-call exchange (user
// question → assistant bash tool call → tool result) and verifies the model
// produces a follow-up response: either text content or another tool call,
// with no leaked special tags. initialTimeout bounds time-to-first-token;
// streamTimeout bounds the gap between consecutive streamed responses.
func testToolCallMultiTurn(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, initialTimeout, streamTimeout time.Duration) {
	t.Helper()
	req := api.ChatRequest{
		Model: model,
		Messages: []api.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: "What files are in the current directory?"},
			{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name:      "bash",
					Arguments: api.ToolCallFunctionArguments{},
				},
			}}},
			{Role: "tool", Content: "go.mod\ngo.sum\nmain.go\nREADME.md\n"},
			// The model should now respond with content or another tool call
		},
		Tools: tools,
		Options: map[string]any{
			"temperature": 0,
			"num_ctx":     contextLength(16384),
		},
	}
	// For the tool response arguments, set the command
	req.Messages[2].ToolCalls[0].Function.Arguments.Set("command", "ls")
	stallTimer := time.NewTimer(initialTimeout)
	defer stallTimer.Stop()
	var gotResponse bool
	var allContent string
	var gotToolCall bool
	fn := func(response api.ChatResponse) error {
		if response.Message.Content != "" {
			gotResponse = true
			allContent += response.Message.Content
		}
		if len(response.Message.ToolCalls) > 0 {
			gotToolCall = true
			gotResponse = true
		}
		// Reset reports false when the timer already fired (stream stall).
		if !stallTimer.Reset(streamTimeout) {
			return fmt.Errorf("stall detected")
		}
		return nil
	}
	stream := true
	req.Stream = &stream
	// Buffered so the chat goroutine can always deliver its result and
	// exit, even when this function returns early via t.Fatalf on a stall
	// or context cancellation. An unbuffered send would block forever and
	// leak the goroutine in those cases.
	done := make(chan error, 1)
	go func() {
		done <- client.Chat(ctx, &req, fn)
	}()
	select {
	case <-stallTimer.C:
		t.Fatalf("chat stalled after %s", initialTimeout)
	case genErr := <-done:
		if genErr != nil {
			t.Fatalf("chat failed: %v", genErr)
		}
		checkNoLeakedTags(t, allContent)
		if !gotResponse {
			t.Fatal("expected response (content or tool call), got nothing")
		}
		if gotToolCall {
			t.Log("multi-turn: got follow-up tool call")
		} else {
			t.Logf("multi-turn: got content response: %q", truncate(allContent, 200))
		}
	case <-ctx.Done():
		t.Fatal("context cancelled")
	}
}
// checkNoLeakedTags asserts that none of the model-internal special tags
// appear in user-visible content. These tags are supposed to be consumed by
// the parser; finding one means either the parser has a bug or the model
// emitted malformed output the parser could not handle.
func checkNoLeakedTags(t *testing.T, content string) {
	t.Helper()
	for _, tag := range []string{
		"<|channel>", "<channel|>",
		"<|tool_call>", "<tool_call|>",
		"<|tool>", "<tool|>",
		"<|turn>", "<turn|>",
	} {
		if !strings.Contains(content, tag) {
			continue
		}
		t.Errorf("leaked special tag %q in content: %q", tag, truncate(content, 300))
	}
}
// contextLength returns the context window size to use in test requests.
// When OLLAMA_CONTEXT_LENGTH is set to a valid integer it takes precedence;
// otherwise (unset, empty, or unparseable) defaultVal is returned.
func contextLength(defaultVal int) int {
	raw := os.Getenv("OLLAMA_CONTEXT_LENGTH")
	if raw == "" {
		return defaultVal
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return defaultVal
	}
	return n
}
// truncate limits s to at most n bytes, appending "..." when it was cut.
// Note the limit is in bytes, not runes — fine for log output.
func truncate(s string, n int) string {
	if len(s) > n {
		return s[:n] + "..."
	}
	return s
}

View File

@@ -47,15 +47,18 @@ func TestAPIToolCalling(t *testing.T) {
"granite3.3": 7,
}
for _, model := range libraryToolsModels {
models := testModels(libraryToolsModels)
for _, model := range models {
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "tools")
}
if v, ok := minVRAM[model]; ok {
skipUnderMinVRAM(t, v)
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
tools := []api.Tool{
{

View File

@@ -18,6 +18,7 @@ import (
"os/exec"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"sync"
@@ -26,11 +27,17 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/types/model"
)
var (
smol = "llama3.2:1b"
stream = false
// testModel is set via OLLAMA_TEST_MODEL env var. When set, all tests
// that loop over model lists will test only this model, and smol is
// also overridden to use it.
testModel string
)
var (
@@ -288,23 +295,60 @@ var (
rainbowPrompt = "how do rainbows form? Be brief but factual in your reply"
rainbowFollowups = []string{
"Explain the physics involved in them. Be breif in your reply",
"Explain the chemistry involved in them. Be breif in your reply",
"Explain the physics involved in them. Be brief in your reply",
"Explain the chemistry involved in them. Be brief in your reply",
"What are common myths related to them? Be brief in your reply",
"Can they form if there is no rain? Be breif in your reply",
"Can they form if there are no clouds? Be breif in your reply",
"Can they form if there is no rain? Be brief in your reply",
"Can they form if there are no clouds? Be brief in your reply",
"Do they happen on other planets? Be brief in your reply",
}
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter"}
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter", "rain", "sun", "rainbow", "optical", "gold", "cloud", "planet", "prism", "fog", "ice"}
)
func init() {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
slog.SetDefault(logger)
custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
if custom != "" {
slog.Info("setting default test model to " + custom)
smol = custom
testModel = os.Getenv("OLLAMA_TEST_MODEL")
if testModel != "" {
slog.Info("test model override", "model", testModel)
smol = testModel
}
}
// testModels resolves the model list for a test loop: when the
// OLLAMA_TEST_MODEL override is active it yields just that model,
// otherwise the provided defaults are returned unchanged.
func testModels(defaults []string) []string {
	if testModel == "" {
		return defaults
	}
	return []string{testModel}
}
// requireCapability skips the test when the model does not advertise the
// given capability. It queries the server via Show on every call (no
// caching), so invoke it once per subtest. When Show reports no
// capabilities at all (e.g. locally created models), the check is treated
// as best-effort and the test proceeds.
func requireCapability(ctx context.Context, t *testing.T, client *api.Client, modelName string, cap model.Capability) {
	t.Helper()
	resp, err := client.Show(ctx, &api.ShowRequest{Name: modelName})
	if err != nil {
		t.Fatalf("failed to show model %s: %v", modelName, err)
	}
	if len(resp.Capabilities) == 0 {
		// No capability list available — best-effort, continue.
		return
	}
	if !slices.Contains(resp.Capabilities, cap) {
		t.Skipf("model %s does not have capability %q (has %v)", modelName, cap, resp.Capabilities)
	}
}
// pullOrSkip ensures modelName is available locally, pulling it when
// missing. A failed pull (e.g. model not in the registry) skips the test
// rather than failing it. PullIfMissing checks Show first, so local-only
// models that already exist return immediately without touching the
// registry.
func pullOrSkip(ctx context.Context, t *testing.T, client *api.Client, modelName string) {
	t.Helper()
	err := PullIfMissing(ctx, client, modelName)
	if err == nil {
		return
	}
	t.Skipf("model %s not available: %v", modelName, err)
}
@@ -540,9 +584,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second)
}

349
integration/vision_test.go Normal file
View File

@@ -0,0 +1,349 @@
//go:build integration
package integration
import (
"context"
"encoding/base64"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// defaultVisionModels is the default set of vision models exercised by
// these tests. When OLLAMA_TEST_MODEL is set, only that model is tested
// (with a capability check for vision).
var defaultVisionModels = []string{"gemma3", "llama3.2-vision", "qwen2.5vl", "qwen3-vl:8b"}
// decodeTestImages decodes and returns the two base64-encoded test images:
// the Abbey Road llamas image and the docs llamas image.
func decodeTestImages(t *testing.T) (abbeyRoad, docs api.ImageData) {
	t.Helper()
	road, err := base64.StdEncoding.DecodeString(imageEncoding)
	if err != nil {
		t.Fatalf("decode abbey road image: %v", err)
	}
	deskScene, err := base64.StdEncoding.DecodeString(imageEncodingDocs)
	if err != nil {
		t.Fatalf("decode docs image: %v", err)
	}
	return road, deskScene
}
// setupVisionModel prepares model for a vision test: verifies the vision
// capability when a model override is active, pulls the model if missing
// (skipping when unavailable), preloads it with an empty generate request,
// and skips the test unless at least 80% of it is loaded on GPU.
func setupVisionModel(ctx context.Context, t *testing.T, client *api.Client, model string) {
	t.Helper()
	if testModel != "" {
		requireCapability(ctx, t, client, model, "vision")
	}
	pullOrSkip(ctx, t, client, model)
	preload := &api.GenerateRequest{Model: model}
	if err := client.Generate(ctx, preload, func(api.GenerateResponse) error { return nil }); err != nil {
		t.Fatalf("failed to load model %s: %s", model, err)
	}
	skipIfNotGPULoaded(ctx, t, client, model, 80)
}
// TestVisionMultiTurn sends an image, gets a response, then asks follow-up
// questions about the same image. This verifies that the KV cache correctly
// handles cached image tokens across turns. Models with known multi-turn
// failures are skipped, but only when running the default model list — an
// explicit OLLAMA_TEST_MODEL override always runs.
func TestVisionMultiTurn(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Models that fail on multi-turn detail questions (e.g. misidentifying objects).
	skipModels := map[string]string{
		"gemma3":          "misidentifies briefcase as smartphone on turn 3",
		"llama3.2-vision": "miscounts animals (says 3 instead of 4) on turn 2",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _ := decodeTestImages(t)
			// Turn 1: describe the image
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "Describe this image briefly.",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			resp1 := DoChat(ctx, t, client, req, []string{
				"llama", "cross", "walk", "road", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
			if resp1 == nil {
				t.Fatal("no response from turn 1")
			}
			// Turn 2: follow-up about count. The assistant reply is appended
			// so the image tokens from turn 1 stay in the conversation.
			req.Messages = append(req.Messages,
				*resp1,
				api.Message{Role: "user", Content: "How many animals are in the image?"},
			)
			resp2 := DoChat(ctx, t, client, req, []string{
				"four", "4",
			}, 60*time.Second, 30*time.Second)
			if resp2 == nil {
				t.Fatal("no response from turn 2")
			}
			// Turn 3: follow-up about specific detail
			req.Messages = append(req.Messages,
				*resp2,
				api.Message{Role: "user", Content: "Is any animal carrying something? What is it?"},
			)
			DoChat(ctx, t, client, req, []string{
				"briefcase", "suitcase", "bag", "case", "luggage",
			}, 60*time.Second, 30*time.Second)
		})
	}
}
// TestVisionObjectCounting asks the model to count objects in an image.
// The docs image shows four llamas, so the reply must contain "4" or
// "four". Models with known counting failures are skipped unless an
// explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionObjectCounting(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipModels := map[string]string{
		"llama3.2-vision": "consistently miscounts (says 3 instead of 4)",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "How many animals are shown in this image? Answer with just the number.",
						Images:  []api.ImageData{docs},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{"4", "four"}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionSceneUnderstanding tests whether the model can identify
// cultural references and scene context from an image — here, recognizing
// the Abbey Road album cover parody. Models known to lack this capability
// are skipped unless an explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionSceneUnderstanding(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Models known to be too small or not capable enough for cultural reference detection.
	skipModels := map[string]string{
		"llama3.2-vision": "3B model lacks cultural reference knowledge",
		"minicpm-v":       "too small for cultural reference detection",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "What famous image or album cover is this a parody of?",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"abbey road", "beatles", "abbey",
			}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionSpatialReasoning checks that the model can describe an object
// by its spatial position: the far-left llama in the docs image is working
// on a laptop.
func TestVisionSpatialReasoning(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			// The docs image has: leftmost llama on laptop with glasses,
			// rightmost llama sleeping.
			question := api.Message{
				Role:    "user",
				Content: "What is the animal on the far left doing in this image?",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"laptop", "computer", "typing", "working"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionDetailRecognition checks that the model can pick out a small
// accessory detail: one llama in the docs image wears glasses.
func TestVisionDetailRecognition(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			question := api.Message{
				Role:    "user",
				Content: "Are any of the animals wearing glasses? Describe what you see.",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"glasses", "spectacles", "eyeglasses"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionMultiImage sends two images in a single message and asks
// the model to compare and contrast them. This exercises multi-image
// encoding and cross-image reasoning. Models without multi-image support
// are skipped unless an explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionMultiImage(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Multi-image support varies across models.
	skipModels := map[string]string{
		"llama3.2-vision": "does not support multi-image input",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, docs := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "I'm showing you two images. What do they have in common, and how are they different?",
						Images:  []api.ImageData{abbeyRoad, docs},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			// Both images feature cartoon llamas/alpacas — the model should
			// note the common subject and the different settings.
			DoChat(ctx, t, client, req, []string{
				"llama", "alpaca", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionOCR verifies basic text extraction from an image: the docs
// image contains the header text "Ollama's documentation", and the
// response must mention it.
func TestVisionOCR(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			question := api.Message{
				Role:    "user",
				Content: "What text appears in this image? Read all visible text.",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"ollama", "documentation"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}

View File

@@ -0,0 +1,385 @@
//go:build integration
package integration
// imageEncodingDocs is a 400x250 PNG of four cartoon llamas at a desk.
// One is on a laptop wearing glasses, one writing, one reading, one sleeping.
// The header text reads "Ollama's documentation".
const imageEncodingDocs = `iVBORw0KGgoAAAANSUhEUgAAAZAAAAD6CAYAAACPpxFEAAAKtmlDQ1BJQ0MgUHJvZmlsZQAASImVlwdQk9kWx+/3pYeElhCKlNA70gkgJYQWQEE62AhJgEAI
MQUFO7K4ghUVEVQWdFVAwUYRO6LYFsWGfUEWEWVdLNhQeR8wBHffvPfmnZk75zfnO/fcc+98d+Z/ASCbcMRiIawKQKZIJokM8qPHJyTScS8BDDQBHlAAicOV
ipkREWEAsUn/d/twD0Bj/rbtWK1///5fTY3Hl3IBgCIQTuZJuZkIHwfI8lyxRAYACmFgvEgmHuP7CFMlSIMID45x6jijx+pQkyeYOp4THclC2AIAPInDkaQC
QHJG4vRsbipShxSNsL2IJxAhnI+wd2ZmFg/hNoQtkBwxwmP1Gck/1En9W81kRU0OJ1XBE3sZN7y/QCoWcnL+z+P435YplE+uYY4MUpokOBLxusi5/ZGRFapg
UfKs8EkW8MbzxzlNHhwzyVwpK3GSpcIo9iTzOP6hijrCWWGTnCIIVOQIZOzoSeZLA6ImWZIVqVg3RcJiTjJHMtWDPCNGEU/jsxX1c9Oi4yY5WxA7S9FbRlTo
VA5LEZfIIxV74YuC/KbWDVScQ6b0h70L2Iq5srToYMU5cKb654uYUzWl8YreeHz/gKmcGEW+WOanWEssjFDk84VBirg0O0oxV4b8nFNzIxRnmM4JiZhkEAVk
QA54QACyAB34I14KxEAIOCBHxl8sG9sQK0ucIxGkpsnoTOTW8elsEdfOhu5o7+gKwNgdnvhF3tHG7yZEuzoVW10NgNeJ0dHRk1OxkJsAHEkCgNgwFbOYB4Bq
PwCXT3HlkuyJ2PhdwwAiUAFUoA30gTGwALbAEbgCT+ALAkAICAfRIAHMB1yQBjKBBCwCS8EqUACKwCawDZSBCrAHHACHwFHQBE6B8+ASuAZugrvgEegGfeAV
GAIfwAgEQTiIDFEgbcgAMoWsIUeIAXlDAVAYFAklQElQKiSC5NBSaDVUBBVDZVAlVA0dgU5A56ErUCf0AOqBBqC30BcYBZNgKqwHm8HTYQbMhEPhaHgenAov
hHPhfHgDXApXwQfhRvg8fA2+C3fDr+BhFEApoWgoQ5QtioFiocJRiagUlAS1HFWIKkFVoepQLah21G1UN2oQ9RmNRVPQdLQt2hMdjI5Bc9EL0cvR69Bl6APo
RnQb+ja6Bz2E/o4hY3Qx1hgPDBsTj0nFLMIUYEow+zANmIuYu5g+zAcsFkvDmmPdsMHYBGw6dgl2HXYXth57DtuJ7cUO43A4bZw1zgsXjuPgZLgC3A7cQdxZ
3C1cH+4TXglvgHfEB+IT8SJ8Hr4EX4M/g7+F78ePEFQJpgQPQjiBR8ghbCTsJbQQbhD6CCNENaI50YsYTUwnriKWEuuIF4mPie+UlJSMlNyVZisJlFYqlSod
Vrqs1KP0maROsiKxSHNJctIG0n7SOdID0jsymWxG9iUnkmXkDeRq8gXyU/InZYqynTJbmae8QrlcuVH5lvJrFYKKqQpTZb5KrkqJyjGVGyqDqgRVM1WWKkd1
uWq56gnVLtVhNYqag1q4WqbaOrUatStqL9Rx6mbqAeo89Xz1PeoX1HspKIoxhUXhUlZT9lIuUvqoWKo5lU1NpxZRD1E7qEMa6hrOGrEaizXKNU5rdNNQNDMa
myakbaQdpd2jfdHU02Rq8jXXatZp3tL8qDVNy1eLr1WoVa91V+uLNl07QDtDe7N2k/YTHbSOlc5snUU6u3Uu6gxOo07znMadVjjt6LSHurCulW6k7hLdPbrX
dYf19PWC9MR6O/Qu6A3q0/R99dP1t+qf0R8woBh4GwgMthqcNXhJ16Az6UJ6Kb2NPmSoaxhsKDesNOwwHDEyN4oxyjOqN3piTDRmGKcYbzVuNR4yMTCZabLU
pNbkoSnBlGGaZrrdtN30o5m5WZzZGrMmsxfmWuZs81zzWvPHFmQLH4uFFlUWdyyxlgzLDMtdljetYCsXqzSrcqsb1rC1q7XAepd1pw3Gxt1GZFNl02VLsmXa
ZtvW2vbY0ezC7PLsmuxeTzeZnjh98/T26d/tXeyF9nvtHzmoO4Q45Dm0OLx1tHLkOpY73nEiOwU6rXBqdnrjbO3Md97tfN+F4jLTZY1Lq8s3VzdXiWud64Cb
iVuS2063LgaVEcFYx7jsjnH3c1/hfsr9s4erh8zjqMdfnraeGZ41ni9mmM/gz9g7o9fLyIvjVenV7U33TvL+xbvbx9CH41Pl88zX2Jfnu8+3n2nJTGceZL72
s/eT+DX4fWR5sJaxzvmj/IP8C/07AtQDYgLKAp4GGgWmBtYGDgW5BC0JOheMCQ4N3hzcxdZjc9nV7KEQt5BlIW2hpNCo0LLQZ2FWYZKwlpnwzJCZW2Y+nmU6
SzSrKRyEs8O3hD+JMI9YGHFyNnZ2xOzy2c8jHSKXRrZHUaIWRNVEfYj2i94Y/SjGIkYe0xqrEjs3tjr2Y5x/XHFcd/z0+GXx1xJ0EgQJzYm4xNjEfYnDcwLm
bJvTN9dlbsHce/PM5y2ed2W+znzh/NMLVBZwFhxLwiTFJdUkfeWEc6o4w8ns5J3JQ1wWdzv3Fc+Xt5U3wPfiF/P7U7xSilNepHqlbkkdSPNJK0kbFLAEZYI3
6cHpFekfM8Iz9meMCuOE9Zn4zKTMEyJ1UYaoLUs/a3FWp9haXCDuXuixcNvCIUmoZJ8Uks6TNsuoiFi6LreQ/yTvyfbOLs/+tCh20bHFaotFi6/nWOWszenP
Dcz9dQl6CXdJ61LDpauW9ixjLqtcDi1PXt66wnhF/oq+lUErD6wirspY9VuefV5x3vvVcatb8vXyV+b3/hT0U22BcoGkoGuN55qKn9E/C37uWOu0dsfa74W8
wqtF9kUlRV/XcdddXe+wvnT96IaUDR0bXTfu3oTdJNp0b7PP5gPFasW5xb1bZm5p3ErfWrj1/bYF266UOJdUbCdul2/vLg0rbd5hsmPTjq9laWV3y/3K63fq
7ly78+Mu3q5bu31311XoVRRVfPlF8Mv9yqDKxiqzqpI92D3Ze57vjd3b/ivj1+p9OvuK9n3bL9rffSDyQFu1W3V1jW7Nxlq4Vl47cHDuwZuH/A8119nWVdbT
6osOg8Pywy+PJB25dzT0aOsxxrG646bHdzZQGgobocacxqGmtKbu5oTmzhMhJ1pbPFsaTtqd3H/K8FT5aY3TG88Qz+SfGT2be3b4nPjc4PnU872tC1ofXYi/
cKdtdlvHxdCLly8FXrrQzmw/e9nr8qkrHldOXGVcbbrmeq3xusv1ht9cfmvocO1ovOF2o/mm+82WzhmdZ2753Dp/2//2pTvsO9fuzrrbeS/m3v2uuV3d93n3
XzwQPnjzMPvhyKOVjzGPC5+oPil5qvu06nfL3+u7XbtP9/j3XH8W9exRL7f31R/SP7725T8nPy/pN+ivfuH44tRA4MDNl3Ne9r0SvxoZLPhT7c+dry1eH//L
96/rQ/FDfW8kb0bfrnun/W7/e+f3rcMRw08/ZH4Y+Vj4SfvTgc+Mz+1f4r70jyz6ivta+s3yW8v30O+PRzNHR8UcCWdcCqCQAaekAPB2PwDkBAAoiIYgzpnQ
2OMGTbwLxgn8J57Q4eOGKJc6xI3JI9Y5AA4jw2wlACq+AIxJo2hfADs5KcakHh7X7mOGRV4xdR5d60kXntpUg3/ahK7/oe9/eqCo+jf/LwkHEGPG+ODYAAAA
imVYSWZNTQAqAAAACAAEARoABQAAAAEAAAA+ARsABQAAAAEAAABGASgAAwAAAAEAAgAAh2kABAAAAAEAAABOAAAAAAAAAJAAAAABAAAAkAAAAAEAA5KGAAcA
AAASAAAAeKACAAQAAAABAAABkKADAAQAAAABAAAA+gAAAABBU0NJSQAAAFNjcmVlbnNob3T1Q1G8AAAACXBIWXMAABYlAAAWJQFJUiTwAAACqGlUWHRYTUw6
Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNi4wLjAiPgogICA8cmRmOlJE
RiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91
dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyIKICAgICAgICAgICAgeG1sbnM6ZXhpZj0iaHR0cDov
L25zLmFkb2JlLmNvbS9leGlmLzEuMC8iPgogICAgICAgICA8dGlmZjpZUmVzb2x1dGlvbj4xNDQ8L3RpZmY6WVJlc29sdXRpb24+CiAgICAgICAgIDx0aWZm
OlhSZXNvbHV0aW9uPjE0NDwvdGlmZjpYUmVzb2x1dGlvbj4KICAgICAgICAgPHRpZmY6UmVzb2x1dGlvblVuaXQ+MjwvdGlmZjpSZXNvbHV0aW9uVW5pdD4K
ICAgICAgICAgPGV4aWY6UGl4ZWxZRGltZW5zaW9uPjc0NjwvZXhpZjpQaXhlbFlEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlVzZXJDb21tZW50PlNjcmVl
bnNob3Q8L2V4aWY6VXNlckNvbW1lbnQ+CiAgICAgICAgIDxleGlmOlBpeGVsWERpbWVuc2lvbj4xMTk0PC9leGlmOlBpeGVsWERpbWVuc2lvbj4KICAgICAg
PC9yZGY6RGVzY3JpcHRpb24+CiAgIDwvcmRmOlJERj4KPC94OnhtcG1ldGE+Cts1PlUAAEAASURBVHgB7d0JvH3XeDfwHWKepxpiiHlMEGNMiaEorVkjLRKi
qHmoWSXkRVExV2poQlBSQhGqSkJFqBBDRYyJMeYppiLZ7/Nd8hz7nv8Z9j333HvPuXc9n8+9Z9p77bV+a3jG9aydmqb5ZfxVqghUBCoCFYGKwKoQ2Cmubld1
R724IlARqAhUBCoCgcDZKgoVgYpARaAiUBGYBYHKQGZBrd5TEagIVAQqAs3OFYOKQEWgIrCdENh5550bf2c729manXZixa/Utm1zxhlnNL/73e+aM888szcg
1QfSG6p6YUWgIrDMCJz97Gdvzn3uc5cmWCgtmBbO7U6YqD/4nOMc5yi4/OY3v+mFTWUg23301PZXBLYBAjQOzOP//u//ipS9DZo8UxMxknOd61xFQ/vlL385
lYlUBjITzPWmikBFYFkQYKo63/nO1/zqV78q0vWy1Hsz63nOc56zaCOYyCQ6e/x40KQLRv12qUtdqrn61a/eXOMa12gudrGLlUt+8Ytf7HCpSlzmMpdpLnzh
Cze//e1vm9///veNe91DXfr1r3+9wz2L8oU6/smf/Emxk1Ln1oMueMELNpe85CWb85///E0fbr8edahl/gGBvfbaq/H3ta99rYzVisvWQeC85z1v0TysP5X6
IcC8l34i7ycRI2Cvv6tc5Srti170ovZTn/pUe8opp7SnnXZaefX5JS95SXu1q11tRTm77757+9nPfrY96aST2lvd6lblt3/7t39rY5K2Bx988Ipr+9Zho657
1rOe1X7zm99sn/jEJ65bPf/mb/6m/cpXvtJ++MMfbi90oQut23M2CrP1fk4sBO0FLnCBNkwRM2EV6nkbkmgpI9T0QRm77rpre/LJJ7c/+9nP2r/9278dfL/e
7anl91t31oJTLIKlz9dSxna9NzS3qdj1DuPdc889mze/+c3NX//1XzeXvvSlm9NPP7358pe/XF593nfffZt//dd/bW5xi1sE3n8gWgYp2x9uhkjbsVgOnFl/
uHLx/nOuqed6OtnYGj0jFsUaDTJlCHDu/fM//3Pz0Y9+tHnKU54y5erRP8PZGD3uuOOaYN6Di2jPX//615sf/vCHzbe+9a3B9/XN8iNg3eEwXw0xeRkr5uZ2
JtFY1j/r+DjqFcbLzPKCF7yg2WWXXZof//jHzUtf+tLm3//935uf//znhTn8xV/8RfPoRz+6mKue//znN/e85z2b0E7KM1UiK+KL4c/jKrbZ3zMpUd1Gmebm
VTedk3jMq8ytWo5F4OIXv3hzuctdroy5WdrJjGosX/ayly028SwD4zBmLRjf/va38+v6ugUQwAz6mK7CWtLc/OY3L8Ix8zVB1/wM60rzspe9bF0FyUWG2fqE
gYwzY/ViIPe73/2aq171qsWOGKan5o1vfOOgzTSRQw89tAn1v3ne857XXPnKV27uf//7l/eDi3q84Ru52c1u1lzhClcoi+pXv/rV5iMf+Ujz05/+tNytQ2k3
JNGPfexjpUPvfOc7l0n/6U9/ugkzULmOv+U2t7lNE2aJIk3+53/+56CMrMZFL3rRMljUVdTBqaeeWp71gx/8IC8pDje/cbx1yQKmnhYhC9LnP//5ItGK7phE
17rWtcozRYJ87nOfa4499tixnaKc6173us0NbnCD4j9Sr//5n/9pvvjFL458hOuuf/3rFyy+973vFSmdLR9pg/qaFN/4xjcaWCXRgGDKwfi///u/xf5vElmo
3f+FL3yhueMd71j63r2w1N8mZZgkm912262MiQ984AON/homk/JGN7pRqdd3v/vdghNJPwn+1772tYv/54Mf/GDxq/FDqJdnwwjjoOHe+MY3LpOa34wgo+/1
G/wRSfMmN7lJ45na8/3vf7/5+Mc/PsDMeLjpTW/a8MvxaYU5trnTne7UhOmqCVNluddvyunWkR8MJsal/lYvWlD2N3xp55e4xCWaMOs2YbJtbn3rW5f+I3x8
4hOfaD7zmc9kk+vrJiAwzYpgHIapuvnQhz7UHHPMMWUt+8lPflIWTn1vTfF5Ellk7373u5cxZozSdMN0P+mWpfhtGnYaMdHmy1b8H//xH20sAO273vWuNhbw
kdfHxGvf+c53lutioSnXxSJY/B2x+LQxqcp9Rx99dBsLYhuayqCcP/uzP2tjgWxDEmyDYbTRWaWc6Mw2FsdyHf8Kv0sskO3f/d3ftbHgFJu1ssLs0D7+8Y9v
Y5KX75WhLHX2vFhwBs+KAdH+93//dxsLTHmWa5V5/PHHD+oIkwMOOKANDav9y7/8y8G99773vdtYDNof/ehHbWhipZ78QPw6wVgG1w1j+pCHPKT90pe+VJ6p
beoWGl37mMc8ptTxk5/8ZBuDtNzPxv/CF76w4JbP8LxgHu3Tnva0Nha5wXMucpGLtK94xSvaWEhLfbTFtbGotg996EPLdeyY73nPe9pY+NuXv/zlg3vVMRa9
0h6/PfzhDy+/vfe97y3tDnNRG5JXaaMyYRQmzDYW1OLvSgx8HwyxDaY9KPs85zlP++xnP7u0QXvVS1uCSbUPeMADBtc96lGPKvXS9zDiD3KtPv3Od77TvuY1
ryntvetd71qui4W9DUZV+jsW8IKhdoRW0b7+9a9vQ3so9U0cYPbYxz62PE/5MbELVspwrc+PfOQji//Jte7T79l/d7vb3cq4zLGiLe57+9vf3kYASbkuFo72
3e9+d6nfq1/96vY5z3lO6QPXus+z+LqyzPo6eb2ZNz78ZvpoUrm3v/3t27//+78v11hHDjvssPZ1r3td+Xvta1/bvuENb2iNVWvcuHIe8YhHtG95y1vKPPir
v/qrMiaud73rDa43v7u+u5zH5nuuqX43X/MZPoeAMvic1/HjWZfzOq8hYJe/7neu8b3v8nl5red2rx33Xhnde4evm6qBkMBIfCQtkv84eyI1MRbhIimSzmNx
K5pEPHAiXelKV2pe/OIXFwmZFkHKZX/cZ599GlL7P/zDPzR3uctdirRO6sMRY5IXSS8WxOaWt7xlo4xYmJo73OEO5XcqJ0k0OrBIwLF4NM94xjOK+YL5zfXa
EhO/SKT3ute9iub03Oc+t/nzP//zYguPhbRIorQFdOW456CDDirtYr5729veVt4/7GEPa4IBFnMeuzqVr0skdTZ7mhNJ9/3vf3+Rckn2JOGUZN0D4xjIRYMj
vZJiSLykeFpVDOKiEUUgQ5GOaIPB4IpWEIO8+KRI6iRg5cQi3ARTL5KzZw33HSw932+p5pOymSbVO5hDEwyqCeZdJHSSdizqRfvij6CpqBft5slPfnLBlMYW
k7B58IMfXKQ2fUFzcm8wgiaCE0q5wdTKM13PdPS4xz2uaBzqTGPyTKbRY0MLoUlEkEbBmQmK1nD44YcXTYA2pF9If7SmmMSl/25729sWzSGEjWKGOOGEExrj
RV95HtMEbSyYd+mPxCFVdZpZMPKizdAgjAe+PM8hlSrLGGXShRkN3FjUnle96lVFi7rd7W7X0HaDiRXJNhhkd2jU9wuCgHGub2nxIkuNmRwH5oi5ayzQWo29
YbLWGVdPeMITBlqHcUETdi/z/nWuc50yv1lrrJPGrDm+a2jGrnn605/eGLOxWJc55nqWHGtH1sX8V09jytyzXlmf7nvf+5axR3v+r//6r+ZNb3pTWf+e9KQn
lfEZwl4Zl9wLNHfmWmQd87dWmsiJYiEu0jNpfv/995947X777Vcis0iS17zmNdswT0zVQAK0oiXg9CmFR4PaaGSR9mgdyqJdiJQhmZIOkitGOHGJ9CIZhsmr
jcWs1BGXpgml5qRM2gwtiUTR1Ur23nvvlnSrjDChjGxjLGbl2doW5rzBNeofndDGojEyYoEkT0oXjbbHHnsM7qMJkXppZzQQ9YUXiVUbaRvq7I9UEoyvtCUW
vTYGT2kLzUOdaWR5LSkJPjE423e84x1FanpHSMykehF0eZ1X5ZD+/UZC9522eL46XfGKVyzfwZqW5Vlw6mJE06CF0bD0EVxF3bnWeOg+L0ycRfsiwZOsRDzp
n9Qg89owWRWNEG6k+vyeNknzoaHldyQpY4cWQJPtfh/O8tK2jPgj0YX5qWgGMdkH14b5tPSPumSdjREaRDCw1u9ZbjCF0kd+e9CDHlS+P+qoo0p7aX7BwAfX
BrMpeCnXeM4y6uvGaSF9NBCa+D/90z+1YdIcaLXDfWT+3fCGNxzZh9YgYyCl/e69tFjjMxb+NoSoMo+8N75oOyHMFC1Y1GeYhMvcdT+NhnbcLeuQQw4p9XMP
awgNnXYVwk6Zd9a3rId1JwSc8txnPvOZbQjprXnld+upOpuHwcBWPKP7PO/XrIH0sYHFg3Yg95GopxHbNxs64uegvYQJpHBinFcZvs8NLT6HSa1Ife4h3cZi
VezkomtwW+R6EiapIhaOIrGTOmkzSGw4zcqzlE86x8FpP6NIUIBrAtAibZP4aQfqnvUfvk+50VmlDa7x/CR19d097nGP8hUNgJTuHk5d5SfRanwmkdMIaVf2
4WiDenV9Usoh3YS5sUjFbLNGyGqIREQyZ9NHJOwTTzyxaDbBKIpGkOWRpmLRLdjpGxoQiSxMOKVf1NX32iBqj7bDn0Zr0b/qRwIkNSVpE8mKxOc6pE7KMa7c
k0SKo5Ei19BQSH760bOQz8g4UAbS16NIndi8+Xc8K5hwEwxgcKk+M65oaLSUWBzKdSRHmhJfVZI+JokaZ/qt0mIiEKbLMjb0+6T1btxvxpmxZwwk0YyNMVYU
c4lWzx+GjAX3WPtorsYUTcN6QhN2j3WD1t0lY5Ofxj1eWR+sX7SRP/3TPy11UA/zzzpi/Hkunx3txtqqjcF8SjuNa/evhUbPok6JwNUoE08FJpGJ4joLjolj
Mk8jnUL9C7thcaia7DoC+BbDnPBZjoVo2LHtO9cNf68eKDvea0jPRTW0sFEHAe4Pc/B7XpvPy1cL6L/8y7+UxYoZgypo4bMoUhuZm3Rsl3QOc4n6hWbR/am8
x/yyfV45i73CDhPpEtMOBzamAVdBBwZpSMI7OPgs/Ln4W2zzGd3ypr23MHcpmbmx4H1S4uazdmYbtP3wMDN1ST3UB1MwkF2PTKbsq7w++3Ja3fVXaD4NMyXT
kslp/PhTvjE0rk/zWd1X95iQ6u++DEbIa3wn1Fe9tINA4Tuf9U+XYOP5SF9VWkwE9BMTp3VgFiLAEpiYhkJjLmMnpP7CGAhc1hzjg0BkXTO/PUvwC/NoWCbK
d8aKRZ8pnAA8bPJUBoGYOZWA4xrBG8Y+E7731jVMA3O4d5jm33300WV91UZrinoyP7uGiV9wzFpoKgOxQAFI5UjIudgOPxQwfkds/UCymEwjdmT2RQsLv4So
GloEO5/oLzS8AAByFA1/3/1s0cOl2actwqQCCz9QSaY6oHv9cPnuNyh0Hoano0jS7N6kUX/Z8XmveufCO2pwdiUW1+pkBMvhBaf7nev8uaf7fT53Na/j2jz8
fX72mu89Z/h9ttcAxUBEpOQ16muSaFuYzVZoEsN1znuGvx/+bIyFA70IH2HSK9opxo4B2pukj1ZDnttlOqP6DeZIWzGcrGu+5vOGP+f39XXxEAgTVpnD5vUo
6mq9w78bLyJQ+TEs8IQPAh9mYIzwYb7yla8sWjEh1Npo/qaWIVuHtQXxkfKr/uM//uNAwMrnEbJca0tFmJeL79hzMKmnhgbz42AO5p15xnccQTpFWCbwEDoJ
lWEGbiLQozAf9aBNr4WmMhASp4UWMLieSWnCDhN1SuipSUMly4Vi+Lr8rJHIxkRSHGcvxyvAEWZkUVfOWidiPouWw7TBWa8d2oaYg9IMUr4Y8Q9Dw3g40Zgp
1MnihWmELbwMPiGpXROGRQwzpLnh9sNE8kgp3G8WwJSADRAhoUmcyrQZgwiDNkhhpQ40P5pQEic605iBQw1OTKm2XSJp08K6dej+vtr3GIM2eJ5FllbWNf8o
L01StLVJk3LUs3McZH+6hqOayo9pMKVlWK/fBEQMM+JRZbg2yfWEJvWjJek3ARNJmL6+UAeClf7IMvOa+rpYCPTtH1YC8yl8D0Urdp9+NtfMPWN7HBl31hBj
w9hJC4Drn/rUp5bAHRpqzgdCK6ZFgLVOWMzNB9o0E+6xETwyTOqBATCFm2PGPOK8t71A2ead8gi0BHKC8v777z+waGBg1nPP6tZx+Fn5OTHIz8OvvfTqI444
oixQpLFw7jbh4Gkuf/nLF2BVHMcU5eR3atcoBjP84PxsQuokFc3JbmETuTBv0mm5WOagwlBEAGU9xj2T3ZDdkW9h1113LXXG/am+ytKhuVBnGUwzGIr2sZfT
qNTBQKB2+i5NN7CL8OKyeKmTyB1MC2EQEWZb8GVS4R8g3VBHDWz1t9gh/WLAGswWVkyStK9u4QQsGpNnwdgz2ErVbx6kXH4AjEv5olkwXaTNIsMMan4DAzj7
YtqzE19YJYPNe9KE5HP2qfcPfOADS0RNanW+I4wk3uN8EurFfGYPkvEockw/KdsYIeSI1FEuX1ylxUYg15Y+tSQQRFBI0ZpTs/fKVHTggQcO/KvjyjJuSPTD
C7O5R8BL5uF+/hDzlwkJ80CYF4uMSFFzdpgIlMqw7iTzcI0xjfmpJ+ZljjCP80daG3wv+ivJM4frmL8Nvxr3k+bpVA1EgSqLy2kYDomJmEg4qsWOecvEUzEb
crpAmYTJGJSlQt3vbNxhC+TkOfLIIws4tB2gA0YHJuV93YWiW+a47/P51EPPoVKSKoFIq/IMphb+l+Ey8tnujaihYirBIC0wmAFp3yuu3pV+876IKS8b1vgs
hN2Ski20tBL2TxKuZ6qDAQxj6iwzGZspTL3uGkzLAKXaesVIqMWcbxHpUZiFhZsEpI/8LtzXBBIyK3gAkxGGS5JSH8yFhmTgZru9wis/Zzumfe8eY4DPjNZD
pRdeCF+DW/1pXBZxAon+df2oZ43qU4NYOUJ8aafS6tBwMHUTkXaFMZG4tM3ztA1DzbaYSDYJYgA2EdKG9I+JOdw+4ceeA3vl6ivM8Lph4tDfEc3SvO997xvU
v287Es/6ujEIGGfmVleQmPRkc3jUPJ50zyy/DTvIlWH+MzuNI2O1L2FYBLa1UM4JGI6jXhqIm0Xb3Oc+9ymTn4qFcViovGYkkN9JxkkmvUlLpcpKWKh9TvMR
NQ4wrrOwm9jKwPFJ8L53r4Uwy7IIdSnLTOkyf/MMz0o7vGglC4NFjq1TVBOTFOboOuUPl51lWZCZqjA8dkgSLjMYqVS5Ol59h4lGRnsgmcCDucxiR2OLMLoy
sDGE1AIs8OojIoN2h0Fh0BZGmh4NKElbaBsGi75wLa0Ffuqa9k1qLz+ThdbiZwHFPGkgJB3PzwmmDd3+yWfBNrHM77zCK7FLSSXxsLtde5nTxLVbhDHhNAl5
ZpaZ7c+ys++6mBorNBiLNQZrZ7m2i3fXP+y8GCWtTH/AUN/nWPMMe47Ui+biWlqaNsBAXRIH+GD29gppFwc9E+33Ay+4065gYpKNw8zzstzhsZntrK/ri4D+
TEFlfZ+09tKH58DaS1xbCQRdY39SvXaKR6zafkH9Z1Yh0VKZcM6uSpXVTlOJz9Q0E5lpw6JrsXZvkgXQQsn+TOrW6WlmIEliIn43YTEAC0NSlmkB4ERKspDQ
Kgyirkqo7qRu5VoolElqRRbZ7qKVZeWrevFPWKwsPKR+7Z9GFm6LqYXLM9VH3dRR23zOBVhZpF0pFvgoYEf6HrcIYeLq5FWbMK1RjJDWQyKHu4VXedqhTb7T
J4mlhQ/OScr2B0t22xxU2pW+Fd/nAuw+ZiubNv2uXzyz22/MZ9qnru7ttt/YgoExo01JnscxjlHSpGgUCI6epU2w0ofj2gL3q4ZmccG4hwPSmEstOnHI53nd
NbQZ40XbPK87lvye0VjDmGF0Wa72G5+VNh4B6w0tRP9U6oeAsWueWBO683L47pkYyHAh9XNFoCJQEVhkBAgzBEUCTAo/i1zfzawb5pHCW1cgHFWnykBGoVK/
qwhUBLYcArRXmgitltZbGcnKLsY4WI38wWga83B3ZSArMayfKgIVgS2MAHMtk5bFElUm8ofApuxy5nTMoy8ulYEkcvW1IlAR2DYIMGfl37Zp9ISGYhiTfB3j
bq0MZBwy9fuKQEWgIlARmIhA7zDeiaXUHysCFYGKQEVg2yFQGci26/La4IpARaAiMB8EKgOZD461lIpARaAisO0QqAxk23V5bXBFoCJQEZgPApWBzAfHWkpF
oCJQEdh2CFQGsu26vDa4IlARqAjMB4HKQOaDYy2lIlARqAhsOwQqA9l2XV4bXBGoCFQE5oNAZSDzwbGWUhGoCFQEth0ClYFsuy6vDa4IVAQqAvNBoDKQ+eBY
S6kIVAQqAtsOgcpAtl2X1wZXBCoCFYH5IFAZyHxwrKVUBCoCFYFth0BlINuuy2uDKwIVgYrAfBCoDGQ+ONZSKgIVgYrAtkOgMpBt1+W1wRWBikBFYD4IVAYy
HxxrKRWBikBFYNshUBnItuvy2uCKQEWgIjAfBCoDmQ+OtZSKQEWgIrDtENh5PVt89rOffT2Lr2VXBCoCFYGKQA8EzjzzzKZt2x5Xru6SuTGQ85znPM31r3/9
5la3ulWz++67N1e4whWaC13oQk1lIqvrkHp1RaAiUBGYJwIYx69+9avmO9/5TnPyySc3H/3oR5vjjz+++d73vrfmx+wUJayJLV3iEpdo9t9//+a+971vs9tu
uzU77aTIShWBikBFoCKwqAicdtppzbve9a7m0EMPbU488cSZqzkzA6FZHHDAAc2Tn/zk5opXvOLMFag3VgQqAhWBisDmIEAzOeyww5qDDz54Jo1kJgZyyUte
snnJS17S7LPPPpvT6vrUikBFoCJQEZgbAkxbD3nIQ5oPf/jDqypz1QyEtvHWt7612WOPPVb1oHpxRaAiUBGoCCwuAqeffnrzoAc9qDnyyCN7V3JVDORSl7pU
8973vre53vWu1/sB9cKKQEWgIlARWA4EmLTuc5/7FP9Inxr3ZiDnOMc5Cme6293u1qfcek1FoCJQEagILCECorP22muv5otf/OLU2vfeSPjQhz60qcxjKp71
gopARaAisNQI8HG//OUvb855znNObUcvDeSyl71sc8IJJzQKrlQRqAhUBCoCWx+B+93vfs0b3vCGiQ3tpYHwzlfmMRHH+mNFoCJQEdhSCDz+8Y9vzn3uc09s
01QGYje5TYKVKgIVgYpARWD7ICBY6ja3uc3EBk9lIDe/+c2bXXfddWIh9ceKQEWgIlAR2HoI3OMe95jYqKkMZBoHmlh6/bEiUBGoCFQElhaBm93sZhPNWBMZ
iLxW173udZe28bXiFYGKQEWgIjA7Ape//OWby13ucmMLmMhAznWuczW77LLL2JvrDxWBikBFoCKwdRE473nP21zmMpcZ28CJDEQcMCd6pYpARaAiUBHYfgiw
Ql3wghcc2/CJDMTN9TyPsdjVHyoCFYGKwJZHYBIPmMhAtjwytYEVgYpARaAiMDMClYHMDF29sSJQEagIbG8EKgPZ3v1fW18RqAhUBGZGoDKQmaGrN1YEKgIV
ge2NQGUg27v/a+srAhWBisDMCFQGMjN09caKQEWgIrC9EagMZHv3f219RaAiUBGYGYHKQGaGrt5YEagIVAS2NwKVgWzv/q+trwhUBCoCMyNQGcjM0NUbKwIV
gYrA9kagMpDt3f+19RWBikBFYGYEKgOZGbp6Y0WgIlAR2N4IVAayvfu/tr4iUBGoCMyMQGUgM0NXb6wIVAQqAtsbgcpAtnf/19ZXBCoCFYGZEagMZGbo6o0V
gYpARWB7I1AZyPbu/9r6ikBFoCIwMwKVgcwMXb2xIlARqAhsbwQqA9ne/V9bXxGoCFQEZkagMpCZoas3VgQqAhWB7Y1AZSDbu/9r6ysCFYGKwMwIVAYyM3T1
xopARaAisL0RqAxke/d/bX1FoCJQEZgZgcpAZoau3lgRqAhUBLY3ApWBbO/+r62vCFQEKgIzI1AZyMzQ1RsrAhWBisD2RqAykO3d/7X1FYGKQEVgZgQqA5kZ
unpjRaAiUBHY3ghUBrK9+7+2viJQEagIzIzAzjPfWW+sCFQEKgILhsBvfvOb5vvf/35z2mmnNd/+9reb78Zrs9NOzZ/92Z81V7ziFReststfncpAlr8Pawsq
AtsegR//+MfNIx/5yOaTn/xk89Of/rRp27Y597nP3Zz/fOdrTvrCF5rHP/7xzT/+4z9ue5zmDUBlIPNGtJZXEagIbDgCO4WWcc1rXrO5yU1u0uy2227NFa5w
heZCF7pQ86xnPrP5STCUv/zLv9zwOm2HB1YGsh16ubaxIrDFEbjIRS7SPP3pTy+tfPOb39y89KUvbX73u981Rx99dHPwwQc3N7zhDaciQGvBiCr1R6A60ftj
Va+sCFQElgCBK1/5ys0HP/jB5thjjy1ayWGHHdZ86lOfmljzyjwmwjP2x6qBjIWm/lAR2GQEQiLmAK7UH4Ff//rXzSte8YrmUpe6VEMTue51r1sc6jSUSQTl
ykQmITT6t8pARuNSv+0g8NnPfrZ5//vf35x++unNzW9+8+Z2t7vduqj6Jv9//Md/NCeeeGJzmctcprnDHe6wbSJnYHvSSSc1Zzvb2ZprX/vazXnPe94m2Eez
HuzjzDPOaD4fz/r5z3/eXPWqV23+5E/+pPT2si+gxs9DHvKQ5g1veEMxZ33sYx9rXvWqVzXf/OY3m+985zvN8573vOZP//RPOyP7j28L1pVZ/xGQVbyD3ci/
cEK13/3ud2NcVdqOCPz+979vn/nMZ7YXvOAFB+MjFrj2gQ98YPuLX/xirpBEyGV7+9vffvAcY/LSl750e/hhh831OYtYWDDN9nrXu1678847t+c85znbcAS3
wbBLVc8888y29Tcn+vznP9/e+U53aoNBtfoyQlvbWGTnVPrmFhNMor361a/eXvjCF24vf/nLt7vvvnt7l7vcpf3bv/3b9p/+6Z/aL3/5y61rvva1r7UhFLXH
HXdc+9GPfrT95S9/GRDPD+PNRWH+T7/b3e62Yl4O8YvRzMNFy8xA/u///q99zWte0/75n/95e+c737l9+ctfPvdF77e//W37jne8o33CE57QhgOv/dCHPjT/
3tvEEsP5OBg4ERLZXvSiFx18fvSjHz23moXk2N4pFrUcmCERt+c4xznK53PEovqv//qvc3vWohX0kY98pCx42fZ8vcAFLtC+973vnWt1v/CFL7RXutKVBjjn
szCS1772tXN91mYVRuDFKGIvSPu7mJ9d+pu/+Zv2Epe4RHvJS16yDQ13gPtRRx3Vvay+H0Jg2zEQC9J+++23w0SJzUTtD3/4wyF4ZvtIkiHd5CT0Snp84hOf
2Eb0x2yFLtBdH//4x4uUql0RFlmkNRNz7733Lm3W1g9/+MNzqbHFK3HUb9/61rcKYw47dvn+cpe7XPluLg9boEJ+9atftTe72c1KG89znvO0/+///b/2oIMO
GuBOO4DFPMicCPNNeVZEGrVh6ilSOS0P9hbUr3/96/N41MKW8T//8z/tm970pjZ8I+3b3va29rKXvWwbJtmilfStdGxQbMPUOLd+6fvczbxu2zGQ5z/veWVS
mBjXuc512gjhG3x+wAMe0J5xxhlr6o+wVw8mo2dY6KjNuQgecsghayp/EW7+67/+69Ie5qtPfOITgyqZPOGQLL/d+973XrPqz3xw/etfv5R3rWtdq/3JT34y
eBbN4+xnP3v5zeK61Yj2mmPmKU95yqB5L3nJS1qLvN+e9tSnDr5fyxtY5rMwj6QjjjiimLL8RuPc6vTpT3+6mLkwzJve9KbtD37wg6lNZsp9+9vf3lpICTPn
O9/5inn1YQ97WPuzn/1s6v3LfsG2YiCkKCqqCXGjG92o/d73vtda8O91r3uV79iZ0zQwq93TYpaTkT+ANvK///u/xebqe2oyO+uyUjgdSxu0RfuG6aEPfWhp
P+byla98ZfjnVX3+z//8z8ECFrH7K+6lyaWEvscee7SkaDRrv60ofAE+3Pe+9y04Gi+nnnrqoEbMr7e4xS3Kb7EhrtciN7h5xBs4RuBDKW+XXXZp+ZuSIvXH
QMAyX3zeysS3dP7zn7+FK2ZwzDHHlOYaU6PGVQR0tCwXOd+HXx/72MduZbhK27YVA3nhC19YOpsN/X3ve9+gczEWk8cAuOMd79iSKmYhJgXSi3JuectbFgdc
lvNf//VfxYzltwMPPDC/XrrXlIzZxt/97nfvUH++HoxYO1/5ylfu8PtqvnjUox5VyhleRLOM7E8+GNIjGjXR8/pleY10G21EQJW23/Oe9xxUO9v2ute9rvwG
47e+9a2D32d5Q7ghNSsrF7zugpm+Lo51zuWtSvxNxhmf6PHHH98+NbS75z//+W3sGWkjMm2HcXX44Ye3F7/4xQf9wAe4zz77tM9+9rOLWReemFAfLWaZMd02
DIRTm01Tx1JPSXIoJyVHt9/Ym0kWsxBnvDK6mkyWwzSWdmZ+A+aZZaQnPelJpY2c2bSRYaLRiXaBw7777jv8c+/PNAqahXI40bOfugX893//94BZsV2jUdd1
71mG97GxrT3Xuc5V2p6aV3dRJ6jAHzacv2shJjHlMAfG5rpSVPdZBJ80mfERbFWKMN4SnAFXQqBILdaK+93vfu3vQ0tLgo3owxSSYOca5tukZzzjGQVT2ozg
hK1MkxjIltqJHmaA5nOf+1z0d9OEltGEo7e8z/QE8uGEJNbEwlVSHJQfV/EvGEQTzrdyhw1K4VBecXdI7E1Ik+W7k08+uQmJecXvy/IBjig0tiYktvK++y8m
TRMLf/lKG4NRdn8u7yNYofna177WfPGLX2xOOeWUJnwbO1wTpr9yjR9ufOMbj9xbEo7k5mIXu1i5NyTp8pr9WT4s6T+YhIBT2iyHU1K2LRa45gY3uEH5Opy/
ZczmNfkq62yE5Tb2O0giqEzZaIcpQlXLV7Fglo11PnhOPutqV7taE9J1ucaen61KEiqGP6/MYfM4tOvyOaI1m7Pt/IctccE8yh6SsCA0YaVowrfZRJBH8/rX
v77sak9sgsGXtxEt14QpN7/edq9baiOhBcbmKBMjzEs7dKaJalJG9FDzgQ98oHna0562wzWTvgh7/yAlQqjBJdvn8PW3utWtGoPKxrCPHndcEzb84UsW+jMm
KRU2CvW8CSl5ZH13j4R14ZhtvhMps0OFL4w5JLHm3//930sKCZviTo++OCMmZEhyjZ3AEZdfNiH+xV/8RWMxw2DCCVnKx0BGkYR4GEj4sgb1GnXdsn0XEW2l
yphx2ON3qL4xbKyGv64whghPLZsqYf3Od76z/EnPgTGH5l02INp8GGG6RbAhLMFUPqgUqmKvSVkQPSwk5gEDwTxCKm9+9KMflQ13O1RmSb/ADLSJwIgZhOra
RKBGE+btHVoEjwCkecELXtA85znPKb8ToDCO29zmNiuuJxyF76R8Z02x63270pZiIKQxZDKEiWWHPrWQ7bXXXoWBWOw+85nPlAFG6rLgmZwGmjTQEeJXBhtN
w8LnuwhtLQueAXj7MTtaTWASHYnw+JAMl41IxSYdCvV+bPX3OEs6Pv0Xvyi71DHviOgZqWkoxEKHYYR/pQkbchMRLI0JbuLSCnfdddeRzwpz42DRk6Z7q1Ay
adIrgWMUhVO7fE0ooumF76ksbsl8hu/Rd8adv/BNNfe5z31KFlrMB0VE4uCW1D58gfFkqg9zYKsQvCIIpDBKTASOf//3f988+clP3qGJ8IhIqyZMU+U3TOHf
/u3fmj333HOHazEV2h6KSMTCvLsMeYcbtvAXW4qBMJcgksO4xS8HBPVfWgPSRJE+xnQyZiE9dNhAm64p4Drx3SgisZuoJrH6MO9YIJeFLEJpBiH9j6PYP1CY
qmudw+A+BK8Iy23CB1WkYRI2LYP2Bj/M2uJpImPKyGRlshlFYbcfLLCx+53oXCTFUdcu03cWNGTxHqflYaowgvHDH/7wFRrYVa5ylSIMEXCYGWkhX/3qV5vw
GTXhIG5ij0nzL//yL2URTBOjcTyKLJ5phqE5b5XFEDbGJkwIi8xRsXdrFAQNpvyIRzyi4GjMHn744TswD2NcObQUxLwa/oHyvsuQyxfb5N+WYiBs6ogGQtsY
JhPMwECk35S2SF8WMKq8BdCE9ZsTzUxEpgJ/OUgskJMWV6aHiKIp96uTfEPLQkxYtDAEi3Fk0bLowMrE4v+JUOkyYZlO0v/UvR+WpMIXv/jFTYTvDhgV3Cfh
mWV5DpMYprLsZPwhuPkbRRgr851xmBqLsRSZD4qvLf0W3Xst/p8In8khL3pRc+SRRxZTqt9hhiGNo5wvxdQTZeRYH3f9MnwPH766v/u7vysYM2uPmovMfK4x
V435iMwqedh8b3zzMfGZnHDCCYXR0GQQ4ZOWhzEzcyk/oraWAZq51XHHVXZuRW9sQSSwdNSy3Q8T00mEjJbEan4jjXC03+Me92jYhknUzCUms0lEaovQ3+a4
j3ykeUtMRH6T1FRIaQZRSm3Dz2LfRyTvZWMg2Ub1n7SIYABpUjJpItKn+au/+iu3jSXStqNFb3vb2zaRm6g4K+H85S99qZjBxiW6m1SPsQ9b8B+yTfDuYt6t
9s9j/FjEkuDrVD1jdRwp98ZxqJJMtJJechwbqxgWM22axYbv7zK04d+W9bN2v+LlLy/WiAMOOKDh+Oa3s+inUKJtki/yKyFrB1MrHydTrjK+FOOTYDVM1gEC
kb+IzmykkY8NuM2DH/zgYgUZvn6rfg6bwOh8WCEVLk0yxR9FipJQKUtbhNih6Pj2uc99btlElSGTIWmVzXHdkLxy8YR/MYlL6oPIkjrASoK2cSREOJ8XUuC4
yxby+5Cq2pBUSzvDVjyyjifEzvSQjMs1UmHMmtJEOgk5n4w/OZpCQxz5PHH7rgnGs+YsAiMfsAlf5mZM7Yb5MMXitCK55GMe85hWmPpqKRbGktMOfkJWwwQz
sgjYumbvSFUTzGTkNcv2pXGpTSG4lJBd49qem24i0DgKd7Afx7Xj/uyjCf9pG6bENoJvSsqi/fffvyS+DEFyxX32hrz61a/eMmN1UhjvltFAfhGSLGkBkWpF
WB166KGNs5KTmLacVBabgfKrXq/Ue5oK/0kwjhJpxKQQE2qklM7/wo7K5JLhfr0etAAXkczSJk99R1R1oaSxEauo8EIhSWfs86+OdNmjIt76NAWm+icW0yL1
xV6QYnIQ2SKajSkQjkxfiAYzztzT53mLdE1iTHMm3TKvwpfvjNNbCnKmExTpd4rdPc1ME9sRYzIGZbnE+BTx9rIY8wc86EElkk2YOd8A05jgEH0nfbyximjh
qR2VL5b4H1PqscceW9ojKIbVgak0TaC0rkNe+MIyprvNFNRwpfBvXDrM2vqJVsIZD7fh8UdDpLGIlhNEwtSt72LvTgm6ibRGAx9ePoP/6+sRKv+1cMR/Jfrd
e9YKlg/PC0ZX1pqbRwTnOWI+Ljqt4J5R2cHnZdJAaBspzcbCNmhD2DRLOgySQ0zONQtRwZxKKhQbvUYR6S2c5yVRGyxtylsmioVkkNpCenXSVqjmAzy740MW
4rUSvGxG7JbrfUzUkhrGrt9rXOMa5Xf5ubYK5aZW0qvMCN0dz10sbLQkJc9CXU1C/qtuufmeZE37oEn6Tn6z7UA0vAc96EGDDZTaLidb+OdKWiLzfLWkzDjM
quTGS3ylUKLxhMBZkjhKDRSMaLBW5XXDr8a/TcnWks2mSRpI1PuPDGP4/WYxEDs7JX/rToBpIEpVor7dNtz97ndv7WQOzj7t9rn9HjHkJWVKSPKlLvNMez63
Sk4pKE1GXSy9t8g5q0I6h2c961ltaHxTSur3s9xMYatvQyMp52IM92PWQ9qTrUIHRdbdbFf31UKOadz61rcueIREO3OTu/NHTjiLl3LlvGJmsUh1n+19+Adm
ft6y3GjcWhuy7RGsUFLydE1ba2mL1DEwzvJlxbDrPT93X60T0qvIzcWcKd1SaCGDa+WCw5g2k5aOgRx8VrJCGTD7kgUtVNMCvA6JEMZVMaC+z5l2XTg6B51v
oKQ/Ztp9i/I7e3x38PMZYSjhaCzpvmko60mh3hffFUku09LkhDOQ/b7sFCaPwiizXbSQ+9///i1/BUlVG7uL/7zbqw/1czh/W76V1D7UxwI2q09r3vVcj/L4
M8OhPpijYeZqPzen/F/6LPtNCiAHWmUf5yshDBN37IM5FWbKktwyTMIlE7V+kXaerybvCTPcekDRu8ylYyARUlfAM5idoDaNLDYpTcUmvtIB0+5Zr99lpzUo
s/PnYeZZr7oOl2sQZ9ZW9ScJZ+bi7rXdidL9ft7vwwfTxqatIp0lnhHhMpMzed51m7U8ps/w7QzGh8XEgjF3WoUjXPACBpYYyxNlYduKxLmd7TRPv3VWrre5
jOkOA4HdxyJhI82DmfCOd7hDe1icrgnr8HlNhZYAnPV0YuVm0lIxEODqWGYStlmREySlceRIyvR9iJIaF8kz7v71+P4b3/hGmxFbzs5YlwVizhVn5usuIvAX
2bYIxA7c1UaW9WwQUVTMdLkwsMHPy2wyj35ifk1BjPTM7LWViFbgECn407r6CKdrbb+1YDURn54Xof/lHCP1ZF5T782khWcgqfYBCbdlNolcVcV5eNe73rUc
1sQ0lI7rvJ6EGtE6ZUAY+K6RLfeNb3xjybZLXV0vYpfEvJymJyW05x599NGDU90wjbTl3yGkj1lCMNer7qPKJennwmbApBMxsR51z0Z+x09CI1JHEh3sl426
Jy9yrhq/i0aRIWAwDjL1+6LVcdb6dNsWEZqlmEUZ39km4zw2JQ76QFbgzaaFZyAA0pE0DXs5HLSTju8I4SzRERxLwxIDRpGL3vArJrRXxG3bazBPinC71hkV
TjocfqbP/C9OPXT8a56zgLmNOldjnvVaS1mxAbN1GqD6i7hKiUefLNIE6zJlZ93nGFlL2zfqXud/RHhywdgYT2Foo57f9zmYGrOascA3s1rpue9zNvo6YzzP
XxFtFWH/pQqLNL6dV6Juua7wPaYgt9F4dZ+30AwkO5DzMJ15zi/IxeFlL3tZOb/jXe96V7dNxZ4YqQoK2Gy2QkFx63/4h39oI//NCiclu3lGMuTzVhTW84PD
dmIvSHkmpmCiCdN1hC1moQ42axkAnGUHxqFSV4joCp+ZLtby7J5VnOkyZ0DkoGUjXmR63OMeV+oqVHsZTIOJZfdI2Tz/I38bfrXYRbbXwsiZuNaqvf4uTGcE
MSYpDvIUEIafm58dsJRRhN2jdvP3ZXx15kma51gMkPmYczJfN6NthAnrSJrizcXYvzNyg+lm1G8pGAiG8Z73vKfdb7/9iokCgMcdd1yxAdqL0CXmFhFXBoTw
z1ETwoQxaYXG6RA2/bWYDDCPLItJSt1GDTp1EeNvgVNHPhDPt3Nb9M0iUqT+LnXUvln3HGxUu4RIxubCUl+hsMtCuddFYEhsFJxYbZkM2OoJIYJCHE6Wh2lN
vHHEj0x9nPYOACPcGI9HTdHKmX7TNGzPgrm07JTHUDtMLkOjuwxko9snYMVOdgep0UhTgLNvjZC0SL6xhWYgozrO3g0mFQuFCceplBRnG5TFmIQUB8Hk12Nf
v3jyyQNzkxPJ0KiFf2wB8QOJMENbpTDpTqgchPma5TgWNn0gOThmXQSyzPV4ZVrJjYLCG9Fq8VmPeo0rk6ARu6fLhLPRqk9Ey7iyNup72m+e4EiQ6UO0BefF
85vQyGc99Y5mL0oxco8VM2pfISbNr+bgVjBjCVgwD0VFwSQtHKPG+vBc7tNfq70GY891wSth2BrDj7potHQMBIAGOuYhHjpJx9qNDHDnGfclIYkmAv/Eqaee
2ve2wXUpvdgxvBotxuTPvSnqzLS2aBQnJw5UZ4vMMpCsAvDE+Oa1mXE92y0yMDVRudlGkWgdAobf7b5nVjzllFNGXTrzd8Y+v6HymXrf8pa3jJ0PFjIY+1tk
/11fMEj62sIPxccqy0IyU/tiYMLSsVZzYd/60DC6mxmF+S6Cv2NU/ZeSgWgIG3f37HIdzk5oF+1qQwz5QQwgqQpWQ7QPph0mqThnYTW3tkwBmaTOszl+F420
KW3DRx111KJVb2R9kqHzfU0zB40sYIO/JMAwTRgDzK9dsnjxlTFxXujCF26vHiYrqVsiH1VxYjNzkJjXQgI/pDIhRHkObSjOEyn1ifxwZVPbsMkkDk8bnAke
RxOs5fGbfq85nz4dEU60QH1BGxG6LtjGZ7gkA7HWYKKiK/lMRG3ZEzVPpm7sMk96NuH2pB573jYDzKVlIMNgkdCALSXDJBqllr7vfe8r9+L6SSavaCmmArve
4zjWEh4ah03lJa1doJ45q7nE5HO/vxve8IaDATp4wCa/EfmR9dP+ZSCmSHUWdNE1by5q3fnLRAWqMw2gS8xLhBMSsEXLLnQ70tnqs19MYGO1S3FeTXG0RwK/
9tBXvrJkXqApjNoHJZ1OJBJsX/WqV5Vdz7RowSld7ZjdvTtvImHmYNG1qW1ZifNcNl5YYpZMR/x81hDh4ImxvWfazD9iOwBGm791XwXuiI6K0wp36JNZMNLX
2Q+LmqpnyzCQtMu+6EUvmtpXbPvi2DMNgIllUrIz2i1OirWgk8q6A8R7pjNJ5UgcL4oIK9/N6rBlP86BSrIclvSmNmSdL5De2wKmjcsiaUoDob4i3Bbd6a/7
BGDkGBg2E3KuXz/2t3RJehHto4V4tfjbH4AkBI1Mr0V69tvwn2uZa4Svp53fptY4UbP7iJLAsfsMzvpumhjRWqmZMnUtI1kDBAFopzE+LCAx39p3hsHzazI5
d/2WNBSRliIoWRJ87uJNm1lrJCCN5xa3uEUpl1a4iON5EgNZqnTuAXb0XzM4CrV8GPEvbL1NqKbl3OKwd5YrgsuXVNUxaEoK6zALlO/DNl3SNMfAKYf3OOrW
/c5D9heqZblOKvg+FBOtpJGXFjvMD+WUw2BSJcV8n/s3+hqp50PaKqfewWYchSmxiYlWjvGMKLSRJz7mvcGsG2fOO5Ar7LolnXVEmjTStMein5ft8OrakKLL
QVUOlwrT4Q7X+CLPBNc3sF10crKgVOLaN4yx9OoOIQonbzmIKPw65dS8WLzLIVDaFrvwy/0Rpl4OlAohpDQZlnCN/Rrl5LwIBy3YRDRj40/ZoWmUVOSON5CW
3GFKXiNIpYnFsxxJoDCHT0klngTj0EhKP7t+GclxDo6gRcGUy3G25memqw+TVeMPBZMsxwqE2bkcJRDCZxMLe5kbsf9sgIOz6Z02GtaQJsKdy6F0DkcLs1gp
Z7X/nIAYVpGSyj/MY+XQL/29TLSCq0bFB59x40WyMYtGUb9JDnQSAU4epwy2cXbxIB1ydPyKLJe4PombCaQbyUMSYbOWjyv3mXhmH62H9CAMmbSTkqaw3nSg
UpPTxrooEh0/TZw7UHAlbXWxyDpS17uSmYOmuuYO1/ks8oz02722O56YBQRFMBkOE+nX7uy8np+LHX6YhD9m2CO7/jIQjFPKtI8ox0BiyKxlk9t5Q0OWuoJv
Bw78fcwpMk3nAVS+hzHTh8R7XeKEhZkgg9R4bEyjffMDKJc5h+mPzV1ZQoWZTkjrXbIZ1u+08dX6G7vlbNb774TGlmlLOM6HserWS/syRJ9GmHvGXMOsCCMp
lQQ4PDfSvfCLWiNyTxisc5+aPmUipNXojz7EjJxmrGl7hPqUN+9rJmkgMUb+yDCG3y8aA9Ep1HphnCblMFHbqfASAlpcTBZ7QZCO0T5mrDgWtKjrVHyhwDqQ
TZnDrJs8kOkhczBxwk8i9lNMy0AzWUXVIBvC0hRwl7vcZVIRm/ZbpnhgzrPPoksWu27yPxjaT9AVLDDhdEwmxsyDUoOzNTMHMiWkIxke+qerrmP8iVOOw25q
8VxsmSHyOtlMl4VkZU5sjJVhEk32ofC3/fM//3MJ22U2yvBZgpB7ZWMwtrpMPs1O+ilNVspmuk1GG8cIl8cZ40w2hBs+EH4/C+Qw8QGmucbikdgPX7fIn1PY
hNuhgekkyowW2twd1+4xRtPRneMyTXo/DmEGtr4373M/Gue7dYpfqQ8xqVu3lLOIGze3DAPhP7AQcUiaCCsoOL/wPKGy/BXSMLBpIlEoksNZeFIz8L3FXaeR
IFICMVi6Dkvx9xZWA6S74LkfmbScjJgtxoU5kdoyXDN3TnvOojrJLN7pB+Ff6BJpKlNwaIM/2oHFCNlF64wQ38NA9mF7dYYZvHLY7x/2sIcNHMRCKRPTYQ1R
eTaTdsnCiQn7DcbL4EDP+mt7OsanCSN5j1cLfDp0cwd1/g5rKXWMd9IwDa67R4nAlQvTavYgYS4w9rdMTDpx6Wp8Qr1prZPIZmRtpX0ME8b+lNC4MYQbhQWB
9aNbHt9URrSlZUTyTxqe9YavdVi7G35GmMyLlqMOw/Nv+NrN+LxlGAjwMAVAW+iH8wmlNGYxomIm5QCxg7wrpZlsTAOpPkrWlxt5/JYSRWZQHZ7AyiexcI6T
HEgmtB5OTmpwDiT19beaSZx134hXE84eF3UUqeJ0xy6RejOKSDikz4jUm7HsnLDdBIf6IvdoYBJd3DH/NNNgKCRcdRAtlFgx36TTMyVgkXQZjrlsif4wv8SK
QELrHaZsZ/f7XMyZorrMwbXMW4mXVybSrvlFOYlpN/qwW/7we2OesKQ8m3mnLX7D9y/CZ0JfpgV5xCMeMbVKrtHe3EjrBlib01eIQI1cH5ivRlH2EbwSf9pd
mnIxefvX9gvzNqvGMHVPU2UNWDTaUgyEdpCpN0i+3X0iw8BbwPgySA8GSO5zeP/731/KYCI4LiYybcXvGI0JgymYnCRvkjO7smv4MuQJ6pKFIW3E4riPPCti
xUAiYSvXH4nklDlvDOvWY63vaWOphYzyLQiH5AfqHjbE7qttpNxuNIo+kkUZfuzxJqHIoi5hIp7HLJP3YjLKxKBOOOGE7uWFWeWeGjb8UaaXFTcs4AdCTS5s
fcPC7QOBcUq33WaxxeemSmMtzabda+DsfotYlwEF1+5eNniPMbvenDkszq9YRnrLWeH+2pFzflI70rzd9VFKmOp+fkEaI+YQJ80XU/hwWZK8smIYz7TvJAIV
wSyFHuWN0uisR7lGLWLI9JZiIDqHzTDVeq+YhPQnJhCVEpd/ZcTGd2337O/J/cXL6zCLF8na4m7w6GAOsyxb2pKUnNMJyv6fNtAcKMOvNI+U6HNgLGqIXtYd
k0zH6yhVPq/rvgoNhdnwsb2Yatch7ppRqnlK0H0O3fptMCVHgyqLrTrDWrv1WfT3GCXtQxv6JtdM3xI/yCgShq3McSHYzFiex0n/8zDlTqMDY1Oj643bww8/
fNrlC/m7pKrawBqQfqRJFf1sYGjsWyMEJyCMGwZwta4wWzGHMaEPm2f9bs2gqaR/K7XJM0Iosu4ceeSRRfjMAIpufdI/Zj2aJBB379nI91uOgegk3F4Hp3pp
wNAQMIB01vrOQk4Cdi0pO8l5I1RK0oXyaAz8I2zs/mSlpX0g0RR5QJQy/QGV016ECxON3cKY2BMjqyYzkGvEiTukx7PZQ9M8lnVYlFdMkr9BnandOQkm1c89
HOXuec9Z+XvgBWNanEmWpr9kHqKJSGU5ufLUNX20gkI6zmu63785AiKyv/swne69i/A+Ga7FalSE2ag6pg/NjulRBGdMZFzEj82D+oiPJIWhUeXkd92IJALY
uHLz+kV8zSNr+ckmRV9l3Y01/lM4McXySQhCyOhJDJqwkwE6eV++iio0Lq09xjhS5qgxnPfkKytJri2CcBYR7y3HQN761reWzsYoDg81+/mxM/kOocJfI1IR
kBJIqmzrOtakSUmXpjILMd94Frs9e2f3LBCLgdDLHGwG4QViwJn4PwnGYgAbyL5fVLs90wrpRx2HTU3j8LItHrA4AAA14UlEQVRw5SatPDESMyX10eaUc4mY
UMqkfcFDVJD3uZkyMwswE8Rs++Ojuu//+G3py8xppC+WyYkOmwwL70aXdZo38m1G9Fic+kjT3UIsRqm1Cb3uSwJA9BuhpxuV2Pf+zb4u8+XRugTQ9CH9Q9DU
bv4mTFmWClGaIq3Mf2ZslgjXJjEL8q26j+B5xpixm9cPv4q6c68/G6UXkbYcA0mpymIl224Sbp7Sb37nlUMrdg+VRb6v5Jf3U11zEnK4I7ZnOY0wJlIDpsWR
zq4t6qJ78BVzTtruh6OK8hmb/YqxGsAmSLfu0+qVIYzScSBMhZ/E4k5D3CtyDPksAs4EJN1h/imZZfgwTbAvMTFY2NT3sCVKsSFEXJ1JqjtEEE5ovGCEzD5M
o+2m2Zlw24qgBMyHWbUv8S8lsxMQsmyUe1iYOrsRU9PaQRDK4A4mKYw08YYJTRlzyewWML3nWUcUW4vSP5jje9rzCAT5PHtWhoOCpt2/Ub9vOQZiwTIZmVv6
TAxO3QSBBDwqAmZUZ1Blc5HcNaSTU0Y4wU1wTCujL0aVk6YLZgSL7CLRr0OCSk3CBMHw+hJnt34wqbqSnklHSksTIGy+Gn6r7jW0h4z2yb06fZ6rDEnvPJd/
YBnIgpLjDxPt4tCn/nwnufGPdsf8OokIARlogtmu9pAw9ZX4E8awzmi6Sc9cpN/Sp2BRX20afOHWKTBqP+uCfUwc7QIWmGWNe8Jj9gkneQpRsOvDQFyT/i1+
WJrIolKOXXiM+Bv5ZbnQAj28sWYRGmlCaIgBMrzxbVz9OF1zYIiEEXHFudXd8+FeC+jXYvGznyNVWj6NlDrGlT/pe1Kc+rIp56I66fqN/C1Sawwig9JR22cC
qCN7Oe1L29idV0T5TGiEBTQHpdxkq12gMjKpOIZD61x0ImDkPqPhnFR9684ca/8NrGkxoq6chOkQNr43kTwWIeaxDB+17yRDrvs+J6+T7j3nGHPOMpEgF3X3
Ny3gZVS7jE9zwfjKcsa9skBkuPmossZ9x7SeZwwRwBZNsOzWO+fqGAyWj4EUk1QMDtKVKJNplAsiqVeEUQKBAXEEM6GIBXfojJPYcvOV624WaT5Wa/Yaro9J
rSymiD6OzOH71/OziDR14+jn61ktuT+jt2hrk7DSDxhxmmSYVvr033CdMuyyrwY6fP9Gf6YRZPQVwWRWom2TfI3bHMPjXvfee++pmsqkemT2auX3CYWdVNZG
/0b7zUjKtWipgmO0XZQhM/QesdDT1q0RQt1FVq1W+OliIdQdvsocFmS71232+0kMZKmSKQbYhcIp3YTa2ERIXBNmpiYW+fxp5GssjuX72D/QRNqCklAu/Cgl
gVnsN2j8dSns9yWBnYSMMWHXlLAvNJpGkjsUKm8T0mP3UZv+PpNKhhO9gc9qKVJbN+EPaiLkuQmHaxMMogmneBMTriSqg2VoJiW5YkjJ5fdgoiWpYmiSU/tu
VH0kEEQxeZsQCpowC426bGG+C+23yQSIYbabuV7aafxKbCnpZES0NWEhKGXrPwk/Q5pt9MktIxHgOWKOzEqSbCozFrYm8krNWsym3CcJZ/jfmlj8m/CZNREZ
WMbkaisTgTEl0aFkhwgWIQQVXEJ4XW1xO1wv4SqyjpkT1rRlo6VkICahyWJhDvtwWeT7Ao+ZRLRE+QtJpQnpsGTttRjp0DBblUylsnSGbbJvsWOviyisxnNQ
mHvGXrdZP4S6Xh4dUm3JmjtLPcIuXBbxONOiiY2AJROsbLCjKLSGJuzzTaSHaK44JtvuqPu6310oss+awCZz+Fe6Py3k+4iGGtQrAhUG72d9g0n4Q+GDKwtb
BCk0oQnOWuQO9xkPyUB+vgQYdxsQ4fWNrLkIPhGWPhMD6ZbpPTzmSQQrZJ2Zx1ozz7r1LWvtK2TfJ83xujB9NBFKWxhIOMRLKmuS7mrJgr7ei3o45YqUqG4R
ArjaKq779aEel2ekljbrA/VHOMOb2InbhPmjaHWRFqMsbhhzRMQ0kTmgkQo+U2jP+qyd5iD9zfrsRbsPtinJzrNuax0P86xL37IsyAceeGATkX8NzR9htGGC
6VvEhl0XPo+BZSItKhv28Dk+aCkZiMEd9vZynoEFyyK9qDn0qdEWaeah8LfMsevmU1QEFJSCSGq/6kjKs5YeOcoaf8gkppqTruZpuqN10D7QPKXuUuA6/OvW
kaa7DERrioCPUtV5aE3r3eYI6GgiWKVowJ5F0429L034KhqmqEUjptdTw/yOaNPhbylnB5Uvlujf2g15m9TYCDMsgLMf8mcsIhkgbLCITyBivheumlknDOTb
MajnSZgGtX+ezEP9TomDdxDml/UvXyzoPwcyJaOONDwLWsuV1eK3MbdQ7FFY+eOCfVLXSJ0zYB6x76j44zCQUcyDQMcvEpkQiv8ugnKKb84haLFvpAg9691E
/itMAzH3RkRWwyeYTHu9nz/P8sdGdIhyWcQw3oxKyJQFolL67u3Ie+f9KsJomDIL8M6xiW6WCKfh8tbjczdzqbPGl4Gk9YgJ8If8TksQxmvnfe5dEdGyDPT0
pz+9YCx6TKj3opL1Kc/sMSaEN0/bkNdNre+e/LOfw96OMHuVc2xknRBVKAx7niQS054rz5Vbz7zLdCYiFI+NSMVFoklRWNGGPwI4/H7RGYjFLzfzyMy7ml2n
691BwlPtN4HpXSLP1KLGeduXkjmtpGlY1Hpmf3UPO5K/aFkod0cLXQ4Namq19YN9NcJEpcORa03SPrmWpFy3cP4sFjbXCAG1f2leFNroIBGpvQqLtncp26me
mW/NPPO+z2KfmRckXJUVQDiuvWUyI0gCaj7k3FWu/WDCp9/0pjeNzHic9en7au+OzAzKzuwWQoZlsfBc2xOEDvfJ49X3mWu5bssyEBv+IiJrIEHYH7AIZNMi
KcYAkRLiU5/61CJUa2wdInqq1DV8Fa19HYtM9lHA1Z9klstC0tSH767Uu3vmA+leUkm7p+0tcOTv3rGHw9k0NrJJdWEMSTNjvw1t26vPpFcbFJ2NY2+CfQVO
gHzWs57VOoZYLqdZGIt7w+xY6ioZ6KKSg7RyLDjMrQ/zsFBjCDYVj7IaaCvmTYthNYClFDIw9yy46yNjb7UZBZTtILbM/KA8KW66ZM/QPe5+9/Is/Zp55rrX
bPT7LclASGYmTQ4gKUoWwUxkUGbqEnWjIXXPCNjozu/zPBKxMzbUlyqdB2n1uXcjr3FGSGY6pnFmUsaNrMOsz4oAhbIpFcYYgiwINrxJleE7jEHbMATMQ4JJ
CxUzLWlUMkSM5xnxx7wkwzGGI+WI3dCZu0pZ+ceCwLzjeNdpZp1sF00nzSnGBK1nEYmZJ6LPSlslUe1bz9yEDBOEWdDgxjGTbHv4M9sjjjiiaCiYCIylpVEO
TagPSW4pLUr2jz733ubmrplQXSRxtBlSTrlMk9LnGetxzZZkIJnvRgfwNZAs0LSBsB4Ad8sk8eUO05Q4TfJFNw1Ji5EDO1OadNu1CO+7k4/ZYdnI4sU8AWfH
CEgFI+sBe7gFinSKKa52rFgAab1MI8rNXdgWWIuc52FOz4pzMqZJzY40yHGwqNlhYSRDhHpqozQvfUmOMH4dzBvR0t7xjneU00LtvieYTiMCF8aRjBZjmUZM
jgSCxJZWQyDSX77TZxgF/0jSZ+Jk1Ux3spl9seUYiCNrSVeAJ6n1lQCyY9b7lRQXUR7lfHZ1ZA4wuReZpF9XV0zPhFpEktk3mbLDvjZbWFgtRrSIrH+ahggX
tGe+jHmRlCf8Q5hV7Eov6TgyDb58bJNMqt1sxwQhzGnRyEFXxqq/Rz3qUb2rZ+HnYyCIdEkbMe/j46wafg5zlTXj+OOPL8laYyNl9/LB+0ypM82UKvklc1TW
mYZJYEiSxyxN3tIqdTVrmb/zrJ7NCnLZUgzEopG5pdglu2efZ4csyiuzQZ4Fcu9wzi3qgsfhn+q0Mw0WcdHQpyTzzDJLivz0iScuSldPrYdFO+3oghXSXu/o
VEylzyFeUx8ydIEFx6LFUUszdv6KhIz8KuNykJGA5YTLxc49i0Qc+ql9MLHlAU6T6ogZOFnwaU97WmkXf9Q4YmaK1C2DoAX94kwWvokumSPMqFLGZ192f8/3
HOGugad+5qvpMo+8jgVFP7lO8lVmb4KA9hk7GJAosc04n2VLMRAdmrZPh0ZtFFm8DECH1TA99CVJGg0KvpBRA6dvOet1nQUjJRxa3aJnXjWZktmNOid8vXBa
a7mp4Rm7sucmOaHRwuKMm/WgPNGQlMzMJdKHZuJwr+7i2xVuCD6Z+deCt1qT2nq0I8vkVM4Ipj4HxH3wgx8cMG5BIhzY445ewBS6JqR8JuY7HInm7A/9xpSO
6fgbRTRLAoM1gAAxnD28i7tnZ1SZPtJOPhBak7orQ5jyqDqOeva8vttSDIRDEZAcWX1z/c8SiTIMvgHjuf5WE6nEPpt2b4cpLRoxteWE7HsaoTZ0B/5Gtklf
OrhLPzAFDE/sjaxL32dZXNJeru7akPiRUAkXTBfrQULbOeYtRP7gFps7yyuBaBzlPOOj4SdYFCI0ZBumaW0YX44V9/hL0+Fwe2LTZDkQim+Ng16QA4br+1Ek
uIEwILBB32ISzGCjKE/e9PzD4gTVSZQHjzmygMmTiY4Qam6yZojM65q4JpU1r9+2DAMBXE7EyJDZCx9SFocVm/laTDOp1nN2pQOuTwU4z9KMRRpcJLKIiXs3
sLWr70LhvlwAN6M96fAnRPQ5UGwz6th9Zjed+6gABf4JpzYOxuec8SXwME0++UlPKmdX0OIsRBbXccShTMI2Njj/F4G6wgO8pvmN4Jl7nHKMf+lLXxrZlBzT
zFE2Jef+kMh+3L47TF6YSmoZIr4EJdAKUjPATCLLwMiyHSORUY7TzoN5fKwRyhyei9puM6qoukXSQJYqF5astv5QRK6U12n/5KIKNbZk5wxpYaaU5THomjAB
lEdJoSJddF+KhbmRfjykmSZMWH1v25Dr5BGTgh3tExlyw5nb67mxsJTr5KSSmiMcfU1s8CupL6S+DpW7VznTLopJ0wSDaOI40UauI5SJ50KrK+ncjYdFT+eu
30P4KfWPhaqR1j4Ei5IrzPiQxtsYk3yyJPeE71lJLstNa/wnvbu/LsWC1oTEXBL6ea73+jFWwHLsgPfS0ISGN0gd071/M97HAt7E4l0erc9j0Z5YDbg+5tGP
bu6/334ld9o1r3nNMsZPiVQ4xpXknpnqJMd0mHFX5NULhlLGnqzVeY2kocZ7UmhpTfiyxq4LEapdEohGcE15bjCAkdl3gyE27z766JIoVvLRLsn3F0yliUiu
uacG6j5nte/XnYGEGjnIjLnaynWvt2CEfb4MaANDvhu5ekzIcaTDnZ2ABucbxIQgVg0oJsy5pwzECG0skytMAM1DHvzgwa193oQ0McglZNBZFOedG6pPPUZd
ExEnjUkZPoXmgQccMOqSsd/BPZx+TWw6a/RxkvY6Q2UtBCflmqj6PBffLFM/JA3/lt8v0uuPI78SMh5jD8cKvLKefoNbRAiV82rWM/8U5oD5y/10kxvfuPne
WeMy6+JVP+bcSuaNueQi2r12o95bYPP4AYtyH7rPvvs2R8ZYkpNOinfHP8gcHdpJyZ0V5ttSTFg2iiDSHVt+wBz8Ic+PkxqbOCu9fPYvjhhuIqS3iY2Jg++G
38BMPjQE+4gaHSlkhbO/MJgws+1wNkiceliY+7AgMPysjf68rgwkbJRlkTHJLZxrGXzulegMYRwRiVVANqgnUZiwys+4t04evt7ncKw1cRxoI038MDkv5HVx
YBLSeTeZMFCG783PYWopb+FgsV0EBmIByUSPYb9tYjNaVrfXq8UlNj+VNO0R7VPaFQEGTdj0e90/6iLSroRycQxr0TryGn0fUVflI4bnuqSwLzdhomxI8otI
xt8hkV4cGWvJbEn32pUClt8sbv4sSOEgbiJIpMkFbl5to0kfdNBBTThzS5HfCakYqYs6IYsrKTkp9jk0EWixQjLP3zbyFUb+EIES+azu48hcg6UjBsy/N7zh
DU3kuGqucY1rFA0GvuaCQ+VOOumkJnJRFS23W57xFqG9TZgfm4j6LD+5T//AMhlM957h9zQbhHmMYiDGgTT0sSF0BwFMvQlUEYnVhN9vuOhN/byuDIRaHPbW
hupI5cRE1kLKyMXXAM/BNKlMA8UAc213UrjHILTgMXNFrqKGeapL7omoleanITVQlw3EHLjd66a9NyGRQTfL/dPKn+V3KjGzCXIgVOLatyzXR1RJWXSo1UwL
2tdd3PuW5ToM/uEPf3gx7+R9Jou67bnnnmVSWyakwf54LLLhjCzSmr4zweLs614TOcveiFfMTorx1IKZTGBF2qURw1B6d6YN/WGRC+dtMSuF07QchBT5mkZK
q6utP4YfaTkah37lPGCKDT9IwZeEbCE0VwhqxoZTD9UJ5vvvv38xBackvdrnz+N6c5B5lEYEMzSJeeQzjR/Zbp0dFHs7CoPELCOlSV5STEzmaTJRQioMmBuN
rUiqOLjWOkQLMTa7NImZJeZnC3xHzbUIFW6OPfbY5klPetKKeik/AnhKXWg/wxpS9/mb9R5LH/knrJMTeFayoe6i4ZyNhWHWItb9Ps4qTqtR0RmxOA2cZNJG
oJiIveoUg3HgFI1DlAq+QiIXhTLbKkf0OOfftLo6Az3MAcWpJ/2Jsp4Zu51XS5y6NtPlOLSpKibtRCepcZkBAO6TjWDRKDSpQZtkEA5GN7GKwXDKRrYQegb3
2RMQgtfE+/r8mIEHsBL15bMIsElkrD8n8qTFIl3qI/Ko7/ifVO6sv8Eho6o4lE/vsWs8nwVHbd81cueFGSm/XvHqe5torVci/DJcPMeldDFPeMITyh6RFTf2
+JC70EOYHjjj8zb9ztkvrU2mnOnibOOjfU+zztN8zqyvmxaFBQQdbkIs2m7xBFNqBxt9hnen6kiRKgaP3zPXTrdjs4zhVxNT5FecAVJSrOTGRwNQKunNppCG
BnnE4uzs3lEdduiKJMsoNJiF07f0rTZbmFa7NwPOJhWcQzsr4YqZlmYaTmF6GCwohJ0wQUy7ZcN+14aQVEu7rhNhnqvJFG2sSesNk5BWVxU2PqqBItUyV5YF
VIryvmSsZGoei9jwPoa+5czrupD8B2Nl2g7w7jPD/Fzu0/4cv/k7xv785z9/MN+TYeSrdUDusW6+qry3z2uYp9rwlZTn7xX5+4YFAnuAPEsdkDUm1xkRfKF1
lP1nfZ61HtdsGgPRGJK9SbDIG9RoCHZ60hpQmGEGG3pIX5nrptux5cIx/7TZgHCvdotXJ537zk7qHBxjbl/3r22EzLDCvpI7bTIcu6UNYfctdZT5VTlCH+UQ
8vtqUksoJHffYh6yAq+WbMQLs0Opl13di0J2DKfkLr/UaslihSEbMwSQLmGcmMIxsYcnnKttBBu0YdsvGWI/ELuuT4mUHV2yeVA5YWJspSpZLXkOLV0ZNJfN
pC9HGK6U+OpC+xUiO43sRM8sAOa67Mf+vhtjGrPeO7IfKy//zFXZesOcVNKarIb5j6rLVyO8N8yDpXz7a7oUASNFK6JR/WREPj8Cm3G0mVl5JzGQdfWBRIeU
c8D5Po4P55PIqUUkTvTYHVxCK4XPhRmm+EXUle03o4qiI6dWn9M0NgOV6yJbcBPSS/GfxEax4rxj6xQ+u5lYnHrqqSVsUyXDXDS1TS5wglpoC+XaDGMWAsnn
wQHIyS1yh02/LwkfzfDoGKTlCNK+9+Z16s/PJVqL/2BRSB/HQlH8RMbBaikWlGavvfZqYvNpCVY4I3x+p0Xor9M39YOy2ev5BdjU2eyFnPLp8WHwFcTepRJs
cnJEWyHh5GFGXW1VmsifVcKLhb7y1WwmXSXmE78SH4RAgBDImkhCOHY+8eNwdvNHid586lOe0twoIqf4pX4ZUVWRzLD4HrQJ5o7AdVy2kHbjeR4Ui3/pG2UJ
WOnSK2KtMB8FKlx46Ohd/RwpWJpgcE2kb+netjDv54PQhOYImRNzHpLRqkNgJxQ7158sQkL0dFhsIGpClSzl67Q4f2JVjm+RGpyOBh+HWMaqY0SO0DSQ7b3Y
TAainZx6HPpdR+IkUEWpIEz0wAMPLAu/vQyhTTW/DuxEsFnQRLT0JQu+QAtlWuzUx8LYh1HnMywKue9EHy4K2feDRNXEprPe1eq2P3HwXXjeCqOwz+bK4fy+
4FlRPcMFG3sWV87fU2O/Q6GzBJ9Rztvh+0d9hq85jIGk83rUdRvxnfFm/CZxjFtgw8xWGC4mCS8OcJFTBLZ0YId5tblVMGVkbr/6Na9pItFh+cwhLlxfcMNa
yTMxDcyIsHVERH4hGHYZCDxfHBF3BAWMcJgwD8JA7ESfGzMbfsY8Pg9Utyhsxfu1OtGjIws5zYt5Y5zzKq/brFdmJmaQ6MjirIIDpy4zAZpmcsrfj+2cUTBq
x2kevckeu5kksZw2sq32tWlT+dOM0R0nTE8323PPcn4Bmz/Hel+S2kVZTJxSW89CTGvMaMphclgUkuJCnexYFmDQl3IsBWNtg7mXMgQL9CXBBcZt1+fI5Kgu
IczMlLSRzV7iQGXY0b6ZZId4jkO+1TRNqZu/YLrlNT97lWVBJudhyuCWWORX5aMaLqf72dxK5zvcpTLK+g5nopBXz7ozKiGsMcOpLnHksM+k+7yNeL/uJiyb
Y+yXwOlTaoqOK0TqEQNNarBxSUjdvIg0QmpdK9m0RTogNSC7nW2yE66HhttUvuz88zvN4imhHovxFmoXzKKE3wX3KVeePTSSsK2W9+LN3xeaTtnAGNKSEb9R
RDNKU49698WPCY40xVSk/sKzmRC096MRGulPeUJ7E8cQBce2zXONCa9MnMJFmUpWS4cffvhAKoY7idS4iIm12qLmdv3OMebhgpiUaG/GWB/KsWYvSO5hsgej
L5Ggh6XoSJVSNsCpkz07NmgaB33Jfh9zF7mPuVefbTQZK3Z8W2doRRERVeplDMAZDfe7MRtpjMqGvzIujYsYp0x+siggmxKN6dRUlJH9UC4Y/jc0rl3rj7lM
vzHpqqvwcmuCcmnnzNkftsbEtV8K7UN4sI2I6hLC56Du1sw4UKqY1FkumI+zbsNVMdbDV1Y2R06s8/CNc/y8A8eOsst3fTUQnDTvmfQ6Kg/QajlogF1OZCMJ
7R3Or2kJ1fqWn9JITL6SVK3vfXldmHV6YTAJn43+Lcw/M0eF6QeamyM/u2Gns7YhFoT29SG9rYbeETmaRAbN+syNuk+IZt9w+N8FruaJvlE/53f0OeRoEm4k
WOeDZHuFqU47WCrLE1iSUYR5f339w/q4KDgYK0Lh14smaSC8wgbWSKI5sNMNSzTDF7P5cfbhkuO4oGvC1NG8813vGnvNcLnDnzkmOcwiuVsTWTOLT0EuJA6v
mKTF5mgz2yz02Mc+tokDYko72JmjM3rXk08jzAzFj8IRZ8PYOAmti49nbDSRjKQKCdW6tNFu9NVIuKPqGyGrJXDAJjXSFE2AI5I2khrYqPtg8duQ1vidON9J
XmzBnJ76UznDBDOSOX+SHet8LjQ7Ura+3wxMh+vos7bxg8jDhkiaNvKxd49qFwd4nNHRvCzG4Puib5Dd9fpnHg5UAQsyKZhDiLYXEXMlp9woH43xy9EsrYb5
hmQr4NMsGG/A2KW1nyt8XL+LdSUYa6lDANtMXLT+cNVs/9fapqjbOBpV51Hfde+f9rtr9dOuu+5atDH+tvUg62Hsjxlb9EAyiStWvO+rgcSAmkpCPtkiv7+K
szS6hQrFk4mXxOss9EiwV2zeJFASmzz/9iLMStIsx6QfaDShGvYqKibk4MAY4YUx6Xrdt5kXheloEAoZKnapSt/2Tqq3voEhm69T1vqS/SVp7zcGhZtGkEG7
zz77lH0hNj3ybxhDtM4M43StkMtFO/Qo222DmL0yOa/4emySDLNE0aJtunzcYx/bkvByz1FeywfnJLt5UjiWSzbXfIZX537wWz3ykY9sDwqbPU3aplm2dz6T
vNbGutwLNc86TSpL+GwEsQzm5KRr62/rh8AkDSTGx0qm0f08TwYiLbKyV3N+cReSyBFUFg/fWUwiZ1IZ6ExPHLB3DZMWVX1W4shSv0MPPbQU0WdBxcQsdO7j
DFMnm4bC1rzir09Zs9Z7lvt+H3W00VG9xdLbVzAPssAQEpRrQ6H9NBbRaX8w48y3cEVocLlfGZP+QpMqxxkbF11SFvNaaMPdrzftvU1qmHTfdmGO9tKs12LN
HGafUkitE/FN7ENbau2iJ3RsJAkEYPpTD5sgpVivtDkITGIgE7Wkvias6OSpJNeR0NXYuFbyS029YeiCgK7hUJKLh2rN6cUxxVQgVJYZ64hIenjOs74bun3q
R/l1rnWta5W4eWYyz1P2OOJUtD9Ehs8k+xHcl/cKMfU+jhAtSQKHUzTnfZvxGoyyOBc9WyJJ8fBrJSYbIdFCc40dzkkqNgymkX0kwp7lLHpvhF9+KPL/MFUx
UclRxPTGJCZcmDlIOn+BCsYVE1jmkGJOY0qFPdNMLJSNfRhxklsJqZ1Wj/X63XhhjhI2Gsyh7AuAjbHLRMysyIwlmGBSZtd51c94t6ckTtQrQSPMapmynEnQ
mBVyKjyWSXYjidM4fDYlHD52gBeHsr5nDpyUi8s4sU/F2AuNdpCAcyPrvhWfNcmEtWEMxEaoG8dAZKdm62XvXg3ZsGavgIgQi51BbSObQYOZ2K8hIWKYOVZT
7OBak1mZ6ieSZ1L9+A/EeJ8aG4DQ7W5727IJiP0fWewsmga8BdAZD+Lyrxgx6otC4sstwHxcos4sXJL8rYVkDJUlOTSusvjoH++nMRB4yUJr4cKQLRYInr+J
yCF+EtdYbP0hix1fiWfqO4zC4iI6B+MXCSM6UB9ZLPmm+E3m4U8oFVjDP/4e4wI2Fm5JDPkjbKrEaPg+NoIwerhIIsr/p04IM8/EihtRj+4z4rzyIigaM2+J
TMsXjwgj9dPfor8IKKNIf4u2eu1rX1vmrrFAqJTBluAqotL8I4CY45X6IzCJgShlrCo7TxNWDIiSR0mMdEiWPq6KpCyg0qYPRXQIc0WSXDWOg1wLSbgmfQTb
6yjiY3EMaMZ5s2k7E3nYXKJuYr5j0Su5moZz74wqezO+k7BQG4wBPqS+e0JG1RVm4WQtZckdJkXDakgqDvWIBaDcNs3sF+Go5Xo+BmZR5kNms/3iaFj7Jphd
pGlxnVdly+m0qHT38IPwe0xLuDjv+sfi0Ia2UdLRzLvs1Zan75mCQxgoUUX2fEgBwiz6rgkpWEKAKL4yfSz3nv5m2uZX40/zvT9mRCmL7EtjxnPSYgifJV/d
auvq+ti4WtayCMEtJxiqx1akSSaswHXjGIi8PZ43y9ngFunYLdqG1FkWiL3Dmcr5J5su2nfffdecdC7PI+5u7PFc+Xc488LENRiMBvVhI843xhwzJPgRES4Z
Ut1Cj6nMyqtfQmoreb+6jLlP5W16ciyrMjjRZ8lJxTYvHBgT4kOaRmHOLM/TDxacSeM4mWRI2xMz/E575nr9buGRe8lG02mMc951yE2lIfnPu+je5Wlz5uvi
rOdbJFBgJqE1tJGCfWJZOYb59WTTleuNH8zc40sRpBH7M4pwQQjtBmEYGxio9cTaYk4TALtkPhBg+dveGPWSkZcwYjyFtjwYe9qwFWlhGAhHnKgpHTULha2+
SGnHHHNMOfA+zBNlQbDgGDwyV66FSBK0BpEyGJLEh6JRMhFaLlKiwEZl1Q0Vu2SnJfWsVRtaSztWcy8Gycmr3dk+k+nwww8vSfnGMROLHvxfGhOT5Jz3mvSk
ulnIJFdOnzO4pcfGPJx1b1Gwa9dkDhNX+ROdRQLVloMPPrhkRhYvL/swYYCDf1HIngzMe1T2gvWuI+bP0kBj2wyyWAsa0O8EL5kF9I/PmMm0DLhxRka5VnYH
zOOUSCSJGbpPWYjlgICCjFsMxv4xgT1PfvKTW/cawzkHwpxXkk4aNyIBrQcwyjHu1XiTMVngRxxQVQI6fP/617++PGcr/VsYBmKxsviK/Bnm8n0ANyDcL7xR
p4X9u9wmT/6tb33rHfLs9ymze43JlBlQu4PFe0whfAUl1DG1iq60KNWHBYz5ZjUhrN3nb+Z7Ep+UDt12W6BtZLORjFmA+c6EC19TGxkFBhFX7qF50ATgNIq5
9mmbiS0tCWFgHOPKcmwkVV/XGVfhwC+MS1i1PyGrFo7sIxqL8OJrhNSovk996h/Cl7O8zXy12GFuwqA3g5gB9XXfzY7zqqN5lJuQMTCpjtLcyBRF6p9EaTFw
rU2thB5CZPiRijXic5/9bLndusH6MSmyTaSgcdcd/97TVswB9aNhYE6eMWzmtnYwj1kDbDnYSrQwDASoYs2pjbPa218QOfNJA3E6YDl8R5mxSbElka6VMDVS
DynW4hQRQaXcAyM2Xsw+DWWYSLLqYrBhbhauZSWTQP/kORbDk2ncZ9KYvRiYqGvWEnIJa2VE2omJMGIgzBsmfh9i98acMBUTQp37njvSp/y1XMPEos0OMNsM
ovF5PnPWRlJqnIQSzCOZib5yRMAkIulbR5j+MD7zkKkrIiiLYBmBOuV2AgStg1A3vOh3y6c1y4ZA23CsgPFsT5ecZH0pIgHLGDPOCDFbhRaKgViEDdZ0lq4W
ZKqok/1S+3A/57fT3+ZBUj4wf3DKkmT4WDhpR50GxiQXESKlPerAFLEVyGTGlNmGmRyZiKj4zCy0DJIW/5OJT+Paa6+9SrNzIVqLBMYEAf9pCQQNanXp63Sm
RZnYNBbCAEkRw1wEkniRdhT5kTalOk72w4z7aH7zqiDtwx4qfUiqZ0GwLsRRvlOtE8aZ/SnGofHSJUxi2IfG2U0DGf6+e58x4fnDggtGhhlgTvwpmNEkslYQ
QLVtmgY1qZxF+m0SA+mfUS3QnQdFp5f4/GOOOWamkFthmkJ2hesKhxTeK5GjNBrzICF/Yc4pYYzKta8gpN0d0rmEo73UwT4ECd6kQpm0b2QedduoMoRw2gcQ
0nrZtxO24JIYLjS0EiIpzNMeHCTkWdI4FNJeeY1JVl5n+RcLStkDIJmcvR36YxQZB8J8w3w16ucdvjNO1Dns3CWdR2ixzQWjjM0mWIVQVcJTr3pW8s5unWDq
Gn/aGgyw/Hmv/d0//ROLZEns6fVXkeDz19Fubc8/13ivz2IRL68+G+exGBdsxoXKduu11vdSEOnf0CJKKp1g6GVehwlrYtH20USkXQk9l87IeOmSvUj22Rij
xo5QcqHfUrBYc6RjCUGie0vBS2JDCS/hIqQdrsKc4Ww/kT1efjd+JlGY00q6JeuTbQXCzI3VrUobzkDka7Hfwl4QA3gWcB0AJaZfnh4D0GIjI+U8yADDCGTq
tJFKjiUDsEsOf5Gj36APyaZc0/19K7x/5StfWSZLOHYLxhbcURSO6oKBjLMmnuviiNrBpSZgmJnKXhB4hY9k8Nu4N/b56FObHZ3RMIpsvrMY+utDxpoxoo7y
soW5ZOyZGn3Km9c1oQWVzW8Ytn0MFrCy+MfZJs438d5fMopsczISjCWZTEitY6tl4cNA7TuxyXL4z34Qf/YEbQRZ+DECm0VtCiWkdcfNqDqEFlD2GWEO9sxY
1LsEo7AKNHGUdDlHg3BjPtsTIisvwUgG6VNib9AN9thjIAR9ODat2ktmThsjGCqs4OHe4fnffab3cE/m7j6MAyMLzbKJEwjLJmLr1FakDWcgQNTBUh+Hv2Cm
FN7KwERIDfMmA03KatJYmGhK8QYIpmKASohH27FbGBObNujnXb+NKM8kJDlhnsOTdPj5do9Lk20BsPhJhkhiC7W/pKz+YWxYhBuCa/geyi5hkxmmtMg4yrNs
xJQJwMKmb5VHE7QT2WIzTHb1u5eWNCoZYPd6iy4J2yKhXExPvReBMFWLnLkQppmycMHAQu/P4oUp53vX+ztvfH/eeO1+71p/ySi85ntl5mfvMXKLmj7xisFs
pAatTeH07t0FNpiGs78s1ua9cUQIxTwvEmPparHYa8t5AjP9jRHamGpsapvrtRmjthmZECG5JEwIhLG3q1gRzP9xhFlj6sq3sVGfySDgM4ZuHFrbCMkOryIc
2O2PCYWJbFyxS/39pjAQO6DRR2Jnqaygi0S7xABz4puBkWRiRSRHE2mwyyQnOcUBNUU9zmu20ivTgF3cdv5PW1SYJCMcspghSPmkNvdblOz8lX3Agqcck47p
wgSmlSBmJKfrXSImd/dZkdyvZEa2s5gJbZjsLDZpLSwYzyQykZkanccQoaJNnMNedilPumejfiPxyuYMO8zAgpaLOwy7mGxUnRbtObIlEEyMn7cfdVSxYGAA
mWLltDiF8aPBTPbae+/mCiFsSGcSPpVi3YhQ23Iei3Ns4ArvywZz+UEIHhiCseHkQlrQOOZBmyAQEVbcQyAiVGJKTGOYIQaEgWBiCNNi2lY+oZPAc8ABBywa
tHOpT3EeRUk7vAYw6xLaF5JqiXKKNOLRD4tHnEacipxvSMy40OOYzG0cdztwpIX0s3iVX2ONRDXZMMkJGKaTFaWFml4iWYRFclbGAj74HUZHve1tJeiAU7ZL
kjfGpOt+Vd7HZNzhu/zCszhWOfKF9w6TaCph1UI4p/WDeH7je9hBOlxm/bx4CAi9DQ2tbCrMDcgh8beRJqgEt1hLEOd1jjFRgMNZsd0zapy86JBDytiwh8T9
gmU+/vGPt8HUB5GinOdCdwWXRGqdkU5+ZYvYEuTTJXNFZGcIByUwpfvbsrxfKCc6lkcilZNIfiimhfXKY+9ZsxAnIqmQFiJxI4cYyYLdVV4YFJ0/S9ELf4/8
XUw9ciORiJE++lRIdd8OSa/rjCV1kdqYhJgMmAckBPxknFZIWssgB/ZhxHQiFxFNgFrPlt0l0p0/kjdpUX4oeNNo2Ke7FPsWikbIVBCbyYpJitQ3TBIt+t35
JBuRpHD4+fXz7AjQ+iUsNR4PD3NXhOqXwoyjCJkviSlDuChmLOPFH6KZ5DimnTI7mb+0OfM2tTqmpzfGyaOc4xFJWMascWvc+y41kjyZtBR+1j9rg/rRHM0P
497z9w4tqOvXpaUwkdF8+UaC0RUTbbesZX6/U1R+7EpINaM+TjtQahYA+A+odBxY1MxFIo5WC44kbtRbDIUpZVxE0CLVfS11MblC2iiRVZyNGa3CJ2KS+Iz5
h7RVTFwmNnMURmHhR8UOHXZ4QoFFPl/Z5qn+3wqV/lthEuAYdm2aaXJSY0qy7VL5XeMIZBM9NJwdGI7f999//8Jg9BXHuMSVyo0QytJ3Mg3z4wh28Ft3AVkL
VvXe9UeArzHSlJQxJ8rR4sxfZFwhJi2OdWa/G0cW5/OFQJL9a6yJlDR2/BlHeV/WXHLGCF0uJijji8m1S0yknkEwMaaMe4yDOUv55sPFYpx7Lp+eiK8cx91y
vFcX/jdmLuY3Qtey0KRkipvGQCw+BoNIJwuGzlkEMgBIFqQGJLts7HovkvMi1G8964Bp8EnFnpZywmCfZ5lI/CUkMRON45JmkdrLuDL4QDCA1E7cS7LEQLwm
paDBz0IatUCgnKjKOOSQQ0p9Y0PZ4DfXYVqRiqLYoGk8w/eWi+u/hURAX/GXiQZkqbBoIxI97UN6fq8WfYv7OMpxaVzlmMlr+TQzyo+Wy0mPrEVCjAlH3hvj
6uO9sb1b+D2uEZGGozTeLNsrBkRDIswQxJ2Xzj9jTMZGxe6lC/1+IRkIcDlpnZGwKMwje9HAIFGon2idSQM079kKryKeLMZhA16Y4AYSIA2QFiyiBnMxmS0G
+Qp7MfscpadGiKYJzwTBwd+VKl2PhheS8mX9t1AIdPuW2ZTAeWzsl2ECIs0bF+YlgQczoUkQSM3bUdQtz+80ifBNFCHDAk+TdQ4QMpY8z+/MUYQcGrZ1Ic1k
5cKz/imbEEVTF/7/+RDEPhsMD9MwHjExhIkRZAho0/a7nFX0QrwsJAOBjI5JqXEhkDqrEgaEzh5WeRepjvOui0kjfNaZIMx2i7LIMnHxYfB1qBfT4vBiMG8s
anmLjYCoKOfHMDVjKhZqxA/Hl5H+LsKD8TOKDjvssOLbZJo+PPwrTLT2Mk3TnJlrMR/akefabMxPKtqqu5YxVfGhMJ/6IwQxy2IgzMDLRJMYyHjdbwNaSIIg
KS4ylcVKBUPi3cpkMjLdYeqczenT2Ow2k/rsL0H8GcItF4W5bTY22/H55iNGYVHzxxTqEC7aqcAX+8veFI5xfhHaSaQ9auwOp43y6SYxUTOHRcqcsp9Ddgva
Qu57Mg/MB9oJ0y6NBLPANHzPr4KYbK1hfCwEMGHs9oapI//fNDNX1mdZXzfNB7KsgG3VepOiDjrooGIaWESTImGDo9zO3spAtuoonN4uDCRp1DignTgSl99E
6hImJUT63zuc8MYQLSPOFiqnikZG38J4bJq1P4jWwqGOUdhMnMILTYb2EDnhyr4jATWis1xP2+j67bJ+W+V1kgZSGchW6eVt0o5cQEYtHtsEgtrMngjQTmgO
mEkk+Gw+EWavX5y1gdX4EYVlUzOTVCQMLWYoRdtsiDEIAaZNMD9hHjSN4dDznlVZ6ssmMZBNNWEtNaq18puCQGUcmwL7Uj6UryGOZCh/fGiSLAr7tTcIkxD9
SSChQUibwwneNT9tl+CZtXRuZSBrQa/eWxGoCCwNAsxY/oTRJqVGKyR4HLmmCi6j0dlx6+7o6+q3FYGKQEVgyyHQhzH0uWbLAdOzQZWB9ASqXlYRqAhUBCoC
KxGoDGQlHvVTRaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUv
qwhUBCoCFYGVCFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHo
iUBlID2BqpdVBCoCFYGKwEoEKgNZiUf9VBGoCFQEKgI9EagMpCdQ9bKKQEWgIlARWIlAZSAr8aifKgIVgYpARaAnApWB9ASqXlYRqAhUBCoCKxGoDGQlHvVT
RaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUvqwhUBCoCFYGV
CFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHoicBEBtK2bfO7
3/2uZ1H1sopARaAiUBHYaghM4gETGchvfvOb5sc//vFWw6O2pyJQEagIVAR6IHDmmWc2P/3pT8deOZGB/Pa3v22+/vWvj725/lARqAhUBCoCWxeB008/vfnG
N74xtoETGYi7TjjhhLE31x8qAhWBikBFYOsi8JWvfKU57bTTxjZwKgP5wAc+0FBjKlUEKgIVgYrA9kLgmGOOaX7/+9+PbfRUBvLJT36y+exnPzu2gPpDRaAi
UBGoCGw9BDCOt771rRMbNpWBcKS/6lWvmlhI/bEiUBGoCFQEthYCH/zgB5tPfOITExu1U/zaTrwifrzABS7QHH/88c21r33taZfW3ysCFYGKQEVgyRGgfdz+
9rdvmLAm0VQNxM088U95ylOaM844Y1JZ9beKQEWgIlAR2AIIHHbYYVOZh2aePf4O8mYafelLX2rOd77zNTe/+c2nXVp/rwhUBCoCFYElReDEE09s7n//+ze/
/vWvp7agNwNR0kc+8pHm6le/ejVlTYW1XlARqAhUBJYPAfv+7nWve03c+9FtVS8TVt7Aof6ABzygeeMb35hf1deKQEWgIlAR2AIInHzyyc3d7na35vOf/3zv
1qxKA1GqvCjvfOc7i3qz5557Nuc85zl7P6xeWBGoCFQEKgKLh8Db3/72Zt99922+/OUvr6pyvaKwxpV405vetHn605/e3OlOd2p22klRlSoCFYGKQEVgWRCg
dTzvec9rjjjiiJmCpNbEQBKk29zmNs1+++3X3Pa2t2122WWX/Lq+VgQqAhWBisCCISCqVooqroi3ve1tE5MlTqv6XBhIPuTiF794s/vuuze77bZbc6UrXam5
yEUu0pztbKtys2RR9bUiUBGoCFQE5oTAL3/5y+ab3/xmc9JJJzWf+cxnmq9+9atzKXmuDGQuNaqFVAQqAhWBisBSIFDVg6XoplrJikBFoCKweAhUBrJ4fVJr
VBGoCFQElgKBykCWoptqJSsCFYGKwOIhUBnI4vVJrVFFoCJQEVgKBCoDWYpuqpWsCFQEKgKLh0BlIIvXJ7VGFYGKQEVgKRDYOWr5q6Woaa1kRaAiUBGoCCwU
Av8fgwPy24mbuF8AAAAASUVORK5CYII=
`

View File

@@ -87,7 +87,8 @@ type LlamaServer interface {
type llmServer struct {
port int
cmd *exec.Cmd
done chan error // Channel to signal when the process exits
done chan struct{} // closed when the process exits
doneErr error // valid after done is closed
status *StatusWriter
options api.Options
modelPath string
@@ -280,7 +281,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
sem: semaphore.NewWeighted(int64(numParallel)),
totalLayers: f.KV().BlockCount() + 1,
loadStart: time.Now(),
done: make(chan error, 1),
done: make(chan struct{}),
}
if err != nil {
@@ -304,10 +305,11 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
if strings.Contains(s.status.LastErrMsg, "unknown model") {
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
}
s.done <- errors.New(s.status.LastErrMsg)
s.doneErr = errors.New(s.status.LastErrMsg)
} else {
s.done <- err
s.doneErr = err
}
close(s.done)
}()
if tok != nil {
@@ -1356,8 +1358,8 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
case <-ctx.Done():
slog.Warn("client connection closed before server finished loading, aborting load")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
return fmt.Errorf("llama runner process has terminated: %w", err)
case <-s.done:
return fmt.Errorf("llama runner process has terminated: %w", s.doneErr)
default:
}
if time.Now().After(stallTimer) {

View File

@@ -0,0 +1,144 @@
package server
import (
"bytes"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/envconfig"
)
// inferenceRequestLogger writes debug copies of inference request bodies,
// plus small shell scripts that replay each request with curl, into a
// temporary directory.
type inferenceRequestLogger struct {
	dir     string // destination directory for the logged artifacts
	counter uint64 // per-process sequence number; updated via atomic.AddUint64
}
// newInferenceRequestLogger creates a logger backed by a fresh temporary
// directory ("ollama-request-logs-*"); the directory is not removed by the
// logger itself.
func newInferenceRequestLogger() (*inferenceRequestLogger, error) {
	tmpDir, err := os.MkdirTemp("", "ollama-request-logs-*")
	if err != nil {
		return nil, err
	}
	logger := &inferenceRequestLogger{dir: tmpDir}
	return logger, nil
}
// initRequestLogging enables inference request debug logging when the
// OLLAMA_DEBUG_LOG_REQUESTS environment variable is set; otherwise it is a
// no-op. On success the logger is attached to the server so route
// registration can wrap inference handlers with it.
func (s *Server) initRequestLogging() error {
	if !envconfig.DebugLogRequests() {
		return nil
	}
	requestLogger, err := newInferenceRequestLogger()
	if err != nil {
		return fmt.Errorf("enable OLLAMA_DEBUG_LOG_REQUESTS: %w", err)
	}
	s.requestLogger = requestLogger
	// Structured fields (rather than a preformatted fmt.Sprintf string) keep
	// the log directory machine-extractable from the log stream.
	slog.Info("request debug logging enabled; inference request logs include request bodies and replay curl commands", "dir", requestLogger.dir)
	return nil
}
// withInferenceRequestLogging prepends the request-logging middleware for the
// given route when debug logging is enabled; otherwise it returns the handler
// chain unchanged.
func (s *Server) withInferenceRequestLogging(route string, handlers ...gin.HandlerFunc) []gin.HandlerFunc {
	if s.requestLogger == nil {
		return handlers
	}
	chain := make([]gin.HandlerFunc, 0, len(handlers)+1)
	chain = append(chain, s.requestLogger.middleware(route))
	chain = append(chain, handlers...)
	return chain
}
// middleware returns a gin handler that captures the request body for the
// given route, restores it so downstream handlers can read it, runs the rest
// of the chain, and finally persists the body plus a replay script via log.
func (l *inferenceRequestLogger) middleware(route string) gin.HandlerFunc {
	return func(c *gin.Context) {
		// Defensive: skip logging entirely if gin handed us no request.
		if c.Request == nil {
			c.Next()
			return
		}
		method := c.Request.Method
		host := c.Request.Host
		scheme := "http"
		if c.Request.TLS != nil {
			scheme = "https"
		}
		contentType := c.GetHeader("Content-Type")
		var body []byte
		if c.Request.Body != nil {
			var err error
			// Read the entire body up front, then replace it with an
			// in-memory reader so the real handler sees the same bytes.
			body, err = io.ReadAll(c.Request.Body)
			c.Request.Body = io.NopCloser(bytes.NewReader(body))
			if err != nil {
				// NOTE(review): on a read error `body` may be truncated; the
				// truncated bytes are still forwarded and logged below.
				slog.Warn("failed to read request body for debug logging", "route", route, "error", err)
			}
		}
		// Run the handler chain first so logging I/O does not delay the response.
		c.Next()
		l.log(route, method, scheme, host, contentType, body)
	}
}
// log persists a single captured request: the raw body as a .json file and a
// sibling shell script that replays the request with curl. Failures are
// logged and swallowed; debug logging must never affect request handling.
func (l *inferenceRequestLogger) log(route, method, scheme, host, contentType string, body []byte) {
	// A nil or unconfigured logger silently drops the request.
	if l == nil || l.dir == "" {
		return
	}
	if contentType == "" {
		contentType = "application/json"
	}
	// Fill in whatever the request did not carry from the configured host.
	if host == "" || scheme == "" {
		fallback := envconfig.Host()
		if host == "" {
			host = fallback.Host
		}
		if scheme == "" {
			scheme = fallback.Scheme
		}
	}
	slug := sanitizeRouteForFilename(route)
	seq := atomic.AddUint64(&l.counter, 1)
	stamp := fmt.Sprintf("%s-%06d", time.Now().UTC().Format("20060102T150405.000000000Z"), seq)
	bodyName := fmt.Sprintf("%s_%s_body.json", stamp, slug)
	scriptName := fmt.Sprintf("%s_%s_request.sh", stamp, slug)
	bodyPath := filepath.Join(l.dir, bodyName)
	scriptPath := filepath.Join(l.dir, scriptName)
	if err := os.WriteFile(bodyPath, body, 0o600); err != nil {
		slog.Warn("failed to write debug request body", "route", route, "error", err)
		return
	}
	// requestURL (not `url`) avoids shadowing the net/url package name.
	requestURL := fmt.Sprintf("%s://%s%s", scheme, host, route)
	script := fmt.Sprintf("#!/bin/sh\nSCRIPT_DIR=\"$(CDPATH= cd -- \"$(dirname -- \"$0\")\" && pwd)\"\ncurl --request %s --url %q --header %q --data-binary @\"${SCRIPT_DIR}/%s\"\n", method, requestURL, "Content-Type: "+contentType, bodyName)
	if err := os.WriteFile(scriptPath, []byte(script), 0o600); err != nil {
		slog.Warn("failed to write debug request replay command", "route", route, "error", err)
		return
	}
	slog.Info(fmt.Sprintf("logged to %s, replay using curl with `sh %s`", bodyPath, scriptPath))
}
// sanitizeRouteForFilename converts an HTTP route into a filename-safe
// fragment: the leading slash is dropped, ASCII letters and digits are kept,
// and every other rune becomes an underscore. An empty route maps to "root".
func sanitizeRouteForFilename(route string) string {
	trimmed := strings.TrimPrefix(route, "/")
	if trimmed == "" {
		return "root"
	}
	return strings.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		default:
			return '_'
		}
	}, trimmed)
}

View File

@@ -63,6 +63,7 @@ const (
cloudErrRemoteModelDetailsUnavailable = "remote model details are unavailable"
cloudErrWebSearchUnavailable = "web search is unavailable"
cloudErrWebFetchUnavailable = "web fetch is unavailable"
copilotChatUserAgentPrefix = "GitHubCopilotChat/"
)
func writeModelRefParseError(c *gin.Context, err error, fallbackStatus int, fallbackMessage string) {
@@ -100,6 +101,7 @@ type Server struct {
addr net.Addr
sched *Scheduler
defaultNumCtx int
requestLogger *inferenceRequestLogger
}
func init() {
@@ -1157,6 +1159,17 @@ func (s *Server) ShowHandler(c *gin.Context) {
return
}
userAgent := c.Request.UserAgent()
if strings.HasPrefix(userAgent, copilotChatUserAgentPrefix) {
if resp.ModelInfo == nil {
resp.ModelInfo = map[string]any{}
}
// Copilot Chat prefers `general.basename`, but this is usually not what
// users are familiar with, so let's just echo back what we had returned in
// `/api/tags`
resp.ModelInfo["general.basename"] = req.Model
}
c.JSON(http.StatusOK, resp)
}
@@ -1686,26 +1699,26 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
// Inference
r.GET("/api/ps", s.PsHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/generate", s.withInferenceRequestLogging("/api/generate", s.GenerateHandler)...)
r.POST("/api/chat", s.withInferenceRequestLogging("/api/chat", s.ChatHandler)...)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
// Inference (OpenAI compatibility)
// TODO(cloud-stage-a): apply Modelfile overlay deltas for local models with cloud
// parents on v1 request families while preserving this explicit :cloud passthrough.
r.POST("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/chat/completions", s.withInferenceRequestLogging("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)...)
r.POST("/v1/completions", s.withInferenceRequestLogging("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)...)
r.POST("/v1/embeddings", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.EmbeddingsMiddleware(), s.EmbedHandler)
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
r.GET("/v1/models/:model", cloudModelPathPassthroughMiddleware(cloudErrRemoteModelDetailsUnavailable), middleware.RetrieveMiddleware(), s.ShowHandler)
r.POST("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)
r.POST("/v1/responses", s.withInferenceRequestLogging("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)...)
// OpenAI-compatible image generation endpoints
r.POST("/v1/images/generations", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
r.POST("/v1/images/edits", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageEditsMiddleware(), s.GenerateHandler)
// Inference (Anthropic compatibility)
r.POST("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
r.POST("/v1/messages", s.withInferenceRequestLogging("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)...)
if rc != nil {
// wrap old with new
@@ -1757,6 +1770,9 @@ func Serve(ln net.Listener) error {
}
s := &Server{addr: ln.Addr()}
if err := s.initRequestLogging(); err != nil {
return err
}
var rc *ollama.Registry
if useClient2 {

View File

@@ -0,0 +1,128 @@
package server
import (
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/gin-gonic/gin"
)
// TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts verifies that the
// logging middleware (1) leaves the request body readable for the downstream
// handler, and (2) writes exactly one body log and one curl replay script
// whose contents reference each other correctly.
func TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts(t *testing.T) {
	gin.SetMode(gin.TestMode)
	logDir := t.TempDir()
	requestLogger := &inferenceRequestLogger{dir: logDir}
	const route = "/v1/chat/completions"
	const requestBody = `{"model":"test-model","messages":[{"role":"user","content":"hello"}]}`
	var bodySeenByHandler string
	r := gin.New()
	// The handler re-reads the body to prove the middleware restored it.
	r.POST(route, requestLogger.middleware(route), func(c *gin.Context) {
		body, err := io.ReadAll(c.Request.Body)
		if err != nil {
			t.Fatalf("failed to read body in handler: %v", err)
		}
		bodySeenByHandler = string(body)
		c.Status(http.StatusOK)
	})
	req := httptest.NewRequest(http.MethodPost, route, strings.NewReader(requestBody))
	req.Host = "127.0.0.1:11434"
	req.Header.Set("Content-Type", "application/json")
	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}
	if bodySeenByHandler != requestBody {
		t.Fatalf("handler body mismatch:\nexpected: %s\ngot: %s", requestBody, bodySeenByHandler)
	}
	// Exactly one body artifact should exist for the sanitized route name.
	bodyFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_body.json"))
	if err != nil {
		t.Fatalf("failed to glob body logs: %v", err)
	}
	if len(bodyFiles) != 1 {
		t.Fatalf("expected 1 body log, got %d (%v)", len(bodyFiles), bodyFiles)
	}
	// ...and exactly one matching replay script.
	curlFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_request.sh"))
	if err != nil {
		t.Fatalf("failed to glob curl logs: %v", err)
	}
	if len(curlFiles) != 1 {
		t.Fatalf("expected 1 curl log, got %d (%v)", len(curlFiles), curlFiles)
	}
	bodyData, err := os.ReadFile(bodyFiles[0])
	if err != nil {
		t.Fatalf("failed to read body log: %v", err)
	}
	if string(bodyData) != requestBody {
		t.Fatalf("body log mismatch:\nexpected: %s\ngot: %s", requestBody, string(bodyData))
	}
	curlData, err := os.ReadFile(curlFiles[0])
	if err != nil {
		t.Fatalf("failed to read curl log: %v", err)
	}
	curlString := string(curlData)
	// The script must target the original host/route...
	if !strings.Contains(curlString, "http://127.0.0.1:11434"+route) {
		t.Fatalf("curl log does not contain expected route URL: %s", curlString)
	}
	// ...and post the sibling body file relative to the script's directory.
	bodyFileName := filepath.Base(bodyFiles[0])
	if !strings.Contains(curlString, "@\"${SCRIPT_DIR}/"+bodyFileName+"\"") {
		t.Fatalf("curl log does not reference sibling body file: %s", curlString)
	}
}
// TestNewInferenceRequestLoggerCreatesDirectory verifies that the constructor
// returns a non-nil logger whose temporary directory exists on disk.
func TestNewInferenceRequestLoggerCreatesDirectory(t *testing.T) {
	requestLogger, err := newInferenceRequestLogger()
	if err != nil {
		t.Fatalf("expected no error creating request logger: %v", err)
	}
	// Guard before any dereference: the original registered a t.Cleanup that
	// read requestLogger.dir before checking requestLogger for nil.
	if requestLogger == nil || requestLogger.dir == "" {
		t.Fatalf("expected request logger directory to be set")
	}
	t.Cleanup(func() {
		_ = os.RemoveAll(requestLogger.dir)
	})
	info, err := os.Stat(requestLogger.dir)
	if err != nil {
		t.Fatalf("expected directory to exist: %v", err)
	}
	if !info.IsDir() {
		t.Fatalf("expected %q to be a directory", requestLogger.dir)
	}
}
// TestSanitizeRouteForFilename checks representative API routes against their
// expected sanitized filename fragments.
func TestSanitizeRouteForFilename(t *testing.T) {
	cases := map[string]string{
		"/api/generate":        "api_generate",
		"/v1/chat/completions": "v1_chat_completions",
		"/v1/messages":         "v1_messages",
	}
	for route, want := range cases {
		if got := sanitizeRouteForFilename(route); got != want {
			t.Fatalf("sanitizeRouteForFilename(%q) = %q, want %q", route, got, want)
		}
	}
}

View File

@@ -721,6 +721,111 @@ func TestShow(t *testing.T) {
}
}
// TestShowCopilotUserAgentOverwritesExistingBasename verifies that /api/show
// replaces an upstream-provided general.basename with the requested model
// name when the caller's User-Agent identifies as GitHub Copilot Chat, while
// other callers see the original upstream value and unrelated model info is
// preserved.
func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	var s Server
	// Create a remote-backed model carrying its own base_name metadata so the
	// overwrite (vs. fill-in) path is exercised.
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "show-model",
		From:       "bob",
		RemoteHost: "https://ollama.com",
		Info: map[string]any{
			"model_family": "gptoss",
			"base_name":    "upstream-base-name",
		},
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
	}
	h, err := s.GenerateRoutes(nil)
	if err != nil {
		t.Fatal(err)
	}
	// makeRequest hits /api/show with an optional User-Agent and decodes the
	// response.
	makeRequest := func(userAgent string) api.ShowResponse {
		t.Helper()
		w := httptest.NewRecorder()
		req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-model"}`))
		req.Header.Set("Content-Type", "application/json")
		if userAgent != "" {
			req.Header.Set("User-Agent", userAgent)
		}
		h.ServeHTTP(w, req)
		if w.Code != http.StatusOK {
			t.Fatalf("expected status code 200, actual %d", w.Code)
		}
		var resp api.ShowResponse
		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
			t.Fatal(err)
		}
		return resp
	}
	// Without the Copilot UA, the upstream basename passes through untouched.
	withoutCopilot := makeRequest("")
	if withoutCopilot.ModelInfo["general.basename"] != "upstream-base-name" {
		t.Fatalf("expected general.basename to be %q, got %v", "upstream-base-name", withoutCopilot.ModelInfo["general.basename"])
	}
	// With the Copilot UA, basename is replaced by the requested model name.
	withCopilot := makeRequest("GitHubCopilotChat/0.41.1")
	if withCopilot.ModelInfo["general.basename"] != "show-model" {
		t.Fatalf("expected general.basename to be %q, got %v", "show-model", withCopilot.ModelInfo["general.basename"])
	}
	// Other model info keys must survive the overwrite.
	if withCopilot.ModelInfo["general.architecture"] != "gptoss" {
		t.Fatalf("expected general.architecture to be %q, got %v", "gptoss", withCopilot.ModelInfo["general.architecture"])
	}
}
// TestShowCopilotUserAgentSetsBasenameWhenModelInfoIsEmpty verifies that for
// a Copilot Chat caller, /api/show populates general.basename even when the
// model has no model info at all — and that the synthesized map contains only
// that single key.
func TestShowCopilotUserAgentSetsBasenameWhenModelInfoIsEmpty(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	var s Server
	// Create a remote-backed model WITHOUT any Info metadata so ModelInfo
	// starts out nil.
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "show-remote",
		From:       "bob",
		RemoteHost: "https://ollama.com",
		Stream:     &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
	}
	h, err := s.GenerateRoutes(nil)
	if err != nil {
		t.Fatal(err)
	}
	w = httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-remote"}`))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", "GitHubCopilotChat/0.41.1")
	h.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}
	var resp api.ShowResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}
	if resp.ModelInfo["general.basename"] != "show-remote" {
		t.Fatalf("expected general.basename to be %q, got %v", "show-remote", resp.ModelInfo["general.basename"])
	}
	// The handler should have created the map solely to hold the basename.
	if len(resp.ModelInfo) != 1 {
		t.Fatalf("expected model_info to contain only general.basename, got %#v", resp.ModelInfo)
	}
}
func TestNormalize(t *testing.T) {
type testCase struct {
input []float32

View File

@@ -109,7 +109,7 @@ func ConfigFromModelfile(modelfile *parser.Modelfile) (string, *ModelfileConfig,
type CreateOptions struct {
ModelName string
ModelDir string
Quantize string // "int4", "int8", "nvfp4", or "mxfp8" for quantization
Quantize string // "int4", "int8", "nvfp4", "mxfp4", or "mxfp8" for quantization
Modelfile *ModelfileConfig // template/system/license/parser/renderer/parameters from Modelfile
}
@@ -280,7 +280,7 @@ func newPackedTensorLayerCreator() create.PackedTensorLayerCreator {
if !QuantizeSupported() {
return create.LayerInfo{}, fmt.Errorf("quantization requires MLX support")
}
blobData, err := quantizePackedGroup(tensors)
blobData, err := quantizePackedGroup(groupName, tensors)
if err != nil {
return create.LayerInfo{}, fmt.Errorf("failed to quantize packed group %s: %w", groupName, err)
}

View File

@@ -7,29 +7,27 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"github.com/ollama/ollama/x/create"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/mlxrunner/mlx"
"github.com/ollama/ollama/x/mlxrunner/model"
)
// quantizeParams maps quantization type names to MLX quantize parameters.
var quantizeParams = map[string]struct {
groupSize int
bits int
mode string
}{
"int4": {64, 4, "affine"},
"nvfp4": {16, 4, "nvfp4"},
"int8": {64, 8, "affine"},
"mxfp8": {32, 8, "mxfp8"},
}
// loadAndQuantizeArray writes a safetensors reader to a temp file, loads it with MLX,
// quantizes the tensor, and appends the resulting arrays (weight, scale, optional bias)
// to the provided maps. If quantize is empty, the tensor is kept as-is.
// Returns any temp file paths created (caller must clean up) and arrays needing eval.
func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]*mlx.Array) (tmpPath string, toEval []*mlx.Array, nativeHandle *mlx.SafetensorsFile, err error) {
if quantize != "" {
if gs, _, _ := model.QuantizationParams(quantize); gs == 0 {
return "", nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
}
}
tmpDir := ensureTempDir()
tmpFile, err := os.CreateTemp(tmpDir, "quant-*.safetensors")
@@ -50,11 +48,16 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
}
// Find the tensor key (may differ from name for single-tensor blobs)
inputKey, err := findSafetensorsKey(tmpPath)
header, err := readSafetensorsHeader(tmpPath)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to read blob header for %s: %w", name, err)
}
inputKey, err := safetensorsKey(name, header)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to resolve tensor key for %s: %w", name, err)
}
arr := st.Get(inputKey)
if arr == nil {
@@ -62,34 +65,46 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
return tmpPath, nil, nil, fmt.Errorf("tensor %q not found in safetensors", inputKey)
}
// Decode FP8 source encoding before checking quantize, so that callers
// requesting decode-only (quantize="") receive usable float data.
if info, ok := header[inputKey]; ok && info.Dtype == "F8_E4M3" {
scaleKey := inputKey + ".scale_inv"
scaleInv := st.Get(scaleKey)
if scaleInv == nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("missing companion tensor %q for fp8 source tensor %q", scaleKey, inputKey)
}
arr, err = decodeSourceFP8Tensor(arr, scaleInv)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to decode fp8 tensor %s: %w", inputKey, err)
}
mlx.Eval(arr)
}
if quantize == "" {
arr = mlx.Contiguous(arr)
arr = mlx.Contiguous(arr, false)
arrays[name] = arr
return tmpPath, []*mlx.Array{arr}, st, nil
}
// Convert to float type if needed (quantize expects float)
if arr.Dtype() != mlx.DtypeBFloat16 && arr.Dtype() != mlx.DtypeFloat32 && arr.Dtype() != mlx.DtypeFloat16 {
arr = mlx.AsType(arr, mlx.DtypeBFloat16)
if arr.DType() != mlx.DTypeBFloat16 && arr.DType() != mlx.DTypeFloat32 && arr.DType() != mlx.DTypeFloat16 {
// Convert to float type if needed (quantize expects float)
arr = arr.AsType(mlx.DTypeBFloat16)
mlx.Eval(arr)
}
params, ok := quantizeParams[quantize]
if !ok {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
}
groupSize, bits, mode := model.QuantizationParams(quantize)
qweight, scales, qbiases := mlx.Quantize(arr, groupSize, bits, mode)
qweight, scales, qbiases := mlx.Quantize(arr, params.groupSize, params.bits, params.mode)
qweight = mlx.Contiguous(qweight)
scales = mlx.Contiguous(scales)
qweight = mlx.Contiguous(qweight, false)
scales = mlx.Contiguous(scales, false)
arrays[name] = qweight
arrays[name+".scale"] = scales
toEval = append(toEval, qweight, scales)
if qbiases != nil {
qbiases = mlx.Contiguous(qbiases)
qbiases = mlx.Contiguous(qbiases, false)
arrays[name+".bias"] = qbiases
toEval = append(toEval, qbiases)
}
@@ -101,27 +116,45 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
// and returns a single combined safetensors blob with the quantized weight, scale, and optional bias.
// Tensor keys use the original tensor name: name, name.scale, name.bias.
// The blob includes __metadata__ with quant_type and group_size.
// Supported quantization types: "int4", "nvfp4", "int8", "mxfp8".
// Supported quantization types: "int4", "nvfp4", "mxfp4", "int8", "mxfp8".
func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quantize string) (blobData []byte, err error) {
arrays := make(map[string]*mlx.Array)
tmpPath, toEval, st, err := loadAndQuantizeArray(r, tensorName, quantize, arrays)
if tmpPath != "" {
defer os.Remove(tmpPath)
}
if st != nil {
defer st.Free()
}
if err != nil {
return nil, err
}
finalArrays := make([]*mlx.Array, 0, len(arrays))
for _, arr := range arrays {
if arr != nil {
finalArrays = append(finalArrays, arr)
}
}
mlx.Pin(finalArrays...)
defer func() {
if st != nil {
st.Free()
}
mlx.Unpin(finalArrays...)
mlx.Sweep()
}()
mlx.Eval(toEval...)
mlx.Sweep()
// Free early to release mmap; defer guard handles error paths
if st != nil {
st.Free()
st = nil
}
// Build metadata for single-tensor blobs
params := quantizeParams[quantize]
groupSize, _, _ := model.QuantizationParams(quantize)
metadata := map[string]string{
"quant_type": quantize,
"group_size": strconv.Itoa(params.groupSize),
"group_size": strconv.Itoa(groupSize),
}
tmpDir := ensureTempDir()
@@ -135,48 +168,81 @@ func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quanti
// quantizePackedGroup quantizes multiple tensors and saves them all into a single
// combined safetensors blob. Used for packing expert groups.
// When the inputs are per-expert 2D tensors (e.g., experts.0.gate_proj.weight),
// they are stacked into 3D switch_mlp tensors before quantization.
// Each tensor may have a different quantization type (mixed-precision).
// Returns the blob bytes. No __metadata__ is added because different tensors
// may use different quantization types.
func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
// Returns the blob bytes.
func quantizePackedGroup(groupName string, inputs []create.PackedTensorInput) ([]byte, error) {
// Check if inputs are per-expert tensors that should be stacked into 3D
if projGroups, quantize := parsePerExpertInputs(groupName, inputs); projGroups != nil {
return stackAndQuantizeExpertGroup(groupName, projGroups, quantize)
}
allArrays := make(map[string]*mlx.Array)
var allToEval []*mlx.Array
var tmpPaths []string
var handles []*mlx.SafetensorsFile
var pinned []*mlx.Array
var metadata map[string]string
uniformQuantize := ""
hasQuantized := false
mixedQuantize := false
for _, input := range inputs {
if input.Quantize == "" {
if hasQuantized {
mixedQuantize = true
}
continue
}
if !hasQuantized {
hasQuantized = true
uniformQuantize = input.Quantize
continue
}
if input.Quantize != uniformQuantize {
mixedQuantize = true
}
}
if hasQuantized && !mixedQuantize {
if groupSize, _, _ := model.QuantizationParams(uniformQuantize); groupSize > 0 {
metadata = map[string]string{
"quant_type": uniformQuantize,
"group_size": strconv.Itoa(groupSize),
}
}
}
for _, input := range inputs {
tmpPath, toEval, st, err := loadAndQuantizeArray(input.Reader, input.Name, input.Quantize, allArrays)
if tmpPath != "" {
tmpPaths = append(tmpPaths, tmpPath)
}
if st != nil {
handles = append(handles, st)
}
if err != nil {
// Cleanup on error
for _, h := range handles {
h.Free()
}
for _, p := range tmpPaths {
os.Remove(p)
}
mlx.Unpin(pinned...)
mlx.Sweep()
return nil, err
}
allToEval = append(allToEval, toEval...)
mlx.Eval(toEval...)
finalArrays := arraysForPackedInput(allArrays, input)
mlx.Pin(finalArrays...)
pinned = append(pinned, finalArrays...)
if st != nil {
st.Free()
}
if tmpPath != "" {
os.Remove(tmpPath)
}
mlx.Sweep()
}
defer func() {
mlx.Unpin(pinned...)
mlx.Sweep()
}()
mlx.Eval(allToEval...)
// Free native handles after eval
for _, h := range handles {
h.Free()
}
// Save combined blob (no global metadata for mixed-precision packed blobs)
// Save combined blob. Add global metadata only when every packed tensor uses
// the same quantization mode and group size.
tmpDir := ensureTempDir()
outPath := filepath.Join(tmpDir, "packed-combined.safetensors")
defer os.Remove(outPath)
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, nil); err != nil {
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
return nil, fmt.Errorf("failed to save packed blob: %w", err)
}
@@ -185,17 +251,193 @@ func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
return nil, fmt.Errorf("failed to read packed blob: %w", err)
}
for _, p := range tmpPaths {
os.Remove(p)
return blobData, nil
}
// arraysForPackedInput collects the arrays produced for one packed input:
// the main tensor plus, when the input was quantized, its ".scale" and
// ".bias" companions. Entries absent from allArrays (e.g. a missing bias)
// are simply skipped.
func arraysForPackedInput(allArrays map[string]*mlx.Array, input create.PackedTensorInput) []*mlx.Array {
	candidates := []string{input.Name}
	if input.Quantize != "" {
		candidates = append(candidates, input.Name+".scale", input.Name+".bias")
	}
	found := make([]*mlx.Array, 0, len(candidates))
	for _, candidate := range candidates {
		arr := allArrays[candidate]
		if arr == nil {
			continue
		}
		found = append(found, arr)
	}
	return found
}
// perExpertSuffix matches ".{index}.{proj_and_suffix}" after the group prefix.
var perExpertSuffix = regexp.MustCompile(`^\.(\d+)\.(.+)$`)

// expertTensorInfo records one per-expert tensor input along with its parsed
// expert index and projection name, for sorting and stacking into 3D tensors.
type expertTensorInfo struct {
	index int
	proj  string // e.g., "gate_proj.weight"
	input create.PackedTensorInput
}
// parsePerExpertInputs groups per-expert 2D tensor inputs by projection type
// and returns the uniform quantization type shared by all inputs.
// Returns nil if the inputs are not per-expert tensors (e.g., already stacked 3D)
// or if the inputs have mixed quantization types.
// Only handles ".experts" groups; ".shared_experts" groups are left unpacked.
func parsePerExpertInputs(groupName string, inputs []create.PackedTensorInput) (map[string][]expertTensorInfo, string) {
	if !strings.HasSuffix(groupName, ".experts") {
		return nil, ""
	}
	// Guard: indexing inputs[0] below requires at least one input.
	if len(inputs) == 0 {
		return nil, ""
	}
	quantize := inputs[0].Quantize
	groups := make(map[string][]expertTensorInfo)
	for _, input := range inputs {
		if input.Quantize != quantize {
			return nil, "" // mixed quantization types
		}
		// Names look like "<group>.<expertIndex>.<proj>"; anything else means
		// this group does not use the per-expert layout.
		suffix := strings.TrimPrefix(input.Name, groupName)
		m := perExpertSuffix.FindStringSubmatch(suffix)
		if m == nil {
			return nil, "" // not a per-expert pattern
		}
		index, err := strconv.Atoi(m[1])
		if err != nil {
			return nil, ""
		}
		groups[m[2]] = append(groups[m[2]], expertTensorInfo{
			index: index,
			proj:  m[2],
			input: input,
		})
	}
	if len(groups) == 0 {
		return nil, ""
	}
	return groups, quantize
}
// stackAndQuantizeExpertGroup decodes per-expert tensors, stacks them into 3D
// switch_mlp tensors, quantizes, and returns the combined safetensors blob.
// Throughout, arrays that must survive a Sweep are Pinned and later Unpinned;
// NOTE(review): this assumes mlx.Sweep frees only unpinned arrays — confirm
// against the mlx package contract.
func stackAndQuantizeExpertGroup(groupName string, projGroups map[string][]expertTensorInfo, quantize string) ([]byte, error) {
	groupBase := strings.TrimSuffix(groupName, ".experts")
	allArrays := make(map[string]*mlx.Array)
	var pinned []*mlx.Array
	var metadata map[string]string
	// Record uniform quantization metadata so loaders know how to interpret
	// the packed tensors; skipped entirely when not quantizing.
	if groupSize, _, _ := model.QuantizationParams(quantize); groupSize > 0 && quantize != "" {
		metadata = map[string]string{
			"quant_type": quantize,
			"group_size": strconv.Itoa(groupSize),
		}
	}
	// Sort projection names for deterministic output
	projNames := make([]string, 0, len(projGroups))
	for proj := range projGroups {
		projNames = append(projNames, proj)
	}
	sort.Strings(projNames)
	cleanup := func() {
		mlx.Unpin(pinned...)
		mlx.Sweep()
	}
	for _, proj := range projNames {
		experts := projGroups[proj]
		// Sort by expert index
		sort.Slice(experts, func(i, j int) bool {
			return experts[i].index < experts[j].index
		})
		// Load and decode each expert tensor
		var decoded []*mlx.Array
		for _, expert := range experts {
			// quantize="" requests decode-only: the raw float tensor is
			// placed into dummyArrays under the input's name.
			dummyArrays := make(map[string]*mlx.Array)
			tmpPath, toEval, st, err := loadAndQuantizeArray(expert.input.Reader, expert.input.Name, "", dummyArrays)
			if err != nil {
				cleanup()
				return nil, fmt.Errorf("failed to decode expert tensor %s: %w", expert.input.Name, err)
			}
			mlx.Eval(toEval...)
			arr := dummyArrays[expert.input.Name]
			mlx.Pin(arr)
			pinned = append(pinned, arr)
			decoded = append(decoded, arr)
			// Free the native file handle and temp file promptly; the decoded
			// array is already evaluated and pinned.
			if st != nil {
				st.Free()
			}
			if tmpPath != "" {
				os.Remove(tmpPath)
			}
			mlx.Sweep()
		}
		// Stack into 3D along axis 0: [numExperts, rows, cols]
		stacked := mlx.Stack(decoded, 0)
		mlx.Eval(stacked)
		mlx.Pin(stacked)
		pinned = append(pinned, stacked)
		// Free individual decoded arrays
		mlx.Unpin(decoded...)
		mlx.Sweep()
		stackedName := groupBase + ".switch_mlp." + proj
		// Quantize the stacked tensor
		if quantize != "" {
			groupSize, bits, mode := model.QuantizationParams(quantize)
			qweight, scales, qbiases := mlx.Quantize(stacked, groupSize, bits, mode)
			qweight = mlx.Contiguous(qweight, false)
			scales = mlx.Contiguous(scales, false)
			allArrays[stackedName] = qweight
			allArrays[stackedName+".scale"] = scales
			toEval := []*mlx.Array{qweight, scales}
			if qbiases != nil {
				qbiases = mlx.Contiguous(qbiases, false)
				allArrays[stackedName+".bias"] = qbiases
				toEval = append(toEval, qbiases)
			}
			mlx.Eval(toEval...)
			mlx.Pin(toEval...)
			pinned = append(pinned, toEval...)
			// Free stacked source array
			mlx.Unpin(stacked)
			mlx.Sweep()
		} else {
			stacked = mlx.Contiguous(stacked, false)
			mlx.Eval(stacked)
			allArrays[stackedName] = stacked
		}
	}
	// From here on all failure paths release pinned arrays via the deferred
	// cleanup; earlier returns inside the loop call cleanup() explicitly.
	defer cleanup()
	tmpDir := ensureTempDir()
	outPath := filepath.Join(tmpDir, "stacked-combined.safetensors")
	defer os.Remove(outPath)
	if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
		return nil, fmt.Errorf("failed to save stacked blob: %w", err)
	}
	blobData, err := os.ReadFile(outPath)
	if err != nil {
		return nil, fmt.Errorf("failed to read stacked blob: %w", err)
	}
	return blobData, nil
}
// QuantizeSupported returns true if quantization is supported (MLX library
// available and initialized without error).
func QuantizeSupported() bool {
	return mlx.CheckInit() == nil
}
// ensureTempDir creates the temp directory for quantization if it doesn't exist
@@ -205,32 +447,97 @@ func ensureTempDir() string {
return tmpDir
}
// safetensorsHeaderEntry describes one tensor entry in a safetensors JSON
// header; only the fields needed here (dtype and shape) are decoded.
type safetensorsHeaderEntry struct {
	Dtype string  `json:"dtype"`
	Shape []int32 `json:"shape"`
}

// readSafetensorsHeader parses the JSON header of a safetensors file at path:
// an 8-byte little-endian header length followed by that many bytes of JSON
// mapping tensor names to their metadata.
func readSafetensorsHeader(path string) (map[string]safetensorsHeaderEntry, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	var headerSize uint64
	if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
		return nil, err
	}
	headerBytes := make([]byte, headerSize)
	if _, err := io.ReadFull(f, headerBytes); err != nil {
		return nil, err
	}
	var header map[string]safetensorsHeaderEntry
	if err := json.Unmarshal(headerBytes, &header); err != nil {
		return nil, err
	}
	return header, nil
}

// safetensorsKey resolves the primary tensor key from a header. The preferred
// name wins when present; otherwise the lexicographically first key is used,
// skipping the __metadata__ entry and fp8 ".scale_inv" companion tensors.
func safetensorsKey(preferred string, header map[string]safetensorsHeaderEntry) (string, error) {
	if preferred != "" {
		if _, ok := header[preferred]; ok {
			return preferred, nil
		}
	}
	keys := make([]string, 0, len(header))
	for k := range header {
		if k == "__metadata__" || strings.HasSuffix(k, ".scale_inv") {
			continue
		}
		keys = append(keys, k)
	}
	sort.Strings(keys)
	if len(keys) == 0 {
		return "", fmt.Errorf("no tensor found in safetensors header")
	}
	return keys[0], nil
}
// decodeSourceFP8Tensor expands an HF block-quantized F8_E4M3 weight back to
// bfloat16 by multiplying each 128x128 block by its corresponding inverse
// scale from scaleInv. Both inputs must be 2D; scaleInv must have one entry
// per (possibly partial) block of weight.
func decodeSourceFP8Tensor(weight, scaleInv *mlx.Array) (*mlx.Array, error) {
	if weight == nil || scaleInv == nil {
		return nil, fmt.Errorf("fp8 weight and scale tensors are required")
	}
	weightShape := weight.Dims()
	scaleShape := scaleInv.Dims()
	if len(weightShape) != 2 || len(scaleShape) != 2 {
		return nil, fmt.Errorf("expected 2D fp8 weight and scale tensors, got %v and %v", weightShape, scaleShape)
	}
	// These must match the block size validated by resolveEffectiveQuantization
	// in create.go, which rejects any source model with a different block size.
	const blockRows = 128
	const blockCols = 128
	rows, cols := weightShape[0], weightShape[1]
	// Ceiling division: edge blocks that are only partially filled still get
	// one scale entry each.
	expectedScaleRows := (rows + blockRows - 1) / blockRows
	expectedScaleCols := (cols + blockCols - 1) / blockCols
	if scaleShape[0] != expectedScaleRows || scaleShape[1] != expectedScaleCols {
		return nil, fmt.Errorf(
			"unexpected fp8 scale shape %v for weight shape %v; want [%d %d]",
			scaleShape,
			weightShape,
			expectedScaleRows,
			expectedScaleCols,
		)
	}
	decoded := mlx.FromFP8(weight, mlx.DTypeBFloat16)
	// Pad to whole blocks so the 4D reshape below is exact.
	padBottom := blockRows*scaleShape[0] - rows
	padSide := blockCols*scaleShape[1] - cols
	if padBottom > 0 || padSide > 0 {
		decoded = mlx.Pad(decoded, []int32{0, int32(padBottom), 0, int32(padSide)})
	}
	// View as [blockRowCount, blockRows, blockColCount, blockCols] and
	// broadcast each scale over its block via expanded dims on axes 1 and 3.
	decoded = mlx.Reshape(decoded, int32(scaleShape[0]), int32(blockRows), int32(scaleShape[1]), int32(blockCols))
	decoded = mlx.Mul(decoded, mlx.ExpandDims(mlx.ExpandDims(scaleInv, 1), 3))
	decoded = mlx.Reshape(decoded, int32(rows+padBottom), int32(cols+padSide))
	// Slice away the padding to restore the original weight shape.
	if padBottom > 0 || padSide > 0 {
		decoded = mlx.SliceStartStop(decoded, []int32{0, 0}, []int32{int32(rows), int32(cols)})
	}
	return decoded, nil
}

View File

@@ -267,13 +267,13 @@ func ShouldQuantize(name, component string) bool {
// ShouldQuantizeTensor returns true if a tensor should be quantized based on name, shape, and quantize type.
// This is a more detailed check that also considers tensor dimensions.
// The quantize parameter specifies the quantization type (e.g., "int4", "nvfp4", "mxfp4", "int8", "mxfp8").
func ShouldQuantizeTensor(name string, shape []int32, quantize string) bool {
	return GetTensorQuantization(name, shape, quantize) != ""
}
// normalizeQuantType converts various quantization type aliases to canonical forms.
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp8/MXFP8
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp4/MXFP4, mxfp8/MXFP8
func normalizeQuantType(quantize string) string {
switch strings.ToUpper(quantize) {
case "Q4", "INT4", "FP4":
@@ -282,6 +282,8 @@ func normalizeQuantType(quantize string) string {
return "int8"
case "NVFP4":
return "nvfp4"
case "MXFP4":
return "mxfp4"
case "MXFP8":
return "mxfp8"
default:
@@ -335,7 +337,7 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
quantNorm := normalizeQuantType(quantize)
// MLX quantization requires last dimension to be divisible by group size
// nvfp4: 16, mxfp8: 32, int4/int8: 64
// nvfp4: 16, mxfp4/mxfp8: 32, int4/int8: 64
groupSize := int32(32)
switch quantNorm {
case "nvfp4":
@@ -353,8 +355,8 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
return ""
}
// For NVFP4 or MXFP8, use the same quantization for all (no mixed precision)
if quantNorm == "nvfp4" || quantNorm == "mxfp8" {
// For non-affine modes, use the same quantization for all eligible tensors.
if quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8" {
return quantNorm
}
@@ -391,23 +393,39 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
return quantNorm
}
// expertLayerPrefixRegexp matches the transformer layer path that must precede
// an expert marker, e.g. "model.layers.3", "language_model.model.layers.0",
// "model.language_model.layers.2", or plain "layers.7".
var expertLayerPrefixRegexp = regexp.MustCompile(`^(?:model\.language_model\.|language_model(?:\.model)?\.|model\.)?layers\.\d+$`)

// ExpertGroupPrefix returns the group prefix for expert tensors that should be packed together.
// For example:
//   - "model.layers.1.mlp.experts.0.down_proj.weight" -> "model.layers.1.mlp.experts"
//   - "model.layers.1.mlp.shared_experts.down_proj.weight" -> "model.layers.1.mlp.shared_experts"
//   - "language_model.model.layers.1.mlp.switch_mlp.down_proj.weight" -> "language_model.model.layers.1.mlp.switch_mlp"
//   - "model.layers.0.mlp.down_proj.weight" -> "" (dense layer, no experts)
//   - "model.layers.1.mlp.gate.weight" -> "" (routing gate, not an expert)
func ExpertGroupPrefix(tensorName string) string {
	if !strings.HasSuffix(tensorName, ".weight") {
		return ""
	}
	for _, marker := range []string{
		".mlp.experts.",
		".mlp.shared_experts.",
		".mlp.switch_mlp.",
	} {
		idx := strings.Index(tensorName, marker)
		if idx == -1 {
			continue
		}
		// Everything before the marker must look like a layer path; this
		// rejects unrelated names that merely contain the marker text.
		layerPrefix := tensorName[:idx]
		if !expertLayerPrefixRegexp.MatchString(layerPrefix) {
			continue
		}
		return layerPrefix + strings.TrimSuffix(marker, ".")
	}
	return ""
}
// PackedTensorInput holds metadata for a tensor that will be packed into a multi-tensor blob.
@@ -424,9 +442,11 @@ type PackedTensorInput struct {
type PackedTensorLayerCreator func(groupName string, tensors []PackedTensorInput) (LayerInfo, error)
// sourceQuantization mirrors the quantization settings that may appear in a
// source model's config.json: MLX-style fields (bits/group_size/mode) and
// HF-style fields (quant_method/weight_block_size).
type sourceQuantization struct {
	Bits            int     `json:"bits"`
	GroupSize       int     `json:"group_size"`
	Mode            string  `json:"mode"`
	QuantMethod     string  `json:"quant_method"`
	WeightBlockSize []int32 `json:"weight_block_size"`
}
type sourceModelConfig struct {
@@ -493,6 +513,98 @@ func (cfg sourceModelConfig) QuantMetadata() map[string]string {
return metadata
}
// sourceQuantizedKind classifies how (if at all) a source model is already quantized.
type sourceQuantizedKind string

const (
	// sourceQuantizedKindNone: source holds unquantized weights.
	sourceQuantizedKindNone sourceQuantizedKind = ""
	// sourceQuantizedKindPrequantized: MLX-style quantized tensors (".scales" companions).
	sourceQuantizedKindPrequantized sourceQuantizedKind = "prequantized"
	// sourceQuantizedKindHFFP8: HF-style fp8 block quantization (".weight_scale_inv" companions).
	sourceQuantizedKindHFFP8 sourceQuantizedKind = "hf_fp8"
)

// quantizationConfigs returns every config location where quantization
// settings may appear: top level and text_config, under both the
// "quantization" and "quantization_config" spellings.
func (cfg sourceModelConfig) quantizationConfigs() []sourceQuantization {
	return []sourceQuantization{
		cfg.Quantization,
		cfg.QuantizationConfig,
		cfg.TextConfig.Quantization,
		cfg.TextConfig.QuantizationConfig,
	}
}
// HFFP8WeightBlockSize reports the fp8 weight block dimensions declared in
// any of the source config's quantization sections, and whether one declares
// an fp8 quant_method with a two-element weight_block_size.
func (cfg sourceModelConfig) HFFP8WeightBlockSize() (rows, cols int32, ok bool) {
	for _, q := range cfg.quantizationConfigs() {
		if strings.EqualFold(q.QuantMethod, "fp8") && len(q.WeightBlockSize) == 2 {
			return q.WeightBlockSize[0], q.WeightBlockSize[1], true
		}
	}
	return 0, 0, false
}
// inspectSourceQuantization scans the safetensors files in modelDir to detect
// whether the source model is already quantized. A ".scales" companion tensor
// marks an MLX-prequantized model; ".weight_scale_inv" companions combined
// with an fp8 weight_block_size in the config mark an HF fp8 model.
func inspectSourceQuantization(modelDir string, cfg sourceModelConfig) (sourceQuantizedKind, error) {
	entries, err := os.ReadDir(modelDir)
	if err != nil {
		return sourceQuantizedKindNone, err
	}
	hasScaleInv := false
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".safetensors") {
			continue
		}
		extractor, err := safetensors.OpenForExtraction(filepath.Join(modelDir, entry.Name()))
		if err != nil {
			return sourceQuantizedKindNone, err
		}
		for _, name := range extractor.ListTensors() {
			switch {
			case strings.HasSuffix(name, ".scales"):
				// Prequantized detection wins immediately; no need to scan further.
				extractor.Close()
				return sourceQuantizedKindPrequantized, nil
			case strings.HasSuffix(name, ".weight_scale_inv"):
				hasScaleInv = true
			}
		}
		extractor.Close()
	}
	if hasScaleInv {
		// Only treat scale_inv tensors as HF fp8 when the config also
		// declares a weight block size.
		if _, _, ok := cfg.HFFP8WeightBlockSize(); ok {
			return sourceQuantizedKindHFFP8, nil
		}
	}
	return sourceQuantizedKindNone, nil
}
// resolveEffectiveQuantization decides the quantization actually applied to
// the import, reconciling the user's requested --quantize value with what the
// source model already contains. Requantizing an already-quantized source is
// rejected; HF fp8 sources are auto-converted to mxfp8.
func resolveEffectiveQuantization(cfg sourceModelConfig, sourceKind sourceQuantizedKind, requested string) (string, error) {
	switch sourceKind {
	case sourceQuantizedKindNone:
		// Unquantized source: honor the request as-is (may be empty).
		return requested, nil
	case sourceQuantizedKindPrequantized:
		if requested != "" {
			return "", fmt.Errorf("cannot requantize already-quantized source model with --quantize %q", requested)
		}
		return "", nil
	case sourceQuantizedKindHFFP8:
		if requested != "" {
			return "", fmt.Errorf("cannot requantize already-quantized fp8 source model with --quantize %q", requested)
		}
		rows, cols, ok := cfg.HFFP8WeightBlockSize()
		if !ok {
			return "", fmt.Errorf("fp8 source model missing weight_block_size metadata")
		}
		// Only 128x128 block scaling is supported by the fp8 decode path
		// (decodeSourceFP8Tensor's blockRows/blockCols constants).
		if rows != 128 || cols != 128 {
			return "", fmt.Errorf("unsupported fp8 source block size %dx%d", rows, cols)
		}
		return "mxfp8", nil
	default:
		return "", fmt.Errorf("unsupported source quantization kind %q", sourceKind)
	}
}
type tensorImportTransform interface {
skipTensor(name string) bool
transformTensor(td *safetensors.TensorData) ([]*safetensors.TensorData, error)
@@ -546,6 +658,14 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if err != nil {
return fmt.Errorf("failed to read source config.json: %w", err)
}
sourceQuantKind, err := inspectSourceQuantization(modelDir, sourceConfig)
if err != nil {
return fmt.Errorf("failed to inspect source quantization: %w", err)
}
effectiveQuantize, err := resolveEffectiveQuantization(sourceConfig, sourceQuantKind, quantize)
if err != nil {
return err
}
sourceQuantMetadata := sourceConfig.QuantMetadata()
importTransform, err := newTensorImportTransform(modelDir, sourceConfig)
if err != nil {
@@ -557,7 +677,6 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if len(createPackedLayer) > 0 {
packedCreator = createPackedLayer[0]
}
// Accumulate expert tensors by group prefix for packing.
// Readers reference file-backed SectionReaders, so we keep extractors
// open until each group is flushed to avoid buffering tensor data in memory.
@@ -600,8 +719,8 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
tensorSet[name] = struct{}{}
}
quantizeMsg := ""
if quantize != "" {
quantizeMsg = fmt.Sprintf(", quantizing to %s", quantize)
if effectiveQuantize != "" {
quantizeMsg = fmt.Sprintf(", quantizing to %s", effectiveQuantize)
}
fn(fmt.Sprintf("importing %s (%d tensors%s)", entry.Name(), len(tensorNames), quantizeMsg))
@@ -612,9 +731,10 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if importTransform.skipTensor(tensorName) {
continue
}
if shouldSkipPrequantizedCompanion(tensorName, tensorSet) {
if shouldSkipSourceCompanion(tensorName, tensorSet) {
continue
}
sourceFP8ScaleName, hasSourceFP8Scale := sourceFP8Companion(tensorName, tensorSet)
td, err := extractor.GetTensor(tensorName)
if err != nil {
@@ -623,7 +743,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
return fmt.Errorf("failed to get tensor %s: %w", tensorName, err)
}
if quantize == "" {
if effectiveQuantize == "" {
layer, ok, err := createPrequantizedLayer(extractor, td, tensorName, tensorSet, sourceQuantMetadata, createLayer)
if err != nil {
extractor.Close()
@@ -647,8 +767,33 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
// Determine quantization type for this tensor (empty string if not quantizing)
// GetTensorQuantization handles mixed-precision (e.g., Q8 for attention, Q4 for FFN)
quantizeType := ""
if quantize != "" {
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, quantize)
switch {
case sourceQuantKind == sourceQuantizedKindHFFP8 && hasSourceFP8Scale:
quantizeType = "mxfp8"
case sourceQuantKind == sourceQuantizedKindHFFP8:
quantizeType = ""
case effectiveQuantize != "":
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, effectiveQuantize)
}
reader := outTD.SafetensorsReader()
if hasSourceFP8Scale {
if len(outputTensors) != 1 {
extractor.Close()
closeExtractors()
return fmt.Errorf("source fp8 tensor %s rewrote into %d tensors; only 1:1 rewrites are supported", tensorName, len(outputTensors))
}
if quantizeType == "" {
extractor.Close()
closeExtractors()
return fmt.Errorf("source fp8 tensor %s was not scheduled for mxfp8 conversion", tensorName)
}
scaleTD, err := extractor.GetTensor(sourceFP8ScaleName)
if err != nil {
extractor.Close()
closeExtractors()
return fmt.Errorf("failed to get fp8 scale tensor %s: %w", sourceFP8ScaleName, err)
}
reader = buildSourceFP8Reader(outTD, scaleTD.WithName(outTD.Name+".scale_inv"))
}
// Check if this tensor belongs to an expert group for packing
@@ -670,13 +815,13 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
Dtype: outTD.Dtype,
Shape: outTD.Shape,
Quantize: quantizeType,
Reader: outTD.SafetensorsReader(),
Reader: reader,
})
} else {
// Store as minimal safetensors format (88 bytes header overhead)
// This enables native mmap loading via mlx_load_safetensors
// createTensorLayer returns multiple layers if quantizing (weight + scales)
newLayers, err := createTensorLayer(outTD.SafetensorsReader(), outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
newLayers, err := createTensorLayer(reader, outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
if err != nil {
extractor.Close()
closeExtractors()
@@ -760,7 +905,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
return nil
}
// shouldSkipSourceCompanion reports whether name is a companion tensor
// (quantization scales/biases or an fp8 inverse-scale) whose primary weight
// tensor is also present in tensorSet; such companions are imported alongside
// their weight rather than as standalone tensors.
func shouldSkipSourceCompanion(name string, tensorSet map[string]struct{}) bool {
	switch {
	case strings.HasSuffix(name, ".scales"):
		_, ok := tensorSet[strings.TrimSuffix(name, ".scales")+".weight"]
		return ok
	case strings.HasSuffix(name, ".biases"):
		_, ok := tensorSet[strings.TrimSuffix(name, ".biases")+".weight"]
		return ok
	case strings.HasSuffix(name, ".weight_scale_inv"):
		// "<x>.weight_scale_inv" pairs with "<x>.weight".
		_, ok := tensorSet[strings.TrimSuffix(name, "_scale_inv")]
		return ok
	default:
		return false
	}
}
// sourceFP8Companion returns the name of the "<weight>_scale_inv" companion
// tensor for an fp8 weight and whether that companion exists in tensorSet.
// Non-".weight" names never have a companion.
func sourceFP8Companion(weightName string, tensorSet map[string]struct{}) (scaleName string, ok bool) {
	if !strings.HasSuffix(weightName, ".weight") {
		return "", false
	}
	candidate := weightName + "_scale_inv"
	_, present := tensorSet[candidate]
	return candidate, present
}
// buildSourceFP8Reader packs an fp8 weight tensor together with its (already
// renamed) scale companion into a single safetensors stream, so both reach
// the quantization path as one blob.
func buildSourceFP8Reader(weightTD, scaleTD *safetensors.TensorData) io.Reader {
	return safetensors.BuildPackedSafetensorsReader([]*safetensors.TensorData{weightTD, scaleTD})
}
func createPrequantizedLayer(
extractor *safetensors.TensorExtractor,
td *safetensors.TensorData,

View File

@@ -246,6 +246,30 @@ func readSingleTensorRaw(t *testing.T, data []byte) []byte {
return nil
}
// readSafetensorsHeaderNames parses the safetensors header embedded at the
// start of data (8-byte little-endian length + JSON) and returns the sorted
// tensor names, excluding the __metadata__ entry. Fails the test on malformed
// input.
func readSafetensorsHeaderNames(t *testing.T, data []byte) []string {
	t.Helper()
	var headerSize uint64
	if err := binary.Read(bytes.NewReader(data[:8]), binary.LittleEndian, &headerSize); err != nil {
		t.Fatalf("failed to read header size: %v", err)
	}
	var header map[string]json.RawMessage
	if err := json.Unmarshal(data[8:8+headerSize], &header); err != nil {
		t.Fatalf("failed to parse header: %v", err)
	}
	names := make([]string, 0, len(header))
	for key := range header {
		if key == "__metadata__" {
			continue
		}
		names = append(names, key)
	}
	slices.Sort(names)
	return names
}
func TestCreateSafetensorsModel(t *testing.T) {
dir := t.TempDir()
@@ -546,6 +570,215 @@ func TestCreateSafetensorsModel_PacksPrequantizedTensorTriplets(t *testing.T) {
}
}
// TestCreateSafetensorsModel_HFFP8AutoConvertsToMXFP8 verifies that an HF fp8
// block-quantized source is imported with its fp8 weight scheduled for mxfp8
// conversion and packed with its scale_inv companion, while unquantized
// tensors pass through untouched.
func TestCreateSafetensorsModel_HFFP8AutoConvertsToMXFP8(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
		"model_type": "test",
		"architectures": ["TestModel"],
		"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
	}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}
	// One fp8 weight with its scale_inv companion, one large dense weight,
	// and one small norm weight.
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
		st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
		st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
		st.NewTensorDataFromBytes("dense.weight", "BF16", []int32{128, 128}, make([]byte, 128*128*2)),
		st.NewTensorDataFromBytes("norm.weight", "BF16", []int32{2}, make([]byte, 4)),
	})
	// Capture, per tensor, the quantize type requested and the tensor names
	// inside the blob handed to the layer creator.
	quantizeByName := make(map[string]string)
	headerNamesByName := make(map[string][]string)
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		_, err := io.ReadAll(r)
		if err != nil {
			return LayerInfo{}, err
		}
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		data, err := io.ReadAll(r)
		if err != nil {
			return nil, err
		}
		quantizeByName[name] = quantize
		headerNamesByName[name] = readSafetensorsHeaderNames(t, data)
		return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }
	if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}
	// The fp8 weight must be auto-converted; everything else stays raw.
	if got := quantizeByName["linear.weight"]; got != "mxfp8" {
		t.Fatalf("linear.weight quantization = %q, want %q", got, "mxfp8")
	}
	if got := quantizeByName["norm.weight"]; got != "" {
		t.Fatalf("norm.weight quantization = %q, want empty", got)
	}
	if got := quantizeByName["dense.weight"]; got != "" {
		t.Fatalf("dense.weight quantization = %q, want empty", got)
	}
	// The companion must be folded into the weight blob, never standalone.
	if _, ok := quantizeByName["linear.weight_scale_inv"]; ok {
		t.Fatal("linear.weight_scale_inv should not be imported as a standalone tensor")
	}
	if got := headerNamesByName["linear.weight"]; !slices.Equal(got, []string{"linear.weight", "linear.weight.scale_inv"}) {
		t.Fatalf("linear.weight blob tensors = %v, want %v", got, []string{"linear.weight", "linear.weight.scale_inv"})
	}
	if got := headerNamesByName["norm.weight"]; !slices.Equal(got, []string{"norm.weight"}) {
		t.Fatalf("norm.weight blob tensors = %v, want %v", got, []string{"norm.weight"})
	}
	if got := headerNamesByName["dense.weight"]; !slices.Equal(got, []string{"dense.weight"}) {
		t.Fatalf("dense.weight blob tensors = %v, want %v", got, []string{"dense.weight"})
	}
}
// TestCreateSafetensorsModel_RejectsRequantizingQuantizedSources verifies that
// CreateSafetensorsModel refuses to apply --quantize to a source checkpoint
// that is already quantized, covering both a pre-quantized affine layout and
// an HF fp8 checkpoint.
func TestCreateSafetensorsModel_RejectsRequantizingQuantizedSources(t *testing.T) {
	cases := []struct {
		name       string
		configJSON string
		tensors    []*st.TensorData
		wantErr    string
	}{
		{
			// Affine layout: a packed U32 weight with a .scales companion.
			name:       "prequantized affine",
			configJSON: `{"model_type": "test", "architectures": ["TestModel"]}`,
			tensors: []*st.TensorData{
				st.NewTensorDataFromBytes("linear.weight", "U32", []int32{4, 4}, make([]byte, 16)),
				st.NewTensorDataFromBytes("linear.scales", "BF16", []int32{4, 1}, make([]byte, 8)),
			},
			wantErr: `cannot requantize already-quantized source model with --quantize "int4"`,
		},
		{
			// HF fp8: quantization_config in config.json plus F8_E4M3 weights
			// carrying block-scale companions.
			name: "hf fp8 source",
			configJSON: `{
"model_type": "test",
"architectures": ["TestModel"],
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
}`,
			tensors: []*st.TensorData{
				st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
				st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
			},
			wantErr: `cannot requantize already-quantized fp8 source model with --quantize "int4"`,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			dir := t.TempDir()
			if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(tc.configJSON), 0o644); err != nil {
				t.Fatalf("failed to write config.json: %v", err)
			}
			createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), tc.tensors)

			// The callbacks are inert stubs: the import is expected to fail
			// before any layer is actually written.
			noopLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
				return LayerInfo{}, nil
			}
			noopTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
				return nil, nil
			}
			noopManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }

			err := CreateSafetensorsModel("test-model", dir, "int4", noopLayer, noopTensorLayer, noopManifest, func(string) {})
			switch {
			case err == nil:
				t.Fatal("expected error, got nil")
			case !strings.Contains(err.Error(), tc.wantErr):
				t.Fatalf("error = %q, want substring %q", err, tc.wantErr)
			}
		})
	}
}
// TestCreateSafetensorsModel_HFFP8PacksExperts verifies that an HF fp8
// checkpoint with per-expert weights has its experts accumulated into a single
// packed layer (the *_scale_inv companions consumed along with the weights),
// and that every packed tensor is marked for mxfp8 quantization.
func TestCreateSafetensorsModel_HFFP8PacksExperts(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}

	// Two experts with three projections each, so stacking produces
	// [2, 128, 128] tensors; each fp8 weight gets a block-scale companion.
	var sources []*st.TensorData
	for _, expert := range []string{"0", "1"} {
		for _, proj := range []string{"gate_proj", "up_proj", "down_proj"} {
			weight := "model.language_model.layers.0.mlp.experts." + expert + "." + proj + ".weight"
			sources = append(sources,
				st.NewTensorDataFromBytes(weight, "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
				st.NewTensorDataFromBytes(weight+"_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
			)
		}
	}
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), sources)

	var gotGroups []string
	var gotTensors [][]PackedTensorInput
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		if _, err := io.ReadAll(r); err != nil {
			return LayerInfo{}, err
		}
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		if _, err := io.ReadAll(r); err != nil {
			return nil, err
		}
		return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
		gotGroups = append(gotGroups, groupName)
		gotTensors = append(gotTensors, tensors)
		return LayerInfo{Name: groupName, Digest: "sha256:packed_" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }

	if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}

	if len(gotGroups) != 1 {
		t.Fatalf("expected 1 packed layer, got %d: %v", len(gotGroups), gotGroups)
	}
	if gotGroups[0] != "language_model.model.layers.0.mlp.experts" {
		t.Fatalf("unexpected packed layer name: %s", gotGroups[0])
	}
	// 2 experts × 3 projection types = 6 tensors in the packed group.
	packed := gotTensors[0]
	if len(packed) != 6 {
		t.Fatalf("expected 6 tensors in packed group, got %d", len(packed))
	}
	// Every packed expert tensor should be flagged for mxfp8 quantization.
	for _, tensor := range packed {
		if tensor.Quantize != "mxfp8" {
			t.Fatalf("expected mxfp8 quantize for %s, got %q", tensor.Name, tensor.Quantize)
		}
	}
}
func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
dir := t.TempDir()
@@ -693,6 +926,113 @@ func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
}
}
// TestCreateSafetensorsModel_Qwen35DirectNonAffineKeepsSensitiveWeightsBF16
// imports a BF16 Qwen3.5-MoE checkpoint under each direct non-affine quantize
// type (nvfp4, mxfp8, mxfp4) and checks the per-tensor policy: embeddings, the
// LM head, low-rank linear_attn projections, and routing gates must pass
// through with an empty quantize type (i.e. stay BF16), while a regular
// attention projection and the packed switch_mlp expert tensors use the
// requested type.
func TestCreateSafetensorsModel_Qwen35DirectNonAffineKeepsSensitiveWeightsBF16(t *testing.T) {
	for _, quantize := range []string{"nvfp4", "mxfp8", "mxfp4"} {
		t.Run(quantize, func(t *testing.T) {
			dir := t.TempDir()
			configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"text_config": {"dtype": "bfloat16"}
}`
			if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
				t.Fatalf("failed to write config.json: %v", err)
			}
			// Stacked gate_up_proj fixture: per expert, the first 64*64 values
			// are 1 and the next 64*64 are 2, so the two halves remain
			// distinguishable after the import transform splits them.
			gateUpValues := make([]float32, 2*128*64)
			for expert := range 2 {
				base := expert * 128 * 64
				for i := range 64 * 64 {
					gateUpValues[base+i] = 1
					gateUpValues[base+64*64+i] = 2
				}
			}
			createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
				st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("lm_head.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_a.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_b.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.shared_expert_gate.weight", "BF16", []int32{1, 64}, make([]byte, 64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.self_attn.q_proj.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
			})
			// Record the quantize type each tensor (individual and packed)
			// arrives with so policy can be asserted afterwards.
			type tensorCall struct {
				quantize string
			}
			type packedTensorCall struct {
				Name     string
				Quantize string
			}
			tensorCalls := make(map[string]tensorCall)
			packedCalls := make(map[string][]packedTensorCall)
			createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
				_, _ = io.ReadAll(r)
				return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
			}
			createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantizeType string) ([]LayerInfo, error) {
				_, _ = io.ReadAll(r)
				tensorCalls[name] = tensorCall{quantize: quantizeType}
				return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
			}
			createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
				group := make([]packedTensorCall, 0, len(tensors))
				for _, tensor := range tensors {
					group = append(group, packedTensorCall{
						Name:     tensor.Name,
						Quantize: tensor.Quantize,
					})
				}
				packedCalls[groupName] = group
				return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
			}
			writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
				return nil
			}
			if err := CreateSafetensorsModel("test-model", dir, quantize, createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
				t.Fatalf("CreateSafetensorsModel failed: %v", err)
			}
			// Sensitive tensors (note the names were rewritten from the
			// model.language_model.* source prefix) must stay unquantized.
			for _, name := range []string{
				"language_model.model.embed_tokens.weight",
				"language_model.lm_head.weight",
				"language_model.model.layers.0.linear_attn.in_proj_a.weight",
				"language_model.model.layers.0.linear_attn.in_proj_b.weight",
				"language_model.model.layers.0.mlp.gate.weight",
				"language_model.model.layers.0.mlp.shared_expert_gate.weight",
			} {
				if got := tensorCalls[name].quantize; got != "" {
					t.Fatalf("%s quantize = %q, want empty", name, got)
				}
			}
			// A regular attention projection should receive the requested type.
			if got := tensorCalls["language_model.model.layers.0.self_attn.q_proj.weight"].quantize; got != quantize {
				t.Fatalf("q_proj quantize = %q, want %q", got, quantize)
			}
			// The stacked experts should be rewritten to switch_mlp and packed
			// as three projections, all with the requested quantize type.
			group := packedCalls["language_model.model.layers.0.mlp.switch_mlp"]
			if len(group) != 3 {
				t.Fatalf("packed switch_mlp tensor count = %d, want 3", len(group))
			}
			for _, tensor := range group {
				if tensor.Quantize != quantize {
					t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, quantize)
				}
			}
		})
	}
}
func TestResolveManifestPath(t *testing.T) {
tests := []struct {
name string
@@ -865,6 +1205,7 @@ func TestShouldQuantizeTensor(t *testing.T) {
{"large 2D weight fp8", "q_proj.weight", []int32{4096, 4096}, "fp8", true},
{"medium 2D weight fp8", "small_proj.weight", []int32{128, 128}, "fp8", true},
{"large 2D weight nvfp4", "q_proj.weight", []int32{4096, 4096}, "nvfp4", true},
{"large 2D weight mxfp4", "q_proj.weight", []int32{4096, 4096}, "mxfp4", true},
// Small tensors should not be quantized (< 1024 elements)
{"tiny 2D weight", "tiny.weight", []int32{16, 16}, "fp8", false},
@@ -891,9 +1232,11 @@ func TestShouldQuantizeTensor(t *testing.T) {
{"bias 2D", "proj.bias", []int32{4096, 1}, "fp8", false},
// Group size divisibility tests
// FP8/FP4 require divisible by 32
// FP8/FP4/MXFP4 require divisible by 32
{"not divisible by 32 fp8", "proj.weight", []int32{128, 48}, "fp8", false},
{"divisible by 32 fp8", "proj.weight", []int32{128, 64}, "fp8", true},
{"not divisible by 32 mxfp4", "proj.weight", []int32{128, 48}, "mxfp4", false},
{"divisible by 32 mxfp4", "proj.weight", []int32{128, 64}, "mxfp4", true},
// NVFP4 requires divisible by 16
{"not divisible by 16 nvfp4", "proj.weight", []int32{128, 24}, "nvfp4", false},
{"divisible by 16 nvfp4", "proj.weight", []int32{128, 48}, "nvfp4", true},
@@ -919,10 +1262,20 @@ func TestExpertGroupPrefix(t *testing.T) {
{"model.layers.1.mlp.experts.63.gate_proj.weight", "model.layers.1.mlp.experts"},
{"model.layers.0.mlp.experts.0.up_proj.weight", "model.layers.0.mlp.experts"},
// Expert tensors with language_model prefix should also match
{"language_model.model.layers.0.mlp.experts.0.gate_proj.weight", "language_model.model.layers.0.mlp.experts"},
{"language_model.model.layers.1.mlp.experts.255.down_proj.weight", "language_model.model.layers.1.mlp.experts"},
// Shared expert tensors should return their own group prefix
{"model.layers.1.mlp.shared_experts.down_proj.weight", "model.layers.1.mlp.shared_experts"},
{"model.layers.2.mlp.shared_experts.gate_proj.weight", "model.layers.2.mlp.shared_experts"},
// Rewritten Qwen switch_mlp tensors should also be packed per-layer.
{"model.layers.1.mlp.switch_mlp.down_proj.weight", "model.layers.1.mlp.switch_mlp"},
{"language_model.layers.2.mlp.switch_mlp.gate_proj.weight", "language_model.layers.2.mlp.switch_mlp"},
{"language_model.model.layers.3.mlp.switch_mlp.up_proj.weight", "language_model.model.layers.3.mlp.switch_mlp"},
{"model.language_model.layers.4.mlp.switch_mlp.gate_proj.weight", "model.language_model.layers.4.mlp.switch_mlp"},
// Non-expert tensors should return empty string
{"model.layers.0.mlp.down_proj.weight", ""}, // dense layer, no experts
{"model.layers.1.mlp.gate.weight", ""}, // routing gate, not an expert
@@ -978,6 +1331,161 @@ func TestGetTensorQuantization_StackedExpert3D(t *testing.T) {
if combinedDown != "int8" {
t.Fatalf("combined down_proj quantization = %q, want %q", combinedDown, "int8")
}
nvfp4GateUp := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
[]int32{64, 11008, 4096},
"nvfp4",
)
if nvfp4GateUp != "nvfp4" {
t.Fatalf("nvfp4 gate_proj quantization = %q, want %q", nvfp4GateUp, "nvfp4")
}
nvfp4Down := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
[]int32{64, 4096, 11008},
"nvfp4",
)
if nvfp4Down != "nvfp4" {
t.Fatalf("nvfp4 down_proj quantization = %q, want %q", nvfp4Down, "nvfp4")
}
mxfp4GateUp := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
[]int32{64, 11008, 4096},
"mxfp4",
)
if mxfp4GateUp != "mxfp4" {
t.Fatalf("mxfp4 gate_proj quantization = %q, want %q", mxfp4GateUp, "mxfp4")
}
mxfp4Down := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
[]int32{64, 4096, 11008},
"mxfp4",
)
if mxfp4Down != "mxfp4" {
t.Fatalf("mxfp4 down_proj quantization = %q, want %q", mxfp4Down, "mxfp4")
}
}
// TestCreateSafetensorsModel_Qwen35NVFP4PacksSwitchMLPExperts checks that a
// BF16 Qwen3.5-MoE checkpoint imported with nvfp4 has its stacked expert
// tensors rewritten to switch_mlp names and routed through the packed-layer
// callback as three separate projections (gate/up/down), never through the
// per-tensor callback, while embeddings and the routing gate stay unquantized.
func TestCreateSafetensorsModel_Qwen35NVFP4PacksSwitchMLPExperts(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"text_config": {"dtype": "bfloat16"}
}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}
	// Stacked gate_up_proj fixture: per expert, the first 64*64 values are 1
	// (gate half) and the next 64*64 are 2 (up half).
	gateUpValues := make([]float32, 2*128*64)
	for expert := range 2 {
		base := expert * 128 * 64
		for i := range 64 * 64 {
			gateUpValues[base+i] = 1
			gateUpValues[base+64*64+i] = 2
		}
	}
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
		st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
	})
	// Record what each callback was invoked with so routing and quantization
	// policy can be asserted after the import runs.
	type tensorCall struct {
		quantize string
	}
	type packedTensorCall struct {
		Name     string
		Dtype    string
		Shape    []int32
		Quantize string
	}
	tensorCalls := make(map[string]tensorCall)
	packedCalls := make(map[string][]packedTensorCall)
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		_, _ = io.ReadAll(r)
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		_, _ = io.ReadAll(r)
		tensorCalls[name] = tensorCall{quantize: quantize}
		return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
		group := make([]packedTensorCall, 0, len(tensors))
		for _, tensor := range tensors {
			group = append(group, packedTensorCall{
				Name:     tensor.Name,
				Dtype:    tensor.Dtype,
				Shape:    append([]int32(nil), tensor.Shape...),
				Quantize: tensor.Quantize,
			})
		}
		packedCalls[groupName] = group
		return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
		return nil
	}
	if err := CreateSafetensorsModel("test-model", dir, "nvfp4", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}
	// The stacked experts should land in exactly one packed group, rewritten
	// to the switch_mlp name.
	groupName := "language_model.model.layers.0.mlp.switch_mlp"
	group, ok := packedCalls[groupName]
	if !ok {
		t.Fatalf("missing packed group %q: %v", groupName, packedCalls)
	}
	if len(group) != 3 {
		t.Fatalf("packed group %q has %d tensors, want 3", groupName, len(group))
	}
	// Every packed tensor keeps its BF16 source dtype and is flagged nvfp4.
	gotNames := make([]string, 0, len(group))
	for _, tensor := range group {
		gotNames = append(gotNames, tensor.Name)
		if tensor.Quantize != "nvfp4" {
			t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, "nvfp4")
		}
		if tensor.Dtype != "BF16" {
			t.Fatalf("packed tensor %q dtype = %q, want %q", tensor.Name, tensor.Dtype, "BF16")
		}
	}
	// The gate_up_proj stack must be split into gate/up plus the down_proj.
	slices.Sort(gotNames)
	wantNames := []string{
		"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
		"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
		"language_model.model.layers.0.mlp.switch_mlp.up_proj.weight",
	}
	if !slices.Equal(gotNames, wantNames) {
		t.Fatalf("packed tensor names = %v, want %v", gotNames, wantNames)
	}
	// Packed expert tensors must not also be emitted as individual tensors.
	for _, name := range wantNames {
		if _, ok := tensorCalls[name]; ok {
			t.Fatalf("packed expert tensor %q unexpectedly handled by createTensorLayer", name)
		}
	}
	// Embeddings and the routing gate stay unquantized.
	if got := tensorCalls["language_model.model.embed_tokens.weight"].quantize; got != "" {
		t.Fatalf("embed_tokens quantize = %q, want empty", got)
	}
	if got := tensorCalls["language_model.model.layers.0.mlp.gate.weight"].quantize; got != "" {
		t.Fatalf("mlp.gate quantize = %q, want empty", got)
	}
}
func TestCreateSafetensorsModel_WithQuantize(t *testing.T) {

View File

@@ -87,6 +87,27 @@ func (t qwen35ImportTransform) skipTensor(name string) bool {
return strings.Contains(name, "mtp.")
}
// qwen35ShouldKeepBF16ForDirectNonAffine reports whether a tensor should stay
// in BF16 when importing directly into a non-affine quantized format:
// embeddings, the LM head, low-rank linear_attn projections, MoE routing
// gates, and the shared-expert gate.
func qwen35ShouldKeepBF16ForDirectNonAffine(name string) bool {
	// Routing gate: ends in ".mlp.gate.weight" but must not be a *_proj
	// tensor (e.g. a path containing gate_proj), which quantizes normally.
	if strings.HasSuffix(name, ".mlp.gate.weight") {
		return !strings.Contains(name, "_proj")
	}
	sensitiveSuffixes := []string{
		"embed_tokens.weight",
		"lm_head.weight",
		".linear_attn.in_proj_a.weight",
		".linear_attn.in_proj_b.weight",
		".linear_attn.in_proj_ba.weight",
		".mlp.shared_expert_gate.weight",
	}
	for _, suffix := range sensitiveSuffixes {
		if strings.HasSuffix(name, suffix) {
			return true
		}
	}
	return false
}
func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quantize string) string {
if strings.HasPrefix(name, "vision_tower.") {
return ""
@@ -127,6 +148,13 @@ func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quan
return ""
}
// Match the working HF-FP8 import policy for direct NVFP4/MXFP4/MXFP8 imports:
// keep embeddings, LM head, low-rank linear_attn projections, and routing
// gates in BF16 rather than forcing them into a non-affine quantized format.
if (quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8") && qwen35ShouldKeepBF16ForDirectNonAffine(name) {
return ""
}
return quantNorm
}

View File

@@ -1,11 +1,11 @@
include(FetchContent)
# Read MLX version from top-level file (shared with Dockerfile)
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_C_GIT_TAG)
# Read MLX-C version from top-level file (shared with Dockerfile)
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
# Read MLX core version from top-level file
file(READ "${CMAKE_SOURCE_DIR}/MLX_CORE_VERSION" MLX_GIT_TAG)
# Read MLX version from top-level file
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_GIT_TAG)
string(STRIP "${MLX_GIT_TAG}" MLX_GIT_TAG)
set(MLX_C_BUILD_EXAMPLES OFF)
@@ -98,6 +98,28 @@ FetchContent_MakeAvailable(mlx-c)
file(GLOB _mlx_c_hdrs "${mlx-c_SOURCE_DIR}/mlx/c/*.h")
file(COPY ${_mlx_c_hdrs} DESTINATION "${CMAKE_SOURCE_DIR}/x/mlxrunner/mlx/include/mlx/c/")
# Regenerate Go/C shim wrappers from the (possibly updated) headers.
find_program(GO_EXECUTABLE go REQUIRED)
message(STATUS "Regenerating MLX Go wrappers")
# Go's cgo splits CC on whitespace, so a CC like "C:/Program Files/…/cl.exe"
# (set by cmake on Windows) breaks with "C:/Program" not found. Clear CC
# when it contains spaces so cgo falls back to its default (gcc).
if(WIN32 AND "$ENV{CC}" MATCHES " ")
set(_SAVE_CC "$ENV{CC}")
set(ENV{CC} "")
endif()
execute_process(
COMMAND ${GO_EXECUTABLE} generate ./x/...
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
COMMAND_ERROR_IS_FATAL ANY
)
if(DEFINED _SAVE_CC)
set(ENV{CC} "${_SAVE_CC}")
endif()
# For local dev builds, override MLX_VERSION with git describe output
if(TARGET mlx_version AND DEFINED FETCHCONTENT_SOURCE_DIR_MLX)
execute_process(

View File

@@ -165,8 +165,8 @@ int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x, const
int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group) = NULL;
int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group) = NULL;
mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key) = NULL;
bool (*mlx_distributed_is_available_ptr)(void) = NULL;
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict) = NULL;
bool (*mlx_distributed_is_available_ptr)(const char* bk) = NULL;
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk) = NULL;
void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) = NULL;
void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...) = NULL;
int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless) = NULL;
@@ -319,10 +319,12 @@ int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype, const
int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) = NULL;
int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s) = NULL;
int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s) = NULL;
@@ -348,7 +350,7 @@ int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse
int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s) = NULL;
int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies) = NULL;
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s) = NULL;
int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
@@ -375,6 +377,8 @@ int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w,
int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s) = NULL;
int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
@@ -434,8 +438,8 @@ int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* axes, siz
int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s) = NULL;
int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s) = NULL;
int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s) = NULL;
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) = NULL;
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) = NULL;
int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
@@ -2101,6 +2105,11 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_atleast_3d\n");
return -1;
}
mlx_bartlett_ptr = GET_SYM(handle, "mlx_bartlett");
if (mlx_bartlett_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bartlett\n");
return -1;
}
mlx_bitwise_and_ptr = GET_SYM(handle, "mlx_bitwise_and");
if (mlx_bitwise_and_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_and\n");
@@ -2121,6 +2130,11 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_xor\n");
return -1;
}
mlx_blackman_ptr = GET_SYM(handle, "mlx_blackman");
if (mlx_blackman_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_blackman\n");
return -1;
}
mlx_block_masked_mm_ptr = GET_SYM(handle, "mlx_block_masked_mm");
if (mlx_block_masked_mm_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_block_masked_mm\n");
@@ -2381,6 +2395,16 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hadamard_transform\n");
return -1;
}
mlx_hamming_ptr = GET_SYM(handle, "mlx_hamming");
if (mlx_hamming_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hamming\n");
return -1;
}
mlx_hanning_ptr = GET_SYM(handle, "mlx_hanning");
if (mlx_hanning_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hanning\n");
return -1;
}
mlx_identity_ptr = GET_SYM(handle, "mlx_identity");
if (mlx_identity_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_identity\n");
@@ -4132,12 +4156,12 @@ mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, i
return mlx_distributed_group_split_ptr(group, color, key);
}
bool mlx_distributed_is_available(void) {
return mlx_distributed_is_available_ptr();
bool mlx_distributed_is_available(const char* bk) {
return mlx_distributed_is_available_ptr(bk);
}
mlx_distributed_group mlx_distributed_init(bool strict) {
return mlx_distributed_init_ptr(strict);
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk) {
return mlx_distributed_init_ptr(strict, bk);
}
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) {
@@ -4748,6 +4772,10 @@ int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
return mlx_atleast_3d_ptr(res, a, s);
}
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
return mlx_bartlett_ptr(res, M, s);
}
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) {
return mlx_bitwise_and_ptr(res, a, b, s);
}
@@ -4764,6 +4792,10 @@ int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const
return mlx_bitwise_xor_ptr(res, a, b, s);
}
int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
return mlx_blackman_ptr(res, M, s);
}
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) {
return mlx_block_masked_mm_ptr(res, a, b, block_size, mask_out, mask_lhs, mask_rhs, s);
}
@@ -4864,8 +4896,8 @@ int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_
return mlx_depends_ptr(res, inputs, dependencies);
}
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) {
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, dtype, s);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) {
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
}
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
@@ -4972,6 +5004,14 @@ int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float
return mlx_hadamard_transform_ptr(res, a, scale, s);
}
int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
return mlx_hamming_ptr(res, M, s);
}
int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
return mlx_hanning_ptr(res, M, s);
}
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
return mlx_identity_ptr(res, n, dtype, s);
}
@@ -5208,12 +5248,12 @@ int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indice
return mlx_put_along_axis_ptr(res, a, indices, values, axis, s);
}
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) {
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
}
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
return mlx_quantize_ptr(res, w, group_size, bits, mode, s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) {
return mlx_quantize_ptr(res, w, group_size, bits, mode, global_scale, s);
}
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {

View File

@@ -2125,7 +2125,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
optGroupSize := C.mlx_optional_int{value: C.int(groupSize), has_value: true}
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
res := C.mlx_vector_array_new()
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, C.default_stream())
var globalScale C.mlx_array
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, globalScale, C.default_stream())
// Result is a vector of arrays: [weights, scales, biases?]
// mxfp8 mode returns only 2 elements (no biases)
@@ -2161,7 +2162,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
}
res := C.mlx_array_new()
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, optDtype, C.default_stream())
var globalScale C.mlx_array
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, globalScale, optDtype, C.default_stream())
return newArray(res)
}

View File

@@ -309,10 +309,12 @@
#undef mlx_atleast_1d
#undef mlx_atleast_2d
#undef mlx_atleast_3d
#undef mlx_bartlett
#undef mlx_bitwise_and
#undef mlx_bitwise_invert
#undef mlx_bitwise_or
#undef mlx_bitwise_xor
#undef mlx_blackman
#undef mlx_block_masked_mm
#undef mlx_broadcast_arrays
#undef mlx_broadcast_to
@@ -365,6 +367,8 @@
#undef mlx_greater
#undef mlx_greater_equal
#undef mlx_hadamard_transform
#undef mlx_hamming
#undef mlx_hanning
#undef mlx_identity
#undef mlx_imag
#undef mlx_inner
@@ -751,8 +755,8 @@ extern int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x,
extern int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group);
extern int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group);
extern mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key);
extern bool (*mlx_distributed_is_available_ptr)(void);
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict);
extern bool (*mlx_distributed_is_available_ptr)(const char* bk);
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk);
extern void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
extern void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...);
extern int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless);
@@ -905,10 +909,12 @@ extern int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype,
extern int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
extern int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
extern int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s);
@@ -934,7 +940,7 @@ extern int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool
extern int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s);
extern int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
extern int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
extern int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s);
extern int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
@@ -961,6 +967,8 @@ extern int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_ar
extern int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
extern int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
extern int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
@@ -1020,8 +1028,8 @@ extern int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* ax
extern int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s);
extern int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s);
extern int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
extern int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -1492,9 +1500,9 @@ int mlx_distributed_group_size(mlx_distributed_group group);
mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
bool mlx_distributed_is_available(void);
bool mlx_distributed_is_available(const char* bk);
mlx_distributed_group mlx_distributed_init(bool strict);
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk);
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
@@ -1800,6 +1808,8 @@ int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
int mlx_bitwise_invert(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -1808,6 +1818,8 @@ int mlx_bitwise_or(mlx_array* res, const mlx_array a, const mlx_array b, const m
int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
int mlx_broadcast_arrays(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
@@ -1858,7 +1870,7 @@ int mlx_degrees(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -1912,6 +1924,10 @@ int mlx_greater_equal(mlx_array* res, const mlx_array a, const mlx_array b, cons
int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -2030,9 +2046,9 @@ int mlx_prod(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream
int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);

View File

@@ -93,21 +93,8 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
matchPath, matched = findBestMatch(c.root, inputs[:len(inputs)-1])
}
// Check for partial match within a node's edge — truncate path
// to the parent boundary. snapshot() will split the node and
// create the branch point during prefill when caches are ready.
partialMatch := false
if len(matchPath) > 1 {
lastNode := matchPath[len(matchPath)-1]
matchedInEdge := matched - lastNode.startOffset()
if matchedInEdge > 0 && matchedInEdge < len(lastNode.tokens) {
matchPath = matchPath[:len(matchPath)-1]
partialMatch = true
}
}
// Switch to the matched path, paging in/out as needed.
c.switchToPath(matchPath)
c.switchToPath(matchPath, matched)
// switchToPath aligns caches to a common offset
prefix := c.minCacheOffset()
@@ -116,7 +103,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
// Schedule a snapshot at the branch point during prefill so future
// requests diverging here can restore instead of re-evaluating.
var snapshotAt int
if partialMatch || (prefix == 0 && matched > 0) {
if prefix < matched {
snapshotAt = matched
}
@@ -142,7 +129,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
// switchToPath transitions from the current active path to a new path,
// paging out diverging segments and paging in the new path.
func (c *kvCache) switchToPath(newPath []*trieNode) {
func (c *kvCache) switchToPath(newPath []*trieNode, matched int) {
defer c.enforceEvictionPolicy()
// Find common ancestor index.
@@ -167,7 +154,10 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
// non-leaf nodes here would produce wrong results for non-rewindable
// caches (e.g. RecurrentCache) whose state reflects the leaf, not
// the intermediate boundary.
if leaf := len(c.activePath) - 1; leaf >= commonLen {
leaf := len(c.activePath) - 1
leafDiverges := leaf >= commonLen
leafNeedsRewind := matched < c.activePath[leaf].endOffset
if leafDiverges || leafNeedsRewind {
node := c.activePath[leaf]
if !node.hasAllSnapshots() {
fromOffset := node.startOffset()
@@ -184,14 +174,16 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
}
}
// Rewind each cache to the ancestor offset or free it. Freed
// caches (e.g. RecurrentCache that can't rewind) will be restored
// from snapshots during page-in.
// Rewind each cache to the target offset or free it. When matched
// falls within the ancestor's range (same-path case), we rewind
// directly to the match point. Otherwise we rewind to the ancestor
// and let page-in bring us forward to matched.
rewindTarget := min(ancestorOffset, matched)
for _, kv := range c.caches {
if kv == nil {
continue
}
if !kv.Restore(nil, ancestorOffset) {
if !kv.Restore(nil, rewindTarget) {
kv.Free()
}
}
@@ -199,10 +191,12 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
// Page in — walk the full new path, restoring from snapshots.
// Freed caches naturally pick up the first available snapshot.
// Caches already past a node skip it via offset check.
pageIn:
for _, node := range newPath {
if len(node.snapshots) == 0 {
if !node.hasSnapshots() {
continue
}
nodeTarget := min(node.endOffset, matched)
for j, kv := range c.caches {
if kv == nil {
continue
@@ -210,19 +204,18 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
if j >= len(node.snapshots) || node.snapshots[j] == nil {
continue
}
if kv.Offset() >= node.endOffset {
if kv.Offset() >= nodeTarget {
continue
}
if !kv.Restore(node.snapshots[j], node.endOffset) {
slog.Warn("cache restore failure during page-in, freeing all caches", "layer", j, "offset", node.startOffset())
c.freeAll()
c.activePath = []*trieNode{c.root}
return
if !kv.Restore(node.snapshots[j], nodeTarget) {
// Restore failed — stop page-in and let alignment
// bring all caches to a consistent offset.
break pageIn
}
}
if node.endOffset > ancestorOffset {
pageInCount++
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), node.endOffset))
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), nodeTarget))
}
}
@@ -536,6 +529,9 @@ func (c *kvCache) dumpTree() {
if nodeBytes > 0 {
label += " " + mlx.PrettyBytes(int(nodeBytes)).String()
}
if !n.lastUsed.IsZero() {
label += fmt.Sprintf(" %s ago", time.Since(n.lastUsed).Truncate(time.Millisecond))
}
var flags []string
if n.user {
flags = append(flags, "user")

View File

@@ -17,7 +17,8 @@ type Cache interface {
Snapshot(fromOffset int) Snapshot
// Restore brings the cache to target. If snapshot is nil, rewinds
// using the cache's own live state.
// using the cache's own live state. Returns false if the target is
// unreachable (e.g. target > current offset, or negative).
Restore(snapshot Snapshot, target int) bool
// Merge combines two sequential snapshots [a,b) and [b,c) into [a,c).
@@ -108,8 +109,8 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
kSlice := c.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
vSlice := c.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
kCopy := mlx.Copy(kSlice)
vCopy := mlx.Copy(vSlice)
kCopy := mlx.Contiguous(kSlice, false)
vCopy := mlx.Contiguous(vSlice, false)
mlx.Pin(kCopy, vCopy)
mlx.AsyncEval(kCopy, vCopy)
@@ -122,17 +123,21 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
}
func (c *KVCache) Restore(snapshot Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
// Rewind using live state — just clamp offset.
target = max(0, min(target, c.offset))
if target > c.offset {
return false
}
c.offset = target
return true
}
snap := snapshot.(*kvSnapshot)
// Check that the cache has data up to the snapshot's starting point.
if c.offset < snap.fromOffset {
if target > snap.toOffset || c.offset < snap.fromOffset {
return false
}
@@ -191,10 +196,10 @@ func (c *KVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot) {
return snapshot, nil
}
pk := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
pv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
ck := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
cv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
pk := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
pv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
ck := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
cv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
mlx.Pin(pk, pv, ck, cv)
mlx.AsyncEval(pk, pv, ck, cv)
@@ -354,7 +359,14 @@ func (c *RotatingKVCache) Snapshot(fromOffset int) Snapshot {
}
func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
if target >= c.offset {
return target == c.offset
}
// Live rewind is only safe when the buffer hasn't filled yet
// (offset <= maxSize). Once the window has shifted, rewinding
// leaves fewer than maxSize trailing tokens to attend to —
@@ -362,7 +374,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
if c.offset > c.maxSize {
return false
}
target = max(0, min(target, c.offset))
c.offset = target
c.idx = target
return true
@@ -370,6 +381,10 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
snap := snapshot.(*rotatingSnapshot)
if target > snap.toOffset {
return false
}
// Reject if clamping would leave an incomplete window.
if target < snap.toOffset && snap.toOffset > c.maxSize {
return false
@@ -388,7 +403,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
// Clamp to target if needed.
if target < c.offset {
target = max(0, target)
c.offset = target
c.idx = target
}

View File

@@ -22,14 +22,9 @@ func (c *RecurrentCache) setStateRaw(old, v *mlx.Array) *mlx.Array {
if v == nil || !v.Valid() {
return old
}
if old == v {
return old
}
mlx.Pin(v)
if old != nil && old != v {
mlx.Unpin(old)
}
mlx.Unpin(old)
return v
}
@@ -38,9 +33,6 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
if v == nil || !v.Valid() {
return old
}
if old == v {
return old
}
root := v
if ensureContiguous {
@@ -49,9 +41,7 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
detached := root.Clone()
mlx.Pin(detached)
if old != nil && old != detached {
mlx.Unpin(old)
}
mlx.Unpin(old)
return detached
}
@@ -150,10 +140,10 @@ func (c *RecurrentCache) Restore(snapshot Snapshot, target int) bool {
snap := snapshot.(*recurrentSnapshot)
// Recurrent state encodes all tokens up to snap.offset. Restoring
// to a target before that would leave stale state from tokens
// [target, snap.offset) baked in. Only allow restoring forward.
if target < snap.offset {
// Recurrent snapshots encode cumulative state up to exactly
// snap.offset. Target must match — rewinding would leave stale
// state, and advancing isn't possible without feeding tokens.
if target != snap.offset {
return false
}

View File

@@ -6,39 +6,35 @@ import (
"github.com/ollama/ollama/x/mlxrunner/mlx"
)
// TestRecurrentCacheRestoreDirectionality verifies that RecurrentCache only
// allows restoring forward (target >= snapshot offset), never backward.
func TestRecurrentCacheRestoreDirectionality(t *testing.T) {
// TestRecurrentCacheRestoreExactOffset verifies that RecurrentCache restore
// only succeeds when target exactly matches the snapshot's offset. Recurrent
// state is cumulative, so it can't be rewound or fast-forwarded.
func TestRecurrentCacheRestoreExactOffset(t *testing.T) {
skipIfNoMLX(t)
c := NewRecurrentCache(3, 12, 4, 8, 8)
_ = c.ConvState(1, mlx.DTypeFloat16)
_ = c.DeltaState(1, mlx.DTypeFloat16)
c.Advance(10)
snap := c.Snapshot(0)
snap := c.Snapshot(0) // snap.offset == 10
c.Advance(5) // now at 15
c.Advance(5) // cache now at 15
// Restore backward should fail.
// target < snap.offset: fails (can't rewind past snapshot)
if c.Restore(snap, 5) {
t.Fatal("Restore(snap, 5) should fail — target < snap.offset")
t.Fatal("Restore(snap, 5) should fail — target != snap.offset")
}
// Restore to exact snap offset should succeed.
// target > snap.offset: fails (can't advance without feeding tokens)
if c.Restore(snap, 15) {
t.Fatal("Restore(snap, 15) should fail — target != snap.offset")
}
// target == snap.offset: succeeds
if !c.Restore(snap, 10) {
t.Fatal("Restore(snap, 10) should succeed")
t.Fatal("Restore(snap, 10) should succeed — target == snap.offset")
}
if c.Offset() != 10 {
t.Fatalf("offset = %d, want 10", c.Offset())
}
// Restore forward (target > snap offset) should succeed, offset = snap.offset.
snap2 := c.Snapshot(0)
if !c.Restore(snap2, 15) {
t.Fatal("Restore(snap, 15) should succeed")
}
// Recurrent state is at snap.offset (10), not target (15).
if c.Offset() != 10 {
t.Fatalf("offset = %d, want 10 (snap offset)", c.Offset())
}
}

View File

@@ -79,20 +79,20 @@ func (c *fakeRewindableCache) Snapshot(fromOffset int) cache.Snapshot {
}
func (c *fakeRewindableCache) Restore(snapshot cache.Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
// Rewind live state.
if target < 0 {
target = 0
}
if target > len(c.tokens) {
target = len(c.tokens)
return false
}
c.tokens = c.tokens[:target]
return true
}
s := snapshot.(*fakeSnapshot)
if len(c.tokens) < s.from {
return false // don't have base data up to snapshot start
if target > s.to || len(c.tokens) < s.from {
return false
}
c.tokens = append(c.tokens[:s.from], s.tokens...)
if target < len(c.tokens) {
@@ -196,9 +196,13 @@ func (c *fakeSlidingWindowCache) Snapshot(fromOffset int) cache.Snapshot {
}
func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
if target == len(c.tokens) {
return true
if target >= len(c.tokens) {
return target == len(c.tokens)
}
// Live rewind only works when buffer hasn't filled (offset <= maxSize).
if len(c.tokens) > c.maxSize {
@@ -208,6 +212,14 @@ func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bo
return true
}
s := snapshot.(*fakeSnapshot)
if target > s.to {
return false
}
// Reject if clamping would leave an incomplete window
// (matches RotatingKVCache behavior).
if target < s.to && s.to > c.maxSize {
return false
}
c.tokens = slices.Clone(s.tokens)
if target < len(c.tokens) {
c.tokens = c.tokens[:target]
@@ -268,8 +280,8 @@ func (c *fakeRecurrentCache) Restore(snapshot cache.Snapshot, target int) bool {
return target == len(c.tokens) // can only no-op
}
s := snapshot.(*fakeSnapshot)
if target < s.to {
return false // can't go backward
if target != s.to {
return false // cumulative state requires exact match
}
c.tokens = slices.Clone(s.tokens)
return true
@@ -294,9 +306,10 @@ type feedableCache interface {
// testEnv encapsulates a kvCache and its fake caches for a test scenario.
type testEnv struct {
kvc *kvCache
caches []cache.Cache // typed references for assertions
tracker *snapshotTracker
kvc *kvCache
caches []cache.Cache // typed references for assertions
tracker *snapshotTracker
rewindable bool // true when all caches support arbitrary Restore(nil, target)
}
// newTransformerEnv creates a test environment with a single rewindable cache
@@ -305,23 +318,28 @@ func newTransformerEnv() *testEnv {
tracker := &snapshotTracker{}
caches := []cache.Cache{&fakeRewindableCache{tracker: tracker}}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tracker,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tracker,
rewindable: true,
}
}
// newSlidingWindowEnv creates a test environment with one rewindable cache and
// one sliding window cache (Mistral-style architecture).
// one sliding window cache (Mistral-style architecture). The sliding window
// maxSize is set small enough that test sequences fill it, making
// Restore(nil, target) fail — the same behavior as production models where
// the window fills after a few turns.
func newSlidingWindowEnv() *testEnv {
tr := &snapshotTracker{}
rc := &fakeRewindableCache{tracker: tr}
sw := &fakeSlidingWindowCache{maxSize: 32, tracker: tr}
sw := &fakeSlidingWindowCache{maxSize: 4, tracker: tr}
caches := []cache.Cache{rc, sw}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
rewindable: false,
}
}
@@ -333,9 +351,10 @@ func newRecurrentEnv() *testEnv {
nrc := &fakeRecurrentCache{tracker: tr}
caches := []cache.Cache{rc, nrc}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
rewindable: false,
}
}
@@ -590,15 +609,24 @@ func TestBranchCreationAndReuse(t *testing.T) {
}
// Request B: [1,2,3,4,5,10,11,12] — shares 5-token prefix with A.
// Partial match in A's edge triggers snapshotOffset.
// For rewindable caches, switchToPath rewinds to the match point
// so only the non-matching suffix needs evaluation. For non-rewindable
// caches (RecurrentCache), the rewind fails and freeAll fires.
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5, 10, 11, 12}, []int32{30, 31})
if resB.snapshotOffset != 5 {
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
}
// Cache was rewound to 0 (partial match truncates path to root),
// so all tokens were re-evaluated.
if len(resB.remaining) != 8 {
t.Fatalf("B: remaining = %d, want 8", len(resB.remaining))
if env.rewindable {
if resB.snapshotOffset != 0 {
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
}
if len(resB.remaining) != 3 {
t.Fatalf("B: remaining = %d, want 3 (rewind to match point)", len(resB.remaining))
}
} else {
if resB.snapshotOffset != 5 {
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
}
if len(resB.remaining) != 8 {
t.Fatalf("B: remaining = %d, want 8 (freeAll fallback)", len(resB.remaining))
}
}
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 10, 11, 12, 30, 31})
@@ -635,14 +663,24 @@ func TestExactMatchSeedBehavior(t *testing.T) {
simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{10, 11})
// Request B: identical prompt. Holdback means matched=4, partial in
// the 5-token edge, so path truncates to root and all tokens are
// re-evaluated. snapshotOffset should be set at the holdback point.
// the 5-token edge. For rewindable caches, switchToPath rewinds to
// offset 4, so only the held-back token needs re-evaluation. For
// non-rewindable caches, the rewind fails and freeAll fires.
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{20, 21})
if len(resB.remaining) != 5 {
t.Fatalf("B: remaining = %d, want 5 (full re-eval due to holdback)", len(resB.remaining))
}
if resB.snapshotOffset != 4 {
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
if env.rewindable {
if len(resB.remaining) != 1 {
t.Fatalf("B: remaining = %d, want 1 (rewind to holdback point)", len(resB.remaining))
}
if resB.snapshotOffset != 0 {
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
}
} else {
if len(resB.remaining) != 5 {
t.Fatalf("B: remaining = %d, want 5 (freeAll fallback)", len(resB.remaining))
}
if resB.snapshotOffset != 4 {
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
}
}
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 20, 21})

View File

@@ -2,6 +2,7 @@ package mlxrunner
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
@@ -36,14 +37,69 @@ type Client struct {
modelName string
contextLength atomic.Int64
memory atomic.Uint64
done chan error
done chan struct{}
doneErr error // valid after done is closed
client *http.Client
lastErr string
lastErrLock sync.Mutex
status *statusWriter
mu sync.Mutex
cmd *exec.Cmd
}
// statusWriter mirrors subprocess stderr to an output file while
// remembering the most recent non-empty line so it can be attached to
// error reports. A line longer than maxStatusLen is truncated to its
// first maxStatusLen bytes.
type statusWriter struct {
	lastErrMsg string
	buf        []byte
	discarding bool
	mu         sync.Mutex
	out        *os.File
}

// maxStatusLen bounds how many bytes of a single stderr line are retained.
const maxStatusLen = 256

// Write forwards p to w.out, then scans it for complete lines and records
// the last non-blank one under the lock. It implements io.Writer; the
// return values are those of the underlying file write.
func (w *statusWriter) Write(p []byte) (int, error) {
	n, werr := w.out.Write(p)

	w.mu.Lock()
	defer w.mu.Unlock()

	w.buf = append(w.buf, p...)
	for {
		nl := bytes.IndexByte(w.buf, '\n')
		if nl < 0 {
			break
		}
		if !w.discarding {
			if line := bytes.TrimSpace(w.buf[:nl]); len(line) > 0 {
				if len(line) > maxStatusLen {
					line = line[:maxStatusLen]
				}
				w.lastErrMsg = string(line)
			}
		}
		w.buf = w.buf[nl+1:]
		w.discarding = false
	}

	// No newline yet: once the pending line outgrows the cap, record its
	// head a single time and drop the remainder of the line as it streams
	// in (discarding stays set until the next newline).
	if len(w.buf) > maxStatusLen {
		if !w.discarding {
			w.lastErrMsg = string(bytes.TrimSpace(w.buf[:maxStatusLen]))
			w.discarding = true
		}
		w.buf = w.buf[:0]
	}
	return n, werr
}

// getLastErr returns the most recently captured stderr line.
func (w *statusWriter) getLastErr() string {
	w.mu.Lock()
	defer w.mu.Unlock()
	return w.lastErrMsg
}
// NewClient prepares a new MLX runner client for LLM models.
// The subprocess is not started until Load() is called.
func NewClient(modelName string) (*Client, error) {
@@ -53,7 +109,7 @@ func NewClient(modelName string) (*Client, error) {
c := &Client{
modelName: modelName,
done: make(chan error, 1),
done: make(chan struct{}),
client: &http.Client{Timeout: 10 * time.Minute},
}
@@ -66,12 +122,6 @@ func NewClient(modelName string) (*Client, error) {
return c, nil
}
func (c *Client) getLastErr() string {
c.lastErrLock.Lock()
defer c.lastErrLock.Unlock()
return c.lastErr
}
// WaitUntilRunning waits for the subprocess to be ready.
func (c *Client) WaitUntilRunning(ctx context.Context) error {
timeout := time.After(2 * time.Minute)
@@ -82,16 +132,14 @@ func (c *Client) WaitUntilRunning(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case err := <-c.done:
errMsg := c.getLastErr()
if errMsg != "" {
return fmt.Errorf("mlx runner failed: %s (exit: %v)", errMsg, err)
case <-c.done:
if msg := c.status.getLastErr(); msg != "" {
return fmt.Errorf("mlx runner failed: %s (exit: %v)", msg, c.doneErr)
}
return fmt.Errorf("mlx runner exited unexpectedly: %w", err)
return fmt.Errorf("mlx runner exited unexpectedly: %w", c.doneErr)
case <-timeout:
errMsg := c.getLastErr()
if errMsg != "" {
return fmt.Errorf("timeout waiting for mlx runner: %s", errMsg)
if msg := c.status.getLastErr(); msg != "" {
return fmt.Errorf("timeout waiting for mlx runner: %s", msg)
}
return errors.New("timeout waiting for mlx runner to start")
case <-ticker.C:
@@ -182,6 +230,9 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
resp, err := c.client.Do(httpReq)
if err != nil {
if errMsg := c.status.getLastErr(); errMsg != "" {
return fmt.Errorf("mlx runner failed: %s", errMsg)
}
return err
}
defer resp.Body.Close()
@@ -219,7 +270,13 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
}
}
return scanner.Err()
if err := scanner.Err(); err != nil {
if errMsg := c.status.getLastErr(); errMsg != "" {
return fmt.Errorf("mlx runner failed: %s", errMsg)
}
return err
}
return nil
}
func (c *Client) ContextLength() int {
@@ -348,18 +405,13 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
// Forward subprocess stdout/stderr to server logs
stdout, _ := cmd.StdoutPipe()
stderr, _ := cmd.StderrPipe()
status := &statusWriter{out: os.Stderr}
c.status = status
go func() {
io.Copy(os.Stderr, stdout) //nolint:errcheck
}()
go func() {
scanner := bufio.NewScanner(stderr)
for scanner.Scan() {
line := scanner.Text()
fmt.Fprintln(os.Stderr, line)
c.lastErrLock.Lock()
c.lastErr = line
c.lastErrLock.Unlock()
}
io.Copy(status, stderr) //nolint:errcheck
}()
slog.Info("starting mlx runner subprocess", "model", c.modelName, "port", c.port)
@@ -369,8 +421,8 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
// Reap subprocess when it exits
go func() {
err := cmd.Wait()
c.done <- err
c.doneErr = cmd.Wait()
close(c.done)
}()
return nil, nil

View File

@@ -15,7 +15,9 @@ set(CMAKE_INSTALL_RPATH "@loader_path")
include(FetchContent)
set(MLX_C_GIT_TAG "v0.5.0" CACHE STRING "")
# Read MLX-C version from top-level file (shared with imagegen CMakeLists)
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
FetchContent_Declare(
mlx-c

View File

@@ -137,6 +137,9 @@ func Unpin(s ...*Array) {
for _, t := range s {
if t != nil {
t.pinned--
if t.pinned < 0 {
panic(fmt.Sprintf("mlx.Unpin: negative pin count on array %q", t.name))
}
}
}
}
@@ -259,9 +262,11 @@ func LogArrays() {
return arrays[i].NumBytes() > arrays[j].NumBytes()
})
var total int
for _, t := range arrays {
nb := t.NumBytes()
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s %v", t.name, t.DType(), PrettyBytes(nb), t.Dims()))
total += nb
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned, t.Dims()))
}
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s", len(arrays), PrettyBytes(ActiveMemory())))
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s, active: %s", len(arrays), PrettyBytes(total), PrettyBytes(ActiveMemory())))
}

View File

@@ -13,6 +13,10 @@ var (
gatedDeltaMetalKernelOnce sync.Once
gatedDeltaMetalKernel C.mlx_fast_metal_kernel
gatedDeltaMetalDisabled bool
gatedDeltaCUDAKernelOnce sync.Once
gatedDeltaCUDAKernel C.mlx_fast_cuda_kernel
gatedDeltaCUDADisabled bool
)
const gatedDeltaMetalKernelSource = `
@@ -83,6 +87,86 @@ for (int i = 0; i < n_per_t; ++i) {
}
`
// gatedDeltaCUDAKernelSource is the CUDA body for the fused gated-delta
// recurrent step ("gated_delta_step"). One 32-thread warp (x dimension)
// cooperates over a Dk-sized chunk per (batch, value-head, Dv-lane): the
// per-(Dv, Dk) state is held in registers (float state[n_per_t]) across
// all T timesteps, warp shuffles reduce kv_mem and the output, and the
// final state is written back to state_out at the end.
const gatedDeltaCUDAKernelSource = `
auto tid_x = threadIdx.x;
auto tid_y = threadIdx.y;
auto grid_y = blockIdx.y * blockDim.y + tid_y;
auto grid_z = blockIdx.z;
int T_val = static_cast<int>(*T);
auto n = grid_z;
auto b_idx = n / Hv;
auto hv_idx = n % Hv;
auto hk_idx = hv_idx / (Hv / Hk);
constexpr int n_per_t = Dk / 32;
// q, k: [B, T, Hk, Dk]
auto q_ = q + b_idx * T_val * Hk * Dk + hk_idx * Dk;
auto k_ = k + b_idx * T_val * Hk * Dk + hk_idx * Dk;
// v, y: [B, T, Hv, Dv]
auto dv_idx = grid_y;
auto v_ = v + b_idx * T_val * Hv * Dv + hv_idx * Dv;
y += b_idx * T_val * Hv * Dv + hv_idx * Dv;
auto dk_idx = tid_x;
// state_in, state_out: [B, Hv, Dv, Dk]
auto i_state = state_in + (n * Dv + dv_idx) * Dk;
auto o_state = state_out + (n * Dv + dv_idx) * Dk;
float state[n_per_t];
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = static_cast<float>(i_state[s_idx]);
}
// g: [B, T, Hv]
auto g_ = g + b_idx * T_val * Hv;
auto beta_ = beta + b_idx * T_val * Hv;
for (int t = 0; t < T_val; ++t) {
float kv_mem = 0.0f;
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = state[i] * static_cast<float>(g_[hv_idx]);
kv_mem += state[i] * static_cast<float>(k_[s_idx]);
}
// Warp reduction (full warp, 32 threads in x)
for (int offset = 16; offset > 0; offset >>= 1)
kv_mem += __shfl_down_sync(0xffffffff, kv_mem, offset);
kv_mem = __shfl_sync(0xffffffff, kv_mem, 0);
auto delta = (static_cast<float>(v_[dv_idx]) - kv_mem) * static_cast<float>(beta_[hv_idx]);
float out = 0.0f;
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = state[i] + static_cast<float>(k_[s_idx]) * delta;
out += state[i] * static_cast<float>(q_[s_idx]);
}
// Warp reduction
for (int offset = 16; offset > 0; offset >>= 1)
out += __shfl_down_sync(0xffffffff, out, offset);
if (tid_x == 0) {
y[dv_idx] = static_cast<InT>(out);
}
q_ += Hk * Dk;
k_ += Hk * Dk;
v_ += Hv * Dv;
y += Hv * Dv;
g_ += Hv;
beta_ += Hv;
}
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
o_state[s_idx] = static_cast<InT>(state[i]);
}
`
func cStringVector(values []string) (C.mlx_vector_string, func(), bool) {
vec := C.mlx_vector_string_new()
ok := true
@@ -352,11 +436,184 @@ func gatedDeltaFallback(q, k, v, g, beta, state *Array) (y, nextState *Array) {
return Concatenate(outs, 1), nextState
}
// initGatedDeltaCUDAKernel builds the fused gated-delta CUDA kernel once.
// It is invoked exactly once via gatedDeltaCUDAKernelOnce.Do. On any
// failure (no CUDA device, C-string conversion failure) it sets
// gatedDeltaCUDADisabled so callers fall back to other implementations.
func initGatedDeltaCUDAKernel() {
	// Bail out early when CUDA is unavailable or the availability query fails.
	var cudaAvail C.bool
	if C.mlx_cuda_is_available(&cudaAvail) != 0 || !bool(cudaAvail) {
		gatedDeltaCUDADisabled = true
		return
	}
	// Named kernel inputs, in the order they are passed at apply time.
	inputs, freeInputs, ok := cStringVector([]string{"q", "k", "v", "g", "beta", "state_in", "T"})
	if !ok {
		gatedDeltaCUDADisabled = true
		freeInputs()
		return
	}
	defer freeInputs()
	outputs, freeOutputs, ok := cStringVector([]string{"y", "state_out"})
	if !ok {
		gatedDeltaCUDADisabled = true
		freeOutputs()
		return
	}
	defer freeOutputs()
	cName := C.CString("gated_delta_step")
	defer C.free(unsafe.Pointer(cName))
	cSource := C.CString(gatedDeltaCUDAKernelSource)
	defer C.free(unsafe.Pointer(cSource))
	cHeader := C.CString("")
	defer C.free(unsafe.Pointer(cHeader))
	// NOTE(review): the result of mlx_fast_cuda_kernel_new is not checked
	// here — confirm whether a failed build is surfaced at apply time.
	gatedDeltaCUDAKernel = C.mlx_fast_cuda_kernel_new(
		cName,
		inputs,
		outputs,
		cSource,
		cHeader,
		C.bool(true),
		C.int(0),
	)
}
// gatedDeltaCUDAKernelApply attempts to run the fused CUDA gated-delta
// kernel. It returns ok=false — so the caller can fall back to another
// implementation — when the kernel is disabled, any input is nil, or the
// shapes/dtypes do not satisfy the kernel's constraints.
//
// Expected shapes: q, k [B, T, Hk, Dk]; v [B, T, Hv, Dv]; g, beta
// [B, T, Hv]; state [B, Hv, Dv, Dk], with Dk a multiple of 32 and Hv a
// multiple of Hk. All inputs must share a single dtype.
func gatedDeltaCUDAKernelApply(q, k, v, g, beta, state *Array) (y, nextState *Array, ok bool) {
	if gatedDeltaCUDADisabled {
		return nil, nil, false
	}
	if q == nil || k == nil || v == nil || g == nil || beta == nil || state == nil {
		return nil, nil, false
	}
	// Validate ranks first, then individual dimensions.
	qd := q.Dims()
	kd := k.Dims()
	vd := v.Dims()
	gd := g.Dims()
	bd := beta.Dims()
	sd := state.Dims()
	if len(qd) != 4 || len(kd) != 4 || len(vd) != 4 || len(gd) != 3 || len(bd) != 3 || len(sd) != 4 {
		return nil, nil, false
	}
	B, T, Hk, Dk := qd[0], qd[1], qd[2], qd[3]
	// Dk must split evenly across a 32-thread warp (n_per_t = Dk/32 in the kernel).
	if T <= 0 || Hk <= 0 || Dk <= 0 || Dk%32 != 0 {
		return nil, nil, false
	}
	if kd[0] != B || kd[1] != T || kd[2] != Hk || kd[3] != Dk {
		return nil, nil, false
	}
	Hv, Dv := vd[2], vd[3]
	if vd[0] != B || vd[1] != T || Hv <= 0 || Dv <= 0 || Hv%Hk != 0 {
		return nil, nil, false
	}
	if gd[0] != B || gd[1] != T || gd[2] != Hv {
		return nil, nil, false
	}
	if bd[0] != B || bd[1] != T || bd[2] != Hv {
		return nil, nil, false
	}
	if sd[0] != B || sd[1] != Hv || sd[2] != Dv || sd[3] != Dk {
		return nil, nil, false
	}
	dtype := q.DType()
	if k.DType() != dtype || v.DType() != dtype || g.DType() != dtype || beta.DType() != dtype || state.DType() != dtype {
		return nil, nil, false
	}
	// Lazily compile the kernel on first use.
	gatedDeltaCUDAKernelOnce.Do(initGatedDeltaCUDAKernel)
	if gatedDeltaCUDADisabled {
		return nil, nil, false
	}
	cfg := C.mlx_fast_cuda_kernel_config_new()
	defer C.mlx_fast_cuda_kernel_config_free(cfg)
	// Template arguments: the element type plus the compile-time dimensions.
	// NOTE(review): on config errors below, gatedDeltaCUDADisabled is set
	// without synchronization — confirm concurrent callers cannot race here.
	cInT := C.CString("InT")
	defer C.free(unsafe.Pointer(cInT))
	if C.mlx_fast_cuda_kernel_config_add_template_arg_dtype(cfg, cInT, C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	for _, tpl := range []struct {
		name  string
		value int
	}{
		{name: "Dk", value: Dk},
		{name: "Dv", value: Dv},
		{name: "Hk", value: Hk},
		{name: "Hv", value: Hv},
	} {
		cn := C.CString(tpl.name)
		rc := C.mlx_fast_cuda_kernel_config_add_template_arg_int(cfg, cn, C.int(tpl.value))
		C.free(unsafe.Pointer(cn))
		if rc != 0 {
			gatedDeltaCUDADisabled = true
			return nil, nil, false
		}
	}
	// Declare output shapes: y [B, T, Hv, Dv] and state_out [B, Hv, Dv, Dk].
	yShape := []C.int{C.int(B), C.int(T), C.int(Hv), C.int(Dv)}
	stateShape := []C.int{C.int(B), C.int(Hv), C.int(Dv), C.int(Dk)}
	if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(yShape), C.size_t(len(yShape)), C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(stateShape), C.size_t(len(stateShape)), C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// Grid: x = one 32-lane warp over the Dk chunks, y = Dv lanes,
	// z = one entry per (batch, value head) pair.
	if C.mlx_fast_cuda_kernel_config_set_grid(cfg, 32, C.int(Dv), C.int(B*Hv)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// Thread block: full warp in x; the y dimension is capped at 4.
	threadY := Dv
	if threadY > 4 {
		threadY = 4
	}
	if C.mlx_fast_cuda_kernel_config_set_thread_group(cfg, 32, C.int(threadY), 1) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// T is passed as a scalar array input rather than a template argument —
	// presumably so the compiled kernel is reusable across sequence lengths.
	tScalar := FromValue(T)
	inputs := []C.mlx_array{
		q.ctx,
		k.ctx,
		v.ctx,
		g.ctx,
		beta.ctx,
		state.ctx,
		tScalar.ctx,
	}
	inVec := C.mlx_vector_array_new_data(unsafe.SliceData(inputs), C.size_t(len(inputs)))
	defer C.mlx_vector_array_free(inVec)
	outVec := C.mlx_vector_array_new()
	defer C.mlx_vector_array_free(outVec)
	if C.mlx_fast_cuda_kernel_apply(&outVec, gatedDeltaCUDAKernel, inVec, cfg, DefaultStream().ctx) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	if int(C.mlx_vector_array_size(outVec)) < 2 {
		return nil, nil, false
	}
	// Wrap the two kernel outputs as tracked arrays.
	y = New("GATED_DELTA_CUDA_Y")
	nextState = New("GATED_DELTA_CUDA_STATE")
	C.mlx_vector_array_get(&y.ctx, outVec, 0)
	C.mlx_vector_array_get(&nextState.ctx, outVec, 1)
	return y, nextState, true
}
// GatedDelta runs the recurrent update operation.
//
// It uses the fused Metal kernel when available and otherwise falls back to a
// It tries the fused CUDA kernel first, then Metal, then falls back to a
// backend-agnostic MLX implementation with identical inputs/outputs.
func GatedDelta(q, k, v, g, beta, state *Array) (y, nextState *Array) {
if y, nextState, ok := gatedDeltaCUDAKernelApply(q, k, v, g, beta, state); ok {
return y, nextState
}
if y, nextState, ok := gatedDeltaKernel(q, k, v, g, beta, state); ok {
return y, nextState
}

View File

@@ -326,8 +326,10 @@ int (*mlx_distributed_sum_scatter_)(
int (*mlx_distributed_group_rank_)(mlx_distributed_group group) = NULL;
int (*mlx_distributed_group_size_)(mlx_distributed_group group) = NULL;
mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key) = NULL;
bool (*mlx_distributed_is_available_)(void) = NULL;
mlx_distributed_group (*mlx_distributed_init_)(bool strict) = NULL;
bool (*mlx_distributed_is_available_)(const char* bk /* may be null */) = NULL;
mlx_distributed_group (*mlx_distributed_init_)(
bool strict,
const char* bk /* may be null */) = NULL;
void (*mlx_set_error_handler_)(
mlx_error_handler_func handler,
void* data,
@@ -924,6 +926,7 @@ int (*mlx_astype_)(
int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_bitwise_and_)(
mlx_array* res,
const mlx_array a,
@@ -940,6 +943,7 @@ int (*mlx_bitwise_xor_)(
const mlx_array a,
const mlx_array b,
const mlx_stream s) = NULL;
int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_block_masked_mm_)(
mlx_array* res,
const mlx_array a,
@@ -1120,6 +1124,7 @@ int (*mlx_dequantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s) = NULL;
int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
@@ -1256,6 +1261,8 @@ int (*mlx_hadamard_transform_)(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s) = NULL;
int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_inner_)(
@@ -1548,6 +1555,8 @@ int (*mlx_qqmm_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s) = NULL;
int (*mlx_quantize_)(
mlx_vector_array* res,
@@ -1555,6 +1564,7 @@ int (*mlx_quantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s) = NULL;
int (*mlx_quantized_matmul_)(
mlx_array* res,
@@ -2550,10 +2560,12 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
CHECK_LOAD(handle, mlx_atleast_1d);
CHECK_LOAD(handle, mlx_atleast_2d);
CHECK_LOAD(handle, mlx_atleast_3d);
CHECK_LOAD(handle, mlx_bartlett);
CHECK_LOAD(handle, mlx_bitwise_and);
CHECK_LOAD(handle, mlx_bitwise_invert);
CHECK_LOAD(handle, mlx_bitwise_or);
CHECK_LOAD(handle, mlx_bitwise_xor);
CHECK_LOAD(handle, mlx_blackman);
CHECK_LOAD(handle, mlx_block_masked_mm);
CHECK_LOAD(handle, mlx_broadcast_arrays);
CHECK_LOAD(handle, mlx_broadcast_to);
@@ -2606,6 +2618,8 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
CHECK_LOAD(handle, mlx_greater);
CHECK_LOAD(handle, mlx_greater_equal);
CHECK_LOAD(handle, mlx_hadamard_transform);
CHECK_LOAD(handle, mlx_hamming);
CHECK_LOAD(handle, mlx_hanning);
CHECK_LOAD(handle, mlx_identity);
CHECK_LOAD(handle, mlx_imag);
CHECK_LOAD(handle, mlx_inner);

View File

@@ -300,10 +300,12 @@
#define mlx_atleast_1d mlx_atleast_1d_mlx_gen_orig_
#define mlx_atleast_2d mlx_atleast_2d_mlx_gen_orig_
#define mlx_atleast_3d mlx_atleast_3d_mlx_gen_orig_
#define mlx_bartlett mlx_bartlett_mlx_gen_orig_
#define mlx_bitwise_and mlx_bitwise_and_mlx_gen_orig_
#define mlx_bitwise_invert mlx_bitwise_invert_mlx_gen_orig_
#define mlx_bitwise_or mlx_bitwise_or_mlx_gen_orig_
#define mlx_bitwise_xor mlx_bitwise_xor_mlx_gen_orig_
#define mlx_blackman mlx_blackman_mlx_gen_orig_
#define mlx_block_masked_mm mlx_block_masked_mm_mlx_gen_orig_
#define mlx_broadcast_arrays mlx_broadcast_arrays_mlx_gen_orig_
#define mlx_broadcast_to mlx_broadcast_to_mlx_gen_orig_
@@ -356,6 +358,8 @@
#define mlx_greater mlx_greater_mlx_gen_orig_
#define mlx_greater_equal mlx_greater_equal_mlx_gen_orig_
#define mlx_hadamard_transform mlx_hadamard_transform_mlx_gen_orig_
#define mlx_hamming mlx_hamming_mlx_gen_orig_
#define mlx_hanning mlx_hanning_mlx_gen_orig_
#define mlx_identity mlx_identity_mlx_gen_orig_
#define mlx_imag mlx_imag_mlx_gen_orig_
#define mlx_inner mlx_inner_mlx_gen_orig_
@@ -889,10 +893,12 @@
#undef mlx_atleast_1d
#undef mlx_atleast_2d
#undef mlx_atleast_3d
#undef mlx_bartlett
#undef mlx_bitwise_and
#undef mlx_bitwise_invert
#undef mlx_bitwise_or
#undef mlx_bitwise_xor
#undef mlx_blackman
#undef mlx_block_masked_mm
#undef mlx_broadcast_arrays
#undef mlx_broadcast_to
@@ -945,6 +951,8 @@
#undef mlx_greater
#undef mlx_greater_equal
#undef mlx_hadamard_transform
#undef mlx_hamming
#undef mlx_hanning
#undef mlx_identity
#undef mlx_imag
#undef mlx_inner
@@ -1501,8 +1509,10 @@ extern int (*mlx_distributed_sum_scatter_)(
extern int (*mlx_distributed_group_rank_)(mlx_distributed_group group);
extern int (*mlx_distributed_group_size_)(mlx_distributed_group group);
extern mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key);
extern bool (*mlx_distributed_is_available_)(void);
extern mlx_distributed_group (*mlx_distributed_init_)(bool strict);
extern bool (*mlx_distributed_is_available_)(const char* bk /* may be null */);
extern mlx_distributed_group (*mlx_distributed_init_)(
bool strict,
const char* bk /* may be null */);
extern void (*mlx_set_error_handler_)(
mlx_error_handler_func handler,
void* data,
@@ -2099,6 +2109,7 @@ extern int (*mlx_astype_)(
extern int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_bitwise_and_)(
mlx_array* res,
const mlx_array a,
@@ -2115,6 +2126,7 @@ extern int (*mlx_bitwise_xor_)(
const mlx_array a,
const mlx_array b,
const mlx_stream s);
extern int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_block_masked_mm_)(
mlx_array* res,
const mlx_array a,
@@ -2295,6 +2307,7 @@ extern int (*mlx_dequantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s);
extern int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -2431,6 +2444,8 @@ extern int (*mlx_hadamard_transform_)(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s);
extern int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
extern int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_inner_)(
@@ -2723,6 +2738,8 @@ extern int (*mlx_qqmm_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s);
extern int (*mlx_quantize_)(
mlx_vector_array* res,
@@ -2730,6 +2747,7 @@ extern int (*mlx_quantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s);
extern int (*mlx_quantized_matmul_)(
mlx_array* res,
@@ -4033,11 +4051,13 @@ static inline int mlx_distributed_group_size(mlx_distributed_group group) {
static inline mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key) {
return mlx_distributed_group_split_(group, color, key);
}
static inline bool mlx_distributed_is_available(void) {
return mlx_distributed_is_available_();
static inline bool mlx_distributed_is_available(const char* bk /* may be null */) {
return mlx_distributed_is_available_(bk);
}
static inline mlx_distributed_group mlx_distributed_init(bool strict) {
return mlx_distributed_init_(strict);
static inline mlx_distributed_group mlx_distributed_init(
bool strict,
const char* bk /* may be null */) {
return mlx_distributed_init_(strict, bk);
}
static inline void mlx_set_error_handler(
mlx_error_handler_func handler,
@@ -4939,6 +4959,9 @@ static inline int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_st
static inline int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
return mlx_atleast_3d_(res, a, s);
}
static inline int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
return mlx_bartlett_(res, M, s);
}
static inline int mlx_bitwise_and(
mlx_array* res,
const mlx_array a,
@@ -4963,6 +4986,9 @@ static inline int mlx_bitwise_xor(
const mlx_stream s) {
return mlx_bitwise_xor_(res, a, b, s);
}
static inline int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
return mlx_blackman_(res, M, s);
}
static inline int mlx_block_masked_mm(
mlx_array* res,
const mlx_array a,
@@ -5193,9 +5219,10 @@ static inline int mlx_dequantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s) {
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, dtype, s);
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
}
static inline int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
return mlx_diag_(res, a, k, s);
@@ -5383,6 +5410,12 @@ static inline int mlx_hadamard_transform(
const mlx_stream s) {
return mlx_hadamard_transform_(res, a, scale, s);
}
static inline int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
return mlx_hamming_(res, M, s);
}
static inline int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
return mlx_hanning_(res, M, s);
}
static inline int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
return mlx_identity_(res, n, dtype, s);
}
@@ -5793,8 +5826,10 @@ static inline int mlx_qqmm(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s) {
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, s);
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
}
static inline int mlx_quantize(
mlx_vector_array* res,
@@ -5802,8 +5837,9 @@ static inline int mlx_quantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s) {
return mlx_quantize_(res, w, group_size, bits, mode, s);
return mlx_quantize_(res, w, group_size, bits, mode, global_scale, s);
}
static inline int mlx_quantized_matmul(
mlx_array* res,

View File

@@ -1,7 +1,7 @@
# Vendored MLX-C Headers
These header files are vendored from [mlx-c](https://github.com/ml-explore/mlx-c).
The pinned version is in `MLX_VERSION` at the repo root.
The pinned version is in `MLX_C_VERSION` at the repo root.
Headers are automatically refreshed when you run a CMake build:

View File

@@ -42,12 +42,14 @@ mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
/**
* Check if distributed is available.
*/
bool mlx_distributed_is_available(void);
bool mlx_distributed_is_available(const char* bk /* may be null */);
/**
* Initialize distributed.
*/
mlx_distributed_group mlx_distributed_init(bool strict);
mlx_distributed_group mlx_distributed_init(
bool strict,
const char* bk /* may be null */);
/**@}*/

View File

@@ -166,6 +166,7 @@ int mlx_astype(
int mlx_atleast_1d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
int mlx_bitwise_and(
mlx_array* res,
const mlx_array a,
@@ -182,6 +183,7 @@ int mlx_bitwise_xor(
const mlx_array a,
const mlx_array b,
const mlx_stream s);
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
int mlx_block_masked_mm(
mlx_array* res,
const mlx_array a,
@@ -362,6 +364,7 @@ int mlx_dequantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s);
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -498,6 +501,8 @@ int mlx_hadamard_transform(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s);
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_inner(
@@ -790,6 +795,8 @@ int mlx_qqmm(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s);
int mlx_quantize(
mlx_vector_array* res,
@@ -797,6 +804,7 @@ int mlx_quantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s);
int mlx_quantized_matmul(
mlx_array* res,

View File

@@ -4,35 +4,91 @@ package mlx
import "C"
import (
"fmt"
"iter"
"runtime"
"unsafe"
)
// SafetensorsFile represents a loaded safetensors file. It owns the
// underlying MLX array and metadata maps; release them with Free.
type SafetensorsFile struct {
	arrays   C.mlx_map_string_to_array
	metadata C.mlx_map_string_to_string
}
// loadSafetensorsStream selects the stream used for native safetensors
// loading: the CPU stream on macOS (Metal does not implement eval_gpu
// for Load), the GPU stream everywhere else so tensors load directly to
// GPU memory. The caller owns the returned stream.
func loadSafetensorsStream() C.mlx_stream {
	switch runtime.GOOS {
	case "darwin":
		return C.mlx_default_cpu_stream_new()
	default:
		return C.mlx_default_gpu_stream_new()
	}
}
// LoadSafetensorsNative loads a safetensors file using MLX's native loader.
// On success the returned SafetensorsFile must be released with Free.
func LoadSafetensorsNative(path string) (*SafetensorsFile, error) {
	var arrays C.mlx_map_string_to_array
	var metadata C.mlx_map_string_to_string
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))
	// CPU stream on darwin, GPU stream elsewhere (see loadSafetensorsStream).
	stream := loadSafetensorsStream()
	defer C.mlx_stream_free(stream)
	if C.mlx_load_safetensors(&arrays, &metadata, cPath, stream) != 0 {
		return nil, fmt.Errorf("failed to load safetensors: %s", path)
	}
	return &SafetensorsFile{arrays: arrays, metadata: metadata}, nil
}
// Get retrieves a tensor by name. It returns nil when the lookup fails
// or the name is not present in the file.
func (s *SafetensorsFile) Get(name string) *Array {
	cName := C.CString(name)
	defer C.free(unsafe.Pointer(cName))
	value := C.mlx_array_new()
	if C.mlx_map_string_to_array_get(&value, s.arrays, cName) != 0 {
		// NOTE(review): value (from mlx_array_new) is not freed on the
		// nil-return paths — confirm whether mlx_array_free is required here.
		return nil
	}
	if value.ctx == nil {
		return nil
	}
	// Wrap the retrieved handle in a tracked Array named after the tensor.
	arr := New(name)
	arr.ctx = value
	return arr
}
// GetMetadata retrieves a metadata value by key. It returns "" when the
// lookup fails.
func (s *SafetensorsFile) GetMetadata(key string) string {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	var cValue *C.char
	if C.mlx_map_string_to_string_get(&cValue, s.metadata, cKey) != 0 {
		return ""
	}
	// NOTE(review): assumes cValue is owned by the map (so it is not freed
	// here) — confirm against the mlx-c API.
	return C.GoString(cValue)
}
// Free releases the loaded safetensors maps. It is safe to call on a nil
// receiver.
func (s *SafetensorsFile) Free() {
	if s == nil {
		return
	}
	C.mlx_map_string_to_array_free(s.arrays)
	C.mlx_map_string_to_string_free(s.metadata)
}
func Load(path string) iter.Seq2[string, *Array] {
return func(yield func(string, *Array) bool) {
string2array := C.mlx_map_string_to_array_new()
defer C.mlx_map_string_to_array_free(string2array)
string2string := C.mlx_map_string_to_string_new()
defer C.mlx_map_string_to_string_free(string2string)
cPath := C.CString(path)
defer C.free(unsafe.Pointer(cPath))
// Use GPU stream so tensors load directly to GPU memory (CUDA has Load::eval_gpu).
// macOS Metal doesn't implement eval_gpu for Load, so fall back to CPU stream.
var stream C.mlx_stream
if runtime.GOOS == "darwin" {
stream = C.mlx_default_cpu_stream_new()
} else {
stream = C.mlx_default_gpu_stream_new()
sf, err := LoadSafetensorsNative(path)
if err != nil {
return
}
defer C.mlx_stream_free(stream)
defer sf.Free()
C.mlx_load_safetensors(&string2array, &string2string, cPath, stream)
it := C.mlx_map_string_to_array_iterator_new(string2array)
it := C.mlx_map_string_to_array_iterator_new(sf.arrays)
defer C.mlx_map_string_to_array_iterator_free(it)
for {
@@ -51,3 +107,43 @@ func Load(path string) iter.Seq2[string, *Array] {
}
}
}
// SaveSafetensors saves arrays to a safetensors file without metadata.
// It is shorthand for SaveSafetensorsWithMetadata with a nil metadata map.
func SaveSafetensors(path string, arrays map[string]*Array) error {
	return SaveSafetensorsWithMetadata(path, arrays, nil)
}
// SaveSafetensorsWithMetadata saves arrays to a safetensors file with
// metadata. Nil entries in arrays are skipped; metadata may be nil.
func SaveSafetensorsWithMetadata(path string, arrays map[string]*Array, metadata map[string]string) error {
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))
	// Build the C map of tensor name -> array, skipping nil arrays.
	cArrays := C.mlx_map_string_to_array_new()
	defer C.mlx_map_string_to_array_free(cArrays)
	for name, arr := range arrays {
		if arr == nil {
			continue
		}
		cName := C.CString(name)
		C.mlx_map_string_to_array_insert(cArrays, cName, arr.ctx)
		C.free(unsafe.Pointer(cName))
	}
	// Build the C metadata map; ranging over a nil Go map is a no-op.
	cMetadata := C.mlx_map_string_to_string_new()
	defer C.mlx_map_string_to_string_free(cMetadata)
	for key, value := range metadata {
		cKey := C.CString(key)
		cValue := C.CString(value)
		C.mlx_map_string_to_string_insert(cMetadata, cKey, cValue)
		C.free(unsafe.Pointer(cKey))
		C.free(unsafe.Pointer(cValue))
	}
	if C.mlx_save_safetensors(cPath, cArrays, cMetadata) != 0 {
		return fmt.Errorf("failed to save safetensors: %s", path)
	}
	return nil
}

View File

@@ -7,8 +7,44 @@ package mlx
// #cgo LDFLAGS: -lstdc++
// #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework Accelerate
// #include "generated.h"
// #include <string.h>
//
// static char _mlx_last_error_msg[1024] = {0};
// static int _mlx_last_error_flag = 0;
//
// static void _mlx_capture_error_handler(const char* msg, void* data) {
// (void)data;
// strncpy(_mlx_last_error_msg, msg, sizeof(_mlx_last_error_msg) - 1);
// _mlx_last_error_msg[sizeof(_mlx_last_error_msg) - 1] = '\0';
// _mlx_last_error_flag = 1;
// }
//
// static void mlx_install_capture_handler(void) {
// if (mlx_set_error_handler_) {
// mlx_set_error_handler_(_mlx_capture_error_handler, NULL, NULL);
// }
// }
//
// static void mlx_clear_last_error(void) {
// _mlx_last_error_flag = 0;
// _mlx_last_error_msg[0] = '\0';
// }
//
// static int mlx_had_last_error(void) {
// return _mlx_last_error_flag;
// }
//
// static const char* mlx_get_last_error(void) {
// return _mlx_last_error_flag ? _mlx_last_error_msg : NULL;
// }
import "C"
// init installs a capturing MLX error handler at package load time so
// MLX errors can be surfaced in Go instead of terminating the process.
func init() {
	// Replace the default exit(-1) error handler with one that captures
	// the error message so we can surface it in Go.
	C.mlx_install_capture_handler()
}
// Version returns the MLX core library version string.
func Version() string {
str := C.mlx_string_new()
@@ -31,10 +67,19 @@ func doEval(outputs []*Array, async bool) {
}
}
C.mlx_clear_last_error()
var rc C.int
if async {
C.mlx_async_eval(vector)
rc = C.mlx_async_eval(vector)
} else {
C.mlx_eval(vector)
rc = C.mlx_eval(vector)
}
if rc != 0 {
msg := "mlx eval failed"
if C.mlx_had_last_error() != 0 {
msg = C.GoString(C.mlx_get_last_error())
}
panic("mlx: " + msg)
}
}

View File

@@ -17,7 +17,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
res := C.mlx_vector_array_new()
defer C.mlx_vector_array_free(res)
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, DefaultStream().ctx)
var globalScale C.mlx_array
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, globalScale, DefaultStream().ctx)
vecSize := int(C.mlx_vector_array_size(res))
w0 := New("QUANTIZE_W")
@@ -32,6 +33,18 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
return w0, w1, nil
}
// FromFP8 decodes the FP8-encoded array x into a new lazy array of the
// requested dtype by calling mlx_from_fp8 on the default stream.
func FromFP8(x *Array, dtype DType) *Array {
	out := New("FROM_FP8")
	C.mlx_from_fp8(&out.ctx, x.ctx, C.mlx_dtype(dtype), DefaultStream().ctx)
	return out
}
// ToFP8 encodes x into FP8 representation by calling mlx_to_fp8 on the
// default stream, returning the result as a new lazy array.
func ToFP8(x *Array) *Array {
	out := New("TO_FP8")
	C.mlx_to_fp8(&out.ctx, x.ctx, DefaultStream().ctx)
	return out
}
func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Array {
cMode := C.CString(mode)
defer C.free(unsafe.Pointer(cMode))
@@ -45,7 +58,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
}
out := New("DEQUANTIZE")
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, optDtype, DefaultStream().ctx)
var globalScale C.mlx_array
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, globalScale, optDtype, DefaultStream().ctx)
return out
}
@@ -135,6 +149,40 @@ func Contiguous(a *Array, allowColMajor bool) *Array {
return out
}
// Pad zero-pads a along its leading axes. paddings holds (low, high)
// pairs in axis order, so len(paddings) must be twice the number of axes
// to pad. Padding always uses "constant" mode with a fill value of 0.
func Pad(a *Array, paddings []int32) *Array {
	n := len(paddings) / 2
	axisIdx := make([]C.int, n)
	before := make([]C.int, n)
	after := make([]C.int, n)
	for i := 0; i < n; i++ {
		axisIdx[i] = C.int(i)
		before[i] = C.int(paddings[2*i])
		after[i] = C.int(paddings[2*i+1])
	}
	// Fill value and mode are C-owned temporaries; release them once the
	// pad op has been recorded.
	fill := C.mlx_array_new_float(C.float(0))
	defer C.mlx_array_free(fill)
	mode := C.CString("constant")
	defer C.free(unsafe.Pointer(mode))
	out := New("PAD")
	C.mlx_pad(
		&out.ctx,
		a.ctx,
		unsafe.SliceData(axisIdx),
		C.size_t(len(axisIdx)),
		unsafe.SliceData(before),
		C.size_t(len(before)),
		unsafe.SliceData(after),
		C.size_t(len(after)),
		fill,
		mode,
		DefaultStream().ctx,
	)
	return out
}
func DepthwiseConv1d(x, weight *Array, bias *Array) *Array {
groups := int32(x.Dim(x.NumDims() - 1))
return Conv1d(x, weight, bias, 1, 0, 1, groups)
@@ -446,15 +494,6 @@ func Collect(v any) []*Array {
return arrays
}
// Copy returns a new array produced by mlx_copy of a on the default
// stream. A nil or invalid input is returned unchanged.
//
// NOTE(review): per the KV-cache snapshot fix, mlx's copy shares the
// backing buffer with its source (copy_shared_buffer) rather than
// allocating independent storage, so copying a slice of a large buffer
// can keep the entire original buffer alive. Prefer Contiguous when a
// compact, independent snapshot is needed — confirm against mlx core.
func Copy(a *Array) *Array {
	if a == nil || !a.Valid() {
		return a
	}
	out := New("COPY")
	C.mlx_copy(&out.ctx, a.ctx, DefaultStream().ctx)
	return out
}
func collect(v reflect.Value, arrays *[]*Array, seen map[uintptr]bool) {
if !v.IsValid() {
return

View File

@@ -11,8 +11,10 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
switch strings.ToUpper(quantization) {
case "NVFP4":
return 16, 4, "nvfp4"
case "MXFP4":
return 32, 4, "mxfp4"
case "FP4", "Q4", "INT4":
return 32, 4, "affine"
return 64, 4, "affine"
case "MXFP8":
return 32, 8, "mxfp8"
case "FP8", "Q8", "INT8":

View File

@@ -144,3 +144,44 @@ func TestLayerNormDefaultEps(t *testing.T) {
}
}
}
// TestQuantizedLinearMXFP4MatchesDequantizedWeight checks that an mxfp4
// QuantizedLinear layer produces (within 1e-3) the same output as a plain
// Linear layer built from the dequantized weights, and that mxfp4
// quantization yields no bias tensor.
func TestQuantizedLinearMXFP4MatchesDequantizedWeight(t *testing.T) {
	skipIfNoMLX(t)

	wData := make([]float32, 3*32)
	for i := range wData {
		wData[i] = float32((i%11)-5) / 7
	}
	xData := make([]float32, 2*32)
	for i := range xData {
		xData[i] = float32((i%7)-3) / 5
	}

	w := mlx.FromValues(wData, 3, 32).AsType(mlx.DTypeBFloat16)
	x := mlx.FromValues(xData, 2, 32).AsType(mlx.DTypeBFloat16)
	mlx.Eval(w, x)

	layer := NewQuantizedLinear(w, nil, 32, 4, "mxfp4")
	if layer.QBiases != nil {
		t.Fatalf("mxfp4 qbiases = %v, want nil", layer.QBiases)
	}

	// Reference path: dequantize the packed weights and run a plain Linear.
	dq := mlx.Dequantize(layer.Weight, layer.Scales, layer.QBiases, 32, 4, "mxfp4")
	mlx.Eval(dq)

	quantOut := layer.Forward(x)
	refOut := NewLinear(dq, nil).Forward(x)
	mlx.Eval(quantOut, refOut)

	got, want := quantOut.Floats(), refOut.Floats()
	if len(got) != len(want) {
		t.Fatalf("output length = %d, want %d", len(got), len(want))
	}
	for i := range want {
		if !approxEqual(got[i], want[i], 1e-3) {
			t.Fatalf("output[%d] = %.6f, want %.6f", i, got[i], want[i])
		}
	}
}

View File

@@ -420,7 +420,16 @@ func tensorByBase(tensors map[string]*mlx.Array, base string) (*mlx.Array, strin
}
// supportsGatherQMM reports whether the gather quantized-matmul kernel
// can be used for the given quantization mode and bit width: affine
// supports 4- and 8-bit, mxfp8 only 8-bit, and nvfp4/mxfp4 only 4-bit.
// Any other combination falls back to the non-gather path.
//
// Fix: the stale one-line `return mode == "affine" && ...` left above the
// switch (a merge/diff artifact) made the switch unreachable dead code,
// silently disabling gather QMM for mxfp8/nvfp4/mxfp4 — removed it so the
// switch (which the unit test table expects) is actually executed.
func supportsGatherQMM(mode string, bits int) bool {
	switch mode {
	case "affine":
		return bits == 4 || bits == 8
	case "mxfp8":
		return bits == 8
	case "nvfp4", "mxfp4":
		return bits == 4
	default:
		return false
	}
}
func freeTensorKeys(tensors map[string]*mlx.Array, keys ...string) {

View File

@@ -83,6 +83,28 @@ func TestLayerSelectionHelpers(t *testing.T) {
}
}
// TestSupportsGatherQMM exercises supportsGatherQMM across supported and
// unsupported mode/bit-width combinations.
func TestSupportsGatherQMM(t *testing.T) {
	cases := []struct {
		mode string
		bits int
		want bool
	}{
		{"affine", 4, true},
		{"affine", 8, true},
		{"mxfp8", 8, true},
		{"nvfp4", 4, true},
		{"mxfp4", 4, true},
		{"mxfp8", 4, false},
		{"affine", 3, false},
	}
	for _, tc := range cases {
		got := supportsGatherQMM(tc.mode, tc.bits)
		if got != tc.want {
			t.Fatalf("supportsGatherQMM(%q, %d) = %v, want %v", tc.mode, tc.bits, got, tc.want)
		}
	}
}
func TestResolveTensorPathLayout(t *testing.T) {
dummy := mlx.New("dummy")