Compare commits

...

20 Commits

Author SHA1 Message Date
Eva Ho
7a2306087b wip 2026-03-26 19:55:13 -04:00
Eva Ho
8b8bcf0952 launch: set default model as active selection in vscode copilot chat 2026-03-26 14:50:13 -04:00
Jesse Gross
d1151e18a1 mlx: fix KV cache snapshot memory leak
mlx.Copy shares the backing buffer with its source (via
copy_shared_buffer) rather than allocating independent storage.
When used to snapshot a slice of the KV cache, the snapshot array
holds the entire original cache buffer alive through the shared
data pointer — even after eval detaches the computation graph.

Replace Copy with Contiguous in Snapshot and Split. Contiguous
allocates a compact buffer when the source buffer is significantly
larger than the logical slice (Contiguous::eval checks
buffer_size > nbytes + 16384), which is always the case for KV
cache slices.
2026-03-25 17:26:34 -07:00
rick
ebbce136c7 ggml: force flash attention off for grok 2026-03-25 16:15:49 -07:00
Devon Rifkin
26b9f53f8e api/show: overwrite basename for copilot chat (#15062)
Copilot Chat prefers to use `general.basename` in the built-in Ollama
integration, but this name isn't usually shown directly to users (and
there may be many models that share this name). Instead we pass back
`req.Model`, which for this extension is the value that we return from
`/api/tags`
2026-03-25 14:02:22 -07:00
Eva H
7575438366 cmd: ollama launch vscode (#15060)
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2026-03-25 16:37:02 -04:00
Eva H
7d7c90d702 tui: add left arrow back navigation in model selector (#14940) 2026-03-25 11:53:48 -07:00
Daniel Hiltgen
4fda69809a ci: fix windows cgo compiler error (#15046) 2026-03-24 16:45:36 -07:00
Daniel Hiltgen
c9b5da6b0c integration: improve ability to test individual models (#14948)
* integration: improve ability to test individual models

Add OLLAMA_TEST_MODEL env var to run integration tests against a
single model.

Enhance vision tests: multi-turn chat with cached image tokens, object
counting, spatial reasoning, detail recognition, scene understanding, OCR, and
multi-image comparison.

Add tool calling stress tests with complex agent-style prompts, large
system messages, and multi-turn tool response handling.

* review comments
2026-03-24 14:28:23 -07:00
Patrick Devine
de5cb7311f mlx: add mxfp4/mxfp8/nvfp4 importing (#15015)
This change allows importing bf16 and converting to mxfp4/mxfp8/nvfp4
and also importing fp8 and converting directly to mxfp8.
2026-03-24 13:45:44 -07:00
Jesse Gross
95ee7fbd29 mlxrunner: panic on double unpin 2026-03-23 17:44:19 -07:00
Jesse Gross
ec55536734 mlxrunner: show time since last used in cache dump tree 2026-03-23 17:44:19 -07:00
Jesse Gross
77491439c2 mlxrunner: support partial match on pure transformer caches
Previously, a partial match within a node's edge would truncate the path
to the parent snapshot - effectively making all cache types behave as
recurrent caches. Caches with only transformer layers can rewind to an
arbitrary boundary, so this change restores that capability to improve
cache hits.
2026-03-23 17:44:19 -07:00
Parth Sareen
b166b36cd2 docs: update Claude Code with Telegram guide (#15026) 2026-03-23 16:31:21 -07:00
Daniel Hiltgen
c2b0bb7a52 mlx: update as of 3/23 (#14789)
* mlx: update to HEAD on 3/23

Also fixes a few misc vendoring bugs uncovered with this first update.
This also renames the version files to make them clearer.

* CUDA Fast Gated Delta kernel

* mlx: detect eval errors and panic

On model errors or missing kernels, don't mask the error, bubble it up.
2026-03-23 11:28:44 -07:00
Bruce MacDonald
22c2bdbd8a docs: nemoclaw integration (#14962)
---------

Co-authored-by: ParthSareen <parth.sareen@ollama.com>
2026-03-20 15:27:37 -07:00
Bruce MacDonald
6df6d097d9 launch: skip openclaw gateway health check when no daemon install (#14984) 2026-03-20 15:20:14 -07:00
Jesse Gross
d7c176ab91 llm, mlxrunner: fix done channel value consumed by first receiver
Receiving from a buffered chan error consumes the value, so only the
first caller (WaitUntilRunning, HasExited, or Close) sees the signal.
Subsequent receivers block or take the wrong branch. Replace with a
closed chan struct{} which can be received from any number of times,
and store the error in a separate field.
2026-03-19 17:44:28 -07:00
Jesse Gross
0ff7d724ff mlx: fix subprocess log deadlock
The stderr reader used bufio.Scanner which has a 64KB max line size.
If the subprocess wrote a line exceeding this limit, the scanner would
stop reading, the OS pipe buffer would fill, and the subprocess would
deadlock.

Replace the scanner with a statusWriter that wraps io.Copy. The writer
forwards all stderr to os.Stderr while capturing the last short line
(≤256 bytes) for error reporting, avoiding both the deadlock and the
need to buffer arbitrarily long lines.
2026-03-19 17:44:28 -07:00
Devon Rifkin
46cb7795e1 add ability to turn on debug request logging (#14106)
If `OLLAMA_DEBUG_LOG_REQUESTS` is set, then on server startup a temp
folder will be created. Upon any inference request, the body will be
logged to a file in this folder, as well as a small shell script to
"replay" the request using cURL.

This is just intended for debugging scenarios, not as something to turn
on normally.
2026-03-19 17:08:17 -07:00
73 changed files with 5504 additions and 510 deletions

View File

@@ -64,6 +64,7 @@ jobs:
container: nvidia/cuda:13.0.0-devel-ubuntu22.04
extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
install-go: true
runs-on: linux
container: ${{ matrix.container }}
steps:
@@ -90,6 +91,12 @@ jobs:
fi
env:
DEBIAN_FRONTEND: noninteractive
- if: matrix.install-go
name: Install Go
run: |
GO_VERSION=$(awk '/^go / { print $2 }' go.mod)
curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" | tar xz -C /usr/local
echo "/usr/local/go/bin" >> $GITHUB_PATH
- uses: actions/cache@v4
with:
path: /github/home/.cache/ccache

View File

@@ -157,7 +157,7 @@ COPY CMakeLists.txt CMakePresets.json .
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
COPY x/imagegen/mlx x/imagegen/mlx
COPY go.mod go.sum .
COPY MLX_VERSION MLX_CORE_VERSION .
COPY MLX_VERSION MLX_C_VERSION .
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
RUN go mod download

View File

@@ -1 +0,0 @@
v0.30.6

1
MLX_C_VERSION Normal file
View File

@@ -0,0 +1 @@
0726ca922fc902c4c61ef9c27d94132be418e945

View File

@@ -1 +1 @@
v0.5.0
38ad257088fb2193ad47e527cf6534a689f30943

View File

@@ -2065,6 +2065,10 @@ func runLauncherAction(cmd *cobra.Command, action tui.TUIAction, deps launcherDe
if err != nil {
return true, fmt.Errorf("launching %s: %w", action.Integration, err)
}
// VS Code is a GUI app — exit the TUI loop after launching
if action.Integration == "vscode" {
return false, nil
}
return true, nil
default:
return false, fmt.Errorf("unknown launcher action: %d", action.Kind)

View File

@@ -209,6 +209,43 @@ func TestRunLauncherAction_RunModelContinuesAfterCancellation(t *testing.T) {
}
}
func TestRunLauncherAction_VSCodeExitsTUILoop(t *testing.T) {
setCmdTestHome(t, t.TempDir())
cmd := &cobra.Command{}
cmd.SetContext(context.Background())
// VS Code should exit the TUI loop (return false) after a successful launch.
continueLoop, err := runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "vscode"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if continueLoop {
t.Fatal("expected vscode launch to exit the TUI loop (return false)")
}
// Other integrations should continue the TUI loop (return true).
continueLoop, err = runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if !continueLoop {
t.Fatal("expected non-vscode integration to continue the TUI loop (return true)")
}
}
func TestRunLauncherAction_IntegrationContinuesAfterCancellation(t *testing.T) {
setCmdTestHome(t, t.TempDir())

View File

@@ -179,6 +179,7 @@ Supported integrations:
opencode OpenCode
openclaw OpenClaw (aliases: clawdbot, moltbot)
pi Pi
vscode    VS Code (aliases: code)
Examples:
ollama launch
@@ -801,13 +802,6 @@ func cloneAliases(aliases map[string]string) map[string]string {
return cloned
}
func singleModelPrechecked(current string) []string {
if current == "" {
return nil
}
return []string{current}
}
func firstModel(models []string) string {
if len(models) == 0 {
return ""

View File

@@ -80,6 +80,12 @@ func (c *Openclaw) Run(model string, args []string) error {
}
if canInstallDaemon() {
onboardArgs = append(onboardArgs, "--install-daemon")
} else {
// When we can't install a daemon (e.g. no systemd, sudo dropped
// XDG_RUNTIME_DIR, or container environment), skip the gateway
// health check so non-interactive onboarding completes. The
// gateway is started as a foreground child process after onboarding.
onboardArgs = append(onboardArgs, "--skip-health")
}
cmd := exec.Command(bin, onboardArgs...)
cmd.Stdin = os.Stdin

View File

@@ -33,7 +33,7 @@ type IntegrationInfo struct {
Description string
}
var launcherIntegrationOrder = []string{"opencode", "droid", "pi", "cline"}
var launcherIntegrationOrder = []string{"vscode", "opencode", "droid", "pi", "cline"}
var integrationSpecs = []*IntegrationSpec{
{
@@ -131,6 +131,18 @@ var integrationSpecs = []*IntegrationSpec{
Command: []string{"npm", "install", "-g", "@mariozechner/pi-coding-agent"},
},
},
{
Name: "vscode",
Runner: &VSCode{},
Aliases: []string{"code"},
Description: "Microsoft's open-source AI code editor",
Install: IntegrationInstallSpec{
CheckInstalled: func() bool {
return (&VSCode{}).findBinary() != ""
},
URL: "https://code.visualstudio.com",
},
},
}
var integrationSpecsByName map[string]*IntegrationSpec

660
cmd/launch/vscode.go Normal file
View File

@@ -0,0 +1,660 @@
package launch
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
_ "github.com/mattn/go-sqlite3"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/cmd/internal/fileutil"
"github.com/ollama/ollama/envconfig"
)
// VSCode implements Runner and Editor for Visual Studio Code integration.
// The zero value is ready to use; all state lives in VS Code's own files.
type VSCode struct{}

// String returns the human-readable integration name shown to users.
func (v *VSCode) String() string { return "Visual Studio Code" }
// findBinary returns the path/command to launch VS Code, or "" if not found.
// It checks for the "code" CLI on PATH first, then falls back to platform-specific locations.
func (v *VSCode) findBinary() string {
	// Prefer the "code" shell command: it works on every platform when the
	// user has enabled VS Code's CLI integration.
	if _, err := exec.LookPath("code"); err == nil {
		return "code"
	}
	var candidates []string
	switch runtime.GOOS {
	case "darwin":
		// The .app bundle may exist even when the CLI is not on PATH.
		candidates = []string{
			"/Applications/Visual Studio Code.app",
		}
	case "windows":
		// Default per-user install location.
		if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
			candidates = append(candidates, filepath.Join(localAppData, "Programs", "Microsoft VS Code", "bin", "code.cmd"))
		}
	default: // linux
		candidates = []string{
			"/usr/bin/code",
			"/snap/bin/code",
		}
	}
	for _, c := range candidates {
		if _, err := os.Stat(c); err == nil {
			return c
		}
	}
	return ""
}
// IsRunning reports whether VS Code is currently running.
// Each platform uses a pattern specific enough to avoid matching Cursor or
// other VS Code forks.
func (v *VSCode) IsRunning() bool {
	switch runtime.GOOS {
	case "darwin":
		// pgrep -f matches the full command line; the bundle-internal binary
		// path is unique to the genuine VS Code app.
		out, err := exec.Command("pgrep", "-f", "Visual Studio Code.app/Contents/MacOS/Code").Output()
		return err == nil && len(out) > 0
	case "windows":
		// Match VS Code by executable path to avoid matching Cursor or other forks.
		out, err := exec.Command("powershell", "-NoProfile", "-Command",
			`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Select-Object -First 1`).Output()
		return err == nil && len(strings.TrimSpace(string(out))) > 0
	default:
		// Match VS Code specifically by its install path to avoid matching
		// Cursor (/cursor/) or other forks.
		for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
			out, err := exec.Command("pgrep", "-f", pattern).Output()
			if err == nil && len(out) > 0 {
				return true
			}
		}
		return false
	}
}
// Quit gracefully quits VS Code and waits for it to exit so that it flushes
// its in-memory state back to the database.
func (v *VSCode) Quit() {
	if !v.IsRunning() {
		return
	}
	switch runtime.GOOS {
	case "darwin":
		// AppleScript quit is graceful: VS Code gets a chance to save state.
		_ = exec.Command("osascript", "-e", `quit app "Visual Studio Code"`).Run()
	case "windows":
		// Kill VS Code by executable path to avoid killing Cursor or other forks.
		_ = exec.Command("powershell", "-NoProfile", "-Command",
			`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Stop-Process -Force`).Run()
	default:
		// Best-effort kill on both common Linux install paths.
		for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
			_ = exec.Command("pkill", "-f", pattern).Run()
		}
	}
	// Wait for the process to fully exit and flush its state to disk
	// TODO(hoyyeva): update spinner to use bubble tea
	spinnerFrames := []string{"|", "/", "-", "\\"}
	frame := 0
	fmt.Fprintf(os.Stderr, "\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[0])
	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()
	for range 150 { // 150 ticks × 200ms = 30s timeout
		<-ticker.C
		frame++
		fmt.Fprintf(os.Stderr, "\r\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
		if frame%5 == 0 { // check every ~1s
			if !v.IsRunning() {
				fmt.Fprintf(os.Stderr, "\r\033[K")
				// Give VS Code a moment to finish writing its state DB
				time.Sleep(1 * time.Second)
				return
			}
		}
	}
	// Timed out waiting for exit: clear the spinner line and proceed anyway.
	fmt.Fprintf(os.Stderr, "\r\033[K")
}
const (
	// Minimum recommended versions. Older releases may lack the Ollama
	// model-picker behavior the warnings below refer to.
	minCopilotChatVersion = "0.41.0"
	minVSCodeVersion      = "1.113"
)
// Run configures VS Code's Copilot Chat to use the given Ollama model(s),
// then launches or restarts VS Code. The model argument is the default
// model; additional configured models are loaded from the stored
// integration config. args is accepted for interface compatibility and
// unused. Run always returns nil for user-facing launch flows; failures
// along the way are reported as warnings on stderr.
func (v *VSCode) Run(model string, args []string) error {
	v.checkVSCodeVersion()
	v.checkCopilotChatVersion()
	// Get all configured models (saved by the launcher framework before Run is called).
	models := []string{model}
	if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil && len(cfg.Models) > 0 {
		models = cfg.Models
	}
	// One API client serves both registration and the capability check;
	// previously the client was constructed twice back-to-back.
	if client, err := api.ClientFromEnvironment(); err == nil {
		ctx := context.Background()
		// VS Code discovers models from ollama ls. Cloud models that pass Show
		// (the server knows about them) but aren't in ls need to be pulled to
		// register them so VS Code can find them.
		v.ensureModelsRegistered(ctx, client, models)
		// Warn if the default model doesn't support tool calling.
		if resp, err := client.Show(ctx, &api.ShowRequest{Model: models[0]}); err == nil {
			hasTools := false
			for _, c := range resp.Capabilities {
				if c == "tools" {
					hasTools = true
					break
				}
			}
			if !hasTools {
				fmt.Fprintf(os.Stderr, "Note: %s does not support tool calling and may not appear in the Copilot Chat model picker.\n", models[0])
			}
		}
	}
	v.printModelAccessTip()
	if !v.IsRunning() {
		v.applyModelPicker(models)
		return nil
	}
	restart, err := ConfirmPrompt("Restart VS Code?")
	if err != nil {
		// A failed prompt (e.g. non-interactive terminal) means "don't restart".
		restart = false
	}
	if !restart {
		fmt.Fprintf(os.Stderr, "\nTo get the latest model configuration, restart VS Code when you're ready.\n")
		return nil
	}
	v.Quit()
	v.applyModelPicker(models)
	return nil
}

// applyModelPicker updates the Copilot Chat model picker state and brings
// VS Code to the foreground. Picker failures are warnings, not fatal.
func (v *VSCode) applyModelPicker(models []string) {
	if err := v.ShowInModelPicker(models); err != nil {
		fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
	}
	v.FocusVSCode()
}
// ensureModelsRegistered pulls models that the server knows about (Show succeeds)
// but aren't in ollama ls yet. This is needed for cloud models so that VS Code
// can discover them from the Ollama API.
func (v *VSCode) ensureModelsRegistered(ctx context.Context, client *api.Client, models []string) {
	listed, err := client.List(ctx)
	if err != nil {
		// Best-effort: without a listing we can't tell which models are missing.
		return
	}
	registered := make(map[string]bool, len(listed.Models))
	for _, m := range listed.Models {
		registered[m.Name] = true
	}
	for _, model := range models {
		if registered[model] {
			continue
		}
		// Also check without :latest suffix
		if !strings.Contains(model, ":") && registered[model+":latest"] {
			continue
		}
		// Pull failures are surfaced as warnings; remaining models still get tried.
		if err := pullModel(ctx, client, model, false); err != nil {
			fmt.Fprintf(os.Stderr, "%s Warning: could not register model %s: %v%s\n", ansiYellow, model, err, ansiReset)
		}
	}
}
// FocusVSCode brings VS Code to the foreground.
// Focusing is best-effort; all launch errors are deliberately ignored.
func (v *VSCode) FocusVSCode() {
	binary := v.findBinary()
	if binary == "" {
		return
	}
	if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
		// .app bundles must be launched via open(1).
		_ = exec.Command("open", "-a", binary).Run()
	} else {
		// Start without waiting: VS Code keeps running after we exit.
		_ = exec.Command(binary).Start()
	}
}
// printModelAccessTip shows instructions for finding Ollama models in VS Code.
// Written to stderr so it doesn't pollute any machine-readable stdout.
func (v *VSCode) printModelAccessTip() {
	fmt.Fprintf(os.Stderr, "\nTip: To use Ollama models, open Copilot Chat and click the model picker.\n")
	fmt.Fprintf(os.Stderr, " If you don't see your models, click \"Other models\" to find them.\n\n")
}
// Paths returns the VS Code config files this integration manages,
// or nil when the chatLanguageModels.json file does not exist yet.
func (v *VSCode) Paths() []string {
	p := v.chatLanguageModelsPath()
	if !fileExists(p) {
		return nil
	}
	return []string{p}
}
// Edit writes the Ollama vendor entry into VS Code's chatLanguageModels.json,
// preserving entries from other vendors, then cleans up legacy settings.
// An empty model list is a no-op.
func (v *VSCode) Edit(models []string) error {
	if len(models) == 0 {
		return nil
	}
	// Write chatLanguageModels.json with Ollama vendor entry
	clmPath := v.chatLanguageModelsPath()
	if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
		return err
	}
	var entries []map[string]any
	if data, err := os.ReadFile(clmPath); err == nil {
		// Corrupted JSON is tolerated: entries stays empty and the file is rebuilt.
		_ = json.Unmarshal(data, &entries)
	}
	// Remove any existing Ollama entries, preserve others
	filtered := make([]map[string]any, 0, len(entries))
	for _, entry := range entries {
		if vendor, _ := entry["vendor"].(string); vendor != "ollama" {
			filtered = append(filtered, entry)
		}
	}
	// Add new Ollama entry
	filtered = append(filtered, map[string]any{
		"vendor": "ollama",
		"name":   "Ollama",
		"url":    envconfig.Host().String(),
	})
	data, err := json.MarshalIndent(filtered, "", " ")
	if err != nil {
		return err
	}
	if err := fileutil.WriteWithBackup(clmPath, data); err != nil {
		return err
	}
	// Clean up legacy settings from older Ollama integrations
	v.updateSettings()
	return nil
}
// Models returns the models stored in the integration config, but only when
// VS Code's chatLanguageModels.json actually has an Ollama vendor entry.
func (v *VSCode) Models() []string {
	if !v.hasOllamaVendor() {
		return nil
	}
	cfg, err := loadStoredIntegrationConfig("vscode")
	if err != nil {
		return nil
	}
	return cfg.Models
}
// hasOllamaVendor checks if chatLanguageModels.json contains an Ollama vendor entry.
// Missing or unparseable files count as "no entry".
func (v *VSCode) hasOllamaVendor() bool {
	raw, err := os.ReadFile(v.chatLanguageModelsPath())
	if err != nil {
		return false
	}
	var entries []map[string]any
	if json.Unmarshal(raw, &entries) != nil {
		return false
	}
	for _, e := range entries {
		vendor, _ := e["vendor"].(string)
		if vendor == "ollama" {
			return true
		}
	}
	return false
}
// chatLanguageModelsPath returns the path to VS Code's BYOK vendor config file.
func (v *VSCode) chatLanguageModelsPath() string {
	return v.vscodePath("chatLanguageModels.json")
}
// settingsPath returns the path to VS Code's user settings.json.
func (v *VSCode) settingsPath() string {
	return v.vscodePath("settings.json")
}
// updateSettings cleans up legacy settings from older Ollama integrations.
// It is best-effort: any read, parse, or marshal failure leaves the file alone.
func (v *VSCode) updateSettings() {
	path := v.settingsPath()
	raw, err := os.ReadFile(path)
	if err != nil {
		return
	}
	var settings map[string]any
	if json.Unmarshal(raw, &settings) != nil {
		return
	}
	// Keys written by previous versions of the integration.
	legacyKeys := []string{"github.copilot.chat.byok.ollamaEndpoint", "ollama.launch.configured"}
	removed := false
	for _, k := range legacyKeys {
		if _, present := settings[k]; present {
			delete(settings, k)
			removed = true
		}
	}
	if !removed {
		return
	}
	out, err := json.MarshalIndent(settings, "", " ")
	if err != nil {
		return
	}
	_ = fileutil.WriteWithBackup(path, out)
}
// statePath returns the path to VS Code's global state SQLite database.
func (v *VSCode) statePath() string {
	return v.vscodePath("globalStorage", "state.vscdb")
}
// ShowInModelPicker ensures the given models are visible in VS Code's Copilot
// Chat model picker and sets the primary model as the active selection. It sets
// the configured models to true in the picker preferences so they appear in the
// dropdown, and writes the first model as the selected model for both the panel
// and editor chat views. Models use the VS Code identifier format
// "ollama/Ollama/<name>".
func (v *VSCode) ShowInModelPicker(models []string) error {
	if len(models) == 0 {
		return nil
	}
	dbPath := v.statePath()
	needsCreate := !fileExists(dbPath)
	if needsCreate {
		if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
			return fmt.Errorf("creating state directory: %w", err)
		}
	}
	// busy_timeout guards against VS Code briefly holding the DB lock.
	db, err := sql.Open("sqlite3", dbPath+"?_busy_timeout=5000")
	if err != nil {
		return fmt.Errorf("opening state database: %w", err)
	}
	defer db.Close()
	// Create the table if this is a fresh DB. Schema must match what VS Code creates.
	if needsCreate {
		if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
			return fmt.Errorf("initializing state database: %w", err)
		}
	}
	// Read existing preferences
	prefs := make(map[string]bool)
	var prefsJSON string
	if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&prefsJSON); err == nil {
		// Malformed JSON is tolerated; prefs simply starts empty.
		_ = json.Unmarshal([]byte(prefsJSON), &prefs)
	}
	// Build name→ID map from VS Code's cached model list.
	// VS Code uses numeric IDs like "ollama/Ollama/4", not "ollama/Ollama/kimi-k2.5:cloud".
	nameToID := make(map[string]string)
	var cached []map[string]any
	var cacheJSON string
	if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&cacheJSON); err == nil {
		_ = json.Unmarshal([]byte(cacheJSON), &cached)
	}
	cachedNames := make(map[string]bool)
	for _, entry := range cached {
		meta, _ := entry["metadata"].(map[string]any)
		if meta == nil {
			continue
		}
		if vendor, _ := meta["vendor"].(string); vendor == "ollama" {
			name, _ := meta["name"].(string)
			id, _ := entry["identifier"].(string)
			if name != "" && id != "" {
				nameToID[name] = id
			}
			if name != "" {
				cachedNames[name] = true
			}
		}
	}
	// Ollama config is authoritative: always show configured models,
	// hide Ollama models that are no longer in the config.
	configuredIDs := make(map[string]bool)
	for _, m := range models {
		for _, id := range v.modelVSCodeIDs(m, nameToID) {
			prefs[id] = true
			configuredIDs[id] = true
		}
	}
	for id := range prefs {
		if strings.HasPrefix(id, "ollama/") && !configuredIDs[id] {
			prefs[id] = false
		}
	}
	data, _ := json.Marshal(prefs)
	if _, err = db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
		return err
	}
	// Set the primary model as the active selection in Copilot Chat so it
	// doesn't default to "auto" or whatever the user last picked manually.
	primaryID := v.modelVSCodeIDs(models[0], nameToID)[0]
	for _, key := range []string{"chat.currentLanguageModel.panel", "chat.currentLanguageModel.editor"} {
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key, primaryID); err != nil {
			return err
		}
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key+".isDefault", "false"); err != nil {
			return err
		}
	}
	// Ensure configured models exist in the cached model list so VS Code can
	// restore the selection immediately on startup, before extensions load.
	// Without this, a model that was never previously used won't be in the
	// cache, and VS Code falls back to "auto" until the Ollama BYOK provider
	// discovers it via the API (which is slow).
	cacheChanged := false
	for _, m := range models {
		if cachedNames[m] {
			continue
		}
		if !strings.Contains(m, ":") && cachedNames[m+":latest"] {
			continue
		}
		cacheID := m
		if !strings.Contains(m, ":") {
			cacheID = m + ":latest"
		}
		cached = append(cached, map[string]any{
			"identifier": "ollama/Ollama/" + cacheID,
			"metadata": map[string]any{
				"extension":            map[string]any{"value": "github.copilot-chat"},
				"name":                 m,
				"id":                   m,
				"vendor":               "ollama",
				"version":              "1.0.0",
				"family":               m,
				"detail":               "Ollama",
				"maxInputTokens":       4096,
				"maxOutputTokens":      4096,
				"isDefaultForLocation": map[string]any{},
				"isUserSelectable":     true,
				"capabilities":         map[string]any{"toolCalling": true},
			},
		})
		cacheChanged = true
	}
	if cacheChanged {
		cacheData, _ := json.Marshal(cached)
		if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(cacheData)); err != nil {
			return err
		}
	}
	return nil
}
// modelVSCodeIDs returns all possible VS Code picker IDs for a model name.
// The primary (first) ID should match the live identifier that VS Code assigns
// at runtime via toModelIdentifier(vendor, group, m.id), where m.id comes from
// /api/tags and always includes the tag (e.g. "llama3.2:latest").
// Ordering matters: callers use ids[0] as the active-selection value.
func (v *VSCode) modelVSCodeIDs(model string, nameToID map[string]string) []string {
	var ids []string
	// Prefer the numeric identifier from VS Code's cache when available.
	if id, ok := nameToID[model]; ok {
		ids = append(ids, id)
	} else if !strings.Contains(model, ":") {
		if id, ok := nameToID[model+":latest"]; ok {
			ids = append(ids, id)
		}
	}
	// For untagged models, the live identifier includes :latest
	// (e.g. ollama/Ollama/llama3.2:latest), so prefer that format
	// to avoid a mismatch that causes VS Code to reset to "auto".
	if !strings.Contains(model, ":") {
		ids = append(ids, "ollama/Ollama/"+model+":latest")
	}
	ids = append(ids, "ollama/Ollama/"+model)
	return ids
}
// vscodePath joins the platform-specific VS Code user-config directory with
// the given path components.
func (v *VSCode) vscodePath(parts ...string) string {
	home, _ := os.UserHomeDir()
	var base string
	switch runtime.GOOS {
	case "darwin":
		base = filepath.Join(home, "Library", "Application Support", "Code", "User")
	case "windows":
		base = filepath.Join(os.Getenv("APPDATA"), "Code", "User")
	default:
		base = filepath.Join(home, ".config", "Code", "User")
	}
	elems := make([]string, 0, len(parts)+1)
	elems = append(elems, base)
	elems = append(elems, parts...)
	return filepath.Join(elems...)
}
// checkVSCodeVersion warns if VS Code is older than minVSCodeVersion.
// All failures (no CLI, exec error, empty output) silently skip the check.
func (v *VSCode) checkVSCodeVersion() {
	codeCLI := v.findCodeCLI()
	if codeCLI == "" {
		return
	}
	out, err := exec.Command(codeCLI, "--version").Output()
	if err != nil {
		return
	}
	// "code --version" outputs: version\ncommit\narch
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	if len(lines) == 0 || lines[0] == "" {
		return
	}
	version := strings.TrimSpace(lines[0])
	if compareVersions(version, minVSCodeVersion) < 0 {
		fmt.Fprintf(os.Stderr, "\n%sWarning: VS Code version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minVSCodeVersion, ansiReset)
		fmt.Fprintf(os.Stderr, "Please update VS Code to the latest version.\n\n")
	}
}
// checkCopilotChatVersion warns if the GitHub Copilot Chat extension is
// missing or older than minCopilotChatVersion.
// Failures to locate or run the CLI silently skip the check.
func (v *VSCode) checkCopilotChatVersion() {
	codeCLI := v.findCodeCLI()
	if codeCLI == "" {
		return
	}
	out, err := exec.Command(codeCLI, "--list-extensions", "--show-versions").Output()
	if err != nil {
		return
	}
	installed, version := parseCopilotChatVersion(string(out))
	if !installed {
		fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension is not installed%s\n", ansiYellow, ansiReset)
		fmt.Fprintf(os.Stderr, "Install it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Install\n\n")
		return
	}
	if compareVersions(version, minCopilotChatVersion) < 0 {
		fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minCopilotChatVersion, ansiReset)
		fmt.Fprintf(os.Stderr, "Please update it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Update\n\n")
	}
}
// findCodeCLI returns the path to the VS Code CLI for querying extensions.
// On macOS, findBinary may return an .app bundle which can't run --list-extensions,
// so this resolves to the actual CLI binary inside the bundle.
func (v *VSCode) findCodeCLI() string {
	binary := v.findBinary()
	switch {
	case binary == "":
		return ""
	case runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app"):
		cli := binary + "/Contents/Resources/app/bin/code"
		if _, err := os.Stat(cli); err != nil {
			return ""
		}
		return cli
	default:
		return binary
	}
}
// parseCopilotChatVersion extracts the version of the GitHub Copilot Chat
// extension from "code --list-extensions --show-versions" output.
// The extension ID is matched case-insensitively; the version is whatever
// follows the first "@" on the matching line, trimmed of whitespace.
func parseCopilotChatVersion(output string) (installed bool, version string) {
	const marker = "github.copilot-chat@"
	for _, line := range strings.Split(output, "\n") {
		// Format: github.copilot-chat@0.40.1
		if len(line) < len(marker) || !strings.EqualFold(line[:len(marker)], marker) {
			continue
		}
		return true, strings.TrimSpace(line[len(marker):])
	}
	return false, ""
}
// compareVersions compares two dot-separated version strings.
// Returns -1 if a < b, 0 if a == b, 1 if a > b.
func compareVersions(a, b string) int {
aParts := strings.Split(a, ".")
bParts := strings.Split(b, ".")
maxLen := len(aParts)
if len(bParts) > maxLen {
maxLen = len(bParts)
}
for i := range maxLen {
var aNum, bNum int
if i < len(aParts) {
aNum, _ = strconv.Atoi(aParts[i])
}
if i < len(bParts) {
bNum, _ = strconv.Atoi(bParts[i])
}
if aNum < bNum {
return -1
}
if aNum > bNum {
return 1
}
}
return 0
}
// fileExists reports whether path can be stat'd. Any stat error (including
// permission errors) is treated as "does not exist".
func fileExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

656
cmd/launch/vscode_test.go Normal file
View File

@@ -0,0 +1,656 @@
package launch
import (
"database/sql"
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
_ "github.com/mattn/go-sqlite3"
)
// TestVSCodeIntegration covers the basic contract of the VSCode type:
// its display name and the interfaces it must satisfy at compile time.
func TestVSCodeIntegration(t *testing.T) {
	v := &VSCode{}
	t.Run("String", func(t *testing.T) {
		if got := v.String(); got != "Visual Studio Code" {
			t.Errorf("String() = %q, want %q", got, "Visual Studio Code")
		}
	})
	t.Run("implements Runner", func(t *testing.T) {
		// Compile-time interface satisfaction check.
		var _ Runner = v
	})
	t.Run("implements Editor", func(t *testing.T) {
		var _ Editor = v
	})
}
// TestVSCodeEdit exercises Edit against several initial states of
// chatLanguageModels.json: missing file, other vendors present, an existing
// Ollama entry, an empty model list, and corrupted JSON.
func TestVSCodeEdit(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
	tests := []struct {
		name     string
		setup    string // initial chatLanguageModels.json content, empty means no file
		models   []string
		validate func(t *testing.T, data []byte)
	}{
		{
			name:   "fresh install",
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "preserve other vendor entries",
			setup:  `[{"vendor": "azure", "name": "Azure", "url": "https://example.com"}]`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				var entries []map[string]any
				json.Unmarshal(data, &entries)
				if len(entries) != 2 {
					t.Errorf("expected 2 entries, got %d", len(entries))
				}
				// Check Azure entry preserved
				found := false
				for _, e := range entries {
					if v, _ := e["vendor"].(string); v == "azure" {
						found = true
					}
				}
				if !found {
					t.Error("azure vendor entry was not preserved")
				}
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "update existing ollama entry",
			setup:  `[{"vendor": "ollama", "name": "Ollama", "url": "http://old:11434"}]`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				assertOllamaVendorConfigured(t, data)
			},
		},
		{
			name:   "empty models is no-op",
			setup:  `[{"vendor": "azure", "name": "Azure"}]`,
			models: []string{},
			validate: func(t *testing.T, data []byte) {
				if string(data) != `[{"vendor": "azure", "name": "Azure"}]` {
					t.Error("empty models should not modify file")
				}
			},
		},
		{
			name:   "corrupted JSON treated as empty",
			setup:  `{corrupted json`,
			models: []string{"llama3.2"},
			validate: func(t *testing.T, data []byte) {
				var entries []map[string]any
				if err := json.Unmarshal(data, &entries); err != nil {
					t.Errorf("result is not valid JSON: %v", err)
				}
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Each case starts from a clean config directory.
			os.RemoveAll(filepath.Dir(clmPath))
			if tt.setup != "" {
				os.MkdirAll(filepath.Dir(clmPath), 0o755)
				os.WriteFile(clmPath, []byte(tt.setup), 0o644)
			}
			if err := v.Edit(tt.models); err != nil {
				t.Fatal(err)
			}
			data, _ := os.ReadFile(clmPath)
			tt.validate(t, data)
		})
	}
}
// TestVSCodeEditCleansUpOldSettings verifies that Edit removes the deprecated
// byok endpoint and launch-configured keys from settings.json while leaving
// unrelated user settings untouched.
func TestVSCodeEditCleansUpOldSettings(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	settingsPath := testVSCodePath(t, tmpDir, "settings.json")
	// Create settings.json with the old byok setting alongside an unrelated key.
	// Setup errors are fatal: a failed write would otherwise surface as a
	// confusing assertion failure further down.
	if err := os.MkdirAll(filepath.Dir(settingsPath), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(settingsPath, []byte(`{"github.copilot.chat.byok.ollamaEndpoint": "http://old:11434", "ollama.launch.configured": true, "editor.fontSize": 14}`), 0o644); err != nil {
		t.Fatal(err)
	}
	if err := v.Edit([]string{"llama3.2"}); err != nil {
		t.Fatal(err)
	}
	// Verify old settings were removed.
	data, err := os.ReadFile(settingsPath)
	if err != nil {
		t.Fatal(err)
	}
	var settings map[string]any
	if err := json.Unmarshal(data, &settings); err != nil {
		t.Fatalf("settings.json is not valid JSON after Edit: %v", err)
	}
	if _, ok := settings["github.copilot.chat.byok.ollamaEndpoint"]; ok {
		t.Error("github.copilot.chat.byok.ollamaEndpoint should have been removed")
	}
	if _, ok := settings["ollama.launch.configured"]; ok {
		t.Error("ollama.launch.configured should have been removed")
	}
	// JSON numbers decode to float64 in a map[string]any.
	if settings["editor.fontSize"] != float64(14) {
		t.Error("editor.fontSize should have been preserved")
	}
}
// TestVSCodePaths verifies that Paths reports the chatLanguageModels.json
// location only when the file actually exists on disk.
func TestVSCodePaths(t *testing.T) {
	v := &VSCode{}
	tmpDir := t.TempDir()
	setTestHome(t, tmpDir)
	t.Setenv("XDG_CONFIG_HOME", "")
	clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
	t.Run("no file returns nil", func(t *testing.T) {
		os.Remove(clmPath) // best-effort: the file may not exist yet
		if paths := v.Paths(); paths != nil {
			t.Errorf("expected nil, got %v", paths)
		}
	})
	t.Run("existing file returns path", func(t *testing.T) {
		// Setup errors are fatal: a failed write would make the assertion
		// below fail for the wrong reason.
		if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
			t.Fatal(err)
		}
		if err := os.WriteFile(clmPath, []byte(`[]`), 0o644); err != nil {
			t.Fatal(err)
		}
		if paths := v.Paths(); len(paths) != 1 {
			t.Errorf("expected 1 path, got %d", len(paths))
		}
	})
}
// testVSCodePath returns the expected VS Code config path for the given file in tests.
func testVSCodePath(t *testing.T, tmpDir, filename string) string {
t.Helper()
switch runtime.GOOS {
case "darwin":
return filepath.Join(tmpDir, "Library", "Application Support", "Code", "User", filename)
case "windows":
t.Setenv("APPDATA", tmpDir)
return filepath.Join(tmpDir, "Code", "User", filename)
default:
return filepath.Join(tmpDir, ".config", "Code", "User", filename)
}
}
// assertOllamaVendorConfigured parses data as a list of vendor entries and
// fails the test unless it contains an ollama entry named "Ollama" with a
// non-empty url.
func assertOllamaVendorConfigured(t *testing.T, data []byte) {
	t.Helper()
	var entries []map[string]any
	if err := json.Unmarshal(data, &entries); err != nil {
		t.Fatalf("invalid JSON: %v", err)
	}
	for _, e := range entries {
		vendor, _ := e["vendor"].(string)
		if vendor != "ollama" {
			continue
		}
		if n, _ := e["name"].(string); n != "Ollama" {
			t.Errorf("expected name \"Ollama\", got %q", n)
		}
		if u, _ := e["url"].(string); u == "" {
			t.Error("url not set")
		}
		return
	}
	t.Error("no ollama vendor entry found")
}
// TestShowInModelPicker exercises ShowInModelPicker against VS Code's
// state.vscdb SQLite database: the model-picker preference map, the active
// model selection keys, and the cached language-model list.
func TestShowInModelPicker(t *testing.T) {
	v := &VSCode{}
	// setupDB creates a state DB with optional seed data and returns its path.
	// All seeding errors are fatal: a silently failed INSERT would leave an
	// empty DB and invalidate every subtest built on top of it.
	setupDB := func(t *testing.T, tmpDir string, seedPrefs map[string]bool, seedCache []map[string]any) string {
		t.Helper()
		dbDir := filepath.Join(tmpDir, "globalStorage")
		if err := os.MkdirAll(dbDir, 0o755); err != nil {
			t.Fatal(err)
		}
		dbPath := filepath.Join(dbDir, "state.vscdb")
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
			t.Fatal(err)
		}
		if seedPrefs != nil {
			data, err := json.Marshal(seedPrefs)
			if err != nil {
				t.Fatal(err)
			}
			if _, err := db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
				t.Fatal(err)
			}
		}
		if seedCache != nil {
			data, err := json.Marshal(seedCache)
			if err != nil {
				t.Fatal(err)
			}
			if _, err := db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(data)); err != nil {
				t.Fatal(err)
			}
		}
		return dbPath
	}
	// readPrefs reads the picker preferences back from the DB.
	readPrefs := func(t *testing.T, dbPath string) map[string]bool {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var raw string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&raw); err != nil {
			t.Fatal(err)
		}
		prefs := make(map[string]bool)
		if err := json.Unmarshal([]byte(raw), &prefs); err != nil {
			t.Fatalf("invalid preferences JSON: %v", err)
		}
		return prefs
	}
	t.Run("fresh DB creates table and shows models", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		if runtime.GOOS == "windows" {
			t.Setenv("APPDATA", tmpDir)
		}
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be shown")
		}
		if !prefs["ollama/Ollama/llama3.2:latest"] {
			t.Error("expected llama3.2:latest to be shown")
		}
	})
	t.Run("configured models are shown", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be shown")
		}
		if !prefs["ollama/Ollama/qwen3:8b"] {
			t.Error("expected qwen3:8b to be shown")
		}
	})
	t.Run("removed models are hidden", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"ollama/Ollama/llama3.2":        true,
			"ollama/Ollama/llama3.2:latest": true,
			"ollama/Ollama/mistral":         true,
			"ollama/Ollama/mistral:latest":  true,
		}, nil)
		// Only configure llama3.2 — mistral should get hidden
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to stay shown")
		}
		if prefs["ollama/Ollama/mistral"] {
			t.Error("expected mistral to be hidden")
		}
		if prefs["ollama/Ollama/mistral:latest"] {
			t.Error("expected mistral:latest to be hidden")
		}
	})
	t.Run("non-ollama prefs are preserved", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"copilot/gpt-4o": true,
		}, nil)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["copilot/gpt-4o"] {
			t.Error("expected copilot/gpt-4o to stay shown")
		}
	})
	t.Run("uses cached numeric IDs when available", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/4"] {
			t.Error("expected numeric ID ollama/Ollama/4 to be shown")
		}
		// Name-based fallback should also be set
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected name-based ID to also be shown")
		}
	})
	t.Run("empty models is no-op", func(t *testing.T) {
		err := v.ShowInModelPicker([]string{})
		if err != nil {
			t.Fatal(err)
		}
	})
	// readValue reads a string value from the state DB; it deliberately
	// returns "" for a missing key rather than failing the test.
	readValue := func(t *testing.T, dbPath, key string) string {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var val string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = ?", key).Scan(&val); err != nil {
			return ""
		}
		return val
	}
	t.Run("sets primary model as active selection", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
		if panelModel != "ollama/Ollama/llama3.2:latest" {
			t.Errorf("expected panel model ollama/Ollama/llama3.2:latest, got %q", panelModel)
		}
		editorModel := readValue(t, dbPath, "chat.currentLanguageModel.editor")
		if editorModel != "ollama/Ollama/llama3.2:latest" {
			t.Errorf("expected editor model ollama/Ollama/llama3.2:latest, got %q", editorModel)
		}
		panelDefault := readValue(t, dbPath, "chat.currentLanguageModel.panel.isDefault")
		if panelDefault != "false" {
			t.Errorf("expected panel isDefault false, got %q", panelDefault)
		}
	})
	t.Run("sets cached numeric ID as active selection", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
		panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
		if panelModel != "ollama/Ollama/4" {
			t.Errorf("expected panel model to use cached numeric ID ollama/Ollama/4, got %q", panelModel)
		}
	})
	t.Run("previously hidden model is re-shown when configured", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
			"ollama/Ollama/llama3.2":        false,
			"ollama/Ollama/llama3.2:latest": false,
		}, nil)
		// Ollama config is authoritative — should override the hidden state
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		prefs := readPrefs(t, dbPath)
		if !prefs["ollama/Ollama/llama3.2"] {
			t.Error("expected llama3.2 to be re-shown")
		}
	})
	// readCache reads and parses the cached models from the state DB; a
	// missing key or unparseable value yields nil (best-effort by design).
	readCache := func(t *testing.T, dbPath string) []map[string]any {
		t.Helper()
		db, err := sql.Open("sqlite3", dbPath)
		if err != nil {
			t.Fatal(err)
		}
		defer db.Close()
		var raw string
		if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&raw); err != nil {
			return nil
		}
		var result []map[string]any
		_ = json.Unmarshal([]byte(raw), &result)
		return result
	}
	t.Run("adds uncached model to cache for instant startup display", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		// No seed cache — model has never been used in VS Code before
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
		err := v.ShowInModelPicker([]string{"qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		cache := readCache(t, dbPath)
		if len(cache) != 1 {
			t.Fatalf("expected 1 cached entry, got %d", len(cache))
		}
		entry := cache[0]
		if id, _ := entry["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
			t.Errorf("expected identifier ollama/Ollama/qwen3:8b, got %q", id)
		}
		meta, _ := entry["metadata"].(map[string]any)
		if meta == nil {
			t.Fatal("expected metadata in cache entry")
		}
		// Use a distinct name here: `v` is the *VSCode under test.
		if vendor, _ := meta["vendor"].(string); vendor != "ollama" {
			t.Errorf("expected vendor ollama, got %q", vendor)
		}
		if sel, ok := meta["isUserSelectable"].(bool); !ok || !sel {
			t.Error("expected isUserSelectable to be true")
		}
	})
	t.Run("does not duplicate already-cached model", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
			{
				"identifier": "copilot/copilot/auto",
				"metadata":   map[string]any{"vendor": "copilot", "name": "Auto"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		err := v.ShowInModelPicker([]string{"llama3.2"})
		if err != nil {
			t.Fatal(err)
		}
		// Cache should still have exactly 2 entries (no duplicate added)
		result := readCache(t, dbPath)
		if len(result) != 2 {
			t.Errorf("expected 2 cached entries (no duplicate), got %d", len(result))
		}
	})
	t.Run("adds only missing models to existing cache", func(t *testing.T) {
		tmpDir := t.TempDir()
		setTestHome(t, tmpDir)
		t.Setenv("XDG_CONFIG_HOME", "")
		cache := []map[string]any{
			{
				"identifier": "ollama/Ollama/4",
				"metadata":   map[string]any{"vendor": "ollama", "name": "llama3.2"},
			},
		}
		dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
		// llama3.2 is cached, qwen3:8b is not
		err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
		if err != nil {
			t.Fatal(err)
		}
		result := readCache(t, dbPath)
		if len(result) != 2 {
			t.Fatalf("expected 2 cached entries, got %d", len(result))
		}
		// Second entry should be the newly added qwen3:8b
		if id, _ := result[1]["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
			t.Errorf("expected new entry ollama/Ollama/qwen3:8b, got %q", id)
		}
	})
}
// TestParseCopilotChatVersion checks detection of the github.copilot-chat
// extension and its version in `code --list-extensions` style output.
func TestParseCopilotChatVersion(t *testing.T) {
	cases := []struct {
		name          string
		output        string
		wantInstalled bool
		wantVersion   string
	}{
		{
			name:          "found among other extensions",
			output:        "ms-python.python@2024.1.1\ngithub.copilot-chat@0.40.1\ngithub.copilot@1.200.0\n",
			wantInstalled: true,
			wantVersion:   "0.40.1",
		},
		{
			name:          "only extension",
			output:        "GitHub.copilot-chat@0.41.0\n",
			wantInstalled: true,
			wantVersion:   "0.41.0",
		},
		{
			name:          "not installed",
			output:        "ms-python.python@2024.1.1\ngithub.copilot@1.200.0\n",
			wantInstalled: false,
		},
		{
			name:          "empty output",
			output:        "",
			wantInstalled: false,
		},
		{
			name:          "case insensitive match",
			output:        "GitHub.Copilot-Chat@0.39.0\n",
			wantInstalled: true,
			wantVersion:   "0.39.0",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotInstalled, gotVersion := parseCopilotChatVersion(tc.output)
			if gotInstalled != tc.wantInstalled {
				t.Errorf("installed = %v, want %v", gotInstalled, tc.wantInstalled)
			}
			// The version only matters when the extension was detected.
			if gotInstalled && gotVersion != tc.wantVersion {
				t.Errorf("version = %q, want %q", gotVersion, tc.wantVersion)
			}
		})
	}
}
// TestCompareVersions verifies three-way comparison of dotted version strings,
// including unequal segment counts.
func TestCompareVersions(t *testing.T) {
	cases := []struct {
		a, b string
		want int
	}{
		{"0.40.1", "0.40.1", 0},
		{"0.40.2", "0.40.1", 1},
		{"0.40.0", "0.40.1", -1},
		{"0.41.0", "0.40.1", 1},
		{"0.39.9", "0.40.1", -1},
		{"1.0.0", "0.40.1", 1},
		{"0.40", "0.40.1", -1},
		{"0.40.1.1", "0.40.1", 1},
	}
	for _, tc := range cases {
		name := tc.a + "_vs_" + tc.b
		t.Run(name, func(t *testing.T) {
			if got := compareVersions(tc.a, tc.b); got != tc.want {
				t.Errorf("compareVersions(%q, %q) = %d, want %d", tc.a, tc.b, got, tc.want)
			}
		})
	}
}

View File

@@ -242,6 +242,10 @@ func (m selectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyEnter:
filtered := m.filteredItems()
if len(filtered) > 0 && m.cursor < len(filtered) {
@@ -354,7 +358,7 @@ func (m selectorModel) renderContent() string {
}
s.WriteString("\n")
help := "↑/↓ navigate • enter select • esc cancel"
help := "↑/↓ navigate • enter select • ← back"
if m.helpText != "" {
help = m.helpText
}
@@ -608,6 +612,10 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyTab:
m.multi = !m.multi
@@ -810,7 +818,7 @@ func (m multiSelectorModel) View() string {
s.WriteString("\n")
if !m.multi {
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • esc cancel"))
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • ← back"))
} else {
count := m.selectedCount()
if count == 0 {
@@ -819,7 +827,7 @@ func (m multiSelectorModel) View() string {
s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
}
s.WriteString("\n\n")
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • esc cancel"))
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • ← back"))
}
result := s.String()

View File

@@ -782,6 +782,9 @@ func TestMulti_MultiModeHelpText(t *testing.T) {
if !strings.Contains(content, "tab select single") {
t.Error("multi mode should show 'tab select single' in help")
}
if !strings.Contains(content, "← back") {
t.Error("multi mode should show '← back' in help")
}
}
// --- preChecked initialization order ---
@@ -868,6 +871,46 @@ func TestMulti_UncheckingTopDefaultFallsBackToNearestCheckedBelow(t *testing.T)
}
}
// --- Left arrow back navigation ---
func TestSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
// Key message helpers for testing
type keyType = int

View File

@@ -60,6 +60,9 @@ var mainMenuItems = []menuItem{
{
integration: "openclaw",
},
{
integration: "vscode",
},
}
var othersMenuItem = menuItem{
@@ -139,6 +142,7 @@ func otherIntegrationItems(state *launch.LauncherState) []menuItem {
"claude": true,
"codex": true,
"openclaw": true,
"vscode": true,
}
var items []menuItem

View File

@@ -160,6 +160,12 @@
"group": "More information",
"pages": [
"/cli",
{
"group": "Assistant Sandboxing",
"pages": [
"/integrations/nemoclaw"
]
},
"/modelfile",
"/context-length",
"/linux",

View File

@@ -96,6 +96,18 @@ The `/loop` command runs a prompt or slash command on a recurring schedule insid
/loop 1h Remind me to review the deploy status
```
## Telegram
Chat with Claude Code from Telegram by connecting a bot to your session. Install the [Telegram plugin](https://github.com/anthropics/claude-plugins-official), create a bot via [@BotFather](https://t.me/BotFather), then launch with the channel flag:
```shell
ollama launch claude -- --channels plugin:telegram@claude-plugins-official
```
Claude Code will prompt for permission on most actions. To allow the bot to work autonomously, configure [permission rules](https://code.claude.com/docs/en/permissions) or pass `--dangerously-skip-permissions` in isolated environments.
See the [plugin README](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) for full setup instructions including pairing and access control.
## Manual setup
Claude Code connects to Ollama using the Anthropic-compatible API.

View File

@@ -0,0 +1,67 @@
---
title: NemoClaw
---
NemoClaw is NVIDIA's open source security stack for [OpenClaw](/integrations/openclaw). It wraps OpenClaw with the NVIDIA OpenShell runtime to provide kernel-level sandboxing, network policy controls, and audit trails for AI agents.
## Quick start
Pull a model:
```bash
ollama pull nemotron-3-nano:30b
```
Run the installer:
```bash
curl -fsSL https://www.nvidia.com/nemoclaw.sh | \
NEMOCLAW_NON_INTERACTIVE=1 \
NEMOCLAW_PROVIDER=ollama \
NEMOCLAW_MODEL=nemotron-3-nano:30b \
bash
```
Connect to your sandbox:
```bash
nemoclaw my-assistant connect
```
Open the TUI:
```bash
openclaw tui
```
<Note>Ollama support in NemoClaw is still experimental.</Note>
## Platform support
| Platform | Runtime | Status |
|----------|---------|--------|
| Linux (Ubuntu 22.04+) | Docker | Primary |
| macOS (Apple Silicon) | Colima or Docker Desktop | Supported |
| Windows | WSL2 with Docker Desktop | Supported |
CMD and PowerShell are not supported on Windows — WSL2 is required.
<Note>Ollama must be installed and running before the installer runs. When running inside WSL2 or a container, ensure Ollama is reachable from the sandbox (e.g. `OLLAMA_HOST=0.0.0.0`).</Note>
## System requirements
- CPU: 4 vCPU minimum
- RAM: 8 GB minimum (16 GB recommended)
- Disk: 20 GB free (40 GB recommended for local models)
- Node.js 20+ and npm 10+
- Container runtime (Docker preferred)
## Recommended models
- `nemotron-3-super:cloud` — Strong reasoning and coding
- `qwen3.5:cloud` — 397B; reasoning and code generation
- `nemotron-3-nano:30b` — Recommended local model; fits in 24 GB VRAM
- `qwen3.5:27b` — Fast local reasoning (~18 GB VRAM)
- `glm-4.7-flash` — Reasoning and code generation (~25 GB VRAM)
More models at [ollama.com/search](https://ollama.com/search).

View File

@@ -214,6 +214,8 @@ func LogLevel() slog.Level {
var (
// FlashAttention enables the experimental flash attention feature.
FlashAttention = BoolWithDefault("OLLAMA_FLASH_ATTENTION")
// DebugLogRequests logs inference requests to disk for replay/debugging.
DebugLogRequests = Bool("OLLAMA_DEBUG_LOG_REQUESTS")
// KvCacheType is the quantization type for the K/V cache.
KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
// NoHistory disables readline history.
@@ -302,28 +304,29 @@ type EnvVar struct {
func AsMap() map[string]EnvVar {
ret := map[string]EnvVar{
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_DEBUG_LOG_REQUESTS": {"OLLAMA_DEBUG_LOG_REQUESTS", DebugLogRequests(), "Log inference request bodies and replay curl commands to a temp directory"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
// Informational
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},

View File

@@ -874,7 +874,7 @@ func (f GGML) SupportsFlashAttention() bool {
return true
}
if slices.Contains([]string{"gemma2"}, arch) {
if slices.Contains([]string{"gemma2", "grok"}, arch) {
return false
}

View File

@@ -14,4 +14,15 @@ The integration tests have 2 modes of operating.
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
## Testing a New Model
When implementing new model architecture, use `OLLAMA_TEST_MODEL` to run the
integration suite against your model.
```bash
# Build the binary first
go build .
# Run integration tests against it
OLLAMA_TEST_MODEL=mymodel go test -tags integration -v -count 1 -timeout 15m ./integration/
```

View File

@@ -48,9 +48,7 @@ func TestAPIGenerate(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -151,7 +149,11 @@ func TestAPIGenerate(t *testing.T) {
})
}
// Validate PS while we're at it...
// Validate PS while we're at it — skip for local-only models
// which may lack metadata fields like family, parameter_size, etc.
if testModel != "" {
return
}
resp, err := client.ListRunning(ctx)
if err != nil {
t.Fatalf("list models API error: %s", err)
@@ -208,9 +210,7 @@ func TestAPIChat(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -311,6 +311,9 @@ func TestAPIChat(t *testing.T) {
}
func TestAPIListModels(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -361,6 +364,9 @@ func verifyModelDetails(t *testing.T, details api.ModelDetails) {
}
func TestAPIShowModel(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
modelName := "llama3.2"
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
@@ -400,6 +406,10 @@ func TestAPIShowModel(t *testing.T) {
}
func TestAPIGenerateLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -513,6 +523,10 @@ func TestAPIGenerateLogprobs(t *testing.T) {
}
func TestAPIChatLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()

View File

@@ -35,6 +35,9 @@ func TestBlueSky(t *testing.T) {
}
func TestUnicode(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
skipUnderMinVRAM(t, 6)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancel()
@@ -59,9 +62,7 @@ func TestUnicode(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
slog.Info("loading", "model", req.Model)
err := client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
@@ -81,6 +82,9 @@ func TestUnicode(t *testing.T) {
}
func TestExtendedUnicodeOutput(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
// Set up the test data
@@ -100,9 +104,7 @@ func TestExtendedUnicodeOutput(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
}
@@ -148,15 +150,16 @@ func TestUnicodeModelDir(t *testing.T) {
// TestNumPredict verifies that when num_predict is set, the model generates
// exactly that many tokens. It uses logprobs to count the actual tokens output.
func TestNumPredict(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, "qwen3:0.6b"); err != nil {
t.Fatalf("failed to pull model: %v", err)
}
pullOrSkip(ctx, t, client, "qwen3:0.6b")
req := api.GenerateRequest{
Model: "qwen3:0.6b",

View File

@@ -67,6 +67,9 @@ func TestConcurrentChat(t *testing.T) {
// Stress the scheduler and attempt to load more models than will fit to cause thrashing
// This test will always load at least 2 models even on CPU based systems
func TestMultiModelStress(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded models, not applicable with model override")
}
s := os.Getenv("OLLAMA_MAX_VRAM")
if s == "" {
s = "0"
@@ -114,9 +117,7 @@ func TestMultiModelStress(t *testing.T) {
// Make sure all the models are pulled before we get started
for _, model := range chosenModels {
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, model)
}
// Determine how many models we can load in parallel before we exceed VRAM

View File

@@ -38,9 +38,7 @@ func TestLongInputContext(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("PullIfMissing failed: %v", err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
}
@@ -70,14 +68,15 @@ func TestContextExhaustion(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("PullIfMissing failed: %v", err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
}
// Send multiple generate requests with prior context and ensure the response is coherent and expected
func TestParallelGenerateWithHistory(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
modelName := "gpt-oss:20b"
req, resp := GenerateRequests()
numParallel := 2
@@ -133,6 +132,12 @@ func TestParallelGenerateWithHistory(t *testing.T) {
// Send generate requests with prior context and ensure the response is coherent and expected
func TestGenerateWithHistory(t *testing.T) {
if testModel != "" {
// The Generate API's Context field (token array continuation) is not
// supported by all runners (e.g. MLX). Chat history works; this is
// the only generate-specific continuation path.
t.Skip("generate context continuation not supported by all runners")
}
req := api.GenerateRequest{
Model: smol,
Prompt: rainbowPrompt,
@@ -173,6 +178,9 @@ func TestGenerateWithHistory(t *testing.T) {
// Send multiple chat requests with prior context and ensure the response is coherent and expected
func TestParallelChatWithHistory(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
modelName := "gpt-oss:20b"
req, resp := ChatRequests()
numParallel := 2

View File

@@ -78,8 +78,11 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
testCases := []struct {
a string
b string
@@ -145,6 +148,9 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
}
func TestAllMiniLMEmbeddings(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -175,6 +181,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
}
func TestAllMiniLMEmbed(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -212,6 +221,9 @@ func TestAllMiniLMEmbed(t *testing.T) {
}
func TestAllMiniLMBatchEmbed(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -259,6 +271,9 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
}
func TestAllMiniLMEmbedTruncate(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -397,21 +412,13 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
t.Helper()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
return client.Embeddings(ctx, &req)
}
func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
t.Helper()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
return client.Embed(ctx, &req)
}
@@ -426,9 +433,12 @@ func TestEmbedTruncation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -494,9 +504,12 @@ func TestEmbedLargeInput(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
defer mcancel()
@@ -559,9 +572,12 @@ func TestEmbedStatusCode(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
for _, model := range libraryEmbedModels {
for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "embedding")
}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -571,9 +587,7 @@ func TestEmbedStatusCode(t *testing.T) {
defer mcancel()
// Pull the model if needed
if err := PullIfMissing(mctx, client, model); err != nil {
t.Fatal(err)
}
pullOrSkip(mctx, t, client, model)
t.Run("truncation error status code", func(t *testing.T) {
truncFalse := false

View File

@@ -14,6 +14,9 @@ import (
)
func TestImageGeneration(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded models, not applicable with model override")
}
skipUnderMinVRAM(t, 8)
type testCase struct {
@@ -41,12 +44,8 @@ func TestImageGeneration(t *testing.T) {
defer cleanup()
// Pull both models
if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
t.Fatalf("failed to pull image gen model: %v", err)
}
if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
t.Fatalf("failed to pull vision model: %v", err)
}
pullOrSkip(ctx, t, client, tc.imageGenModel)
pullOrSkip(ctx, t, client, tc.visionModel)
// Generate the image
t.Logf("Generating image with prompt: %s", tc.prompt)

View File

@@ -24,15 +24,12 @@ func TestLibraryModelsChat(t *testing.T) {
defer cleanup()
targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
chatModels := libraryChatModels
for _, model := range chatModels {
for _, model := range testModels(libraryChatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if targetArch != "" {
resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
if err != nil {

View File

@@ -13,39 +13,35 @@ import (
func TestVisionModels(t *testing.T) {
skipUnderMinVRAM(t, 6)
type testCase struct {
model string
}
testCases := []testCase{
{
model: "qwen2.5vl",
},
{
model: "llama3.2-vision",
},
{
model: "gemma3",
},
{
model: "qwen3-vl:8b",
},
{
// Qwen 3 VL mixture of experts
model: "qwen3-vl:30b",
},
{
model: "ministral-3",
},
defaultVisionModels := []string{
"qwen2.5vl",
"llama3.2-vision",
"gemma3",
"qwen3-vl:8b",
"qwen3-vl:30b",
"ministral-3",
}
for _, v := range testCases {
t.Run(v.model, func(t *testing.T) {
for _, model := range testModels(defaultVisionModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if testModel != "" {
requireCapability(ctx, t, client, model, "vision")
}
pullOrSkip(ctx, t, client, model)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
t.Fatal(err)
}
req := api.ChatRequest{
Model: v.model,
Model: model,
Messages: []api.Message{
{
Role: "user",
@@ -61,16 +57,7 @@ func TestVisionModels(t *testing.T) {
"temperature": 0.0,
},
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
resp := "the ollam"
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
// Preload to skip if we're less than 80% on GPU to avoid extremely slow tests
err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
@@ -78,13 +65,17 @@ func TestVisionModels(t *testing.T) {
}
skipIfNotGPULoaded(ctx, t, client, req.Model, 80)
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
// llava models on CPU can be quite slow to start
DoChat(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
DoChat(ctx, t, client, req, []string{"the ollam"}, 240*time.Second, 30*time.Second)
})
}
}
func TestIntegrationSplitBatch(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
skipUnderMinVRAM(t, 6)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
@@ -111,9 +102,7 @@ func TestIntegrationSplitBatch(t *testing.T) {
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
// llava models on CPU can be quite slow to start
DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
}

View File

@@ -45,9 +45,7 @@ func TestMaxQueue(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
// Context for the worker threads so we can shut them down
// embedCtx, embedCancel := context.WithCancel(ctx)

View File

@@ -46,14 +46,12 @@ func TestModelsChat(t *testing.T) {
chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
}
for _, model := range chatModels {
for _, model := range testModels(chatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if maxVram > 0 {
resp, err := client.List(ctx)
if err != nil {
@@ -133,14 +131,15 @@ func TestModelsEmbed(t *testing.T) {
t.Fatalf("failed to load test data: %s", err)
}
for model, expected := range testCase {
if testModel != "" && model != testModel {
continue
}
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
if maxVram > 0 {
resp, err := client.List(ctx)
if err != nil {

View File

@@ -87,9 +87,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
var maxContext int
resp, err := client.Show(ctx, &api.ShowRequest{Model: model})

View File

@@ -33,9 +33,7 @@ func TestQuantization(t *testing.T) {
defer cleanup()
for _, base := range sourceModels {
if err := PullIfMissing(ctx, client, base); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, base)
for _, quant := range quantizations {
newName := fmt.Sprintf("%s__%s", base, quant)
t.Run(newName, func(t *testing.T) {

View File

@@ -0,0 +1,523 @@
//go:build integration
package integration
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TestAPIToolCallingStress exercises tool calling with complex, agent-style
// prompts: a very large system message, many tool declarations, and multi-turn
// conversations. It is designed to surface cache-corruption and parser bugs
// that simple tool-calling tests never hit.
func TestAPIToolCallingStress(t *testing.T) {
	firstTimeout := 120 * time.Second
	streamDeadline := 120 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	// Minimum VRAM (per skipUnderMinVRAM's units) required by each model;
	// models not listed run on any hardware.
	vramFloor := map[string]uint64{
		"qwen3-vl":      16,
		"gpt-oss:20b":   16,
		"gpt-oss:120b":  70,
		"qwen3":         6,
		"llama3.1":      8,
		"llama3.2":      4,
		"mistral":       6,
		"qwen2.5":       6,
		"qwen2":         6,
		"ministral-3":   20,
		"mistral-nemo":  9,
		"mistral-small": 16,
		"mixtral:8x22b": 80,
		"qwq":           20,
		"granite3.3":    7,
	}

	// Models that don't reliably produce tool calls under the stress prompt
	// (large system prompt plus many tools, simulating coding agents): some
	// are too small, too slow, or simply not built for this use case.
	flaky := map[string]string{
		"lfm2.5-thinking": "returns text instead of tool calls with complex system prompts",
		"qwen3-vl":        "vision model, extremely slow with complex tool prompts",
		"llama3.2":        "3B model too small for reliable multi-tool agent prompts",
		"mistral":         "7B v0.3 returns text instead of tool calls with complex prompts",
		"mixtral:8x22b":   "returns text instead of tool calls with complex prompts",
		"qwen2":           "returns text instead of tool calls with complex prompts",
		"granite3.3":      "returns text instead of tool calls with complex prompts",
	}

	for _, model := range testModels(libraryToolsModels) {
		model := model
		t.Run(model, func(t *testing.T) {
			// Known-bad models are skipped unless the override env var names one explicitly.
			if reason, known := flaky[model]; known && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			if testModel != "" {
				requireCapability(ctx, t, client, model, "tools")
			}
			if floor, ok := vramFloor[model]; ok {
				skipUnderMinVRAM(t, floor)
			}
			pullOrSkip(ctx, t, client, model)

			tools := stressTestTools()
			// ~5000+ token system prompt mimicking real coding agents
			// (opencode, Claude Code, etc.); the size combined with the
			// tool count is what triggers failures in some models.
			systemPrompt := stressTestSystemPrompt()

			// Three single-turn cases: fresh prompt processing, full cache
			// reuse (identical prompt), and a partial cache hit (new user
			// message). Prompts name the tool directly so models don't ask
			// for clarification instead of calling it.
			singleTurn := []struct {
				name   string
				prompt string
			}{
				{"first_request", "Run git diff main to review the code changes on the current branch."},
				{"cached_request", "Run git diff main to review the code changes on the current branch."},
				{"different_user_message", "Read the file at ./go.mod and tell me what dependencies we have."},
			}
			for _, tc := range singleTurn {
				t.Run(tc.name, func(t *testing.T) {
					testToolCall(t, ctx, client, model, systemPrompt, tools,
						tc.prompt, firstTimeout, streamDeadline)
				})
			}

			// Multi-turn conversation including a tool-response round trip.
			t.Run("multi_turn", func(t *testing.T) {
				testToolCallMultiTurn(t, ctx, client, model, systemPrompt, tools,
					firstTimeout, streamDeadline)
			})
		})
	}
}
// newTool assembles a function-type api.Tool from its name, description,
// list of required parameter names, and parameter property definitions.
func newTool(name, description string, required []string, props map[string]api.ToolProperty) api.Tool {
	params := api.ToolFunctionParameters{
		Type:       "object",
		Required:   required,
		Properties: testPropsMap(props),
	}
	fn := api.ToolFunction{
		Name:        name,
		Description: description,
		Parameters:  params,
	}
	return api.Tool{Type: "function", Function: fn}
}
// stressTestTools returns a set of tools matching the scale and verbosity of
// real coding agent tool definitions (opencode, Claude Code, etc.). The tool
// descriptions are intentionally verbose to match real-world prompt sizes.
//
// Returns eleven tools (bash, read, glob, grep, edit, write, question, task,
// webfetch, todowrite, skill), each built via newTool. The long descriptions
// are runtime prompt content, not documentation — do not trim or rewrap them,
// since the test's purpose depends on the total prompt size they produce.
func stressTestTools() []api.Tool {
	return []api.Tool{
		// Shell execution with an optional timeout (defaults to 2 minutes per the description).
		newTool("bash", "Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures. All commands run in the working directory by default. Before executing the command, verify that the parent directory exists. Always quote file paths that contain spaces with double quotes. After ensuring proper quoting, execute the command and capture the output. Avoid using bash with find, grep, cat, head, tail, sed, awk, or echo commands unless explicitly instructed. Instead, always prefer using the dedicated tools for these commands. When issuing multiple commands, if they are independent and can run in parallel, make multiple tool calls in a single message.",
			[]string{"command"},
			map[string]api.ToolProperty{
				"command":     {Type: api.PropertyType{"string"}, Description: "The bash command to execute"},
				"description": {Type: api.PropertyType{"string"}, Description: "Short description of what this command does in 5-10 words"},
				"timeout":     {Type: api.PropertyType{"number"}, Description: "Optional timeout in milliseconds. If not specified, commands will time out after 120000ms (2 minutes)"},
			}),
		// Paged file reader; only "path" is required, offset/limit are optional.
		newTool("read", "Read a file or directory from the local filesystem. If the path does not exist, an error is returned. By default, this tool returns up to 2000 lines from the start of the file. The offset parameter is the line number to start from (1-indexed). To read later sections, call this tool again with a larger offset. Use the grep tool to find specific content in large files or files with long lines. If you are unsure of the correct file path, use the glob tool to look up filenames by glob pattern. Contents are returned with each line prefixed by its line number. Any line longer than 2000 characters is truncated. Call this tool in parallel when you know there are multiple files you want to read. Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window. This tool can read image files and PDFs and return them as file attachments.",
			[]string{"path"},
			map[string]api.ToolProperty{
				"path":   {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to read"},
				"offset": {Type: api.PropertyType{"number"}, Description: "Line number to start reading from (1-indexed)"},
				"limit":  {Type: api.PropertyType{"number"}, Description: "Maximum number of lines to read"},
			}),
		// Filename pattern search.
		newTool("glob", "Fast file pattern matching tool that works with any codebase size. Supports glob patterns like '**/*.js' or 'src/**/*.ts'. Returns matching file paths sorted by modification time. Use this tool when you need to find files by name patterns. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead. You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The glob pattern to match files against"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
			}),
		// File-content regex search.
		newTool("grep", "Fast content search tool that works with any codebase size. Searches file contents using regular expressions. Supports full regex syntax (eg. 'log.*Error', 'function\\s+\\w+'). Filter files by pattern with the include parameter (eg. '*.js', '*.{ts,tsx}'). Returns file paths and line numbers with at least one match sorted by modification time. Use this tool when you need to find files containing specific patterns. If you need to identify or count the number of matches within files, use the bash tool with rg (ripgrep) directly. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The regex pattern to search for in file contents"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
				"include": {Type: api.PropertyType{"string"}, Description: "File pattern to include (eg. '*.js', '*.{ts,tsx}')"},
			}),
		// Exact string replacement; all three parameters are required.
		newTool("edit", "Performs exact string replacements in files. You must use your read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. When editing text from read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears after the line number prefix. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Only use emojis if the user explicitly requests it. The edit will fail if oldString is not found in the file. The edit will fail if oldString is found multiple times in the file. Use replaceAll for replacing and renaming strings across the file.",
			[]string{"path", "old_string", "new_string"},
			map[string]api.ToolProperty{
				"path":       {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to modify"},
				"old_string": {Type: api.PropertyType{"string"}, Description: "The text to replace (must be unique in the file)"},
				"new_string": {Type: api.PropertyType{"string"}, Description: "The replacement text"},
			}),
		// Whole-file write (overwrite semantics).
		newTool("write", "Writes a file to the local filesystem. This tool will overwrite the existing file if there is one at the provided path. If this is an existing file, you must use the read tool first to read the file contents. This tool will fail if you did not read the file first. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Never proactively create documentation files or README files. Only create documentation files if explicitly requested by the user.",
			[]string{"path", "content"},
			map[string]api.ToolProperty{
				"path":    {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to write"},
				"content": {Type: api.PropertyType{"string"}, Description: "The content to write to the file"},
			}),
		// Interactive clarification channel back to the user.
		newTool("question", "Use this tool when you need to ask the user questions during execution. This allows you to gather user preferences or requirements, clarify ambiguous instructions, get decisions on implementation choices as you work, and offer choices to the user about what direction to take. When custom is enabled (default), a 'Type your own answer' option is added automatically. Answers are returned as arrays of labels. Set multiple to true to allow selecting more than one answer. If you recommend a specific option, make that the first option in the list and add '(Recommended)' at the end of the label.",
			[]string{"questions"},
			map[string]api.ToolProperty{
				"questions": {Type: api.PropertyType{"string"}, Description: "The question to ask the user"},
			}),
		// Sub-agent launcher; all three parameters are required.
		newTool("task", "Launch a new agent to handle complex, multistep tasks autonomously. Available agent types: general (general-purpose agent for researching complex questions and executing multi-step tasks, use this to execute multiple units of work in parallel) and explore (fast agent specialized for exploring codebases, use this when you need to quickly find files by patterns, search code for keywords, or answer questions about the codebase). Launch multiple agents concurrently whenever possible to maximize performance. When the agent is done, it will return a single message back to you. Each agent invocation starts with a fresh context unless you provide task_id to resume the same subagent session.",
			[]string{"description", "prompt", "subagent_type"},
			map[string]api.ToolProperty{
				"description":   {Type: api.PropertyType{"string"}, Description: "A short (3-5 word) description of the task"},
				"prompt":        {Type: api.PropertyType{"string"}, Description: "The task for the agent to perform"},
				"subagent_type": {Type: api.PropertyType{"string"}, Description: "The type of specialized agent to use (general or explore)"},
			}),
		// URL fetcher; note both url and format are listed as required here.
		newTool("webfetch", "Fetches content from a specified URL. Takes a URL and optional format as input. Fetches the URL content, converts to requested format (markdown by default). Returns the content in the specified format. Use this tool when you need to retrieve and analyze web content. The URL must be a fully-formed valid URL. HTTP URLs will be automatically upgraded to HTTPS. Format options: markdown (default), text, or html. This tool is read-only and does not modify any files. Results may be summarized if the content is very large.",
			[]string{"url", "format"},
			map[string]api.ToolProperty{
				"url":    {Type: api.PropertyType{"string"}, Description: "The URL to fetch content from"},
				"format": {Type: api.PropertyType{"string"}, Description: "Output format: markdown (default), text, or html"},
			}),
		// Session task-list management; todos is a JSON-encoded array passed as a string.
		newTool("todowrite", "Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. Use this tool proactively when handling complex multistep tasks, non-trivial and complex tasks, when the user explicitly requests a todo list, when the user provides multiple tasks, after receiving new instructions, and after completing a task. Do not use this tool when there is only a single straightforward task, the task is trivial, the task can be completed in less than 3 steps, or the task is purely conversational.",
			[]string{"todos"},
			map[string]api.ToolProperty{
				"todos": {Type: api.PropertyType{"string"}, Description: "JSON array of todo items with id, title, and status fields"},
			}),
		// Named skill loader.
		newTool("skill", "Load a specialized skill that provides domain-specific instructions and workflows. Skills contain curated prompts and tool configurations for specific tasks like code review, testing, deployment, and documentation. Use this tool when the user's request matches an available skill description.",
			[]string{"name"},
			map[string]api.ToolProperty{
				"name": {Type: api.PropertyType{"string"}, Description: "The name of the skill to load"},
			}),
	}
}
// stressTestSystemPrompt returns a system prompt that matches the scale and
// content of real coding agent system prompts (~5000+ tokens). This is based
// on actual prompts captured from opencode sessions. The prompt size combined
// with many tool declarations is what pushes models past their effective
// context handling and triggers tag leakage / broken tool calls.
//
// NOTE: the body is a single raw-string literal (backticks); every line and
// its whitespace is part of the prompt sent to the model — do not reformat.
func stressTestSystemPrompt() string {
	return `You are opencode, an interactive CLI tool that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user.
IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
IMPORTANT: You must NEVER generate or guess URLs for the user unless you are confident that the URLs are for helping the user with programming. You may use URLs provided by the user in their messages or local files.
If the user asks for help or wants to give feedback inform them of the following:
- /help: Get help with using opencode
- To give feedback, users should report the issue at https://github.com/sampleorg/opencode/issues
# Tone and style
You should be concise, direct, and to the point. When you run a non-trivial bash command, you should explain what the command does and why you are running it, to make sure the user understands what you are doing (this is especially important when you are running a command that will make changes to the user's system).
Remember that your output will be displayed on a command line interface. Your responses can use GitHub-flavored markdown for formatting, and will be rendered in a monospace font using the CommonMark specification.
Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like Bash or code comments as means to communicate with the user during the session.
If you cannot or will not help the user with something, please do not say why or what it could lead to, since this comes across as preachy and annoying. Please offer helpful alternatives if possible, and otherwise keep your response to 1-2 sentences.
Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do.
IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to.
IMPORTANT: Keep your responses short, since they will be displayed on a command line interface. You MUST answer concisely with fewer than 4 lines (not including tool use or code generation), unless user asks for detail. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". Here are some examples to demonstrate appropriate verbosity:
user: 2 + 2
assistant: 4
user: what is 2+2?
assistant: 4
user: is 11 a prime number?
assistant: Yes
user: what command should I run to list files in the current directory?
assistant: ls
user: what command should I run to watch files in the current directory?
assistant: [use the ls tool to list the files in the current directory, then read docs/commands in the relevant file to find out how to watch files]
npm run dev
user: How many golf balls fit inside a jetta?
assistant: 150000
user: what files are in the directory src/?
assistant: [runs ls and sees foo.c, bar.c, baz.c]
user: which file contains the implementation of foo?
assistant: src/foo.c
user: write tests for new feature
assistant: [uses grep and glob search tools to find where similar tests are defined, uses concurrent read file tool use blocks in one tool call to read relevant files at the same time, uses edit file tool to write new tests]
# Proactiveness
You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between:
1. Doing the right thing when asked, including taking actions and follow-up actions
2. Not surprising the user with actions you take without asking
For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions.
3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did.
# Following conventions
When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns.
- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language).
- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions.
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.
# Code style
- IMPORTANT: DO NOT ADD ANY COMMENTS unless asked
# Doing tasks
The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended:
- Use the available search tools to understand the codebase and the user's query. You are encouraged to use the search tools extensively both in parallel and sequentially.
- Implement the solution using all tools available to you
- Verify the solution if possible with tests. NEVER assume specific test framework or test script. Check the README or search codebase to determine the testing approach.
- VERY IMPORTANT: When you have completed a task, you MUST run the lint and typecheck commands (e.g. npm run lint, npm run typecheck, ruff, etc.) with Bash if they were provided to you to ensure your code is correct. If you are unable to find the correct command, ask the user for the command to run and if they supply it, proactively suggest writing it to AGENTS.md so that you will know to run it next time.
NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
# Tool usage policy
- When doing file search, prefer to use the Task tool in order to reduce context usage.
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. When making multiple bash tool calls, you MUST send a single message with multiple tools calls to run the calls in parallel.
You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.
# Code References
When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
# Git workflow
When working with git:
- Create descriptive commit messages that explain WHY not just WHAT
- Use conventional commit format: feat:, fix:, refactor:, docs:, test:, chore:
- Check git status before and after operations
- Never force push to main/master
- Review diffs before committing
- NEVER update the git config
- NEVER run destructive/irreversible git commands unless the user explicitly requests them
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- Avoid git commit --amend unless explicitly requested by the user
- NEVER commit changes unless the user explicitly asks you to
# Safety
- Never delete files without confirmation
- Never run destructive commands (rm -rf, DROP TABLE, etc.) without confirmation
- Always validate inputs before using them in shell commands
- Be careful with environment variables and secrets
- Do not expose API keys, passwords, or tokens in code or logs
# Environment
Working directory: /Users/test/code/myproject
Platform: darwin
Shell: zsh
Is directory a git repo: yes
The project uses Go 1.22 with modules. Run tests with 'go test ./...' and build with 'go build ./...'.
The CI pipeline runs golangci-lint, go vet, and go test with race detector enabled.
# User instructions
Never use cd to change into the repo root or any other directory in Bash commands. The working directory is always the repo root — use relative paths directly.
Never use heredoc-style inline bash or python scripts in Bash tool calls. Instead, write the script to an ephemeral file under ./.tmp/ in the repo, then run it as a separate command.`
}
// validStressTools enumerates every tool name declared for the stress
// test; tool calls naming anything else are reported as errors.
var validStressTools = map[string]bool{
	"bash":      true,
	"read":      true,
	"glob":      true,
	"grep":      true,
	"edit":      true,
	"write":     true,
	"question":  true,
	"task":      true,
	"webfetch":  true,
	"todowrite": true,
	"skill":     true,
}
// testToolCall sends a single user message with the given system prompt and
// tool set, streams the response, and verifies it: no leaked special tags in
// content, and either a tool call naming a known stress tool or non-empty
// text. initialTimeout bounds time-to-first-token; streamTimeout bounds the
// gap between consecutive streamed responses.
func testToolCall(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, userMessage string, initialTimeout, streamTimeout time.Duration) {
	t.Helper()
	req := api.ChatRequest{
		Model: model,
		Messages: []api.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: userMessage},
		},
		Tools: tools,
		Options: map[string]any{
			"temperature": 0,
			"num_ctx":     contextLength(16384),
		},
	}
	stallTimer := time.NewTimer(initialTimeout)
	defer stallTimer.Stop()
	var gotToolCall bool
	var lastToolCall api.ToolCall
	var allContent string
	fn := func(response api.ChatResponse) error {
		if len(response.Message.ToolCalls) > 0 {
			gotToolCall = true
			lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1]
		}
		allContent += response.Message.Content
		// Reset reports false when the timer already fired, which means
		// the stream stalled between responses.
		if !stallTimer.Reset(streamTimeout) {
			return fmt.Errorf("stall detected while streaming")
		}
		return nil
	}
	stream := true
	req.Stream = &stream
	// Buffered so the chat goroutine can always deliver its result and
	// exit, even when this function returns early via t.Fatalf on a stall
	// or context cancellation. An unbuffered send would block forever and
	// leak the goroutine in those cases.
	done := make(chan error, 1)
	go func() {
		done <- client.Chat(ctx, &req, fn)
	}()
	select {
	case <-stallTimer.C:
		t.Fatalf("chat stalled after %s", initialTimeout)
	case genErr := <-done:
		if genErr != nil {
			t.Fatalf("chat failed: %v", genErr)
		}
		// Check for leaked special tags in content — these should never
		// appear in user-visible output regardless of model quality.
		checkNoLeakedTags(t, allContent)
		// The model must produce either a tool call or a text response.
		// A text response (e.g. asking for clarification) is legitimate.
		// Empty output with no tool call indicates a parser or model failure
		// (e.g. malformed tool call that gets dropped).
		if !gotToolCall && allContent == "" {
			t.Fatal("model produced neither a tool call nor text content")
		}
		if gotToolCall {
			if !validStressTools[lastToolCall.Function.Name] {
				t.Errorf("unexpected tool: %q", lastToolCall.Function.Name)
			}
			argsJSON, _ := json.Marshal(lastToolCall.Function.Arguments)
			t.Logf("tool call: %s(%s)", lastToolCall.Function.Name, string(argsJSON))
		} else {
			t.Logf("text response (no tool call): %q", truncate(allContent, 200))
		}
	case <-ctx.Done():
		t.Fatal("context cancelled")
	}
}
// testToolCallMultiTurn replays a completed tool-call exchange (user
// question → assistant bash tool call → tool result) and verifies the model
// produces a follow-up response: either text content or another tool call,
// with no leaked special tags. initialTimeout bounds time-to-first-token;
// streamTimeout bounds the gap between consecutive streamed responses.
func testToolCallMultiTurn(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, initialTimeout, streamTimeout time.Duration) {
	t.Helper()
	req := api.ChatRequest{
		Model: model,
		Messages: []api.Message{
			{Role: "system", Content: systemPrompt},
			{Role: "user", Content: "What files are in the current directory?"},
			{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{{
				Function: api.ToolCallFunction{
					Name:      "bash",
					Arguments: api.ToolCallFunctionArguments{},
				},
			}}},
			{Role: "tool", Content: "go.mod\ngo.sum\nmain.go\nREADME.md\n"},
			// The model should now respond with content or another tool call
		},
		Tools: tools,
		Options: map[string]any{
			"temperature": 0,
			"num_ctx":     contextLength(16384),
		},
	}
	// For the tool response arguments, set the command
	req.Messages[2].ToolCalls[0].Function.Arguments.Set("command", "ls")
	stallTimer := time.NewTimer(initialTimeout)
	defer stallTimer.Stop()
	var gotResponse bool
	var allContent string
	var gotToolCall bool
	fn := func(response api.ChatResponse) error {
		if response.Message.Content != "" {
			gotResponse = true
			allContent += response.Message.Content
		}
		if len(response.Message.ToolCalls) > 0 {
			gotToolCall = true
			gotResponse = true
		}
		// Reset reports false when the timer already fired (stream stall).
		if !stallTimer.Reset(streamTimeout) {
			return fmt.Errorf("stall detected")
		}
		return nil
	}
	stream := true
	req.Stream = &stream
	// Buffered so the chat goroutine can always deliver its result and
	// exit, even when this function returns early via t.Fatalf on a stall
	// or context cancellation. An unbuffered send would block forever and
	// leak the goroutine in those cases.
	done := make(chan error, 1)
	go func() {
		done <- client.Chat(ctx, &req, fn)
	}()
	select {
	case <-stallTimer.C:
		t.Fatalf("chat stalled after %s", initialTimeout)
	case genErr := <-done:
		if genErr != nil {
			t.Fatalf("chat failed: %v", genErr)
		}
		checkNoLeakedTags(t, allContent)
		if !gotResponse {
			t.Fatal("expected response (content or tool call), got nothing")
		}
		if gotToolCall {
			t.Log("multi-turn: got follow-up tool call")
		} else {
			t.Logf("multi-turn: got content response: %q", truncate(allContent, 200))
		}
	case <-ctx.Done():
		t.Fatal("context cancelled")
	}
}
// checkNoLeakedTags asserts that none of the model-internal special tags
// appear in user-visible content. These tags are supposed to be consumed by
// the parser; finding one means either the parser has a bug or the model
// emitted malformed output the parser could not handle.
func checkNoLeakedTags(t *testing.T, content string) {
	t.Helper()
	for _, tag := range []string{
		"<|channel>", "<channel|>",
		"<|tool_call>", "<tool_call|>",
		"<|tool>", "<tool|>",
		"<|turn>", "<turn|>",
	} {
		if !strings.Contains(content, tag) {
			continue
		}
		t.Errorf("leaked special tag %q in content: %q", tag, truncate(content, 300))
	}
}
// contextLength returns the context window size to use in test requests.
// When OLLAMA_CONTEXT_LENGTH is set to a valid integer it takes precedence;
// otherwise (unset, empty, or unparseable) defaultVal is returned.
func contextLength(defaultVal int) int {
	raw := os.Getenv("OLLAMA_CONTEXT_LENGTH")
	if raw == "" {
		return defaultVal
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return defaultVal
	}
	return n
}
// truncate limits s to at most n bytes, appending "..." when it was cut.
// Note the limit is in bytes, not runes — fine for log output.
func truncate(s string, n int) string {
	if len(s) > n {
		return s[:n] + "..."
	}
	return s
}

View File

@@ -47,15 +47,18 @@ func TestAPIToolCalling(t *testing.T) {
"granite3.3": 7,
}
for _, model := range libraryToolsModels {
models := testModels(libraryToolsModels)
for _, model := range models {
t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "tools")
}
if v, ok := minVRAM[model]; ok {
skipUnderMinVRAM(t, v)
}
if err := PullIfMissing(ctx, client, model); err != nil {
t.Fatalf("pull failed %s", err)
}
pullOrSkip(ctx, t, client, model)
tools := []api.Tool{
{

View File

@@ -18,6 +18,7 @@ import (
"os/exec"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"sync"
@@ -26,11 +27,17 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/types/model"
)
var (
smol = "llama3.2:1b"
stream = false
// testModel is set via OLLAMA_TEST_MODEL env var. When set, all tests
// that loop over model lists will test only this model, and smol is
// also overridden to use it.
testModel string
)
var (
@@ -288,23 +295,60 @@ var (
rainbowPrompt = "how do rainbows form? Be brief but factual in your reply"
rainbowFollowups = []string{
"Explain the physics involved in them. Be breif in your reply",
"Explain the chemistry involved in them. Be breif in your reply",
"Explain the physics involved in them. Be brief in your reply",
"Explain the chemistry involved in them. Be brief in your reply",
"What are common myths related to them? Be brief in your reply",
"Can they form if there is no rain? Be breif in your reply",
"Can they form if there are no clouds? Be breif in your reply",
"Can they form if there is no rain? Be brief in your reply",
"Can they form if there are no clouds? Be brief in your reply",
"Do they happen on other planets? Be brief in your reply",
}
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter"}
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter", "rain", "sun", "rainbow", "optical", "gold", "cloud", "planet", "prism", "fog", "ice"}
)
func init() {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
slog.SetDefault(logger)
custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
if custom != "" {
slog.Info("setting default test model to " + custom)
smol = custom
testModel = os.Getenv("OLLAMA_TEST_MODEL")
if testModel != "" {
slog.Info("test model override", "model", testModel)
smol = testModel
}
}
// testModels resolves the model list for a test loop: when the
// OLLAMA_TEST_MODEL override is active it yields just that model,
// otherwise the provided defaults are returned unchanged.
func testModels(defaults []string) []string {
	if testModel == "" {
		return defaults
	}
	return []string{testModel}
}
// requireCapability skips the test when the model does not advertise the
// given capability. It queries the server via Show on every call (no
// caching), so invoke it once per subtest. When Show reports no
// capabilities at all (e.g. locally created models), the check is treated
// as best-effort and the test proceeds.
func requireCapability(ctx context.Context, t *testing.T, client *api.Client, modelName string, cap model.Capability) {
	t.Helper()
	resp, err := client.Show(ctx, &api.ShowRequest{Name: modelName})
	if err != nil {
		t.Fatalf("failed to show model %s: %v", modelName, err)
	}
	if len(resp.Capabilities) == 0 {
		// No capability list available — best-effort, continue.
		return
	}
	if !slices.Contains(resp.Capabilities, cap) {
		t.Skipf("model %s does not have capability %q (has %v)", modelName, cap, resp.Capabilities)
	}
}
// pullOrSkip ensures modelName is available locally, pulling it when
// missing. A failed pull (e.g. model not in the registry) skips the test
// rather than failing it. PullIfMissing checks Show first, so local-only
// models that already exist return immediately without touching the
// registry.
func pullOrSkip(ctx context.Context, t *testing.T, client *api.Client, modelName string) {
	t.Helper()
	err := PullIfMissing(ctx, client, modelName)
	if err == nil {
		return
	}
	t.Skipf("model %s not available: %v", modelName, err)
}
@@ -540,9 +584,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second)
}

349
integration/vision_test.go Normal file
View File

@@ -0,0 +1,349 @@
//go:build integration
package integration
import (
"context"
"encoding/base64"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// defaultVisionModels is the default set of vision models exercised by
// these tests. When OLLAMA_TEST_MODEL is set, only that model is tested
// (with a capability check for vision).
var defaultVisionModels = []string{"gemma3", "llama3.2-vision", "qwen2.5vl", "qwen3-vl:8b"}
// decodeTestImages decodes and returns the two base64-encoded test images:
// the Abbey Road llamas image and the docs llamas image.
func decodeTestImages(t *testing.T) (abbeyRoad, docs api.ImageData) {
	t.Helper()
	road, err := base64.StdEncoding.DecodeString(imageEncoding)
	if err != nil {
		t.Fatalf("decode abbey road image: %v", err)
	}
	deskScene, err := base64.StdEncoding.DecodeString(imageEncodingDocs)
	if err != nil {
		t.Fatalf("decode docs image: %v", err)
	}
	return road, deskScene
}
// setupVisionModel prepares model for a vision test: verifies the vision
// capability when a model override is active, pulls the model if missing
// (skipping when unavailable), preloads it with an empty generate request,
// and skips the test unless at least 80% of it is loaded on GPU.
func setupVisionModel(ctx context.Context, t *testing.T, client *api.Client, model string) {
	t.Helper()
	if testModel != "" {
		requireCapability(ctx, t, client, model, "vision")
	}
	pullOrSkip(ctx, t, client, model)
	preload := &api.GenerateRequest{Model: model}
	if err := client.Generate(ctx, preload, func(api.GenerateResponse) error { return nil }); err != nil {
		t.Fatalf("failed to load model %s: %s", model, err)
	}
	skipIfNotGPULoaded(ctx, t, client, model, 80)
}
// TestVisionMultiTurn sends an image, gets a response, then asks follow-up
// questions about the same image. This verifies that the KV cache correctly
// handles cached image tokens across turns. Models with known multi-turn
// failures are skipped, but only when running the default model list — an
// explicit OLLAMA_TEST_MODEL override always runs.
func TestVisionMultiTurn(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Models that fail on multi-turn detail questions (e.g. misidentifying objects).
	skipModels := map[string]string{
		"gemma3":          "misidentifies briefcase as smartphone on turn 3",
		"llama3.2-vision": "miscounts animals (says 3 instead of 4) on turn 2",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _ := decodeTestImages(t)
			// Turn 1: describe the image
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "Describe this image briefly.",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			resp1 := DoChat(ctx, t, client, req, []string{
				"llama", "cross", "walk", "road", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
			if resp1 == nil {
				t.Fatal("no response from turn 1")
			}
			// Turn 2: follow-up about count. The assistant reply is appended
			// so the image tokens from turn 1 stay in the conversation.
			req.Messages = append(req.Messages,
				*resp1,
				api.Message{Role: "user", Content: "How many animals are in the image?"},
			)
			resp2 := DoChat(ctx, t, client, req, []string{
				"four", "4",
			}, 60*time.Second, 30*time.Second)
			if resp2 == nil {
				t.Fatal("no response from turn 2")
			}
			// Turn 3: follow-up about specific detail
			req.Messages = append(req.Messages,
				*resp2,
				api.Message{Role: "user", Content: "Is any animal carrying something? What is it?"},
			)
			DoChat(ctx, t, client, req, []string{
				"briefcase", "suitcase", "bag", "case", "luggage",
			}, 60*time.Second, 30*time.Second)
		})
	}
}
// TestVisionObjectCounting asks the model to count objects in an image.
// The docs image shows four llamas, so the reply must contain "4" or
// "four". Models with known counting failures are skipped unless an
// explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionObjectCounting(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipModels := map[string]string{
		"llama3.2-vision": "consistently miscounts (says 3 instead of 4)",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "How many animals are shown in this image? Answer with just the number.",
						Images:  []api.ImageData{docs},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{"4", "four"}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionSceneUnderstanding tests whether the model can identify
// cultural references and scene context from an image — here, recognizing
// the Abbey Road album cover parody. Models known to lack this capability
// are skipped unless an explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionSceneUnderstanding(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Models known to be too small or not capable enough for cultural reference detection.
	skipModels := map[string]string{
		"llama3.2-vision": "3B model lacks cultural reference knowledge",
		"minicpm-v":       "too small for cultural reference detection",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "What famous image or album cover is this a parody of?",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"abbey road", "beatles", "abbey",
			}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionSpatialReasoning checks that the model can describe an object
// by its spatial position: the far-left llama in the docs image is working
// on a laptop.
func TestVisionSpatialReasoning(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			// The docs image has: leftmost llama on laptop with glasses,
			// rightmost llama sleeping.
			question := api.Message{
				Role:    "user",
				Content: "What is the animal on the far left doing in this image?",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"laptop", "computer", "typing", "working"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionDetailRecognition checks that the model can pick out a small
// accessory detail: one llama in the docs image wears glasses.
func TestVisionDetailRecognition(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			question := api.Message{
				Role:    "user",
				Content: "Are any of the animals wearing glasses? Describe what you see.",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"glasses", "spectacles", "eyeglasses"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionMultiImage sends two images in a single message and asks
// the model to compare and contrast them. This exercises multi-image
// encoding and cross-image reasoning. Models without multi-image support
// are skipped unless an explicit OLLAMA_TEST_MODEL override is in effect.
func TestVisionMultiImage(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	// Multi-image support varies across models.
	skipModels := map[string]string{
		"llama3.2-vision": "does not support multi-image input",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			// Skip known-bad models only when no override is in effect.
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, docs := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "I'm showing you two images. What do they have in common, and how are they different?",
						Images:  []api.ImageData{abbeyRoad, docs},
					},
				},
				Stream: &stream,
				// temperature 0 + fixed seed for reproducible output.
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			// Both images feature cartoon llamas/alpacas — the model should
			// note the common subject and the different settings.
			DoChat(ctx, t, client, req, []string{
				"llama", "alpaca", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
		})
	}
}
// TestVisionOCR verifies basic text extraction from an image: the docs
// image contains the header text "Ollama's documentation", and the
// response must mention it.
func TestVisionOCR(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	models := testModels(defaultVisionModels)
	for _, model := range models {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs := decodeTestImages(t)
			question := api.Message{
				Role:    "user",
				Content: "What text appears in this image? Read all visible text.",
				Images:  []api.ImageData{docs},
			}
			chatReq := api.ChatRequest{
				Model:    model,
				Messages: []api.Message{question},
				Stream:   &stream,
				Options:  map[string]any{"temperature": 0.0, "seed": 42},
			}
			expected := []string{"ollama", "documentation"}
			DoChat(ctx, t, client, chatReq, expected, 120*time.Second, 30*time.Second)
		})
	}
}

View File

@@ -0,0 +1,385 @@
//go:build integration
package integration
// imageEncodingDocs is a 400x250 PNG of four cartoon llamas at a desk.
// One is on a laptop wearing glasses, one writing, one reading, one sleeping.
// The header text reads "Ollama's documentation".
const imageEncodingDocs = `iVBORw0KGgoAAAANSUhEUgAAAZAAAAD6CAYAAACPpxFEAAAKtmlDQ1BJQ0MgUHJvZmlsZQAASImVlwdQk9kWx+/3pYeElhCKlNA70gkgJYQWQEE62AhJgEAI
MQUFO7K4ghUVEVQWdFVAwUYRO6LYFsWGfUEWEWVdLNhQeR8wBHffvPfmnZk75zfnO/fcc+98d+Z/ASCbcMRiIawKQKZIJokM8qPHJyTScS8BDDQBHlAAicOV
ipkREWEAsUn/d/twD0Bj/rbtWK1///5fTY3Hl3IBgCIQTuZJuZkIHwfI8lyxRAYACmFgvEgmHuP7CFMlSIMID45x6jijx+pQkyeYOp4THclC2AIAPInDkaQC
QHJG4vRsbipShxSNsL2IJxAhnI+wd2ZmFg/hNoQtkBwxwmP1Gck/1En9W81kRU0OJ1XBE3sZN7y/QCoWcnL+z+P435YplE+uYY4MUpokOBLxusi5/ZGRFapg
UfKs8EkW8MbzxzlNHhwzyVwpK3GSpcIo9iTzOP6hijrCWWGTnCIIVOQIZOzoSeZLA6ImWZIVqVg3RcJiTjJHMtWDPCNGEU/jsxX1c9Oi4yY5WxA7S9FbRlTo
VA5LEZfIIxV74YuC/KbWDVScQ6b0h70L2Iq5srToYMU5cKb654uYUzWl8YreeHz/gKmcGEW+WOanWEssjFDk84VBirg0O0oxV4b8nFNzIxRnmM4JiZhkEAVk
QA54QACyAB34I14KxEAIOCBHxl8sG9sQK0ucIxGkpsnoTOTW8elsEdfOhu5o7+gKwNgdnvhF3tHG7yZEuzoVW10NgNeJ0dHRk1OxkJsAHEkCgNgwFbOYB4Bq
PwCXT3HlkuyJ2PhdwwAiUAFUoA30gTGwALbAEbgCT+ALAkAICAfRIAHMB1yQBjKBBCwCS8EqUACKwCawDZSBCrAHHACHwFHQBE6B8+ASuAZugrvgEegGfeAV
GAIfwAgEQTiIDFEgbcgAMoWsIUeIAXlDAVAYFAklQElQKiSC5NBSaDVUBBVDZVAlVA0dgU5A56ErUCf0AOqBBqC30BcYBZNgKqwHm8HTYQbMhEPhaHgenAov
hHPhfHgDXApXwQfhRvg8fA2+C3fDr+BhFEApoWgoQ5QtioFiocJRiagUlAS1HFWIKkFVoepQLah21G1UN2oQ9RmNRVPQdLQt2hMdjI5Bc9EL0cvR69Bl6APo
RnQb+ja6Bz2E/o4hY3Qx1hgPDBsTj0nFLMIUYEow+zANmIuYu5g+zAcsFkvDmmPdsMHYBGw6dgl2HXYXth57DtuJ7cUO43A4bZw1zgsXjuPgZLgC3A7cQdxZ
3C1cH+4TXglvgHfEB+IT8SJ8Hr4EX4M/g7+F78ePEFQJpgQPQjiBR8ghbCTsJbQQbhD6CCNENaI50YsYTUwnriKWEuuIF4mPie+UlJSMlNyVZisJlFYqlSod
Vrqs1KP0maROsiKxSHNJctIG0n7SOdID0jsymWxG9iUnkmXkDeRq8gXyU/InZYqynTJbmae8QrlcuVH5lvJrFYKKqQpTZb5KrkqJyjGVGyqDqgRVM1WWKkd1
uWq56gnVLtVhNYqag1q4WqbaOrUatStqL9Rx6mbqAeo89Xz1PeoX1HspKIoxhUXhUlZT9lIuUvqoWKo5lU1NpxZRD1E7qEMa6hrOGrEaizXKNU5rdNNQNDMa
myakbaQdpd2jfdHU02Rq8jXXatZp3tL8qDVNy1eLr1WoVa91V+uLNl07QDtDe7N2k/YTHbSOlc5snUU6u3Uu6gxOo07znMadVjjt6LSHurCulW6k7hLdPbrX
dYf19PWC9MR6O/Qu6A3q0/R99dP1t+qf0R8woBh4GwgMthqcNXhJ16Az6UJ6Kb2NPmSoaxhsKDesNOwwHDEyN4oxyjOqN3piTDRmGKcYbzVuNR4yMTCZabLU
pNbkoSnBlGGaZrrdtN30o5m5WZzZGrMmsxfmWuZs81zzWvPHFmQLH4uFFlUWdyyxlgzLDMtdljetYCsXqzSrcqsb1rC1q7XAepd1pw3Gxt1GZFNl02VLsmXa
ZtvW2vbY0ezC7PLsmuxeTzeZnjh98/T26d/tXeyF9nvtHzmoO4Q45Dm0OLx1tHLkOpY73nEiOwU6rXBqdnrjbO3Md97tfN+F4jLTZY1Lq8s3VzdXiWud64Cb
iVuS2063LgaVEcFYx7jsjnH3c1/hfsr9s4erh8zjqMdfnraeGZ41ni9mmM/gz9g7o9fLyIvjVenV7U33TvL+xbvbx9CH41Pl88zX2Jfnu8+3n2nJTGceZL72
s/eT+DX4fWR5sJaxzvmj/IP8C/07AtQDYgLKAp4GGgWmBtYGDgW5BC0JOheMCQ4N3hzcxdZjc9nV7KEQt5BlIW2hpNCo0LLQZ2FWYZKwlpnwzJCZW2Y+nmU6
SzSrKRyEs8O3hD+JMI9YGHFyNnZ2xOzy2c8jHSKXRrZHUaIWRNVEfYj2i94Y/SjGIkYe0xqrEjs3tjr2Y5x/XHFcd/z0+GXx1xJ0EgQJzYm4xNjEfYnDcwLm
bJvTN9dlbsHce/PM5y2ed2W+znzh/NMLVBZwFhxLwiTFJdUkfeWEc6o4w8ns5J3JQ1wWdzv3Fc+Xt5U3wPfiF/P7U7xSilNepHqlbkkdSPNJK0kbFLAEZYI3
6cHpFekfM8Iz9meMCuOE9Zn4zKTMEyJ1UYaoLUs/a3FWp9haXCDuXuixcNvCIUmoZJ8Uks6TNsuoiFi6LreQ/yTvyfbOLs/+tCh20bHFaotFi6/nWOWszenP
Dcz9dQl6CXdJ61LDpauW9ixjLqtcDi1PXt66wnhF/oq+lUErD6wirspY9VuefV5x3vvVcatb8vXyV+b3/hT0U22BcoGkoGuN55qKn9E/C37uWOu0dsfa74W8
wqtF9kUlRV/XcdddXe+wvnT96IaUDR0bXTfu3oTdJNp0b7PP5gPFasW5xb1bZm5p3ErfWrj1/bYF266UOJdUbCdul2/vLg0rbd5hsmPTjq9laWV3y/3K63fq
7ly78+Mu3q5bu31311XoVRRVfPlF8Mv9yqDKxiqzqpI92D3Ze57vjd3b/ivj1+p9OvuK9n3bL9rffSDyQFu1W3V1jW7Nxlq4Vl47cHDuwZuH/A8119nWVdbT
6osOg8Pywy+PJB25dzT0aOsxxrG646bHdzZQGgobocacxqGmtKbu5oTmzhMhJ1pbPFsaTtqd3H/K8FT5aY3TG88Qz+SfGT2be3b4nPjc4PnU872tC1ofXYi/
cKdtdlvHxdCLly8FXrrQzmw/e9nr8qkrHldOXGVcbbrmeq3xusv1ht9cfmvocO1ovOF2o/mm+82WzhmdZ2753Dp/2//2pTvsO9fuzrrbeS/m3v2uuV3d93n3
XzwQPnjzMPvhyKOVjzGPC5+oPil5qvu06nfL3+u7XbtP9/j3XH8W9exRL7f31R/SP7725T8nPy/pN+ivfuH44tRA4MDNl3Ne9r0SvxoZLPhT7c+dry1eH//L
96/rQ/FDfW8kb0bfrnun/W7/e+f3rcMRw08/ZH4Y+Vj4SfvTgc+Mz+1f4r70jyz6ivta+s3yW8v30O+PRzNHR8UcCWdcCqCQAaekAPB2PwDkBAAoiIYgzpnQ
2OMGTbwLxgn8J57Q4eOGKJc6xI3JI9Y5AA4jw2wlACq+AIxJo2hfADs5KcakHh7X7mOGRV4xdR5d60kXntpUg3/ahK7/oe9/eqCo+jf/LwkHEGPG+ODYAAAA
imVYSWZNTQAqAAAACAAEARoABQAAAAEAAAA+ARsABQAAAAEAAABGASgAAwAAAAEAAgAAh2kABAAAAAEAAABOAAAAAAAAAJAAAAABAAAAkAAAAAEAA5KGAAcA
AAASAAAAeKACAAQAAAABAAABkKADAAQAAAABAAAA+gAAAABBU0NJSQAAAFNjcmVlbnNob3T1Q1G8AAAACXBIWXMAABYlAAAWJQFJUiTwAAACqGlUWHRYTUw6
Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNi4wLjAiPgogICA8cmRmOlJE
RiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91
dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyIKICAgICAgICAgICAgeG1sbnM6ZXhpZj0iaHR0cDov
L25zLmFkb2JlLmNvbS9leGlmLzEuMC8iPgogICAgICAgICA8dGlmZjpZUmVzb2x1dGlvbj4xNDQ8L3RpZmY6WVJlc29sdXRpb24+CiAgICAgICAgIDx0aWZm
OlhSZXNvbHV0aW9uPjE0NDwvdGlmZjpYUmVzb2x1dGlvbj4KICAgICAgICAgPHRpZmY6UmVzb2x1dGlvblVuaXQ+MjwvdGlmZjpSZXNvbHV0aW9uVW5pdD4K
ICAgICAgICAgPGV4aWY6UGl4ZWxZRGltZW5zaW9uPjc0NjwvZXhpZjpQaXhlbFlEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlVzZXJDb21tZW50PlNjcmVl
bnNob3Q8L2V4aWY6VXNlckNvbW1lbnQ+CiAgICAgICAgIDxleGlmOlBpeGVsWERpbWVuc2lvbj4xMTk0PC9leGlmOlBpeGVsWERpbWVuc2lvbj4KICAgICAg
PC9yZGY6RGVzY3JpcHRpb24+CiAgIDwvcmRmOlJERj4KPC94OnhtcG1ldGE+Cts1PlUAAEAASURBVHgB7d0JvH3XeDfwHWKepxpiiHlMEGNMiaEorVkjLRKi
qHmoWSXkRVExV2poQlBSQhGqSkJFqBBDRYyJMeYppiLZ7/Nd8hz7nv8Z9j333HvPuXc9n8+9Z9p77bV+a3jG9aydmqb5ZfxVqghUBCoCFYGKwKoQ2Cmubld1
R724IlARqAhUBCoCgcDZKgoVgYpARaAiUBGYBYHKQGZBrd5TEagIVAQqAs3OFYOKQEWgIrCdENh5550bf2c729manXZixa/Utm1zxhlnNL/73e+aM888szcg
1QfSG6p6YUWgIrDMCJz97Gdvzn3uc5cmWCgtmBbO7U6YqD/4nOMc5yi4/OY3v+mFTWUg23301PZXBLYBAjQOzOP//u//ipS9DZo8UxMxknOd61xFQ/vlL385
lYlUBjITzPWmikBFYFkQYKo63/nO1/zqV78q0vWy1Hsz63nOc56zaCOYyCQ6e/x40KQLRv12qUtdqrn61a/eXOMa12gudrGLlUt+8Ytf7HCpSlzmMpdpLnzh
Cze//e1vm9///veNe91DXfr1r3+9wz2L8oU6/smf/Emxk1Ln1oMueMELNpe85CWb85///E0fbr8edahl/gGBvfbaq/H3ta99rYzVisvWQeC85z1v0TysP5X6
IcC8l34i7ycRI2Cvv6tc5Srti170ovZTn/pUe8opp7SnnXZaefX5JS95SXu1q11tRTm77757+9nPfrY96aST2lvd6lblt3/7t39rY5K2Bx988Ipr+9Zho657
1rOe1X7zm99sn/jEJ65bPf/mb/6m/cpXvtJ++MMfbi90oQut23M2CrP1fk4sBO0FLnCBNkwRM2EV6nkbkmgpI9T0QRm77rpre/LJJ7c/+9nP2r/9278dfL/e
7anl91t31oJTLIKlz9dSxna9NzS3qdj1DuPdc889mze/+c3NX//1XzeXvvSlm9NPP7358pe/XF593nfffZt//dd/bW5xi1sE3n8gWgYp2x9uhkjbsVgOnFl/
uHLx/nOuqed6OtnYGj0jFsUaDTJlCHDu/fM//3Pz0Y9+tHnKU54y5erRP8PZGD3uuOOaYN6Di2jPX//615sf/vCHzbe+9a3B9/XN8iNg3eEwXw0xeRkr5uZ2
JtFY1j/r+DjqFcbLzPKCF7yg2WWXXZof//jHzUtf+tLm3//935uf//znhTn8xV/8RfPoRz+6mKue//znN/e85z2b0E7KM1UiK+KL4c/jKrbZ3zMpUd1Gmebm
VTedk3jMq8ytWo5F4OIXv3hzuctdroy5WdrJjGosX/ayly028SwD4zBmLRjf/va38+v6ugUQwAz6mK7CWtLc/OY3L8Ix8zVB1/wM60rzspe9bF0FyUWG2fqE
gYwzY/ViIPe73/2aq171qsWOGKan5o1vfOOgzTSRQw89tAn1v3ne857XXPnKV27uf//7l/eDi3q84Ru52c1u1lzhClcoi+pXv/rV5iMf+Ujz05/+tNytQ2k3
JNGPfexjpUPvfOc7l0n/6U9/ugkzULmOv+U2t7lNE2aJIk3+53/+56CMrMZFL3rRMljUVdTBqaeeWp71gx/8IC8pDje/cbx1yQKmnhYhC9LnP//5ItGK7phE
17rWtcozRYJ87nOfa4499tixnaKc6173us0NbnCD4j9Sr//5n/9pvvjFL458hOuuf/3rFyy+973vFSmdLR9pg/qaFN/4xjcaWCXRgGDKwfi///u/xf5vElmo
3f+FL3yhueMd71j63r2w1N8mZZgkm912262MiQ984AON/homk/JGN7pRqdd3v/vdghNJPwn+1772tYv/54Mf/GDxq/FDqJdnwwjjoOHe+MY3LpOa34wgo+/1
G/wRSfMmN7lJ45na8/3vf7/5+Mc/PsDMeLjpTW/a8MvxaYU5trnTne7UhOmqCVNluddvyunWkR8MJsal/lYvWlD2N3xp55e4xCWaMOs2YbJtbn3rW5f+I3x8
4hOfaD7zmc9kk+vrJiAwzYpgHIapuvnQhz7UHHPMMWUt+8lPflIWTn1vTfF5Ellk7373u5cxZozSdMN0P+mWpfhtGnYaMdHmy1b8H//xH20sAO273vWuNhbw
kdfHxGvf+c53lutioSnXxSJY/B2x+LQxqcp9Rx99dBsLYhuayqCcP/uzP2tjgWxDEmyDYbTRWaWc6Mw2FsdyHf8Kv0sskO3f/d3ftbHgFJu1ssLs0D7+8Y9v
Y5KX75WhLHX2vFhwBs+KAdH+93//dxsLTHmWa5V5/PHHD+oIkwMOOKANDav9y7/8y8G99773vdtYDNof/ehHbWhipZ78QPw6wVgG1w1j+pCHPKT90pe+VJ6p
beoWGl37mMc8ptTxk5/8ZBuDtNzPxv/CF76w4JbP8LxgHu3Tnva0Nha5wXMucpGLtK94xSvaWEhLfbTFtbGotg996EPLdeyY73nPe9pY+NuXv/zlg3vVMRa9
0h6/PfzhDy+/vfe97y3tDnNRG5JXaaMyYRQmzDYW1OLvSgx8HwyxDaY9KPs85zlP++xnP7u0QXvVS1uCSbUPeMADBtc96lGPKvXS9zDiD3KtPv3Od77TvuY1
ryntvetd71qui4W9DUZV+jsW8IKhdoRW0b7+9a9vQ3so9U0cYPbYxz62PE/5MbELVspwrc+PfOQji//Jte7T79l/d7vb3cq4zLGiLe57+9vf3kYASbkuFo72
3e9+d6nfq1/96vY5z3lO6QPXus+z+LqyzPo6eb2ZNz78ZvpoUrm3v/3t27//+78v11hHDjvssPZ1r3td+Xvta1/bvuENb2iNVWvcuHIe8YhHtG95y1vKPPir
v/qrMiaud73rDa43v7u+u5zH5nuuqX43X/MZPoeAMvic1/HjWZfzOq8hYJe/7neu8b3v8nl5red2rx33Xhnde4evm6qBkMBIfCQtkv84eyI1MRbhIimSzmNx
K5pEPHAiXelKV2pe/OIXFwmZFkHKZX/cZ599GlL7P/zDPzR3uctdirRO6sMRY5IXSS8WxOaWt7xlo4xYmJo73OEO5XcqJ0k0OrBIwLF4NM94xjOK+YL5zfXa
EhO/SKT3ute9iub03Oc+t/nzP//zYguPhbRIorQFdOW456CDDirtYr5729veVt4/7GEPa4IBFnMeuzqVr0skdTZ7mhNJ9/3vf3+Rckn2JOGUZN0D4xjIRYMj
vZJiSLykeFpVDOKiEUUgQ5GOaIPB4IpWEIO8+KRI6iRg5cQi3ARTL5KzZw33HSw932+p5pOymSbVO5hDEwyqCeZdJHSSdizqRfvij6CpqBft5slPfnLBlMYW
k7B58IMfXKQ2fUFzcm8wgiaCE0q5wdTKM13PdPS4xz2uaBzqTGPyTKbRY0MLoUlEkEbBmQmK1nD44YcXTYA2pF9If7SmmMSl/25729sWzSGEjWKGOOGEExrj
RV95HtMEbSyYd+mPxCFVdZpZMPKizdAgjAe+PM8hlSrLGGXShRkN3FjUnle96lVFi7rd7W7X0HaDiRXJNhhkd2jU9wuCgHGub2nxIkuNmRwH5oi5ayzQWo29
YbLWGVdPeMITBlqHcUETdi/z/nWuc50yv1lrrJPGrDm+a2jGrnn605/eGLOxWJc55nqWHGtH1sX8V09jytyzXlmf7nvf+5axR3v+r//6r+ZNb3pTWf+e9KQn
lfEZwl4Zl9wLNHfmWmQd87dWmsiJYiEu0jNpfv/995947X777Vcis0iS17zmNdswT0zVQAK0oiXg9CmFR4PaaGSR9mgdyqJdiJQhmZIOkitGOHGJ9CIZhsmr
jcWs1BGXpgml5qRM2gwtiUTR1Ur23nvvlnSrjDChjGxjLGbl2doW5rzBNeofndDGojEyYoEkT0oXjbbHHnsM7qMJkXppZzQQ9YUXiVUbaRvq7I9UEoyvtCUW
vTYGT2kLzUOdaWR5LSkJPjE423e84x1FanpHSMykehF0eZ1X5ZD+/UZC9522eL46XfGKVyzfwZqW5Vlw6mJE06CF0bD0EVxF3bnWeOg+L0ycRfsiwZOsRDzp
n9Qg89owWRWNEG6k+vyeNknzoaHldyQpY4cWQJPtfh/O8tK2jPgj0YX5qWgGMdkH14b5tPSPumSdjREaRDCw1u9ZbjCF0kd+e9CDHlS+P+qoo0p7aX7BwAfX
BrMpeCnXeM4y6uvGaSF9NBCa+D/90z+1YdIcaLXDfWT+3fCGNxzZh9YgYyCl/e69tFjjMxb+NoSoMo+8N75oOyHMFC1Y1GeYhMvcdT+NhnbcLeuQQw4p9XMP
awgNnXYVwk6Zd9a3rId1JwSc8txnPvOZbQjprXnld+upOpuHwcBWPKP7PO/XrIH0sYHFg3Yg95GopxHbNxs64uegvYQJpHBinFcZvs8NLT6HSa1Ife4h3cZi
VezkomtwW+R6EiapIhaOIrGTOmkzSGw4zcqzlE86x8FpP6NIUIBrAtAibZP4aQfqnvUfvk+50VmlDa7x/CR19d097nGP8hUNgJTuHk5d5SfRanwmkdMIaVf2
4WiDenV9Usoh3YS5sUjFbLNGyGqIREQyZ9NHJOwTTzyxaDbBKIpGkOWRpmLRLdjpGxoQiSxMOKVf1NX32iBqj7bDn0Zr0b/qRwIkNSVpE8mKxOc6pE7KMa7c
k0SKo5Ei19BQSH760bOQz8g4UAbS16NIndi8+Xc8K5hwEwxgcKk+M65oaLSUWBzKdSRHmhJfVZI+JokaZ/qt0mIiEKbLMjb0+6T1btxvxpmxZwwk0YyNMVYU
c4lWzx+GjAX3WPtorsYUTcN6QhN2j3WD1t0lY5Ofxj1eWR+sX7SRP/3TPy11UA/zzzpi/Hkunx3txtqqjcF8SjuNa/evhUbPok6JwNUoE08FJpGJ4joLjolj
Mk8jnUL9C7thcaia7DoC+BbDnPBZjoVo2LHtO9cNf68eKDvea0jPRTW0sFEHAe4Pc/B7XpvPy1cL6L/8y7+UxYoZgypo4bMoUhuZm3Rsl3QOc4n6hWbR/am8
x/yyfV45i73CDhPpEtMOBzamAVdBBwZpSMI7OPgs/Ln4W2zzGd3ypr23MHcpmbmx4H1S4uazdmYbtP3wMDN1ST3UB1MwkF2PTKbsq7w++3Ja3fVXaD4NMyXT
kslp/PhTvjE0rk/zWd1X95iQ6u++DEbIa3wn1Fe9tINA4Tuf9U+XYOP5SF9VWkwE9BMTp3VgFiLAEpiYhkJjLmMnpP7CGAhc1hzjg0BkXTO/PUvwC/NoWCbK
d8aKRZ8pnAA8bPJUBoGYOZWA4xrBG8Y+E7731jVMA3O4d5jm33300WV91UZrinoyP7uGiV9wzFpoKgOxQAFI5UjIudgOPxQwfkds/UCymEwjdmT2RQsLv4So
GloEO5/oLzS8AAByFA1/3/1s0cOl2actwqQCCz9QSaY6oHv9cPnuNyh0Hoano0jS7N6kUX/Z8XmveufCO2pwdiUW1+pkBMvhBaf7nev8uaf7fT53Na/j2jz8
fX72mu89Z/h9ttcAxUBEpOQ16muSaFuYzVZoEsN1znuGvx/+bIyFA70IH2HSK9opxo4B2pukj1ZDnttlOqP6DeZIWzGcrGu+5vOGP+f39XXxEAgTVpnD5vUo
6mq9w78bLyJQ+TEs8IQPAh9mYIzwYb7yla8sWjEh1Npo/qaWIVuHtQXxkfKr/uM//uNAwMrnEbJca0tFmJeL79hzMKmnhgbz42AO5p15xnccQTpFWCbwEDoJ
lWEGbiLQozAf9aBNr4WmMhASp4UWMLieSWnCDhN1SuipSUMly4Vi+Lr8rJHIxkRSHGcvxyvAEWZkUVfOWidiPouWw7TBWa8d2oaYg9IMUr4Y8Q9Dw3g40Zgp
1MnihWmELbwMPiGpXROGRQwzpLnh9sNE8kgp3G8WwJSADRAhoUmcyrQZgwiDNkhhpQ40P5pQEic605iBQw1OTKm2XSJp08K6dej+vtr3GIM2eJ5FllbWNf8o
L01StLVJk3LUs3McZH+6hqOayo9pMKVlWK/fBEQMM+JRZbg2yfWEJvWjJek3ARNJmL6+UAeClf7IMvOa+rpYCPTtH1YC8yl8D0Urdp9+NtfMPWN7HBl31hBj
w9hJC4Drn/rUp5bAHRpqzgdCK6ZFgLVOWMzNB9o0E+6xETwyTOqBATCFm2PGPOK8t71A2ead8gi0BHKC8v777z+waGBg1nPP6tZx+Fn5OTHIz8OvvfTqI444
oixQpLFw7jbh4Gkuf/nLF2BVHMcU5eR3atcoBjP84PxsQuokFc3JbmETuTBv0mm5WOagwlBEAGU9xj2T3ZDdkW9h1113LXXG/am+ytKhuVBnGUwzGIr2sZfT
qNTBQKB2+i5NN7CL8OKyeKmTyB1MC2EQEWZb8GVS4R8g3VBHDWz1t9gh/WLAGswWVkyStK9u4QQsGpNnwdgz2ErVbx6kXH4AjEv5olkwXaTNIsMMan4DAzj7
YtqzE19YJYPNe9KE5HP2qfcPfOADS0RNanW+I4wk3uN8EurFfGYPkvEockw/KdsYIeSI1FEuX1ylxUYg15Y+tSQQRFBI0ZpTs/fKVHTggQcO/KvjyjJuSPTD
C7O5R8BL5uF+/hDzlwkJ80CYF4uMSFFzdpgIlMqw7iTzcI0xjfmpJ+ZljjCP80daG3wv+ivJM4frmL8Nvxr3k+bpVA1EgSqLy2kYDomJmEg4qsWOecvEUzEb
crpAmYTJGJSlQt3vbNxhC+TkOfLIIws4tB2gA0YHJuV93YWiW+a47/P51EPPoVKSKoFIq/IMphb+l+Ey8tnujaihYirBIC0wmAFp3yuu3pV+876IKS8b1vgs
hN2Ski20tBL2TxKuZ6qDAQxj6iwzGZspTL3uGkzLAKXaesVIqMWcbxHpUZiFhZsEpI/8LtzXBBIyK3gAkxGGS5JSH8yFhmTgZru9wis/Zzumfe8eY4DPjNZD
pRdeCF+DW/1pXBZxAon+df2oZ43qU4NYOUJ8aafS6tBwMHUTkXaFMZG4tM3ztA1DzbaYSDYJYgA2EdKG9I+JOdw+4ceeA3vl6ivM8Lph4tDfEc3SvO997xvU
v287Es/6ujEIGGfmVleQmPRkc3jUPJ50zyy/DTvIlWH+MzuNI2O1L2FYBLa1UM4JGI6jXhqIm0Xb3Oc+9ymTn4qFcViovGYkkN9JxkkmvUlLpcpKWKh9TvMR
NQ4wrrOwm9jKwPFJ8L53r4Uwy7IIdSnLTOkyf/MMz0o7vGglC4NFjq1TVBOTFOboOuUPl51lWZCZqjA8dkgSLjMYqVS5Ol59h4lGRnsgmcCDucxiR2OLMLoy
sDGE1AIs8OojIoN2h0Fh0BZGmh4NKElbaBsGi75wLa0Ffuqa9k1qLz+ThdbiZwHFPGkgJB3PzwmmDd3+yWfBNrHM77zCK7FLSSXxsLtde5nTxLVbhDHhNAl5
ZpaZ7c+ys++6mBorNBiLNQZrZ7m2i3fXP+y8GCWtTH/AUN/nWPMMe47Ui+biWlqaNsBAXRIH+GD29gppFwc9E+33Ay+4065gYpKNw8zzstzhsZntrK/ri4D+
TEFlfZ+09tKH58DaS1xbCQRdY39SvXaKR6zafkH9Z1Yh0VKZcM6uSpXVTlOJz9Q0E5lpw6JrsXZvkgXQQsn+TOrW6WlmIEliIn43YTEAC0NSlmkB4ERKspDQ
Kgyirkqo7qRu5VoolElqRRbZ7qKVZeWrevFPWKwsPKR+7Z9GFm6LqYXLM9VH3dRR23zOBVhZpF0pFvgoYEf6HrcIYeLq5FWbMK1RjJDWQyKHu4VXedqhTb7T
J4mlhQ/OScr2B0t22xxU2pW+Fd/nAuw+ZiubNv2uXzyz22/MZ9qnru7ttt/YgoExo01JnscxjlHSpGgUCI6epU2w0ofj2gL3q4ZmccG4hwPSmEstOnHI53nd
NbQZ40XbPK87lvye0VjDmGF0Wa72G5+VNh4B6w0tRP9U6oeAsWueWBO683L47pkYyHAh9XNFoCJQEVhkBAgzBEUCTAo/i1zfzawb5pHCW1cgHFWnykBGoVK/
qwhUBLYcArRXmgitltZbGcnKLsY4WI38wWga83B3ZSArMayfKgIVgS2MAHMtk5bFElUm8ofApuxy5nTMoy8ulYEkcvW1IlAR2DYIMGfl37Zp9ISGYhiTfB3j
bq0MZBwy9fuKQEWgIlARmIhA7zDeiaXUHysCFYGKQEVg2yFQGci26/La4IpARaAiMB8EKgOZD461lIpARaAisO0QqAxk23V5bXBFoCJQEZgPApWBzAfHWkpF
oCJQEdh2CFQGsu26vDa4IlARqAjMB4HKQOaDYy2lIlARqAhsOwQqA9l2XV4bXBGoCFQE5oNAZSDzwbGWUhGoCFQEth0ClYFsuy6vDa4IVAQqAvNBoDKQ+eBY
S6kIVAQqAtsOgcpAtl2X1wZXBCoCFYH5IFAZyHxwrKVUBCoCFYFth0BlINuuy2uDKwIVgYrAfBCoDGQ+ONZSKgIVgYrAtkOgMpBt1+W1wRWBikBFYD4IVAYy
HxxrKRWBikBFYNshUBnItuvy2uCKQEWgIjAfBCoDmQ+OtZSKQEWgIrDtENh5PVt89rOffT2Lr2VXBCoCFYGKQA8EzjzzzKZt2x5Xru6SuTGQ85znPM31r3/9
5la3ulWz++67N1e4whWaC13oQk1lIqvrkHp1RaAiUBGYJwIYx69+9avmO9/5TnPyySc3H/3oR5vjjz+++d73vrfmx+wUJayJLV3iEpdo9t9//+a+971vs9tu
uzU77aTIShWBikBFoCKwqAicdtppzbve9a7m0EMPbU488cSZqzkzA6FZHHDAAc2Tn/zk5opXvOLMFag3VgQqAhWBisDmIEAzOeyww5qDDz54Jo1kJgZyyUte
snnJS17S7LPPPpvT6vrUikBFoCJQEZgbAkxbD3nIQ5oPf/jDqypz1QyEtvHWt7612WOPPVb1oHpxRaAiUBGoCCwuAqeffnrzoAc9qDnyyCN7V3JVDORSl7pU
8973vre53vWu1/sB9cKKQEWgIlARWA4EmLTuc5/7FP9Inxr3ZiDnOMc5Cme6293u1qfcek1FoCJQEagILCECorP22muv5otf/OLU2vfeSPjQhz60qcxjKp71
gopARaAisNQI8HG//OUvb855znNObUcvDeSyl71sc8IJJzQKrlQRqAhUBCoCWx+B+93vfs0b3vCGiQ3tpYHwzlfmMRHH+mNFoCJQEdhSCDz+8Y9vzn3uc09s
01QGYje5TYKVKgIVgYpARWD7ICBY6ja3uc3EBk9lIDe/+c2bXXfddWIh9ceKQEWgIlAR2HoI3OMe95jYqKkMZBoHmlh6/bEiUBGoCFQElhaBm93sZhPNWBMZ
iLxW173udZe28bXiFYGKQEWgIjA7Ape//OWby13ucmMLmMhAznWuczW77LLL2JvrDxWBikBFoCKwdRE473nP21zmMpcZ28CJDEQcMCd6pYpARaAiUBHYfgiw
Ql3wghcc2/CJDMTN9TyPsdjVHyoCFYGKwJZHYBIPmMhAtjwytYEVgYpARaAiMDMClYHMDF29sSJQEagIbG8EKgPZ3v1fW18RqAhUBGZGoDKQmaGrN1YEKgIV
ge2NQGUg27v/a+srAhWBisDMCFQGMjN09caKQEWgIrC9EagMZHv3f219RaAiUBGYGYHKQGaGrt5YEagIVAS2NwKVgWzv/q+trwhUBCoCMyNQGcjM0NUbKwIV
gYrA9kagMpDt3f+19RWBikBFYGYEKgOZGbp6Y0WgIlAR2N4IVAayvfu/tr4iUBGoCMyMQGUgM0NXb6wIVAQqAtsbgcpAtnf/19ZXBCoCFYGZEagMZGbo6o0V
gYpARWB7I1AZyPbu/9r6ikBFoCIwMwKVgcwMXb2xIlARqAhsbwQqA9ne/V9bXxGoCFQEZkagMpCZoas3VgQqAhWB7Y1AZSDbu/9r6ysCFYGKwMwIVAYyM3T1
xopARaAisL0RqAxke/d/bX1FoCJQEZgZgcpAZoau3lgRqAhUBLY3ApWBbO/+r62vCFQEKgIzI1AZyMzQ1RsrAhWBisD2RqAykO3d/7X1FYGKQEVgZgQqA5kZ
unpjRaAiUBHY3ghUBrK9+7+2viJQEagIzIzAzjPfWW+sCFQEKgILhsBvfvOb5vvf/35z2mmnNd/+9reb78Zrs9NOzZ/92Z81V7ziFReststfncpAlr8Pawsq
AtsegR//+MfNIx/5yOaTn/xk89Of/rRp27Y597nP3Zz/fOdrTvrCF5rHP/7xzT/+4z9ue5zmDUBlIPNGtJZXEagIbDgCO4WWcc1rXrO5yU1u0uy2227NFa5w
heZCF7pQ86xnPrP5STCUv/zLv9zwOm2HB1YGsh16ubaxIrDFEbjIRS7SPP3pTy+tfPOb39y89KUvbX73u981Rx99dHPwwQc3N7zhDaciQGvBiCr1R6A60ftj
Va+sCFQElgCBK1/5ys0HP/jB5thjjy1ayWGHHdZ86lOfmljzyjwmwjP2x6qBjIWm/lAR2GQEQiLmAK7UH4Ff//rXzSte8YrmUpe6VEMTue51r1sc6jSUSQTl
ykQmITT6t8pARuNSv+0g8NnPfrZ5//vf35x++unNzW9+8+Z2t7vduqj6Jv9//Md/NCeeeGJzmctcprnDHe6wbSJnYHvSSSc1Zzvb2ZprX/vazXnPe94m2Eez
HuzjzDPOaD4fz/r5z3/eXPWqV23+5E/+pPT2si+gxs9DHvKQ5g1veEMxZ33sYx9rXvWqVzXf/OY3m+985zvN8573vOZP//RPOyP7j28L1pVZ/xGQVbyD3ci/
cEK13/3ud2NcVdqOCPz+979vn/nMZ7YXvOAFB+MjFrj2gQ98YPuLX/xirpBEyGV7+9vffvAcY/LSl750e/hhh831OYtYWDDN9nrXu1678847t+c85znbcAS3
wbBLVc8888y29Tcn+vznP9/e+U53aoNBtfoyQlvbWGTnVPrmFhNMor361a/eXvjCF24vf/nLt7vvvnt7l7vcpf3bv/3b9p/+6Z/aL3/5y61rvva1r7UhFLXH
HXdc+9GPfrT95S9/GRDPD+PNRWH+T7/b3e62Yl4O8YvRzMNFy8xA/u///q99zWte0/75n/95e+c737l9+ctfPvdF77e//W37jne8o33CE57QhgOv/dCHPjT/
3tvEEsP5OBg4ERLZXvSiFx18fvSjHz23moXk2N4pFrUcmCERt+c4xznK53PEovqv//qvc3vWohX0kY98pCx42fZ8vcAFLtC+973vnWt1v/CFL7RXutKVBjjn
szCS1772tXN91mYVRuDFKGIvSPu7mJ9d+pu/+Zv2Epe4RHvJS16yDQ13gPtRRx3Vvay+H0Jg2zEQC9J+++23w0SJzUTtD3/4wyF4ZvtIkiHd5CT0Snp84hOf
2Eb0x2yFLtBdH//4x4uUql0RFlmkNRNz7733Lm3W1g9/+MNzqbHFK3HUb9/61rcKYw47dvn+cpe7XPluLg9boEJ+9atftTe72c1KG89znvO0/+///b/2oIMO
GuBOO4DFPMicCPNNeVZEGrVh6ilSOS0P9hbUr3/96/N41MKW8T//8z/tm970pjZ8I+3b3va29rKXvWwbJtmilfStdGxQbMPUOLd+6fvczbxu2zGQ5z/veWVS
mBjXuc512gjhG3x+wAMe0J5xxhlr6o+wVw8mo2dY6KjNuQgecsghayp/EW7+67/+69Ie5qtPfOITgyqZPOGQLL/d+973XrPqz3xw/etfv5R3rWtdq/3JT34y
eBbN4+xnP3v5zeK61Yj2mmPmKU95yqB5L3nJS1qLvN+e9tSnDr5fyxtY5rMwj6QjjjiimLL8RuPc6vTpT3+6mLkwzJve9KbtD37wg6lNZsp9+9vf3lpICTPn
O9/5inn1YQ97WPuzn/1s6v3LfsG2YiCkKCqqCXGjG92o/d73vtda8O91r3uV79iZ0zQwq93TYpaTkT+ANvK///u/xebqe2oyO+uyUjgdSxu0RfuG6aEPfWhp
P+byla98ZfjnVX3+z//8z8ECFrH7K+6lyaWEvscee7SkaDRrv60ofAE+3Pe+9y04Gi+nnnrqoEbMr7e4xS3Kb7EhrtciN7h5xBs4RuBDKW+XXXZp+ZuSIvXH
QMAyX3zeysS3dP7zn7+FK2ZwzDHHlOYaU6PGVQR0tCwXOd+HXx/72MduZbhK27YVA3nhC19YOpsN/X3ve9+gczEWk8cAuOMd79iSKmYhJgXSi3JuectbFgdc
lvNf//VfxYzltwMPPDC/XrrXlIzZxt/97nfvUH++HoxYO1/5ylfu8PtqvnjUox5VyhleRLOM7E8+GNIjGjXR8/pleY10G21EQJW23/Oe9xxUO9v2ute9rvwG
47e+9a2D32d5Q7ghNSsrF7zugpm+Lo51zuWtSvxNxhmf6PHHH98+NbS75z//+W3sGWkjMm2HcXX44Ye3F7/4xQf9wAe4zz77tM9+9rOLWReemFAfLWaZMd02
DIRTm01Tx1JPSXIoJyVHt9/Ym0kWsxBnvDK6mkyWwzSWdmZ+A+aZZaQnPelJpY2c2bSRYaLRiXaBw7777jv8c+/PNAqahXI40bOfugX893//94BZsV2jUdd1
71mG97GxrT3Xuc5V2p6aV3dRJ6jAHzacv2shJjHlMAfG5rpSVPdZBJ80mfERbFWKMN4SnAFXQqBILdaK+93vfu3vQ0tLgo3owxSSYOca5tukZzzjGQVT2ozg
hK1MkxjIltqJHmaA5nOf+1z0d9OEltGEo7e8z/QE8uGEJNbEwlVSHJQfV/EvGEQTzrdyhw1K4VBecXdI7E1Ik+W7k08+uQmJecXvy/IBjig0tiYktvK++y8m
TRMLf/lKG4NRdn8u7yNYofna177WfPGLX2xOOeWUJnwbO1wTpr9yjR9ufOMbj9xbEo7k5mIXu1i5NyTp8pr9WT4s6T+YhIBT2iyHU1K2LRa45gY3uEH5Opy/
ZczmNfkq62yE5Tb2O0giqEzZaIcpQlXLV7Fglo11PnhOPutqV7taE9J1ucaen61KEiqGP6/MYfM4tOvyOaI1m7Pt/IctccE8yh6SsCA0YaVowrfZRJBH8/rX
v77sak9sgsGXtxEt14QpN7/edq9baiOhBcbmKBMjzEs7dKaJalJG9FDzgQ98oHna0562wzWTvgh7/yAlQqjBJdvn8PW3utWtGoPKxrCPHndcEzb84UsW+jMm
KRU2CvW8CSl5ZH13j4R14ZhtvhMps0OFL4w5JLHm3//930sKCZviTo++OCMmZEhyjZ3AEZdfNiH+xV/8RWMxw2DCCVnKx0BGkYR4GEj4sgb1GnXdsn0XEW2l
yphx2ON3qL4xbKyGv64whghPLZsqYf3Od76z/EnPgTGH5l02INp8GGG6RbAhLMFUPqgUqmKvSVkQPSwk5gEDwTxCKm9+9KMflQ13O1RmSb/ADLSJwIgZhOra
RKBGE+btHVoEjwCkecELXtA85znPKb8ToDCO29zmNiuuJxyF76R8Z02x63270pZiIKQxZDKEiWWHPrWQ7bXXXoWBWOw+85nPlAFG6rLgmZwGmjTQEeJXBhtN
w8LnuwhtLQueAXj7MTtaTWASHYnw+JAMl41IxSYdCvV+bPX3OEs6Pv0Xvyi71DHviOgZqWkoxEKHYYR/pQkbchMRLI0JbuLSCnfdddeRzwpz42DRk6Z7q1Ay
adIrgWMUhVO7fE0ooumF76ksbsl8hu/Rd8adv/BNNfe5z31KFlrMB0VE4uCW1D58gfFkqg9zYKsQvCIIpDBKTASOf//3f988+clP3qGJ8IhIqyZMU+U3TOHf
/u3fmj333HOHazEV2h6KSMTCvLsMeYcbtvAXW4qBMJcgksO4xS8HBPVfWgPSRJE+xnQyZiE9dNhAm64p4Drx3SgisZuoJrH6MO9YIJeFLEJpBiH9j6PYP1CY
qmudw+A+BK8Iy23CB1WkYRI2LYP2Bj/M2uJpImPKyGRlshlFYbcfLLCx+53oXCTFUdcu03cWNGTxHqflYaowgvHDH/7wFRrYVa5ylSIMEXCYGWkhX/3qV5vw
GTXhIG5ij0nzL//yL2URTBOjcTyKLJ5phqE5b5XFEDbGJkwIi8xRsXdrFAQNpvyIRzyi4GjMHn744TswD2NcObQUxLwa/oHyvsuQyxfb5N+WYiBs6ogGQtsY
JhPMwECk35S2SF8WMKq8BdCE9ZsTzUxEpgJ/OUgskJMWV6aHiKIp96uTfEPLQkxYtDAEi3Fk0bLowMrE4v+JUOkyYZlO0v/UvR+WpMIXv/jFTYTvDhgV3Cfh
mWV5DpMYprLsZPwhuPkbRRgr851xmBqLsRSZD4qvLf0W3Xst/p8In8khL3pRc+SRRxZTqt9hhiGNo5wvxdQTZeRYH3f9MnwPH766v/u7vysYM2uPmovMfK4x
V435iMwqedh8b3zzMfGZnHDCCYXR0GQQ4ZOWhzEzcyk/oraWAZq51XHHVXZuRW9sQSSwdNSy3Q8T00mEjJbEan4jjXC03+Me92jYhknUzCUms0lEaovQ3+a4
j3ykeUtMRH6T1FRIaQZRSm3Dz2LfRyTvZWMg2Ub1n7SIYABpUjJpItKn+au/+iu3jSXStqNFb3vb2zaRm6g4K+H85S99qZjBxiW6m1SPsQ9b8B+yTfDuYt6t
9s9j/FjEkuDrVD1jdRwp98ZxqJJMtJJechwbqxgWM22axYbv7zK04d+W9bN2v+LlLy/WiAMOOKDh+Oa3s+inUKJtki/yKyFrB1MrHydTrjK+FOOTYDVM1gEC
kb+IzmykkY8NuM2DH/zgYgUZvn6rfg6bwOh8WCEVLk0yxR9FipJQKUtbhNih6Pj2uc99btlElSGTIWmVzXHdkLxy8YR/MYlL6oPIkjrASoK2cSREOJ8XUuC4
yxby+5Cq2pBUSzvDVjyyjifEzvSQjMs1UmHMmtJEOgk5n4w/OZpCQxz5PHH7rgnGs+YsAiMfsAlf5mZM7Yb5MMXitCK55GMe85hWmPpqKRbGktMOfkJWwwQz
sgjYumbvSFUTzGTkNcv2pXGpTSG4lJBd49qem24i0DgKd7Afx7Xj/uyjCf9pG6bENoJvSsqi/fffvyS+DEFyxX32hrz61a/eMmN1UhjvltFAfhGSLGkBkWpF
WB166KGNs5KTmLacVBabgfKrXq/Ue5oK/0kwjhJpxKQQE2qklM7/wo7K5JLhfr0etAAXkczSJk99R1R1oaSxEauo8EIhSWfs86+OdNmjIt76NAWm+icW0yL1
xV6QYnIQ2SKajSkQjkxfiAYzztzT53mLdE1iTHMm3TKvwpfvjNNbCnKmExTpd4rdPc1ME9sRYzIGZbnE+BTx9rIY8wc86EElkk2YOd8A05jgEH0nfbyximjh
qR2VL5b4H1PqscceW9ojKIbVgak0TaC0rkNe+MIyprvNFNRwpfBvXDrM2vqJVsIZD7fh8UdDpLGIlhNEwtSt72LvTgm6ibRGAx9ePoP/6+sRKv+1cMR/Jfrd
e9YKlg/PC0ZX1pqbRwTnOWI+Ljqt4J5R2cHnZdJAaBspzcbCNmhD2DRLOgySQ0zONQtRwZxKKhQbvUYR6S2c5yVRGyxtylsmioVkkNpCenXSVqjmAzy740MW
4rUSvGxG7JbrfUzUkhrGrt9rXOMa5Xf5ubYK5aZW0qvMCN0dz10sbLQkJc9CXU1C/qtuufmeZE37oEn6Tn6z7UA0vAc96EGDDZTaLidb+OdKWiLzfLWkzDjM
quTGS3ylUKLxhMBZkjhKDRSMaLBW5XXDr8a/TcnWks2mSRpI1PuPDGP4/WYxEDs7JX/rToBpIEpVor7dNtz97ndv7WQOzj7t9rn9HjHkJWVKSPKlLvNMez63
Sk4pKE1GXSy9t8g5q0I6h2c961ltaHxTSur3s9xMYatvQyMp52IM92PWQ9qTrUIHRdbdbFf31UKOadz61rcueIREO3OTu/NHTjiLl3LlvGJmsUh1n+19+Adm
ft6y3GjcWhuy7RGsUFLydE1ba2mL1DEwzvJlxbDrPT93X60T0qvIzcWcKd1SaCGDa+WCw5g2k5aOgRx8VrJCGTD7kgUtVNMCvA6JEMZVMaC+z5l2XTg6B51v
oKQ/Ztp9i/I7e3x38PMZYSjhaCzpvmko60mh3hffFUku09LkhDOQ/b7sFCaPwiizXbSQ+9///i1/BUlVG7uL/7zbqw/1czh/W76V1D7UxwI2q09r3vVcj/L4
M8OhPpijYeZqPzen/F/6LPtNCiAHWmUf5yshDBN37IM5FWbKktwyTMIlE7V+kXaerybvCTPcekDRu8ylYyARUlfAM5idoDaNLDYpTcUmvtIB0+5Zr99lpzUo
s/PnYeZZr7oOl2sQZ9ZW9ScJZ+bi7rXdidL9ft7vwwfTxqatIp0lnhHhMpMzed51m7U8ps/w7QzGh8XEgjF3WoUjXPACBpYYyxNlYduKxLmd7TRPv3VWrre5
jOkOA4HdxyJhI82DmfCOd7hDe1icrgnr8HlNhZYAnPV0YuVm0lIxEODqWGYStlmREySlceRIyvR9iJIaF8kz7v71+P4b3/hGmxFbzs5YlwVizhVn5usuIvAX
2bYIxA7c1UaW9WwQUVTMdLkwsMHPy2wyj35ifk1BjPTM7LWViFbgECn407r6CKdrbb+1YDURn54Xof/lHCP1ZF5T782khWcgqfYBCbdlNolcVcV5eNe73rUc
1sQ0lI7rvJ6EGtE6ZUAY+K6RLfeNb3xjybZLXV0vYpfEvJymJyW05x599NGDU90wjbTl3yGkj1lCMNer7qPKJennwmbApBMxsR51z0Z+x09CI1JHEh3sl426
Jy9yrhq/i0aRIWAwDjL1+6LVcdb6dNsWEZqlmEUZ39km4zw2JQ76QFbgzaaFZyAA0pE0DXs5HLSTju8I4SzRERxLwxIDRpGL3vArJrRXxG3bazBPinC71hkV
TjocfqbP/C9OPXT8a56zgLmNOldjnvVaS1mxAbN1GqD6i7hKiUefLNIE6zJlZ93nGFlL2zfqXud/RHhywdgYT2Foo57f9zmYGrOascA3s1rpue9zNvo6YzzP
XxFtFWH/pQqLNL6dV6Juua7wPaYgt9F4dZ+30AwkO5DzMJ15zi/IxeFlL3tZOb/jXe96V7dNxZ4YqQoK2Gy2QkFx63/4h39oI//NCiclu3lGMuTzVhTW84PD
dmIvSHkmpmCiCdN1hC1moQ42axkAnGUHxqFSV4joCp+ZLtby7J5VnOkyZ0DkoGUjXmR63OMeV+oqVHsZTIOJZfdI2Tz/I38bfrXYRbbXwsiZuNaqvf4uTGcE
MSYpDvIUEIafm58dsJRRhN2jdvP3ZXx15kma51gMkPmYczJfN6NthAnrSJrizcXYvzNyg+lm1G8pGAiG8Z73vKfdb7/9iokCgMcdd1yxAdqL0CXmFhFXBoTw
z1ETwoQxaYXG6RA2/bWYDDCPLItJSt1GDTp1EeNvgVNHPhDPt3Nb9M0iUqT+LnXUvln3HGxUu4RIxubCUl+hsMtCuddFYEhsFJxYbZkM2OoJIYJCHE6Wh2lN
vHHEj0x9nPYOACPcGI9HTdHKmX7TNGzPgrm07JTHUDtMLkOjuwxko9snYMVOdgep0UhTgLNvjZC0SL6xhWYgozrO3g0mFQuFCceplBRnG5TFmIQUB8Hk12Nf
v3jyyQNzkxPJ0KiFf2wB8QOJMENbpTDpTqgchPma5TgWNn0gOThmXQSyzPV4ZVrJjYLCG9Fq8VmPeo0rk6ARu6fLhLPRqk9Ey7iyNup72m+e4EiQ6UO0BefF
85vQyGc99Y5mL0oxco8VM2pfISbNr+bgVjBjCVgwD0VFwSQtHKPG+vBc7tNfq70GY891wSth2BrDj7potHQMBIAGOuYhHjpJx9qNDHDnGfclIYkmAv/Eqaee
2ve2wXUpvdgxvBotxuTPvSnqzLS2aBQnJw5UZ4vMMpCsAvDE+Oa1mXE92y0yMDVRudlGkWgdAobf7b5nVjzllFNGXTrzd8Y+v6HymXrf8pa3jJ0PFjIY+1tk
/11fMEj62sIPxccqy0IyU/tiYMLSsVZzYd/60DC6mxmF+S6Cv2NU/ZeSgWgIG3f37HIdzk5oF+1qQwz5QQwgqQpWQ7QPph0mqThnYTW3tkwBmaTOszl+F420
KW3DRx111KJVb2R9kqHzfU0zB40sYIO/JMAwTRgDzK9dsnjxlTFxXujCF26vHiYrqVsiH1VxYjNzkJjXQgI/pDIhRHkObSjOEyn1ifxwZVPbsMkkDk8bnAke
RxOs5fGbfq85nz4dEU60QH1BGxG6LtjGZ7gkA7HWYKKiK/lMRG3ZEzVPpm7sMk96NuH2pB573jYDzKVlIMNgkdCALSXDJBqllr7vfe8r9+L6SSavaCmmArve
4zjWEh4ah03lJa1doJ45q7nE5HO/vxve8IaDATp4wCa/EfmR9dP+ZSCmSHUWdNE1by5q3fnLRAWqMw2gS8xLhBMSsEXLLnQ70tnqs19MYGO1S3FeTXG0RwK/
9tBXvrJkXqApjNoHJZ1OJBJsX/WqV5Vdz7RowSld7ZjdvTtvImHmYNG1qW1ZifNcNl5YYpZMR/x81hDh4ImxvWfazD9iOwBGm791XwXuiI6K0wp36JNZMNLX
2Q+LmqpnyzCQtMu+6EUvmtpXbPvi2DMNgIllUrIz2i1OirWgk8q6A8R7pjNJ5UgcL4oIK9/N6rBlP86BSrIclvSmNmSdL5De2wKmjcsiaUoDob4i3Bbd6a/7
BGDkGBg2E3KuXz/2t3RJehHto4V4tfjbH4AkBI1Mr0V69tvwn2uZa4Svp53fptY4UbP7iJLAsfsMzvpumhjRWqmZMnUtI1kDBAFopzE+LCAx39p3hsHzazI5
d/2WNBSRliIoWRJ87uJNm1lrJCCN5xa3uEUpl1a4iON5EgNZqnTuAXb0XzM4CrV8GPEvbL1NqKbl3OKwd5YrgsuXVNUxaEoK6zALlO/DNl3SNMfAKYf3OOrW
/c5D9heqZblOKvg+FBOtpJGXFjvMD+WUw2BSJcV8n/s3+hqp50PaKqfewWYchSmxiYlWjvGMKLSRJz7mvcGsG2fOO5Ar7LolnXVEmjTStMein5ft8OrakKLL
QVUOlwrT4Q7X+CLPBNc3sF10crKgVOLaN4yx9OoOIQonbzmIKPw65dS8WLzLIVDaFrvwy/0Rpl4OlAohpDQZlnCN/Rrl5LwIBy3YRDRj40/ZoWmUVOSON5CW
3GFKXiNIpYnFsxxJoDCHT0klngTj0EhKP7t+GclxDo6gRcGUy3G25memqw+TVeMPBZMsxwqE2bkcJRDCZxMLe5kbsf9sgIOz6Z02GtaQJsKdy6F0DkcLs1gp
Z7X/nIAYVpGSyj/MY+XQL/29TLSCq0bFB59x40WyMYtGUb9JDnQSAU4epwy2cXbxIB1ydPyKLJe4PombCaQbyUMSYbOWjyv3mXhmH62H9CAMmbSTkqaw3nSg
UpPTxrooEh0/TZw7UHAlbXWxyDpS17uSmYOmuuYO1/ks8oz02722O56YBQRFMBkOE+nX7uy8np+LHX6YhD9m2CO7/jIQjFPKtI8ox0BiyKxlk9t5Q0OWuoJv
Bw78fcwpMk3nAVS+hzHTh8R7XeKEhZkgg9R4bEyjffMDKJc5h+mPzV1ZQoWZTkjrXbIZ1u+08dX6G7vlbNb774TGlmlLOM6HserWS/syRJ9GmHvGXMOsCCMp
lQQ4PDfSvfCLWiNyTxisc5+aPmUipNXojz7EjJxmrGl7hPqUN+9rJmkgMUb+yDCG3y8aA9Ep1HphnCblMFHbqfASAlpcTBZ7QZCO0T5mrDgWtKjrVHyhwDqQ
TZnDrJs8kOkhczBxwk8i9lNMy0AzWUXVIBvC0hRwl7vcZVIRm/ZbpnhgzrPPoksWu27yPxjaT9AVLDDhdEwmxsyDUoOzNTMHMiWkIxke+qerrmP8iVOOw25q
8VxsmSHyOtlMl4VkZU5sjJVhEk32ofC3/fM//3MJ22U2yvBZgpB7ZWMwtrpMPs1O+ilNVspmuk1GG8cIl8cZ40w2hBs+EH4/C+Qw8QGmucbikdgPX7fIn1PY
hNuhgekkyowW2twd1+4xRtPRneMyTXo/DmEGtr4373M/Gue7dYpfqQ8xqVu3lLOIGze3DAPhP7AQcUiaCCsoOL/wPKGy/BXSMLBpIlEoksNZeFIz8L3FXaeR
IFICMVi6Dkvx9xZWA6S74LkfmbScjJgtxoU5kdoyXDN3TnvOojrJLN7pB+Ff6BJpKlNwaIM/2oHFCNlF64wQ38NA9mF7dYYZvHLY7x/2sIcNHMRCKRPTYQ1R
eTaTdsnCiQn7DcbL4EDP+mt7OsanCSN5j1cLfDp0cwd1/g5rKXWMd9IwDa67R4nAlQvTavYgYS4w9rdMTDpx6Wp8Qr1prZPIZmRtpX0ME8b+lNC4MYQbhQWB
9aNbHt9URrSlZUTyTxqe9YavdVi7G35GmMyLlqMOw/Nv+NrN+LxlGAjwMAVAW+iH8wmlNGYxomIm5QCxg7wrpZlsTAOpPkrWlxt5/JYSRWZQHZ7AyiexcI6T
HEgmtB5OTmpwDiT19beaSZx134hXE84eF3UUqeJ0xy6RejOKSDikz4jUm7HsnLDdBIf6IvdoYBJd3DH/NNNgKCRcdRAtlFgx36TTMyVgkXQZjrlsif4wv8SK
QELrHaZsZ/f7XMyZorrMwbXMW4mXVybSrvlFOYlpN/qwW/7we2OesKQ8m3mnLX7D9y/CZ0JfpgV5xCMeMbVKrtHe3EjrBlib01eIQI1cH5ivRlH2EbwSf9pd
mnIxefvX9gvzNqvGMHVPU2UNWDTaUgyEdpCpN0i+3X0iw8BbwPgySA8GSO5zeP/731/KYCI4LiYybcXvGI0JgymYnCRvkjO7smv4MuQJ6pKFIW3E4riPPCti
xUAiYSvXH4nklDlvDOvWY63vaWOphYzyLQiH5AfqHjbE7qttpNxuNIo+kkUZfuzxJqHIoi5hIp7HLJP3YjLKxKBOOOGE7uWFWeWeGjb8UaaXFTcs4AdCTS5s
fcPC7QOBcUq33WaxxeemSmMtzabda+DsfotYlwEF1+5eNniPMbvenDkszq9YRnrLWeH+2pFzflI70rzd9VFKmOp+fkEaI+YQJ80XU/hwWZK8smIYz7TvJAIV
wSyFHuWN0uisR7lGLWLI9JZiIDqHzTDVeq+YhPQnJhCVEpd/ZcTGd2337O/J/cXL6zCLF8na4m7w6GAOsyxb2pKUnNMJyv6fNtAcKMOvNI+U6HNgLGqIXtYd
k0zH6yhVPq/rvgoNhdnwsb2Yatch7ppRqnlK0H0O3fptMCVHgyqLrTrDWrv1WfT3GCXtQxv6JtdM3xI/yCgShq3McSHYzFiex0n/8zDlTqMDY1Oj643bww8/
fNrlC/m7pKrawBqQfqRJFf1sYGjsWyMEJyCMGwZwta4wWzGHMaEPm2f9bs2gqaR/K7XJM0Iosu4ceeSRRfjMAIpufdI/Zj2aJBB379nI91uOgegk3F4Hp3pp
wNAQMIB01vrOQk4Cdi0pO8l5I1RK0oXyaAz8I2zs/mSlpX0g0RR5QJQy/QGV016ECxON3cKY2BMjqyYzkGvEiTukx7PZQ9M8lnVYlFdMkr9BnandOQkm1c89
HOXuec9Z+XvgBWNanEmWpr9kHqKJSGU5ufLUNX20gkI6zmu63785AiKyv/swne69i/A+Ga7FalSE2ag6pg/NjulRBGdMZFzEj82D+oiPJIWhUeXkd92IJALY
uHLz+kV8zSNr+ckmRV9l3Y01/lM4McXySQhCyOhJDJqwkwE6eV++iio0Lq09xjhS5qgxnPfkKytJri2CcBYR7y3HQN761reWzsYoDg81+/mxM/kOocJfI1IR
kBJIqmzrOtakSUmXpjILMd94Frs9e2f3LBCLgdDLHGwG4QViwJn4PwnGYgAbyL5fVLs90wrpRx2HTU3j8LItHrA4AAA14UlEQVRw5SatPDESMyX10eaUc4mY
UMqkfcFDVJD3uZkyMwswE8Rs++Ojuu//+G3py8xppC+WyYkOmwwL70aXdZo38m1G9Fic+kjT3UIsRqm1Cb3uSwJA9BuhpxuV2Pf+zb4u8+XRugTQ9CH9Q9DU
bv4mTFmWClGaIq3Mf2ZslgjXJjEL8q26j+B5xpixm9cPv4q6c68/G6UXkbYcA0mpymIl224Sbp7Sb37nlUMrdg+VRb6v5Jf3U11zEnK4I7ZnOY0wJlIDpsWR
zq4t6qJ78BVzTtruh6OK8hmb/YqxGsAmSLfu0+qVIYzScSBMhZ/E4k5D3CtyDPksAs4EJN1h/imZZfgwTbAvMTFY2NT3sCVKsSFEXJ1JqjtEEE5ovGCEzD5M
o+2m2Zlw24qgBMyHWbUv8S8lsxMQsmyUe1iYOrsRU9PaQRDK4A4mKYw08YYJTRlzyewWML3nWUcUW4vSP5jje9rzCAT5PHtWhoOCpt2/Ub9vOQZiwTIZmVv6
TAxO3QSBBDwqAmZUZ1Blc5HcNaSTU0Y4wU1wTCujL0aVk6YLZgSL7CLRr0OCSk3CBMHw+hJnt34wqbqSnklHSksTIGy+Gn6r7jW0h4z2yb06fZ6rDEnvPJd/
YBnIgpLjDxPt4tCn/nwnufGPdsf8OokIARlogtmu9pAw9ZX4E8awzmi6Sc9cpN/Sp2BRX20afOHWKTBqP+uCfUwc7QIWmGWNe8Jj9gkneQpRsOvDQFyT/i1+
WJrIolKOXXiM+Bv5ZbnQAj28sWYRGmlCaIgBMrzxbVz9OF1zYIiEEXHFudXd8+FeC+jXYvGznyNVWj6NlDrGlT/pe1Kc+rIp56I66fqN/C1Sawwig9JR22cC
qCN7Oe1L29idV0T5TGiEBTQHpdxkq12gMjKpOIZD61x0ImDkPqPhnFR9684ca/8NrGkxoq6chOkQNr43kTwWIeaxDB+17yRDrvs+J6+T7j3nGHPOMpEgF3X3
Ny3gZVS7jE9zwfjKcsa9skBkuPmossZ9x7SeZwwRwBZNsOzWO+fqGAyWj4EUk1QMDtKVKJNplAsiqVeEUQKBAXEEM6GIBXfojJPYcvOV624WaT5Wa/Yaro9J
rSymiD6OzOH71/OziDR14+jn61ktuT+jt2hrk7DSDxhxmmSYVvr033CdMuyyrwY6fP9Gf6YRZPQVwWRWom2TfI3bHMPjXvfee++pmsqkemT2auX3CYWdVNZG
/0b7zUjKtWipgmO0XZQhM/QesdDT1q0RQt1FVq1W+OliIdQdvsocFmS71232+0kMZKmSKQbYhcIp3YTa2ERIXBNmpiYW+fxp5GssjuX72D/QRNqCklAu/Cgl
gVnsN2j8dSns9yWBnYSMMWHXlLAvNJpGkjsUKm8T0mP3UZv+PpNKhhO9gc9qKVJbN+EPaiLkuQmHaxMMogmneBMTriSqg2VoJiW5YkjJ5fdgoiWpYmiSU/tu
VH0kEEQxeZsQCpowC426bGG+C+23yQSIYbabuV7aafxKbCnpZES0NWEhKGXrPwk/Q5pt9MktIxHgOWKOzEqSbCozFrYm8krNWsym3CcJZ/jfmlj8m/CZNREZ
WMbkaisTgTEl0aFkhwgWIQQVXEJ4XW1xO1wv4SqyjpkT1rRlo6VkICahyWJhDvtwWeT7Ao+ZRLRE+QtJpQnpsGTttRjp0DBblUylsnSGbbJvsWOviyisxnNQ
mHvGXrdZP4S6Xh4dUm3JmjtLPcIuXBbxONOiiY2AJROsbLCjKLSGJuzzTaSHaK44JtvuqPu6310oss+awCZz+Fe6Py3k+4iGGtQrAhUG72d9g0n4Q+GDKwtb
BCk0oQnOWuQO9xkPyUB+vgQYdxsQ4fWNrLkIPhGWPhMD6ZbpPTzmSQQrZJ2Zx1ozz7r1LWvtK2TfJ83xujB9NBFKWxhIOMRLKmuS7mrJgr7ei3o45YqUqG4R
ArjaKq779aEel2ekljbrA/VHOMOb2InbhPmjaHWRFqMsbhhzRMQ0kTmgkQo+U2jP+qyd5iD9zfrsRbsPtinJzrNuax0P86xL37IsyAceeGATkX8NzR9htGGC
6VvEhl0XPo+BZSItKhv28Dk+aCkZiMEd9vZynoEFyyK9qDn0qdEWaeah8LfMsevmU1QEFJSCSGq/6kjKs5YeOcoaf8gkppqTruZpuqN10D7QPKXuUuA6/OvW
kaa7DERrioCPUtV5aE3r3eYI6GgiWKVowJ5F0429L034KhqmqEUjptdTw/yOaNPhbylnB5Uvlujf2g15m9TYCDMsgLMf8mcsIhkgbLCITyBivheumlknDOTb
MajnSZgGtX+ezEP9TomDdxDml/UvXyzoPwcyJaOONDwLWsuV1eK3MbdQ7FFY+eOCfVLXSJ0zYB6x76j44zCQUcyDQMcvEpkQiv8ugnKKb84haLFvpAg9691E
/itMAzH3RkRWwyeYTHu9nz/P8sdGdIhyWcQw3oxKyJQFolL67u3Ie+f9KsJomDIL8M6xiW6WCKfh8tbjczdzqbPGl4Gk9YgJ8If8TksQxmvnfe5dEdGyDPT0
pz+9YCx6TKj3opL1Kc/sMSaEN0/bkNdNre+e/LOfw96OMHuVc2xknRBVKAx7niQS054rz5Vbz7zLdCYiFI+NSMVFoklRWNGGPwI4/H7RGYjFLzfzyMy7ml2n
691BwlPtN4HpXSLP1KLGeduXkjmtpGlY1Hpmf3UPO5K/aFkod0cLXQ4Namq19YN9NcJEpcORa03SPrmWpFy3cP4sFjbXCAG1f2leFNroIBGpvQqLtncp26me
mW/NPPO+z2KfmRckXJUVQDiuvWUyI0gCaj7k3FWu/WDCp9/0pjeNzHic9en7au+OzAzKzuwWQoZlsfBc2xOEDvfJ49X3mWu5bssyEBv+IiJrIEHYH7AIZNMi
KcYAkRLiU5/61CJUa2wdInqq1DV8Fa19HYtM9lHA1Z9klstC0tSH767Uu3vmA+leUkm7p+0tcOTv3rGHw9k0NrJJdWEMSTNjvw1t26vPpFcbFJ2NY2+CfQVO
gHzWs57VOoZYLqdZGIt7w+xY6ioZ6KKSg7RyLDjMrQ/zsFBjCDYVj7IaaCvmTYthNYClFDIw9yy46yNjb7UZBZTtILbM/KA8KW66ZM/QPe5+9/Is/Zp55rrX
bPT7LclASGYmTQ4gKUoWwUxkUGbqEnWjIXXPCNjozu/zPBKxMzbUlyqdB2n1uXcjr3FGSGY6pnFmUsaNrMOsz4oAhbIpFcYYgiwINrxJleE7jEHbMATMQ4JJ
CxUzLWlUMkSM5xnxx7wkwzGGI+WI3dCZu0pZ+ceCwLzjeNdpZp1sF00nzSnGBK1nEYmZJ6LPSlslUe1bz9yEDBOEWdDgxjGTbHv4M9sjjjiiaCiYCIylpVEO
TagPSW4pLUr2jz733ubmrplQXSRxtBlSTrlMk9LnGetxzZZkIJnvRgfwNZAs0LSBsB4Ad8sk8eUO05Q4TfJFNw1Ji5EDO1OadNu1CO+7k4/ZYdnI4sU8AWfH
CEgFI+sBe7gFinSKKa52rFgAab1MI8rNXdgWWIuc52FOz4pzMqZJzY40yHGwqNlhYSRDhHpqozQvfUmOMH4dzBvR0t7xjneU00LtvieYTiMCF8aRjBZjmUZM
jgSCxJZWQyDSX77TZxgF/0jSZ+Jk1Ux3spl9seUYiCNrSVeAJ6n1lQCyY9b7lRQXUR7lfHZ1ZA4wuReZpF9XV0zPhFpEktk3mbLDvjZbWFgtRrSIrH+ahggX
tGe+jHmRlCf8Q5hV7Eov6TgyDb58bJNMqt1sxwQhzGnRyEFXxqq/Rz3qUb2rZ+HnYyCIdEkbMe/j46wafg5zlTXj+OOPL8laYyNl9/LB+0ypM82UKvklc1TW
mYZJYEiSxyxN3tIqdTVrmb/zrJ7NCnLZUgzEopG5pdglu2efZ4csyiuzQZ4Fcu9wzi3qgsfhn+q0Mw0WcdHQpyTzzDJLivz0iScuSldPrYdFO+3oghXSXu/o
VEylzyFeUx8ydIEFx6LFUUszdv6KhIz8KuNykJGA5YTLxc49i0Qc+ql9MLHlAU6T6ogZOFnwaU97WmkXf9Q4YmaK1C2DoAX94kwWvokumSPMqFLGZ192f8/3
HOGugad+5qvpMo+8jgVFP7lO8lVmb4KA9hk7GJAosc04n2VLMRAdmrZPh0ZtFFm8DECH1TA99CVJGg0KvpBRA6dvOet1nQUjJRxa3aJnXjWZktmNOid8vXBa
a7mp4Rm7sucmOaHRwuKMm/WgPNGQlMzMJdKHZuJwr+7i2xVuCD6Z+deCt1qT2nq0I8vkVM4Ipj4HxH3wgx8cMG5BIhzY445ewBS6JqR8JuY7HInm7A/9xpSO
6fgbRTRLAoM1gAAxnD28i7tnZ1SZPtJOPhBak7orQ5jyqDqOeva8vttSDIRDEZAcWX1z/c8SiTIMvgHjuf5WE6nEPpt2b4cpLRoxteWE7HsaoTZ0B/5Gtklf
OrhLPzAFDE/sjaxL32dZXNJeru7akPiRUAkXTBfrQULbOeYtRP7gFps7yyuBaBzlPOOj4SdYFCI0ZBumaW0YX44V9/hL0+Fwe2LTZDkQim+Ng16QA4br+1Ek
uIEwILBB32ISzGCjKE/e9PzD4gTVSZQHjzmygMmTiY4Qam6yZojM65q4JpU1r9+2DAMBXE7EyJDZCx9SFocVm/laTDOp1nN2pQOuTwU4z9KMRRpcJLKIiXs3
sLWr70LhvlwAN6M96fAnRPQ5UGwz6th9Zjed+6gABf4JpzYOxuec8SXwME0++UlPKmdX0OIsRBbXccShTMI2Njj/F4G6wgO8pvmN4Jl7nHKMf+lLXxrZlBzT
zFE2Jef+kMh+3L47TF6YSmoZIr4EJdAKUjPATCLLwMiyHSORUY7TzoN5fKwRyhyei9puM6qoukXSQJYqF5astv5QRK6U12n/5KIKNbZk5wxpYaaU5THomjAB
lEdJoSJddF+KhbmRfjykmSZMWH1v25Dr5BGTgh3tExlyw5nb67mxsJTr5KSSmiMcfU1s8CupL6S+DpW7VznTLopJ0wSDaOI40UauI5SJ50KrK+ncjYdFT+eu
30P4KfWPhaqR1j4Ei5IrzPiQxtsYk3yyJPeE71lJLstNa/wnvbu/LsWC1oTEXBL6ea73+jFWwHLsgPfS0ISGN0gd071/M97HAt7E4l0erc9j0Z5YDbg+5tGP
bu6/334ld9o1r3nNMsZPiVQ4xpXknpnqJMd0mHFX5NULhlLGnqzVeY2kocZ7UmhpTfiyxq4LEapdEohGcE15bjCAkdl3gyE27z766JIoVvLRLsn3F0yliUiu
uacG6j5nte/XnYGEGjnIjLnaynWvt2CEfb4MaANDvhu5ekzIcaTDnZ2ABucbxIQgVg0oJsy5pwzECG0skytMAM1DHvzgwa193oQ0McglZNBZFOedG6pPPUZd
ExEnjUkZPoXmgQccMOqSsd/BPZx+TWw6a/RxkvY6Q2UtBCflmqj6PBffLFM/JA3/lt8v0uuPI78SMh5jD8cKvLKefoNbRAiV82rWM/8U5oD5y/10kxvfuPne
WeMy6+JVP+bcSuaNueQi2r12o95bYPP4AYtyH7rPvvs2R8ZYkpNOinfHP8gcHdpJyZ0V5ttSTFg2iiDSHVt+wBz8Ic+PkxqbOCu9fPYvjhhuIqS3iY2Jg++G
38BMPjQE+4gaHSlkhbO/MJgws+1wNkiceliY+7AgMPysjf68rgwkbJRlkTHJLZxrGXzulegMYRwRiVVANqgnUZiwys+4t04evt7ncKw1cRxoI038MDkv5HVx
YBLSeTeZMFCG783PYWopb+FgsV0EBmIByUSPYb9tYjNaVrfXq8UlNj+VNO0R7VPaFQEGTdj0e90/6iLSroRycQxr0TryGn0fUVflI4bnuqSwLzdhomxI8otI
xt8hkV4cGWvJbEn32pUClt8sbv4sSOEgbiJIpMkFbl5to0kfdNBBTThzS5HfCakYqYs6IYsrKTkp9jk0EWixQjLP3zbyFUb+EIES+azu48hcg6UjBsy/N7zh
DU3kuGqucY1rFA0GvuaCQ+VOOumkJnJRFS23W57xFqG9TZgfm4j6LD+5T//AMhlM957h9zQbhHmMYiDGgTT0sSF0BwFMvQlUEYnVhN9vuOhN/byuDIRaHPbW
hupI5cRE1kLKyMXXAM/BNKlMA8UAc213UrjHILTgMXNFrqKGeapL7omoleanITVQlw3EHLjd66a9NyGRQTfL/dPKn+V3KjGzCXIgVOLatyzXR1RJWXSo1UwL
2tdd3PuW5ToM/uEPf3gx7+R9Jou67bnnnmVSWyakwf54LLLhjCzSmr4zweLs614TOcveiFfMTorx1IKZTGBF2qURw1B6d6YN/WGRC+dtMSuF07QchBT5mkZK
q6utP4YfaTkah37lPGCKDT9IwZeEbCE0VwhqxoZTD9UJ5vvvv38xBackvdrnz+N6c5B5lEYEMzSJeeQzjR/Zbp0dFHs7CoPELCOlSV5STEzmaTJRQioMmBuN
rUiqOLjWOkQLMTa7NImZJeZnC3xHzbUIFW6OPfbY5klPetKKeik/AnhKXWg/wxpS9/mb9R5LH/knrJMTeFayoe6i4ZyNhWHWItb9Ps4qTqtR0RmxOA2cZNJG
oJiIveoUg3HgFI1DlAq+QiIXhTLbKkf0OOfftLo6Az3MAcWpJ/2Jsp4Zu51XS5y6NtPlOLSpKibtRCepcZkBAO6TjWDRKDSpQZtkEA5GN7GKwXDKRrYQegb3
2RMQgtfE+/r8mIEHsBL15bMIsElkrD8n8qTFIl3qI/Ko7/ifVO6sv8Eho6o4lE/vsWs8nwVHbd81cueFGSm/XvHqe5torVci/DJcPMeldDFPeMITyh6RFTf2
+JC70EOYHjjj8zb9ztkvrU2mnOnibOOjfU+zztN8zqyvmxaFBQQdbkIs2m7xBFNqBxt9hnen6kiRKgaP3zPXTrdjs4zhVxNT5FecAVJSrOTGRwNQKunNppCG
BnnE4uzs3lEdduiKJMsoNJiF07f0rTZbmFa7NwPOJhWcQzsr4YqZlmYaTmF6GCwohJ0wQUy7ZcN+14aQVEu7rhNhnqvJFG2sSesNk5BWVxU2PqqBItUyV5YF
VIryvmSsZGoei9jwPoa+5czrupD8B2Nl2g7w7jPD/Fzu0/4cv/k7xv785z9/MN+TYeSrdUDusW6+qry3z2uYp9rwlZTn7xX5+4YFAnuAPEsdkDUm1xkRfKF1
lP1nfZ61HtdsGgPRGJK9SbDIG9RoCHZ60hpQmGEGG3pIX5nrptux5cIx/7TZgHCvdotXJ537zk7qHBxjbl/3r22EzLDCvpI7bTIcu6UNYfctdZT5VTlCH+UQ
8vtqUksoJHffYh6yAq+WbMQLs0Opl13di0J2DKfkLr/UaslihSEbMwSQLmGcmMIxsYcnnKttBBu0YdsvGWI/ELuuT4mUHV2yeVA5YWJspSpZLXkOLV0ZNJfN
pC9HGK6U+OpC+xUiO43sRM8sAOa67Mf+vhtjGrPeO7IfKy//zFXZesOcVNKarIb5j6rLVyO8N8yDpXz7a7oUASNFK6JR/WREPj8Cm3G0mVl5JzGQdfWBRIeU
c8D5Po4P55PIqUUkTvTYHVxCK4XPhRmm+EXUle03o4qiI6dWn9M0NgOV6yJbcBPSS/GfxEax4rxj6xQ+u5lYnHrqqSVsUyXDXDS1TS5wglpoC+XaDGMWAsnn
wQHIyS1yh02/LwkfzfDoGKTlCNK+9+Z16s/PJVqL/2BRSB/HQlH8RMbBaikWlGavvfZqYvNpCVY4I3x+p0Xor9M39YOy2ev5BdjU2eyFnPLp8WHwFcTepRJs
cnJEWyHh5GFGXW1VmsifVcKLhb7y1WwmXSXmE78SH4RAgBDImkhCOHY+8eNwdvNHid586lOe0twoIqf4pX4ZUVWRzLD4HrQJ5o7AdVy2kHbjeR4Ui3/pG2UJ
WOnSK2KtMB8FKlx46Ohd/RwpWJpgcE2kb+netjDv54PQhOYImRNzHpLRqkNgJxQ7158sQkL0dFhsIGpClSzl67Q4f2JVjm+RGpyOBh+HWMaqY0SO0DSQ7b3Y
TAainZx6HPpdR+IkUEWpIEz0wAMPLAu/vQyhTTW/DuxEsFnQRLT0JQu+QAtlWuzUx8LYh1HnMywKue9EHy4K2feDRNXEprPe1eq2P3HwXXjeCqOwz+bK4fy+
4FlRPcMFG3sWV87fU2O/Q6GzBJ9Rztvh+0d9hq85jIGk83rUdRvxnfFm/CZxjFtgw8xWGC4mCS8OcJFTBLZ0YId5tblVMGVkbr/6Na9pItFh+cwhLlxfcMNa
yTMxDcyIsHVERH4hGHYZCDxfHBF3BAWMcJgwD8JA7ESfGzMbfsY8Pg9Utyhsxfu1OtGjIws5zYt5Y5zzKq/brFdmJmaQ6MjirIIDpy4zAZpmcsrfj+2cUTBq
x2kevckeu5kksZw2sq32tWlT+dOM0R0nTE8323PPcn4Bmz/Hel+S2kVZTJxSW89CTGvMaMphclgUkuJCnexYFmDQl3IsBWNtg7mXMgQL9CXBBcZt1+fI5Kgu
IczMlLSRzV7iQGXY0b6ZZId4jkO+1TRNqZu/YLrlNT97lWVBJudhyuCWWORX5aMaLqf72dxK5zvcpTLK+g5nopBXz7ozKiGsMcOpLnHksM+k+7yNeL/uJiyb
Y+yXwOlTaoqOK0TqEQNNarBxSUjdvIg0QmpdK9m0RTogNSC7nW2yE66HhttUvuz88zvN4imhHovxFmoXzKKE3wX3KVeePTSSsK2W9+LN3xeaTtnAGNKSEb9R
RDNKU49698WPCY40xVSk/sKzmRC096MRGulPeUJ7E8cQBce2zXONCa9MnMJFmUpWS4cffvhAKoY7idS4iIm12qLmdv3OMebhgpiUaG/GWB/KsWYvSO5hsgej
L5Ggh6XoSJVSNsCpkz07NmgaB33Jfh9zF7mPuVefbTQZK3Z8W2doRRERVeplDMAZDfe7MRtpjMqGvzIujYsYp0x+siggmxKN6dRUlJH9UC4Y/jc0rl3rj7lM
vzHpqqvwcmuCcmnnzNkftsbEtV8K7UN4sI2I6hLC56Du1sw4UKqY1FkumI+zbsNVMdbDV1Y2R06s8/CNc/y8A8eOsst3fTUQnDTvmfQ6Kg/QajlogF1OZCMJ
7R3Or2kJ1fqWn9JITL6SVK3vfXldmHV6YTAJn43+Lcw/M0eF6QeamyM/u2Gns7YhFoT29SG9rYbeETmaRAbN+syNuk+IZt9w+N8FruaJvlE/53f0OeRoEm4k
WOeDZHuFqU47WCrLE1iSUYR5f339w/q4KDgYK0Lh14smaSC8wgbWSKI5sNMNSzTDF7P5cfbhkuO4oGvC1NG8813vGnvNcLnDnzkmOcwiuVsTWTOLT0EuJA6v
mKTF5mgz2yz02Mc+tokDYko72JmjM3rXk08jzAzFj8IRZ8PYOAmti49nbDSRjKQKCdW6tNFu9NVIuKPqGyGrJXDAJjXSFE2AI5I2khrYqPtg8duQ1vidON9J
XmzBnJ76UznDBDOSOX+SHet8LjQ7Ura+3wxMh+vos7bxg8jDhkiaNvKxd49qFwd4nNHRvCzG4Puib5Dd9fpnHg5UAQsyKZhDiLYXEXMlp9woH43xy9EsrYb5
hmQr4NMsGG/A2KW1nyt8XL+LdSUYa6lDANtMXLT+cNVs/9fapqjbOBpV51Hfde+f9rtr9dOuu+5atDH+tvUg62Hsjxlb9EAyiStWvO+rgcSAmkpCPtkiv7+K
szS6hQrFk4mXxOss9EiwV2zeJFASmzz/9iLMStIsx6QfaDShGvYqKibk4MAY4YUx6Xrdt5kXheloEAoZKnapSt/2Tqq3voEhm69T1vqS/SVp7zcGhZtGkEG7
zz77lH0hNj3ybxhDtM4M43StkMtFO/Qo222DmL0yOa/4emySDLNE0aJtunzcYx/bkvByz1FeywfnJLt5UjiWSzbXfIZX537wWz3ykY9sDwqbPU3aplm2dz6T
vNbGutwLNc86TSpL+GwEsQzm5KRr62/rh8AkDSTGx0qm0f08TwYiLbKyV3N+cReSyBFUFg/fWUwiZ1IZ6ExPHLB3DZMWVX1W4shSv0MPPbQU0WdBxcQsdO7j
DFMnm4bC1rzir09Zs9Z7lvt+H3W00VG9xdLbVzAPssAQEpRrQ6H9NBbRaX8w48y3cEVocLlfGZP+QpMqxxkbF11SFvNaaMPdrzftvU1qmHTfdmGO9tKs12LN
HGafUkitE/FN7ENbau2iJ3RsJAkEYPpTD5sgpVivtDkITGIgE7Wkvias6OSpJNeR0NXYuFbyS029YeiCgK7hUJKLh2rN6cUxxVQgVJYZ64hIenjOs74bun3q
R/l1rnWta5W4eWYyz1P2OOJUtD9Ehs8k+xHcl/cKMfU+jhAtSQKHUzTnfZvxGoyyOBc9WyJJ8fBrJSYbIdFCc40dzkkqNgymkX0kwp7lLHpvhF9+KPL/MFUx
UclRxPTGJCZcmDlIOn+BCsYVE1jmkGJOY0qFPdNMLJSNfRhxklsJqZ1Wj/X63XhhjhI2Gsyh7AuAjbHLRMysyIwlmGBSZtd51c94t6ckTtQrQSPMapmynEnQ
mBVyKjyWSXYjidM4fDYlHD52gBeHsr5nDpyUi8s4sU/F2AuNdpCAcyPrvhWfNcmEtWEMxEaoG8dAZKdm62XvXg3ZsGavgIgQi51BbSObQYOZ2K8hIWKYOVZT
7OBak1mZ6ieSZ1L9+A/EeJ8aG4DQ7W5727IJiP0fWewsmga8BdAZD+Lyrxgx6otC4sstwHxcos4sXJL8rYVkDJUlOTSusvjoH++nMRB4yUJr4cKQLRYInr+J
yCF+EtdYbP0hix1fiWfqO4zC4iI6B+MXCSM6UB9ZLPmm+E3m4U8oFVjDP/4e4wI2Fm5JDPkjbKrEaPg+NoIwerhIIsr/p04IM8/EihtRj+4z4rzyIigaM2+J
TMsXjwgj9dPfor8IKKNIf4u2eu1rX1vmrrFAqJTBluAqotL8I4CY45X6IzCJgShlrCo7TxNWDIiSR0mMdEiWPq6KpCyg0qYPRXQIc0WSXDWOg1wLSbgmfQTb
6yjiY3EMaMZ5s2k7E3nYXKJuYr5j0Su5moZz74wqezO+k7BQG4wBPqS+e0JG1RVm4WQtZckdJkXDakgqDvWIBaDcNs3sF+Go5Xo+BmZR5kNms/3iaFj7Jphd
pGlxnVdly+m0qHT38IPwe0xLuDjv+sfi0Ia2UdLRzLvs1Zan75mCQxgoUUX2fEgBwiz6rgkpWEKAKL4yfSz3nv5m2uZX40/zvT9mRCmL7EtjxnPSYgifJV/d
auvq+ti4WtayCMEtJxiqx1akSSaswHXjGIi8PZ43y9ngFunYLdqG1FkWiL3Dmcr5J5su2nfffdecdC7PI+5u7PFc+Xc488LENRiMBvVhI843xhwzJPgRES4Z
Ut1Cj6nMyqtfQmoreb+6jLlP5W16ciyrMjjRZ8lJxTYvHBgT4kOaRmHOLM/TDxacSeM4mWRI2xMz/E575nr9buGRe8lG02mMc951yE2lIfnPu+je5Wlz5uvi
rOdbJFBgJqE1tJGCfWJZOYb59WTTleuNH8zc40sRpBH7M4pwQQjtBmEYGxio9cTaYk4TALtkPhBg+dveGPWSkZcwYjyFtjwYe9qwFWlhGAhHnKgpHTULha2+
SGnHHHNMOfA+zBNlQbDgGDwyV66FSBK0BpEyGJLEh6JRMhFaLlKiwEZl1Q0Vu2SnJfWsVRtaSztWcy8Gycmr3dk+k+nwww8vSfnGMROLHvxfGhOT5Jz3mvSk
ulnIJFdOnzO4pcfGPJx1b1Gwa9dkDhNX+ROdRQLVloMPPrhkRhYvL/swYYCDf1HIngzMe1T2gvWuI+bP0kBj2wyyWAsa0O8EL5kF9I/PmMm0DLhxRka5VnYH
zOOUSCSJGbpPWYjlgICCjFsMxv4xgT1PfvKTW/cawzkHwpxXkk4aNyIBrQcwyjHu1XiTMVngRxxQVQI6fP/617++PGcr/VsYBmKxsviK/Bnm8n0ANyDcL7xR
p4X9u9wmT/6tb33rHfLs9ymze43JlBlQu4PFe0whfAUl1DG1iq60KNWHBYz5ZjUhrN3nb+Z7Ep+UDt12W6BtZLORjFmA+c6EC19TGxkFBhFX7qF50ATgNIq5
9mmbiS0tCWFgHOPKcmwkVV/XGVfhwC+MS1i1PyGrFo7sIxqL8OJrhNSovk996h/Cl7O8zXy12GFuwqA3g5gB9XXfzY7zqqN5lJuQMTCpjtLcyBRF6p9EaTFw
rU2thB5CZPiRijXic5/9bLndusH6MSmyTaSgcdcd/97TVswB9aNhYE6eMWzmtnYwj1kDbDnYSrQwDASoYs2pjbPa218QOfNJA3E6YDl8R5mxSbElka6VMDVS
DynW4hQRQaXcAyM2Xsw+DWWYSLLqYrBhbhauZSWTQP/kORbDk2ncZ9KYvRiYqGvWEnIJa2VE2omJMGIgzBsmfh9i98acMBUTQp37njvSp/y1XMPEos0OMNsM
ovF5PnPWRlJqnIQSzCOZib5yRMAkIulbR5j+MD7zkKkrIiiLYBmBOuV2AgStg1A3vOh3y6c1y4ZA23CsgPFsT5ecZH0pIgHLGDPOCDFbhRaKgViEDdZ0lq4W
ZKqok/1S+3A/57fT3+ZBUj4wf3DKkmT4WDhpR50GxiQXESKlPerAFLEVyGTGlNmGmRyZiKj4zCy0DJIW/5OJT+Paa6+9SrNzIVqLBMYEAf9pCQQNanXp63Sm
RZnYNBbCAEkRw1wEkniRdhT5kTalOk72w4z7aH7zqiDtwx4qfUiqZ0GwLsRRvlOtE8aZ/SnGofHSJUxi2IfG2U0DGf6+e58x4fnDggtGhhlgTvwpmNEkslYQ
QLVtmgY1qZxF+m0SA+mfUS3QnQdFp5f4/GOOOWamkFthmkJ2hesKhxTeK5GjNBrzICF/Yc4pYYzKta8gpN0d0rmEo73UwT4ECd6kQpm0b2QedduoMoRw2gcQ
0nrZtxO24JIYLjS0EiIpzNMeHCTkWdI4FNJeeY1JVl5n+RcLStkDIJmcvR36YxQZB8J8w3w16ucdvjNO1Dns3CWdR2ixzQWjjM0mWIVQVcJTr3pW8s5unWDq
Gn/aGgyw/Hmv/d0//ROLZEns6fVXkeDz19Fubc8/13ivz2IRL68+G+exGBdsxoXKduu11vdSEOnf0CJKKp1g6GVehwlrYtH20USkXQk9l87IeOmSvUj22Rij
xo5QcqHfUrBYc6RjCUGie0vBS2JDCS/hIqQdrsKc4Ww/kT1efjd+JlGY00q6JeuTbQXCzI3VrUobzkDka7Hfwl4QA3gWcB0AJaZfnh4D0GIjI+U8yADDCGTq
tJFKjiUDsEsOf5Gj36APyaZc0/19K7x/5StfWSZLOHYLxhbcURSO6oKBjLMmnuviiNrBpSZgmJnKXhB4hY9k8Nu4N/b56FObHZ3RMIpsvrMY+utDxpoxoo7y
soW5ZOyZGn3Km9c1oQWVzW8Ytn0MFrCy+MfZJs438d5fMopsczISjCWZTEitY6tl4cNA7TuxyXL4z34Qf/YEbQRZ+DECm0VtCiWkdcfNqDqEFlD2GWEO9sxY
1LsEo7AKNHGUdDlHg3BjPtsTIisvwUgG6VNib9AN9thjIAR9ODat2ktmThsjGCqs4OHe4fnffab3cE/m7j6MAyMLzbKJEwjLJmLr1FakDWcgQNTBUh+Hv2Cm
FN7KwERIDfMmA03KatJYmGhK8QYIpmKASohH27FbGBObNujnXb+NKM8kJDlhnsOTdPj5do9Lk20BsPhJhkhiC7W/pKz+YWxYhBuCa/geyi5hkxmmtMg4yrNs
xJQJwMKmb5VHE7QT2WIzTHb1u5eWNCoZYPd6iy4J2yKhXExPvReBMFWLnLkQppmycMHAQu/P4oUp53vX+ztvfH/eeO1+71p/ySi85ntl5mfvMXKLmj7xisFs
pAatTeH07t0FNpiGs78s1ua9cUQIxTwvEmPparHYa8t5AjP9jRHamGpsapvrtRmjthmZECG5JEwIhLG3q1gRzP9xhFlj6sq3sVGfySDgM4ZuHFrbCMkOryIc
2O2PCYWJbFyxS/39pjAQO6DRR2Jnqaygi0S7xABz4puBkWRiRSRHE2mwyyQnOcUBNUU9zmu20ivTgF3cdv5PW1SYJCMcspghSPmkNvdblOz8lX3Agqcck47p
wgSmlSBmJKfrXSImd/dZkdyvZEa2s5gJbZjsLDZpLSwYzyQykZkanccQoaJNnMNedilPumejfiPxyuYMO8zAgpaLOwy7mGxUnRbtObIlEEyMn7cfdVSxYGAA
mWLltDiF8aPBTPbae+/mCiFsSGcSPpVi3YhQ23Iei3Ns4ArvywZz+UEIHhiCseHkQlrQOOZBmyAQEVbcQyAiVGJKTGOYIQaEgWBiCNNi2lY+oZPAc8ABBywa
tHOpT3EeRUk7vAYw6xLaF5JqiXKKNOLRD4tHnEacipxvSMy40OOYzG0cdztwpIX0s3iVX2ONRDXZMMkJGKaTFaWFml4iWYRFclbGAj74HUZHve1tJeiAU7ZL
kjfGpOt+Vd7HZNzhu/zCszhWOfKF9w6TaCph1UI4p/WDeH7je9hBOlxm/bx4CAi9DQ2tbCrMDcgh8beRJqgEt1hLEOd1jjFRgMNZsd0zapy86JBDytiwh8T9
gmU+/vGPt8HUB5GinOdCdwWXRGqdkU5+ZYvYEuTTJXNFZGcIByUwpfvbsrxfKCc6lkcilZNIfiimhfXKY+9ZsxAnIqmQFiJxI4cYyYLdVV4YFJ0/S9ELf4/8
XUw9ciORiJE++lRIdd8OSa/rjCV1kdqYhJgMmAckBPxknFZIWssgB/ZhxHQiFxFNgFrPlt0l0p0/kjdpUX4oeNNo2Ke7FPsWikbIVBCbyYpJitQ3TBIt+t35
JBuRpHD4+fXz7AjQ+iUsNR4PD3NXhOqXwoyjCJkviSlDuChmLOPFH6KZ5DimnTI7mb+0OfM2tTqmpzfGyaOc4xFJWMascWvc+y41kjyZtBR+1j9rg/rRHM0P
497z9w4tqOvXpaUwkdF8+UaC0RUTbbesZX6/U1R+7EpINaM+TjtQahYA+A+odBxY1MxFIo5WC44kbtRbDIUpZVxE0CLVfS11MblC2iiRVZyNGa3CJ2KS+Iz5
h7RVTFwmNnMURmHhR8UOHXZ4QoFFPl/Z5qn+3wqV/lthEuAYdm2aaXJSY0qy7VL5XeMIZBM9NJwdGI7f999//8Jg9BXHuMSVyo0QytJ3Mg3z4wh28Ft3AVkL
VvXe9UeArzHSlJQxJ8rR4sxfZFwhJi2OdWa/G0cW5/OFQJL9a6yJlDR2/BlHeV/WXHLGCF0uJijji8m1S0yknkEwMaaMe4yDOUv55sPFYpx7Lp+eiK8cx91y
vFcX/jdmLuY3Qtey0KRkipvGQCw+BoNIJwuGzlkEMgBIFqQGJLts7HovkvMi1G8964Bp8EnFnpZywmCfZ5lI/CUkMRON45JmkdrLuDL4QDCA1E7cS7LEQLwm
paDBz0IatUCgnKjKOOSQQ0p9Y0PZ4DfXYVqRiqLYoGk8w/eWi+u/hURAX/GXiQZkqbBoIxI97UN6fq8WfYv7OMpxaVzlmMlr+TQzyo+Wy0mPrEVCjAlH3hvj
6uO9sb1b+D2uEZGGozTeLNsrBkRDIswQxJ2Xzj9jTMZGxe6lC/1+IRkIcDlpnZGwKMwje9HAIFGon2idSQM079kKryKeLMZhA16Y4AYSIA2QFiyiBnMxmS0G
+Qp7MfscpadGiKYJzwTBwd+VKl2PhheS8mX9t1AIdPuW2ZTAeWzsl2ECIs0bF+YlgQczoUkQSM3bUdQtz+80ifBNFCHDAk+TdQ4QMpY8z+/MUYQcGrZ1Ic1k
5cKz/imbEEVTF/7/+RDEPhsMD9MwHjExhIkRZAho0/a7nFX0QrwsJAOBjI5JqXEhkDqrEgaEzh5WeRepjvOui0kjfNaZIMx2i7LIMnHxYfB1qBfT4vBiMG8s
anmLjYCoKOfHMDVjKhZqxA/Hl5H+LsKD8TOKDjvssOLbZJo+PPwrTLT2Mk3TnJlrMR/akefabMxPKtqqu5YxVfGhMJ/6IwQxy2IgzMDLRJMYyHjdbwNaSIIg
KS4ylcVKBUPi3cpkMjLdYeqczenT2Ow2k/rsL0H8GcItF4W5bTY22/H55iNGYVHzxxTqEC7aqcAX+8veFI5xfhHaSaQ9auwOp43y6SYxUTOHRcqcsp9Ddgva
Qu57Mg/MB9oJ0y6NBLPANHzPr4KYbK1hfCwEMGHs9oapI//fNDNX1mdZXzfNB7KsgG3VepOiDjrooGIaWESTImGDo9zO3spAtuoonN4uDCRp1DignTgSl99E
6hImJUT63zuc8MYQLSPOFiqnikZG38J4bJq1P4jWwqGOUdhMnMILTYb2EDnhyr4jATWis1xP2+j67bJ+W+V1kgZSGchW6eVt0o5cQEYtHtsEgtrMngjQTmgO
mEkk+Gw+EWavX5y1gdX4EYVlUzOTVCQMLWYoRdtsiDEIAaZNMD9hHjSN4dDznlVZ6ssmMZBNNWEtNaq18puCQGUcmwL7Uj6UryGOZCh/fGiSLAr7tTcIkxD9
SSChQUibwwneNT9tl+CZtXRuZSBrQa/eWxGoCCwNAsxY/oTRJqVGKyR4HLmmCi6j0dlx6+7o6+q3FYGKQEVgyyHQhzH0uWbLAdOzQZWB9ASqXlYRqAhUBCoC
KxGoDGQlHvVTRaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUv
qwhUBCoCFYGVCFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHo
iUBlID2BqpdVBCoCFYGKwEoEKgNZiUf9VBGoCFQEKgI9EagMpCdQ9bKKQEWgIlARWIlAZSAr8aifKgIVgYpARaAnApWB9ASqXlYRqAhUBCoCKxGoDGQlHvVT
RaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUvqwhUBCoCFYGV
CFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHoicBEBtK2bfO7
3/2uZ1H1sopARaAiUBHYaghM4gETGchvfvOb5sc//vFWw6O2pyJQEagIVAR6IHDmmWc2P/3pT8deOZGB/Pa3v22+/vWvj725/lARqAhUBCoCWxeB008/vfnG
N74xtoETGYi7TjjhhLE31x8qAhWBikBFYOsi8JWvfKU57bTTxjZwKgP5wAc+0FBjKlUEKgIVgYrA9kLgmGOOaX7/+9+PbfRUBvLJT36y+exnPzu2gPpDRaAi
UBGoCGw9BDCOt771rRMbNpWBcKS/6lWvmlhI/bEiUBGoCFQEthYCH/zgB5tPfOITExu1U/zaTrwifrzABS7QHH/88c21r33taZfW3ysCFYGKQEVgyRGgfdz+
9rdvmLAm0VQNxM088U95ylOaM844Y1JZ9beKQEWgIlAR2AIIHHbYYVOZh2aePf4O8mYafelLX2rOd77zNTe/+c2nXVp/rwhUBCoCFYElReDEE09s7n//+ze/
/vWvp7agNwNR0kc+8pHm6le/ejVlTYW1XlARqAhUBJYPAfv+7nWve03c+9FtVS8TVt7Aof6ABzygeeMb35hf1deKQEWgIlAR2AIInHzyyc3d7na35vOf/3zv
1qxKA1GqvCjvfOc7i3qz5557Nuc85zl7P6xeWBGoCFQEKgKLh8Db3/72Zt99922+/OUvr6pyvaKwxpV405vetHn605/e3OlOd2p22klRlSoCFYGKQEVgWRCg
dTzvec9rjjjiiJmCpNbEQBKk29zmNs1+++3X3Pa2t2122WWX/Lq+VgQqAhWBisCCISCqVooqroi3ve1tE5MlTqv6XBhIPuTiF794s/vuuze77bZbc6UrXam5
yEUu0pztbKtys2RR9bUiUBGoCFQE5oTAL3/5y+ab3/xmc9JJJzWf+cxnmq9+9atzKXmuDGQuNaqFVAQqAhWBisBSIFDVg6XoplrJikBFoCKweAhUBrJ4fVJr
VBGoCFQElgKBykCWoptqJSsCFYGKwOIhUBnI4vVJrVFFoCJQEVgKBCoDWYpuqpWsCFQEKgKLh0BlIIvXJ7VGFYGKQEVgKRDYOWr5q6Woaa1kRaAiUBGoCCwU
Av8fgwPy24mbuF8AAAAASUVORK5CYII=
`

View File

@@ -87,7 +87,8 @@ type LlamaServer interface {
type llmServer struct {
port int
cmd *exec.Cmd
done chan error // Channel to signal when the process exits
done chan struct{} // closed when the process exits
doneErr error // valid after done is closed
status *StatusWriter
options api.Options
modelPath string
@@ -280,7 +281,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
sem: semaphore.NewWeighted(int64(numParallel)),
totalLayers: f.KV().BlockCount() + 1,
loadStart: time.Now(),
done: make(chan error, 1),
done: make(chan struct{}),
}
if err != nil {
@@ -304,10 +305,11 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
if strings.Contains(s.status.LastErrMsg, "unknown model") {
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
}
s.done <- errors.New(s.status.LastErrMsg)
s.doneErr = errors.New(s.status.LastErrMsg)
} else {
s.done <- err
s.doneErr = err
}
close(s.done)
}()
if tok != nil {
@@ -1356,8 +1358,8 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
case <-ctx.Done():
slog.Warn("client connection closed before server finished loading, aborting load")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
return fmt.Errorf("llama runner process has terminated: %w", err)
case <-s.done:
return fmt.Errorf("llama runner process has terminated: %w", s.doneErr)
default:
}
if time.Now().After(stallTimer) {

View File

@@ -0,0 +1,144 @@
package server
import (
"bytes"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/gin-gonic/gin"
"github.com/ollama/ollama/envconfig"
)
// inferenceRequestLogger writes debug copies of inference request bodies,
// plus small shell scripts that replay each request with curl, into a
// temporary directory.
type inferenceRequestLogger struct {
	dir     string // destination directory for the logged artifacts
	counter uint64 // per-process sequence number; updated via atomic.AddUint64
}
// newInferenceRequestLogger creates a logger backed by a fresh temporary
// directory ("ollama-request-logs-*"); the directory is not removed by the
// logger itself.
func newInferenceRequestLogger() (*inferenceRequestLogger, error) {
	tmpDir, err := os.MkdirTemp("", "ollama-request-logs-*")
	if err != nil {
		return nil, err
	}
	logger := &inferenceRequestLogger{dir: tmpDir}
	return logger, nil
}
// initRequestLogging enables inference request debug logging when the
// OLLAMA_DEBUG_LOG_REQUESTS environment variable is set; otherwise it is a
// no-op. On success the logger is attached to the server so route
// registration can wrap inference handlers with it.
func (s *Server) initRequestLogging() error {
	if !envconfig.DebugLogRequests() {
		return nil
	}
	requestLogger, err := newInferenceRequestLogger()
	if err != nil {
		return fmt.Errorf("enable OLLAMA_DEBUG_LOG_REQUESTS: %w", err)
	}
	s.requestLogger = requestLogger
	// Structured fields (rather than a preformatted fmt.Sprintf string) keep
	// the log directory machine-extractable from the log stream.
	slog.Info("request debug logging enabled; inference request logs include request bodies and replay curl commands", "dir", requestLogger.dir)
	return nil
}
// withInferenceRequestLogging prepends the request-logging middleware for the
// given route when debug logging is enabled; otherwise it returns the handler
// chain unchanged.
func (s *Server) withInferenceRequestLogging(route string, handlers ...gin.HandlerFunc) []gin.HandlerFunc {
	if s.requestLogger == nil {
		return handlers
	}
	chain := make([]gin.HandlerFunc, 0, len(handlers)+1)
	chain = append(chain, s.requestLogger.middleware(route))
	chain = append(chain, handlers...)
	return chain
}
// middleware returns a gin handler that captures the request body for the
// given route, restores it so downstream handlers can read it, runs the rest
// of the chain, and finally persists the body plus a replay script via log.
func (l *inferenceRequestLogger) middleware(route string) gin.HandlerFunc {
	return func(c *gin.Context) {
		// Defensive: skip logging entirely if gin handed us no request.
		if c.Request == nil {
			c.Next()
			return
		}
		method := c.Request.Method
		host := c.Request.Host
		scheme := "http"
		if c.Request.TLS != nil {
			scheme = "https"
		}
		contentType := c.GetHeader("Content-Type")
		var body []byte
		if c.Request.Body != nil {
			var err error
			// Read the entire body up front, then replace it with an
			// in-memory reader so the real handler sees the same bytes.
			body, err = io.ReadAll(c.Request.Body)
			c.Request.Body = io.NopCloser(bytes.NewReader(body))
			if err != nil {
				// NOTE(review): on a read error `body` may be truncated; the
				// truncated bytes are still forwarded and logged below.
				slog.Warn("failed to read request body for debug logging", "route", route, "error", err)
			}
		}
		// Run the handler chain first so logging I/O does not delay the response.
		c.Next()
		l.log(route, method, scheme, host, contentType, body)
	}
}
// log persists a single captured request: the raw body as a .json file and a
// sibling shell script that replays the request with curl. Failures are
// logged and swallowed; debug logging must never affect request handling.
func (l *inferenceRequestLogger) log(route, method, scheme, host, contentType string, body []byte) {
	// A nil or unconfigured logger silently drops the request.
	if l == nil || l.dir == "" {
		return
	}
	if contentType == "" {
		contentType = "application/json"
	}
	// Fill in whatever the request did not carry from the configured host.
	if host == "" || scheme == "" {
		fallback := envconfig.Host()
		if host == "" {
			host = fallback.Host
		}
		if scheme == "" {
			scheme = fallback.Scheme
		}
	}
	slug := sanitizeRouteForFilename(route)
	seq := atomic.AddUint64(&l.counter, 1)
	stamp := fmt.Sprintf("%s-%06d", time.Now().UTC().Format("20060102T150405.000000000Z"), seq)
	bodyName := fmt.Sprintf("%s_%s_body.json", stamp, slug)
	scriptName := fmt.Sprintf("%s_%s_request.sh", stamp, slug)
	bodyPath := filepath.Join(l.dir, bodyName)
	scriptPath := filepath.Join(l.dir, scriptName)
	if err := os.WriteFile(bodyPath, body, 0o600); err != nil {
		slog.Warn("failed to write debug request body", "route", route, "error", err)
		return
	}
	// requestURL (not `url`) avoids shadowing the net/url package name.
	requestURL := fmt.Sprintf("%s://%s%s", scheme, host, route)
	script := fmt.Sprintf("#!/bin/sh\nSCRIPT_DIR=\"$(CDPATH= cd -- \"$(dirname -- \"$0\")\" && pwd)\"\ncurl --request %s --url %q --header %q --data-binary @\"${SCRIPT_DIR}/%s\"\n", method, requestURL, "Content-Type: "+contentType, bodyName)
	if err := os.WriteFile(scriptPath, []byte(script), 0o600); err != nil {
		slog.Warn("failed to write debug request replay command", "route", route, "error", err)
		return
	}
	slog.Info(fmt.Sprintf("logged to %s, replay using curl with `sh %s`", bodyPath, scriptPath))
}
// sanitizeRouteForFilename converts an HTTP route into a filename-safe
// fragment: the leading slash is dropped, ASCII letters and digits are kept,
// and every other rune becomes an underscore. An empty route maps to "root".
func sanitizeRouteForFilename(route string) string {
	trimmed := strings.TrimPrefix(route, "/")
	if trimmed == "" {
		return "root"
	}
	return strings.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		default:
			return '_'
		}
	}, trimmed)
}

View File

@@ -63,6 +63,7 @@ const (
cloudErrRemoteModelDetailsUnavailable = "remote model details are unavailable"
cloudErrWebSearchUnavailable = "web search is unavailable"
cloudErrWebFetchUnavailable = "web fetch is unavailable"
copilotChatUserAgentPrefix = "GitHubCopilotChat/"
)
func writeModelRefParseError(c *gin.Context, err error, fallbackStatus int, fallbackMessage string) {
@@ -100,6 +101,7 @@ type Server struct {
addr net.Addr
sched *Scheduler
defaultNumCtx int
requestLogger *inferenceRequestLogger
}
func init() {
@@ -1157,6 +1159,17 @@ func (s *Server) ShowHandler(c *gin.Context) {
return
}
userAgent := c.Request.UserAgent()
if strings.HasPrefix(userAgent, copilotChatUserAgentPrefix) {
if resp.ModelInfo == nil {
resp.ModelInfo = map[string]any{}
}
// Copilot Chat prefers `general.basename`, but this is usually not what
// users are familiar with, so let's just echo back what we had returned in
// `/api/tags`
resp.ModelInfo["general.basename"] = req.Model
}
c.JSON(http.StatusOK, resp)
}
@@ -1686,26 +1699,26 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
// Inference
r.GET("/api/ps", s.PsHandler)
r.POST("/api/generate", s.GenerateHandler)
r.POST("/api/chat", s.ChatHandler)
r.POST("/api/generate", s.withInferenceRequestLogging("/api/generate", s.GenerateHandler)...)
r.POST("/api/chat", s.withInferenceRequestLogging("/api/chat", s.ChatHandler)...)
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
// Inference (OpenAI compatibility)
// TODO(cloud-stage-a): apply Modelfile overlay deltas for local models with cloud
// parents on v1 request families while preserving this explicit :cloud passthrough.
r.POST("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)
r.POST("/v1/chat/completions", s.withInferenceRequestLogging("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)...)
r.POST("/v1/completions", s.withInferenceRequestLogging("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)...)
r.POST("/v1/embeddings", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.EmbeddingsMiddleware(), s.EmbedHandler)
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
r.GET("/v1/models/:model", cloudModelPathPassthroughMiddleware(cloudErrRemoteModelDetailsUnavailable), middleware.RetrieveMiddleware(), s.ShowHandler)
r.POST("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)
r.POST("/v1/responses", s.withInferenceRequestLogging("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)...)
// OpenAI-compatible image generation endpoints
r.POST("/v1/images/generations", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
r.POST("/v1/images/edits", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageEditsMiddleware(), s.GenerateHandler)
// Inference (Anthropic compatibility)
r.POST("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
r.POST("/v1/messages", s.withInferenceRequestLogging("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)...)
if rc != nil {
// wrap old with new
@@ -1757,6 +1770,9 @@ func Serve(ln net.Listener) error {
}
s := &Server{addr: ln.Addr()}
if err := s.initRequestLogging(); err != nil {
return err
}
var rc *ollama.Registry
if useClient2 {

View File

@@ -0,0 +1,128 @@
package server
import (
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/gin-gonic/gin"
)
// TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts verifies that the
// logging middleware (1) leaves the request body readable for the downstream
// handler, and (2) writes exactly one body log and one curl replay script
// whose contents reference each other correctly.
func TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts(t *testing.T) {
	gin.SetMode(gin.TestMode)
	logDir := t.TempDir()
	requestLogger := &inferenceRequestLogger{dir: logDir}
	const route = "/v1/chat/completions"
	const requestBody = `{"model":"test-model","messages":[{"role":"user","content":"hello"}]}`
	var bodySeenByHandler string
	r := gin.New()
	// The handler re-reads the body to prove the middleware restored it.
	r.POST(route, requestLogger.middleware(route), func(c *gin.Context) {
		body, err := io.ReadAll(c.Request.Body)
		if err != nil {
			t.Fatalf("failed to read body in handler: %v", err)
		}
		bodySeenByHandler = string(body)
		c.Status(http.StatusOK)
	})
	req := httptest.NewRequest(http.MethodPost, route, strings.NewReader(requestBody))
	req.Host = "127.0.0.1:11434"
	req.Header.Set("Content-Type", "application/json")
	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}
	if bodySeenByHandler != requestBody {
		t.Fatalf("handler body mismatch:\nexpected: %s\ngot: %s", requestBody, bodySeenByHandler)
	}
	// Exactly one body artifact should exist for the sanitized route name.
	bodyFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_body.json"))
	if err != nil {
		t.Fatalf("failed to glob body logs: %v", err)
	}
	if len(bodyFiles) != 1 {
		t.Fatalf("expected 1 body log, got %d (%v)", len(bodyFiles), bodyFiles)
	}
	// ...and exactly one matching replay script.
	curlFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_request.sh"))
	if err != nil {
		t.Fatalf("failed to glob curl logs: %v", err)
	}
	if len(curlFiles) != 1 {
		t.Fatalf("expected 1 curl log, got %d (%v)", len(curlFiles), curlFiles)
	}
	bodyData, err := os.ReadFile(bodyFiles[0])
	if err != nil {
		t.Fatalf("failed to read body log: %v", err)
	}
	if string(bodyData) != requestBody {
		t.Fatalf("body log mismatch:\nexpected: %s\ngot: %s", requestBody, string(bodyData))
	}
	curlData, err := os.ReadFile(curlFiles[0])
	if err != nil {
		t.Fatalf("failed to read curl log: %v", err)
	}
	curlString := string(curlData)
	// The script must target the original host/route...
	if !strings.Contains(curlString, "http://127.0.0.1:11434"+route) {
		t.Fatalf("curl log does not contain expected route URL: %s", curlString)
	}
	// ...and post the sibling body file relative to the script's directory.
	bodyFileName := filepath.Base(bodyFiles[0])
	if !strings.Contains(curlString, "@\"${SCRIPT_DIR}/"+bodyFileName+"\"") {
		t.Fatalf("curl log does not reference sibling body file: %s", curlString)
	}
}
// TestNewInferenceRequestLoggerCreatesDirectory verifies that the constructor
// returns a non-nil logger whose temporary directory exists on disk.
func TestNewInferenceRequestLoggerCreatesDirectory(t *testing.T) {
	requestLogger, err := newInferenceRequestLogger()
	if err != nil {
		t.Fatalf("expected no error creating request logger: %v", err)
	}
	// Guard before any dereference: the original registered a t.Cleanup that
	// read requestLogger.dir before checking requestLogger for nil.
	if requestLogger == nil || requestLogger.dir == "" {
		t.Fatalf("expected request logger directory to be set")
	}
	t.Cleanup(func() {
		_ = os.RemoveAll(requestLogger.dir)
	})
	info, err := os.Stat(requestLogger.dir)
	if err != nil {
		t.Fatalf("expected directory to exist: %v", err)
	}
	if !info.IsDir() {
		t.Fatalf("expected %q to be a directory", requestLogger.dir)
	}
}
// TestSanitizeRouteForFilename checks representative API routes against their
// expected sanitized filename fragments.
func TestSanitizeRouteForFilename(t *testing.T) {
	cases := map[string]string{
		"/api/generate":        "api_generate",
		"/v1/chat/completions": "v1_chat_completions",
		"/v1/messages":         "v1_messages",
	}
	for route, want := range cases {
		if got := sanitizeRouteForFilename(route); got != want {
			t.Fatalf("sanitizeRouteForFilename(%q) = %q, want %q", route, got, want)
		}
	}
}

View File

@@ -721,6 +721,111 @@ func TestShow(t *testing.T) {
}
}
// TestShowCopilotUserAgentOverwritesExistingBasename verifies that /api/show
// replaces an upstream-provided general.basename with the requested model
// name when the caller's User-Agent identifies as GitHub Copilot Chat, while
// other callers see the original upstream value and unrelated model info is
// preserved.
func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	var s Server
	// Create a remote-backed model carrying its own base_name metadata so the
	// overwrite (vs. fill-in) path is exercised.
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "show-model",
		From:       "bob",
		RemoteHost: "https://ollama.com",
		Info: map[string]any{
			"model_family": "gptoss",
			"base_name":    "upstream-base-name",
		},
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
	}
	h, err := s.GenerateRoutes(nil)
	if err != nil {
		t.Fatal(err)
	}
	// makeRequest hits /api/show with an optional User-Agent and decodes the
	// response.
	makeRequest := func(userAgent string) api.ShowResponse {
		t.Helper()
		w := httptest.NewRecorder()
		req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-model"}`))
		req.Header.Set("Content-Type", "application/json")
		if userAgent != "" {
			req.Header.Set("User-Agent", userAgent)
		}
		h.ServeHTTP(w, req)
		if w.Code != http.StatusOK {
			t.Fatalf("expected status code 200, actual %d", w.Code)
		}
		var resp api.ShowResponse
		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
			t.Fatal(err)
		}
		return resp
	}
	// Without the Copilot UA, the upstream basename passes through untouched.
	withoutCopilot := makeRequest("")
	if withoutCopilot.ModelInfo["general.basename"] != "upstream-base-name" {
		t.Fatalf("expected general.basename to be %q, got %v", "upstream-base-name", withoutCopilot.ModelInfo["general.basename"])
	}
	// With the Copilot UA, basename is replaced by the requested model name.
	withCopilot := makeRequest("GitHubCopilotChat/0.41.1")
	if withCopilot.ModelInfo["general.basename"] != "show-model" {
		t.Fatalf("expected general.basename to be %q, got %v", "show-model", withCopilot.ModelInfo["general.basename"])
	}
	// Other model info keys must survive the overwrite.
	if withCopilot.ModelInfo["general.architecture"] != "gptoss" {
		t.Fatalf("expected general.architecture to be %q, got %v", "gptoss", withCopilot.ModelInfo["general.architecture"])
	}
}
// TestShowCopilotUserAgentSetsBasenameWhenModelInfoIsEmpty verifies that for
// a Copilot Chat caller, /api/show populates general.basename even when the
// model has no model info at all — and that the synthesized map contains only
// that single key.
func TestShowCopilotUserAgentSetsBasenameWhenModelInfoIsEmpty(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())
	var s Server
	// Create a remote-backed model WITHOUT any Info metadata so ModelInfo
	// starts out nil.
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "show-remote",
		From:       "bob",
		RemoteHost: "https://ollama.com",
		Stream:     &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
	}
	h, err := s.GenerateRoutes(nil)
	if err != nil {
		t.Fatal(err)
	}
	w = httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-remote"}`))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", "GitHubCopilotChat/0.41.1")
	h.ServeHTTP(w, req)
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200, actual %d", w.Code)
	}
	var resp api.ShowResponse
	if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
		t.Fatal(err)
	}
	if resp.ModelInfo["general.basename"] != "show-remote" {
		t.Fatalf("expected general.basename to be %q, got %v", "show-remote", resp.ModelInfo["general.basename"])
	}
	// The handler should have created the map solely to hold the basename.
	if len(resp.ModelInfo) != 1 {
		t.Fatalf("expected model_info to contain only general.basename, got %#v", resp.ModelInfo)
	}
}
func TestNormalize(t *testing.T) {
type testCase struct {
input []float32

View File

@@ -109,7 +109,7 @@ func ConfigFromModelfile(modelfile *parser.Modelfile) (string, *ModelfileConfig,
type CreateOptions struct {
ModelName string
ModelDir string
Quantize string // "int4", "int8", "nvfp4", or "mxfp8" for quantization
Quantize string // "int4", "int8", "nvfp4", "mxfp4", or "mxfp8" for quantization
Modelfile *ModelfileConfig // template/system/license/parser/renderer/parameters from Modelfile
}
@@ -280,7 +280,7 @@ func newPackedTensorLayerCreator() create.PackedTensorLayerCreator {
if !QuantizeSupported() {
return create.LayerInfo{}, fmt.Errorf("quantization requires MLX support")
}
blobData, err := quantizePackedGroup(tensors)
blobData, err := quantizePackedGroup(groupName, tensors)
if err != nil {
return create.LayerInfo{}, fmt.Errorf("failed to quantize packed group %s: %w", groupName, err)
}

View File

@@ -7,29 +7,27 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"github.com/ollama/ollama/x/create"
"github.com/ollama/ollama/x/imagegen/mlx"
"github.com/ollama/ollama/x/mlxrunner/mlx"
"github.com/ollama/ollama/x/mlxrunner/model"
)
// quantizeParams maps quantization type names to MLX quantize parameters.
var quantizeParams = map[string]struct {
groupSize int
bits int
mode string
}{
"int4": {64, 4, "affine"},
"nvfp4": {16, 4, "nvfp4"},
"int8": {64, 8, "affine"},
"mxfp8": {32, 8, "mxfp8"},
}
// loadAndQuantizeArray writes a safetensors reader to a temp file, loads it with MLX,
// quantizes the tensor, and appends the resulting arrays (weight, scale, optional bias)
// to the provided maps. If quantize is empty, the tensor is kept as-is.
// Returns any temp file paths created (caller must clean up) and arrays needing eval.
func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]*mlx.Array) (tmpPath string, toEval []*mlx.Array, nativeHandle *mlx.SafetensorsFile, err error) {
if quantize != "" {
if gs, _, _ := model.QuantizationParams(quantize); gs == 0 {
return "", nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
}
}
tmpDir := ensureTempDir()
tmpFile, err := os.CreateTemp(tmpDir, "quant-*.safetensors")
@@ -50,11 +48,16 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
}
// Find the tensor key (may differ from name for single-tensor blobs)
inputKey, err := findSafetensorsKey(tmpPath)
header, err := readSafetensorsHeader(tmpPath)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to read blob header for %s: %w", name, err)
}
inputKey, err := safetensorsKey(name, header)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to resolve tensor key for %s: %w", name, err)
}
arr := st.Get(inputKey)
if arr == nil {
@@ -62,34 +65,46 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
return tmpPath, nil, nil, fmt.Errorf("tensor %q not found in safetensors", inputKey)
}
// Decode FP8 source encoding before checking quantize, so that callers
// requesting decode-only (quantize="") receive usable float data.
if info, ok := header[inputKey]; ok && info.Dtype == "F8_E4M3" {
scaleKey := inputKey + ".scale_inv"
scaleInv := st.Get(scaleKey)
if scaleInv == nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("missing companion tensor %q for fp8 source tensor %q", scaleKey, inputKey)
}
arr, err = decodeSourceFP8Tensor(arr, scaleInv)
if err != nil {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("failed to decode fp8 tensor %s: %w", inputKey, err)
}
mlx.Eval(arr)
}
if quantize == "" {
arr = mlx.Contiguous(arr)
arr = mlx.Contiguous(arr, false)
arrays[name] = arr
return tmpPath, []*mlx.Array{arr}, st, nil
}
// Convert to float type if needed (quantize expects float)
if arr.Dtype() != mlx.DtypeBFloat16 && arr.Dtype() != mlx.DtypeFloat32 && arr.Dtype() != mlx.DtypeFloat16 {
arr = mlx.AsType(arr, mlx.DtypeBFloat16)
if arr.DType() != mlx.DTypeBFloat16 && arr.DType() != mlx.DTypeFloat32 && arr.DType() != mlx.DTypeFloat16 {
// Convert to float type if needed (quantize expects float)
arr = arr.AsType(mlx.DTypeBFloat16)
mlx.Eval(arr)
}
params, ok := quantizeParams[quantize]
if !ok {
st.Free()
return tmpPath, nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
}
groupSize, bits, mode := model.QuantizationParams(quantize)
qweight, scales, qbiases := mlx.Quantize(arr, groupSize, bits, mode)
qweight, scales, qbiases := mlx.Quantize(arr, params.groupSize, params.bits, params.mode)
qweight = mlx.Contiguous(qweight)
scales = mlx.Contiguous(scales)
qweight = mlx.Contiguous(qweight, false)
scales = mlx.Contiguous(scales, false)
arrays[name] = qweight
arrays[name+".scale"] = scales
toEval = append(toEval, qweight, scales)
if qbiases != nil {
qbiases = mlx.Contiguous(qbiases)
qbiases = mlx.Contiguous(qbiases, false)
arrays[name+".bias"] = qbiases
toEval = append(toEval, qbiases)
}
@@ -101,27 +116,45 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
// and returns a single combined safetensors blob with the quantized weight, scale, and optional bias.
// Tensor keys use the original tensor name: name, name.scale, name.bias.
// The blob includes __metadata__ with quant_type and group_size.
// Supported quantization types: "int4", "nvfp4", "int8", "mxfp8".
// Supported quantization types: "int4", "nvfp4", "mxfp4", "int8", "mxfp8".
func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quantize string) (blobData []byte, err error) {
arrays := make(map[string]*mlx.Array)
tmpPath, toEval, st, err := loadAndQuantizeArray(r, tensorName, quantize, arrays)
if tmpPath != "" {
defer os.Remove(tmpPath)
}
if st != nil {
defer st.Free()
}
if err != nil {
return nil, err
}
finalArrays := make([]*mlx.Array, 0, len(arrays))
for _, arr := range arrays {
if arr != nil {
finalArrays = append(finalArrays, arr)
}
}
mlx.Pin(finalArrays...)
defer func() {
if st != nil {
st.Free()
}
mlx.Unpin(finalArrays...)
mlx.Sweep()
}()
mlx.Eval(toEval...)
mlx.Sweep()
// Free early to release mmap; defer guard handles error paths
if st != nil {
st.Free()
st = nil
}
// Build metadata for single-tensor blobs
params := quantizeParams[quantize]
groupSize, _, _ := model.QuantizationParams(quantize)
metadata := map[string]string{
"quant_type": quantize,
"group_size": strconv.Itoa(params.groupSize),
"group_size": strconv.Itoa(groupSize),
}
tmpDir := ensureTempDir()
@@ -135,48 +168,81 @@ func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quanti
// quantizePackedGroup quantizes multiple tensors and saves them all into a single
// combined safetensors blob. Used for packing expert groups.
// When the inputs are per-expert 2D tensors (e.g., experts.0.gate_proj.weight),
// they are stacked into 3D switch_mlp tensors before quantization.
// Each tensor may have a different quantization type (mixed-precision).
// Returns the blob bytes. No __metadata__ is added because different tensors
// may use different quantization types.
func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
// Returns the blob bytes.
func quantizePackedGroup(groupName string, inputs []create.PackedTensorInput) ([]byte, error) {
// Check if inputs are per-expert tensors that should be stacked into 3D
if projGroups, quantize := parsePerExpertInputs(groupName, inputs); projGroups != nil {
return stackAndQuantizeExpertGroup(groupName, projGroups, quantize)
}
allArrays := make(map[string]*mlx.Array)
var allToEval []*mlx.Array
var tmpPaths []string
var handles []*mlx.SafetensorsFile
var pinned []*mlx.Array
var metadata map[string]string
uniformQuantize := ""
hasQuantized := false
mixedQuantize := false
for _, input := range inputs {
if input.Quantize == "" {
if hasQuantized {
mixedQuantize = true
}
continue
}
if !hasQuantized {
hasQuantized = true
uniformQuantize = input.Quantize
continue
}
if input.Quantize != uniformQuantize {
mixedQuantize = true
}
}
if hasQuantized && !mixedQuantize {
if groupSize, _, _ := model.QuantizationParams(uniformQuantize); groupSize > 0 {
metadata = map[string]string{
"quant_type": uniformQuantize,
"group_size": strconv.Itoa(groupSize),
}
}
}
for _, input := range inputs {
tmpPath, toEval, st, err := loadAndQuantizeArray(input.Reader, input.Name, input.Quantize, allArrays)
if tmpPath != "" {
tmpPaths = append(tmpPaths, tmpPath)
}
if st != nil {
handles = append(handles, st)
}
if err != nil {
// Cleanup on error
for _, h := range handles {
h.Free()
}
for _, p := range tmpPaths {
os.Remove(p)
}
mlx.Unpin(pinned...)
mlx.Sweep()
return nil, err
}
allToEval = append(allToEval, toEval...)
mlx.Eval(toEval...)
finalArrays := arraysForPackedInput(allArrays, input)
mlx.Pin(finalArrays...)
pinned = append(pinned, finalArrays...)
if st != nil {
st.Free()
}
if tmpPath != "" {
os.Remove(tmpPath)
}
mlx.Sweep()
}
defer func() {
mlx.Unpin(pinned...)
mlx.Sweep()
}()
mlx.Eval(allToEval...)
// Free native handles after eval
for _, h := range handles {
h.Free()
}
// Save combined blob (no global metadata for mixed-precision packed blobs)
// Save combined blob. Add global metadata only when every packed tensor uses
// the same quantization mode and group size.
tmpDir := ensureTempDir()
outPath := filepath.Join(tmpDir, "packed-combined.safetensors")
defer os.Remove(outPath)
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, nil); err != nil {
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
return nil, fmt.Errorf("failed to save packed blob: %w", err)
}
@@ -185,17 +251,193 @@ func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
return nil, fmt.Errorf("failed to read packed blob: %w", err)
}
for _, p := range tmpPaths {
os.Remove(p)
return blobData, nil
}
// arraysForPackedInput collects the arrays produced for one packed input:
// the main tensor plus, when the input was quantized, its ".scale" and
// ".bias" companions. Entries absent from allArrays (e.g. a missing bias)
// are simply skipped.
func arraysForPackedInput(allArrays map[string]*mlx.Array, input create.PackedTensorInput) []*mlx.Array {
	candidates := []string{input.Name}
	if input.Quantize != "" {
		candidates = append(candidates, input.Name+".scale", input.Name+".bias")
	}
	found := make([]*mlx.Array, 0, len(candidates))
	for _, candidate := range candidates {
		arr := allArrays[candidate]
		if arr == nil {
			continue
		}
		found = append(found, arr)
	}
	return found
}
// perExpertSuffix matches ".{index}.{proj_and_suffix}" after the group prefix.
var perExpertSuffix = regexp.MustCompile(`^\.(\d+)\.(.+)$`)

// expertTensorInfo records one per-expert tensor input along with its parsed
// expert index and projection name, for sorting and stacking into 3D tensors.
type expertTensorInfo struct {
	index int
	proj  string // e.g., "gate_proj.weight"
	input create.PackedTensorInput
}
// parsePerExpertInputs groups per-expert 2D tensor inputs by projection type
// and returns the uniform quantization type shared by all inputs.
// Returns nil if the inputs are not per-expert tensors (e.g., already stacked 3D)
// or if the inputs have mixed quantization types.
// Only handles ".experts" groups; ".shared_experts" groups are left unpacked.
func parsePerExpertInputs(groupName string, inputs []create.PackedTensorInput) (map[string][]expertTensorInfo, string) {
	if !strings.HasSuffix(groupName, ".experts") {
		return nil, ""
	}
	// Guard: indexing inputs[0] below requires at least one input.
	if len(inputs) == 0 {
		return nil, ""
	}
	quantize := inputs[0].Quantize
	groups := make(map[string][]expertTensorInfo)
	for _, input := range inputs {
		if input.Quantize != quantize {
			return nil, "" // mixed quantization types
		}
		// Names look like "<group>.<expertIndex>.<proj>"; anything else means
		// this group does not use the per-expert layout.
		suffix := strings.TrimPrefix(input.Name, groupName)
		m := perExpertSuffix.FindStringSubmatch(suffix)
		if m == nil {
			return nil, "" // not a per-expert pattern
		}
		index, err := strconv.Atoi(m[1])
		if err != nil {
			return nil, ""
		}
		groups[m[2]] = append(groups[m[2]], expertTensorInfo{
			index: index,
			proj:  m[2],
			input: input,
		})
	}
	if len(groups) == 0 {
		return nil, ""
	}
	return groups, quantize
}
// stackAndQuantizeExpertGroup decodes per-expert tensors, stacks them into 3D
// switch_mlp tensors, quantizes, and returns the combined safetensors blob.
// Throughout, arrays that must survive a Sweep are Pinned and later Unpinned;
// NOTE(review): this assumes mlx.Sweep frees only unpinned arrays — confirm
// against the mlx package contract.
func stackAndQuantizeExpertGroup(groupName string, projGroups map[string][]expertTensorInfo, quantize string) ([]byte, error) {
	groupBase := strings.TrimSuffix(groupName, ".experts")
	allArrays := make(map[string]*mlx.Array)
	var pinned []*mlx.Array
	var metadata map[string]string
	// Record uniform quantization metadata so loaders know how to interpret
	// the packed tensors; skipped entirely when not quantizing.
	if groupSize, _, _ := model.QuantizationParams(quantize); groupSize > 0 && quantize != "" {
		metadata = map[string]string{
			"quant_type": quantize,
			"group_size": strconv.Itoa(groupSize),
		}
	}
	// Sort projection names for deterministic output
	projNames := make([]string, 0, len(projGroups))
	for proj := range projGroups {
		projNames = append(projNames, proj)
	}
	sort.Strings(projNames)
	cleanup := func() {
		mlx.Unpin(pinned...)
		mlx.Sweep()
	}
	for _, proj := range projNames {
		experts := projGroups[proj]
		// Sort by expert index
		sort.Slice(experts, func(i, j int) bool {
			return experts[i].index < experts[j].index
		})
		// Load and decode each expert tensor
		var decoded []*mlx.Array
		for _, expert := range experts {
			// quantize="" requests decode-only: the raw float tensor is
			// placed into dummyArrays under the input's name.
			dummyArrays := make(map[string]*mlx.Array)
			tmpPath, toEval, st, err := loadAndQuantizeArray(expert.input.Reader, expert.input.Name, "", dummyArrays)
			if err != nil {
				cleanup()
				return nil, fmt.Errorf("failed to decode expert tensor %s: %w", expert.input.Name, err)
			}
			mlx.Eval(toEval...)
			arr := dummyArrays[expert.input.Name]
			mlx.Pin(arr)
			pinned = append(pinned, arr)
			decoded = append(decoded, arr)
			// Free the native file handle and temp file promptly; the decoded
			// array is already evaluated and pinned.
			if st != nil {
				st.Free()
			}
			if tmpPath != "" {
				os.Remove(tmpPath)
			}
			mlx.Sweep()
		}
		// Stack into 3D along axis 0: [numExperts, rows, cols]
		stacked := mlx.Stack(decoded, 0)
		mlx.Eval(stacked)
		mlx.Pin(stacked)
		pinned = append(pinned, stacked)
		// Free individual decoded arrays
		mlx.Unpin(decoded...)
		mlx.Sweep()
		stackedName := groupBase + ".switch_mlp." + proj
		// Quantize the stacked tensor
		if quantize != "" {
			groupSize, bits, mode := model.QuantizationParams(quantize)
			qweight, scales, qbiases := mlx.Quantize(stacked, groupSize, bits, mode)
			qweight = mlx.Contiguous(qweight, false)
			scales = mlx.Contiguous(scales, false)
			allArrays[stackedName] = qweight
			allArrays[stackedName+".scale"] = scales
			toEval := []*mlx.Array{qweight, scales}
			if qbiases != nil {
				qbiases = mlx.Contiguous(qbiases, false)
				allArrays[stackedName+".bias"] = qbiases
				toEval = append(toEval, qbiases)
			}
			mlx.Eval(toEval...)
			mlx.Pin(toEval...)
			pinned = append(pinned, toEval...)
			// Free stacked source array
			mlx.Unpin(stacked)
			mlx.Sweep()
		} else {
			stacked = mlx.Contiguous(stacked, false)
			mlx.Eval(stacked)
			allArrays[stackedName] = stacked
		}
	}
	// From here on all failure paths release pinned arrays via the deferred
	// cleanup; earlier returns inside the loop call cleanup() explicitly.
	defer cleanup()
	tmpDir := ensureTempDir()
	outPath := filepath.Join(tmpDir, "stacked-combined.safetensors")
	defer os.Remove(outPath)
	if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
		return nil, fmt.Errorf("failed to save stacked blob: %w", err)
	}
	blobData, err := os.ReadFile(outPath)
	if err != nil {
		return nil, fmt.Errorf("failed to read stacked blob: %w", err)
	}
	return blobData, nil
}
// QuantizeSupported returns true if quantization is supported (MLX library
// available and initialized without error).
func QuantizeSupported() bool {
	return mlx.CheckInit() == nil
}
// ensureTempDir creates the temp directory for quantization if it doesn't exist
@@ -205,32 +447,97 @@ func ensureTempDir() string {
return tmpDir
}
// safetensorsHeaderEntry describes one tensor entry in a safetensors JSON
// header; only the fields needed here (dtype and shape) are decoded.
type safetensorsHeaderEntry struct {
	Dtype string  `json:"dtype"`
	Shape []int32 `json:"shape"`
}

// readSafetensorsHeader parses the JSON header of a safetensors file at path:
// an 8-byte little-endian header length followed by that many bytes of JSON
// mapping tensor names to their metadata.
func readSafetensorsHeader(path string) (map[string]safetensorsHeaderEntry, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	var headerSize uint64
	if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
		return nil, err
	}
	headerBytes := make([]byte, headerSize)
	if _, err := io.ReadFull(f, headerBytes); err != nil {
		return nil, err
	}
	var header map[string]safetensorsHeaderEntry
	if err := json.Unmarshal(headerBytes, &header); err != nil {
		return nil, err
	}
	return header, nil
}

// safetensorsKey resolves the primary tensor key from a header. The preferred
// name wins when present; otherwise the lexicographically first key is used,
// skipping the __metadata__ entry and fp8 ".scale_inv" companion tensors.
func safetensorsKey(preferred string, header map[string]safetensorsHeaderEntry) (string, error) {
	if preferred != "" {
		if _, ok := header[preferred]; ok {
			return preferred, nil
		}
	}
	keys := make([]string, 0, len(header))
	for k := range header {
		if k == "__metadata__" || strings.HasSuffix(k, ".scale_inv") {
			continue
		}
		keys = append(keys, k)
	}
	sort.Strings(keys)
	if len(keys) == 0 {
		return "", fmt.Errorf("no tensor found in safetensors header")
	}
	return keys[0], nil
}
// decodeSourceFP8Tensor expands an HF block-quantized F8_E4M3 weight back to
// bfloat16 by multiplying each 128x128 block by its corresponding inverse
// scale from scaleInv. Both inputs must be 2D; scaleInv must have one entry
// per (possibly partial) block of weight.
func decodeSourceFP8Tensor(weight, scaleInv *mlx.Array) (*mlx.Array, error) {
	if weight == nil || scaleInv == nil {
		return nil, fmt.Errorf("fp8 weight and scale tensors are required")
	}
	weightShape := weight.Dims()
	scaleShape := scaleInv.Dims()
	if len(weightShape) != 2 || len(scaleShape) != 2 {
		return nil, fmt.Errorf("expected 2D fp8 weight and scale tensors, got %v and %v", weightShape, scaleShape)
	}
	// These must match the block size validated by resolveEffectiveQuantization
	// in create.go, which rejects any source model with a different block size.
	const blockRows = 128
	const blockCols = 128
	rows, cols := weightShape[0], weightShape[1]
	// Ceiling division: edge blocks that are only partially filled still get
	// one scale entry each.
	expectedScaleRows := (rows + blockRows - 1) / blockRows
	expectedScaleCols := (cols + blockCols - 1) / blockCols
	if scaleShape[0] != expectedScaleRows || scaleShape[1] != expectedScaleCols {
		return nil, fmt.Errorf(
			"unexpected fp8 scale shape %v for weight shape %v; want [%d %d]",
			scaleShape,
			weightShape,
			expectedScaleRows,
			expectedScaleCols,
		)
	}
	decoded := mlx.FromFP8(weight, mlx.DTypeBFloat16)
	// Pad to whole blocks so the 4D reshape below is exact.
	padBottom := blockRows*scaleShape[0] - rows
	padSide := blockCols*scaleShape[1] - cols
	if padBottom > 0 || padSide > 0 {
		decoded = mlx.Pad(decoded, []int32{0, int32(padBottom), 0, int32(padSide)})
	}
	// View as [blockRowCount, blockRows, blockColCount, blockCols] and
	// broadcast each scale over its block via expanded dims on axes 1 and 3.
	decoded = mlx.Reshape(decoded, int32(scaleShape[0]), int32(blockRows), int32(scaleShape[1]), int32(blockCols))
	decoded = mlx.Mul(decoded, mlx.ExpandDims(mlx.ExpandDims(scaleInv, 1), 3))
	decoded = mlx.Reshape(decoded, int32(rows+padBottom), int32(cols+padSide))
	// Slice away the padding to restore the original weight shape.
	if padBottom > 0 || padSide > 0 {
		decoded = mlx.SliceStartStop(decoded, []int32{0, 0}, []int32{int32(rows), int32(cols)})
	}
	return decoded, nil
}

View File

@@ -267,13 +267,13 @@ func ShouldQuantize(name, component string) bool {
// ShouldQuantizeTensor returns true if a tensor should be quantized based on name, shape, and quantize type.
// This is a more detailed check that also considers tensor dimensions.
// The quantize parameter specifies the quantization type (e.g., "int4", "nvfp4", "mxfp4", "int8", "mxfp8").
func ShouldQuantizeTensor(name string, shape []int32, quantize string) bool {
	return GetTensorQuantization(name, shape, quantize) != ""
}
// normalizeQuantType converts various quantization type aliases to canonical forms.
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp8/MXFP8
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp4/MXFP4, mxfp8/MXFP8
func normalizeQuantType(quantize string) string {
switch strings.ToUpper(quantize) {
case "Q4", "INT4", "FP4":
@@ -282,6 +282,8 @@ func normalizeQuantType(quantize string) string {
return "int8"
case "NVFP4":
return "nvfp4"
case "MXFP4":
return "mxfp4"
case "MXFP8":
return "mxfp8"
default:
@@ -335,7 +337,7 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
quantNorm := normalizeQuantType(quantize)
// MLX quantization requires last dimension to be divisible by group size
// nvfp4: 16, mxfp8: 32, int4/int8: 64
// nvfp4: 16, mxfp4/mxfp8: 32, int4/int8: 64
groupSize := int32(32)
switch quantNorm {
case "nvfp4":
@@ -353,8 +355,8 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
return ""
}
// For NVFP4 or MXFP8, use the same quantization for all (no mixed precision)
if quantNorm == "nvfp4" || quantNorm == "mxfp8" {
// For non-affine modes, use the same quantization for all eligible tensors.
if quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8" {
return quantNorm
}
@@ -391,23 +393,39 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
return quantNorm
}
// expertLayerPrefixRegexp matches the transformer layer path that must precede
// an expert marker, e.g. "model.layers.3", "language_model.model.layers.0",
// "model.language_model.layers.2", or plain "layers.7".
var expertLayerPrefixRegexp = regexp.MustCompile(`^(?:model\.language_model\.|language_model(?:\.model)?\.|model\.)?layers\.\d+$`)

// ExpertGroupPrefix returns the group prefix for expert tensors that should be packed together.
// For example:
//   - "model.layers.1.mlp.experts.0.down_proj.weight" -> "model.layers.1.mlp.experts"
//   - "model.layers.1.mlp.shared_experts.down_proj.weight" -> "model.layers.1.mlp.shared_experts"
//   - "language_model.model.layers.1.mlp.switch_mlp.down_proj.weight" -> "language_model.model.layers.1.mlp.switch_mlp"
//   - "model.layers.0.mlp.down_proj.weight" -> "" (dense layer, no experts)
//   - "model.layers.1.mlp.gate.weight" -> "" (routing gate, not an expert)
func ExpertGroupPrefix(tensorName string) string {
	if !strings.HasSuffix(tensorName, ".weight") {
		return ""
	}
	for _, marker := range []string{
		".mlp.experts.",
		".mlp.shared_experts.",
		".mlp.switch_mlp.",
	} {
		idx := strings.Index(tensorName, marker)
		if idx == -1 {
			continue
		}
		// Everything before the marker must look like a layer path; this
		// rejects unrelated names that merely contain the marker text.
		layerPrefix := tensorName[:idx]
		if !expertLayerPrefixRegexp.MatchString(layerPrefix) {
			continue
		}
		return layerPrefix + strings.TrimSuffix(marker, ".")
	}
	return ""
}
// PackedTensorInput holds metadata for a tensor that will be packed into a multi-tensor blob.
@@ -424,9 +442,11 @@ type PackedTensorInput struct {
type PackedTensorLayerCreator func(groupName string, tensors []PackedTensorInput) (LayerInfo, error)
// sourceQuantization mirrors the quantization settings that may appear in a
// source model's config.json: MLX-style fields (bits/group_size/mode) and
// HF-style fields (quant_method/weight_block_size).
type sourceQuantization struct {
	Bits            int     `json:"bits"`
	GroupSize       int     `json:"group_size"`
	Mode            string  `json:"mode"`
	QuantMethod     string  `json:"quant_method"`
	WeightBlockSize []int32 `json:"weight_block_size"`
}
type sourceModelConfig struct {
@@ -493,6 +513,98 @@ func (cfg sourceModelConfig) QuantMetadata() map[string]string {
return metadata
}
// sourceQuantizedKind classifies how (if at all) a source model is already quantized.
type sourceQuantizedKind string

const (
	// sourceQuantizedKindNone: source holds unquantized weights.
	sourceQuantizedKindNone sourceQuantizedKind = ""
	// sourceQuantizedKindPrequantized: MLX-style quantized tensors (".scales" companions).
	sourceQuantizedKindPrequantized sourceQuantizedKind = "prequantized"
	// sourceQuantizedKindHFFP8: HF-style fp8 block quantization (".weight_scale_inv" companions).
	sourceQuantizedKindHFFP8 sourceQuantizedKind = "hf_fp8"
)

// quantizationConfigs returns every config location where quantization
// settings may appear: top level and text_config, under both the
// "quantization" and "quantization_config" spellings.
func (cfg sourceModelConfig) quantizationConfigs() []sourceQuantization {
	return []sourceQuantization{
		cfg.Quantization,
		cfg.QuantizationConfig,
		cfg.TextConfig.Quantization,
		cfg.TextConfig.QuantizationConfig,
	}
}
// HFFP8WeightBlockSize reports the fp8 weight block dimensions declared in
// any of the source config's quantization sections, and whether one declares
// an fp8 quant_method with a two-element weight_block_size.
func (cfg sourceModelConfig) HFFP8WeightBlockSize() (rows, cols int32, ok bool) {
	for _, q := range cfg.quantizationConfigs() {
		if strings.EqualFold(q.QuantMethod, "fp8") && len(q.WeightBlockSize) == 2 {
			return q.WeightBlockSize[0], q.WeightBlockSize[1], true
		}
	}
	return 0, 0, false
}
// inspectSourceQuantization scans the safetensors files in modelDir to detect
// whether the source model is already quantized. A ".scales" companion tensor
// marks an MLX-prequantized model; ".weight_scale_inv" companions combined
// with an fp8 weight_block_size in the config mark an HF fp8 model.
func inspectSourceQuantization(modelDir string, cfg sourceModelConfig) (sourceQuantizedKind, error) {
	entries, err := os.ReadDir(modelDir)
	if err != nil {
		return sourceQuantizedKindNone, err
	}
	hasScaleInv := false
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".safetensors") {
			continue
		}
		extractor, err := safetensors.OpenForExtraction(filepath.Join(modelDir, entry.Name()))
		if err != nil {
			return sourceQuantizedKindNone, err
		}
		for _, name := range extractor.ListTensors() {
			switch {
			case strings.HasSuffix(name, ".scales"):
				// Prequantized detection wins immediately; no need to scan further.
				extractor.Close()
				return sourceQuantizedKindPrequantized, nil
			case strings.HasSuffix(name, ".weight_scale_inv"):
				hasScaleInv = true
			}
		}
		extractor.Close()
	}
	if hasScaleInv {
		// Only treat scale_inv tensors as HF fp8 when the config also
		// declares a weight block size.
		if _, _, ok := cfg.HFFP8WeightBlockSize(); ok {
			return sourceQuantizedKindHFFP8, nil
		}
	}
	return sourceQuantizedKindNone, nil
}
// resolveEffectiveQuantization decides the quantization actually applied to
// the import, reconciling the user's requested --quantize value with what the
// source model already contains. Requantizing an already-quantized source is
// rejected; HF fp8 sources are auto-converted to mxfp8.
func resolveEffectiveQuantization(cfg sourceModelConfig, sourceKind sourceQuantizedKind, requested string) (string, error) {
	switch sourceKind {
	case sourceQuantizedKindNone:
		// Unquantized source: honor the request as-is (may be empty).
		return requested, nil
	case sourceQuantizedKindPrequantized:
		if requested != "" {
			return "", fmt.Errorf("cannot requantize already-quantized source model with --quantize %q", requested)
		}
		return "", nil
	case sourceQuantizedKindHFFP8:
		if requested != "" {
			return "", fmt.Errorf("cannot requantize already-quantized fp8 source model with --quantize %q", requested)
		}
		rows, cols, ok := cfg.HFFP8WeightBlockSize()
		if !ok {
			return "", fmt.Errorf("fp8 source model missing weight_block_size metadata")
		}
		// Only 128x128 block scaling is supported by the fp8 decode path
		// (decodeSourceFP8Tensor's blockRows/blockCols constants).
		if rows != 128 || cols != 128 {
			return "", fmt.Errorf("unsupported fp8 source block size %dx%d", rows, cols)
		}
		return "mxfp8", nil
	default:
		return "", fmt.Errorf("unsupported source quantization kind %q", sourceKind)
	}
}
type tensorImportTransform interface {
skipTensor(name string) bool
transformTensor(td *safetensors.TensorData) ([]*safetensors.TensorData, error)
@@ -546,6 +658,14 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if err != nil {
return fmt.Errorf("failed to read source config.json: %w", err)
}
sourceQuantKind, err := inspectSourceQuantization(modelDir, sourceConfig)
if err != nil {
return fmt.Errorf("failed to inspect source quantization: %w", err)
}
effectiveQuantize, err := resolveEffectiveQuantization(sourceConfig, sourceQuantKind, quantize)
if err != nil {
return err
}
sourceQuantMetadata := sourceConfig.QuantMetadata()
importTransform, err := newTensorImportTransform(modelDir, sourceConfig)
if err != nil {
@@ -557,7 +677,6 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if len(createPackedLayer) > 0 {
packedCreator = createPackedLayer[0]
}
// Accumulate expert tensors by group prefix for packing.
// Readers reference file-backed SectionReaders, so we keep extractors
// open until each group is flushed to avoid buffering tensor data in memory.
@@ -600,8 +719,8 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
tensorSet[name] = struct{}{}
}
quantizeMsg := ""
if quantize != "" {
quantizeMsg = fmt.Sprintf(", quantizing to %s", quantize)
if effectiveQuantize != "" {
quantizeMsg = fmt.Sprintf(", quantizing to %s", effectiveQuantize)
}
fn(fmt.Sprintf("importing %s (%d tensors%s)", entry.Name(), len(tensorNames), quantizeMsg))
@@ -612,9 +731,10 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
if importTransform.skipTensor(tensorName) {
continue
}
if shouldSkipPrequantizedCompanion(tensorName, tensorSet) {
if shouldSkipSourceCompanion(tensorName, tensorSet) {
continue
}
sourceFP8ScaleName, hasSourceFP8Scale := sourceFP8Companion(tensorName, tensorSet)
td, err := extractor.GetTensor(tensorName)
if err != nil {
@@ -623,7 +743,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
return fmt.Errorf("failed to get tensor %s: %w", tensorName, err)
}
if quantize == "" {
if effectiveQuantize == "" {
layer, ok, err := createPrequantizedLayer(extractor, td, tensorName, tensorSet, sourceQuantMetadata, createLayer)
if err != nil {
extractor.Close()
@@ -647,8 +767,33 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
// Determine quantization type for this tensor (empty string if not quantizing)
// GetTensorQuantization handles mixed-precision (e.g., Q8 for attention, Q4 for FFN)
quantizeType := ""
if quantize != "" {
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, quantize)
switch {
case sourceQuantKind == sourceQuantizedKindHFFP8 && hasSourceFP8Scale:
quantizeType = "mxfp8"
case sourceQuantKind == sourceQuantizedKindHFFP8:
quantizeType = ""
case effectiveQuantize != "":
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, effectiveQuantize)
}
reader := outTD.SafetensorsReader()
if hasSourceFP8Scale {
if len(outputTensors) != 1 {
extractor.Close()
closeExtractors()
return fmt.Errorf("source fp8 tensor %s rewrote into %d tensors; only 1:1 rewrites are supported", tensorName, len(outputTensors))
}
if quantizeType == "" {
extractor.Close()
closeExtractors()
return fmt.Errorf("source fp8 tensor %s was not scheduled for mxfp8 conversion", tensorName)
}
scaleTD, err := extractor.GetTensor(sourceFP8ScaleName)
if err != nil {
extractor.Close()
closeExtractors()
return fmt.Errorf("failed to get fp8 scale tensor %s: %w", sourceFP8ScaleName, err)
}
reader = buildSourceFP8Reader(outTD, scaleTD.WithName(outTD.Name+".scale_inv"))
}
// Check if this tensor belongs to an expert group for packing
@@ -670,13 +815,13 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
Dtype: outTD.Dtype,
Shape: outTD.Shape,
Quantize: quantizeType,
Reader: outTD.SafetensorsReader(),
Reader: reader,
})
} else {
// Store as minimal safetensors format (88 bytes header overhead)
// This enables native mmap loading via mlx_load_safetensors
// createTensorLayer returns multiple layers if quantizing (weight + scales)
newLayers, err := createTensorLayer(outTD.SafetensorsReader(), outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
newLayers, err := createTensorLayer(reader, outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
if err != nil {
extractor.Close()
closeExtractors()
@@ -760,7 +905,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
return nil
}
// shouldSkipSourceCompanion reports whether name is a companion tensor
// (quantization scales/biases or an fp8 inverse-scale) whose primary weight
// tensor is also present in tensorSet; such companions are imported alongside
// their weight rather than as standalone tensors.
func shouldSkipSourceCompanion(name string, tensorSet map[string]struct{}) bool {
	switch {
	case strings.HasSuffix(name, ".scales"):
		_, ok := tensorSet[strings.TrimSuffix(name, ".scales")+".weight"]
		return ok
	case strings.HasSuffix(name, ".biases"):
		_, ok := tensorSet[strings.TrimSuffix(name, ".biases")+".weight"]
		return ok
	case strings.HasSuffix(name, ".weight_scale_inv"):
		// "<x>.weight_scale_inv" pairs with "<x>.weight".
		_, ok := tensorSet[strings.TrimSuffix(name, "_scale_inv")]
		return ok
	default:
		return false
	}
}
// sourceFP8Companion returns the name of the "<weight>_scale_inv" companion
// tensor for an fp8 weight and whether that companion exists in tensorSet.
// Non-".weight" names never have a companion.
func sourceFP8Companion(weightName string, tensorSet map[string]struct{}) (scaleName string, ok bool) {
	if !strings.HasSuffix(weightName, ".weight") {
		return "", false
	}
	candidate := weightName + "_scale_inv"
	_, present := tensorSet[candidate]
	return candidate, present
}
// buildSourceFP8Reader packs an fp8 weight tensor together with its (already
// renamed) scale companion into a single safetensors stream, so both reach
// the quantization path as one blob.
func buildSourceFP8Reader(weightTD, scaleTD *safetensors.TensorData) io.Reader {
	return safetensors.BuildPackedSafetensorsReader([]*safetensors.TensorData{weightTD, scaleTD})
}
func createPrequantizedLayer(
extractor *safetensors.TensorExtractor,
td *safetensors.TensorData,

View File

@@ -246,6 +246,30 @@ func readSingleTensorRaw(t *testing.T, data []byte) []byte {
return nil
}
// readSafetensorsHeaderNames parses the safetensors header embedded at the
// start of data (8-byte little-endian length + JSON) and returns the sorted
// tensor names, excluding the __metadata__ entry. Fails the test on malformed
// input.
func readSafetensorsHeaderNames(t *testing.T, data []byte) []string {
	t.Helper()
	var headerSize uint64
	if err := binary.Read(bytes.NewReader(data[:8]), binary.LittleEndian, &headerSize); err != nil {
		t.Fatalf("failed to read header size: %v", err)
	}
	var header map[string]json.RawMessage
	if err := json.Unmarshal(data[8:8+headerSize], &header); err != nil {
		t.Fatalf("failed to parse header: %v", err)
	}
	names := make([]string, 0, len(header))
	for key := range header {
		if key == "__metadata__" {
			continue
		}
		names = append(names, key)
	}
	slices.Sort(names)
	return names
}
func TestCreateSafetensorsModel(t *testing.T) {
dir := t.TempDir()
@@ -546,6 +570,215 @@ func TestCreateSafetensorsModel_PacksPrequantizedTensorTriplets(t *testing.T) {
}
}
// TestCreateSafetensorsModel_HFFP8AutoConvertsToMXFP8 verifies that an HF fp8
// block-quantized source is imported with its fp8 weight scheduled for mxfp8
// conversion and packed with its scale_inv companion, while unquantized
// tensors pass through untouched.
func TestCreateSafetensorsModel_HFFP8AutoConvertsToMXFP8(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
		"model_type": "test",
		"architectures": ["TestModel"],
		"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
	}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}
	// One fp8 weight with its scale_inv companion, one large dense weight,
	// and one small norm weight.
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
		st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
		st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
		st.NewTensorDataFromBytes("dense.weight", "BF16", []int32{128, 128}, make([]byte, 128*128*2)),
		st.NewTensorDataFromBytes("norm.weight", "BF16", []int32{2}, make([]byte, 4)),
	})
	// Capture, per tensor, the quantize type requested and the tensor names
	// inside the blob handed to the layer creator.
	quantizeByName := make(map[string]string)
	headerNamesByName := make(map[string][]string)
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		_, err := io.ReadAll(r)
		if err != nil {
			return LayerInfo{}, err
		}
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		data, err := io.ReadAll(r)
		if err != nil {
			return nil, err
		}
		quantizeByName[name] = quantize
		headerNamesByName[name] = readSafetensorsHeaderNames(t, data)
		return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }
	if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}
	// The fp8 weight must be auto-converted; everything else stays raw.
	if got := quantizeByName["linear.weight"]; got != "mxfp8" {
		t.Fatalf("linear.weight quantization = %q, want %q", got, "mxfp8")
	}
	if got := quantizeByName["norm.weight"]; got != "" {
		t.Fatalf("norm.weight quantization = %q, want empty", got)
	}
	if got := quantizeByName["dense.weight"]; got != "" {
		t.Fatalf("dense.weight quantization = %q, want empty", got)
	}
	// The companion must be folded into the weight blob, never standalone.
	if _, ok := quantizeByName["linear.weight_scale_inv"]; ok {
		t.Fatal("linear.weight_scale_inv should not be imported as a standalone tensor")
	}
	if got := headerNamesByName["linear.weight"]; !slices.Equal(got, []string{"linear.weight", "linear.weight.scale_inv"}) {
		t.Fatalf("linear.weight blob tensors = %v, want %v", got, []string{"linear.weight", "linear.weight.scale_inv"})
	}
	if got := headerNamesByName["norm.weight"]; !slices.Equal(got, []string{"norm.weight"}) {
		t.Fatalf("norm.weight blob tensors = %v, want %v", got, []string{"norm.weight"})
	}
	if got := headerNamesByName["dense.weight"]; !slices.Equal(got, []string{"dense.weight"}) {
		t.Fatalf("dense.weight blob tensors = %v, want %v", got, []string{"dense.weight"})
	}
}
// TestCreateSafetensorsModel_RejectsRequantizingQuantizedSources verifies that
// CreateSafetensorsModel refuses to apply --quantize to a source checkpoint
// that is already quantized, covering both a pre-quantized affine layout and
// an HF fp8 checkpoint.
func TestCreateSafetensorsModel_RejectsRequantizingQuantizedSources(t *testing.T) {
	cases := []struct {
		name       string
		configJSON string
		tensors    []*st.TensorData
		wantErr    string
	}{
		{
			// Affine layout: a packed U32 weight with a .scales companion.
			name:       "prequantized affine",
			configJSON: `{"model_type": "test", "architectures": ["TestModel"]}`,
			tensors: []*st.TensorData{
				st.NewTensorDataFromBytes("linear.weight", "U32", []int32{4, 4}, make([]byte, 16)),
				st.NewTensorDataFromBytes("linear.scales", "BF16", []int32{4, 1}, make([]byte, 8)),
			},
			wantErr: `cannot requantize already-quantized source model with --quantize "int4"`,
		},
		{
			// HF fp8: quantization_config in config.json plus F8_E4M3 weights
			// carrying block-scale companions.
			name: "hf fp8 source",
			configJSON: `{
"model_type": "test",
"architectures": ["TestModel"],
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
}`,
			tensors: []*st.TensorData{
				st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
				st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
			},
			wantErr: `cannot requantize already-quantized fp8 source model with --quantize "int4"`,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			dir := t.TempDir()
			if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(tc.configJSON), 0o644); err != nil {
				t.Fatalf("failed to write config.json: %v", err)
			}
			createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), tc.tensors)

			// The callbacks are inert stubs: the import is expected to fail
			// before any layer is actually written.
			noopLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
				return LayerInfo{}, nil
			}
			noopTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
				return nil, nil
			}
			noopManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }

			err := CreateSafetensorsModel("test-model", dir, "int4", noopLayer, noopTensorLayer, noopManifest, func(string) {})
			switch {
			case err == nil:
				t.Fatal("expected error, got nil")
			case !strings.Contains(err.Error(), tc.wantErr):
				t.Fatalf("error = %q, want substring %q", err, tc.wantErr)
			}
		})
	}
}
// TestCreateSafetensorsModel_HFFP8PacksExperts verifies that an HF fp8
// checkpoint with per-expert weights has its experts accumulated into a single
// packed layer (the *_scale_inv companions consumed along with the weights),
// and that every packed tensor is marked for mxfp8 quantization.
func TestCreateSafetensorsModel_HFFP8PacksExperts(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}

	// Two experts with three projections each, so stacking produces
	// [2, 128, 128] tensors; each fp8 weight gets a block-scale companion.
	var sources []*st.TensorData
	for _, expert := range []string{"0", "1"} {
		for _, proj := range []string{"gate_proj", "up_proj", "down_proj"} {
			weight := "model.language_model.layers.0.mlp.experts." + expert + "." + proj + ".weight"
			sources = append(sources,
				st.NewTensorDataFromBytes(weight, "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
				st.NewTensorDataFromBytes(weight+"_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
			)
		}
	}
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), sources)

	var gotGroups []string
	var gotTensors [][]PackedTensorInput
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		if _, err := io.ReadAll(r); err != nil {
			return LayerInfo{}, err
		}
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		if _, err := io.ReadAll(r); err != nil {
			return nil, err
		}
		return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
		gotGroups = append(gotGroups, groupName)
		gotTensors = append(gotTensors, tensors)
		return LayerInfo{Name: groupName, Digest: "sha256:packed_" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }

	if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}

	if len(gotGroups) != 1 {
		t.Fatalf("expected 1 packed layer, got %d: %v", len(gotGroups), gotGroups)
	}
	if gotGroups[0] != "language_model.model.layers.0.mlp.experts" {
		t.Fatalf("unexpected packed layer name: %s", gotGroups[0])
	}
	// 2 experts × 3 projection types = 6 tensors in the packed group.
	packed := gotTensors[0]
	if len(packed) != 6 {
		t.Fatalf("expected 6 tensors in packed group, got %d", len(packed))
	}
	// Every packed expert tensor should be flagged for mxfp8 quantization.
	for _, tensor := range packed {
		if tensor.Quantize != "mxfp8" {
			t.Fatalf("expected mxfp8 quantize for %s, got %q", tensor.Name, tensor.Quantize)
		}
	}
}
func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
dir := t.TempDir()
@@ -693,6 +926,113 @@ func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
}
}
// TestCreateSafetensorsModel_Qwen35DirectNonAffineKeepsSensitiveWeightsBF16
// imports a BF16 Qwen3.5-MoE checkpoint under each direct non-affine quantize
// type (nvfp4, mxfp8, mxfp4) and checks the per-tensor policy: embeddings, the
// LM head, low-rank linear_attn projections, and routing gates must pass
// through with an empty quantize type (i.e. stay BF16), while a regular
// attention projection and the packed switch_mlp expert tensors use the
// requested type.
func TestCreateSafetensorsModel_Qwen35DirectNonAffineKeepsSensitiveWeightsBF16(t *testing.T) {
	for _, quantize := range []string{"nvfp4", "mxfp8", "mxfp4"} {
		t.Run(quantize, func(t *testing.T) {
			dir := t.TempDir()
			configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"text_config": {"dtype": "bfloat16"}
}`
			if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
				t.Fatalf("failed to write config.json: %v", err)
			}
			// Stacked gate_up_proj fixture: per expert, the first 64*64 values
			// are 1 and the next 64*64 are 2, so the two halves remain
			// distinguishable after the import transform splits them.
			gateUpValues := make([]float32, 2*128*64)
			for expert := range 2 {
				base := expert * 128 * 64
				for i := range 64 * 64 {
					gateUpValues[base+i] = 1
					gateUpValues[base+64*64+i] = 2
				}
			}
			createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
				st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("lm_head.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_a.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_b.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.shared_expert_gate.weight", "BF16", []int32{1, 64}, make([]byte, 64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.self_attn.q_proj.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
				st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
			})
			// Record the quantize type each tensor (individual and packed)
			// arrives with so policy can be asserted afterwards.
			type tensorCall struct {
				quantize string
			}
			type packedTensorCall struct {
				Name     string
				Quantize string
			}
			tensorCalls := make(map[string]tensorCall)
			packedCalls := make(map[string][]packedTensorCall)
			createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
				_, _ = io.ReadAll(r)
				return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
			}
			createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantizeType string) ([]LayerInfo, error) {
				_, _ = io.ReadAll(r)
				tensorCalls[name] = tensorCall{quantize: quantizeType}
				return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
			}
			createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
				group := make([]packedTensorCall, 0, len(tensors))
				for _, tensor := range tensors {
					group = append(group, packedTensorCall{
						Name:     tensor.Name,
						Quantize: tensor.Quantize,
					})
				}
				packedCalls[groupName] = group
				return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
			}
			writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
				return nil
			}
			if err := CreateSafetensorsModel("test-model", dir, quantize, createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
				t.Fatalf("CreateSafetensorsModel failed: %v", err)
			}
			// Sensitive tensors (note the names were rewritten from the
			// model.language_model.* source prefix) must stay unquantized.
			for _, name := range []string{
				"language_model.model.embed_tokens.weight",
				"language_model.lm_head.weight",
				"language_model.model.layers.0.linear_attn.in_proj_a.weight",
				"language_model.model.layers.0.linear_attn.in_proj_b.weight",
				"language_model.model.layers.0.mlp.gate.weight",
				"language_model.model.layers.0.mlp.shared_expert_gate.weight",
			} {
				if got := tensorCalls[name].quantize; got != "" {
					t.Fatalf("%s quantize = %q, want empty", name, got)
				}
			}
			// A regular attention projection should receive the requested type.
			if got := tensorCalls["language_model.model.layers.0.self_attn.q_proj.weight"].quantize; got != quantize {
				t.Fatalf("q_proj quantize = %q, want %q", got, quantize)
			}
			// The stacked experts should be rewritten to switch_mlp and packed
			// as three projections, all with the requested quantize type.
			group := packedCalls["language_model.model.layers.0.mlp.switch_mlp"]
			if len(group) != 3 {
				t.Fatalf("packed switch_mlp tensor count = %d, want 3", len(group))
			}
			for _, tensor := range group {
				if tensor.Quantize != quantize {
					t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, quantize)
				}
			}
		})
	}
}
func TestResolveManifestPath(t *testing.T) {
tests := []struct {
name string
@@ -865,6 +1205,7 @@ func TestShouldQuantizeTensor(t *testing.T) {
{"large 2D weight fp8", "q_proj.weight", []int32{4096, 4096}, "fp8", true},
{"medium 2D weight fp8", "small_proj.weight", []int32{128, 128}, "fp8", true},
{"large 2D weight nvfp4", "q_proj.weight", []int32{4096, 4096}, "nvfp4", true},
{"large 2D weight mxfp4", "q_proj.weight", []int32{4096, 4096}, "mxfp4", true},
// Small tensors should not be quantized (< 1024 elements)
{"tiny 2D weight", "tiny.weight", []int32{16, 16}, "fp8", false},
@@ -891,9 +1232,11 @@ func TestShouldQuantizeTensor(t *testing.T) {
{"bias 2D", "proj.bias", []int32{4096, 1}, "fp8", false},
// Group size divisibility tests
// FP8/FP4 require divisible by 32
// FP8/FP4/MXFP4 require divisible by 32
{"not divisible by 32 fp8", "proj.weight", []int32{128, 48}, "fp8", false},
{"divisible by 32 fp8", "proj.weight", []int32{128, 64}, "fp8", true},
{"not divisible by 32 mxfp4", "proj.weight", []int32{128, 48}, "mxfp4", false},
{"divisible by 32 mxfp4", "proj.weight", []int32{128, 64}, "mxfp4", true},
// NVFP4 requires divisible by 16
{"not divisible by 16 nvfp4", "proj.weight", []int32{128, 24}, "nvfp4", false},
{"divisible by 16 nvfp4", "proj.weight", []int32{128, 48}, "nvfp4", true},
@@ -919,10 +1262,20 @@ func TestExpertGroupPrefix(t *testing.T) {
{"model.layers.1.mlp.experts.63.gate_proj.weight", "model.layers.1.mlp.experts"},
{"model.layers.0.mlp.experts.0.up_proj.weight", "model.layers.0.mlp.experts"},
// Expert tensors with language_model prefix should also match
{"language_model.model.layers.0.mlp.experts.0.gate_proj.weight", "language_model.model.layers.0.mlp.experts"},
{"language_model.model.layers.1.mlp.experts.255.down_proj.weight", "language_model.model.layers.1.mlp.experts"},
// Shared expert tensors should return their own group prefix
{"model.layers.1.mlp.shared_experts.down_proj.weight", "model.layers.1.mlp.shared_experts"},
{"model.layers.2.mlp.shared_experts.gate_proj.weight", "model.layers.2.mlp.shared_experts"},
// Rewritten Qwen switch_mlp tensors should also be packed per-layer.
{"model.layers.1.mlp.switch_mlp.down_proj.weight", "model.layers.1.mlp.switch_mlp"},
{"language_model.layers.2.mlp.switch_mlp.gate_proj.weight", "language_model.layers.2.mlp.switch_mlp"},
{"language_model.model.layers.3.mlp.switch_mlp.up_proj.weight", "language_model.model.layers.3.mlp.switch_mlp"},
{"model.language_model.layers.4.mlp.switch_mlp.gate_proj.weight", "model.language_model.layers.4.mlp.switch_mlp"},
// Non-expert tensors should return empty string
{"model.layers.0.mlp.down_proj.weight", ""}, // dense layer, no experts
{"model.layers.1.mlp.gate.weight", ""}, // routing gate, not an expert
@@ -978,6 +1331,161 @@ func TestGetTensorQuantization_StackedExpert3D(t *testing.T) {
if combinedDown != "int8" {
t.Fatalf("combined down_proj quantization = %q, want %q", combinedDown, "int8")
}
nvfp4GateUp := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
[]int32{64, 11008, 4096},
"nvfp4",
)
if nvfp4GateUp != "nvfp4" {
t.Fatalf("nvfp4 gate_proj quantization = %q, want %q", nvfp4GateUp, "nvfp4")
}
nvfp4Down := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
[]int32{64, 4096, 11008},
"nvfp4",
)
if nvfp4Down != "nvfp4" {
t.Fatalf("nvfp4 down_proj quantization = %q, want %q", nvfp4Down, "nvfp4")
}
mxfp4GateUp := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
[]int32{64, 11008, 4096},
"mxfp4",
)
if mxfp4GateUp != "mxfp4" {
t.Fatalf("mxfp4 gate_proj quantization = %q, want %q", mxfp4GateUp, "mxfp4")
}
mxfp4Down := GetTensorQuantization(
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
[]int32{64, 4096, 11008},
"mxfp4",
)
if mxfp4Down != "mxfp4" {
t.Fatalf("mxfp4 down_proj quantization = %q, want %q", mxfp4Down, "mxfp4")
}
}
// TestCreateSafetensorsModel_Qwen35NVFP4PacksSwitchMLPExperts checks that a
// BF16 Qwen3.5-MoE checkpoint imported with nvfp4 has its stacked expert
// tensors rewritten to switch_mlp names and routed through the packed-layer
// callback as three separate projections (gate/up/down), never through the
// per-tensor callback, while embeddings and the routing gate stay unquantized.
func TestCreateSafetensorsModel_Qwen35NVFP4PacksSwitchMLPExperts(t *testing.T) {
	dir := t.TempDir()
	configJSON := `{
"model_type": "test",
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
"text_config": {"dtype": "bfloat16"}
}`
	if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
		t.Fatalf("failed to write config.json: %v", err)
	}
	// Stacked gate_up_proj fixture: per expert, the first 64*64 values are 1
	// (gate half) and the next 64*64 are 2 (up half).
	gateUpValues := make([]float32, 2*128*64)
	for expert := range 2 {
		base := expert * 128 * 64
		for i := range 64 * 64 {
			gateUpValues[base+i] = 1
			gateUpValues[base+64*64+i] = 2
		}
	}
	createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
		st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
		st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
	})
	// Record what each callback was invoked with so routing and quantization
	// policy can be asserted after the import runs.
	type tensorCall struct {
		quantize string
	}
	type packedTensorCall struct {
		Name     string
		Dtype    string
		Shape    []int32
		Quantize string
	}
	tensorCalls := make(map[string]tensorCall)
	packedCalls := make(map[string][]packedTensorCall)
	createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
		_, _ = io.ReadAll(r)
		return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
	}
	createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
		_, _ = io.ReadAll(r)
		tensorCalls[name] = tensorCall{quantize: quantize}
		return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
	}
	createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
		group := make([]packedTensorCall, 0, len(tensors))
		for _, tensor := range tensors {
			group = append(group, packedTensorCall{
				Name:     tensor.Name,
				Dtype:    tensor.Dtype,
				Shape:    append([]int32(nil), tensor.Shape...),
				Quantize: tensor.Quantize,
			})
		}
		packedCalls[groupName] = group
		return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
	}
	writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
		return nil
	}
	if err := CreateSafetensorsModel("test-model", dir, "nvfp4", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
		t.Fatalf("CreateSafetensorsModel failed: %v", err)
	}
	// The stacked experts should land in exactly one packed group, rewritten
	// to the switch_mlp name.
	groupName := "language_model.model.layers.0.mlp.switch_mlp"
	group, ok := packedCalls[groupName]
	if !ok {
		t.Fatalf("missing packed group %q: %v", groupName, packedCalls)
	}
	if len(group) != 3 {
		t.Fatalf("packed group %q has %d tensors, want 3", groupName, len(group))
	}
	// Every packed tensor keeps its BF16 source dtype and is flagged nvfp4.
	gotNames := make([]string, 0, len(group))
	for _, tensor := range group {
		gotNames = append(gotNames, tensor.Name)
		if tensor.Quantize != "nvfp4" {
			t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, "nvfp4")
		}
		if tensor.Dtype != "BF16" {
			t.Fatalf("packed tensor %q dtype = %q, want %q", tensor.Name, tensor.Dtype, "BF16")
		}
	}
	// The gate_up_proj stack must be split into gate/up plus the down_proj.
	slices.Sort(gotNames)
	wantNames := []string{
		"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
		"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
		"language_model.model.layers.0.mlp.switch_mlp.up_proj.weight",
	}
	if !slices.Equal(gotNames, wantNames) {
		t.Fatalf("packed tensor names = %v, want %v", gotNames, wantNames)
	}
	// Packed expert tensors must not also be emitted as individual tensors.
	for _, name := range wantNames {
		if _, ok := tensorCalls[name]; ok {
			t.Fatalf("packed expert tensor %q unexpectedly handled by createTensorLayer", name)
		}
	}
	// Embeddings and the routing gate stay unquantized.
	if got := tensorCalls["language_model.model.embed_tokens.weight"].quantize; got != "" {
		t.Fatalf("embed_tokens quantize = %q, want empty", got)
	}
	if got := tensorCalls["language_model.model.layers.0.mlp.gate.weight"].quantize; got != "" {
		t.Fatalf("mlp.gate quantize = %q, want empty", got)
	}
}
func TestCreateSafetensorsModel_WithQuantize(t *testing.T) {

View File

@@ -87,6 +87,27 @@ func (t qwen35ImportTransform) skipTensor(name string) bool {
return strings.Contains(name, "mtp.")
}
// qwen35ShouldKeepBF16ForDirectNonAffine reports whether a tensor should stay
// in BF16 when importing directly into a non-affine quantized format:
// embeddings, the LM head, low-rank linear_attn projections, MoE routing
// gates, and the shared-expert gate.
func qwen35ShouldKeepBF16ForDirectNonAffine(name string) bool {
	// Routing gate: ends in ".mlp.gate.weight" but must not be a *_proj
	// tensor (e.g. a path containing gate_proj), which quantizes normally.
	if strings.HasSuffix(name, ".mlp.gate.weight") {
		return !strings.Contains(name, "_proj")
	}
	sensitiveSuffixes := []string{
		"embed_tokens.weight",
		"lm_head.weight",
		".linear_attn.in_proj_a.weight",
		".linear_attn.in_proj_b.weight",
		".linear_attn.in_proj_ba.weight",
		".mlp.shared_expert_gate.weight",
	}
	for _, suffix := range sensitiveSuffixes {
		if strings.HasSuffix(name, suffix) {
			return true
		}
	}
	return false
}
func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quantize string) string {
if strings.HasPrefix(name, "vision_tower.") {
return ""
@@ -127,6 +148,13 @@ func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quan
return ""
}
// Match the working HF-FP8 import policy for direct NVFP4/MXFP4/MXFP8 imports:
// keep embeddings, LM head, low-rank linear_attn projections, and routing
// gates in BF16 rather than forcing them into a non-affine quantized format.
if (quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8") && qwen35ShouldKeepBF16ForDirectNonAffine(name) {
return ""
}
return quantNorm
}

View File

@@ -1,11 +1,11 @@
include(FetchContent)
# Read MLX version from top-level file (shared with Dockerfile)
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_C_GIT_TAG)
# Read MLX-C version from top-level file (shared with Dockerfile)
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
# Read MLX core version from top-level file
file(READ "${CMAKE_SOURCE_DIR}/MLX_CORE_VERSION" MLX_GIT_TAG)
# Read MLX version from top-level file
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_GIT_TAG)
string(STRIP "${MLX_GIT_TAG}" MLX_GIT_TAG)
set(MLX_C_BUILD_EXAMPLES OFF)
@@ -98,6 +98,28 @@ FetchContent_MakeAvailable(mlx-c)
file(GLOB _mlx_c_hdrs "${mlx-c_SOURCE_DIR}/mlx/c/*.h")
file(COPY ${_mlx_c_hdrs} DESTINATION "${CMAKE_SOURCE_DIR}/x/mlxrunner/mlx/include/mlx/c/")
# Regenerate Go/C shim wrappers from the (possibly updated) headers.
find_program(GO_EXECUTABLE go REQUIRED)
message(STATUS "Regenerating MLX Go wrappers")
# Go's cgo splits CC on whitespace, so a CC like "C:/Program Files/…/cl.exe"
# (set by cmake on Windows) breaks with "C:/Program" not found. Clear CC
# when it contains spaces so cgo falls back to its default (gcc).
if(WIN32 AND "$ENV{CC}" MATCHES " ")
set(_SAVE_CC "$ENV{CC}")
set(ENV{CC} "")
endif()
execute_process(
COMMAND ${GO_EXECUTABLE} generate ./x/...
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
COMMAND_ERROR_IS_FATAL ANY
)
if(DEFINED _SAVE_CC)
set(ENV{CC} "${_SAVE_CC}")
endif()
# For local dev builds, override MLX_VERSION with git describe output
if(TARGET mlx_version AND DEFINED FETCHCONTENT_SOURCE_DIR_MLX)
execute_process(

View File

@@ -165,8 +165,8 @@ int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x, const
int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group) = NULL;
int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group) = NULL;
mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key) = NULL;
bool (*mlx_distributed_is_available_ptr)(void) = NULL;
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict) = NULL;
bool (*mlx_distributed_is_available_ptr)(const char* bk) = NULL;
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk) = NULL;
void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) = NULL;
void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...) = NULL;
int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless) = NULL;
@@ -319,10 +319,12 @@ int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype, const
int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) = NULL;
int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s) = NULL;
int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s) = NULL;
@@ -348,7 +350,7 @@ int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse
int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s) = NULL;
int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies) = NULL;
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s) = NULL;
int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
@@ -375,6 +377,8 @@ int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w,
int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s) = NULL;
int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
@@ -434,8 +438,8 @@ int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* axes, siz
int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s) = NULL;
int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s) = NULL;
int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s) = NULL;
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) = NULL;
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) = NULL;
int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
@@ -2101,6 +2105,11 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_atleast_3d\n");
return -1;
}
mlx_bartlett_ptr = GET_SYM(handle, "mlx_bartlett");
if (mlx_bartlett_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bartlett\n");
return -1;
}
mlx_bitwise_and_ptr = GET_SYM(handle, "mlx_bitwise_and");
if (mlx_bitwise_and_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_and\n");
@@ -2121,6 +2130,11 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_xor\n");
return -1;
}
mlx_blackman_ptr = GET_SYM(handle, "mlx_blackman");
if (mlx_blackman_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_blackman\n");
return -1;
}
mlx_block_masked_mm_ptr = GET_SYM(handle, "mlx_block_masked_mm");
if (mlx_block_masked_mm_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_block_masked_mm\n");
@@ -2381,6 +2395,16 @@ int mlx_load_functions(void* handle) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hadamard_transform\n");
return -1;
}
mlx_hamming_ptr = GET_SYM(handle, "mlx_hamming");
if (mlx_hamming_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hamming\n");
return -1;
}
mlx_hanning_ptr = GET_SYM(handle, "mlx_hanning");
if (mlx_hanning_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_hanning\n");
return -1;
}
mlx_identity_ptr = GET_SYM(handle, "mlx_identity");
if (mlx_identity_ptr == NULL) {
fprintf(stderr, "MLX: Failed to load symbol: mlx_identity\n");
@@ -4132,12 +4156,12 @@ mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, i
return mlx_distributed_group_split_ptr(group, color, key);
}
bool mlx_distributed_is_available(void) {
return mlx_distributed_is_available_ptr();
bool mlx_distributed_is_available(const char* bk) {
return mlx_distributed_is_available_ptr(bk);
}
mlx_distributed_group mlx_distributed_init(bool strict) {
return mlx_distributed_init_ptr(strict);
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk) {
return mlx_distributed_init_ptr(strict, bk);
}
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) {
@@ -4748,6 +4772,10 @@ int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
return mlx_atleast_3d_ptr(res, a, s);
}
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
return mlx_bartlett_ptr(res, M, s);
}
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) {
return mlx_bitwise_and_ptr(res, a, b, s);
}
@@ -4764,6 +4792,10 @@ int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const
return mlx_bitwise_xor_ptr(res, a, b, s);
}
int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
return mlx_blackman_ptr(res, M, s);
}
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) {
return mlx_block_masked_mm_ptr(res, a, b, block_size, mask_out, mask_lhs, mask_rhs, s);
}
@@ -4864,8 +4896,8 @@ int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_
return mlx_depends_ptr(res, inputs, dependencies);
}
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) {
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, dtype, s);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) {
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
}
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
@@ -4972,6 +5004,14 @@ int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float
return mlx_hadamard_transform_ptr(res, a, scale, s);
}
int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
return mlx_hamming_ptr(res, M, s);
}
int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
return mlx_hanning_ptr(res, M, s);
}
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
return mlx_identity_ptr(res, n, dtype, s);
}
@@ -5208,12 +5248,12 @@ int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indice
return mlx_put_along_axis_ptr(res, a, indices, values, axis, s);
}
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) {
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
}
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
return mlx_quantize_ptr(res, w, group_size, bits, mode, s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) {
return mlx_quantize_ptr(res, w, group_size, bits, mode, global_scale, s);
}
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {

View File

@@ -2125,7 +2125,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
optGroupSize := C.mlx_optional_int{value: C.int(groupSize), has_value: true}
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
res := C.mlx_vector_array_new()
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, C.default_stream())
var globalScale C.mlx_array
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, globalScale, C.default_stream())
// Result is a vector of arrays: [weights, scales, biases?]
// mxfp8 mode returns only 2 elements (no biases)
@@ -2161,7 +2162,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
}
res := C.mlx_array_new()
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, optDtype, C.default_stream())
var globalScale C.mlx_array
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, globalScale, optDtype, C.default_stream())
return newArray(res)
}

View File

@@ -309,10 +309,12 @@
#undef mlx_atleast_1d
#undef mlx_atleast_2d
#undef mlx_atleast_3d
#undef mlx_bartlett
#undef mlx_bitwise_and
#undef mlx_bitwise_invert
#undef mlx_bitwise_or
#undef mlx_bitwise_xor
#undef mlx_blackman
#undef mlx_block_masked_mm
#undef mlx_broadcast_arrays
#undef mlx_broadcast_to
@@ -365,6 +367,8 @@
#undef mlx_greater
#undef mlx_greater_equal
#undef mlx_hadamard_transform
#undef mlx_hamming
#undef mlx_hanning
#undef mlx_identity
#undef mlx_imag
#undef mlx_inner
@@ -751,8 +755,8 @@ extern int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x,
extern int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group);
extern int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group);
extern mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key);
extern bool (*mlx_distributed_is_available_ptr)(void);
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict);
extern bool (*mlx_distributed_is_available_ptr)(const char* bk);
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk);
extern void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
extern void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...);
extern int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless);
@@ -905,10 +909,12 @@ extern int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype,
extern int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
extern int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
extern int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s);
@@ -934,7 +940,7 @@ extern int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool
extern int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s);
extern int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
extern int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
extern int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s);
extern int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
@@ -961,6 +967,8 @@ extern int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_ar
extern int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
extern int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
extern int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
extern int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
@@ -1020,8 +1028,8 @@ extern int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* ax
extern int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s);
extern int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s);
extern int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
extern int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
extern int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -1492,9 +1500,9 @@ int mlx_distributed_group_size(mlx_distributed_group group);
mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
bool mlx_distributed_is_available(void);
bool mlx_distributed_is_available(const char* bk);
mlx_distributed_group mlx_distributed_init(bool strict);
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk);
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
@@ -1800,6 +1808,8 @@ int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
int mlx_bitwise_invert(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -1808,6 +1818,8 @@ int mlx_bitwise_or(mlx_array* res, const mlx_array a, const mlx_array b, const m
int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
int mlx_broadcast_arrays(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
@@ -1858,7 +1870,7 @@ int mlx_degrees(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -1912,6 +1924,10 @@ int mlx_greater_equal(mlx_array* res, const mlx_array a, const mlx_array b, cons
int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
@@ -2030,9 +2046,9 @@ int mlx_prod(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream
int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);

View File

@@ -93,21 +93,8 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
matchPath, matched = findBestMatch(c.root, inputs[:len(inputs)-1])
}
// Check for partial match within a node's edge — truncate path
// to the parent boundary. snapshot() will split the node and
// create the branch point during prefill when caches are ready.
partialMatch := false
if len(matchPath) > 1 {
lastNode := matchPath[len(matchPath)-1]
matchedInEdge := matched - lastNode.startOffset()
if matchedInEdge > 0 && matchedInEdge < len(lastNode.tokens) {
matchPath = matchPath[:len(matchPath)-1]
partialMatch = true
}
}
// Switch to the matched path, paging in/out as needed.
c.switchToPath(matchPath)
c.switchToPath(matchPath, matched)
// switchToPath aligns caches to a common offset
prefix := c.minCacheOffset()
@@ -116,7 +103,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
// Schedule a snapshot at the branch point during prefill so future
// requests diverging here can restore instead of re-evaluating.
var snapshotAt int
if partialMatch || (prefix == 0 && matched > 0) {
if prefix < matched {
snapshotAt = matched
}
@@ -142,7 +129,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
// switchToPath transitions from the current active path to a new path,
// paging out diverging segments and paging in the new path.
func (c *kvCache) switchToPath(newPath []*trieNode) {
func (c *kvCache) switchToPath(newPath []*trieNode, matched int) {
defer c.enforceEvictionPolicy()
// Find common ancestor index.
@@ -167,7 +154,10 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
// non-leaf nodes here would produce wrong results for non-rewindable
// caches (e.g. RecurrentCache) whose state reflects the leaf, not
// the intermediate boundary.
if leaf := len(c.activePath) - 1; leaf >= commonLen {
leaf := len(c.activePath) - 1
leafDiverges := leaf >= commonLen
leafNeedsRewind := matched < c.activePath[leaf].endOffset
if leafDiverges || leafNeedsRewind {
node := c.activePath[leaf]
if !node.hasAllSnapshots() {
fromOffset := node.startOffset()
@@ -184,14 +174,16 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
}
}
// Rewind each cache to the ancestor offset or free it. Freed
// caches (e.g. RecurrentCache that can't rewind) will be restored
// from snapshots during page-in.
// Rewind each cache to the target offset or free it. When matched
// falls within the ancestor's range (same-path case), we rewind
// directly to the match point. Otherwise we rewind to the ancestor
// and let page-in bring us forward to matched.
rewindTarget := min(ancestorOffset, matched)
for _, kv := range c.caches {
if kv == nil {
continue
}
if !kv.Restore(nil, ancestorOffset) {
if !kv.Restore(nil, rewindTarget) {
kv.Free()
}
}
@@ -199,10 +191,12 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
// Page in — walk the full new path, restoring from snapshots.
// Freed caches naturally pick up the first available snapshot.
// Caches already past a node skip it via offset check.
pageIn:
for _, node := range newPath {
if len(node.snapshots) == 0 {
if !node.hasSnapshots() {
continue
}
nodeTarget := min(node.endOffset, matched)
for j, kv := range c.caches {
if kv == nil {
continue
@@ -210,19 +204,18 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
if j >= len(node.snapshots) || node.snapshots[j] == nil {
continue
}
if kv.Offset() >= node.endOffset {
if kv.Offset() >= nodeTarget {
continue
}
if !kv.Restore(node.snapshots[j], node.endOffset) {
slog.Warn("cache restore failure during page-in, freeing all caches", "layer", j, "offset", node.startOffset())
c.freeAll()
c.activePath = []*trieNode{c.root}
return
if !kv.Restore(node.snapshots[j], nodeTarget) {
// Restore failed — stop page-in and let alignment
// bring all caches to a consistent offset.
break pageIn
}
}
if node.endOffset > ancestorOffset {
pageInCount++
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), node.endOffset))
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), nodeTarget))
}
}
@@ -536,6 +529,9 @@ func (c *kvCache) dumpTree() {
if nodeBytes > 0 {
label += " " + mlx.PrettyBytes(int(nodeBytes)).String()
}
if !n.lastUsed.IsZero() {
label += fmt.Sprintf(" %s ago", time.Since(n.lastUsed).Truncate(time.Millisecond))
}
var flags []string
if n.user {
flags = append(flags, "user")

View File

@@ -17,7 +17,8 @@ type Cache interface {
Snapshot(fromOffset int) Snapshot
// Restore brings the cache to target. If snapshot is nil, rewinds
// using the cache's own live state.
// using the cache's own live state. Returns false if the target is
// unreachable (e.g. target > current offset, or negative).
Restore(snapshot Snapshot, target int) bool
// Merge combines two sequential snapshots [a,b) and [b,c) into [a,c).
@@ -108,8 +109,8 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
kSlice := c.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
vSlice := c.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
kCopy := mlx.Copy(kSlice)
vCopy := mlx.Copy(vSlice)
kCopy := mlx.Contiguous(kSlice, false)
vCopy := mlx.Contiguous(vSlice, false)
mlx.Pin(kCopy, vCopy)
mlx.AsyncEval(kCopy, vCopy)
@@ -122,17 +123,21 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
}
func (c *KVCache) Restore(snapshot Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
// Rewind using live state — just clamp offset.
target = max(0, min(target, c.offset))
if target > c.offset {
return false
}
c.offset = target
return true
}
snap := snapshot.(*kvSnapshot)
// Check that the cache has data up to the snapshot's starting point.
if c.offset < snap.fromOffset {
if target > snap.toOffset || c.offset < snap.fromOffset {
return false
}
@@ -191,10 +196,10 @@ func (c *KVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot) {
return snapshot, nil
}
pk := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
pv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
ck := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
cv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
pk := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
pv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
ck := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
cv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
mlx.Pin(pk, pv, ck, cv)
mlx.AsyncEval(pk, pv, ck, cv)
@@ -354,7 +359,14 @@ func (c *RotatingKVCache) Snapshot(fromOffset int) Snapshot {
}
func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
if target >= c.offset {
return target == c.offset
}
// Live rewind is only safe when the buffer hasn't filled yet
// (offset <= maxSize). Once the window has shifted, rewinding
// leaves fewer than maxSize trailing tokens to attend to —
@@ -362,7 +374,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
if c.offset > c.maxSize {
return false
}
target = max(0, min(target, c.offset))
c.offset = target
c.idx = target
return true
@@ -370,6 +381,10 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
snap := snapshot.(*rotatingSnapshot)
if target > snap.toOffset {
return false
}
// Reject if clamping would leave an incomplete window.
if target < snap.toOffset && snap.toOffset > c.maxSize {
return false
@@ -388,7 +403,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
// Clamp to target if needed.
if target < c.offset {
target = max(0, target)
c.offset = target
c.idx = target
}

View File

@@ -22,14 +22,9 @@ func (c *RecurrentCache) setStateRaw(old, v *mlx.Array) *mlx.Array {
if v == nil || !v.Valid() {
return old
}
if old == v {
return old
}
mlx.Pin(v)
if old != nil && old != v {
mlx.Unpin(old)
}
mlx.Unpin(old)
return v
}
@@ -38,9 +33,6 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
if v == nil || !v.Valid() {
return old
}
if old == v {
return old
}
root := v
if ensureContiguous {
@@ -49,9 +41,7 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
detached := root.Clone()
mlx.Pin(detached)
if old != nil && old != detached {
mlx.Unpin(old)
}
mlx.Unpin(old)
return detached
}
@@ -150,10 +140,10 @@ func (c *RecurrentCache) Restore(snapshot Snapshot, target int) bool {
snap := snapshot.(*recurrentSnapshot)
// Recurrent state encodes all tokens up to snap.offset. Restoring
// to a target before that would leave stale state from tokens
// [target, snap.offset) baked in. Only allow restoring forward.
if target < snap.offset {
// Recurrent snapshots encode cumulative state up to exactly
// snap.offset. Target must match — rewinding would leave stale
// state, and advancing isn't possible without feeding tokens.
if target != snap.offset {
return false
}

View File

@@ -6,39 +6,35 @@ import (
"github.com/ollama/ollama/x/mlxrunner/mlx"
)
// TestRecurrentCacheRestoreDirectionality verifies that RecurrentCache only
// allows restoring forward (target >= snapshot offset), never backward.
func TestRecurrentCacheRestoreDirectionality(t *testing.T) {
// TestRecurrentCacheRestoreExactOffset verifies that RecurrentCache restore
// only succeeds when target exactly matches the snapshot's offset. Recurrent
// state is cumulative, so it can't be rewound or fast-forwarded.
func TestRecurrentCacheRestoreExactOffset(t *testing.T) {
skipIfNoMLX(t)
c := NewRecurrentCache(3, 12, 4, 8, 8)
_ = c.ConvState(1, mlx.DTypeFloat16)
_ = c.DeltaState(1, mlx.DTypeFloat16)
c.Advance(10)
snap := c.Snapshot(0)
snap := c.Snapshot(0) // snap.offset == 10
c.Advance(5) // now at 15
c.Advance(5) // cache now at 15
// Restore backward should fail.
// target < snap.offset: fails (can't rewind past snapshot)
if c.Restore(snap, 5) {
t.Fatal("Restore(snap, 5) should fail — target < snap.offset")
t.Fatal("Restore(snap, 5) should fail — target != snap.offset")
}
// Restore to exact snap offset should succeed.
// target > snap.offset: fails (can't advance without feeding tokens)
if c.Restore(snap, 15) {
t.Fatal("Restore(snap, 15) should fail — target != snap.offset")
}
// target == snap.offset: succeeds
if !c.Restore(snap, 10) {
t.Fatal("Restore(snap, 10) should succeed")
t.Fatal("Restore(snap, 10) should succeed — target == snap.offset")
}
if c.Offset() != 10 {
t.Fatalf("offset = %d, want 10", c.Offset())
}
// Restore forward (target > snap offset) should succeed, offset = snap.offset.
snap2 := c.Snapshot(0)
if !c.Restore(snap2, 15) {
t.Fatal("Restore(snap, 15) should succeed")
}
// Recurrent state is at snap.offset (10), not target (15).
if c.Offset() != 10 {
t.Fatalf("offset = %d, want 10 (snap offset)", c.Offset())
}
}

View File

@@ -79,20 +79,20 @@ func (c *fakeRewindableCache) Snapshot(fromOffset int) cache.Snapshot {
}
func (c *fakeRewindableCache) Restore(snapshot cache.Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
// Rewind live state.
if target < 0 {
target = 0
}
if target > len(c.tokens) {
target = len(c.tokens)
return false
}
c.tokens = c.tokens[:target]
return true
}
s := snapshot.(*fakeSnapshot)
if len(c.tokens) < s.from {
return false // don't have base data up to snapshot start
if target > s.to || len(c.tokens) < s.from {
return false
}
c.tokens = append(c.tokens[:s.from], s.tokens...)
if target < len(c.tokens) {
@@ -196,9 +196,13 @@ func (c *fakeSlidingWindowCache) Snapshot(fromOffset int) cache.Snapshot {
}
func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bool {
if target < 0 {
return false
}
if snapshot == nil {
if target == len(c.tokens) {
return true
if target >= len(c.tokens) {
return target == len(c.tokens)
}
// Live rewind only works when buffer hasn't filled (offset <= maxSize).
if len(c.tokens) > c.maxSize {
@@ -208,6 +212,14 @@ func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bo
return true
}
s := snapshot.(*fakeSnapshot)
if target > s.to {
return false
}
// Reject if clamping would leave an incomplete window
// (matches RotatingKVCache behavior).
if target < s.to && s.to > c.maxSize {
return false
}
c.tokens = slices.Clone(s.tokens)
if target < len(c.tokens) {
c.tokens = c.tokens[:target]
@@ -268,8 +280,8 @@ func (c *fakeRecurrentCache) Restore(snapshot cache.Snapshot, target int) bool {
return target == len(c.tokens) // can only no-op
}
s := snapshot.(*fakeSnapshot)
if target < s.to {
return false // can't go backward
if target != s.to {
return false // cumulative state requires exact match
}
c.tokens = slices.Clone(s.tokens)
return true
@@ -294,9 +306,10 @@ type feedableCache interface {
// testEnv encapsulates a kvCache and its fake caches for a test scenario.
type testEnv struct {
kvc *kvCache
caches []cache.Cache // typed references for assertions
tracker *snapshotTracker
kvc *kvCache
caches []cache.Cache // typed references for assertions
tracker *snapshotTracker
rewindable bool // true when all caches support arbitrary Restore(nil, target)
}
// newTransformerEnv creates a test environment with a single rewindable cache
@@ -305,23 +318,28 @@ func newTransformerEnv() *testEnv {
tracker := &snapshotTracker{}
caches := []cache.Cache{&fakeRewindableCache{tracker: tracker}}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tracker,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tracker,
rewindable: true,
}
}
// newSlidingWindowEnv creates a test environment with one rewindable cache and
// one sliding window cache (Mistral-style architecture).
// one sliding window cache (Mistral-style architecture). The sliding window
// maxSize is set small enough that test sequences fill it, making
// Restore(nil, target) fail — the same behavior as production models where
// the window fills after a few turns.
func newSlidingWindowEnv() *testEnv {
tr := &snapshotTracker{}
rc := &fakeRewindableCache{tracker: tr}
sw := &fakeSlidingWindowCache{maxSize: 32, tracker: tr}
sw := &fakeSlidingWindowCache{maxSize: 4, tracker: tr}
caches := []cache.Cache{rc, sw}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
rewindable: false,
}
}
@@ -333,9 +351,10 @@ func newRecurrentEnv() *testEnv {
nrc := &fakeRecurrentCache{tracker: tr}
caches := []cache.Cache{rc, nrc}
return &testEnv{
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
kvc: &kvCache{caches: caches},
caches: caches,
tracker: tr,
rewindable: false,
}
}
@@ -590,15 +609,24 @@ func TestBranchCreationAndReuse(t *testing.T) {
}
// Request B: [1,2,3,4,5,10,11,12] — shares 5-token prefix with A.
// Partial match in A's edge triggers snapshotOffset.
// For rewindable caches, switchToPath rewinds to the match point
// so only the non-matching suffix needs evaluation. For non-rewindable
// caches (RecurrentCache), the rewind fails and freeAll fires.
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5, 10, 11, 12}, []int32{30, 31})
if resB.snapshotOffset != 5 {
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
}
// Cache was rewound to 0 (partial match truncates path to root),
// so all tokens were re-evaluated.
if len(resB.remaining) != 8 {
t.Fatalf("B: remaining = %d, want 8", len(resB.remaining))
if env.rewindable {
if resB.snapshotOffset != 0 {
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
}
if len(resB.remaining) != 3 {
t.Fatalf("B: remaining = %d, want 3 (rewind to match point)", len(resB.remaining))
}
} else {
if resB.snapshotOffset != 5 {
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
}
if len(resB.remaining) != 8 {
t.Fatalf("B: remaining = %d, want 8 (freeAll fallback)", len(resB.remaining))
}
}
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 10, 11, 12, 30, 31})
@@ -635,14 +663,24 @@ func TestExactMatchSeedBehavior(t *testing.T) {
simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{10, 11})
// Request B: identical prompt. Holdback means matched=4, partial in
// the 5-token edge, so path truncates to root and all tokens are
// re-evaluated. snapshotOffset should be set at the holdback point.
// the 5-token edge. For rewindable caches, switchToPath rewinds to
// offset 4, so only the held-back token needs re-evaluation. For
// non-rewindable caches, the rewind fails and freeAll fires.
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{20, 21})
if len(resB.remaining) != 5 {
t.Fatalf("B: remaining = %d, want 5 (full re-eval due to holdback)", len(resB.remaining))
}
if resB.snapshotOffset != 4 {
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
if env.rewindable {
if len(resB.remaining) != 1 {
t.Fatalf("B: remaining = %d, want 1 (rewind to holdback point)", len(resB.remaining))
}
if resB.snapshotOffset != 0 {
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
}
} else {
if len(resB.remaining) != 5 {
t.Fatalf("B: remaining = %d, want 5 (freeAll fallback)", len(resB.remaining))
}
if resB.snapshotOffset != 4 {
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
}
}
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 20, 21})

View File

@@ -2,6 +2,7 @@ package mlxrunner
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
@@ -36,14 +37,69 @@ type Client struct {
modelName string
contextLength atomic.Int64
memory atomic.Uint64
done chan error
done chan struct{}
doneErr error // valid after done is closed
client *http.Client
lastErr string
lastErrLock sync.Mutex
status *statusWriter
mu sync.Mutex
cmd *exec.Cmd
}
// statusWriter mirrors subprocess stderr to an output file while
// remembering the most recent non-empty line so it can be attached to
// error reports. A line longer than maxStatusLen is truncated to its
// first maxStatusLen bytes.
type statusWriter struct {
	lastErrMsg string
	buf        []byte
	discarding bool
	mu         sync.Mutex
	out        *os.File
}

// maxStatusLen bounds how many bytes of a single stderr line are retained.
const maxStatusLen = 256

// Write forwards p to w.out, then scans it for complete lines and records
// the last non-blank one under the lock. It implements io.Writer; the
// return values are those of the underlying file write.
func (w *statusWriter) Write(p []byte) (int, error) {
	n, werr := w.out.Write(p)

	w.mu.Lock()
	defer w.mu.Unlock()

	w.buf = append(w.buf, p...)
	for {
		nl := bytes.IndexByte(w.buf, '\n')
		if nl < 0 {
			break
		}
		if !w.discarding {
			if line := bytes.TrimSpace(w.buf[:nl]); len(line) > 0 {
				if len(line) > maxStatusLen {
					line = line[:maxStatusLen]
				}
				w.lastErrMsg = string(line)
			}
		}
		w.buf = w.buf[nl+1:]
		w.discarding = false
	}

	// No newline yet: once the pending line outgrows the cap, record its
	// head a single time and drop the remainder of the line as it streams
	// in (discarding stays set until the next newline).
	if len(w.buf) > maxStatusLen {
		if !w.discarding {
			w.lastErrMsg = string(bytes.TrimSpace(w.buf[:maxStatusLen]))
			w.discarding = true
		}
		w.buf = w.buf[:0]
	}
	return n, werr
}

// getLastErr returns the most recently captured stderr line.
func (w *statusWriter) getLastErr() string {
	w.mu.Lock()
	defer w.mu.Unlock()
	return w.lastErrMsg
}
// NewClient prepares a new MLX runner client for LLM models.
// The subprocess is not started until Load() is called.
func NewClient(modelName string) (*Client, error) {
@@ -53,7 +109,7 @@ func NewClient(modelName string) (*Client, error) {
c := &Client{
modelName: modelName,
done: make(chan error, 1),
done: make(chan struct{}),
client: &http.Client{Timeout: 10 * time.Minute},
}
@@ -66,12 +122,6 @@ func NewClient(modelName string) (*Client, error) {
return c, nil
}
func (c *Client) getLastErr() string {
c.lastErrLock.Lock()
defer c.lastErrLock.Unlock()
return c.lastErr
}
// WaitUntilRunning waits for the subprocess to be ready.
func (c *Client) WaitUntilRunning(ctx context.Context) error {
timeout := time.After(2 * time.Minute)
@@ -82,16 +132,14 @@ func (c *Client) WaitUntilRunning(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case err := <-c.done:
errMsg := c.getLastErr()
if errMsg != "" {
return fmt.Errorf("mlx runner failed: %s (exit: %v)", errMsg, err)
case <-c.done:
if msg := c.status.getLastErr(); msg != "" {
return fmt.Errorf("mlx runner failed: %s (exit: %v)", msg, c.doneErr)
}
return fmt.Errorf("mlx runner exited unexpectedly: %w", err)
return fmt.Errorf("mlx runner exited unexpectedly: %w", c.doneErr)
case <-timeout:
errMsg := c.getLastErr()
if errMsg != "" {
return fmt.Errorf("timeout waiting for mlx runner: %s", errMsg)
if msg := c.status.getLastErr(); msg != "" {
return fmt.Errorf("timeout waiting for mlx runner: %s", msg)
}
return errors.New("timeout waiting for mlx runner to start")
case <-ticker.C:
@@ -182,6 +230,9 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
resp, err := c.client.Do(httpReq)
if err != nil {
if errMsg := c.status.getLastErr(); errMsg != "" {
return fmt.Errorf("mlx runner failed: %s", errMsg)
}
return err
}
defer resp.Body.Close()
@@ -219,7 +270,13 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
}
}
return scanner.Err()
if err := scanner.Err(); err != nil {
if errMsg := c.status.getLastErr(); errMsg != "" {
return fmt.Errorf("mlx runner failed: %s", errMsg)
}
return err
}
return nil
}
func (c *Client) ContextLength() int {
@@ -348,18 +405,13 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
// Forward subprocess stdout/stderr to server logs
stdout, _ := cmd.StdoutPipe()
stderr, _ := cmd.StderrPipe()
status := &statusWriter{out: os.Stderr}
c.status = status
go func() {
io.Copy(os.Stderr, stdout) //nolint:errcheck
}()
go func() {
scanner := bufio.NewScanner(stderr)
for scanner.Scan() {
line := scanner.Text()
fmt.Fprintln(os.Stderr, line)
c.lastErrLock.Lock()
c.lastErr = line
c.lastErrLock.Unlock()
}
io.Copy(status, stderr) //nolint:errcheck
}()
slog.Info("starting mlx runner subprocess", "model", c.modelName, "port", c.port)
@@ -369,8 +421,8 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
// Reap subprocess when it exits
go func() {
err := cmd.Wait()
c.done <- err
c.doneErr = cmd.Wait()
close(c.done)
}()
return nil, nil

View File

@@ -15,7 +15,9 @@ set(CMAKE_INSTALL_RPATH "@loader_path")
include(FetchContent)
set(MLX_C_GIT_TAG "v0.5.0" CACHE STRING "")
# Read MLX-C version from top-level file (shared with imagegen CMakeLists)
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
FetchContent_Declare(
mlx-c

View File

@@ -137,6 +137,9 @@ func Unpin(s ...*Array) {
for _, t := range s {
if t != nil {
t.pinned--
if t.pinned < 0 {
panic(fmt.Sprintf("mlx.Unpin: negative pin count on array %q", t.name))
}
}
}
}
@@ -259,9 +262,11 @@ func LogArrays() {
return arrays[i].NumBytes() > arrays[j].NumBytes()
})
var total int
for _, t := range arrays {
nb := t.NumBytes()
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s %v", t.name, t.DType(), PrettyBytes(nb), t.Dims()))
total += nb
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned, t.Dims()))
}
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s", len(arrays), PrettyBytes(ActiveMemory())))
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s, active: %s", len(arrays), PrettyBytes(total), PrettyBytes(ActiveMemory())))
}

View File

@@ -13,6 +13,10 @@ var (
gatedDeltaMetalKernelOnce sync.Once
gatedDeltaMetalKernel C.mlx_fast_metal_kernel
gatedDeltaMetalDisabled bool
gatedDeltaCUDAKernelOnce sync.Once
gatedDeltaCUDAKernel C.mlx_fast_cuda_kernel
gatedDeltaCUDADisabled bool
)
const gatedDeltaMetalKernelSource = `
@@ -83,6 +87,86 @@ for (int i = 0; i < n_per_t; ++i) {
}
`
// gatedDeltaCUDAKernelSource is the CUDA body for the fused gated-delta
// recurrent step ("gated_delta_step"). One 32-thread warp (x dimension)
// cooperates over a Dk-sized chunk per (batch, value-head, Dv-lane): the
// per-(Dv, Dk) state is held in registers (float state[n_per_t]) across
// all T timesteps, warp shuffles reduce kv_mem and the output, and the
// final state is written back to state_out at the end.
const gatedDeltaCUDAKernelSource = `
auto tid_x = threadIdx.x;
auto tid_y = threadIdx.y;
auto grid_y = blockIdx.y * blockDim.y + tid_y;
auto grid_z = blockIdx.z;
int T_val = static_cast<int>(*T);
auto n = grid_z;
auto b_idx = n / Hv;
auto hv_idx = n % Hv;
auto hk_idx = hv_idx / (Hv / Hk);
constexpr int n_per_t = Dk / 32;
// q, k: [B, T, Hk, Dk]
auto q_ = q + b_idx * T_val * Hk * Dk + hk_idx * Dk;
auto k_ = k + b_idx * T_val * Hk * Dk + hk_idx * Dk;
// v, y: [B, T, Hv, Dv]
auto dv_idx = grid_y;
auto v_ = v + b_idx * T_val * Hv * Dv + hv_idx * Dv;
y += b_idx * T_val * Hv * Dv + hv_idx * Dv;
auto dk_idx = tid_x;
// state_in, state_out: [B, Hv, Dv, Dk]
auto i_state = state_in + (n * Dv + dv_idx) * Dk;
auto o_state = state_out + (n * Dv + dv_idx) * Dk;
float state[n_per_t];
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = static_cast<float>(i_state[s_idx]);
}
// g: [B, T, Hv]
auto g_ = g + b_idx * T_val * Hv;
auto beta_ = beta + b_idx * T_val * Hv;
for (int t = 0; t < T_val; ++t) {
float kv_mem = 0.0f;
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = state[i] * static_cast<float>(g_[hv_idx]);
kv_mem += state[i] * static_cast<float>(k_[s_idx]);
}
// Warp reduction (full warp, 32 threads in x)
for (int offset = 16; offset > 0; offset >>= 1)
kv_mem += __shfl_down_sync(0xffffffff, kv_mem, offset);
kv_mem = __shfl_sync(0xffffffff, kv_mem, 0);
auto delta = (static_cast<float>(v_[dv_idx]) - kv_mem) * static_cast<float>(beta_[hv_idx]);
float out = 0.0f;
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
state[i] = state[i] + static_cast<float>(k_[s_idx]) * delta;
out += state[i] * static_cast<float>(q_[s_idx]);
}
// Warp reduction
for (int offset = 16; offset > 0; offset >>= 1)
out += __shfl_down_sync(0xffffffff, out, offset);
if (tid_x == 0) {
y[dv_idx] = static_cast<InT>(out);
}
q_ += Hk * Dk;
k_ += Hk * Dk;
v_ += Hv * Dv;
y += Hv * Dv;
g_ += Hv;
beta_ += Hv;
}
for (int i = 0; i < n_per_t; ++i) {
auto s_idx = n_per_t * dk_idx + i;
o_state[s_idx] = static_cast<InT>(state[i]);
}
`
func cStringVector(values []string) (C.mlx_vector_string, func(), bool) {
vec := C.mlx_vector_string_new()
ok := true
@@ -352,11 +436,184 @@ func gatedDeltaFallback(q, k, v, g, beta, state *Array) (y, nextState *Array) {
return Concatenate(outs, 1), nextState
}
// initGatedDeltaCUDAKernel builds the fused gated-delta CUDA kernel once.
// It is invoked exactly once via gatedDeltaCUDAKernelOnce.Do. On any
// failure (no CUDA device, C-string conversion failure) it sets
// gatedDeltaCUDADisabled so callers fall back to other implementations.
func initGatedDeltaCUDAKernel() {
	// Bail out early when CUDA is unavailable or the availability query fails.
	var cudaAvail C.bool
	if C.mlx_cuda_is_available(&cudaAvail) != 0 || !bool(cudaAvail) {
		gatedDeltaCUDADisabled = true
		return
	}
	// Named kernel inputs, in the order they are passed at apply time.
	inputs, freeInputs, ok := cStringVector([]string{"q", "k", "v", "g", "beta", "state_in", "T"})
	if !ok {
		gatedDeltaCUDADisabled = true
		freeInputs()
		return
	}
	defer freeInputs()
	outputs, freeOutputs, ok := cStringVector([]string{"y", "state_out"})
	if !ok {
		gatedDeltaCUDADisabled = true
		freeOutputs()
		return
	}
	defer freeOutputs()
	cName := C.CString("gated_delta_step")
	defer C.free(unsafe.Pointer(cName))
	cSource := C.CString(gatedDeltaCUDAKernelSource)
	defer C.free(unsafe.Pointer(cSource))
	cHeader := C.CString("")
	defer C.free(unsafe.Pointer(cHeader))
	// NOTE(review): the result of mlx_fast_cuda_kernel_new is not checked
	// here — confirm whether a failed build is surfaced at apply time.
	gatedDeltaCUDAKernel = C.mlx_fast_cuda_kernel_new(
		cName,
		inputs,
		outputs,
		cSource,
		cHeader,
		C.bool(true),
		C.int(0),
	)
}
// gatedDeltaCUDAKernelApply attempts to run the fused CUDA gated-delta
// kernel. It returns ok=false — so the caller can fall back to another
// implementation — when the kernel is disabled, any input is nil, or the
// shapes/dtypes do not satisfy the kernel's constraints.
//
// Expected shapes: q, k [B, T, Hk, Dk]; v [B, T, Hv, Dv]; g, beta
// [B, T, Hv]; state [B, Hv, Dv, Dk], with Dk a multiple of 32 and Hv a
// multiple of Hk. All inputs must share a single dtype.
func gatedDeltaCUDAKernelApply(q, k, v, g, beta, state *Array) (y, nextState *Array, ok bool) {
	if gatedDeltaCUDADisabled {
		return nil, nil, false
	}
	if q == nil || k == nil || v == nil || g == nil || beta == nil || state == nil {
		return nil, nil, false
	}
	// Validate ranks first, then individual dimensions.
	qd := q.Dims()
	kd := k.Dims()
	vd := v.Dims()
	gd := g.Dims()
	bd := beta.Dims()
	sd := state.Dims()
	if len(qd) != 4 || len(kd) != 4 || len(vd) != 4 || len(gd) != 3 || len(bd) != 3 || len(sd) != 4 {
		return nil, nil, false
	}
	B, T, Hk, Dk := qd[0], qd[1], qd[2], qd[3]
	// Dk must split evenly across a 32-thread warp (n_per_t = Dk/32 in the kernel).
	if T <= 0 || Hk <= 0 || Dk <= 0 || Dk%32 != 0 {
		return nil, nil, false
	}
	if kd[0] != B || kd[1] != T || kd[2] != Hk || kd[3] != Dk {
		return nil, nil, false
	}
	Hv, Dv := vd[2], vd[3]
	if vd[0] != B || vd[1] != T || Hv <= 0 || Dv <= 0 || Hv%Hk != 0 {
		return nil, nil, false
	}
	if gd[0] != B || gd[1] != T || gd[2] != Hv {
		return nil, nil, false
	}
	if bd[0] != B || bd[1] != T || bd[2] != Hv {
		return nil, nil, false
	}
	if sd[0] != B || sd[1] != Hv || sd[2] != Dv || sd[3] != Dk {
		return nil, nil, false
	}
	dtype := q.DType()
	if k.DType() != dtype || v.DType() != dtype || g.DType() != dtype || beta.DType() != dtype || state.DType() != dtype {
		return nil, nil, false
	}
	// Lazily compile the kernel on first use.
	gatedDeltaCUDAKernelOnce.Do(initGatedDeltaCUDAKernel)
	if gatedDeltaCUDADisabled {
		return nil, nil, false
	}
	cfg := C.mlx_fast_cuda_kernel_config_new()
	defer C.mlx_fast_cuda_kernel_config_free(cfg)
	// Template arguments: the element type plus the compile-time dimensions.
	// NOTE(review): on config errors below, gatedDeltaCUDADisabled is set
	// without synchronization — confirm concurrent callers cannot race here.
	cInT := C.CString("InT")
	defer C.free(unsafe.Pointer(cInT))
	if C.mlx_fast_cuda_kernel_config_add_template_arg_dtype(cfg, cInT, C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	for _, tpl := range []struct {
		name  string
		value int
	}{
		{name: "Dk", value: Dk},
		{name: "Dv", value: Dv},
		{name: "Hk", value: Hk},
		{name: "Hv", value: Hv},
	} {
		cn := C.CString(tpl.name)
		rc := C.mlx_fast_cuda_kernel_config_add_template_arg_int(cfg, cn, C.int(tpl.value))
		C.free(unsafe.Pointer(cn))
		if rc != 0 {
			gatedDeltaCUDADisabled = true
			return nil, nil, false
		}
	}
	// Declare output shapes: y [B, T, Hv, Dv] and state_out [B, Hv, Dv, Dk].
	yShape := []C.int{C.int(B), C.int(T), C.int(Hv), C.int(Dv)}
	stateShape := []C.int{C.int(B), C.int(Hv), C.int(Dv), C.int(Dk)}
	if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(yShape), C.size_t(len(yShape)), C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(stateShape), C.size_t(len(stateShape)), C.mlx_dtype(dtype)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// Grid: x = one 32-lane warp over the Dk chunks, y = Dv lanes,
	// z = one entry per (batch, value head) pair.
	if C.mlx_fast_cuda_kernel_config_set_grid(cfg, 32, C.int(Dv), C.int(B*Hv)) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// Thread block: full warp in x; the y dimension is capped at 4.
	threadY := Dv
	if threadY > 4 {
		threadY = 4
	}
	if C.mlx_fast_cuda_kernel_config_set_thread_group(cfg, 32, C.int(threadY), 1) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	// T is passed as a scalar array input rather than a template argument —
	// presumably so the compiled kernel is reusable across sequence lengths.
	tScalar := FromValue(T)
	inputs := []C.mlx_array{
		q.ctx,
		k.ctx,
		v.ctx,
		g.ctx,
		beta.ctx,
		state.ctx,
		tScalar.ctx,
	}
	inVec := C.mlx_vector_array_new_data(unsafe.SliceData(inputs), C.size_t(len(inputs)))
	defer C.mlx_vector_array_free(inVec)
	outVec := C.mlx_vector_array_new()
	defer C.mlx_vector_array_free(outVec)
	if C.mlx_fast_cuda_kernel_apply(&outVec, gatedDeltaCUDAKernel, inVec, cfg, DefaultStream().ctx) != 0 {
		gatedDeltaCUDADisabled = true
		return nil, nil, false
	}
	if int(C.mlx_vector_array_size(outVec)) < 2 {
		return nil, nil, false
	}
	// Wrap the two kernel outputs as tracked arrays.
	y = New("GATED_DELTA_CUDA_Y")
	nextState = New("GATED_DELTA_CUDA_STATE")
	C.mlx_vector_array_get(&y.ctx, outVec, 0)
	C.mlx_vector_array_get(&nextState.ctx, outVec, 1)
	return y, nextState, true
}
// GatedDelta runs the recurrent update operation.
//
// It uses the fused Metal kernel when available and otherwise falls back to a
// It tries the fused CUDA kernel first, then Metal, then falls back to a
// backend-agnostic MLX implementation with identical inputs/outputs.
func GatedDelta(q, k, v, g, beta, state *Array) (y, nextState *Array) {
if y, nextState, ok := gatedDeltaCUDAKernelApply(q, k, v, g, beta, state); ok {
return y, nextState
}
if y, nextState, ok := gatedDeltaKernel(q, k, v, g, beta, state); ok {
return y, nextState
}

View File

@@ -326,8 +326,10 @@ int (*mlx_distributed_sum_scatter_)(
int (*mlx_distributed_group_rank_)(mlx_distributed_group group) = NULL;
int (*mlx_distributed_group_size_)(mlx_distributed_group group) = NULL;
mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key) = NULL;
bool (*mlx_distributed_is_available_)(void) = NULL;
mlx_distributed_group (*mlx_distributed_init_)(bool strict) = NULL;
bool (*mlx_distributed_is_available_)(const char* bk /* may be null */) = NULL;
mlx_distributed_group (*mlx_distributed_init_)(
bool strict,
const char* bk /* may be null */) = NULL;
void (*mlx_set_error_handler_)(
mlx_error_handler_func handler,
void* data,
@@ -924,6 +926,7 @@ int (*mlx_astype_)(
int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_bitwise_and_)(
mlx_array* res,
const mlx_array a,
@@ -940,6 +943,7 @@ int (*mlx_bitwise_xor_)(
const mlx_array a,
const mlx_array b,
const mlx_stream s) = NULL;
int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_block_masked_mm_)(
mlx_array* res,
const mlx_array a,
@@ -1120,6 +1124,7 @@ int (*mlx_dequantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s) = NULL;
int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
@@ -1256,6 +1261,8 @@ int (*mlx_hadamard_transform_)(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s) = NULL;
int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s) = NULL;
int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
int (*mlx_inner_)(
@@ -1548,6 +1555,8 @@ int (*mlx_qqmm_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s) = NULL;
int (*mlx_quantize_)(
mlx_vector_array* res,
@@ -1555,6 +1564,7 @@ int (*mlx_quantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s) = NULL;
int (*mlx_quantized_matmul_)(
mlx_array* res,
@@ -2550,10 +2560,12 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
CHECK_LOAD(handle, mlx_atleast_1d);
CHECK_LOAD(handle, mlx_atleast_2d);
CHECK_LOAD(handle, mlx_atleast_3d);
CHECK_LOAD(handle, mlx_bartlett);
CHECK_LOAD(handle, mlx_bitwise_and);
CHECK_LOAD(handle, mlx_bitwise_invert);
CHECK_LOAD(handle, mlx_bitwise_or);
CHECK_LOAD(handle, mlx_bitwise_xor);
CHECK_LOAD(handle, mlx_blackman);
CHECK_LOAD(handle, mlx_block_masked_mm);
CHECK_LOAD(handle, mlx_broadcast_arrays);
CHECK_LOAD(handle, mlx_broadcast_to);
@@ -2606,6 +2618,8 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
CHECK_LOAD(handle, mlx_greater);
CHECK_LOAD(handle, mlx_greater_equal);
CHECK_LOAD(handle, mlx_hadamard_transform);
CHECK_LOAD(handle, mlx_hamming);
CHECK_LOAD(handle, mlx_hanning);
CHECK_LOAD(handle, mlx_identity);
CHECK_LOAD(handle, mlx_imag);
CHECK_LOAD(handle, mlx_inner);

View File

@@ -300,10 +300,12 @@
#define mlx_atleast_1d mlx_atleast_1d_mlx_gen_orig_
#define mlx_atleast_2d mlx_atleast_2d_mlx_gen_orig_
#define mlx_atleast_3d mlx_atleast_3d_mlx_gen_orig_
#define mlx_bartlett mlx_bartlett_mlx_gen_orig_
#define mlx_bitwise_and mlx_bitwise_and_mlx_gen_orig_
#define mlx_bitwise_invert mlx_bitwise_invert_mlx_gen_orig_
#define mlx_bitwise_or mlx_bitwise_or_mlx_gen_orig_
#define mlx_bitwise_xor mlx_bitwise_xor_mlx_gen_orig_
#define mlx_blackman mlx_blackman_mlx_gen_orig_
#define mlx_block_masked_mm mlx_block_masked_mm_mlx_gen_orig_
#define mlx_broadcast_arrays mlx_broadcast_arrays_mlx_gen_orig_
#define mlx_broadcast_to mlx_broadcast_to_mlx_gen_orig_
@@ -356,6 +358,8 @@
#define mlx_greater mlx_greater_mlx_gen_orig_
#define mlx_greater_equal mlx_greater_equal_mlx_gen_orig_
#define mlx_hadamard_transform mlx_hadamard_transform_mlx_gen_orig_
#define mlx_hamming mlx_hamming_mlx_gen_orig_
#define mlx_hanning mlx_hanning_mlx_gen_orig_
#define mlx_identity mlx_identity_mlx_gen_orig_
#define mlx_imag mlx_imag_mlx_gen_orig_
#define mlx_inner mlx_inner_mlx_gen_orig_
@@ -889,10 +893,12 @@
#undef mlx_atleast_1d
#undef mlx_atleast_2d
#undef mlx_atleast_3d
#undef mlx_bartlett
#undef mlx_bitwise_and
#undef mlx_bitwise_invert
#undef mlx_bitwise_or
#undef mlx_bitwise_xor
#undef mlx_blackman
#undef mlx_block_masked_mm
#undef mlx_broadcast_arrays
#undef mlx_broadcast_to
@@ -945,6 +951,8 @@
#undef mlx_greater
#undef mlx_greater_equal
#undef mlx_hadamard_transform
#undef mlx_hamming
#undef mlx_hanning
#undef mlx_identity
#undef mlx_imag
#undef mlx_inner
@@ -1501,8 +1509,10 @@ extern int (*mlx_distributed_sum_scatter_)(
extern int (*mlx_distributed_group_rank_)(mlx_distributed_group group);
extern int (*mlx_distributed_group_size_)(mlx_distributed_group group);
extern mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key);
extern bool (*mlx_distributed_is_available_)(void);
extern mlx_distributed_group (*mlx_distributed_init_)(bool strict);
extern bool (*mlx_distributed_is_available_)(const char* bk /* may be null */);
extern mlx_distributed_group (*mlx_distributed_init_)(
bool strict,
const char* bk /* may be null */);
extern void (*mlx_set_error_handler_)(
mlx_error_handler_func handler,
void* data,
@@ -2099,6 +2109,7 @@ extern int (*mlx_astype_)(
extern int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_bitwise_and_)(
mlx_array* res,
const mlx_array a,
@@ -2115,6 +2126,7 @@ extern int (*mlx_bitwise_xor_)(
const mlx_array a,
const mlx_array b,
const mlx_stream s);
extern int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_block_masked_mm_)(
mlx_array* res,
const mlx_array a,
@@ -2295,6 +2307,7 @@ extern int (*mlx_dequantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s);
extern int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -2431,6 +2444,8 @@ extern int (*mlx_hadamard_transform_)(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s);
extern int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s);
extern int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
extern int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s);
extern int (*mlx_inner_)(
@@ -2723,6 +2738,8 @@ extern int (*mlx_qqmm_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s);
extern int (*mlx_quantize_)(
mlx_vector_array* res,
@@ -2730,6 +2747,7 @@ extern int (*mlx_quantize_)(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s);
extern int (*mlx_quantized_matmul_)(
mlx_array* res,
@@ -4033,11 +4051,13 @@ static inline int mlx_distributed_group_size(mlx_distributed_group group) {
static inline mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key) {
return mlx_distributed_group_split_(group, color, key);
}
static inline bool mlx_distributed_is_available(void) {
return mlx_distributed_is_available_();
static inline bool mlx_distributed_is_available(const char* bk /* may be null */) {
return mlx_distributed_is_available_(bk);
}
static inline mlx_distributed_group mlx_distributed_init(bool strict) {
return mlx_distributed_init_(strict);
static inline mlx_distributed_group mlx_distributed_init(
bool strict,
const char* bk /* may be null */) {
return mlx_distributed_init_(strict, bk);
}
static inline void mlx_set_error_handler(
mlx_error_handler_func handler,
@@ -4939,6 +4959,9 @@ static inline int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_st
static inline int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
return mlx_atleast_3d_(res, a, s);
}
static inline int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
return mlx_bartlett_(res, M, s);
}
static inline int mlx_bitwise_and(
mlx_array* res,
const mlx_array a,
@@ -4963,6 +4986,9 @@ static inline int mlx_bitwise_xor(
const mlx_stream s) {
return mlx_bitwise_xor_(res, a, b, s);
}
static inline int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
return mlx_blackman_(res, M, s);
}
static inline int mlx_block_masked_mm(
mlx_array* res,
const mlx_array a,
@@ -5193,9 +5219,10 @@ static inline int mlx_dequantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s) {
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, dtype, s);
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
}
static inline int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
return mlx_diag_(res, a, k, s);
@@ -5383,6 +5410,12 @@ static inline int mlx_hadamard_transform(
const mlx_stream s) {
return mlx_hadamard_transform_(res, a, scale, s);
}
static inline int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
return mlx_hamming_(res, M, s);
}
static inline int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
return mlx_hanning_(res, M, s);
}
static inline int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
return mlx_identity_(res, n, dtype, s);
}
@@ -5793,8 +5826,10 @@ static inline int mlx_qqmm(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s) {
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, s);
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
}
static inline int mlx_quantize(
mlx_vector_array* res,
@@ -5802,8 +5837,9 @@ static inline int mlx_quantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s) {
return mlx_quantize_(res, w, group_size, bits, mode, s);
return mlx_quantize_(res, w, group_size, bits, mode, global_scale, s);
}
static inline int mlx_quantized_matmul(
mlx_array* res,

View File

@@ -1,7 +1,7 @@
# Vendored MLX-C Headers
These header files are vendored from [mlx-c](https://github.com/ml-explore/mlx-c).
The pinned version is in `MLX_VERSION` at the repo root.
The pinned version is in `MLX_C_VERSION` at the repo root.
Headers are automatically refreshed when you run a CMake build:

View File

@@ -42,12 +42,14 @@ mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
/**
* Check if distributed is available.
*/
bool mlx_distributed_is_available(void);
bool mlx_distributed_is_available(const char* bk /* may be null */);
/**
* Initialize distributed.
*/
mlx_distributed_group mlx_distributed_init(bool strict);
mlx_distributed_group mlx_distributed_init(
bool strict,
const char* bk /* may be null */);
/**@}*/

View File

@@ -166,6 +166,7 @@ int mlx_astype(
int mlx_atleast_1d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
int mlx_bitwise_and(
mlx_array* res,
const mlx_array a,
@@ -182,6 +183,7 @@ int mlx_bitwise_xor(
const mlx_array a,
const mlx_array b,
const mlx_stream s);
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
int mlx_block_masked_mm(
mlx_array* res,
const mlx_array a,
@@ -362,6 +364,7 @@ int mlx_dequantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
mlx_optional_dtype dtype,
const mlx_stream s);
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
@@ -498,6 +501,8 @@ int mlx_hadamard_transform(
const mlx_array a,
mlx_optional_float scale,
const mlx_stream s);
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
int mlx_inner(
@@ -790,6 +795,8 @@ int mlx_qqmm(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale_x /* may be null */,
const mlx_array global_scale_w /* may be null */,
const mlx_stream s);
int mlx_quantize(
mlx_vector_array* res,
@@ -797,6 +804,7 @@ int mlx_quantize(
mlx_optional_int group_size,
mlx_optional_int bits,
const char* mode,
const mlx_array global_scale /* may be null */,
const mlx_stream s);
int mlx_quantized_matmul(
mlx_array* res,

View File

@@ -4,35 +4,91 @@ package mlx
import "C"
import (
"fmt"
"iter"
"runtime"
"unsafe"
)
// SafetensorsFile represents a loaded safetensors file. It owns the
// underlying MLX array and metadata maps; release them with Free.
type SafetensorsFile struct {
	arrays   C.mlx_map_string_to_array
	metadata C.mlx_map_string_to_string
}
// loadSafetensorsStream selects the stream used for native safetensors
// loading: the CPU stream on macOS (Metal does not implement eval_gpu
// for Load), the GPU stream everywhere else so tensors load directly to
// GPU memory. The caller owns the returned stream.
func loadSafetensorsStream() C.mlx_stream {
	switch runtime.GOOS {
	case "darwin":
		return C.mlx_default_cpu_stream_new()
	default:
		return C.mlx_default_gpu_stream_new()
	}
}
// LoadSafetensorsNative loads a safetensors file using MLX's native loader.
// On success the returned SafetensorsFile must be released with Free.
func LoadSafetensorsNative(path string) (*SafetensorsFile, error) {
	var arrays C.mlx_map_string_to_array
	var metadata C.mlx_map_string_to_string
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))
	// CPU stream on darwin, GPU stream elsewhere (see loadSafetensorsStream).
	stream := loadSafetensorsStream()
	defer C.mlx_stream_free(stream)
	if C.mlx_load_safetensors(&arrays, &metadata, cPath, stream) != 0 {
		return nil, fmt.Errorf("failed to load safetensors: %s", path)
	}
	return &SafetensorsFile{arrays: arrays, metadata: metadata}, nil
}
// Get retrieves a tensor by name. It returns nil when the lookup fails
// or the name is not present in the file.
func (s *SafetensorsFile) Get(name string) *Array {
	cName := C.CString(name)
	defer C.free(unsafe.Pointer(cName))
	value := C.mlx_array_new()
	if C.mlx_map_string_to_array_get(&value, s.arrays, cName) != 0 {
		// NOTE(review): value (from mlx_array_new) is not freed on the
		// nil-return paths — confirm whether mlx_array_free is required here.
		return nil
	}
	if value.ctx == nil {
		return nil
	}
	// Wrap the retrieved handle in a tracked Array named after the tensor.
	arr := New(name)
	arr.ctx = value
	return arr
}
// GetMetadata retrieves a metadata value by key. It returns "" when the
// lookup fails.
func (s *SafetensorsFile) GetMetadata(key string) string {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	var cValue *C.char
	if C.mlx_map_string_to_string_get(&cValue, s.metadata, cKey) != 0 {
		return ""
	}
	// NOTE(review): assumes cValue is owned by the map (so it is not freed
	// here) — confirm against the mlx-c API.
	return C.GoString(cValue)
}
// Free releases the loaded safetensors maps. It is safe to call on a nil
// receiver.
func (s *SafetensorsFile) Free() {
	if s == nil {
		return
	}
	C.mlx_map_string_to_array_free(s.arrays)
	C.mlx_map_string_to_string_free(s.metadata)
}
func Load(path string) iter.Seq2[string, *Array] {
return func(yield func(string, *Array) bool) {
string2array := C.mlx_map_string_to_array_new()
defer C.mlx_map_string_to_array_free(string2array)
string2string := C.mlx_map_string_to_string_new()
defer C.mlx_map_string_to_string_free(string2string)
cPath := C.CString(path)
defer C.free(unsafe.Pointer(cPath))
// Use GPU stream so tensors load directly to GPU memory (CUDA has Load::eval_gpu).
// macOS Metal doesn't implement eval_gpu for Load, so fall back to CPU stream.
var stream C.mlx_stream
if runtime.GOOS == "darwin" {
stream = C.mlx_default_cpu_stream_new()
} else {
stream = C.mlx_default_gpu_stream_new()
sf, err := LoadSafetensorsNative(path)
if err != nil {
return
}
defer C.mlx_stream_free(stream)
defer sf.Free()
C.mlx_load_safetensors(&string2array, &string2string, cPath, stream)
it := C.mlx_map_string_to_array_iterator_new(string2array)
it := C.mlx_map_string_to_array_iterator_new(sf.arrays)
defer C.mlx_map_string_to_array_iterator_free(it)
for {
@@ -51,3 +107,43 @@ func Load(path string) iter.Seq2[string, *Array] {
}
}
}
// SaveSafetensors saves arrays to a safetensors file without metadata.
// It is shorthand for SaveSafetensorsWithMetadata with a nil metadata map.
func SaveSafetensors(path string, arrays map[string]*Array) error {
	return SaveSafetensorsWithMetadata(path, arrays, nil)
}
// SaveSafetensorsWithMetadata saves arrays to a safetensors file with
// metadata. Nil entries in arrays are skipped; metadata may be nil.
func SaveSafetensorsWithMetadata(path string, arrays map[string]*Array, metadata map[string]string) error {
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))
	// Build the C map of tensor name -> array, skipping nil arrays.
	cArrays := C.mlx_map_string_to_array_new()
	defer C.mlx_map_string_to_array_free(cArrays)
	for name, arr := range arrays {
		if arr == nil {
			continue
		}
		cName := C.CString(name)
		C.mlx_map_string_to_array_insert(cArrays, cName, arr.ctx)
		C.free(unsafe.Pointer(cName))
	}
	// Build the C metadata map; ranging over a nil Go map is a no-op.
	cMetadata := C.mlx_map_string_to_string_new()
	defer C.mlx_map_string_to_string_free(cMetadata)
	for key, value := range metadata {
		cKey := C.CString(key)
		cValue := C.CString(value)
		C.mlx_map_string_to_string_insert(cMetadata, cKey, cValue)
		C.free(unsafe.Pointer(cKey))
		C.free(unsafe.Pointer(cValue))
	}
	if C.mlx_save_safetensors(cPath, cArrays, cMetadata) != 0 {
		return fmt.Errorf("failed to save safetensors: %s", path)
	}
	return nil
}

View File

@@ -7,8 +7,44 @@ package mlx
// #cgo LDFLAGS: -lstdc++
// #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework Accelerate
// #include "generated.h"
// #include <string.h>
//
// static char _mlx_last_error_msg[1024] = {0};
// static int _mlx_last_error_flag = 0;
//
// static void _mlx_capture_error_handler(const char* msg, void* data) {
// (void)data;
// strncpy(_mlx_last_error_msg, msg, sizeof(_mlx_last_error_msg) - 1);
// _mlx_last_error_msg[sizeof(_mlx_last_error_msg) - 1] = '\0';
// _mlx_last_error_flag = 1;
// }
//
// static void mlx_install_capture_handler(void) {
// if (mlx_set_error_handler_) {
// mlx_set_error_handler_(_mlx_capture_error_handler, NULL, NULL);
// }
// }
//
// static void mlx_clear_last_error(void) {
// _mlx_last_error_flag = 0;
// _mlx_last_error_msg[0] = '\0';
// }
//
// static int mlx_had_last_error(void) {
// return _mlx_last_error_flag;
// }
//
// static const char* mlx_get_last_error(void) {
// return _mlx_last_error_flag ? _mlx_last_error_msg : NULL;
// }
import "C"
// init installs a capturing MLX error handler at package load time so
// MLX errors can be surfaced in Go instead of terminating the process.
func init() {
	// Replace the default exit(-1) error handler with one that captures
	// the error message so we can surface it in Go.
	C.mlx_install_capture_handler()
}
// Version returns the MLX core library version string.
func Version() string {
str := C.mlx_string_new()
@@ -31,10 +67,19 @@ func doEval(outputs []*Array, async bool) {
}
}
C.mlx_clear_last_error()
var rc C.int
if async {
C.mlx_async_eval(vector)
rc = C.mlx_async_eval(vector)
} else {
C.mlx_eval(vector)
rc = C.mlx_eval(vector)
}
if rc != 0 {
msg := "mlx eval failed"
if C.mlx_had_last_error() != 0 {
msg = C.GoString(C.mlx_get_last_error())
}
panic("mlx: " + msg)
}
}

View File

@@ -17,7 +17,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
res := C.mlx_vector_array_new()
defer C.mlx_vector_array_free(res)
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, DefaultStream().ctx)
var globalScale C.mlx_array
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, globalScale, DefaultStream().ctx)
vecSize := int(C.mlx_vector_array_size(res))
w0 := New("QUANTIZE_W")
@@ -32,6 +33,18 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
return w0, w1, nil
}
// FromFP8 decodes the FP8-encoded array x into a new lazy array of the
// requested dtype by calling mlx_from_fp8 on the default stream.
func FromFP8(x *Array, dtype DType) *Array {
	out := New("FROM_FP8")
	C.mlx_from_fp8(&out.ctx, x.ctx, C.mlx_dtype(dtype), DefaultStream().ctx)
	return out
}
// ToFP8 encodes x into FP8 representation by calling mlx_to_fp8 on the
// default stream, returning the result as a new lazy array.
func ToFP8(x *Array) *Array {
	out := New("TO_FP8")
	C.mlx_to_fp8(&out.ctx, x.ctx, DefaultStream().ctx)
	return out
}
func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Array {
cMode := C.CString(mode)
defer C.free(unsafe.Pointer(cMode))
@@ -45,7 +58,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
}
out := New("DEQUANTIZE")
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, optDtype, DefaultStream().ctx)
var globalScale C.mlx_array
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, globalScale, optDtype, DefaultStream().ctx)
return out
}
@@ -135,6 +149,40 @@ func Contiguous(a *Array, allowColMajor bool) *Array {
return out
}
// Pad zero-pads a along its leading axes. paddings holds (low, high)
// pairs in axis order, so len(paddings) must be twice the number of axes
// to pad. Padding always uses "constant" mode with a fill value of 0.
func Pad(a *Array, paddings []int32) *Array {
	n := len(paddings) / 2
	axisIdx := make([]C.int, n)
	before := make([]C.int, n)
	after := make([]C.int, n)
	for i := 0; i < n; i++ {
		axisIdx[i] = C.int(i)
		before[i] = C.int(paddings[2*i])
		after[i] = C.int(paddings[2*i+1])
	}
	// Fill value and mode are C-owned temporaries; release them once the
	// pad op has been recorded.
	fill := C.mlx_array_new_float(C.float(0))
	defer C.mlx_array_free(fill)
	mode := C.CString("constant")
	defer C.free(unsafe.Pointer(mode))
	out := New("PAD")
	C.mlx_pad(
		&out.ctx,
		a.ctx,
		unsafe.SliceData(axisIdx),
		C.size_t(len(axisIdx)),
		unsafe.SliceData(before),
		C.size_t(len(before)),
		unsafe.SliceData(after),
		C.size_t(len(after)),
		fill,
		mode,
		DefaultStream().ctx,
	)
	return out
}
func DepthwiseConv1d(x, weight *Array, bias *Array) *Array {
groups := int32(x.Dim(x.NumDims() - 1))
return Conv1d(x, weight, bias, 1, 0, 1, groups)
@@ -446,15 +494,6 @@ func Collect(v any) []*Array {
return arrays
}
// Copy returns a new array produced by mlx_copy of a on the default
// stream. A nil or invalid input is returned unchanged.
//
// NOTE(review): per the KV-cache snapshot fix, mlx's copy shares the
// backing buffer with its source (copy_shared_buffer) rather than
// allocating independent storage, so copying a slice of a large buffer
// can keep the entire original buffer alive. Prefer Contiguous when a
// compact, independent snapshot is needed — confirm against mlx core.
func Copy(a *Array) *Array {
	if a == nil || !a.Valid() {
		return a
	}
	out := New("COPY")
	C.mlx_copy(&out.ctx, a.ctx, DefaultStream().ctx)
	return out
}
func collect(v reflect.Value, arrays *[]*Array, seen map[uintptr]bool) {
if !v.IsValid() {
return

View File

@@ -11,8 +11,10 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
switch strings.ToUpper(quantization) {
case "NVFP4":
return 16, 4, "nvfp4"
case "MXFP4":
return 32, 4, "mxfp4"
case "FP4", "Q4", "INT4":
return 32, 4, "affine"
return 64, 4, "affine"
case "MXFP8":
return 32, 8, "mxfp8"
case "FP8", "Q8", "INT8":

View File

@@ -144,3 +144,44 @@ func TestLayerNormDefaultEps(t *testing.T) {
}
}
}
// TestQuantizedLinearMXFP4MatchesDequantizedWeight checks that an mxfp4
// QuantizedLinear layer produces (within 1e-3) the same output as a plain
// Linear layer built from the dequantized weights, and that mxfp4
// quantization yields no bias tensor.
func TestQuantizedLinearMXFP4MatchesDequantizedWeight(t *testing.T) {
	skipIfNoMLX(t)

	wData := make([]float32, 3*32)
	for i := range wData {
		wData[i] = float32((i%11)-5) / 7
	}
	xData := make([]float32, 2*32)
	for i := range xData {
		xData[i] = float32((i%7)-3) / 5
	}

	w := mlx.FromValues(wData, 3, 32).AsType(mlx.DTypeBFloat16)
	x := mlx.FromValues(xData, 2, 32).AsType(mlx.DTypeBFloat16)
	mlx.Eval(w, x)

	layer := NewQuantizedLinear(w, nil, 32, 4, "mxfp4")
	if layer.QBiases != nil {
		t.Fatalf("mxfp4 qbiases = %v, want nil", layer.QBiases)
	}

	// Reference path: dequantize the packed weights and run a plain Linear.
	dq := mlx.Dequantize(layer.Weight, layer.Scales, layer.QBiases, 32, 4, "mxfp4")
	mlx.Eval(dq)

	quantOut := layer.Forward(x)
	refOut := NewLinear(dq, nil).Forward(x)
	mlx.Eval(quantOut, refOut)

	got, want := quantOut.Floats(), refOut.Floats()
	if len(got) != len(want) {
		t.Fatalf("output length = %d, want %d", len(got), len(want))
	}
	for i := range want {
		if !approxEqual(got[i], want[i], 1e-3) {
			t.Fatalf("output[%d] = %.6f, want %.6f", i, got[i], want[i])
		}
	}
}

View File

@@ -420,7 +420,16 @@ func tensorByBase(tensors map[string]*mlx.Array, base string) (*mlx.Array, strin
}
// supportsGatherQMM reports whether the gather quantized-matmul kernel
// can be used for the given quantization mode and bit width: affine
// supports 4- and 8-bit, mxfp8 only 8-bit, and nvfp4/mxfp4 only 4-bit.
// Any other combination falls back to the non-gather path.
//
// Fix: the stale one-line `return mode == "affine" && ...` left above the
// switch (a merge/diff artifact) made the switch unreachable dead code,
// silently disabling gather QMM for mxfp8/nvfp4/mxfp4 — removed it so the
// switch (which the unit test table expects) is actually executed.
func supportsGatherQMM(mode string, bits int) bool {
	switch mode {
	case "affine":
		return bits == 4 || bits == 8
	case "mxfp8":
		return bits == 8
	case "nvfp4", "mxfp4":
		return bits == 4
	default:
		return false
	}
}
func freeTensorKeys(tensors map[string]*mlx.Array, keys ...string) {

View File

@@ -83,6 +83,28 @@ func TestLayerSelectionHelpers(t *testing.T) {
}
}
// TestSupportsGatherQMM exercises supportsGatherQMM across supported and
// unsupported mode/bit-width combinations.
func TestSupportsGatherQMM(t *testing.T) {
	cases := []struct {
		mode string
		bits int
		want bool
	}{
		{"affine", 4, true},
		{"affine", 8, true},
		{"mxfp8", 8, true},
		{"nvfp4", 4, true},
		{"mxfp4", 4, true},
		{"mxfp8", 4, false},
		{"affine", 3, false},
	}
	for _, tc := range cases {
		got := supportsGatherQMM(tc.mode, tc.bits)
		if got != tc.want {
			t.Fatalf("supportsGatherQMM(%q, %d) = %v, want %v", tc.mode, tc.bits, got, tc.want)
		}
	}
}
func TestResolveTensorPathLayout(t *testing.T) {
dummy := mlx.New("dummy")