mirror of
https://github.com/ollama/ollama.git
synced 2026-04-21 16:25:42 +02:00
Compare commits
20 Commits
pdevine/qw
...
hoyyeva/vs
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7a2306087b | ||
|
|
8b8bcf0952 | ||
|
|
d1151e18a1 | ||
|
|
ebbce136c7 | ||
|
|
26b9f53f8e | ||
|
|
7575438366 | ||
|
|
7d7c90d702 | ||
|
|
4fda69809a | ||
|
|
c9b5da6b0c | ||
|
|
de5cb7311f | ||
|
|
95ee7fbd29 | ||
|
|
ec55536734 | ||
|
|
77491439c2 | ||
|
|
b166b36cd2 | ||
|
|
c2b0bb7a52 | ||
|
|
22c2bdbd8a | ||
|
|
6df6d097d9 | ||
|
|
d7c176ab91 | ||
|
|
0ff7d724ff | ||
|
|
46cb7795e1 |
7
.github/workflows/test.yaml
vendored
7
.github/workflows/test.yaml
vendored
@@ -64,6 +64,7 @@ jobs:
|
||||
container: nvidia/cuda:13.0.0-devel-ubuntu22.04
|
||||
extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
|
||||
flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
|
||||
install-go: true
|
||||
runs-on: linux
|
||||
container: ${{ matrix.container }}
|
||||
steps:
|
||||
@@ -90,6 +91,12 @@ jobs:
|
||||
fi
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
- if: matrix.install-go
|
||||
name: Install Go
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go / { print $2 }' go.mod)
|
||||
curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" | tar xz -C /usr/local
|
||||
echo "/usr/local/go/bin" >> $GITHUB_PATH
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: /github/home/.cache/ccache
|
||||
|
||||
@@ -157,7 +157,7 @@ COPY CMakeLists.txt CMakePresets.json .
|
||||
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
|
||||
COPY x/imagegen/mlx x/imagegen/mlx
|
||||
COPY go.mod go.sum .
|
||||
COPY MLX_VERSION MLX_CORE_VERSION .
|
||||
COPY MLX_VERSION MLX_C_VERSION .
|
||||
RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
|
||||
ENV PATH=/usr/local/go/bin:$PATH
|
||||
RUN go mod download
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
v0.30.6
|
||||
1
MLX_C_VERSION
Normal file
1
MLX_C_VERSION
Normal file
@@ -0,0 +1 @@
|
||||
0726ca922fc902c4c61ef9c27d94132be418e945
|
||||
@@ -1 +1 @@
|
||||
v0.5.0
|
||||
38ad257088fb2193ad47e527cf6534a689f30943
|
||||
|
||||
@@ -2065,6 +2065,10 @@ func runLauncherAction(cmd *cobra.Command, action tui.TUIAction, deps launcherDe
|
||||
if err != nil {
|
||||
return true, fmt.Errorf("launching %s: %w", action.Integration, err)
|
||||
}
|
||||
// VS Code is a GUI app — exit the TUI loop after launching
|
||||
if action.Integration == "vscode" {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
default:
|
||||
return false, fmt.Errorf("unknown launcher action: %d", action.Kind)
|
||||
|
||||
@@ -209,6 +209,43 @@ func TestRunLauncherAction_RunModelContinuesAfterCancellation(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunLauncherAction_VSCodeExitsTUILoop(t *testing.T) {
|
||||
setCmdTestHome(t, t.TempDir())
|
||||
|
||||
cmd := &cobra.Command{}
|
||||
cmd.SetContext(context.Background())
|
||||
|
||||
// VS Code should exit the TUI loop (return false) after a successful launch.
|
||||
continueLoop, err := runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "vscode"}, launcherDeps{
|
||||
resolveRunModel: unexpectedRunModelResolution(t),
|
||||
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
|
||||
return nil
|
||||
},
|
||||
runModel: unexpectedModelLaunch(t),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
if continueLoop {
|
||||
t.Fatal("expected vscode launch to exit the TUI loop (return false)")
|
||||
}
|
||||
|
||||
// Other integrations should continue the TUI loop (return true).
|
||||
continueLoop, err = runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"}, launcherDeps{
|
||||
resolveRunModel: unexpectedRunModelResolution(t),
|
||||
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
|
||||
return nil
|
||||
},
|
||||
runModel: unexpectedModelLaunch(t),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
if !continueLoop {
|
||||
t.Fatal("expected non-vscode integration to continue the TUI loop (return true)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunLauncherAction_IntegrationContinuesAfterCancellation(t *testing.T) {
|
||||
setCmdTestHome(t, t.TempDir())
|
||||
|
||||
|
||||
@@ -179,6 +179,7 @@ Supported integrations:
|
||||
opencode OpenCode
|
||||
openclaw OpenClaw (aliases: clawdbot, moltbot)
|
||||
pi Pi
|
||||
vscode VS Code (aliases: code)
|
||||
|
||||
Examples:
|
||||
ollama launch
|
||||
@@ -801,13 +802,6 @@ func cloneAliases(aliases map[string]string) map[string]string {
|
||||
return cloned
|
||||
}
|
||||
|
||||
func singleModelPrechecked(current string) []string {
|
||||
if current == "" {
|
||||
return nil
|
||||
}
|
||||
return []string{current}
|
||||
}
|
||||
|
||||
func firstModel(models []string) string {
|
||||
if len(models) == 0 {
|
||||
return ""
|
||||
|
||||
@@ -80,6 +80,12 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
}
|
||||
if canInstallDaemon() {
|
||||
onboardArgs = append(onboardArgs, "--install-daemon")
|
||||
} else {
|
||||
// When we can't install a daemon (e.g. no systemd, sudo dropped
|
||||
// XDG_RUNTIME_DIR, or container environment), skip the gateway
|
||||
// health check so non-interactive onboarding completes. The
|
||||
// gateway is started as a foreground child process after onboarding.
|
||||
onboardArgs = append(onboardArgs, "--skip-health")
|
||||
}
|
||||
cmd := exec.Command(bin, onboardArgs...)
|
||||
cmd.Stdin = os.Stdin
|
||||
|
||||
@@ -33,7 +33,7 @@ type IntegrationInfo struct {
|
||||
Description string
|
||||
}
|
||||
|
||||
var launcherIntegrationOrder = []string{"opencode", "droid", "pi", "cline"}
|
||||
var launcherIntegrationOrder = []string{"vscode", "opencode", "droid", "pi", "cline"}
|
||||
|
||||
var integrationSpecs = []*IntegrationSpec{
|
||||
{
|
||||
@@ -131,6 +131,18 @@ var integrationSpecs = []*IntegrationSpec{
|
||||
Command: []string{"npm", "install", "-g", "@mariozechner/pi-coding-agent"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "vscode",
|
||||
Runner: &VSCode{},
|
||||
Aliases: []string{"code"},
|
||||
Description: "Microsoft's open-source AI code editor",
|
||||
Install: IntegrationInstallSpec{
|
||||
CheckInstalled: func() bool {
|
||||
return (&VSCode{}).findBinary() != ""
|
||||
},
|
||||
URL: "https://code.visualstudio.com",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var integrationSpecsByName map[string]*IntegrationSpec
|
||||
|
||||
660
cmd/launch/vscode.go
Normal file
660
cmd/launch/vscode.go
Normal file
@@ -0,0 +1,660 @@
|
||||
package launch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/cmd/internal/fileutil"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
)
|
||||
|
||||
// VSCode implements Runner and Editor for Visual Studio Code integration.
|
||||
type VSCode struct{}
|
||||
|
||||
func (v *VSCode) String() string { return "Visual Studio Code" }
|
||||
|
||||
// findBinary returns the path/command to launch VS Code, or "" if not found.
|
||||
// It checks for the "code" CLI on PATH first, then falls back to platform-specific locations.
|
||||
func (v *VSCode) findBinary() string {
|
||||
if _, err := exec.LookPath("code"); err == nil {
|
||||
return "code"
|
||||
}
|
||||
var candidates []string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
candidates = []string{
|
||||
"/Applications/Visual Studio Code.app",
|
||||
}
|
||||
case "windows":
|
||||
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
|
||||
candidates = append(candidates, filepath.Join(localAppData, "Programs", "Microsoft VS Code", "bin", "code.cmd"))
|
||||
}
|
||||
default: // linux
|
||||
candidates = []string{
|
||||
"/usr/bin/code",
|
||||
"/snap/bin/code",
|
||||
}
|
||||
}
|
||||
for _, c := range candidates {
|
||||
if _, err := os.Stat(c); err == nil {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsRunning reports whether VS Code is currently running.
|
||||
// Each platform uses a pattern specific enough to avoid matching Cursor or
|
||||
// other VS Code forks.
|
||||
func (v *VSCode) IsRunning() bool {
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
out, err := exec.Command("pgrep", "-f", "Visual Studio Code.app/Contents/MacOS/Code").Output()
|
||||
return err == nil && len(out) > 0
|
||||
case "windows":
|
||||
// Match VS Code by executable path to avoid matching Cursor or other forks.
|
||||
out, err := exec.Command("powershell", "-NoProfile", "-Command",
|
||||
`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Select-Object -First 1`).Output()
|
||||
return err == nil && len(strings.TrimSpace(string(out))) > 0
|
||||
default:
|
||||
// Match VS Code specifically by its install path to avoid matching
|
||||
// Cursor (/cursor/) or other forks.
|
||||
for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
|
||||
out, err := exec.Command("pgrep", "-f", pattern).Output()
|
||||
if err == nil && len(out) > 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Quit gracefully quits VS Code and waits for it to exit so that it flushes
|
||||
// its in-memory state back to the database.
|
||||
func (v *VSCode) Quit() {
|
||||
if !v.IsRunning() {
|
||||
return
|
||||
}
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
_ = exec.Command("osascript", "-e", `quit app "Visual Studio Code"`).Run()
|
||||
case "windows":
|
||||
// Kill VS Code by executable path to avoid killing Cursor or other forks.
|
||||
_ = exec.Command("powershell", "-NoProfile", "-Command",
|
||||
`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Stop-Process -Force`).Run()
|
||||
default:
|
||||
for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
|
||||
_ = exec.Command("pkill", "-f", pattern).Run()
|
||||
}
|
||||
}
|
||||
// Wait for the process to fully exit and flush its state to disk
|
||||
// TODO(hoyyeva): update spinner to use bubble tea
|
||||
spinnerFrames := []string{"|", "/", "-", "\\"}
|
||||
frame := 0
|
||||
fmt.Fprintf(os.Stderr, "\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[0])
|
||||
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
for range 150 { // 150 ticks × 200ms = 30s timeout
|
||||
<-ticker.C
|
||||
frame++
|
||||
fmt.Fprintf(os.Stderr, "\r\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
|
||||
|
||||
if frame%5 == 0 { // check every ~1s
|
||||
if !v.IsRunning() {
|
||||
fmt.Fprintf(os.Stderr, "\r\033[K")
|
||||
// Give VS Code a moment to finish writing its state DB
|
||||
time.Sleep(1 * time.Second)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "\r\033[K")
|
||||
}
|
||||
|
||||
const (
|
||||
minCopilotChatVersion = "0.41.0"
|
||||
minVSCodeVersion = "1.113"
|
||||
)
|
||||
|
||||
func (v *VSCode) Run(model string, args []string) error {
|
||||
v.checkVSCodeVersion()
|
||||
v.checkCopilotChatVersion()
|
||||
|
||||
// Get all configured models (saved by the launcher framework before Run is called)
|
||||
models := []string{model}
|
||||
if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil && len(cfg.Models) > 0 {
|
||||
models = cfg.Models
|
||||
}
|
||||
|
||||
// VS Code discovers models from ollama ls. Cloud models that pass Show
|
||||
// (the server knows about them) but aren't in ls need to be pulled to
|
||||
// register them so VS Code can find them.
|
||||
if client, err := api.ClientFromEnvironment(); err == nil {
|
||||
v.ensureModelsRegistered(context.Background(), client, models)
|
||||
}
|
||||
|
||||
// Warn if the default model doesn't support tool calling
|
||||
if client, err := api.ClientFromEnvironment(); err == nil {
|
||||
if resp, err := client.Show(context.Background(), &api.ShowRequest{Model: models[0]}); err == nil {
|
||||
hasTools := false
|
||||
for _, c := range resp.Capabilities {
|
||||
if c == "tools" {
|
||||
hasTools = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasTools {
|
||||
fmt.Fprintf(os.Stderr, "Note: %s does not support tool calling and may not appear in the Copilot Chat model picker.\n", models[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
v.printModelAccessTip()
|
||||
|
||||
if v.IsRunning() {
|
||||
restart, err := ConfirmPrompt("Restart VS Code?")
|
||||
if err != nil {
|
||||
restart = false
|
||||
}
|
||||
if restart {
|
||||
v.Quit()
|
||||
if err := v.ShowInModelPicker(models); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
|
||||
}
|
||||
v.FocusVSCode()
|
||||
} else {
|
||||
fmt.Fprintf(os.Stderr, "\nTo get the latest model configuration, restart VS Code when you're ready.\n")
|
||||
}
|
||||
} else {
|
||||
if err := v.ShowInModelPicker(models); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
|
||||
}
|
||||
v.FocusVSCode()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ensureModelsRegistered pulls models that the server knows about (Show succeeds)
|
||||
// but aren't in ollama ls yet. This is needed for cloud models so that VS Code
|
||||
// can discover them from the Ollama API.
|
||||
func (v *VSCode) ensureModelsRegistered(ctx context.Context, client *api.Client, models []string) {
|
||||
listed, err := client.List(ctx)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
registered := make(map[string]bool, len(listed.Models))
|
||||
for _, m := range listed.Models {
|
||||
registered[m.Name] = true
|
||||
}
|
||||
|
||||
for _, model := range models {
|
||||
if registered[model] {
|
||||
continue
|
||||
}
|
||||
// Also check without :latest suffix
|
||||
if !strings.Contains(model, ":") && registered[model+":latest"] {
|
||||
continue
|
||||
}
|
||||
if err := pullModel(ctx, client, model, false); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: could not register model %s: %v%s\n", ansiYellow, model, err, ansiReset)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FocusVSCode brings VS Code to the foreground.
|
||||
func (v *VSCode) FocusVSCode() {
|
||||
binary := v.findBinary()
|
||||
if binary == "" {
|
||||
return
|
||||
}
|
||||
if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
|
||||
_ = exec.Command("open", "-a", binary).Run()
|
||||
} else {
|
||||
_ = exec.Command(binary).Start()
|
||||
}
|
||||
}
|
||||
|
||||
// printModelAccessTip shows instructions for finding Ollama models in VS Code.
|
||||
func (v *VSCode) printModelAccessTip() {
|
||||
fmt.Fprintf(os.Stderr, "\nTip: To use Ollama models, open Copilot Chat and click the model picker.\n")
|
||||
fmt.Fprintf(os.Stderr, " If you don't see your models, click \"Other models\" to find them.\n\n")
|
||||
}
|
||||
|
||||
func (v *VSCode) Paths() []string {
|
||||
if p := v.chatLanguageModelsPath(); fileExists(p) {
|
||||
return []string{p}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *VSCode) Edit(models []string) error {
|
||||
if len(models) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Write chatLanguageModels.json with Ollama vendor entry
|
||||
clmPath := v.chatLanguageModelsPath()
|
||||
if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var entries []map[string]any
|
||||
if data, err := os.ReadFile(clmPath); err == nil {
|
||||
_ = json.Unmarshal(data, &entries)
|
||||
}
|
||||
|
||||
// Remove any existing Ollama entries, preserve others
|
||||
filtered := make([]map[string]any, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
if vendor, _ := entry["vendor"].(string); vendor != "ollama" {
|
||||
filtered = append(filtered, entry)
|
||||
}
|
||||
}
|
||||
|
||||
// Add new Ollama entry
|
||||
filtered = append(filtered, map[string]any{
|
||||
"vendor": "ollama",
|
||||
"name": "Ollama",
|
||||
"url": envconfig.Host().String(),
|
||||
})
|
||||
|
||||
data, err := json.MarshalIndent(filtered, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fileutil.WriteWithBackup(clmPath, data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Clean up legacy settings from older Ollama integrations
|
||||
v.updateSettings()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *VSCode) Models() []string {
|
||||
if !v.hasOllamaVendor() {
|
||||
return nil
|
||||
}
|
||||
if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil {
|
||||
return cfg.Models
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// hasOllamaVendor checks if chatLanguageModels.json contains an Ollama vendor entry.
|
||||
func (v *VSCode) hasOllamaVendor() bool {
|
||||
data, err := os.ReadFile(v.chatLanguageModelsPath())
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var entries []map[string]any
|
||||
if err := json.Unmarshal(data, &entries); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if vendor, _ := entry["vendor"].(string); vendor == "ollama" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (v *VSCode) chatLanguageModelsPath() string {
|
||||
return v.vscodePath("chatLanguageModels.json")
|
||||
}
|
||||
|
||||
func (v *VSCode) settingsPath() string {
|
||||
return v.vscodePath("settings.json")
|
||||
}
|
||||
|
||||
// updateSettings cleans up legacy settings from older Ollama integrations.
|
||||
func (v *VSCode) updateSettings() {
|
||||
settingsPath := v.settingsPath()
|
||||
data, err := os.ReadFile(settingsPath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var settings map[string]any
|
||||
if err := json.Unmarshal(data, &settings); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
changed := false
|
||||
for _, key := range []string{"github.copilot.chat.byok.ollamaEndpoint", "ollama.launch.configured"} {
|
||||
if _, ok := settings[key]; ok {
|
||||
delete(settings, key)
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
|
||||
if !changed {
|
||||
return
|
||||
}
|
||||
|
||||
updated, err := json.MarshalIndent(settings, "", " ")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_ = fileutil.WriteWithBackup(settingsPath, updated)
|
||||
}
|
||||
|
||||
func (v *VSCode) statePath() string {
|
||||
return v.vscodePath("globalStorage", "state.vscdb")
|
||||
}
|
||||
|
||||
// ShowInModelPicker ensures the given models are visible in VS Code's Copilot
|
||||
// Chat model picker and sets the primary model as the active selection. It sets
|
||||
// the configured models to true in the picker preferences so they appear in the
|
||||
// dropdown, and writes the first model as the selected model for both the panel
|
||||
// and editor chat views. Models use the VS Code identifier format
|
||||
// "ollama/Ollama/<name>".
|
||||
func (v *VSCode) ShowInModelPicker(models []string) error {
|
||||
if len(models) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
dbPath := v.statePath()
|
||||
needsCreate := !fileExists(dbPath)
|
||||
if needsCreate {
|
||||
if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
|
||||
return fmt.Errorf("creating state directory: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
db, err := sql.Open("sqlite3", dbPath+"?_busy_timeout=5000")
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening state database: %w", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Create the table if this is a fresh DB. Schema must match what VS Code creates.
|
||||
if needsCreate {
|
||||
if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
|
||||
return fmt.Errorf("initializing state database: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Read existing preferences
|
||||
prefs := make(map[string]bool)
|
||||
var prefsJSON string
|
||||
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&prefsJSON); err == nil {
|
||||
_ = json.Unmarshal([]byte(prefsJSON), &prefs)
|
||||
}
|
||||
|
||||
// Build name→ID map from VS Code's cached model list.
|
||||
// VS Code uses numeric IDs like "ollama/Ollama/4", not "ollama/Ollama/kimi-k2.5:cloud".
|
||||
nameToID := make(map[string]string)
|
||||
var cached []map[string]any
|
||||
var cacheJSON string
|
||||
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&cacheJSON); err == nil {
|
||||
_ = json.Unmarshal([]byte(cacheJSON), &cached)
|
||||
}
|
||||
cachedNames := make(map[string]bool)
|
||||
for _, entry := range cached {
|
||||
meta, _ := entry["metadata"].(map[string]any)
|
||||
if meta == nil {
|
||||
continue
|
||||
}
|
||||
if vendor, _ := meta["vendor"].(string); vendor == "ollama" {
|
||||
name, _ := meta["name"].(string)
|
||||
id, _ := entry["identifier"].(string)
|
||||
if name != "" && id != "" {
|
||||
nameToID[name] = id
|
||||
}
|
||||
if name != "" {
|
||||
cachedNames[name] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ollama config is authoritative: always show configured models,
|
||||
// hide Ollama models that are no longer in the config.
|
||||
configuredIDs := make(map[string]bool)
|
||||
for _, m := range models {
|
||||
for _, id := range v.modelVSCodeIDs(m, nameToID) {
|
||||
prefs[id] = true
|
||||
configuredIDs[id] = true
|
||||
}
|
||||
}
|
||||
for id := range prefs {
|
||||
if strings.HasPrefix(id, "ollama/") && !configuredIDs[id] {
|
||||
prefs[id] = false
|
||||
}
|
||||
}
|
||||
|
||||
data, _ := json.Marshal(prefs)
|
||||
if _, err = db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the primary model as the active selection in Copilot Chat so it
|
||||
// doesn't default to "auto" or whatever the user last picked manually.
|
||||
primaryID := v.modelVSCodeIDs(models[0], nameToID)[0]
|
||||
for _, key := range []string{"chat.currentLanguageModel.panel", "chat.currentLanguageModel.editor"} {
|
||||
if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key, primaryID); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)", key+".isDefault", "false"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure configured models exist in the cached model list so VS Code can
|
||||
// restore the selection immediately on startup, before extensions load.
|
||||
// Without this, a model that was never previously used won't be in the
|
||||
// cache, and VS Code falls back to "auto" until the Ollama BYOK provider
|
||||
// discovers it via the API (which is slow).
|
||||
cacheChanged := false
|
||||
for _, m := range models {
|
||||
if cachedNames[m] {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(m, ":") && cachedNames[m+":latest"] {
|
||||
continue
|
||||
}
|
||||
cacheID := m
|
||||
if !strings.Contains(m, ":") {
|
||||
cacheID = m + ":latest"
|
||||
}
|
||||
cached = append(cached, map[string]any{
|
||||
"identifier": "ollama/Ollama/" + cacheID,
|
||||
"metadata": map[string]any{
|
||||
"extension": map[string]any{"value": "github.copilot-chat"},
|
||||
"name": m,
|
||||
"id": m,
|
||||
"vendor": "ollama",
|
||||
"version": "1.0.0",
|
||||
"family": m,
|
||||
"detail": "Ollama",
|
||||
"maxInputTokens": 4096,
|
||||
"maxOutputTokens": 4096,
|
||||
"isDefaultForLocation": map[string]any{},
|
||||
"isUserSelectable": true,
|
||||
"capabilities": map[string]any{"toolCalling": true},
|
||||
},
|
||||
})
|
||||
cacheChanged = true
|
||||
}
|
||||
if cacheChanged {
|
||||
cacheData, _ := json.Marshal(cached)
|
||||
if _, err := db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(cacheData)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// modelVSCodeIDs returns all possible VS Code picker IDs for a model name.
|
||||
// The primary (first) ID should match the live identifier that VS Code assigns
|
||||
// at runtime via toModelIdentifier(vendor, group, m.id), where m.id comes from
|
||||
// /api/tags and always includes the tag (e.g. "llama3.2:latest").
|
||||
func (v *VSCode) modelVSCodeIDs(model string, nameToID map[string]string) []string {
|
||||
var ids []string
|
||||
if id, ok := nameToID[model]; ok {
|
||||
ids = append(ids, id)
|
||||
} else if !strings.Contains(model, ":") {
|
||||
if id, ok := nameToID[model+":latest"]; ok {
|
||||
ids = append(ids, id)
|
||||
}
|
||||
}
|
||||
// For untagged models, the live identifier includes :latest
|
||||
// (e.g. ollama/Ollama/llama3.2:latest), so prefer that format
|
||||
// to avoid a mismatch that causes VS Code to reset to "auto".
|
||||
if !strings.Contains(model, ":") {
|
||||
ids = append(ids, "ollama/Ollama/"+model+":latest")
|
||||
}
|
||||
ids = append(ids, "ollama/Ollama/"+model)
|
||||
return ids
|
||||
}
|
||||
|
||||
func (v *VSCode) vscodePath(parts ...string) string {
|
||||
home, _ := os.UserHomeDir()
|
||||
var base string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
base = filepath.Join(home, "Library", "Application Support", "Code", "User")
|
||||
case "windows":
|
||||
base = filepath.Join(os.Getenv("APPDATA"), "Code", "User")
|
||||
default:
|
||||
base = filepath.Join(home, ".config", "Code", "User")
|
||||
}
|
||||
return filepath.Join(append([]string{base}, parts...)...)
|
||||
}
|
||||
|
||||
// checkVSCodeVersion warns if VS Code is older than minVSCodeVersion.
|
||||
func (v *VSCode) checkVSCodeVersion() {
|
||||
codeCLI := v.findCodeCLI()
|
||||
if codeCLI == "" {
|
||||
return
|
||||
}
|
||||
|
||||
out, err := exec.Command(codeCLI, "--version").Output()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// "code --version" outputs: version\ncommit\narch
|
||||
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
|
||||
if len(lines) == 0 || lines[0] == "" {
|
||||
return
|
||||
}
|
||||
version := strings.TrimSpace(lines[0])
|
||||
|
||||
if compareVersions(version, minVSCodeVersion) < 0 {
|
||||
fmt.Fprintf(os.Stderr, "\n%sWarning: VS Code version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minVSCodeVersion, ansiReset)
|
||||
fmt.Fprintf(os.Stderr, "Please update VS Code to the latest version.\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// checkCopilotChatVersion warns if the GitHub Copilot Chat extension is
|
||||
// missing or older than minCopilotChatVersion.
|
||||
func (v *VSCode) checkCopilotChatVersion() {
|
||||
codeCLI := v.findCodeCLI()
|
||||
if codeCLI == "" {
|
||||
return
|
||||
}
|
||||
|
||||
out, err := exec.Command(codeCLI, "--list-extensions", "--show-versions").Output()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
installed, version := parseCopilotChatVersion(string(out))
|
||||
if !installed {
|
||||
fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension is not installed%s\n", ansiYellow, ansiReset)
|
||||
fmt.Fprintf(os.Stderr, "Install it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Install\n\n")
|
||||
return
|
||||
}
|
||||
if compareVersions(version, minCopilotChatVersion) < 0 {
|
||||
fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minCopilotChatVersion, ansiReset)
|
||||
fmt.Fprintf(os.Stderr, "Please update it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Update\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// findCodeCLI returns the path to the VS Code CLI for querying extensions.
|
||||
// On macOS, findBinary may return an .app bundle which can't run --list-extensions,
|
||||
// so this resolves to the actual CLI binary inside the bundle.
|
||||
func (v *VSCode) findCodeCLI() string {
|
||||
binary := v.findBinary()
|
||||
if binary == "" {
|
||||
return ""
|
||||
}
|
||||
if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
|
||||
bundleCLI := binary + "/Contents/Resources/app/bin/code"
|
||||
if _, err := os.Stat(bundleCLI); err == nil {
|
||||
return bundleCLI
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return binary
|
||||
}
|
||||
|
||||
// parseCopilotChatVersion extracts the version of the GitHub Copilot Chat
|
||||
// extension from "code --list-extensions --show-versions" output.
|
||||
func parseCopilotChatVersion(output string) (installed bool, version string) {
|
||||
for _, line := range strings.Split(output, "\n") {
|
||||
// Format: github.copilot-chat@0.40.1
|
||||
if !strings.HasPrefix(strings.ToLower(line), "github.copilot-chat@") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, "@", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
return true, strings.TrimSpace(parts[1])
|
||||
}
|
||||
return false, ""
|
||||
}
|
||||
|
||||
// compareVersions compares two dot-separated version strings.
|
||||
// Returns -1 if a < b, 0 if a == b, 1 if a > b.
|
||||
func compareVersions(a, b string) int {
|
||||
aParts := strings.Split(a, ".")
|
||||
bParts := strings.Split(b, ".")
|
||||
|
||||
maxLen := len(aParts)
|
||||
if len(bParts) > maxLen {
|
||||
maxLen = len(bParts)
|
||||
}
|
||||
|
||||
for i := range maxLen {
|
||||
var aNum, bNum int
|
||||
if i < len(aParts) {
|
||||
aNum, _ = strconv.Atoi(aParts[i])
|
||||
}
|
||||
if i < len(bParts) {
|
||||
bNum, _ = strconv.Atoi(bParts[i])
|
||||
}
|
||||
if aNum < bNum {
|
||||
return -1
|
||||
}
|
||||
if aNum > bNum {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func fileExists(path string) bool {
|
||||
_, err := os.Stat(path)
|
||||
return err == nil
|
||||
}
|
||||
656
cmd/launch/vscode_test.go
Normal file
656
cmd/launch/vscode_test.go
Normal file
@@ -0,0 +1,656 @@
|
||||
package launch
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func TestVSCodeIntegration(t *testing.T) {
|
||||
v := &VSCode{}
|
||||
|
||||
t.Run("String", func(t *testing.T) {
|
||||
if got := v.String(); got != "Visual Studio Code" {
|
||||
t.Errorf("String() = %q, want %q", got, "Visual Studio Code")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("implements Runner", func(t *testing.T) {
|
||||
var _ Runner = v
|
||||
})
|
||||
|
||||
t.Run("implements Editor", func(t *testing.T) {
|
||||
var _ Editor = v
|
||||
})
|
||||
}
|
||||
|
||||
func TestVSCodeEdit(t *testing.T) {
|
||||
v := &VSCode{}
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
setup string // initial chatLanguageModels.json content, empty means no file
|
||||
models []string
|
||||
validate func(t *testing.T, data []byte)
|
||||
}{
|
||||
{
|
||||
name: "fresh install",
|
||||
models: []string{"llama3.2"},
|
||||
validate: func(t *testing.T, data []byte) {
|
||||
assertOllamaVendorConfigured(t, data)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "preserve other vendor entries",
|
||||
setup: `[{"vendor": "azure", "name": "Azure", "url": "https://example.com"}]`,
|
||||
models: []string{"llama3.2"},
|
||||
validate: func(t *testing.T, data []byte) {
|
||||
var entries []map[string]any
|
||||
json.Unmarshal(data, &entries)
|
||||
if len(entries) != 2 {
|
||||
t.Errorf("expected 2 entries, got %d", len(entries))
|
||||
}
|
||||
// Check Azure entry preserved
|
||||
found := false
|
||||
for _, e := range entries {
|
||||
if v, _ := e["vendor"].(string); v == "azure" {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("azure vendor entry was not preserved")
|
||||
}
|
||||
assertOllamaVendorConfigured(t, data)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "update existing ollama entry",
|
||||
setup: `[{"vendor": "ollama", "name": "Ollama", "url": "http://old:11434"}]`,
|
||||
models: []string{"llama3.2"},
|
||||
validate: func(t *testing.T, data []byte) {
|
||||
assertOllamaVendorConfigured(t, data)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "empty models is no-op",
|
||||
setup: `[{"vendor": "azure", "name": "Azure"}]`,
|
||||
models: []string{},
|
||||
validate: func(t *testing.T, data []byte) {
|
||||
if string(data) != `[{"vendor": "azure", "name": "Azure"}]` {
|
||||
t.Error("empty models should not modify file")
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "corrupted JSON treated as empty",
|
||||
setup: `{corrupted json`,
|
||||
models: []string{"llama3.2"},
|
||||
validate: func(t *testing.T, data []byte) {
|
||||
var entries []map[string]any
|
||||
if err := json.Unmarshal(data, &entries); err != nil {
|
||||
t.Errorf("result is not valid JSON: %v", err)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
os.RemoveAll(filepath.Dir(clmPath))
|
||||
|
||||
if tt.setup != "" {
|
||||
os.MkdirAll(filepath.Dir(clmPath), 0o755)
|
||||
os.WriteFile(clmPath, []byte(tt.setup), 0o644)
|
||||
}
|
||||
|
||||
if err := v.Edit(tt.models); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
data, _ := os.ReadFile(clmPath)
|
||||
tt.validate(t, data)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestVSCodeEditCleansUpOldSettings(t *testing.T) {
|
||||
v := &VSCode{}
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
settingsPath := testVSCodePath(t, tmpDir, "settings.json")
|
||||
|
||||
// Create settings.json with old byok setting
|
||||
os.MkdirAll(filepath.Dir(settingsPath), 0o755)
|
||||
os.WriteFile(settingsPath, []byte(`{"github.copilot.chat.byok.ollamaEndpoint": "http://old:11434", "ollama.launch.configured": true, "editor.fontSize": 14}`), 0o644)
|
||||
|
||||
if err := v.Edit([]string{"llama3.2"}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify old settings were removed
|
||||
data, err := os.ReadFile(settingsPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var settings map[string]any
|
||||
json.Unmarshal(data, &settings)
|
||||
if _, ok := settings["github.copilot.chat.byok.ollamaEndpoint"]; ok {
|
||||
t.Error("github.copilot.chat.byok.ollamaEndpoint should have been removed")
|
||||
}
|
||||
if _, ok := settings["ollama.launch.configured"]; ok {
|
||||
t.Error("ollama.launch.configured should have been removed")
|
||||
}
|
||||
if settings["editor.fontSize"] != float64(14) {
|
||||
t.Error("editor.fontSize should have been preserved")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVSCodePaths(t *testing.T) {
|
||||
v := &VSCode{}
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
|
||||
|
||||
t.Run("no file returns nil", func(t *testing.T) {
|
||||
os.Remove(clmPath)
|
||||
if paths := v.Paths(); paths != nil {
|
||||
t.Errorf("expected nil, got %v", paths)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("existing file returns path", func(t *testing.T) {
|
||||
os.MkdirAll(filepath.Dir(clmPath), 0o755)
|
||||
os.WriteFile(clmPath, []byte(`[]`), 0o644)
|
||||
|
||||
if paths := v.Paths(); len(paths) != 1 {
|
||||
t.Errorf("expected 1 path, got %d", len(paths))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// testVSCodePath returns the expected VS Code config path for the given file in tests.
|
||||
func testVSCodePath(t *testing.T, tmpDir, filename string) string {
|
||||
t.Helper()
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
return filepath.Join(tmpDir, "Library", "Application Support", "Code", "User", filename)
|
||||
case "windows":
|
||||
t.Setenv("APPDATA", tmpDir)
|
||||
return filepath.Join(tmpDir, "Code", "User", filename)
|
||||
default:
|
||||
return filepath.Join(tmpDir, ".config", "Code", "User", filename)
|
||||
}
|
||||
}
|
||||
|
||||
func assertOllamaVendorConfigured(t *testing.T, data []byte) {
|
||||
t.Helper()
|
||||
var entries []map[string]any
|
||||
if err := json.Unmarshal(data, &entries); err != nil {
|
||||
t.Fatalf("invalid JSON: %v", err)
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if vendor, _ := entry["vendor"].(string); vendor == "ollama" {
|
||||
if name, _ := entry["name"].(string); name != "Ollama" {
|
||||
t.Errorf("expected name \"Ollama\", got %q", name)
|
||||
}
|
||||
if url, _ := entry["url"].(string); url == "" {
|
||||
t.Error("url not set")
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
t.Error("no ollama vendor entry found")
|
||||
}
|
||||
|
||||
func TestShowInModelPicker(t *testing.T) {
|
||||
v := &VSCode{}
|
||||
|
||||
// helper to create a state DB with optional seed data
|
||||
setupDB := func(t *testing.T, tmpDir string, seedPrefs map[string]bool, seedCache []map[string]any) string {
|
||||
t.Helper()
|
||||
dbDir := filepath.Join(tmpDir, "globalStorage")
|
||||
os.MkdirAll(dbDir, 0o755)
|
||||
dbPath := filepath.Join(dbDir, "state.vscdb")
|
||||
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if seedPrefs != nil {
|
||||
data, _ := json.Marshal(seedPrefs)
|
||||
db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data))
|
||||
}
|
||||
if seedCache != nil {
|
||||
data, _ := json.Marshal(seedCache)
|
||||
db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(data))
|
||||
}
|
||||
return dbPath
|
||||
}
|
||||
|
||||
// helper to read prefs back from DB
|
||||
readPrefs := func(t *testing.T, dbPath string) map[string]bool {
|
||||
t.Helper()
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
var raw string
|
||||
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&raw); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
prefs := make(map[string]bool)
|
||||
json.Unmarshal([]byte(raw), &prefs)
|
||||
return prefs
|
||||
}
|
||||
|
||||
t.Run("fresh DB creates table and shows models", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Setenv("APPDATA", tmpDir)
|
||||
}
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["ollama/Ollama/llama3.2"] {
|
||||
t.Error("expected llama3.2 to be shown")
|
||||
}
|
||||
if !prefs["ollama/Ollama/llama3.2:latest"] {
|
||||
t.Error("expected llama3.2:latest to be shown")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("configured models are shown", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["ollama/Ollama/llama3.2"] {
|
||||
t.Error("expected llama3.2 to be shown")
|
||||
}
|
||||
if !prefs["ollama/Ollama/qwen3:8b"] {
|
||||
t.Error("expected qwen3:8b to be shown")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("removed models are hidden", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
|
||||
"ollama/Ollama/llama3.2": true,
|
||||
"ollama/Ollama/llama3.2:latest": true,
|
||||
"ollama/Ollama/mistral": true,
|
||||
"ollama/Ollama/mistral:latest": true,
|
||||
}, nil)
|
||||
|
||||
// Only configure llama3.2 — mistral should get hidden
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["ollama/Ollama/llama3.2"] {
|
||||
t.Error("expected llama3.2 to stay shown")
|
||||
}
|
||||
if prefs["ollama/Ollama/mistral"] {
|
||||
t.Error("expected mistral to be hidden")
|
||||
}
|
||||
if prefs["ollama/Ollama/mistral:latest"] {
|
||||
t.Error("expected mistral:latest to be hidden")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("non-ollama prefs are preserved", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
|
||||
"copilot/gpt-4o": true,
|
||||
}, nil)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["copilot/gpt-4o"] {
|
||||
t.Error("expected copilot/gpt-4o to stay shown")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("uses cached numeric IDs when available", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
cache := []map[string]any{
|
||||
{
|
||||
"identifier": "ollama/Ollama/4",
|
||||
"metadata": map[string]any{"vendor": "ollama", "name": "llama3.2"},
|
||||
},
|
||||
}
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["ollama/Ollama/4"] {
|
||||
t.Error("expected numeric ID ollama/Ollama/4 to be shown")
|
||||
}
|
||||
// Name-based fallback should also be set
|
||||
if !prefs["ollama/Ollama/llama3.2"] {
|
||||
t.Error("expected name-based ID to also be shown")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty models is no-op", func(t *testing.T) {
|
||||
err := v.ShowInModelPicker([]string{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
|
||||
// helper to read a string value from the state DB
|
||||
readValue := func(t *testing.T, dbPath, key string) string {
|
||||
t.Helper()
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
var val string
|
||||
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = ?", key).Scan(&val); err != nil {
|
||||
return ""
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
t.Run("sets primary model as active selection", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
|
||||
panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
|
||||
if panelModel != "ollama/Ollama/llama3.2:latest" {
|
||||
t.Errorf("expected panel model ollama/Ollama/llama3.2:latest, got %q", panelModel)
|
||||
}
|
||||
editorModel := readValue(t, dbPath, "chat.currentLanguageModel.editor")
|
||||
if editorModel != "ollama/Ollama/llama3.2:latest" {
|
||||
t.Errorf("expected editor model ollama/Ollama/llama3.2:latest, got %q", editorModel)
|
||||
}
|
||||
panelDefault := readValue(t, dbPath, "chat.currentLanguageModel.panel.isDefault")
|
||||
if panelDefault != "false" {
|
||||
t.Errorf("expected panel isDefault false, got %q", panelDefault)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("sets cached numeric ID as active selection", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
cache := []map[string]any{
|
||||
{
|
||||
"identifier": "ollama/Ollama/4",
|
||||
"metadata": map[string]any{"vendor": "ollama", "name": "llama3.2"},
|
||||
},
|
||||
}
|
||||
setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
|
||||
panelModel := readValue(t, dbPath, "chat.currentLanguageModel.panel")
|
||||
if panelModel != "ollama/Ollama/4" {
|
||||
t.Errorf("expected panel model to use cached numeric ID ollama/Ollama/4, got %q", panelModel)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("previously hidden model is re-shown when configured", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
|
||||
"ollama/Ollama/llama3.2": false,
|
||||
"ollama/Ollama/llama3.2:latest": false,
|
||||
}, nil)
|
||||
|
||||
// Ollama config is authoritative — should override the hidden state
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prefs := readPrefs(t, dbPath)
|
||||
if !prefs["ollama/Ollama/llama3.2"] {
|
||||
t.Error("expected llama3.2 to be re-shown")
|
||||
}
|
||||
})
|
||||
|
||||
// helper to read and parse the cached models from the state DB
|
||||
readCache := func(t *testing.T, dbPath string) []map[string]any {
|
||||
t.Helper()
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
var raw string
|
||||
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&raw); err != nil {
|
||||
return nil
|
||||
}
|
||||
var result []map[string]any
|
||||
_ = json.Unmarshal([]byte(raw), &result)
|
||||
return result
|
||||
}
|
||||
|
||||
t.Run("adds uncached model to cache for instant startup display", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
// No seed cache — model has never been used in VS Code before
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"qwen3:8b"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cache := readCache(t, dbPath)
|
||||
if len(cache) != 1 {
|
||||
t.Fatalf("expected 1 cached entry, got %d", len(cache))
|
||||
}
|
||||
entry := cache[0]
|
||||
if id, _ := entry["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
|
||||
t.Errorf("expected identifier ollama/Ollama/qwen3:8b, got %q", id)
|
||||
}
|
||||
meta, _ := entry["metadata"].(map[string]any)
|
||||
if meta == nil {
|
||||
t.Fatal("expected metadata in cache entry")
|
||||
}
|
||||
if v, _ := meta["vendor"].(string); v != "ollama" {
|
||||
t.Errorf("expected vendor ollama, got %q", v)
|
||||
}
|
||||
if sel, ok := meta["isUserSelectable"].(bool); !ok || !sel {
|
||||
t.Error("expected isUserSelectable to be true")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("does not duplicate already-cached model", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
cache := []map[string]any{
|
||||
{
|
||||
"identifier": "ollama/Ollama/4",
|
||||
"metadata": map[string]any{"vendor": "ollama", "name": "llama3.2"},
|
||||
},
|
||||
{
|
||||
"identifier": "copilot/copilot/auto",
|
||||
"metadata": map[string]any{"vendor": "copilot", "name": "Auto"},
|
||||
},
|
||||
}
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
|
||||
|
||||
err := v.ShowInModelPicker([]string{"llama3.2"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Cache should still have exactly 2 entries (no duplicate added)
|
||||
result := readCache(t, dbPath)
|
||||
if len(result) != 2 {
|
||||
t.Errorf("expected 2 cached entries (no duplicate), got %d", len(result))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("adds only missing models to existing cache", func(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("XDG_CONFIG_HOME", "")
|
||||
cache := []map[string]any{
|
||||
{
|
||||
"identifier": "ollama/Ollama/4",
|
||||
"metadata": map[string]any{"vendor": "ollama", "name": "llama3.2"},
|
||||
},
|
||||
}
|
||||
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
|
||||
|
||||
// llama3.2 is cached, qwen3:8b is not
|
||||
err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
result := readCache(t, dbPath)
|
||||
if len(result) != 2 {
|
||||
t.Fatalf("expected 2 cached entries, got %d", len(result))
|
||||
}
|
||||
// Second entry should be the newly added qwen3:8b
|
||||
if id, _ := result[1]["identifier"].(string); id != "ollama/Ollama/qwen3:8b" {
|
||||
t.Errorf("expected new entry ollama/Ollama/qwen3:8b, got %q", id)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseCopilotChatVersion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
output string
|
||||
wantInstalled bool
|
||||
wantVersion string
|
||||
}{
|
||||
{
|
||||
name: "found among other extensions",
|
||||
output: "ms-python.python@2024.1.1\ngithub.copilot-chat@0.40.1\ngithub.copilot@1.200.0\n",
|
||||
wantInstalled: true,
|
||||
wantVersion: "0.40.1",
|
||||
},
|
||||
{
|
||||
name: "only extension",
|
||||
output: "GitHub.copilot-chat@0.41.0\n",
|
||||
wantInstalled: true,
|
||||
wantVersion: "0.41.0",
|
||||
},
|
||||
{
|
||||
name: "not installed",
|
||||
output: "ms-python.python@2024.1.1\ngithub.copilot@1.200.0\n",
|
||||
wantInstalled: false,
|
||||
},
|
||||
{
|
||||
name: "empty output",
|
||||
output: "",
|
||||
wantInstalled: false,
|
||||
},
|
||||
{
|
||||
name: "case insensitive match",
|
||||
output: "GitHub.Copilot-Chat@0.39.0\n",
|
||||
wantInstalled: true,
|
||||
wantVersion: "0.39.0",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
installed, version := parseCopilotChatVersion(tt.output)
|
||||
if installed != tt.wantInstalled {
|
||||
t.Errorf("installed = %v, want %v", installed, tt.wantInstalled)
|
||||
}
|
||||
if installed && version != tt.wantVersion {
|
||||
t.Errorf("version = %q, want %q", version, tt.wantVersion)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompareVersions(t *testing.T) {
|
||||
tests := []struct {
|
||||
a, b string
|
||||
want int
|
||||
}{
|
||||
{"0.40.1", "0.40.1", 0},
|
||||
{"0.40.2", "0.40.1", 1},
|
||||
{"0.40.0", "0.40.1", -1},
|
||||
{"0.41.0", "0.40.1", 1},
|
||||
{"0.39.9", "0.40.1", -1},
|
||||
{"1.0.0", "0.40.1", 1},
|
||||
{"0.40", "0.40.1", -1},
|
||||
{"0.40.1.1", "0.40.1", 1},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) {
|
||||
got := compareVersions(tt.a, tt.b)
|
||||
if got != tt.want {
|
||||
t.Errorf("compareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -242,6 +242,10 @@ func (m selectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.cancelled = true
|
||||
return m, tea.Quit
|
||||
|
||||
case tea.KeyLeft:
|
||||
m.cancelled = true
|
||||
return m, tea.Quit
|
||||
|
||||
case tea.KeyEnter:
|
||||
filtered := m.filteredItems()
|
||||
if len(filtered) > 0 && m.cursor < len(filtered) {
|
||||
@@ -354,7 +358,7 @@ func (m selectorModel) renderContent() string {
|
||||
}
|
||||
|
||||
s.WriteString("\n")
|
||||
help := "↑/↓ navigate • enter select • esc cancel"
|
||||
help := "↑/↓ navigate • enter select • ← back"
|
||||
if m.helpText != "" {
|
||||
help = m.helpText
|
||||
}
|
||||
@@ -608,6 +612,10 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.cancelled = true
|
||||
return m, tea.Quit
|
||||
|
||||
case tea.KeyLeft:
|
||||
m.cancelled = true
|
||||
return m, tea.Quit
|
||||
|
||||
case tea.KeyTab:
|
||||
m.multi = !m.multi
|
||||
|
||||
@@ -810,7 +818,7 @@ func (m multiSelectorModel) View() string {
|
||||
s.WriteString("\n")
|
||||
|
||||
if !m.multi {
|
||||
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • esc cancel"))
|
||||
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • ← back"))
|
||||
} else {
|
||||
count := m.selectedCount()
|
||||
if count == 0 {
|
||||
@@ -819,7 +827,7 @@ func (m multiSelectorModel) View() string {
|
||||
s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
|
||||
}
|
||||
s.WriteString("\n\n")
|
||||
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • esc cancel"))
|
||||
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • ← back"))
|
||||
}
|
||||
|
||||
result := s.String()
|
||||
|
||||
@@ -782,6 +782,9 @@ func TestMulti_MultiModeHelpText(t *testing.T) {
|
||||
if !strings.Contains(content, "tab select single") {
|
||||
t.Error("multi mode should show 'tab select single' in help")
|
||||
}
|
||||
if !strings.Contains(content, "← back") {
|
||||
t.Error("multi mode should show '← back' in help")
|
||||
}
|
||||
}
|
||||
|
||||
// --- preChecked initialization order ---
|
||||
@@ -868,6 +871,46 @@ func TestMulti_UncheckingTopDefaultFallsBackToNearestCheckedBelow(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Left arrow back navigation ---
|
||||
|
||||
func TestSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
|
||||
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
|
||||
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
|
||||
got := updated.(selectorModel)
|
||||
if !got.cancelled {
|
||||
t.Error("left arrow with empty filter should cancel (go back)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
|
||||
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
|
||||
m.filter = "a"
|
||||
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
|
||||
got := updated.(selectorModel)
|
||||
if !got.cancelled {
|
||||
t.Error("left arrow with active filter should still cancel (go back)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
|
||||
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
|
||||
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
|
||||
got := updated.(multiSelectorModel)
|
||||
if !got.cancelled {
|
||||
t.Error("left arrow with empty filter should cancel (go back)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
|
||||
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
|
||||
m.filter = "a"
|
||||
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
|
||||
got := updated.(multiSelectorModel)
|
||||
if !got.cancelled {
|
||||
t.Error("left arrow with active filter should still cancel (go back)")
|
||||
}
|
||||
}
|
||||
|
||||
// Key message helpers for testing
|
||||
|
||||
type keyType = int
|
||||
|
||||
@@ -60,6 +60,9 @@ var mainMenuItems = []menuItem{
|
||||
{
|
||||
integration: "openclaw",
|
||||
},
|
||||
{
|
||||
integration: "vscode",
|
||||
},
|
||||
}
|
||||
|
||||
var othersMenuItem = menuItem{
|
||||
@@ -139,6 +142,7 @@ func otherIntegrationItems(state *launch.LauncherState) []menuItem {
|
||||
"claude": true,
|
||||
"codex": true,
|
||||
"openclaw": true,
|
||||
"vscode": true,
|
||||
}
|
||||
|
||||
var items []menuItem
|
||||
|
||||
@@ -160,6 +160,12 @@
|
||||
"group": "More information",
|
||||
"pages": [
|
||||
"/cli",
|
||||
{
|
||||
"group": "Assistant Sandboxing",
|
||||
"pages": [
|
||||
"/integrations/nemoclaw"
|
||||
]
|
||||
},
|
||||
"/modelfile",
|
||||
"/context-length",
|
||||
"/linux",
|
||||
|
||||
@@ -96,6 +96,18 @@ The `/loop` command runs a prompt or slash command on a recurring schedule insid
|
||||
/loop 1h Remind me to review the deploy status
|
||||
```
|
||||
|
||||
## Telegram
|
||||
|
||||
Chat with Claude Code from Telegram by connecting a bot to your session. Install the [Telegram plugin](https://github.com/anthropics/claude-plugins-official), create a bot via [@BotFather](https://t.me/BotFather), then launch with the channel flag:
|
||||
|
||||
```shell
|
||||
ollama launch claude -- --channels plugin:telegram@claude-plugins-official
|
||||
```
|
||||
|
||||
Claude Code will prompt for permission on most actions. To allow the bot to work autonomously, configure [permission rules](https://code.claude.com/docs/en/permissions) or pass `--dangerously-skip-permissions` in isolated environments.
|
||||
|
||||
See the [plugin README](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) for full setup instructions including pairing and access control.
|
||||
|
||||
## Manual setup
|
||||
|
||||
Claude Code connects to Ollama using the Anthropic-compatible API.
|
||||
|
||||
67
docs/integrations/nemoclaw.mdx
Normal file
67
docs/integrations/nemoclaw.mdx
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
title: NemoClaw
|
||||
---
|
||||
|
||||
NemoClaw is NVIDIA's open source security stack for [OpenClaw](/integrations/openclaw). It wraps OpenClaw with the NVIDIA OpenShell runtime to provide kernel-level sandboxing, network policy controls, and audit trails for AI agents.
|
||||
|
||||
## Quick start
|
||||
|
||||
Pull a model:
|
||||
|
||||
```bash
|
||||
ollama pull nemotron-3-nano:30b
|
||||
```
|
||||
|
||||
Run the installer:
|
||||
|
||||
```bash
|
||||
curl -fsSL https://www.nvidia.com/nemoclaw.sh | \
|
||||
NEMOCLAW_NON_INTERACTIVE=1 \
|
||||
NEMOCLAW_PROVIDER=ollama \
|
||||
NEMOCLAW_MODEL=nemotron-3-nano:30b \
|
||||
bash
|
||||
```
|
||||
|
||||
Connect to your sandbox:
|
||||
|
||||
```bash
|
||||
nemoclaw my-assistant connect
|
||||
```
|
||||
|
||||
Open the TUI:
|
||||
|
||||
```bash
|
||||
openclaw tui
|
||||
```
|
||||
|
||||
<Note>Ollama support in NemoClaw is still experimental.</Note>
|
||||
|
||||
## Platform support
|
||||
|
||||
| Platform | Runtime | Status |
|
||||
|----------|---------|--------|
|
||||
| Linux (Ubuntu 22.04+) | Docker | Primary |
|
||||
| macOS (Apple Silicon) | Colima or Docker Desktop | Supported |
|
||||
| Windows | WSL2 with Docker Desktop | Supported |
|
||||
|
||||
CMD and PowerShell are not supported on Windows — WSL2 is required.
|
||||
|
||||
<Note>Ollama must be installed and running before the installer runs. When running inside WSL2 or a container, ensure Ollama is reachable from the sandbox (e.g. `OLLAMA_HOST=0.0.0.0`).</Note>
|
||||
|
||||
## System requirements
|
||||
|
||||
- CPU: 4 vCPU minimum
|
||||
- RAM: 8 GB minimum (16 GB recommended)
|
||||
- Disk: 20 GB free (40 GB recommended for local models)
|
||||
- Node.js 20+ and npm 10+
|
||||
- Container runtime (Docker preferred)
|
||||
|
||||
## Recommended models
|
||||
|
||||
- `nemotron-3-super:cloud` — Strong reasoning and coding
|
||||
- `qwen3.5:cloud` — 397B; reasoning and code generation
|
||||
- `nemotron-3-nano:30b` — Recommended local model; fits in 24 GB VRAM
|
||||
- `qwen3.5:27b` — Fast local reasoning (~18 GB VRAM)
|
||||
- `glm-4.7-flash` — Reasoning and code generation (~25 GB VRAM)
|
||||
|
||||
More models at [ollama.com/search](https://ollama.com/search).
|
||||
@@ -214,6 +214,8 @@ func LogLevel() slog.Level {
|
||||
var (
|
||||
// FlashAttention enables the experimental flash attention feature.
|
||||
FlashAttention = BoolWithDefault("OLLAMA_FLASH_ATTENTION")
|
||||
// DebugLogRequests logs inference requests to disk for replay/debugging.
|
||||
DebugLogRequests = Bool("OLLAMA_DEBUG_LOG_REQUESTS")
|
||||
// KvCacheType is the quantization type for the K/V cache.
|
||||
KvCacheType = String("OLLAMA_KV_CACHE_TYPE")
|
||||
// NoHistory disables readline history.
|
||||
@@ -302,28 +304,29 @@ type EnvVar struct {
|
||||
|
||||
func AsMap() map[string]EnvVar {
|
||||
ret := map[string]EnvVar{
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
|
||||
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
|
||||
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
|
||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
|
||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
||||
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
||||
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|
||||
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
|
||||
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
|
||||
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
|
||||
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
|
||||
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
|
||||
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
|
||||
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", LogLevel(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_DEBUG_LOG_REQUESTS": {"OLLAMA_DEBUG_LOG_REQUESTS", DebugLogRequests(), "Log inference request bodies and replay curl commands to a temp directory"},
|
||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(false), "Enabled flash attention"},
|
||||
"OLLAMA_KV_CACHE_TYPE": {"OLLAMA_KV_CACHE_TYPE", KvCacheType(), "Quantization type for the K/V cache (default: f16)"},
|
||||
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
|
||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
|
||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
||||
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
||||
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|
||||
"OLLAMA_NO_CLOUD": {"OLLAMA_NO_CLOUD", NoCloud(), "Disable Ollama cloud features (remote inference and web search)"},
|
||||
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
|
||||
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
|
||||
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
|
||||
"OLLAMA_CONTEXT_LENGTH": {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4k/32k/256k based on VRAM)"},
|
||||
"OLLAMA_EDITOR": {"OLLAMA_EDITOR", Editor(), "Path to editor for interactive prompt editing (Ctrl+G)"},
|
||||
"OLLAMA_NEW_ENGINE": {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
|
||||
"OLLAMA_REMOTES": {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},
|
||||
|
||||
// Informational
|
||||
"HTTP_PROXY": {"HTTP_PROXY", String("HTTP_PROXY")(), "HTTP proxy"},
|
||||
|
||||
@@ -874,7 +874,7 @@ func (f GGML) SupportsFlashAttention() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
if slices.Contains([]string{"gemma2"}, arch) {
|
||||
if slices.Contains([]string{"gemma2", "grok"}, arch) {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -14,4 +14,15 @@ The integration tests have 2 modes of operating.
|
||||
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
|
||||
|
||||
|
||||
Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
|
||||
## Testing a New Model
|
||||
|
||||
When implementing new model architecture, use `OLLAMA_TEST_MODEL` to run the
|
||||
integration suite against your model.
|
||||
|
||||
```bash
|
||||
# Build the binary first
|
||||
go build .
|
||||
|
||||
# Run integration tests against it
|
||||
OLLAMA_TEST_MODEL=mymodel go test -tags integration -v -count 1 -timeout 15m ./integration/
|
||||
```
|
||||
|
||||
@@ -48,9 +48,7 @@ func TestAPIGenerate(t *testing.T) {
|
||||
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -151,7 +149,11 @@ func TestAPIGenerate(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// Validate PS while we're at it...
|
||||
// Validate PS while we're at it — skip for local-only models
|
||||
// which may lack metadata fields like family, parameter_size, etc.
|
||||
if testModel != "" {
|
||||
return
|
||||
}
|
||||
resp, err := client.ListRunning(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("list models API error: %s", err)
|
||||
@@ -208,9 +210,7 @@ func TestAPIChat(t *testing.T) {
|
||||
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -311,6 +311,9 @@ func TestAPIChat(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAPIListModels(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("skipping metadata test with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
@@ -361,6 +364,9 @@ func verifyModelDetails(t *testing.T, details api.ModelDetails) {
|
||||
}
|
||||
|
||||
func TestAPIShowModel(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("skipping metadata test with model override")
|
||||
}
|
||||
modelName := "llama3.2"
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
||||
defer cancel()
|
||||
@@ -400,6 +406,10 @@ func TestAPIShowModel(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAPIGenerateLogprobs(t *testing.T) {
|
||||
if testModel != "" {
|
||||
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
|
||||
t.Skip("logprobs not supported by all runners")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
@@ -513,6 +523,10 @@ func TestAPIGenerateLogprobs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAPIChatLogprobs(t *testing.T) {
|
||||
if testModel != "" {
|
||||
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
|
||||
t.Skip("logprobs not supported by all runners")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
|
||||
@@ -35,6 +35,9 @@ func TestBlueSky(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestUnicode(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
skipUnderMinVRAM(t, 6)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
@@ -59,9 +62,7 @@ func TestUnicode(t *testing.T) {
|
||||
}
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
slog.Info("loading", "model", req.Model)
|
||||
err := client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
|
||||
if err != nil {
|
||||
@@ -81,6 +82,9 @@ func TestUnicode(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestExtendedUnicodeOutput(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
// Set up the test data
|
||||
@@ -100,9 +104,7 @@ func TestExtendedUnicodeOutput(t *testing.T) {
|
||||
}
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
DoChat(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
|
||||
}
|
||||
|
||||
@@ -148,15 +150,16 @@ func TestUnicodeModelDir(t *testing.T) {
|
||||
// TestNumPredict verifies that when num_predict is set, the model generates
|
||||
// exactly that many tokens. It uses logprobs to count the actual tokens output.
|
||||
func TestNumPredict(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
if err := PullIfMissing(ctx, client, "qwen3:0.6b"); err != nil {
|
||||
t.Fatalf("failed to pull model: %v", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, "qwen3:0.6b")
|
||||
|
||||
req := api.GenerateRequest{
|
||||
Model: "qwen3:0.6b",
|
||||
|
||||
@@ -67,6 +67,9 @@ func TestConcurrentChat(t *testing.T) {
|
||||
// Stress the scheduler and attempt to load more models than will fit to cause thrashing
|
||||
// This test will always load at least 2 models even on CPU based systems
|
||||
func TestMultiModelStress(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded models, not applicable with model override")
|
||||
}
|
||||
s := os.Getenv("OLLAMA_MAX_VRAM")
|
||||
if s == "" {
|
||||
s = "0"
|
||||
@@ -114,9 +117,7 @@ func TestMultiModelStress(t *testing.T) {
|
||||
|
||||
// Make sure all the models are pulled before we get started
|
||||
for _, model := range chosenModels {
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
}
|
||||
|
||||
// Determine how many models we can load in parallel before we exceed VRAM
|
||||
|
||||
@@ -38,9 +38,7 @@ func TestLongInputContext(t *testing.T) {
|
||||
}
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("PullIfMissing failed: %v", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
DoChat(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
|
||||
}
|
||||
|
||||
@@ -70,14 +68,15 @@ func TestContextExhaustion(t *testing.T) {
|
||||
}
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatalf("PullIfMissing failed: %v", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
DoChat(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
|
||||
}
|
||||
|
||||
// Send multiple generate requests with prior context and ensure the response is coherant and expected
|
||||
func TestParallelGenerateWithHistory(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
modelName := "gpt-oss:20b"
|
||||
req, resp := GenerateRequests()
|
||||
numParallel := 2
|
||||
@@ -133,6 +132,12 @@ func TestParallelGenerateWithHistory(t *testing.T) {
|
||||
|
||||
// Send generate requests with prior context and ensure the response is coherant and expected
|
||||
func TestGenerateWithHistory(t *testing.T) {
|
||||
if testModel != "" {
|
||||
// The Generate API's Context field (token array continuation) is not
|
||||
// supported by all runners (e.g. MLX). Chat history works; this is
|
||||
// the only generate-specific continuation path.
|
||||
t.Skip("generate context continuation not supported by all runners")
|
||||
}
|
||||
req := api.GenerateRequest{
|
||||
Model: smol,
|
||||
Prompt: rainbowPrompt,
|
||||
@@ -173,6 +178,9 @@ func TestGenerateWithHistory(t *testing.T) {
|
||||
|
||||
// Send multiple chat requests with prior context and ensure the response is coherant and expected
|
||||
func TestParallelChatWithHistory(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
modelName := "gpt-oss:20b"
|
||||
req, resp := ChatRequests()
|
||||
numParallel := 2
|
||||
|
||||
@@ -78,8 +78,11 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
for _, model := range libraryEmbedModels {
|
||||
for _, model := range testModels(libraryEmbedModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "embedding")
|
||||
}
|
||||
testCases := []struct {
|
||||
a string
|
||||
b string
|
||||
@@ -145,6 +148,9 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllMiniLMEmbeddings(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
@@ -175,6 +181,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllMiniLMEmbed(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
@@ -212,6 +221,9 @@ func TestAllMiniLMEmbed(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllMiniLMBatchEmbed(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
@@ -259,6 +271,9 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAllMiniLMEmbedTruncate(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
@@ -397,21 +412,13 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
|
||||
|
||||
func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
|
||||
t.Helper()
|
||||
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
return client.Embeddings(ctx, &req)
|
||||
}
|
||||
|
||||
func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
|
||||
t.Helper()
|
||||
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
return client.Embed(ctx, &req)
|
||||
}
|
||||
|
||||
@@ -426,9 +433,12 @@ func TestEmbedTruncation(t *testing.T) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
for _, model := range libraryEmbedModels {
|
||||
for _, model := range testModels(libraryEmbedModels) {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "embedding")
|
||||
}
|
||||
// Check if we're running out of time (reserve 20s for current model)
|
||||
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
|
||||
t.Skip("skipping remaining tests to avoid timeout")
|
||||
@@ -494,9 +504,12 @@ func TestEmbedLargeInput(t *testing.T) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
for _, model := range libraryEmbedModels {
|
||||
for _, model := range testModels(libraryEmbedModels) {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "embedding")
|
||||
}
|
||||
mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
|
||||
defer mcancel()
|
||||
|
||||
@@ -559,9 +572,12 @@ func TestEmbedStatusCode(t *testing.T) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
for _, model := range libraryEmbedModels {
|
||||
for _, model := range testModels(libraryEmbedModels) {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "embedding")
|
||||
}
|
||||
// Check if we're running out of time (reserve 20s for current model)
|
||||
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
|
||||
t.Skip("skipping remaining tests to avoid timeout")
|
||||
@@ -571,9 +587,7 @@ func TestEmbedStatusCode(t *testing.T) {
|
||||
defer mcancel()
|
||||
|
||||
// Pull the model if needed
|
||||
if err := PullIfMissing(mctx, client, model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(mctx, t, client, model)
|
||||
|
||||
t.Run("truncation error status code", func(t *testing.T) {
|
||||
truncFalse := false
|
||||
|
||||
@@ -14,6 +14,9 @@ import (
|
||||
)
|
||||
|
||||
func TestImageGeneration(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded models, not applicable with model override")
|
||||
}
|
||||
skipUnderMinVRAM(t, 8)
|
||||
|
||||
type testCase struct {
|
||||
@@ -41,12 +44,8 @@ func TestImageGeneration(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
// Pull both models
|
||||
if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
|
||||
t.Fatalf("failed to pull image gen model: %v", err)
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
|
||||
t.Fatalf("failed to pull vision model: %v", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, tc.imageGenModel)
|
||||
pullOrSkip(ctx, t, client, tc.visionModel)
|
||||
|
||||
// Generate the image
|
||||
t.Logf("Generating image with prompt: %s", tc.prompt)
|
||||
|
||||
@@ -24,15 +24,12 @@ func TestLibraryModelsChat(t *testing.T) {
|
||||
defer cleanup()
|
||||
targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
|
||||
|
||||
chatModels := libraryChatModels
|
||||
for _, model := range chatModels {
|
||||
for _, model := range testModels(libraryChatModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
if targetArch != "" {
|
||||
resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
|
||||
if err != nil {
|
||||
|
||||
@@ -13,39 +13,35 @@ import (
|
||||
|
||||
func TestVisionModels(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
type testCase struct {
|
||||
model string
|
||||
}
|
||||
testCases := []testCase{
|
||||
{
|
||||
model: "qwen2.5vl",
|
||||
},
|
||||
{
|
||||
model: "llama3.2-vision",
|
||||
},
|
||||
{
|
||||
model: "gemma3",
|
||||
},
|
||||
{
|
||||
model: "qwen3-vl:8b",
|
||||
},
|
||||
{
|
||||
// Qwen 3 VL mixture of experts
|
||||
model: "qwen3-vl:30b",
|
||||
},
|
||||
{
|
||||
model: "ministral-3",
|
||||
},
|
||||
|
||||
defaultVisionModels := []string{
|
||||
"qwen2.5vl",
|
||||
"llama3.2-vision",
|
||||
"gemma3",
|
||||
"qwen3-vl:8b",
|
||||
"qwen3-vl:30b",
|
||||
"ministral-3",
|
||||
}
|
||||
|
||||
for _, v := range testCases {
|
||||
t.Run(v.model, func(t *testing.T) {
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "vision")
|
||||
}
|
||||
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
|
||||
image, err := base64.StdEncoding.DecodeString(imageEncoding)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
req := api.ChatRequest{
|
||||
Model: v.model,
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
@@ -61,16 +57,7 @@ func TestVisionModels(t *testing.T) {
|
||||
"temperature": 0.0,
|
||||
},
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
|
||||
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
|
||||
resp := "the ollam"
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// Preload to skip if we're less than 80% on GPU to avoid extremely slow tests
|
||||
err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
|
||||
if err != nil {
|
||||
@@ -78,13 +65,17 @@ func TestVisionModels(t *testing.T) {
|
||||
}
|
||||
skipIfNotGPULoaded(ctx, t, client, req.Model, 80)
|
||||
|
||||
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
|
||||
// llava models on CPU can be quite slow to start
|
||||
DoChat(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
|
||||
DoChat(ctx, t, client, req, []string{"the ollam"}, 240*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegrationSplitBatch(t *testing.T) {
|
||||
if testModel != "" {
|
||||
t.Skip("uses hardcoded model, not applicable with model override")
|
||||
}
|
||||
skipUnderMinVRAM(t, 6)
|
||||
image, err := base64.StdEncoding.DecodeString(imageEncoding)
|
||||
if err != nil {
|
||||
@@ -111,9 +102,7 @@ func TestIntegrationSplitBatch(t *testing.T) {
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
// llava models on CPU can be quite slow to start,
|
||||
DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
|
||||
}
|
||||
|
||||
@@ -45,9 +45,7 @@ func TestMaxQueue(t *testing.T) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
|
||||
// Context for the worker threads so we can shut them down
|
||||
// embedCtx, embedCancel := context.WithCancel(ctx)
|
||||
|
||||
@@ -46,14 +46,12 @@ func TestModelsChat(t *testing.T) {
|
||||
chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
|
||||
}
|
||||
|
||||
for _, model := range chatModels {
|
||||
for _, model := range testModels(chatModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
if maxVram > 0 {
|
||||
resp, err := client.List(ctx)
|
||||
if err != nil {
|
||||
@@ -133,14 +131,15 @@ func TestModelsEmbed(t *testing.T) {
|
||||
t.Fatalf("failed to load test data: %s", err)
|
||||
}
|
||||
for model, expected := range testCase {
|
||||
if testModel != "" && model != testModel {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
if maxVram > 0 {
|
||||
resp, err := client.List(ctx)
|
||||
if err != nil {
|
||||
|
||||
@@ -87,9 +87,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
}
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
var maxContext int
|
||||
|
||||
resp, err := client.Show(ctx, &api.ShowRequest{Model: model})
|
||||
|
||||
@@ -33,9 +33,7 @@ func TestQuantization(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
for _, base := range sourceModels {
|
||||
if err := PullIfMissing(ctx, client, base); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, base)
|
||||
for _, quant := range quantizations {
|
||||
newName := fmt.Sprintf("%s__%s", base, quant)
|
||||
t.Run(newName, func(t *testing.T) {
|
||||
|
||||
523
integration/tools_stress_test.go
Normal file
523
integration/tools_stress_test.go
Normal file
@@ -0,0 +1,523 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// TestAPIToolCallingStress tests tool calling with complex, agent-style prompts
|
||||
// that include large system messages, multiple tools, and multi-turn conversations.
|
||||
// This catches cache corruption and parser bugs that simple tool tests miss.
|
||||
func TestAPIToolCallingStress(t *testing.T) {
|
||||
initialTimeout := 120 * time.Second
|
||||
streamTimeout := 120 * time.Second
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
minVRAM := map[string]uint64{
|
||||
"qwen3-vl": 16,
|
||||
"gpt-oss:20b": 16,
|
||||
"gpt-oss:120b": 70,
|
||||
"qwen3": 6,
|
||||
"llama3.1": 8,
|
||||
"llama3.2": 4,
|
||||
"mistral": 6,
|
||||
"qwen2.5": 6,
|
||||
"qwen2": 6,
|
||||
"ministral-3": 20,
|
||||
"mistral-nemo": 9,
|
||||
"mistral-small": 16,
|
||||
"mixtral:8x22b": 80,
|
||||
"qwq": 20,
|
||||
"granite3.3": 7,
|
||||
}
|
||||
|
||||
// Models that don't reliably produce tool calls with complex/multi-tool prompts.
|
||||
// The stress test uses a large system prompt with many tools, simulating coding agents.
|
||||
// Some models are too small, too slow, or not designed for this use case.
|
||||
skipModels := map[string]string{
|
||||
"lfm2.5-thinking": "returns text instead of tool calls with complex system prompts",
|
||||
"qwen3-vl": "vision model, extremely slow with complex tool prompts",
|
||||
"llama3.2": "3B model too small for reliable multi-tool agent prompts",
|
||||
"mistral": "7B v0.3 returns text instead of tool calls with complex prompts",
|
||||
"mixtral:8x22b": "returns text instead of tool calls with complex prompts",
|
||||
"qwen2": "returns text instead of tool calls with complex prompts",
|
||||
"granite3.3": "returns text instead of tool calls with complex prompts",
|
||||
}
|
||||
|
||||
models := testModels(libraryToolsModels)
|
||||
|
||||
for _, model := range models {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
// Skip known-bad models unless explicitly requested via env var
|
||||
if reason, ok := skipModels[model]; ok && testModel == "" {
|
||||
t.Skipf("skipping: %s", reason)
|
||||
}
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "tools")
|
||||
}
|
||||
if v, ok := minVRAM[model]; ok {
|
||||
skipUnderMinVRAM(t, v)
|
||||
}
|
||||
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
|
||||
tools := stressTestTools()
|
||||
|
||||
// Large system prompt that mimics real coding agents (opencode, Claude Code, etc.)
|
||||
// This is intentionally very long (~5000+ tokens) to match the prompt sizes that
|
||||
// real coding agents send. The combination of a large system prompt, many tools,
|
||||
// and thinking mode is what triggers failures in some models.
|
||||
systemPrompt := stressTestSystemPrompt()
|
||||
|
||||
// Test 1: First request (fresh prompt processing)
|
||||
// Use a direct prompt that tells the model exactly what tool to use,
|
||||
// reducing the chance it asks for clarification instead.
|
||||
t.Run("first_request", func(t *testing.T) {
|
||||
testToolCall(t, ctx, client, model, systemPrompt, tools,
|
||||
"Run git diff main to review the code changes on the current branch.",
|
||||
initialTimeout, streamTimeout)
|
||||
})
|
||||
|
||||
// Test 2: Repeat with same prompt (tests cache reuse)
|
||||
t.Run("cached_request", func(t *testing.T) {
|
||||
testToolCall(t, ctx, client, model, systemPrompt, tools,
|
||||
"Run git diff main to review the code changes on the current branch.",
|
||||
initialTimeout, streamTimeout)
|
||||
})
|
||||
|
||||
// Test 3: Different user message (partial cache hit)
|
||||
t.Run("different_user_message", func(t *testing.T) {
|
||||
testToolCall(t, ctx, client, model, systemPrompt, tools,
|
||||
"Read the file at ./go.mod and tell me what dependencies we have.",
|
||||
initialTimeout, streamTimeout)
|
||||
})
|
||||
|
||||
// Test 4: Multi-turn with tool response
|
||||
t.Run("multi_turn", func(t *testing.T) {
|
||||
testToolCallMultiTurn(t, ctx, client, model, systemPrompt, tools,
|
||||
initialTimeout, streamTimeout)
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func newTool(name, description string, required []string, props map[string]api.ToolProperty) api.Tool {
|
||||
return api.Tool{
|
||||
Type: "function",
|
||||
Function: api.ToolFunction{
|
||||
Name: name,
|
||||
Description: description,
|
||||
Parameters: api.ToolFunctionParameters{
|
||||
Type: "object",
|
||||
Required: required,
|
||||
Properties: testPropsMap(props),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// stressTestTools returns a set of tools matching the scale and verbosity of
// real coding agent tool definitions (opencode, Claude Code, etc.). The tool
// descriptions are intentionally verbose to match real-world prompt sizes.
// Every tool name here must also appear in validStressTools so the test
// assertions can recognize legitimate calls.
func stressTestTools() []api.Tool {
	return []api.Tool{
		// Shell execution.
		newTool("bash", "Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures. All commands run in the working directory by default. Before executing the command, verify that the parent directory exists. Always quote file paths that contain spaces with double quotes. After ensuring proper quoting, execute the command and capture the output. Avoid using bash with find, grep, cat, head, tail, sed, awk, or echo commands unless explicitly instructed. Instead, always prefer using the dedicated tools for these commands. When issuing multiple commands, if they are independent and can run in parallel, make multiple tool calls in a single message.",
			[]string{"command"},
			map[string]api.ToolProperty{
				"command":     {Type: api.PropertyType{"string"}, Description: "The bash command to execute"},
				"description": {Type: api.PropertyType{"string"}, Description: "Short description of what this command does in 5-10 words"},
				"timeout":     {Type: api.PropertyType{"number"}, Description: "Optional timeout in milliseconds. If not specified, commands will time out after 120000ms (2 minutes)"},
			}),
		// Read-only filesystem inspection.
		newTool("read", "Read a file or directory from the local filesystem. If the path does not exist, an error is returned. By default, this tool returns up to 2000 lines from the start of the file. The offset parameter is the line number to start from (1-indexed). To read later sections, call this tool again with a larger offset. Use the grep tool to find specific content in large files or files with long lines. If you are unsure of the correct file path, use the glob tool to look up filenames by glob pattern. Contents are returned with each line prefixed by its line number. Any line longer than 2000 characters is truncated. Call this tool in parallel when you know there are multiple files you want to read. Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window. This tool can read image files and PDFs and return them as file attachments.",
			[]string{"path"},
			map[string]api.ToolProperty{
				"path":   {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to read"},
				"offset": {Type: api.PropertyType{"number"}, Description: "Line number to start reading from (1-indexed)"},
				"limit":  {Type: api.PropertyType{"number"}, Description: "Maximum number of lines to read"},
			}),
		newTool("glob", "Fast file pattern matching tool that works with any codebase size. Supports glob patterns like '**/*.js' or 'src/**/*.ts'. Returns matching file paths sorted by modification time. Use this tool when you need to find files by name patterns. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead. You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The glob pattern to match files against"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
			}),
		newTool("grep", "Fast content search tool that works with any codebase size. Searches file contents using regular expressions. Supports full regex syntax (eg. 'log.*Error', 'function\\s+\\w+'). Filter files by pattern with the include parameter (eg. '*.js', '*.{ts,tsx}'). Returns file paths and line numbers with at least one match sorted by modification time. Use this tool when you need to find files containing specific patterns. If you need to identify or count the number of matches within files, use the bash tool with rg (ripgrep) directly. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead.",
			[]string{"pattern"},
			map[string]api.ToolProperty{
				"pattern": {Type: api.PropertyType{"string"}, Description: "The regex pattern to search for in file contents"},
				"path":    {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
				"include": {Type: api.PropertyType{"string"}, Description: "File pattern to include (eg. '*.js', '*.{ts,tsx}')"},
			}),
		// Filesystem mutation.
		newTool("edit", "Performs exact string replacements in files. You must use your read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. When editing text from read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears after the line number prefix. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Only use emojis if the user explicitly requests it. The edit will fail if oldString is not found in the file. The edit will fail if oldString is found multiple times in the file. Use replaceAll for replacing and renaming strings across the file.",
			[]string{"path", "old_string", "new_string"},
			map[string]api.ToolProperty{
				"path":       {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to modify"},
				"old_string": {Type: api.PropertyType{"string"}, Description: "The text to replace (must be unique in the file)"},
				"new_string": {Type: api.PropertyType{"string"}, Description: "The replacement text"},
			}),
		newTool("write", "Writes a file to the local filesystem. This tool will overwrite the existing file if there is one at the provided path. If this is an existing file, you must use the read tool first to read the file contents. This tool will fail if you did not read the file first. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Never proactively create documentation files or README files. Only create documentation files if explicitly requested by the user.",
			[]string{"path", "content"},
			map[string]api.ToolProperty{
				"path":    {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to write"},
				"content": {Type: api.PropertyType{"string"}, Description: "The content to write to the file"},
			}),
		// User interaction and orchestration.
		newTool("question", "Use this tool when you need to ask the user questions during execution. This allows you to gather user preferences or requirements, clarify ambiguous instructions, get decisions on implementation choices as you work, and offer choices to the user about what direction to take. When custom is enabled (default), a 'Type your own answer' option is added automatically. Answers are returned as arrays of labels. Set multiple to true to allow selecting more than one answer. If you recommend a specific option, make that the first option in the list and add '(Recommended)' at the end of the label.",
			[]string{"questions"},
			map[string]api.ToolProperty{
				"questions": {Type: api.PropertyType{"string"}, Description: "The question to ask the user"},
			}),
		newTool("task", "Launch a new agent to handle complex, multistep tasks autonomously. Available agent types: general (general-purpose agent for researching complex questions and executing multi-step tasks, use this to execute multiple units of work in parallel) and explore (fast agent specialized for exploring codebases, use this when you need to quickly find files by patterns, search code for keywords, or answer questions about the codebase). Launch multiple agents concurrently whenever possible to maximize performance. When the agent is done, it will return a single message back to you. Each agent invocation starts with a fresh context unless you provide task_id to resume the same subagent session.",
			[]string{"description", "prompt", "subagent_type"},
			map[string]api.ToolProperty{
				"description":   {Type: api.PropertyType{"string"}, Description: "A short (3-5 word) description of the task"},
				"prompt":        {Type: api.PropertyType{"string"}, Description: "The task for the agent to perform"},
				"subagent_type": {Type: api.PropertyType{"string"}, Description: "The type of specialized agent to use (general or explore)"},
			}),
		newTool("webfetch", "Fetches content from a specified URL. Takes a URL and optional format as input. Fetches the URL content, converts to requested format (markdown by default). Returns the content in the specified format. Use this tool when you need to retrieve and analyze web content. The URL must be a fully-formed valid URL. HTTP URLs will be automatically upgraded to HTTPS. Format options: markdown (default), text, or html. This tool is read-only and does not modify any files. Results may be summarized if the content is very large.",
			[]string{"url", "format"},
			map[string]api.ToolProperty{
				"url":    {Type: api.PropertyType{"string"}, Description: "The URL to fetch content from"},
				"format": {Type: api.PropertyType{"string"}, Description: "Output format: markdown (default), text, or html"},
			}),
		newTool("todowrite", "Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. Use this tool proactively when handling complex multistep tasks, non-trivial and complex tasks, when the user explicitly requests a todo list, when the user provides multiple tasks, after receiving new instructions, and after completing a task. Do not use this tool when there is only a single straightforward task, the task is trivial, the task can be completed in less than 3 steps, or the task is purely conversational.",
			[]string{"todos"},
			map[string]api.ToolProperty{
				"todos": {Type: api.PropertyType{"string"}, Description: "JSON array of todo items with id, title, and status fields"},
			}),
		newTool("skill", "Load a specialized skill that provides domain-specific instructions and workflows. Skills contain curated prompts and tool configurations for specific tasks like code review, testing, deployment, and documentation. Use this tool when the user's request matches an available skill description.",
			[]string{"name"},
			map[string]api.ToolProperty{
				"name": {Type: api.PropertyType{"string"}, Description: "The name of the skill to load"},
			}),
	}
}
|
||||
|
||||
// stressTestSystemPrompt returns a system prompt that matches the scale and
// content of real coding agent system prompts (~5000+ tokens). This is based
// on actual prompts captured from opencode sessions. The prompt size combined
// with many tool declarations is what pushes models past their effective
// context handling and triggers tag leakage / broken tool calls.
func stressTestSystemPrompt() string {
	// NOTE: the raw string below is test fixture data; its exact content
	// (including blank lines) is intentional and should not be reformatted.
	return `You are opencode, an interactive CLI tool that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user.

IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
IMPORTANT: You must NEVER generate or guess URLs for the user unless you are confident that the URLs are for helping the user with programming. You may use URLs provided by the user in their messages or local files.

If the user asks for help or wants to give feedback inform them of the following:
- /help: Get help with using opencode
- To give feedback, users should report the issue at https://github.com/sampleorg/opencode/issues

# Tone and style
You should be concise, direct, and to the point. When you run a non-trivial bash command, you should explain what the command does and why you are running it, to make sure the user understands what you are doing (this is especially important when you are running a command that will make changes to the user's system).
Remember that your output will be displayed on a command line interface. Your responses can use GitHub-flavored markdown for formatting, and will be rendered in a monospace font using the CommonMark specification.
Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like Bash or code comments as means to communicate with the user during the session.
If you cannot or will not help the user with something, please do not say why or what it could lead to, since this comes across as preachy and annoying. Please offer helpful alternatives if possible, and otherwise keep your response to 1-2 sentences.
Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do.
IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to.
IMPORTANT: Keep your responses short, since they will be displayed on a command line interface. You MUST answer concisely with fewer than 4 lines (not including tool use or code generation), unless user asks for detail. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". Here are some examples to demonstrate appropriate verbosity:

user: 2 + 2
assistant: 4

user: what is 2+2?
assistant: 4

user: is 11 a prime number?
assistant: Yes

user: what command should I run to list files in the current directory?
assistant: ls

user: what command should I run to watch files in the current directory?
assistant: [use the ls tool to list the files in the current directory, then read docs/commands in the relevant file to find out how to watch files]
npm run dev

user: How many golf balls fit inside a jetta?
assistant: 150000

user: what files are in the directory src/?
assistant: [runs ls and sees foo.c, bar.c, baz.c]
user: which file contains the implementation of foo?
assistant: src/foo.c

user: write tests for new feature
assistant: [uses grep and glob search tools to find where similar tests are defined, uses concurrent read file tool use blocks in one tool call to read relevant files at the same time, uses edit file tool to write new tests]

# Proactiveness
You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between:
1. Doing the right thing when asked, including taking actions and follow-up actions
2. Not surprising the user with actions you take without asking
For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions.
3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did.

# Following conventions
When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns.
- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language).
- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions.
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.

# Code style
- IMPORTANT: DO NOT ADD ANY COMMENTS unless asked

# Doing tasks
The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended:
- Use the available search tools to understand the codebase and the user's query. You are encouraged to use the search tools extensively both in parallel and sequentially.
- Implement the solution using all tools available to you
- Verify the solution if possible with tests. NEVER assume specific test framework or test script. Check the README or search codebase to determine the testing approach.
- VERY IMPORTANT: When you have completed a task, you MUST run the lint and typecheck commands (e.g. npm run lint, npm run typecheck, ruff, etc.) with Bash if they were provided to you to ensure your code is correct. If you are unable to find the correct command, ask the user for the command to run and if they supply it, proactively suggest writing it to AGENTS.md so that you will know to run it next time.
NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.

# Tool usage policy
- When doing file search, prefer to use the Task tool in order to reduce context usage.
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. When making multiple bash tool calls, you MUST send a single message with multiple tools calls to run the calls in parallel.

You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.

# Code References
When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.

# Git workflow
When working with git:
- Create descriptive commit messages that explain WHY not just WHAT
- Use conventional commit format: feat:, fix:, refactor:, docs:, test:, chore:
- Check git status before and after operations
- Never force push to main/master
- Review diffs before committing
- NEVER update the git config
- NEVER run destructive/irreversible git commands unless the user explicitly requests them
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- Avoid git commit --amend unless explicitly requested by the user
- NEVER commit changes unless the user explicitly asks you to

# Safety
- Never delete files without confirmation
- Never run destructive commands (rm -rf, DROP TABLE, etc.) without confirmation
- Always validate inputs before using them in shell commands
- Be careful with environment variables and secrets
- Do not expose API keys, passwords, or tokens in code or logs

# Environment
Working directory: /Users/test/code/myproject
Platform: darwin
Shell: zsh
Is directory a git repo: yes
The project uses Go 1.22 with modules. Run tests with 'go test ./...' and build with 'go build ./...'.
The CI pipeline runs golangci-lint, go vet, and go test with race detector enabled.

# User instructions
Never use cd to change into the repo root or any other directory in Bash commands. The working directory is always the repo root — use relative paths directly.
Never use heredoc-style inline bash or python scripts in Bash tool calls. Instead, write the script to an ephemeral file under ./.tmp/ in the repo, then run it as a separate command.`
}
|
||||
|
||||
// validStressTools is the set of tool names used in the stress test.
// Any tool call whose name is not in this set is treated as a model or
// parser failure by testToolCall. Keep it in sync with stressTestTools.
var validStressTools = map[string]bool{
	"bash": true, "read": true, "glob": true, "grep": true,
	"edit": true, "write": true, "question": true, "task": true,
	"webfetch": true, "todowrite": true, "skill": true,
}
|
||||
|
||||
func testToolCall(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, userMessage string, initialTimeout, streamTimeout time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{Role: "system", Content: systemPrompt},
|
||||
{Role: "user", Content: userMessage},
|
||||
},
|
||||
Tools: tools,
|
||||
Options: map[string]any{
|
||||
"temperature": 0,
|
||||
"num_ctx": contextLength(16384),
|
||||
},
|
||||
}
|
||||
|
||||
stallTimer := time.NewTimer(initialTimeout)
|
||||
var gotToolCall bool
|
||||
var lastToolCall api.ToolCall
|
||||
var allContent string
|
||||
|
||||
fn := func(response api.ChatResponse) error {
|
||||
if len(response.Message.ToolCalls) > 0 {
|
||||
gotToolCall = true
|
||||
lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1]
|
||||
}
|
||||
allContent += response.Message.Content
|
||||
if !stallTimer.Reset(streamTimeout) {
|
||||
return fmt.Errorf("stall detected while streaming")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
stream := true
|
||||
req.Stream = &stream
|
||||
done := make(chan int)
|
||||
var genErr error
|
||||
go func() {
|
||||
genErr = client.Chat(ctx, &req, fn)
|
||||
done <- 0
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-stallTimer.C:
|
||||
t.Fatalf("chat stalled after %s", initialTimeout)
|
||||
case <-done:
|
||||
if genErr != nil {
|
||||
t.Fatalf("chat failed: %v", genErr)
|
||||
}
|
||||
|
||||
// Check for leaked special tags in content — these should never
|
||||
// appear in user-visible output regardless of model quality.
|
||||
checkNoLeakedTags(t, allContent)
|
||||
|
||||
// The model must produce either a tool call or a text response.
|
||||
// A text response (e.g. asking for clarification) is legitimate.
|
||||
// Empty output with no tool call indicates a parser or model failure
|
||||
// (e.g. malformed tool call that gets dropped).
|
||||
if !gotToolCall && allContent == "" {
|
||||
t.Fatal("model produced neither a tool call nor text content")
|
||||
}
|
||||
if gotToolCall {
|
||||
if !validStressTools[lastToolCall.Function.Name] {
|
||||
t.Errorf("unexpected tool: %q", lastToolCall.Function.Name)
|
||||
}
|
||||
argsJSON, _ := json.Marshal(lastToolCall.Function.Arguments)
|
||||
t.Logf("tool call: %s(%s)", lastToolCall.Function.Name, string(argsJSON))
|
||||
} else {
|
||||
t.Logf("text response (no tool call): %q", truncate(allContent, 200))
|
||||
}
|
||||
case <-ctx.Done():
|
||||
t.Fatal("context cancelled")
|
||||
}
|
||||
}
|
||||
|
||||
func testToolCallMultiTurn(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, initialTimeout, streamTimeout time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{Role: "system", Content: systemPrompt},
|
||||
{Role: "user", Content: "What files are in the current directory?"},
|
||||
{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "bash",
|
||||
Arguments: api.ToolCallFunctionArguments{},
|
||||
},
|
||||
}}},
|
||||
{Role: "tool", Content: "go.mod\ngo.sum\nmain.go\nREADME.md\n"},
|
||||
// The model should now respond with content or another tool call
|
||||
},
|
||||
Tools: tools,
|
||||
Options: map[string]any{
|
||||
"temperature": 0,
|
||||
"num_ctx": contextLength(16384),
|
||||
},
|
||||
}
|
||||
|
||||
// For the tool response arguments, set the command
|
||||
req.Messages[2].ToolCalls[0].Function.Arguments.Set("command", "ls")
|
||||
|
||||
stallTimer := time.NewTimer(initialTimeout)
|
||||
var gotResponse bool
|
||||
var allContent string
|
||||
var gotToolCall bool
|
||||
|
||||
fn := func(response api.ChatResponse) error {
|
||||
if response.Message.Content != "" {
|
||||
gotResponse = true
|
||||
allContent += response.Message.Content
|
||||
}
|
||||
if len(response.Message.ToolCalls) > 0 {
|
||||
gotToolCall = true
|
||||
gotResponse = true
|
||||
}
|
||||
if !stallTimer.Reset(streamTimeout) {
|
||||
return fmt.Errorf("stall detected")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
stream := true
|
||||
req.Stream = &stream
|
||||
done := make(chan int)
|
||||
var genErr error
|
||||
go func() {
|
||||
genErr = client.Chat(ctx, &req, fn)
|
||||
done <- 0
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-stallTimer.C:
|
||||
t.Fatalf("chat stalled after %s", initialTimeout)
|
||||
case <-done:
|
||||
if genErr != nil {
|
||||
t.Fatalf("chat failed: %v", genErr)
|
||||
}
|
||||
|
||||
checkNoLeakedTags(t, allContent)
|
||||
|
||||
if !gotResponse {
|
||||
t.Fatal("expected response (content or tool call), got nothing")
|
||||
}
|
||||
if gotToolCall {
|
||||
t.Log("multi-turn: got follow-up tool call")
|
||||
} else {
|
||||
t.Logf("multi-turn: got content response: %q", truncate(allContent, 200))
|
||||
}
|
||||
case <-ctx.Done():
|
||||
t.Fatal("context cancelled")
|
||||
}
|
||||
}
|
||||
|
||||
// checkNoLeakedTags verifies that model-internal special tags do not appear in
|
||||
// user-visible content. These tags should be consumed by the parser and never
|
||||
// passed through. If they appear, either the parser has a bug or the model is
|
||||
// generating malformed output that the parser fails to handle.
|
||||
func checkNoLeakedTags(t *testing.T, content string) {
|
||||
t.Helper()
|
||||
leakedTags := []string{
|
||||
"<|channel>", "<channel|>",
|
||||
"<|tool_call>", "<tool_call|>",
|
||||
"<|tool>", "<tool|>",
|
||||
"<|turn>", "<turn|>",
|
||||
}
|
||||
for _, tag := range leakedTags {
|
||||
if strings.Contains(content, tag) {
|
||||
t.Errorf("leaked special tag %q in content: %q", tag, truncate(content, 300))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// contextLength returns the context window size to use for test requests:
// the value of OLLAMA_CONTEXT_LENGTH when it is set to a valid integer,
// otherwise defaultVal.
func contextLength(defaultVal int) int {
	raw := os.Getenv("OLLAMA_CONTEXT_LENGTH")
	if raw == "" {
		return defaultVal
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return defaultVal
	}
	return n
}
|
||||
|
||||
// truncate limits s to at most n bytes, appending "..." when it was cut.
// Note the limit is in bytes, not runes, matching its logging-only use.
func truncate(s string, n int) string {
	if len(s) > n {
		return s[:n] + "..."
	}
	return s
}
|
||||
@@ -47,15 +47,18 @@ func TestAPIToolCalling(t *testing.T) {
|
||||
"granite3.3": 7,
|
||||
}
|
||||
|
||||
for _, model := range libraryToolsModels {
|
||||
models := testModels(libraryToolsModels)
|
||||
|
||||
for _, model := range models {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "tools")
|
||||
}
|
||||
if v, ok := minVRAM[model]; ok {
|
||||
skipUnderMinVRAM(t, v)
|
||||
}
|
||||
|
||||
if err := PullIfMissing(ctx, client, model); err != nil {
|
||||
t.Fatalf("pull failed %s", err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
|
||||
tools := []api.Tool{
|
||||
{
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -26,11 +27,17 @@ import (
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/format"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
|
||||
var (
|
||||
smol = "llama3.2:1b"
|
||||
stream = false
|
||||
|
||||
// testModel is set via OLLAMA_TEST_MODEL env var. When set, all tests
|
||||
// that loop over model lists will test only this model, and smol is
|
||||
// also overridden to use it.
|
||||
testModel string
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -288,23 +295,60 @@ var (
|
||||
|
||||
rainbowPrompt = "how do rainbows form? Be brief but factual in your reply"
|
||||
rainbowFollowups = []string{
|
||||
"Explain the physics involved in them. Be breif in your reply",
|
||||
"Explain the chemistry involved in them. Be breif in your reply",
|
||||
"Explain the physics involved in them. Be brief in your reply",
|
||||
"Explain the chemistry involved in them. Be brief in your reply",
|
||||
"What are common myths related to them? Be brief in your reply",
|
||||
"Can they form if there is no rain? Be breif in your reply",
|
||||
"Can they form if there are no clouds? Be breif in your reply",
|
||||
"Can they form if there is no rain? Be brief in your reply",
|
||||
"Can they form if there are no clouds? Be brief in your reply",
|
||||
"Do they happen on other planets? Be brief in your reply",
|
||||
}
|
||||
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter"}
|
||||
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter", "rain", "sun", "rainbow", "optical", "gold", "cloud", "planet", "prism", "fog", "ice"}
|
||||
)
|
||||
|
||||
func init() {
|
||||
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
|
||||
slog.SetDefault(logger)
|
||||
custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
|
||||
if custom != "" {
|
||||
slog.Info("setting default test model to " + custom)
|
||||
smol = custom
|
||||
|
||||
testModel = os.Getenv("OLLAMA_TEST_MODEL")
|
||||
if testModel != "" {
|
||||
slog.Info("test model override", "model", testModel)
|
||||
smol = testModel
|
||||
}
|
||||
}
|
||||
|
||||
// testModels returns the override model as a single-element slice when
|
||||
// OLLAMA_TEST_MODEL is set, otherwise returns the provided default list.
|
||||
func testModels(defaults []string) []string {
|
||||
if testModel != "" {
|
||||
return []string{testModel}
|
||||
}
|
||||
return defaults
|
||||
}
|
||||
|
||||
// requireCapability skips the test if the model does not advertise the
|
||||
// given capability. It queries the server via Show and caches nothing —
|
||||
// call it once per subtest. For local-only models where Show may not
|
||||
// return capabilities (e.g. models created via ollama create), this is
|
||||
// a best-effort check.
|
||||
func requireCapability(ctx context.Context, t *testing.T, client *api.Client, modelName string, cap model.Capability) {
|
||||
t.Helper()
|
||||
resp, err := client.Show(ctx, &api.ShowRequest{Name: modelName})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to show model %s: %v", modelName, err)
|
||||
}
|
||||
if len(resp.Capabilities) > 0 && !slices.Contains(resp.Capabilities, cap) {
|
||||
t.Skipf("model %s does not have capability %q (has %v)", modelName, cap, resp.Capabilities)
|
||||
}
|
||||
}
|
||||
|
||||
// pullOrSkip pulls a model if it isn't already present locally. If the
|
||||
// pull fails (e.g. model not in registry), the test is skipped instead
|
||||
// of failed. PullIfMissing already checks Show first, so local-only
|
||||
// models that exist will return immediately without hitting the registry.
|
||||
func pullOrSkip(ctx context.Context, t *testing.T, client *api.Client, modelName string) {
|
||||
t.Helper()
|
||||
if err := PullIfMissing(ctx, client, modelName); err != nil {
|
||||
t.Skipf("model %s not available: %v", modelName, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -540,9 +584,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
|
||||
func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) {
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
if err := PullIfMissing(ctx, client, req.Model); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pullOrSkip(ctx, t, client, req.Model)
|
||||
DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second)
|
||||
}
|
||||
|
||||
|
||||
349
integration/vision_test.go
Normal file
349
integration/vision_test.go
Normal file
@@ -0,0 +1,349 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// Default set of vision models to test. When OLLAMA_TEST_MODEL is set,
|
||||
// only that model is tested (with a capability check for vision).
|
||||
var defaultVisionModels = []string{
|
||||
"gemma3",
|
||||
"llama3.2-vision",
|
||||
"qwen2.5vl",
|
||||
"qwen3-vl:8b",
|
||||
}
|
||||
|
||||
// decodeTestImages returns the two test images (Abbey Road llamas, docs llamas).
|
||||
func decodeTestImages(t *testing.T) (abbeyRoad, docs api.ImageData) {
|
||||
t.Helper()
|
||||
var err error
|
||||
abbeyRoad, err = base64.StdEncoding.DecodeString(imageEncoding)
|
||||
if err != nil {
|
||||
t.Fatalf("decode abbey road image: %v", err)
|
||||
}
|
||||
docs, err = base64.StdEncoding.DecodeString(imageEncodingDocs)
|
||||
if err != nil {
|
||||
t.Fatalf("decode docs image: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// setupVisionModel pulls the model, preloads it, and skips if not GPU-loaded.
|
||||
func setupVisionModel(ctx context.Context, t *testing.T, client *api.Client, model string) {
|
||||
t.Helper()
|
||||
if testModel != "" {
|
||||
requireCapability(ctx, t, client, model, "vision")
|
||||
}
|
||||
pullOrSkip(ctx, t, client, model)
|
||||
err := client.Generate(ctx, &api.GenerateRequest{Model: model}, func(response api.GenerateResponse) error { return nil })
|
||||
if err != nil {
|
||||
t.Fatalf("failed to load model %s: %s", model, err)
|
||||
}
|
||||
skipIfNotGPULoaded(ctx, t, client, model, 80)
|
||||
}
|
||||
|
||||
// TestVisionMultiTurn sends an image, gets a response, then asks follow-up
|
||||
// questions about the same image. This verifies that the KV cache correctly
|
||||
// handles cached image tokens across turns.
|
||||
func TestVisionMultiTurn(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
// Models that fail on multi-turn detail questions (e.g. misidentifying objects).
|
||||
skipModels := map[string]string{
|
||||
"gemma3": "misidentifies briefcase as smartphone on turn 3",
|
||||
"llama3.2-vision": "miscounts animals (says 3 instead of 4) on turn 2",
|
||||
}
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if reason, ok := skipModels[model]; ok && testModel == "" {
|
||||
t.Skipf("skipping: %s", reason)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
abbeyRoad, _ := decodeTestImages(t)
|
||||
|
||||
// Turn 1: describe the image
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Describe this image briefly.",
|
||||
Images: []api.ImageData{abbeyRoad},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
resp1 := DoChat(ctx, t, client, req, []string{
|
||||
"llama", "cross", "walk", "road", "animal", "cartoon",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
if resp1 == nil {
|
||||
t.Fatal("no response from turn 1")
|
||||
}
|
||||
|
||||
// Turn 2: follow-up about count
|
||||
req.Messages = append(req.Messages,
|
||||
*resp1,
|
||||
api.Message{Role: "user", Content: "How many animals are in the image?"},
|
||||
)
|
||||
resp2 := DoChat(ctx, t, client, req, []string{
|
||||
"four", "4",
|
||||
}, 60*time.Second, 30*time.Second)
|
||||
if resp2 == nil {
|
||||
t.Fatal("no response from turn 2")
|
||||
}
|
||||
|
||||
// Turn 3: follow-up about specific detail
|
||||
req.Messages = append(req.Messages,
|
||||
*resp2,
|
||||
api.Message{Role: "user", Content: "Is any animal carrying something? What is it?"},
|
||||
)
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"briefcase", "suitcase", "bag", "case", "luggage",
|
||||
}, 60*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionObjectCounting asks the model to count objects in an image.
|
||||
func TestVisionObjectCounting(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
skipModels := map[string]string{
|
||||
"llama3.2-vision": "consistently miscounts (says 3 instead of 4)",
|
||||
}
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if reason, ok := skipModels[model]; ok && testModel == "" {
|
||||
t.Skipf("skipping: %s", reason)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
_, docs := decodeTestImages(t)
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "How many animals are shown in this image? Answer with just the number.",
|
||||
Images: []api.ImageData{docs},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
DoChat(ctx, t, client, req, []string{"4", "four"}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionSceneUnderstanding tests whether the model can identify
|
||||
// cultural references and scene context from an image.
|
||||
func TestVisionSceneUnderstanding(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
// Models known to be too small or not capable enough for cultural reference detection.
|
||||
skipModels := map[string]string{
|
||||
"llama3.2-vision": "3B model lacks cultural reference knowledge",
|
||||
"minicpm-v": "too small for cultural reference detection",
|
||||
}
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if reason, ok := skipModels[model]; ok && testModel == "" {
|
||||
t.Skipf("skipping: %s", reason)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
abbeyRoad, _ := decodeTestImages(t)
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What famous image or album cover is this a parody of?",
|
||||
Images: []api.ImageData{abbeyRoad},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"abbey road", "beatles", "abbey",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionSpatialReasoning tests the model's ability to identify
|
||||
// objects based on their spatial position in the image.
|
||||
func TestVisionSpatialReasoning(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
_, docs := decodeTestImages(t)
|
||||
|
||||
// The docs image has: leftmost llama on laptop with glasses,
|
||||
// rightmost llama sleeping.
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What is the animal on the far left doing in this image?",
|
||||
Images: []api.ImageData{docs},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"laptop", "computer", "typing", "working",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionDetailRecognition tests whether the model can identify
|
||||
// small details like accessories in an image.
|
||||
func TestVisionDetailRecognition(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
_, docs := decodeTestImages(t)
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Are any of the animals wearing glasses? Describe what you see.",
|
||||
Images: []api.ImageData{docs},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"glasses", "spectacles", "eyeglasses",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionMultiImage sends two images in a single message and asks
|
||||
// the model to compare and contrast them. This exercises multi-image
|
||||
// encoding and cross-image reasoning.
|
||||
func TestVisionMultiImage(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
// Multi-image support varies across models.
|
||||
skipModels := map[string]string{
|
||||
"llama3.2-vision": "does not support multi-image input",
|
||||
}
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if reason, ok := skipModels[model]; ok && testModel == "" {
|
||||
t.Skipf("skipping: %s", reason)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
abbeyRoad, docs := decodeTestImages(t)
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "I'm showing you two images. What do they have in common, and how are they different?",
|
||||
Images: []api.ImageData{abbeyRoad, docs},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
// Both images feature cartoon llamas/alpacas — the model should
|
||||
// note the common subject and the different settings.
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"llama", "alpaca", "animal", "cartoon",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestVisionOCR tests text extraction from an image. The docs image
|
||||
// contains the text "Ollama's documentation" in a header.
|
||||
func TestVisionOCR(t *testing.T) {
|
||||
skipUnderMinVRAM(t, 6)
|
||||
|
||||
for _, model := range testModels(defaultVisionModels) {
|
||||
t.Run(model, func(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
|
||||
defer cancel()
|
||||
client, _, cleanup := InitServerConnection(ctx, t)
|
||||
defer cleanup()
|
||||
|
||||
setupVisionModel(ctx, t, client, model)
|
||||
_, docs := decodeTestImages(t)
|
||||
|
||||
req := api.ChatRequest{
|
||||
Model: model,
|
||||
Messages: []api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "What text appears in this image? Read all visible text.",
|
||||
Images: []api.ImageData{docs},
|
||||
},
|
||||
},
|
||||
Stream: &stream,
|
||||
Options: map[string]any{"temperature": 0.0, "seed": 42},
|
||||
}
|
||||
DoChat(ctx, t, client, req, []string{
|
||||
"ollama", "documentation",
|
||||
}, 120*time.Second, 30*time.Second)
|
||||
})
|
||||
}
|
||||
}
|
||||
385
integration/vision_test_data_test.go
Normal file
385
integration/vision_test_data_test.go
Normal file
@@ -0,0 +1,385 @@
|
||||
//go:build integration
|
||||
|
||||
package integration
|
||||
|
||||
// imageEncodingDocs is a 400x250 PNG of four cartoon llamas at a desk.
|
||||
// One is on a laptop wearing glasses, one writing, one reading, one sleeping.
|
||||
// The header text reads "Ollama's documentation".
|
||||
const imageEncodingDocs = `iVBORw0KGgoAAAANSUhEUgAAAZAAAAD6CAYAAACPpxFEAAAKtmlDQ1BJQ0MgUHJvZmlsZQAASImVlwdQk9kWx+/3pYeElhCKlNA70gkgJYQWQEE62AhJgEAI
|
||||
MQUFO7K4ghUVEVQWdFVAwUYRO6LYFsWGfUEWEWVdLNhQeR8wBHffvPfmnZk75zfnO/fcc+98d+Z/ASCbcMRiIawKQKZIJokM8qPHJyTScS8BDDQBHlAAicOV
|
||||
ipkREWEAsUn/d/twD0Bj/rbtWK1///5fTY3Hl3IBgCIQTuZJuZkIHwfI8lyxRAYACmFgvEgmHuP7CFMlSIMID45x6jijx+pQkyeYOp4THclC2AIAPInDkaQC
|
||||
QHJG4vRsbipShxSNsL2IJxAhnI+wd2ZmFg/hNoQtkBwxwmP1Gck/1En9W81kRU0OJ1XBE3sZN7y/QCoWcnL+z+P435YplE+uYY4MUpokOBLxusi5/ZGRFapg
|
||||
UfKs8EkW8MbzxzlNHhwzyVwpK3GSpcIo9iTzOP6hijrCWWGTnCIIVOQIZOzoSeZLA6ImWZIVqVg3RcJiTjJHMtWDPCNGEU/jsxX1c9Oi4yY5WxA7S9FbRlTo
|
||||
VA5LEZfIIxV74YuC/KbWDVScQ6b0h70L2Iq5srToYMU5cKb654uYUzWl8YreeHz/gKmcGEW+WOanWEssjFDk84VBirg0O0oxV4b8nFNzIxRnmM4JiZhkEAVk
|
||||
QA54QACyAB34I14KxEAIOCBHxl8sG9sQK0ucIxGkpsnoTOTW8elsEdfOhu5o7+gKwNgdnvhF3tHG7yZEuzoVW10NgNeJ0dHRk1OxkJsAHEkCgNgwFbOYB4Bq
|
||||
PwCXT3HlkuyJ2PhdwwAiUAFUoA30gTGwALbAEbgCT+ALAkAICAfRIAHMB1yQBjKBBCwCS8EqUACKwCawDZSBCrAHHACHwFHQBE6B8+ASuAZugrvgEegGfeAV
|
||||
GAIfwAgEQTiIDFEgbcgAMoWsIUeIAXlDAVAYFAklQElQKiSC5NBSaDVUBBVDZVAlVA0dgU5A56ErUCf0AOqBBqC30BcYBZNgKqwHm8HTYQbMhEPhaHgenAov
|
||||
hHPhfHgDXApXwQfhRvg8fA2+C3fDr+BhFEApoWgoQ5QtioFiocJRiagUlAS1HFWIKkFVoepQLah21G1UN2oQ9RmNRVPQdLQt2hMdjI5Bc9EL0cvR69Bl6APo
|
||||
RnQb+ja6Bz2E/o4hY3Qx1hgPDBsTj0nFLMIUYEow+zANmIuYu5g+zAcsFkvDmmPdsMHYBGw6dgl2HXYXth57DtuJ7cUO43A4bZw1zgsXjuPgZLgC3A7cQdxZ
|
||||
3C1cH+4TXglvgHfEB+IT8SJ8Hr4EX4M/g7+F78ePEFQJpgQPQjiBR8ghbCTsJbQQbhD6CCNENaI50YsYTUwnriKWEuuIF4mPie+UlJSMlNyVZisJlFYqlSod
|
||||
Vrqs1KP0maROsiKxSHNJctIG0n7SOdID0jsymWxG9iUnkmXkDeRq8gXyU/InZYqynTJbmae8QrlcuVH5lvJrFYKKqQpTZb5KrkqJyjGVGyqDqgRVM1WWKkd1
|
||||
uWq56gnVLtVhNYqag1q4WqbaOrUatStqL9Rx6mbqAeo89Xz1PeoX1HspKIoxhUXhUlZT9lIuUvqoWKo5lU1NpxZRD1E7qEMa6hrOGrEaizXKNU5rdNNQNDMa
|
||||
myakbaQdpd2jfdHU02Rq8jXXatZp3tL8qDVNy1eLr1WoVa91V+uLNl07QDtDe7N2k/YTHbSOlc5snUU6u3Uu6gxOo07znMadVjjt6LSHurCulW6k7hLdPbrX
|
||||
dYf19PWC9MR6O/Qu6A3q0/R99dP1t+qf0R8woBh4GwgMthqcNXhJ16Az6UJ6Kb2NPmSoaxhsKDesNOwwHDEyN4oxyjOqN3piTDRmGKcYbzVuNR4yMTCZabLU
|
||||
pNbkoSnBlGGaZrrdtN30o5m5WZzZGrMmsxfmWuZs81zzWvPHFmQLH4uFFlUWdyyxlgzLDMtdljetYCsXqzSrcqsb1rC1q7XAepd1pw3Gxt1GZFNl02VLsmXa
|
||||
ZtvW2vbY0ezC7PLsmuxeTzeZnjh98/T26d/tXeyF9nvtHzmoO4Q45Dm0OLx1tHLkOpY73nEiOwU6rXBqdnrjbO3Md97tfN+F4jLTZY1Lq8s3VzdXiWud64Cb
|
||||
iVuS2063LgaVEcFYx7jsjnH3c1/hfsr9s4erh8zjqMdfnraeGZ41ni9mmM/gz9g7o9fLyIvjVenV7U33TvL+xbvbx9CH41Pl88zX2Jfnu8+3n2nJTGceZL72
|
||||
s/eT+DX4fWR5sJaxzvmj/IP8C/07AtQDYgLKAp4GGgWmBtYGDgW5BC0JOheMCQ4N3hzcxdZjc9nV7KEQt5BlIW2hpNCo0LLQZ2FWYZKwlpnwzJCZW2Y+nmU6
|
||||
SzSrKRyEs8O3hD+JMI9YGHFyNnZ2xOzy2c8jHSKXRrZHUaIWRNVEfYj2i94Y/SjGIkYe0xqrEjs3tjr2Y5x/XHFcd/z0+GXx1xJ0EgQJzYm4xNjEfYnDcwLm
|
||||
bJvTN9dlbsHce/PM5y2ed2W+znzh/NMLVBZwFhxLwiTFJdUkfeWEc6o4w8ns5J3JQ1wWdzv3Fc+Xt5U3wPfiF/P7U7xSilNepHqlbkkdSPNJK0kbFLAEZYI3
|
||||
6cHpFekfM8Iz9meMCuOE9Zn4zKTMEyJ1UYaoLUs/a3FWp9haXCDuXuixcNvCIUmoZJ8Uks6TNsuoiFi6LreQ/yTvyfbOLs/+tCh20bHFaotFi6/nWOWszenP
|
||||
Dcz9dQl6CXdJ61LDpauW9ixjLqtcDi1PXt66wnhF/oq+lUErD6wirspY9VuefV5x3vvVcatb8vXyV+b3/hT0U22BcoGkoGuN55qKn9E/C37uWOu0dsfa74W8
|
||||
wqtF9kUlRV/XcdddXe+wvnT96IaUDR0bXTfu3oTdJNp0b7PP5gPFasW5xb1bZm5p3ErfWrj1/bYF266UOJdUbCdul2/vLg0rbd5hsmPTjq9laWV3y/3K63fq
|
||||
7ly78+Mu3q5bu31311XoVRRVfPlF8Mv9yqDKxiqzqpI92D3Ze57vjd3b/ivj1+p9OvuK9n3bL9rffSDyQFu1W3V1jW7Nxlq4Vl47cHDuwZuH/A8119nWVdbT
|
||||
6osOg8Pywy+PJB25dzT0aOsxxrG646bHdzZQGgobocacxqGmtKbu5oTmzhMhJ1pbPFsaTtqd3H/K8FT5aY3TG88Qz+SfGT2be3b4nPjc4PnU872tC1ofXYi/
|
||||
cKdtdlvHxdCLly8FXrrQzmw/e9nr8qkrHldOXGVcbbrmeq3xusv1ht9cfmvocO1ovOF2o/mm+82WzhmdZ2753Dp/2//2pTvsO9fuzrrbeS/m3v2uuV3d93n3
|
||||
XzwQPnjzMPvhyKOVjzGPC5+oPil5qvu06nfL3+u7XbtP9/j3XH8W9exRL7f31R/SP7725T8nPy/pN+ivfuH44tRA4MDNl3Ne9r0SvxoZLPhT7c+dry1eH//L
|
||||
96/rQ/FDfW8kb0bfrnun/W7/e+f3rcMRw08/ZH4Y+Vj4SfvTgc+Mz+1f4r70jyz6ivta+s3yW8v30O+PRzNHR8UcCWdcCqCQAaekAPB2PwDkBAAoiIYgzpnQ
|
||||
2OMGTbwLxgn8J57Q4eOGKJc6xI3JI9Y5AA4jw2wlACq+AIxJo2hfADs5KcakHh7X7mOGRV4xdR5d60kXntpUg3/ahK7/oe9/eqCo+jf/LwkHEGPG+ODYAAAA
|
||||
imVYSWZNTQAqAAAACAAEARoABQAAAAEAAAA+ARsABQAAAAEAAABGASgAAwAAAAEAAgAAh2kABAAAAAEAAABOAAAAAAAAAJAAAAABAAAAkAAAAAEAA5KGAAcA
|
||||
AAASAAAAeKACAAQAAAABAAABkKADAAQAAAABAAAA+gAAAABBU0NJSQAAAFNjcmVlbnNob3T1Q1G8AAAACXBIWXMAABYlAAAWJQFJUiTwAAACqGlUWHRYTUw6
|
||||
Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNi4wLjAiPgogICA8cmRmOlJE
|
||||
RiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91
|
||||
dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyIKICAgICAgICAgICAgeG1sbnM6ZXhpZj0iaHR0cDov
|
||||
L25zLmFkb2JlLmNvbS9leGlmLzEuMC8iPgogICAgICAgICA8dGlmZjpZUmVzb2x1dGlvbj4xNDQ8L3RpZmY6WVJlc29sdXRpb24+CiAgICAgICAgIDx0aWZm
|
||||
OlhSZXNvbHV0aW9uPjE0NDwvdGlmZjpYUmVzb2x1dGlvbj4KICAgICAgICAgPHRpZmY6UmVzb2x1dGlvblVuaXQ+MjwvdGlmZjpSZXNvbHV0aW9uVW5pdD4K
|
||||
ICAgICAgICAgPGV4aWY6UGl4ZWxZRGltZW5zaW9uPjc0NjwvZXhpZjpQaXhlbFlEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlVzZXJDb21tZW50PlNjcmVl
|
||||
bnNob3Q8L2V4aWY6VXNlckNvbW1lbnQ+CiAgICAgICAgIDxleGlmOlBpeGVsWERpbWVuc2lvbj4xMTk0PC9leGlmOlBpeGVsWERpbWVuc2lvbj4KICAgICAg
|
||||
PC9yZGY6RGVzY3JpcHRpb24+CiAgIDwvcmRmOlJERj4KPC94OnhtcG1ldGE+Cts1PlUAAEAASURBVHgB7d0JvH3XeDfwHWKepxpiiHlMEGNMiaEorVkjLRKi
|
||||
qHmoWSXkRVExV2poQlBSQhGqSkJFqBBDRYyJMeYppiLZ7/Nd8hz7nv8Z9j333HvPuXc9n8+9Z9p77bV+a3jG9aydmqb5ZfxVqghUBCoCFYGKwKoQ2Cmubld1
|
||||
R724IlARqAhUBCoCgcDZKgoVgYpARaAiUBGYBYHKQGZBrd5TEagIVAQqAs3OFYOKQEWgIrCdENh5550bf2c729manXZixa/Utm1zxhlnNL/73e+aM888szcg
|
||||
1QfSG6p6YUWgIrDMCJz97Gdvzn3uc5cmWCgtmBbO7U6YqD/4nOMc5yi4/OY3v+mFTWUg23301PZXBLYBAjQOzOP//u//ipS9DZo8UxMxknOd61xFQ/vlL385
|
||||
lYlUBjITzPWmikBFYFkQYKo63/nO1/zqV78q0vWy1Hsz63nOc56zaCOYyCQ6e/x40KQLRv12qUtdqrn61a/eXOMa12gudrGLlUt+8Ytf7HCpSlzmMpdpLnzh
|
||||
Cze//e1vm9///veNe91DXfr1r3+9wz2L8oU6/smf/Emxk1Ln1oMueMELNpe85CWb85///E0fbr8edahl/gGBvfbaq/H3ta99rYzVisvWQeC85z1v0TysP5X6
|
||||
IcC8l34i7ycRI2Cvv6tc5Srti170ovZTn/pUe8opp7SnnXZaefX5JS95SXu1q11tRTm77757+9nPfrY96aST2lvd6lblt3/7t39rY5K2Bx988Ipr+9Zho657
|
||||
1rOe1X7zm99sn/jEJ65bPf/mb/6m/cpXvtJ++MMfbi90oQut23M2CrP1fk4sBO0FLnCBNkwRM2EV6nkbkmgpI9T0QRm77rpre/LJJ7c/+9nP2r/9278dfL/e
|
||||
7anl91t31oJTLIKlz9dSxna9NzS3qdj1DuPdc889mze/+c3NX//1XzeXvvSlm9NPP7358pe/XF593nfffZt//dd/bW5xi1sE3n8gWgYp2x9uhkjbsVgOnFl/
|
||||
uHLx/nOuqed6OtnYGj0jFsUaDTJlCHDu/fM//3Pz0Y9+tHnKU54y5erRP8PZGD3uuOOaYN6Di2jPX//615sf/vCHzbe+9a3B9/XN8iNg3eEwXw0xeRkr5uZ2
|
||||
JtFY1j/r+DjqFcbLzPKCF7yg2WWXXZof//jHzUtf+tLm3//935uf//znhTn8xV/8RfPoRz+6mKue//znN/e85z2b0E7KM1UiK+KL4c/jKrbZ3zMpUd1Gmebm
|
||||
VTedk3jMq8ytWo5F4OIXv3hzuctdroy5WdrJjGosX/ayly028SwD4zBmLRjf/va38+v6ugUQwAz6mK7CWtLc/OY3L8Ix8zVB1/wM60rzspe9bF0FyUWG2fqE
|
||||
gYwzY/ViIPe73/2aq171qsWOGKan5o1vfOOgzTSRQw89tAn1v3ne857XXPnKV27uf//7l/eDi3q84Ru52c1u1lzhClcoi+pXv/rV5iMf+Ujz05/+tNytQ2k3
|
||||
JNGPfexjpUPvfOc7l0n/6U9/ugkzULmOv+U2t7lNE2aJIk3+53/+56CMrMZFL3rRMljUVdTBqaeeWp71gx/8IC8pDje/cbx1yQKmnhYhC9LnP//5ItGK7phE
|
||||
17rWtcozRYJ87nOfa4499tixnaKc6173us0NbnCD4j9Sr//5n/9pvvjFL458hOuuf/3rFyy+973vFSmdLR9pg/qaFN/4xjcaWCXRgGDKwfi///u/xf5vElmo
|
||||
3f+FL3yhueMd71j63r2w1N8mZZgkm912262MiQ984AON/homk/JGN7pRqdd3v/vdghNJPwn+1772tYv/54Mf/GDxq/FDqJdnwwjjoOHe+MY3LpOa34wgo+/1
|
||||
G/wRSfMmN7lJ45na8/3vf7/5+Mc/PsDMeLjpTW/a8MvxaYU5trnTne7UhOmqCVNluddvyunWkR8MJsal/lYvWlD2N3xp55e4xCWaMOs2YbJtbn3rW5f+I3x8
|
||||
4hOfaD7zmc9kk+vrJiAwzYpgHIapuvnQhz7UHHPMMWUt+8lPflIWTn1vTfF5Ellk7373u5cxZozSdMN0P+mWpfhtGnYaMdHmy1b8H//xH20sAO273vWuNhbw
|
||||
kdfHxGvf+c53lutioSnXxSJY/B2x+LQxqcp9Rx99dBsLYhuayqCcP/uzP2tjgWxDEmyDYbTRWaWc6Mw2FsdyHf8Kv0sskO3f/d3ftbHgFJu1ssLs0D7+8Y9v
|
||||
Y5KX75WhLHX2vFhwBs+KAdH+93//dxsLTHmWa5V5/PHHD+oIkwMOOKANDav9y7/8y8G99773vdtYDNof/ehHbWhipZ78QPw6wVgG1w1j+pCHPKT90pe+VJ6p
|
||||
beoWGl37mMc8ptTxk5/8ZBuDtNzPxv/CF76w4JbP8LxgHu3Tnva0Nha5wXMucpGLtK94xSvaWEhLfbTFtbGotg996EPLdeyY73nPe9pY+NuXv/zlg3vVMRa9
|
||||
0h6/PfzhDy+/vfe97y3tDnNRG5JXaaMyYRQmzDYW1OLvSgx8HwyxDaY9KPs85zlP++xnP7u0QXvVS1uCSbUPeMADBtc96lGPKvXS9zDiD3KtPv3Od77TvuY1
|
||||
ryntvetd71qui4W9DUZV+jsW8IKhdoRW0b7+9a9vQ3so9U0cYPbYxz62PE/5MbELVspwrc+PfOQji//Jte7T79l/d7vb3cq4zLGiLe57+9vf3kYASbkuFo72
|
||||
3e9+d6nfq1/96vY5z3lO6QPXus+z+LqyzPo6eb2ZNz78ZvpoUrm3v/3t27//+78v11hHDjvssPZ1r3td+Xvta1/bvuENb2iNVWvcuHIe8YhHtG95y1vKPPir
|
||||
v/qrMiaud73rDa43v7u+u5zH5nuuqX43X/MZPoeAMvic1/HjWZfzOq8hYJe/7neu8b3v8nl5red2rx33Xhnde4evm6qBkMBIfCQtkv84eyI1MRbhIimSzmNx
|
||||
K5pEPHAiXelKV2pe/OIXFwmZFkHKZX/cZ599GlL7P/zDPzR3uctdirRO6sMRY5IXSS8WxOaWt7xlo4xYmJo73OEO5XcqJ0k0OrBIwLF4NM94xjOK+YL5zfXa
|
||||
EhO/SKT3ute9iub03Oc+t/nzP//zYguPhbRIorQFdOW456CDDirtYr5729veVt4/7GEPa4IBFnMeuzqVr0skdTZ7mhNJ9/3vf3+Rckn2JOGUZN0D4xjIRYMj
|
||||
vZJiSLykeFpVDOKiEUUgQ5GOaIPB4IpWEIO8+KRI6iRg5cQi3ARTL5KzZw33HSw932+p5pOymSbVO5hDEwyqCeZdJHSSdizqRfvij6CpqBft5slPfnLBlMYW
|
||||
k7B58IMfXKQ2fUFzcm8wgiaCE0q5wdTKM13PdPS4xz2uaBzqTGPyTKbRY0MLoUlEkEbBmQmK1nD44YcXTYA2pF9If7SmmMSl/25729sWzSGEjWKGOOGEExrj
|
||||
RV95HtMEbSyYd+mPxCFVdZpZMPKizdAgjAe+PM8hlSrLGGXShRkN3FjUnle96lVFi7rd7W7X0HaDiRXJNhhkd2jU9wuCgHGub2nxIkuNmRwH5oi5ayzQWo29
|
||||
YbLWGVdPeMITBlqHcUETdi/z/nWuc50yv1lrrJPGrDm+a2jGrnn605/eGLOxWJc55nqWHGtH1sX8V09jytyzXlmf7nvf+5axR3v+r//6r+ZNb3pTWf+e9KQn
|
||||
lfEZwl4Zl9wLNHfmWmQd87dWmsiJYiEu0jNpfv/995947X777Vcis0iS17zmNdswT0zVQAK0oiXg9CmFR4PaaGSR9mgdyqJdiJQhmZIOkitGOHGJ9CIZhsmr
|
||||
jcWs1BGXpgml5qRM2gwtiUTR1Ur23nvvlnSrjDChjGxjLGbl2doW5rzBNeofndDGojEyYoEkT0oXjbbHHnsM7qMJkXppZzQQ9YUXiVUbaRvq7I9UEoyvtCUW
|
||||
vTYGT2kLzUOdaWR5LSkJPjE423e84x1FanpHSMykehF0eZ1X5ZD+/UZC9522eL46XfGKVyzfwZqW5Vlw6mJE06CF0bD0EVxF3bnWeOg+L0ycRfsiwZOsRDzp
|
||||
n9Qg89owWRWNEG6k+vyeNknzoaHldyQpY4cWQJPtfh/O8tK2jPgj0YX5qWgGMdkH14b5tPSPumSdjREaRDCw1u9ZbjCF0kd+e9CDHlS+P+qoo0p7aX7BwAfX
|
||||
BrMpeCnXeM4y6uvGaSF9NBCa+D/90z+1YdIcaLXDfWT+3fCGNxzZh9YgYyCl/e69tFjjMxb+NoSoMo+8N75oOyHMFC1Y1GeYhMvcdT+NhnbcLeuQQw4p9XMP
|
||||
awgNnXYVwk6Zd9a3rId1JwSc8txnPvOZbQjprXnld+upOpuHwcBWPKP7PO/XrIH0sYHFg3Yg95GopxHbNxs64uegvYQJpHBinFcZvs8NLT6HSa1Ife4h3cZi
|
||||
VezkomtwW+R6EiapIhaOIrGTOmkzSGw4zcqzlE86x8FpP6NIUIBrAtAibZP4aQfqnvUfvk+50VmlDa7x/CR19d097nGP8hUNgJTuHk5d5SfRanwmkdMIaVf2
|
||||
4WiDenV9Usoh3YS5sUjFbLNGyGqIREQyZ9NHJOwTTzyxaDbBKIpGkOWRpmLRLdjpGxoQiSxMOKVf1NX32iBqj7bDn0Zr0b/qRwIkNSVpE8mKxOc6pE7KMa7c
|
||||
k0SKo5Ei19BQSH760bOQz8g4UAbS16NIndi8+Xc8K5hwEwxgcKk+M65oaLSUWBzKdSRHmhJfVZI+JokaZ/qt0mIiEKbLMjb0+6T1btxvxpmxZwwk0YyNMVYU
|
||||
c4lWzx+GjAX3WPtorsYUTcN6QhN2j3WD1t0lY5Ofxj1eWR+sX7SRP/3TPy11UA/zzzpi/Hkunx3txtqqjcF8SjuNa/evhUbPok6JwNUoE08FJpGJ4joLjolj
|
||||
Mk8jnUL9C7thcaia7DoC+BbDnPBZjoVo2LHtO9cNf68eKDvea0jPRTW0sFEHAe4Pc/B7XpvPy1cL6L/8y7+UxYoZgypo4bMoUhuZm3Rsl3QOc4n6hWbR/am8
|
||||
x/yyfV45i73CDhPpEtMOBzamAVdBBwZpSMI7OPgs/Ln4W2zzGd3ypr23MHcpmbmx4H1S4uazdmYbtP3wMDN1ST3UB1MwkF2PTKbsq7w++3Ja3fVXaD4NMyXT
|
||||
kslp/PhTvjE0rk/zWd1X95iQ6u++DEbIa3wn1Fe9tINA4Tuf9U+XYOP5SF9VWkwE9BMTp3VgFiLAEpiYhkJjLmMnpP7CGAhc1hzjg0BkXTO/PUvwC/NoWCbK
|
||||
d8aKRZ8pnAA8bPJUBoGYOZWA4xrBG8Y+E7731jVMA3O4d5jm33300WV91UZrinoyP7uGiV9wzFpoKgOxQAFI5UjIudgOPxQwfkds/UCymEwjdmT2RQsLv4So
|
||||
GloEO5/oLzS8AAByFA1/3/1s0cOl2actwqQCCz9QSaY6oHv9cPnuNyh0Hoano0jS7N6kUX/Z8XmveufCO2pwdiUW1+pkBMvhBaf7nev8uaf7fT53Na/j2jz8
|
||||
fX72mu89Z/h9ttcAxUBEpOQ16muSaFuYzVZoEsN1znuGvx/+bIyFA70IH2HSK9opxo4B2pukj1ZDnttlOqP6DeZIWzGcrGu+5vOGP+f39XXxEAgTVpnD5vUo
|
||||
6mq9w78bLyJQ+TEs8IQPAh9mYIzwYb7yla8sWjEh1Npo/qaWIVuHtQXxkfKr/uM//uNAwMrnEbJca0tFmJeL79hzMKmnhgbz42AO5p15xnccQTpFWCbwEDoJ
|
||||
lWEGbiLQozAf9aBNr4WmMhASp4UWMLieSWnCDhN1SuipSUMly4Vi+Lr8rJHIxkRSHGcvxyvAEWZkUVfOWidiPouWw7TBWa8d2oaYg9IMUr4Y8Q9Dw3g40Zgp
|
||||
1MnihWmELbwMPiGpXROGRQwzpLnh9sNE8kgp3G8WwJSADRAhoUmcyrQZgwiDNkhhpQ40P5pQEic605iBQw1OTKm2XSJp08K6dej+vtr3GIM2eJ5FllbWNf8o
|
||||
L01StLVJk3LUs3McZH+6hqOayo9pMKVlWK/fBEQMM+JRZbg2yfWEJvWjJek3ARNJmL6+UAeClf7IMvOa+rpYCPTtH1YC8yl8D0Urdp9+NtfMPWN7HBl31hBj
|
||||
w9hJC4Drn/rUp5bAHRpqzgdCK6ZFgLVOWMzNB9o0E+6xETwyTOqBATCFm2PGPOK8t71A2ead8gi0BHKC8v777z+waGBg1nPP6tZx+Fn5OTHIz8OvvfTqI444
|
||||
oixQpLFw7jbh4Gkuf/nLF2BVHMcU5eR3atcoBjP84PxsQuokFc3JbmETuTBv0mm5WOagwlBEAGU9xj2T3ZDdkW9h1113LXXG/am+ytKhuVBnGUwzGIr2sZfT
|
||||
qNTBQKB2+i5NN7CL8OKyeKmTyB1MC2EQEWZb8GVS4R8g3VBHDWz1t9gh/WLAGswWVkyStK9u4QQsGpNnwdgz2ErVbx6kXH4AjEv5olkwXaTNIsMMan4DAzj7
|
||||
YtqzE19YJYPNe9KE5HP2qfcPfOADS0RNanW+I4wk3uN8EurFfGYPkvEockw/KdsYIeSI1FEuX1ylxUYg15Y+tSQQRFBI0ZpTs/fKVHTggQcO/KvjyjJuSPTD
|
||||
C7O5R8BL5uF+/hDzlwkJ80CYF4uMSFFzdpgIlMqw7iTzcI0xjfmpJ+ZljjCP80daG3wv+ivJM4frmL8Nvxr3k+bpVA1EgSqLy2kYDomJmEg4qsWOecvEUzEb
|
||||
crpAmYTJGJSlQt3vbNxhC+TkOfLIIws4tB2gA0YHJuV93YWiW+a47/P51EPPoVKSKoFIq/IMphb+l+Ey8tnujaihYirBIC0wmAFp3yuu3pV+876IKS8b1vgs
|
||||
hN2Ski20tBL2TxKuZ6qDAQxj6iwzGZspTL3uGkzLAKXaesVIqMWcbxHpUZiFhZsEpI/8LtzXBBIyK3gAkxGGS5JSH8yFhmTgZru9wis/Zzumfe8eY4DPjNZD
|
||||
pRdeCF+DW/1pXBZxAon+df2oZ43qU4NYOUJ8aafS6tBwMHUTkXaFMZG4tM3ztA1DzbaYSDYJYgA2EdKG9I+JOdw+4ceeA3vl6ivM8Lph4tDfEc3SvO997xvU
|
||||
v287Es/6ujEIGGfmVleQmPRkc3jUPJ50zyy/DTvIlWH+MzuNI2O1L2FYBLa1UM4JGI6jXhqIm0Xb3Oc+9ymTn4qFcViovGYkkN9JxkkmvUlLpcpKWKh9TvMR
|
||||
NQ4wrrOwm9jKwPFJ8L53r4Uwy7IIdSnLTOkyf/MMz0o7vGglC4NFjq1TVBOTFOboOuUPl51lWZCZqjA8dkgSLjMYqVS5Ol59h4lGRnsgmcCDucxiR2OLMLoy
|
||||
sDGE1AIs8OojIoN2h0Fh0BZGmh4NKElbaBsGi75wLa0Ffuqa9k1qLz+ThdbiZwHFPGkgJB3PzwmmDd3+yWfBNrHM77zCK7FLSSXxsLtde5nTxLVbhDHhNAl5
|
||||
ZpaZ7c+ys++6mBorNBiLNQZrZ7m2i3fXP+y8GCWtTH/AUN/nWPMMe47Ui+biWlqaNsBAXRIH+GD29gppFwc9E+33Ay+4065gYpKNw8zzstzhsZntrK/ri4D+
|
||||
TEFlfZ+09tKH58DaS1xbCQRdY39SvXaKR6zafkH9Z1Yh0VKZcM6uSpXVTlOJz9Q0E5lpw6JrsXZvkgXQQsn+TOrW6WlmIEliIn43YTEAC0NSlmkB4ERKspDQ
|
||||
Kgyirkqo7qRu5VoolElqRRbZ7qKVZeWrevFPWKwsPKR+7Z9GFm6LqYXLM9VH3dRR23zOBVhZpF0pFvgoYEf6HrcIYeLq5FWbMK1RjJDWQyKHu4VXedqhTb7T
|
||||
J4mlhQ/OScr2B0t22xxU2pW+Fd/nAuw+ZiubNv2uXzyz22/MZ9qnru7ttt/YgoExo01JnscxjlHSpGgUCI6epU2w0ofj2gL3q4ZmccG4hwPSmEstOnHI53nd
|
||||
NbQZ40XbPK87lvye0VjDmGF0Wa72G5+VNh4B6w0tRP9U6oeAsWueWBO683L47pkYyHAh9XNFoCJQEVhkBAgzBEUCTAo/i1zfzawb5pHCW1cgHFWnykBGoVK/
|
||||
qwhUBLYcArRXmgitltZbGcnKLsY4WI38wWga83B3ZSArMayfKgIVgS2MAHMtk5bFElUm8ofApuxy5nTMoy8ulYEkcvW1IlAR2DYIMGfl37Zp9ISGYhiTfB3j
|
||||
bq0MZBwy9fuKQEWgIlARmIhA7zDeiaXUHysCFYGKQEVg2yFQGci26/La4IpARaAiMB8EKgOZD461lIpARaAisO0QqAxk23V5bXBFoCJQEZgPApWBzAfHWkpF
|
||||
oCJQEdh2CFQGsu26vDa4IlARqAjMB4HKQOaDYy2lIlARqAhsOwQqA9l2XV4bXBGoCFQE5oNAZSDzwbGWUhGoCFQEth0ClYFsuy6vDa4IVAQqAvNBoDKQ+eBY
|
||||
S6kIVAQqAtsOgcpAtl2X1wZXBCoCFYH5IFAZyHxwrKVUBCoCFYFth0BlINuuy2uDKwIVgYrAfBCoDGQ+ONZSKgIVgYrAtkOgMpBt1+W1wRWBikBFYD4IVAYy
|
||||
HxxrKRWBikBFYNshUBnItuvy2uCKQEWgIjAfBCoDmQ+OtZSKQEWgIrDtENh5PVt89rOffT2Lr2VXBCoCFYGKQA8EzjzzzKZt2x5Xru6SuTGQ85znPM31r3/9
|
||||
5la3ulWz++67N1e4whWaC13oQk1lIqvrkHp1RaAiUBGYJwIYx69+9avmO9/5TnPyySc3H/3oR5vjjz+++d73vrfmx+wUJayJLV3iEpdo9t9//+a+971vs9tu
|
||||
uzU77aTIShWBikBFoCKwqAicdtppzbve9a7m0EMPbU488cSZqzkzA6FZHHDAAc2Tn/zk5opXvOLMFag3VgQqAhWBisDmIEAzOeyww5qDDz54Jo1kJgZyyUte
|
||||
snnJS17S7LPPPpvT6vrUikBFoCJQEZgbAkxbD3nIQ5oPf/jDqypz1QyEtvHWt7612WOPPVb1oHpxRaAiUBGoCCwuAqeffnrzoAc9qDnyyCN7V3JVDORSl7pU
|
||||
8973vre53vWu1/sB9cKKQEWgIlARWA4EmLTuc5/7FP9Inxr3ZiDnOMc5Cme6293u1qfcek1FoCJQEagILCECorP22muv5otf/OLU2vfeSPjQhz60qcxjKp71
|
||||
gopARaAisNQI8HG//OUvb855znNObUcvDeSyl71sc8IJJzQKrlQRqAhUBCoCWx+B+93vfs0b3vCGiQ3tpYHwzlfmMRHH+mNFoCJQEdhSCDz+8Y9vzn3uc09s
|
||||
01QGYje5TYKVKgIVgYpARWD7ICBY6ja3uc3EBk9lIDe/+c2bXXfddWIh9ceKQEWgIlAR2HoI3OMe95jYqKkMZBoHmlh6/bEiUBGoCFQElhaBm93sZhPNWBMZ
|
||||
iLxW173udZe28bXiFYGKQEWgIjA7Ape//OWby13ucmMLmMhAznWuczW77LLL2JvrDxWBikBFoCKwdRE473nP21zmMpcZ28CJDEQcMCd6pYpARaAiUBHYfgiw
|
||||
Ql3wghcc2/CJDMTN9TyPsdjVHyoCFYGKwJZHYBIPmMhAtjwytYEVgYpARaAiMDMClYHMDF29sSJQEagIbG8EKgPZ3v1fW18RqAhUBGZGoDKQmaGrN1YEKgIV
|
||||
ge2NQGUg27v/a+srAhWBisDMCFQGMjN09caKQEWgIrC9EagMZHv3f219RaAiUBGYGYHKQGaGrt5YEagIVAS2NwKVgWzv/q+trwhUBCoCMyNQGcjM0NUbKwIV
|
||||
gYrA9kagMpDt3f+19RWBikBFYGYEKgOZGbp6Y0WgIlAR2N4IVAayvfu/tr4iUBGoCMyMQGUgM0NXb6wIVAQqAtsbgcpAtnf/19ZXBCoCFYGZEagMZGbo6o0V
|
||||
gYpARWB7I1AZyPbu/9r6ikBFoCIwMwKVgcwMXb2xIlARqAhsbwQqA9ne/V9bXxGoCFQEZkagMpCZoas3VgQqAhWB7Y1AZSDbu/9r6ysCFYGKwMwIVAYyM3T1
|
||||
xopARaAisL0RqAxke/d/bX1FoCJQEZgZgcpAZoau3lgRqAhUBLY3ApWBbO/+r62vCFQEKgIzI1AZyMzQ1RsrAhWBisD2RqAykO3d/7X1FYGKQEVgZgQqA5kZ
|
||||
unpjRaAiUBHY3ghUBrK9+7+2viJQEagIzIzAzjPfWW+sCFQEKgILhsBvfvOb5vvf/35z2mmnNd/+9reb78Zrs9NOzZ/92Z81V7ziFReststfncpAlr8Pawsq
|
||||
AtsegR//+MfNIx/5yOaTn/xk89Of/rRp27Y597nP3Zz/fOdrTvrCF5rHP/7xzT/+4z9ue5zmDUBlIPNGtJZXEagIbDgCO4WWcc1rXrO5yU1u0uy2227NFa5w
|
||||
heZCF7pQ86xnPrP5STCUv/zLv9zwOm2HB1YGsh16ubaxIrDFEbjIRS7SPP3pTy+tfPOb39y89KUvbX73u981Rx99dHPwwQc3N7zhDaciQGvBiCr1R6A60ftj
|
||||
Va+sCFQElgCBK1/5ys0HP/jB5thjjy1ayWGHHdZ86lOfmljzyjwmwjP2x6qBjIWm/lAR2GQEQiLmAK7UH4Ff//rXzSte8YrmUpe6VEMTue51r1sc6jSUSQTl
|
||||
ykQmITT6t8pARuNSv+0g8NnPfrZ5//vf35x++unNzW9+8+Z2t7vduqj6Jv9//Md/NCeeeGJzmctcprnDHe6wbSJnYHvSSSc1Zzvb2ZprX/vazXnPe94m2Eez
|
||||
HuzjzDPOaD4fz/r5z3/eXPWqV23+5E/+pPT2si+gxs9DHvKQ5g1veEMxZ33sYx9rXvWqVzXf/OY3m+985zvN8573vOZP//RPOyP7j28L1pVZ/xGQVbyD3ci/
|
||||
cEK13/3ud2NcVdqOCPz+979vn/nMZ7YXvOAFB+MjFrj2gQ98YPuLX/xirpBEyGV7+9vffvAcY/LSl750e/hhh831OYtYWDDN9nrXu1678847t+c85znbcAS3
|
||||
wbBLVc8888y29Tcn+vznP9/e+U53aoNBtfoyQlvbWGTnVPrmFhNMor361a/eXvjCF24vf/nLt7vvvnt7l7vcpf3bv/3b9p/+6Z/aL3/5y61rvva1r7UhFLXH
|
||||
HXdc+9GPfrT95S9/GRDPD+PNRWH+T7/b3e62Yl4O8YvRzMNFy8xA/u///q99zWte0/75n/95e+c737l9+ctfPvdF77e//W37jne8o33CE57QhgOv/dCHPjT/
|
||||
3tvEEsP5OBg4ERLZXvSiFx18fvSjHz23moXk2N4pFrUcmCERt+c4xznK53PEovqv//qvc3vWohX0kY98pCx42fZ8vcAFLtC+973vnWt1v/CFL7RXutKVBjjn
|
||||
szCS1772tXN91mYVRuDFKGIvSPu7mJ9d+pu/+Zv2Epe4RHvJS16yDQ13gPtRRx3Vvay+H0Jg2zEQC9J+++23w0SJzUTtD3/4wyF4ZvtIkiHd5CT0Snp84hOf
|
||||
2Eb0x2yFLtBdH//4x4uUql0RFlmkNRNz7733Lm3W1g9/+MNzqbHFK3HUb9/61rcKYw47dvn+cpe7XPluLg9boEJ+9atftTe72c1KG89znvO0/+///b/2oIMO
|
||||
GuBOO4DFPMicCPNNeVZEGrVh6ilSOS0P9hbUr3/96/N41MKW8T//8z/tm970pjZ8I+3b3va29rKXvWwbJtmilfStdGxQbMPUOLd+6fvczbxu2zGQ5z/veWVS
|
||||
mBjXuc512gjhG3x+wAMe0J5xxhlr6o+wVw8mo2dY6KjNuQgecsghayp/EW7+67/+69Ie5qtPfOITgyqZPOGQLL/d+973XrPqz3xw/etfv5R3rWtdq/3JT34y
|
||||
eBbN4+xnP3v5zeK61Yj2mmPmKU95yqB5L3nJS1qLvN+e9tSnDr5fyxtY5rMwj6QjjjiimLL8RuPc6vTpT3+6mLkwzJve9KbtD37wg6lNZsp9+9vf3lpICTPn
|
||||
O9/5inn1YQ97WPuzn/1s6v3LfsG2YiCkKCqqCXGjG92o/d73vtda8O91r3uV79iZ0zQwq93TYpaTkT+ANvK///u/xebqe2oyO+uyUjgdSxu0RfuG6aEPfWhp
|
||||
P+byla98ZfjnVX3+z//8z8ECFrH7K+6lyaWEvscee7SkaDRrv60ofAE+3Pe+9y04Gi+nnnrqoEbMr7e4xS3Kb7EhrtciN7h5xBs4RuBDKW+XXXZp+ZuSIvXH
|
||||
QMAyX3zeysS3dP7zn7+FK2ZwzDHHlOYaU6PGVQR0tCwXOd+HXx/72MduZbhK27YVA3nhC19YOpsN/X3ve9+gczEWk8cAuOMd79iSKmYhJgXSi3JuectbFgdc
|
||||
lvNf//VfxYzltwMPPDC/XrrXlIzZxt/97nfvUH++HoxYO1/5ylfu8PtqvnjUox5VyhleRLOM7E8+GNIjGjXR8/pleY10G21EQJW23/Oe9xxUO9v2ute9rvwG
|
||||
47e+9a2D32d5Q7ghNSsrF7zugpm+Lo51zuWtSvxNxhmf6PHHH98+NbS75z//+W3sGWkjMm2HcXX44Ye3F7/4xQf9wAe4zz77tM9+9rOLWReemFAfLWaZMd02
|
||||
DIRTm01Tx1JPSXIoJyVHt9/Ym0kWsxBnvDK6mkyWwzSWdmZ+A+aZZaQnPelJpY2c2bSRYaLRiXaBw7777jv8c+/PNAqahXI40bOfugX893//94BZsV2jUdd1
|
||||
71mG97GxrT3Xuc5V2p6aV3dRJ6jAHzacv2shJjHlMAfG5rpSVPdZBJ80mfERbFWKMN4SnAFXQqBILdaK+93vfu3vQ0tLgo3owxSSYOca5tukZzzjGQVT2ozg
|
||||
hK1MkxjIltqJHmaA5nOf+1z0d9OEltGEo7e8z/QE8uGEJNbEwlVSHJQfV/EvGEQTzrdyhw1K4VBecXdI7E1Ik+W7k08+uQmJecXvy/IBjig0tiYktvK++y8m
|
||||
TRMLf/lKG4NRdn8u7yNYofna177WfPGLX2xOOeWUJnwbO1wTpr9yjR9ufOMbj9xbEo7k5mIXu1i5NyTp8pr9WT4s6T+YhIBT2iyHU1K2LRa45gY3uEH5Opy/
|
||||
ZczmNfkq62yE5Tb2O0giqEzZaIcpQlXLV7Fglo11PnhOPutqV7taE9J1ucaen61KEiqGP6/MYfM4tOvyOaI1m7Pt/IctccE8yh6SsCA0YaVowrfZRJBH8/rX
|
||||
v77sak9sgsGXtxEt14QpN7/edq9baiOhBcbmKBMjzEs7dKaJalJG9FDzgQ98oHna0562wzWTvgh7/yAlQqjBJdvn8PW3utWtGoPKxrCPHndcEzb84UsW+jMm
|
||||
KRU2CvW8CSl5ZH13j4R14ZhtvhMps0OFL4w5JLHm3//930sKCZviTo++OCMmZEhyjZ3AEZdfNiH+xV/8RWMxw2DCCVnKx0BGkYR4GEj4sgb1GnXdsn0XEW2l
|
||||
yphx2ON3qL4xbKyGv64whghPLZsqYf3Od76z/EnPgTGH5l02INp8GGG6RbAhLMFUPqgUqmKvSVkQPSwk5gEDwTxCKm9+9KMflQ13O1RmSb/ADLSJwIgZhOra
|
||||
RKBGE+btHVoEjwCkecELXtA85znPKb8ToDCO29zmNiuuJxyF76R8Z02x63270pZiIKQxZDKEiWWHPrWQ7bXXXoWBWOw+85nPlAFG6rLgmZwGmjTQEeJXBhtN
|
||||
w8LnuwhtLQueAXj7MTtaTWASHYnw+JAMl41IxSYdCvV+bPX3OEs6Pv0Xvyi71DHviOgZqWkoxEKHYYR/pQkbchMRLI0JbuLSCnfdddeRzwpz42DRk6Z7q1Ay
|
||||
adIrgWMUhVO7fE0ooumF76ksbsl8hu/Rd8adv/BNNfe5z31KFlrMB0VE4uCW1D58gfFkqg9zYKsQvCIIpDBKTASOf//3f988+clP3qGJ8IhIqyZMU+U3TOHf
|
||||
/u3fmj333HOHazEV2h6KSMTCvLsMeYcbtvAXW4qBMJcgksO4xS8HBPVfWgPSRJE+xnQyZiE9dNhAm64p4Drx3SgisZuoJrH6MO9YIJeFLEJpBiH9j6PYP1CY
|
||||
qmudw+A+BK8Iy23CB1WkYRI2LYP2Bj/M2uJpImPKyGRlshlFYbcfLLCx+53oXCTFUdcu03cWNGTxHqflYaowgvHDH/7wFRrYVa5ylSIMEXCYGWkhX/3qV5vw
|
||||
GTXhIG5ij0nzL//yL2URTBOjcTyKLJ5phqE5b5XFEDbGJkwIi8xRsXdrFAQNpvyIRzyi4GjMHn744TswD2NcObQUxLwa/oHyvsuQyxfb5N+WYiBs6ogGQtsY
|
||||
JhPMwECk35S2SF8WMKq8BdCE9ZsTzUxEpgJ/OUgskJMWV6aHiKIp96uTfEPLQkxYtDAEi3Fk0bLowMrE4v+JUOkyYZlO0v/UvR+WpMIXv/jFTYTvDhgV3Cfh
|
||||
mWV5DpMYprLsZPwhuPkbRRgr851xmBqLsRSZD4qvLf0W3Xst/p8In8khL3pRc+SRRxZTqt9hhiGNo5wvxdQTZeRYH3f9MnwPH766v/u7vysYM2uPmovMfK4x
|
||||
V435iMwqedh8b3zzMfGZnHDCCYXR0GQQ4ZOWhzEzcyk/oraWAZq51XHHVXZuRW9sQSSwdNSy3Q8T00mEjJbEan4jjXC03+Me92jYhknUzCUms0lEaovQ3+a4
|
||||
j3ykeUtMRH6T1FRIaQZRSm3Dz2LfRyTvZWMg2Ub1n7SIYABpUjJpItKn+au/+iu3jSXStqNFb3vb2zaRm6g4K+H85S99qZjBxiW6m1SPsQ9b8B+yTfDuYt6t
|
||||
9s9j/FjEkuDrVD1jdRwp98ZxqJJMtJJechwbqxgWM22axYbv7zK04d+W9bN2v+LlLy/WiAMOOKDh+Oa3s+inUKJtki/yKyFrB1MrHydTrjK+FOOTYDVM1gEC
|
||||
kb+IzmykkY8NuM2DH/zgYgUZvn6rfg6bwOh8WCEVLk0yxR9FipJQKUtbhNih6Pj2uc99btlElSGTIWmVzXHdkLxy8YR/MYlL6oPIkjrASoK2cSREOJ8XUuC4
|
||||
yxby+5Cq2pBUSzvDVjyyjifEzvSQjMs1UmHMmtJEOgk5n4w/OZpCQxz5PHH7rgnGs+YsAiMfsAlf5mZM7Yb5MMXitCK55GMe85hWmPpqKRbGktMOfkJWwwQz
|
||||
sgjYumbvSFUTzGTkNcv2pXGpTSG4lJBd49qem24i0DgKd7Afx7Xj/uyjCf9pG6bENoJvSsqi/fffvyS+DEFyxX32hrz61a/eMmN1UhjvltFAfhGSLGkBkWpF
|
||||
WB166KGNs5KTmLacVBabgfKrXq/Ue5oK/0kwjhJpxKQQE2qklM7/wo7K5JLhfr0etAAXkczSJk99R1R1oaSxEauo8EIhSWfs86+OdNmjIt76NAWm+icW0yL1
|
||||
xV6QYnIQ2SKajSkQjkxfiAYzztzT53mLdE1iTHMm3TKvwpfvjNNbCnKmExTpd4rdPc1ME9sRYzIGZbnE+BTx9rIY8wc86EElkk2YOd8A05jgEH0nfbyximjh
|
||||
qR2VL5b4H1PqscceW9ojKIbVgak0TaC0rkNe+MIyprvNFNRwpfBvXDrM2vqJVsIZD7fh8UdDpLGIlhNEwtSt72LvTgm6ibRGAx9ePoP/6+sRKv+1cMR/Jfrd
|
||||
e9YKlg/PC0ZX1pqbRwTnOWI+Ljqt4J5R2cHnZdJAaBspzcbCNmhD2DRLOgySQ0zONQtRwZxKKhQbvUYR6S2c5yVRGyxtylsmioVkkNpCenXSVqjmAzy740MW
|
||||
4rUSvGxG7JbrfUzUkhrGrt9rXOMa5Xf5ubYK5aZW0qvMCN0dz10sbLQkJc9CXU1C/qtuufmeZE37oEn6Tn6z7UA0vAc96EGDDZTaLidb+OdKWiLzfLWkzDjM
|
||||
quTGS3ylUKLxhMBZkjhKDRSMaLBW5XXDr8a/TcnWks2mSRpI1PuPDGP4/WYxEDs7JX/rToBpIEpVor7dNtz97ndv7WQOzj7t9rn9HjHkJWVKSPKlLvNMez63
|
||||
Sk4pKE1GXSy9t8g5q0I6h2c961ltaHxTSur3s9xMYatvQyMp52IM92PWQ9qTrUIHRdbdbFf31UKOadz61rcueIREO3OTu/NHTjiLl3LlvGJmsUh1n+19+Adm
|
||||
ft6y3GjcWhuy7RGsUFLydE1ba2mL1DEwzvJlxbDrPT93X60T0qvIzcWcKd1SaCGDa+WCw5g2k5aOgRx8VrJCGTD7kgUtVNMCvA6JEMZVMaC+z5l2XTg6B51v
|
||||
oKQ/Ztp9i/I7e3x38PMZYSjhaCzpvmko60mh3hffFUku09LkhDOQ/b7sFCaPwiizXbSQ+9///i1/BUlVG7uL/7zbqw/1czh/W76V1D7UxwI2q09r3vVcj/L4
|
||||
M8OhPpijYeZqPzen/F/6LPtNCiAHWmUf5yshDBN37IM5FWbKktwyTMIlE7V+kXaerybvCTPcekDRu8ylYyARUlfAM5idoDaNLDYpTcUmvtIB0+5Zr99lpzUo
|
||||
s/PnYeZZr7oOl2sQZ9ZW9ScJZ+bi7rXdidL9ft7vwwfTxqatIp0lnhHhMpMzed51m7U8ps/w7QzGh8XEgjF3WoUjXPACBpYYyxNlYduKxLmd7TRPv3VWrre5
|
||||
jOkOA4HdxyJhI82DmfCOd7hDe1icrgnr8HlNhZYAnPV0YuVm0lIxEODqWGYStlmREySlceRIyvR9iJIaF8kz7v71+P4b3/hGmxFbzs5YlwVizhVn5usuIvAX
|
||||
2bYIxA7c1UaW9WwQUVTMdLkwsMHPy2wyj35ifk1BjPTM7LWViFbgECn407r6CKdrbb+1YDURn54Xof/lHCP1ZF5T782khWcgqfYBCbdlNolcVcV5eNe73rUc
|
||||
1sQ0lI7rvJ6EGtE6ZUAY+K6RLfeNb3xjybZLXV0vYpfEvJymJyW05x599NGDU90wjbTl3yGkj1lCMNer7qPKJennwmbApBMxsR51z0Z+x09CI1JHEh3sl426
|
||||
Jy9yrhq/i0aRIWAwDjL1+6LVcdb6dNsWEZqlmEUZ39km4zw2JQ76QFbgzaaFZyAA0pE0DXs5HLSTju8I4SzRERxLwxIDRpGL3vArJrRXxG3bazBPinC71hkV
|
||||
TjocfqbP/C9OPXT8a56zgLmNOldjnvVaS1mxAbN1GqD6i7hKiUefLNIE6zJlZ93nGFlL2zfqXud/RHhywdgYT2Foo57f9zmYGrOascA3s1rpue9zNvo6YzzP
|
||||
XxFtFWH/pQqLNL6dV6Juua7wPaYgt9F4dZ+30AwkO5DzMJ15zi/IxeFlL3tZOb/jXe96V7dNxZ4YqQoK2Gy2QkFx63/4h39oI//NCiclu3lGMuTzVhTW84PD
|
||||
dmIvSHkmpmCiCdN1hC1moQ42axkAnGUHxqFSV4joCp+ZLtby7J5VnOkyZ0DkoGUjXmR63OMeV+oqVHsZTIOJZfdI2Tz/I38bfrXYRbbXwsiZuNaqvf4uTGcE
|
||||
MSYpDvIUEIafm58dsJRRhN2jdvP3ZXx15kma51gMkPmYczJfN6NthAnrSJrizcXYvzNyg+lm1G8pGAiG8Z73vKfdb7/9iokCgMcdd1yxAdqL0CXmFhFXBoTw
|
||||
z1ETwoQxaYXG6RA2/bWYDDCPLItJSt1GDTp1EeNvgVNHPhDPt3Nb9M0iUqT+LnXUvln3HGxUu4RIxubCUl+hsMtCuddFYEhsFJxYbZkM2OoJIYJCHE6Wh2lN
|
||||
vHHEj0x9nPYOACPcGI9HTdHKmX7TNGzPgrm07JTHUDtMLkOjuwxko9snYMVOdgep0UhTgLNvjZC0SL6xhWYgozrO3g0mFQuFCceplBRnG5TFmIQUB8Hk12Nf
|
||||
v3jyyQNzkxPJ0KiFf2wB8QOJMENbpTDpTqgchPma5TgWNn0gOThmXQSyzPV4ZVrJjYLCG9Fq8VmPeo0rk6ARu6fLhLPRqk9Ey7iyNup72m+e4EiQ6UO0BefF
|
||||
85vQyGc99Y5mL0oxco8VM2pfISbNr+bgVjBjCVgwD0VFwSQtHKPG+vBc7tNfq70GY891wSth2BrDj7potHQMBIAGOuYhHjpJx9qNDHDnGfclIYkmAv/Eqaee
|
||||
2ve2wXUpvdgxvBotxuTPvSnqzLS2aBQnJw5UZ4vMMpCsAvDE+Oa1mXE92y0yMDVRudlGkWgdAobf7b5nVjzllFNGXTrzd8Y+v6HymXrf8pa3jJ0PFjIY+1tk
|
||||
/11fMEj62sIPxccqy0IyU/tiYMLSsVZzYd/60DC6mxmF+S6Cv2NU/ZeSgWgIG3f37HIdzk5oF+1qQwz5QQwgqQpWQ7QPph0mqThnYTW3tkwBmaTOszl+F420
|
||||
KW3DRx111KJVb2R9kqHzfU0zB40sYIO/JMAwTRgDzK9dsnjxlTFxXujCF26vHiYrqVsiH1VxYjNzkJjXQgI/pDIhRHkObSjOEyn1ifxwZVPbsMkkDk8bnAke
|
||||
RxOs5fGbfq85nz4dEU60QH1BGxG6LtjGZ7gkA7HWYKKiK/lMRG3ZEzVPpm7sMk96NuH2pB573jYDzKVlIMNgkdCALSXDJBqllr7vfe8r9+L6SSavaCmmArve
|
||||
4zjWEh4ah03lJa1doJ45q7nE5HO/vxve8IaDATp4wCa/EfmR9dP+ZSCmSHUWdNE1by5q3fnLRAWqMw2gS8xLhBMSsEXLLnQ70tnqs19MYGO1S3FeTXG0RwK/
|
||||
9tBXvrJkXqApjNoHJZ1OJBJsX/WqV5Vdz7RowSld7ZjdvTtvImHmYNG1qW1ZifNcNl5YYpZMR/x81hDh4ImxvWfazD9iOwBGm791XwXuiI6K0wp36JNZMNLX
|
||||
2Q+LmqpnyzCQtMu+6EUvmtpXbPvi2DMNgIllUrIz2i1OirWgk8q6A8R7pjNJ5UgcL4oIK9/N6rBlP86BSrIclvSmNmSdL5De2wKmjcsiaUoDob4i3Bbd6a/7
|
||||
BGDkGBg2E3KuXz/2t3RJehHto4V4tfjbH4AkBI1Mr0V69tvwn2uZa4Svp53fptY4UbP7iJLAsfsMzvpumhjRWqmZMnUtI1kDBAFopzE+LCAx39p3hsHzazI5
|
||||
d/2WNBSRliIoWRJ87uJNm1lrJCCN5xa3uEUpl1a4iON5EgNZqnTuAXb0XzM4CrV8GPEvbL1NqKbl3OKwd5YrgsuXVNUxaEoK6zALlO/DNl3SNMfAKYf3OOrW
|
||||
/c5D9heqZblOKvg+FBOtpJGXFjvMD+WUw2BSJcV8n/s3+hqp50PaKqfewWYchSmxiYlWjvGMKLSRJz7mvcGsG2fOO5Ar7LolnXVEmjTStMein5ft8OrakKLL
|
||||
QVUOlwrT4Q7X+CLPBNc3sF10crKgVOLaN4yx9OoOIQonbzmIKPw65dS8WLzLIVDaFrvwy/0Rpl4OlAohpDQZlnCN/Rrl5LwIBy3YRDRj40/ZoWmUVOSON5CW
|
||||
3GFKXiNIpYnFsxxJoDCHT0klngTj0EhKP7t+GclxDo6gRcGUy3G25memqw+TVeMPBZMsxwqE2bkcJRDCZxMLe5kbsf9sgIOz6Z02GtaQJsKdy6F0DkcLs1gp
|
||||
Z7X/nIAYVpGSyj/MY+XQL/29TLSCq0bFB59x40WyMYtGUb9JDnQSAU4epwy2cXbxIB1ydPyKLJe4PombCaQbyUMSYbOWjyv3mXhmH62H9CAMmbSTkqaw3nSg
|
||||
UpPTxrooEh0/TZw7UHAlbXWxyDpS17uSmYOmuuYO1/ks8oz02722O56YBQRFMBkOE+nX7uy8np+LHX6YhD9m2CO7/jIQjFPKtI8ox0BiyKxlk9t5Q0OWuoJv
|
||||
Bw78fcwpMk3nAVS+hzHTh8R7XeKEhZkgg9R4bEyjffMDKJc5h+mPzV1ZQoWZTkjrXbIZ1u+08dX6G7vlbNb774TGlmlLOM6HserWS/syRJ9GmHvGXMOsCCMp
|
||||
lQQ4PDfSvfCLWiNyTxisc5+aPmUipNXojz7EjJxmrGl7hPqUN+9rJmkgMUb+yDCG3y8aA9Ep1HphnCblMFHbqfASAlpcTBZ7QZCO0T5mrDgWtKjrVHyhwDqQ
|
||||
TZnDrJs8kOkhczBxwk8i9lNMy0AzWUXVIBvC0hRwl7vcZVIRm/ZbpnhgzrPPoksWu27yPxjaT9AVLDDhdEwmxsyDUoOzNTMHMiWkIxke+qerrmP8iVOOw25q
|
||||
8VxsmSHyOtlMl4VkZU5sjJVhEk32ofC3/fM//3MJ22U2yvBZgpB7ZWMwtrpMPs1O+ilNVspmuk1GG8cIl8cZ40w2hBs+EH4/C+Qw8QGmucbikdgPX7fIn1PY
|
||||
hNuhgekkyowW2twd1+4xRtPRneMyTXo/DmEGtr4373M/Gue7dYpfqQ8xqVu3lLOIGze3DAPhP7AQcUiaCCsoOL/wPKGy/BXSMLBpIlEoksNZeFIz8L3FXaeR
|
||||
IFICMVi6Dkvx9xZWA6S74LkfmbScjJgtxoU5kdoyXDN3TnvOojrJLN7pB+Ff6BJpKlNwaIM/2oHFCNlF64wQ38NA9mF7dYYZvHLY7x/2sIcNHMRCKRPTYQ1R
|
||||
eTaTdsnCiQn7DcbL4EDP+mt7OsanCSN5j1cLfDp0cwd1/g5rKXWMd9IwDa67R4nAlQvTavYgYS4w9rdMTDpx6Wp8Qr1prZPIZmRtpX0ME8b+lNC4MYQbhQWB
|
||||
9aNbHt9URrSlZUTyTxqe9YavdVi7G35GmMyLlqMOw/Nv+NrN+LxlGAjwMAVAW+iH8wmlNGYxomIm5QCxg7wrpZlsTAOpPkrWlxt5/JYSRWZQHZ7AyiexcI6T
|
||||
HEgmtB5OTmpwDiT19beaSZx134hXE84eF3UUqeJ0xy6RejOKSDikz4jUm7HsnLDdBIf6IvdoYBJd3DH/NNNgKCRcdRAtlFgx36TTMyVgkXQZjrlsif4wv8SK
|
||||
QELrHaZsZ/f7XMyZorrMwbXMW4mXVybSrvlFOYlpN/qwW/7we2OesKQ8m3mnLX7D9y/CZ0JfpgV5xCMeMbVKrtHe3EjrBlib01eIQI1cH5ivRlH2EbwSf9pd
|
||||
mnIxefvX9gvzNqvGMHVPU2UNWDTaUgyEdpCpN0i+3X0iw8BbwPgySA8GSO5zeP/731/KYCI4LiYybcXvGI0JgymYnCRvkjO7smv4MuQJ6pKFIW3E4riPPCti
|
||||
xUAiYSvXH4nklDlvDOvWY63vaWOphYzyLQiH5AfqHjbE7qttpNxuNIo+kkUZfuzxJqHIoi5hIp7HLJP3YjLKxKBOOOGE7uWFWeWeGjb8UaaXFTcs4AdCTS5s
|
||||
fcPC7QOBcUq33WaxxeemSmMtzabda+DsfotYlwEF1+5eNniPMbvenDkszq9YRnrLWeH+2pFzflI70rzd9VFKmOp+fkEaI+YQJ80XU/hwWZK8smIYz7TvJAIV
|
||||
wSyFHuWN0uisR7lGLWLI9JZiIDqHzTDVeq+YhPQnJhCVEpd/ZcTGd2337O/J/cXL6zCLF8na4m7w6GAOsyxb2pKUnNMJyv6fNtAcKMOvNI+U6HNgLGqIXtYd
|
||||
k0zH6yhVPq/rvgoNhdnwsb2Yatch7ppRqnlK0H0O3fptMCVHgyqLrTrDWrv1WfT3GCXtQxv6JtdM3xI/yCgShq3McSHYzFiex0n/8zDlTqMDY1Oj643bww8/
|
||||
fNrlC/m7pKrawBqQfqRJFf1sYGjsWyMEJyCMGwZwta4wWzGHMaEPm2f9bs2gqaR/K7XJM0Iosu4ceeSRRfjMAIpufdI/Zj2aJBB379nI91uOgegk3F4Hp3pp
|
||||
wNAQMIB01vrOQk4Cdi0pO8l5I1RK0oXyaAz8I2zs/mSlpX0g0RR5QJQy/QGV016ECxON3cKY2BMjqyYzkGvEiTukx7PZQ9M8lnVYlFdMkr9BnandOQkm1c89
|
||||
HOXuec9Z+XvgBWNanEmWpr9kHqKJSGU5ufLUNX20gkI6zmu63785AiKyv/swne69i/A+Ga7FalSE2ag6pg/NjulRBGdMZFzEj82D+oiPJIWhUeXkd92IJALY
|
||||
uHLz+kV8zSNr+ckmRV9l3Y01/lM4McXySQhCyOhJDJqwkwE6eV++iio0Lq09xjhS5qgxnPfkKytJri2CcBYR7y3HQN761reWzsYoDg81+/mxM/kOocJfI1IR
|
||||
kBJIqmzrOtakSUmXpjILMd94Frs9e2f3LBCLgdDLHGwG4QViwJn4PwnGYgAbyL5fVLs90wrpRx2HTU3j8LItHrA4AAA14UlEQVRw5SatPDESMyX10eaUc4mY
|
||||
UMqkfcFDVJD3uZkyMwswE8Rs++Ojuu//+G3py8xppC+WyYkOmwwL70aXdZo38m1G9Fic+kjT3UIsRqm1Cb3uSwJA9BuhpxuV2Pf+zb4u8+XRugTQ9CH9Q9DU
|
||||
bv4mTFmWClGaIq3Mf2ZslgjXJjEL8q26j+B5xpixm9cPv4q6c68/G6UXkbYcA0mpymIl224Sbp7Sb37nlUMrdg+VRb6v5Jf3U11zEnK4I7ZnOY0wJlIDpsWR
|
||||
zq4t6qJ78BVzTtruh6OK8hmb/YqxGsAmSLfu0+qVIYzScSBMhZ/E4k5D3CtyDPksAs4EJN1h/imZZfgwTbAvMTFY2NT3sCVKsSFEXJ1JqjtEEE5ovGCEzD5M
|
||||
o+2m2Zlw24qgBMyHWbUv8S8lsxMQsmyUe1iYOrsRU9PaQRDK4A4mKYw08YYJTRlzyewWML3nWUcUW4vSP5jje9rzCAT5PHtWhoOCpt2/Ub9vOQZiwTIZmVv6
|
||||
TAxO3QSBBDwqAmZUZ1Blc5HcNaSTU0Y4wU1wTCujL0aVk6YLZgSL7CLRr0OCSk3CBMHw+hJnt34wqbqSnklHSksTIGy+Gn6r7jW0h4z2yb06fZ6rDEnvPJd/
|
||||
YBnIgpLjDxPt4tCn/nwnufGPdsf8OokIARlogtmu9pAw9ZX4E8awzmi6Sc9cpN/Sp2BRX20afOHWKTBqP+uCfUwc7QIWmGWNe8Jj9gkneQpRsOvDQFyT/i1+
|
||||
WJrIolKOXXiM+Bv5ZbnQAj28sWYRGmlCaIgBMrzxbVz9OF1zYIiEEXHFudXd8+FeC+jXYvGznyNVWj6NlDrGlT/pe1Kc+rIp56I66fqN/C1Sawwig9JR22cC
|
||||
qCN7Oe1L29idV0T5TGiEBTQHpdxkq12gMjKpOIZD61x0ImDkPqPhnFR9684ca/8NrGkxoq6chOkQNr43kTwWIeaxDB+17yRDrvs+J6+T7j3nGHPOMpEgF3X3
|
||||
Ny3gZVS7jE9zwfjKcsa9skBkuPmossZ9x7SeZwwRwBZNsOzWO+fqGAyWj4EUk1QMDtKVKJNplAsiqVeEUQKBAXEEM6GIBXfojJPYcvOV624WaT5Wa/Yaro9J
|
||||
rSymiD6OzOH71/OziDR14+jn61ktuT+jt2hrk7DSDxhxmmSYVvr033CdMuyyrwY6fP9Gf6YRZPQVwWRWom2TfI3bHMPjXvfee++pmsqkemT2auX3CYWdVNZG
|
||||
/0b7zUjKtWipgmO0XZQhM/QesdDT1q0RQt1FVq1W+OliIdQdvsocFmS71232+0kMZKmSKQbYhcIp3YTa2ERIXBNmpiYW+fxp5GssjuX72D/QRNqCklAu/Cgl
|
||||
gVnsN2j8dSns9yWBnYSMMWHXlLAvNJpGkjsUKm8T0mP3UZv+PpNKhhO9gc9qKVJbN+EPaiLkuQmHaxMMogmneBMTriSqg2VoJiW5YkjJ5fdgoiWpYmiSU/tu
|
||||
VH0kEEQxeZsQCpowC426bGG+C+23yQSIYbabuV7aafxKbCnpZES0NWEhKGXrPwk/Q5pt9MktIxHgOWKOzEqSbCozFrYm8krNWsym3CcJZ/jfmlj8m/CZNREZ
|
||||
WMbkaisTgTEl0aFkhwgWIQQVXEJ4XW1xO1wv4SqyjpkT1rRlo6VkICahyWJhDvtwWeT7Ao+ZRLRE+QtJpQnpsGTttRjp0DBblUylsnSGbbJvsWOviyisxnNQ
|
||||
mHvGXrdZP4S6Xh4dUm3JmjtLPcIuXBbxONOiiY2AJROsbLCjKLSGJuzzTaSHaK44JtvuqPu6310oss+awCZz+Fe6Py3k+4iGGtQrAhUG72d9g0n4Q+GDKwtb
|
||||
BCk0oQnOWuQO9xkPyUB+vgQYdxsQ4fWNrLkIPhGWPhMD6ZbpPTzmSQQrZJ2Zx1ozz7r1LWvtK2TfJ83xujB9NBFKWxhIOMRLKmuS7mrJgr7ei3o45YqUqG4R
|
||||
ArjaKq779aEel2ekljbrA/VHOMOb2InbhPmjaHWRFqMsbhhzRMQ0kTmgkQo+U2jP+qyd5iD9zfrsRbsPtinJzrNuax0P86xL37IsyAceeGATkX8NzR9htGGC
|
||||
6VvEhl0XPo+BZSItKhv28Dk+aCkZiMEd9vZynoEFyyK9qDn0qdEWaeah8LfMsevmU1QEFJSCSGq/6kjKs5YeOcoaf8gkppqTruZpuqN10D7QPKXuUuA6/OvW
|
||||
kaa7DERrioCPUtV5aE3r3eYI6GgiWKVowJ5F0429L034KhqmqEUjptdTw/yOaNPhbylnB5Uvlujf2g15m9TYCDMsgLMf8mcsIhkgbLCITyBivheumlknDOTb
|
||||
MajnSZgGtX+ezEP9TomDdxDml/UvXyzoPwcyJaOONDwLWsuV1eK3MbdQ7FFY+eOCfVLXSJ0zYB6x76j44zCQUcyDQMcvEpkQiv8ugnKKb84haLFvpAg9691E
|
||||
/itMAzH3RkRWwyeYTHu9nz/P8sdGdIhyWcQw3oxKyJQFolL67u3Ie+f9KsJomDIL8M6xiW6WCKfh8tbjczdzqbPGl4Gk9YgJ8If8TksQxmvnfe5dEdGyDPT0
|
||||
pz+9YCx6TKj3opL1Kc/sMSaEN0/bkNdNre+e/LOfw96OMHuVc2xknRBVKAx7niQS054rz5Vbz7zLdCYiFI+NSMVFoklRWNGGPwI4/H7RGYjFLzfzyMy7ml2n
|
||||
691BwlPtN4HpXSLP1KLGeduXkjmtpGlY1Hpmf3UPO5K/aFkod0cLXQ4Namq19YN9NcJEpcORa03SPrmWpFy3cP4sFjbXCAG1f2leFNroIBGpvQqLtncp26me
|
||||
mW/NPPO+z2KfmRckXJUVQDiuvWUyI0gCaj7k3FWu/WDCp9/0pjeNzHic9en7au+OzAzKzuwWQoZlsfBc2xOEDvfJ49X3mWu5bssyEBv+IiJrIEHYH7AIZNMi
|
||||
KcYAkRLiU5/61CJUa2wdInqq1DV8Fa19HYtM9lHA1Z9klstC0tSH767Uu3vmA+leUkm7p+0tcOTv3rGHw9k0NrJJdWEMSTNjvw1t26vPpFcbFJ2NY2+CfQVO
|
||||
gHzWs57VOoZYLqdZGIt7w+xY6ioZ6KKSg7RyLDjMrQ/zsFBjCDYVj7IaaCvmTYthNYClFDIw9yy46yNjb7UZBZTtILbM/KA8KW66ZM/QPe5+9/Is/Zp55rrX
|
||||
bPT7LclASGYmTQ4gKUoWwUxkUGbqEnWjIXXPCNjozu/zPBKxMzbUlyqdB2n1uXcjr3FGSGY6pnFmUsaNrMOsz4oAhbIpFcYYgiwINrxJleE7jEHbMATMQ4JJ
|
||||
CxUzLWlUMkSM5xnxx7wkwzGGI+WI3dCZu0pZ+ceCwLzjeNdpZp1sF00nzSnGBK1nEYmZJ6LPSlslUe1bz9yEDBOEWdDgxjGTbHv4M9sjjjiiaCiYCIylpVEO
|
||||
TagPSW4pLUr2jz733ubmrplQXSRxtBlSTrlMk9LnGetxzZZkIJnvRgfwNZAs0LSBsB4Ad8sk8eUO05Q4TfJFNw1Ji5EDO1OadNu1CO+7k4/ZYdnI4sU8AWfH
|
||||
CEgFI+sBe7gFinSKKa52rFgAab1MI8rNXdgWWIuc52FOz4pzMqZJzY40yHGwqNlhYSRDhHpqozQvfUmOMH4dzBvR0t7xjneU00LtvieYTiMCF8aRjBZjmUZM
|
||||
jgSCxJZWQyDSX77TZxgF/0jSZ+Jk1Ux3spl9seUYiCNrSVeAJ6n1lQCyY9b7lRQXUR7lfHZ1ZA4wuReZpF9XV0zPhFpEktk3mbLDvjZbWFgtRrSIrH+ahggX
|
||||
tGe+jHmRlCf8Q5hV7Eov6TgyDb58bJNMqt1sxwQhzGnRyEFXxqq/Rz3qUb2rZ+HnYyCIdEkbMe/j46wafg5zlTXj+OOPL8laYyNl9/LB+0ypM82UKvklc1TW
|
||||
mYZJYEiSxyxN3tIqdTVrmb/zrJ7NCnLZUgzEopG5pdglu2efZ4csyiuzQZ4Fcu9wzi3qgsfhn+q0Mw0WcdHQpyTzzDJLivz0iScuSldPrYdFO+3oghXSXu/o
|
||||
VEylzyFeUx8ydIEFx6LFUUszdv6KhIz8KuNykJGA5YTLxc49i0Qc+ql9MLHlAU6T6ogZOFnwaU97WmkXf9Q4YmaK1C2DoAX94kwWvokumSPMqFLGZ192f8/3
|
||||
HOGugad+5qvpMo+8jgVFP7lO8lVmb4KA9hk7GJAosc04n2VLMRAdmrZPh0ZtFFm8DECH1TA99CVJGg0KvpBRA6dvOet1nQUjJRxa3aJnXjWZktmNOid8vXBa
|
||||
a7mp4Rm7sucmOaHRwuKMm/WgPNGQlMzMJdKHZuJwr+7i2xVuCD6Z+deCt1qT2nq0I8vkVM4Ipj4HxH3wgx8cMG5BIhzY445ewBS6JqR8JuY7HInm7A/9xpSO
|
||||
6fgbRTRLAoM1gAAxnD28i7tnZ1SZPtJOPhBak7orQ5jyqDqOeva8vttSDIRDEZAcWX1z/c8SiTIMvgHjuf5WE6nEPpt2b4cpLRoxteWE7HsaoTZ0B/5Gtklf
|
||||
OrhLPzAFDE/sjaxL32dZXNJeru7akPiRUAkXTBfrQULbOeYtRP7gFps7yyuBaBzlPOOj4SdYFCI0ZBumaW0YX44V9/hL0+Fwe2LTZDkQim+Ng16QA4br+1Ek
|
||||
uIEwILBB32ISzGCjKE/e9PzD4gTVSZQHjzmygMmTiY4Qam6yZojM65q4JpU1r9+2DAMBXE7EyJDZCx9SFocVm/laTDOp1nN2pQOuTwU4z9KMRRpcJLKIiXs3
|
||||
sLWr70LhvlwAN6M96fAnRPQ5UGwz6th9Zjed+6gABf4JpzYOxuec8SXwME0++UlPKmdX0OIsRBbXccShTMI2Njj/F4G6wgO8pvmN4Jl7nHKMf+lLXxrZlBzT
|
||||
zFE2Jef+kMh+3L47TF6YSmoZIr4EJdAKUjPATCLLwMiyHSORUY7TzoN5fKwRyhyei9puM6qoukXSQJYqF5astv5QRK6U12n/5KIKNbZk5wxpYaaU5THomjAB
|
||||
lEdJoSJddF+KhbmRfjykmSZMWH1v25Dr5BGTgh3tExlyw5nb67mxsJTr5KSSmiMcfU1s8CupL6S+DpW7VznTLopJ0wSDaOI40UauI5SJ50KrK+ncjYdFT+eu
|
||||
30P4KfWPhaqR1j4Ei5IrzPiQxtsYk3yyJPeE71lJLstNa/wnvbu/LsWC1oTEXBL6ea73+jFWwHLsgPfS0ISGN0gd071/M97HAt7E4l0erc9j0Z5YDbg+5tGP
|
||||
bu6/334ld9o1r3nNMsZPiVQ4xpXknpnqJMd0mHFX5NULhlLGnqzVeY2kocZ7UmhpTfiyxq4LEapdEohGcE15bjCAkdl3gyE27z766JIoVvLRLsn3F0yliUiu
|
||||
uacG6j5nte/XnYGEGjnIjLnaynWvt2CEfb4MaANDvhu5ekzIcaTDnZ2ABucbxIQgVg0oJsy5pwzECG0skytMAM1DHvzgwa193oQ0McglZNBZFOedG6pPPUZd
|
||||
ExEnjUkZPoXmgQccMOqSsd/BPZx+TWw6a/RxkvY6Q2UtBCflmqj6PBffLFM/JA3/lt8v0uuPI78SMh5jD8cKvLKefoNbRAiV82rWM/8U5oD5y/10kxvfuPne
|
||||
WeMy6+JVP+bcSuaNueQi2r12o95bYPP4AYtyH7rPvvs2R8ZYkpNOinfHP8gcHdpJyZ0V5ttSTFg2iiDSHVt+wBz8Ic+PkxqbOCu9fPYvjhhuIqS3iY2Jg++G
|
||||
38BMPjQE+4gaHSlkhbO/MJgws+1wNkiceliY+7AgMPysjf68rgwkbJRlkTHJLZxrGXzulegMYRwRiVVANqgnUZiwys+4t04evt7ncKw1cRxoI038MDkv5HVx
|
||||
YBLSeTeZMFCG783PYWopb+FgsV0EBmIByUSPYb9tYjNaVrfXq8UlNj+VNO0R7VPaFQEGTdj0e90/6iLSroRycQxr0TryGn0fUVflI4bnuqSwLzdhomxI8otI
|
||||
xt8hkV4cGWvJbEn32pUClt8sbv4sSOEgbiJIpMkFbl5to0kfdNBBTThzS5HfCakYqYs6IYsrKTkp9jk0EWixQjLP3zbyFUb+EIES+azu48hcg6UjBsy/N7zh
|
||||
DU3kuGqucY1rFA0GvuaCQ+VOOumkJnJRFS23W57xFqG9TZgfm4j6LD+5T//AMhlM957h9zQbhHmMYiDGgTT0sSF0BwFMvQlUEYnVhN9vuOhN/byuDIRaHPbW
|
||||
hupI5cRE1kLKyMXXAM/BNKlMA8UAc213UrjHILTgMXNFrqKGeapL7omoleanITVQlw3EHLjd66a9NyGRQTfL/dPKn+V3KjGzCXIgVOLatyzXR1RJWXSo1UwL
|
||||
2tdd3PuW5ToM/uEPf3gx7+R9Jou67bnnnmVSWyakwf54LLLhjCzSmr4zweLs614TOcveiFfMTorx1IKZTGBF2qURw1B6d6YN/WGRC+dtMSuF07QchBT5mkZK
|
||||
q6utP4YfaTkah37lPGCKDT9IwZeEbCE0VwhqxoZTD9UJ5vvvv38xBackvdrnz+N6c5B5lEYEMzSJeeQzjR/Zbp0dFHs7CoPELCOlSV5STEzmaTJRQioMmBuN
|
||||
rUiqOLjWOkQLMTa7NImZJeZnC3xHzbUIFW6OPfbY5klPetKKeik/AnhKXWg/wxpS9/mb9R5LH/knrJMTeFayoe6i4ZyNhWHWItb9Ps4qTqtR0RmxOA2cZNJG
|
||||
oJiIveoUg3HgFI1DlAq+QiIXhTLbKkf0OOfftLo6Az3MAcWpJ/2Jsp4Zu51XS5y6NtPlOLSpKibtRCepcZkBAO6TjWDRKDSpQZtkEA5GN7GKwXDKRrYQegb3
|
||||
2RMQgtfE+/r8mIEHsBL15bMIsElkrD8n8qTFIl3qI/Ko7/ifVO6sv8Eho6o4lE/vsWs8nwVHbd81cueFGSm/XvHqe5torVci/DJcPMeldDFPeMITyh6RFTf2
|
||||
+JC70EOYHjjj8zb9ztkvrU2mnOnibOOjfU+zztN8zqyvmxaFBQQdbkIs2m7xBFNqBxt9hnen6kiRKgaP3zPXTrdjs4zhVxNT5FecAVJSrOTGRwNQKunNppCG
|
||||
BnnE4uzs3lEdduiKJMsoNJiF07f0rTZbmFa7NwPOJhWcQzsr4YqZlmYaTmF6GCwohJ0wQUy7ZcN+14aQVEu7rhNhnqvJFG2sSesNk5BWVxU2PqqBItUyV5YF
|
||||
VIryvmSsZGoei9jwPoa+5czrupD8B2Nl2g7w7jPD/Fzu0/4cv/k7xv785z9/MN+TYeSrdUDusW6+qry3z2uYp9rwlZTn7xX5+4YFAnuAPEsdkDUm1xkRfKF1
|
||||
lP1nfZ61HtdsGgPRGJK9SbDIG9RoCHZ60hpQmGEGG3pIX5nrptux5cIx/7TZgHCvdotXJ537zk7qHBxjbl/3r22EzLDCvpI7bTIcu6UNYfctdZT5VTlCH+UQ
|
||||
8vtqUksoJHffYh6yAq+WbMQLs0Opl13di0J2DKfkLr/UaslihSEbMwSQLmGcmMIxsYcnnKttBBu0YdsvGWI/ELuuT4mUHV2yeVA5YWJspSpZLXkOLV0ZNJfN
|
||||
pC9HGK6U+OpC+xUiO43sRM8sAOa67Mf+vhtjGrPeO7IfKy//zFXZesOcVNKarIb5j6rLVyO8N8yDpXz7a7oUASNFK6JR/WREPj8Cm3G0mVl5JzGQdfWBRIeU
|
||||
c8D5Po4P55PIqUUkTvTYHVxCK4XPhRmm+EXUle03o4qiI6dWn9M0NgOV6yJbcBPSS/GfxEax4rxj6xQ+u5lYnHrqqSVsUyXDXDS1TS5wglpoC+XaDGMWAsnn
|
||||
wQHIyS1yh02/LwkfzfDoGKTlCNK+9+Z16s/PJVqL/2BRSB/HQlH8RMbBaikWlGavvfZqYvNpCVY4I3x+p0Xor9M39YOy2ev5BdjU2eyFnPLp8WHwFcTepRJs
|
||||
cnJEWyHh5GFGXW1VmsifVcKLhb7y1WwmXSXmE78SH4RAgBDImkhCOHY+8eNwdvNHid586lOe0twoIqf4pX4ZUVWRzLD4HrQJ5o7AdVy2kHbjeR4Ui3/pG2UJ
|
||||
WOnSK2KtMB8FKlx46Ohd/RwpWJpgcE2kb+netjDv54PQhOYImRNzHpLRqkNgJxQ7158sQkL0dFhsIGpClSzl67Q4f2JVjm+RGpyOBh+HWMaqY0SO0DSQ7b3Y
|
||||
TAainZx6HPpdR+IkUEWpIEz0wAMPLAu/vQyhTTW/DuxEsFnQRLT0JQu+QAtlWuzUx8LYh1HnMywKue9EHy4K2feDRNXEprPe1eq2P3HwXXjeCqOwz+bK4fy+
|
||||
4FlRPcMFG3sWV87fU2O/Q6GzBJ9Rztvh+0d9hq85jIGk83rUdRvxnfFm/CZxjFtgw8xWGC4mCS8OcJFTBLZ0YId5tblVMGVkbr/6Na9pItFh+cwhLlxfcMNa
|
||||
yTMxDcyIsHVERH4hGHYZCDxfHBF3BAWMcJgwD8JA7ESfGzMbfsY8Pg9Utyhsxfu1OtGjIws5zYt5Y5zzKq/brFdmJmaQ6MjirIIDpy4zAZpmcsrfj+2cUTBq
|
||||
x2kevckeu5kksZw2sq32tWlT+dOM0R0nTE8323PPcn4Bmz/Hel+S2kVZTJxSW89CTGvMaMphclgUkuJCnexYFmDQl3IsBWNtg7mXMgQL9CXBBcZt1+fI5Kgu
|
||||
IczMlLSRzV7iQGXY0b6ZZId4jkO+1TRNqZu/YLrlNT97lWVBJudhyuCWWORX5aMaLqf72dxK5zvcpTLK+g5nopBXz7ozKiGsMcOpLnHksM+k+7yNeL/uJiyb
|
||||
Y+yXwOlTaoqOK0TqEQNNarBxSUjdvIg0QmpdK9m0RTogNSC7nW2yE66HhttUvuz88zvN4imhHovxFmoXzKKE3wX3KVeePTSSsK2W9+LN3xeaTtnAGNKSEb9R
|
||||
RDNKU49698WPCY40xVSk/sKzmRC096MRGulPeUJ7E8cQBce2zXONCa9MnMJFmUpWS4cffvhAKoY7idS4iIm12qLmdv3OMebhgpiUaG/GWB/KsWYvSO5hsgej
|
||||
L5Ggh6XoSJVSNsCpkz07NmgaB33Jfh9zF7mPuVefbTQZK3Z8W2doRRERVeplDMAZDfe7MRtpjMqGvzIujYsYp0x+siggmxKN6dRUlJH9UC4Y/jc0rl3rj7lM
|
||||
vzHpqqvwcmuCcmnnzNkftsbEtV8K7UN4sI2I6hLC56Du1sw4UKqY1FkumI+zbsNVMdbDV1Y2R06s8/CNc/y8A8eOsst3fTUQnDTvmfQ6Kg/QajlogF1OZCMJ
|
||||
7R3Or2kJ1fqWn9JITL6SVK3vfXldmHV6YTAJn43+Lcw/M0eF6QeamyM/u2Gns7YhFoT29SG9rYbeETmaRAbN+syNuk+IZt9w+N8FruaJvlE/53f0OeRoEm4k
|
||||
WOeDZHuFqU47WCrLE1iSUYR5f339w/q4KDgYK0Lh14smaSC8wgbWSKI5sNMNSzTDF7P5cfbhkuO4oGvC1NG8813vGnvNcLnDnzkmOcwiuVsTWTOLT0EuJA6v
|
||||
mKTF5mgz2yz02Mc+tokDYko72JmjM3rXk08jzAzFj8IRZ8PYOAmti49nbDSRjKQKCdW6tNFu9NVIuKPqGyGrJXDAJjXSFE2AI5I2khrYqPtg8duQ1vidON9J
|
||||
XmzBnJ76UznDBDOSOX+SHet8LjQ7Ura+3wxMh+vos7bxg8jDhkiaNvKxd49qFwd4nNHRvCzG4Puib5Dd9fpnHg5UAQsyKZhDiLYXEXMlp9woH43xy9EsrYb5
|
||||
hmQr4NMsGG/A2KW1nyt8XL+LdSUYa6lDANtMXLT+cNVs/9fapqjbOBpV51Hfde+f9rtr9dOuu+5atDH+tvUg62Hsjxlb9EAyiStWvO+rgcSAmkpCPtkiv7+K
|
||||
szS6hQrFk4mXxOss9EiwV2zeJFASmzz/9iLMStIsx6QfaDShGvYqKibk4MAY4YUx6Xrdt5kXheloEAoZKnapSt/2Tqq3voEhm69T1vqS/SVp7zcGhZtGkEG7
|
||||
zz77lH0hNj3ybxhDtM4M43StkMtFO/Qo222DmL0yOa/4emySDLNE0aJtunzcYx/bkvByz1FeywfnJLt5UjiWSzbXfIZX537wWz3ykY9sDwqbPU3aplm2dz6T
|
||||
vNbGutwLNc86TSpL+GwEsQzm5KRr62/rh8AkDSTGx0qm0f08TwYiLbKyV3N+cReSyBFUFg/fWUwiZ1IZ6ExPHLB3DZMWVX1W4shSv0MPPbQU0WdBxcQsdO7j
|
||||
DFMnm4bC1rzir09Zs9Z7lvt+H3W00VG9xdLbVzAPssAQEpRrQ6H9NBbRaX8w48y3cEVocLlfGZP+QpMqxxkbF11SFvNaaMPdrzftvU1qmHTfdmGO9tKs12LN
|
||||
HGafUkitE/FN7ENbau2iJ3RsJAkEYPpTD5sgpVivtDkITGIgE7Wkvias6OSpJNeR0NXYuFbyS029YeiCgK7hUJKLh2rN6cUxxVQgVJYZ64hIenjOs74bun3q
|
||||
R/l1rnWta5W4eWYyz1P2OOJUtD9Ehs8k+xHcl/cKMfU+jhAtSQKHUzTnfZvxGoyyOBc9WyJJ8fBrJSYbIdFCc40dzkkqNgymkX0kwp7lLHpvhF9+KPL/MFUx
|
||||
UclRxPTGJCZcmDlIOn+BCsYVE1jmkGJOY0qFPdNMLJSNfRhxklsJqZ1Wj/X63XhhjhI2Gsyh7AuAjbHLRMysyIwlmGBSZtd51c94t6ckTtQrQSPMapmynEnQ
|
||||
mBVyKjyWSXYjidM4fDYlHD52gBeHsr5nDpyUi8s4sU/F2AuNdpCAcyPrvhWfNcmEtWEMxEaoG8dAZKdm62XvXg3ZsGavgIgQi51BbSObQYOZ2K8hIWKYOVZT
|
||||
7OBak1mZ6ieSZ1L9+A/EeJ8aG4DQ7W5727IJiP0fWewsmga8BdAZD+Lyrxgx6otC4sstwHxcos4sXJL8rYVkDJUlOTSusvjoH++nMRB4yUJr4cKQLRYInr+J
|
||||
yCF+EtdYbP0hix1fiWfqO4zC4iI6B+MXCSM6UB9ZLPmm+E3m4U8oFVjDP/4e4wI2Fm5JDPkjbKrEaPg+NoIwerhIIsr/p04IM8/EihtRj+4z4rzyIigaM2+J
|
||||
TMsXjwgj9dPfor8IKKNIf4u2eu1rX1vmrrFAqJTBluAqotL8I4CY45X6IzCJgShlrCo7TxNWDIiSR0mMdEiWPq6KpCyg0qYPRXQIc0WSXDWOg1wLSbgmfQTb
|
||||
6yjiY3EMaMZ5s2k7E3nYXKJuYr5j0Su5moZz74wqezO+k7BQG4wBPqS+e0JG1RVm4WQtZckdJkXDakgqDvWIBaDcNs3sF+Go5Xo+BmZR5kNms/3iaFj7Jphd
|
||||
pGlxnVdly+m0qHT38IPwe0xLuDjv+sfi0Ia2UdLRzLvs1Zan75mCQxgoUUX2fEgBwiz6rgkpWEKAKL4yfSz3nv5m2uZX40/zvT9mRCmL7EtjxnPSYgifJV/d
|
||||
auvq+ti4WtayCMEtJxiqx1akSSaswHXjGIi8PZ43y9ngFunYLdqG1FkWiL3Dmcr5J5su2nfffdecdC7PI+5u7PFc+Xc488LENRiMBvVhI843xhwzJPgRES4Z
|
||||
Ut1Cj6nMyqtfQmoreb+6jLlP5W16ciyrMjjRZ8lJxTYvHBgT4kOaRmHOLM/TDxacSeM4mWRI2xMz/E575nr9buGRe8lG02mMc951yE2lIfnPu+je5Wlz5uvi
|
||||
rOdbJFBgJqE1tJGCfWJZOYb59WTTleuNH8zc40sRpBH7M4pwQQjtBmEYGxio9cTaYk4TALtkPhBg+dveGPWSkZcwYjyFtjwYe9qwFWlhGAhHnKgpHTULha2+
|
||||
SGnHHHNMOfA+zBNlQbDgGDwyV66FSBK0BpEyGJLEh6JRMhFaLlKiwEZl1Q0Vu2SnJfWsVRtaSztWcy8Gycmr3dk+k+nwww8vSfnGMROLHvxfGhOT5Jz3mvSk
|
||||
ulnIJFdOnzO4pcfGPJx1b1Gwa9dkDhNX+ROdRQLVloMPPrhkRhYvL/swYYCDf1HIngzMe1T2gvWuI+bP0kBj2wyyWAsa0O8EL5kF9I/PmMm0DLhxRka5VnYH
|
||||
zOOUSCSJGbpPWYjlgICCjFsMxv4xgT1PfvKTW/cawzkHwpxXkk4aNyIBrQcwyjHu1XiTMVngRxxQVQI6fP/617++PGcr/VsYBmKxsviK/Bnm8n0ANyDcL7xR
|
||||
p4X9u9wmT/6tb33rHfLs9ymze43JlBlQu4PFe0whfAUl1DG1iq60KNWHBYz5ZjUhrN3nb+Z7Ep+UDt12W6BtZLORjFmA+c6EC19TGxkFBhFX7qF50ATgNIq5
|
||||
9mmbiS0tCWFgHOPKcmwkVV/XGVfhwC+MS1i1PyGrFo7sIxqL8OJrhNSovk996h/Cl7O8zXy12GFuwqA3g5gB9XXfzY7zqqN5lJuQMTCpjtLcyBRF6p9EaTFw
|
||||
rU2thB5CZPiRijXic5/9bLndusH6MSmyTaSgcdcd/97TVswB9aNhYE6eMWzmtnYwj1kDbDnYSrQwDASoYs2pjbPa218QOfNJA3E6YDl8R5mxSbElka6VMDVS
|
||||
DynW4hQRQaXcAyM2Xsw+DWWYSLLqYrBhbhauZSWTQP/kORbDk2ncZ9KYvRiYqGvWEnIJa2VE2omJMGIgzBsmfh9i98acMBUTQp37njvSp/y1XMPEos0OMNsM
|
||||
ovF5PnPWRlJqnIQSzCOZib5yRMAkIulbR5j+MD7zkKkrIiiLYBmBOuV2AgStg1A3vOh3y6c1y4ZA23CsgPFsT5ecZH0pIgHLGDPOCDFbhRaKgViEDdZ0lq4W
|
||||
ZKqok/1S+3A/57fT3+ZBUj4wf3DKkmT4WDhpR50GxiQXESKlPerAFLEVyGTGlNmGmRyZiKj4zCy0DJIW/5OJT+Paa6+9SrNzIVqLBMYEAf9pCQQNanXp63Sm
|
||||
RZnYNBbCAEkRw1wEkniRdhT5kTalOk72w4z7aH7zqiDtwx4qfUiqZ0GwLsRRvlOtE8aZ/SnGofHSJUxi2IfG2U0DGf6+e58x4fnDggtGhhlgTvwpmNEkslYQ
|
||||
QLVtmgY1qZxF+m0SA+mfUS3QnQdFp5f4/GOOOWamkFthmkJ2hesKhxTeK5GjNBrzICF/Yc4pYYzKta8gpN0d0rmEo73UwT4ECd6kQpm0b2QedduoMoRw2gcQ
|
||||
0nrZtxO24JIYLjS0EiIpzNMeHCTkWdI4FNJeeY1JVl5n+RcLStkDIJmcvR36YxQZB8J8w3w16ucdvjNO1Dns3CWdR2ixzQWjjM0mWIVQVcJTr3pW8s5unWDq
|
||||
Gn/aGgyw/Hmv/d0//ROLZEns6fVXkeDz19Fubc8/13ivz2IRL68+G+exGBdsxoXKduu11vdSEOnf0CJKKp1g6GVehwlrYtH20USkXQk9l87IeOmSvUj22Rij
|
||||
xo5QcqHfUrBYc6RjCUGie0vBS2JDCS/hIqQdrsKc4Ww/kT1efjd+JlGY00q6JeuTbQXCzI3VrUobzkDka7Hfwl4QA3gWcB0AJaZfnh4D0GIjI+U8yADDCGTq
|
||||
tJFKjiUDsEsOf5Gj36APyaZc0/19K7x/5StfWSZLOHYLxhbcURSO6oKBjLMmnuviiNrBpSZgmJnKXhB4hY9k8Nu4N/b56FObHZ3RMIpsvrMY+utDxpoxoo7y
|
||||
soW5ZOyZGn3Km9c1oQWVzW8Ytn0MFrCy+MfZJs438d5fMopsczISjCWZTEitY6tl4cNA7TuxyXL4z34Qf/YEbQRZ+DECm0VtCiWkdcfNqDqEFlD2GWEO9sxY
|
||||
1LsEo7AKNHGUdDlHg3BjPtsTIisvwUgG6VNib9AN9thjIAR9ODat2ktmThsjGCqs4OHe4fnffab3cE/m7j6MAyMLzbKJEwjLJmLr1FakDWcgQNTBUh+Hv2Cm
|
||||
FN7KwERIDfMmA03KatJYmGhK8QYIpmKASohH27FbGBObNujnXb+NKM8kJDlhnsOTdPj5do9Lk20BsPhJhkhiC7W/pKz+YWxYhBuCa/geyi5hkxmmtMg4yrNs
|
||||
xJQJwMKmb5VHE7QT2WIzTHb1u5eWNCoZYPd6iy4J2yKhXExPvReBMFWLnLkQppmycMHAQu/P4oUp53vX+ztvfH/eeO1+71p/ySi85ntl5mfvMXKLmj7xisFs
|
||||
pAatTeH07t0FNpiGs78s1ua9cUQIxTwvEmPparHYa8t5AjP9jRHamGpsapvrtRmjthmZECG5JEwIhLG3q1gRzP9xhFlj6sq3sVGfySDgM4ZuHFrbCMkOryIc
|
||||
2O2PCYWJbFyxS/39pjAQO6DRR2Jnqaygi0S7xABz4puBkWRiRSRHE2mwyyQnOcUBNUU9zmu20ivTgF3cdv5PW1SYJCMcspghSPmkNvdblOz8lX3Agqcck47p
|
||||
wgSmlSBmJKfrXSImd/dZkdyvZEa2s5gJbZjsLDZpLSwYzyQykZkanccQoaJNnMNedilPumejfiPxyuYMO8zAgpaLOwy7mGxUnRbtObIlEEyMn7cfdVSxYGAA
|
||||
mWLltDiF8aPBTPbae+/mCiFsSGcSPpVi3YhQ23Iei3Ns4ArvywZz+UEIHhiCseHkQlrQOOZBmyAQEVbcQyAiVGJKTGOYIQaEgWBiCNNi2lY+oZPAc8ABBywa
|
||||
tHOpT3EeRUk7vAYw6xLaF5JqiXKKNOLRD4tHnEacipxvSMy40OOYzG0cdztwpIX0s3iVX2ONRDXZMMkJGKaTFaWFml4iWYRFclbGAj74HUZHve1tJeiAU7ZL
|
||||
kjfGpOt+Vd7HZNzhu/zCszhWOfKF9w6TaCph1UI4p/WDeH7je9hBOlxm/bx4CAi9DQ2tbCrMDcgh8beRJqgEt1hLEOd1jjFRgMNZsd0zapy86JBDytiwh8T9
|
||||
gmU+/vGPt8HUB5GinOdCdwWXRGqdkU5+ZYvYEuTTJXNFZGcIByUwpfvbsrxfKCc6lkcilZNIfiimhfXKY+9ZsxAnIqmQFiJxI4cYyYLdVV4YFJ0/S9ELf4/8
|
||||
XUw9ciORiJE++lRIdd8OSa/rjCV1kdqYhJgMmAckBPxknFZIWssgB/ZhxHQiFxFNgFrPlt0l0p0/kjdpUX4oeNNo2Ke7FPsWikbIVBCbyYpJitQ3TBIt+t35
|
||||
JBuRpHD4+fXz7AjQ+iUsNR4PD3NXhOqXwoyjCJkviSlDuChmLOPFH6KZ5DimnTI7mb+0OfM2tTqmpzfGyaOc4xFJWMascWvc+y41kjyZtBR+1j9rg/rRHM0P
|
||||
497z9w4tqOvXpaUwkdF8+UaC0RUTbbesZX6/U1R+7EpINaM+TjtQahYA+A+odBxY1MxFIo5WC44kbtRbDIUpZVxE0CLVfS11MblC2iiRVZyNGa3CJ2KS+Iz5
|
||||
h7RVTFwmNnMURmHhR8UOHXZ4QoFFPl/Z5qn+3wqV/lthEuAYdm2aaXJSY0qy7VL5XeMIZBM9NJwdGI7f999//8Jg9BXHuMSVyo0QytJ3Mg3z4wh28Ft3AVkL
|
||||
VvXe9UeArzHSlJQxJ8rR4sxfZFwhJi2OdWa/G0cW5/OFQJL9a6yJlDR2/BlHeV/WXHLGCF0uJijji8m1S0yknkEwMaaMe4yDOUv55sPFYpx7Lp+eiK8cx91y
|
||||
vFcX/jdmLuY3Qtey0KRkipvGQCw+BoNIJwuGzlkEMgBIFqQGJLts7HovkvMi1G8964Bp8EnFnpZywmCfZ5lI/CUkMRON45JmkdrLuDL4QDCA1E7cS7LEQLwm
|
||||
paDBz0IatUCgnKjKOOSQQ0p9Y0PZ4DfXYVqRiqLYoGk8w/eWi+u/hURAX/GXiQZkqbBoIxI97UN6fq8WfYv7OMpxaVzlmMlr+TQzyo+Wy0mPrEVCjAlH3hvj
|
||||
6uO9sb1b+D2uEZGGozTeLNsrBkRDIswQxJ2Xzj9jTMZGxe6lC/1+IRkIcDlpnZGwKMwje9HAIFGon2idSQM079kKryKeLMZhA16Y4AYSIA2QFiyiBnMxmS0G
|
||||
+Qp7MfscpadGiKYJzwTBwd+VKl2PhheS8mX9t1AIdPuW2ZTAeWzsl2ECIs0bF+YlgQczoUkQSM3bUdQtz+80ifBNFCHDAk+TdQ4QMpY8z+/MUYQcGrZ1Ic1k
|
||||
5cKz/imbEEVTF/7/+RDEPhsMD9MwHjExhIkRZAho0/a7nFX0QrwsJAOBjI5JqXEhkDqrEgaEzh5WeRepjvOui0kjfNaZIMx2i7LIMnHxYfB1qBfT4vBiMG8s
|
||||
anmLjYCoKOfHMDVjKhZqxA/Hl5H+LsKD8TOKDjvssOLbZJo+PPwrTLT2Mk3TnJlrMR/akefabMxPKtqqu5YxVfGhMJ/6IwQxy2IgzMDLRJMYyHjdbwNaSIIg
|
||||
KS4ylcVKBUPi3cpkMjLdYeqczenT2Ow2k/rsL0H8GcItF4W5bTY22/H55iNGYVHzxxTqEC7aqcAX+8veFI5xfhHaSaQ9auwOp43y6SYxUTOHRcqcsp9Ddgva
|
||||
Qu57Mg/MB9oJ0y6NBLPANHzPr4KYbK1hfCwEMGHs9oapI//fNDNX1mdZXzfNB7KsgG3VepOiDjrooGIaWESTImGDo9zO3spAtuoonN4uDCRp1DignTgSl99E
|
||||
6hImJUT63zuc8MYQLSPOFiqnikZG38J4bJq1P4jWwqGOUdhMnMILTYb2EDnhyr4jATWis1xP2+j67bJ+W+V1kgZSGchW6eVt0o5cQEYtHtsEgtrMngjQTmgO
|
||||
mEkk+Gw+EWavX5y1gdX4EYVlUzOTVCQMLWYoRdtsiDEIAaZNMD9hHjSN4dDznlVZ6ssmMZBNNWEtNaq18puCQGUcmwL7Uj6UryGOZCh/fGiSLAr7tTcIkxD9
|
||||
SSChQUibwwneNT9tl+CZtXRuZSBrQa/eWxGoCCwNAsxY/oTRJqVGKyR4HLmmCi6j0dlx6+7o6+q3FYGKQEVgyyHQhzH0uWbLAdOzQZWB9ASqXlYRqAhUBCoC
|
||||
KxGoDGQlHvVTRaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUv
|
||||
qwhUBCoCFYGVCFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHo
|
||||
iUBlID2BqpdVBCoCFYGKwEoEKgNZiUf9VBGoCFQEKgI9EagMpCdQ9bKKQEWgIlARWIlAZSAr8aifKgIVgYpARaAnApWB9ASqXlYRqAhUBCoCKxGoDGQlHvVT
|
||||
RaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUvqwhUBCoCFYGV
|
||||
CFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHoicBEBtK2bfO7
|
||||
3/2uZ1H1sopARaAiUBHYaghM4gETGchvfvOb5sc//vFWw6O2pyJQEagIVAR6IHDmmWc2P/3pT8deOZGB/Pa3v22+/vWvj725/lARqAhUBCoCWxeB008/vfnG
|
||||
N74xtoETGYi7TjjhhLE31x8qAhWBikBFYOsi8JWvfKU57bTTxjZwKgP5wAc+0FBjKlUEKgIVgYrA9kLgmGOOaX7/+9+PbfRUBvLJT36y+exnPzu2gPpDRaAi
|
||||
UBGoCGw9BDCOt771rRMbNpWBcKS/6lWvmlhI/bEiUBGoCFQEthYCH/zgB5tPfOITExu1U/zaTrwifrzABS7QHH/88c21r33taZfW3ysCFYGKQEVgyRGgfdz+
|
||||
9rdvmLAm0VQNxM088U95ylOaM844Y1JZ9beKQEWgIlAR2AIIHHbYYVOZh2aePf4O8mYafelLX2rOd77zNTe/+c2nXVp/rwhUBCoCFYElReDEE09s7n//+ze/
|
||||
/vWvp7agNwNR0kc+8pHm6le/ejVlTYW1XlARqAhUBJYPAfv+7nWve03c+9FtVS8TVt7Aof6ABzygeeMb35hf1deKQEWgIlAR2AIInHzyyc3d7na35vOf/3zv
|
||||
1qxKA1GqvCjvfOc7i3qz5557Nuc85zl7P6xeWBGoCFQEKgKLh8Db3/72Zt99922+/OUvr6pyvaKwxpV405vetHn605/e3OlOd2p22klRlSoCFYGKQEVgWRCg
|
||||
dTzvec9rjjjiiJmCpNbEQBKk29zmNs1+++3X3Pa2t2122WWX/Lq+VgQqAhWBisCCISCqVooqroi3ve1tE5MlTqv6XBhIPuTiF794s/vuuze77bZbc6UrXam5
|
||||
yEUu0pztbKtys2RR9bUiUBGoCFQE5oTAL3/5y+ab3/xmc9JJJzWf+cxnmq9+9atzKXmuDGQuNaqFVAQqAhWBisBSIFDVg6XoplrJikBFoCKweAhUBrJ4fVJr
|
||||
VBGoCFQElgKBykCWoptqJSsCFYGKwOIhUBnI4vVJrVFFoCJQEVgKBCoDWYpuqpWsCFQEKgKLh0BlIIvXJ7VGFYGKQEVgKRDYOWr5q6Woaa1kRaAiUBGoCCwU
|
||||
Av8fgwPy24mbuF8AAAAASUVORK5CYII=
|
||||
`
|
||||
@@ -87,7 +87,8 @@ type LlamaServer interface {
|
||||
type llmServer struct {
|
||||
port int
|
||||
cmd *exec.Cmd
|
||||
done chan error // Channel to signal when the process exits
|
||||
done chan struct{} // closed when the process exits
|
||||
doneErr error // valid after done is closed
|
||||
status *StatusWriter
|
||||
options api.Options
|
||||
modelPath string
|
||||
@@ -280,7 +281,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
||||
sem: semaphore.NewWeighted(int64(numParallel)),
|
||||
totalLayers: f.KV().BlockCount() + 1,
|
||||
loadStart: time.Now(),
|
||||
done: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -304,10 +305,11 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
|
||||
if strings.Contains(s.status.LastErrMsg, "unknown model") {
|
||||
s.status.LastErrMsg = "this model is not supported by your version of Ollama. You may need to upgrade"
|
||||
}
|
||||
s.done <- errors.New(s.status.LastErrMsg)
|
||||
s.doneErr = errors.New(s.status.LastErrMsg)
|
||||
} else {
|
||||
s.done <- err
|
||||
s.doneErr = err
|
||||
}
|
||||
close(s.done)
|
||||
}()
|
||||
|
||||
if tok != nil {
|
||||
@@ -1356,8 +1358,8 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
|
||||
case <-ctx.Done():
|
||||
slog.Warn("client connection closed before server finished loading, aborting load")
|
||||
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
|
||||
case err := <-s.done:
|
||||
return fmt.Errorf("llama runner process has terminated: %w", err)
|
||||
case <-s.done:
|
||||
return fmt.Errorf("llama runner process has terminated: %w", s.doneErr)
|
||||
default:
|
||||
}
|
||||
if time.Now().After(stallTimer) {
|
||||
|
||||
144
server/inference_request_log.go
Normal file
144
server/inference_request_log.go
Normal file
@@ -0,0 +1,144 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
)
|
||||
|
||||
// inferenceRequestLogger persists inference request bodies plus curl replay
// scripts into a temporary directory when debug request logging is enabled.
type inferenceRequestLogger struct {
	// dir is the destination directory for request artifacts; empty means
	// logging is disabled (see the nil/empty guard in log).
	dir string
	// counter is a per-process sequence number folded into filenames; it is
	// advanced with atomic.AddUint64 so concurrent requests get distinct names.
	counter uint64
}
|
||||
|
||||
func newInferenceRequestLogger() (*inferenceRequestLogger, error) {
|
||||
dir, err := os.MkdirTemp("", "ollama-request-logs-*")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &inferenceRequestLogger{dir: dir}, nil
|
||||
}
|
||||
|
||||
func (s *Server) initRequestLogging() error {
|
||||
if !envconfig.DebugLogRequests() {
|
||||
return nil
|
||||
}
|
||||
|
||||
requestLogger, err := newInferenceRequestLogger()
|
||||
if err != nil {
|
||||
return fmt.Errorf("enable OLLAMA_DEBUG_LOG_REQUESTS: %w", err)
|
||||
}
|
||||
|
||||
s.requestLogger = requestLogger
|
||||
slog.Info(fmt.Sprintf("request debug logging enabled; inference request logs will be stored in %s and include request bodies and replay curl commands", requestLogger.dir))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) withInferenceRequestLogging(route string, handlers ...gin.HandlerFunc) []gin.HandlerFunc {
|
||||
if s.requestLogger == nil {
|
||||
return handlers
|
||||
}
|
||||
|
||||
return append([]gin.HandlerFunc{s.requestLogger.middleware(route)}, handlers...)
|
||||
}
|
||||
|
||||
func (l *inferenceRequestLogger) middleware(route string) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
if c.Request == nil {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
method := c.Request.Method
|
||||
host := c.Request.Host
|
||||
scheme := "http"
|
||||
if c.Request.TLS != nil {
|
||||
scheme = "https"
|
||||
}
|
||||
contentType := c.GetHeader("Content-Type")
|
||||
|
||||
var body []byte
|
||||
if c.Request.Body != nil {
|
||||
var err error
|
||||
body, err = io.ReadAll(c.Request.Body)
|
||||
c.Request.Body = io.NopCloser(bytes.NewReader(body))
|
||||
if err != nil {
|
||||
slog.Warn("failed to read request body for debug logging", "route", route, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
c.Next()
|
||||
l.log(route, method, scheme, host, contentType, body)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *inferenceRequestLogger) log(route, method, scheme, host, contentType string, body []byte) {
|
||||
if l == nil || l.dir == "" {
|
||||
return
|
||||
}
|
||||
|
||||
if contentType == "" {
|
||||
contentType = "application/json"
|
||||
}
|
||||
if host == "" || scheme == "" {
|
||||
base := envconfig.Host()
|
||||
if host == "" {
|
||||
host = base.Host
|
||||
}
|
||||
if scheme == "" {
|
||||
scheme = base.Scheme
|
||||
}
|
||||
}
|
||||
|
||||
routeForFilename := sanitizeRouteForFilename(route)
|
||||
timestamp := fmt.Sprintf("%s-%06d", time.Now().UTC().Format("20060102T150405.000000000Z"), atomic.AddUint64(&l.counter, 1))
|
||||
bodyFilename := fmt.Sprintf("%s_%s_body.json", timestamp, routeForFilename)
|
||||
curlFilename := fmt.Sprintf("%s_%s_request.sh", timestamp, routeForFilename)
|
||||
bodyPath := filepath.Join(l.dir, bodyFilename)
|
||||
curlPath := filepath.Join(l.dir, curlFilename)
|
||||
|
||||
if err := os.WriteFile(bodyPath, body, 0o600); err != nil {
|
||||
slog.Warn("failed to write debug request body", "route", route, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s://%s%s", scheme, host, route)
|
||||
curl := fmt.Sprintf("#!/bin/sh\nSCRIPT_DIR=\"$(CDPATH= cd -- \"$(dirname -- \"$0\")\" && pwd)\"\ncurl --request %s --url %q --header %q --data-binary @\"${SCRIPT_DIR}/%s\"\n", method, url, "Content-Type: "+contentType, bodyFilename)
|
||||
if err := os.WriteFile(curlPath, []byte(curl), 0o600); err != nil {
|
||||
slog.Warn("failed to write debug request replay command", "route", route, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
slog.Info(fmt.Sprintf("logged to %s, replay using curl with `sh %s`", bodyPath, curlPath))
|
||||
}
|
||||
|
||||
// sanitizeRouteForFilename converts a route path such as "/api/generate"
// into a filesystem-safe token ("api_generate") for use in log filenames.
// Every rune outside [A-Za-z0-9] becomes '_'; an empty or "/" route maps
// to "root".
func sanitizeRouteForFilename(route string) string {
	route = strings.TrimPrefix(route, "/")
	if route == "" {
		return "root"
	}

	return strings.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		default:
			return '_'
		}
	}, route)
}
|
||||
@@ -63,6 +63,7 @@ const (
|
||||
cloudErrRemoteModelDetailsUnavailable = "remote model details are unavailable"
|
||||
cloudErrWebSearchUnavailable = "web search is unavailable"
|
||||
cloudErrWebFetchUnavailable = "web fetch is unavailable"
|
||||
copilotChatUserAgentPrefix = "GitHubCopilotChat/"
|
||||
)
|
||||
|
||||
func writeModelRefParseError(c *gin.Context, err error, fallbackStatus int, fallbackMessage string) {
|
||||
@@ -100,6 +101,7 @@ type Server struct {
|
||||
addr net.Addr
|
||||
sched *Scheduler
|
||||
defaultNumCtx int
|
||||
requestLogger *inferenceRequestLogger
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -1157,6 +1159,17 @@ func (s *Server) ShowHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
userAgent := c.Request.UserAgent()
|
||||
if strings.HasPrefix(userAgent, copilotChatUserAgentPrefix) {
|
||||
if resp.ModelInfo == nil {
|
||||
resp.ModelInfo = map[string]any{}
|
||||
}
|
||||
// Copilot Chat prefers `general.basename`, but this is usually not what
|
||||
// users are familiar with, so let's just echo back what we had returned in
|
||||
// `/api/tags`
|
||||
resp.ModelInfo["general.basename"] = req.Model
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
@@ -1686,26 +1699,26 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
||||
|
||||
// Inference
|
||||
r.GET("/api/ps", s.PsHandler)
|
||||
r.POST("/api/generate", s.GenerateHandler)
|
||||
r.POST("/api/chat", s.ChatHandler)
|
||||
r.POST("/api/generate", s.withInferenceRequestLogging("/api/generate", s.GenerateHandler)...)
|
||||
r.POST("/api/chat", s.withInferenceRequestLogging("/api/chat", s.ChatHandler)...)
|
||||
r.POST("/api/embed", s.EmbedHandler)
|
||||
r.POST("/api/embeddings", s.EmbeddingsHandler)
|
||||
|
||||
// Inference (OpenAI compatibility)
|
||||
// TODO(cloud-stage-a): apply Modelfile overlay deltas for local models with cloud
|
||||
// parents on v1 request families while preserving this explicit :cloud passthrough.
|
||||
r.POST("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)
|
||||
r.POST("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)
|
||||
r.POST("/v1/chat/completions", s.withInferenceRequestLogging("/v1/chat/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ChatMiddleware(), s.ChatHandler)...)
|
||||
r.POST("/v1/completions", s.withInferenceRequestLogging("/v1/completions", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.CompletionsMiddleware(), s.GenerateHandler)...)
|
||||
r.POST("/v1/embeddings", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.EmbeddingsMiddleware(), s.EmbedHandler)
|
||||
r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
|
||||
r.GET("/v1/models/:model", cloudModelPathPassthroughMiddleware(cloudErrRemoteModelDetailsUnavailable), middleware.RetrieveMiddleware(), s.ShowHandler)
|
||||
r.POST("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)
|
||||
r.POST("/v1/responses", s.withInferenceRequestLogging("/v1/responses", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ResponsesMiddleware(), s.ChatHandler)...)
|
||||
// OpenAI-compatible image generation endpoints
|
||||
r.POST("/v1/images/generations", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageGenerationsMiddleware(), s.GenerateHandler)
|
||||
r.POST("/v1/images/edits", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.ImageEditsMiddleware(), s.GenerateHandler)
|
||||
|
||||
// Inference (Anthropic compatibility)
|
||||
r.POST("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)
|
||||
r.POST("/v1/messages", s.withInferenceRequestLogging("/v1/messages", cloudPassthroughMiddleware(cloudErrRemoteInferenceUnavailable), middleware.AnthropicMessagesMiddleware(), s.ChatHandler)...)
|
||||
|
||||
if rc != nil {
|
||||
// wrap old with new
|
||||
@@ -1757,6 +1770,9 @@ func Serve(ln net.Listener) error {
|
||||
}
|
||||
|
||||
s := &Server{addr: ln.Addr()}
|
||||
if err := s.initRequestLogging(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var rc *ollama.Registry
|
||||
if useClient2 {
|
||||
|
||||
128
server/routes_request_log_test.go
Normal file
128
server/routes_request_log_test.go
Normal file
@@ -0,0 +1,128 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts runs one request
// through the logging middleware and checks that (1) the downstream handler
// still sees the full, unmodified body and (2) the logger writes a body dump
// plus a curl replay script that references that dump as a sibling file.
func TestInferenceRequestLoggerMiddlewareWritesReplayArtifacts(t *testing.T) {
	gin.SetMode(gin.TestMode)

	logDir := t.TempDir()
	requestLogger := &inferenceRequestLogger{dir: logDir}

	const route = "/v1/chat/completions"
	const requestBody = `{"model":"test-model","messages":[{"role":"user","content":"hello"}]}`

	var bodySeenByHandler string

	r := gin.New()
	r.POST(route, requestLogger.middleware(route), func(c *gin.Context) {
		// The middleware consumes the body and replaces it with an in-memory
		// copy; the handler must still read the original bytes unchanged.
		body, err := io.ReadAll(c.Request.Body)
		if err != nil {
			t.Fatalf("failed to read body in handler: %v", err)
		}

		bodySeenByHandler = string(body)
		c.Status(http.StatusOK)
	})

	req := httptest.NewRequest(http.MethodPost, route, strings.NewReader(requestBody))
	req.Host = "127.0.0.1:11434"
	req.Header.Set("Content-Type", "application/json")
	w := httptest.NewRecorder()
	r.ServeHTTP(w, req)

	if w.Code != http.StatusOK {
		t.Fatalf("expected status 200, got %d", w.Code)
	}

	if bodySeenByHandler != requestBody {
		t.Fatalf("handler body mismatch:\nexpected: %s\ngot: %s", requestBody, bodySeenByHandler)
	}

	// Filenames carry a timestamp/sequence prefix, so match by the sanitized
	// route suffix instead of an exact name.
	bodyFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_body.json"))
	if err != nil {
		t.Fatalf("failed to glob body logs: %v", err)
	}
	if len(bodyFiles) != 1 {
		t.Fatalf("expected 1 body log, got %d (%v)", len(bodyFiles), bodyFiles)
	}

	curlFiles, err := filepath.Glob(filepath.Join(logDir, "*_v1_chat_completions_request.sh"))
	if err != nil {
		t.Fatalf("failed to glob curl logs: %v", err)
	}
	if len(curlFiles) != 1 {
		t.Fatalf("expected 1 curl log, got %d (%v)", len(curlFiles), curlFiles)
	}

	// The body dump must be byte-identical to what the client sent.
	bodyData, err := os.ReadFile(bodyFiles[0])
	if err != nil {
		t.Fatalf("failed to read body log: %v", err)
	}
	if string(bodyData) != requestBody {
		t.Fatalf("body log mismatch:\nexpected: %s\ngot: %s", requestBody, string(bodyData))
	}

	curlData, err := os.ReadFile(curlFiles[0])
	if err != nil {
		t.Fatalf("failed to read curl log: %v", err)
	}

	// The replay script should target the original URL and reference its
	// sibling body file via SCRIPT_DIR so the pair is relocatable together.
	curlString := string(curlData)
	if !strings.Contains(curlString, "http://127.0.0.1:11434"+route) {
		t.Fatalf("curl log does not contain expected route URL: %s", curlString)
	}

	bodyFileName := filepath.Base(bodyFiles[0])
	if !strings.Contains(curlString, "@\"${SCRIPT_DIR}/"+bodyFileName+"\"") {
		t.Fatalf("curl log does not reference sibling body file: %s", curlString)
	}
}
|
||||
|
||||
func TestNewInferenceRequestLoggerCreatesDirectory(t *testing.T) {
|
||||
requestLogger, err := newInferenceRequestLogger()
|
||||
if err != nil {
|
||||
t.Fatalf("expected no error creating request logger: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
_ = os.RemoveAll(requestLogger.dir)
|
||||
})
|
||||
|
||||
if requestLogger == nil || requestLogger.dir == "" {
|
||||
t.Fatalf("expected request logger directory to be set")
|
||||
}
|
||||
|
||||
info, err := os.Stat(requestLogger.dir)
|
||||
if err != nil {
|
||||
t.Fatalf("expected directory to exist: %v", err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
t.Fatalf("expected %q to be a directory", requestLogger.dir)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeRouteForFilename(t *testing.T) {
|
||||
tests := []struct {
|
||||
route string
|
||||
want string
|
||||
}{
|
||||
{route: "/api/generate", want: "api_generate"},
|
||||
{route: "/v1/chat/completions", want: "v1_chat_completions"},
|
||||
{route: "/v1/messages", want: "v1_messages"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := sanitizeRouteForFilename(tt.route); got != tt.want {
|
||||
t.Fatalf("sanitizeRouteForFilename(%q) = %q, want %q", tt.route, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -721,6 +721,111 @@ func TestShow(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestShowCopilotUserAgentOverwritesExistingBasename verifies the ShowHandler
// special case for GitHub Copilot Chat: when the request's User-Agent starts
// with the Copilot prefix, `general.basename` in the returned model info is
// replaced with the requested model name; without that UA the upstream value
// is passed through, and unrelated keys are left intact.
func TestShowCopilotUserAgentOverwritesExistingBasename(t *testing.T) {
	t.Setenv("OLLAMA_MODELS", t.TempDir())

	var s Server

	// Create a remote-backed model whose upstream info already carries a
	// base_name; the Copilot path must override it in the show response.
	w := createRequest(t, s.CreateHandler, api.CreateRequest{
		Model:      "show-model",
		From:       "bob",
		RemoteHost: "https://ollama.com",
		Info: map[string]any{
			"model_family": "gptoss",
			"base_name":    "upstream-base-name",
		},
		Stream: &stream,
	})
	if w.Code != http.StatusOK {
		t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
	}

	h, err := s.GenerateRoutes(nil)
	if err != nil {
		t.Fatal(err)
	}

	// makeRequest POSTs /api/show for the model with an optional User-Agent
	// header and decodes the JSON response.
	makeRequest := func(userAgent string) api.ShowResponse {
		t.Helper()

		w := httptest.NewRecorder()
		req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-model"}`))
		req.Header.Set("Content-Type", "application/json")
		if userAgent != "" {
			req.Header.Set("User-Agent", userAgent)
		}
		h.ServeHTTP(w, req)

		if w.Code != http.StatusOK {
			t.Fatalf("expected status code 200, actual %d", w.Code)
		}

		var resp api.ShowResponse
		if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
			t.Fatal(err)
		}
		return resp
	}

	// Without the Copilot UA the upstream basename survives untouched.
	withoutCopilot := makeRequest("")
	if withoutCopilot.ModelInfo["general.basename"] != "upstream-base-name" {
		t.Fatalf("expected general.basename to be %q, got %v", "upstream-base-name", withoutCopilot.ModelInfo["general.basename"])
	}

	// With the Copilot UA the basename is echoed back as the requested
	// model name (matching what /api/tags reports).
	withCopilot := makeRequest("GitHubCopilotChat/0.41.1")
	if withCopilot.ModelInfo["general.basename"] != "show-model" {
		t.Fatalf("expected general.basename to be %q, got %v", "show-model", withCopilot.ModelInfo["general.basename"])
	}

	// The override must not disturb other model info keys.
	if withCopilot.ModelInfo["general.architecture"] != "gptoss" {
		t.Fatalf("expected general.architecture to be %q, got %v", "gptoss", withCopilot.ModelInfo["general.architecture"])
	}
}
|
||||
|
||||
func TestShowCopilotUserAgentSetsBasenameWhenModelInfoIsEmpty(t *testing.T) {
|
||||
t.Setenv("OLLAMA_MODELS", t.TempDir())
|
||||
|
||||
var s Server
|
||||
|
||||
w := createRequest(t, s.CreateHandler, api.CreateRequest{
|
||||
Model: "show-remote",
|
||||
From: "bob",
|
||||
RemoteHost: "https://ollama.com",
|
||||
Stream: &stream,
|
||||
})
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200 creating model, actual %d", w.Code)
|
||||
}
|
||||
|
||||
h, err := s.GenerateRoutes(nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w = httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/show", strings.NewReader(`{"model":"show-remote"}`))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("User-Agent", "GitHubCopilotChat/0.41.1")
|
||||
h.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected status code 200, actual %d", w.Code)
|
||||
}
|
||||
|
||||
var resp api.ShowResponse
|
||||
if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if resp.ModelInfo["general.basename"] != "show-remote" {
|
||||
t.Fatalf("expected general.basename to be %q, got %v", "show-remote", resp.ModelInfo["general.basename"])
|
||||
}
|
||||
|
||||
if len(resp.ModelInfo) != 1 {
|
||||
t.Fatalf("expected model_info to contain only general.basename, got %#v", resp.ModelInfo)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalize(t *testing.T) {
|
||||
type testCase struct {
|
||||
input []float32
|
||||
|
||||
@@ -109,7 +109,7 @@ func ConfigFromModelfile(modelfile *parser.Modelfile) (string, *ModelfileConfig,
|
||||
type CreateOptions struct {
|
||||
ModelName string
|
||||
ModelDir string
|
||||
Quantize string // "int4", "int8", "nvfp4", or "mxfp8" for quantization
|
||||
Quantize string // "int4", "int8", "nvfp4", "mxfp4", or "mxfp8" for quantization
|
||||
Modelfile *ModelfileConfig // template/system/license/parser/renderer/parameters from Modelfile
|
||||
}
|
||||
|
||||
@@ -280,7 +280,7 @@ func newPackedTensorLayerCreator() create.PackedTensorLayerCreator {
|
||||
if !QuantizeSupported() {
|
||||
return create.LayerInfo{}, fmt.Errorf("quantization requires MLX support")
|
||||
}
|
||||
blobData, err := quantizePackedGroup(tensors)
|
||||
blobData, err := quantizePackedGroup(groupName, tensors)
|
||||
if err != nil {
|
||||
return create.LayerInfo{}, fmt.Errorf("failed to quantize packed group %s: %w", groupName, err)
|
||||
}
|
||||
|
||||
@@ -7,29 +7,27 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/x/create"
|
||||
"github.com/ollama/ollama/x/imagegen/mlx"
|
||||
"github.com/ollama/ollama/x/mlxrunner/mlx"
|
||||
"github.com/ollama/ollama/x/mlxrunner/model"
|
||||
)
|
||||
|
||||
// quantizeParams maps quantization type names to MLX quantize parameters.
|
||||
var quantizeParams = map[string]struct {
|
||||
groupSize int
|
||||
bits int
|
||||
mode string
|
||||
}{
|
||||
"int4": {64, 4, "affine"},
|
||||
"nvfp4": {16, 4, "nvfp4"},
|
||||
"int8": {64, 8, "affine"},
|
||||
"mxfp8": {32, 8, "mxfp8"},
|
||||
}
|
||||
|
||||
// loadAndQuantizeArray writes a safetensors reader to a temp file, loads it with MLX,
|
||||
// quantizes the tensor, and appends the resulting arrays (weight, scale, optional bias)
|
||||
// to the provided maps. If quantize is empty, the tensor is kept as-is.
|
||||
// Returns any temp file paths created (caller must clean up) and arrays needing eval.
|
||||
func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]*mlx.Array) (tmpPath string, toEval []*mlx.Array, nativeHandle *mlx.SafetensorsFile, err error) {
|
||||
if quantize != "" {
|
||||
if gs, _, _ := model.QuantizationParams(quantize); gs == 0 {
|
||||
return "", nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
|
||||
}
|
||||
}
|
||||
|
||||
tmpDir := ensureTempDir()
|
||||
|
||||
tmpFile, err := os.CreateTemp(tmpDir, "quant-*.safetensors")
|
||||
@@ -50,11 +48,16 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
|
||||
}
|
||||
|
||||
// Find the tensor key (may differ from name for single-tensor blobs)
|
||||
inputKey, err := findSafetensorsKey(tmpPath)
|
||||
header, err := readSafetensorsHeader(tmpPath)
|
||||
if err != nil {
|
||||
st.Free()
|
||||
return tmpPath, nil, nil, fmt.Errorf("failed to read blob header for %s: %w", name, err)
|
||||
}
|
||||
inputKey, err := safetensorsKey(name, header)
|
||||
if err != nil {
|
||||
st.Free()
|
||||
return tmpPath, nil, nil, fmt.Errorf("failed to resolve tensor key for %s: %w", name, err)
|
||||
}
|
||||
|
||||
arr := st.Get(inputKey)
|
||||
if arr == nil {
|
||||
@@ -62,34 +65,46 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
|
||||
return tmpPath, nil, nil, fmt.Errorf("tensor %q not found in safetensors", inputKey)
|
||||
}
|
||||
|
||||
// Decode FP8 source encoding before checking quantize, so that callers
|
||||
// requesting decode-only (quantize="") receive usable float data.
|
||||
if info, ok := header[inputKey]; ok && info.Dtype == "F8_E4M3" {
|
||||
scaleKey := inputKey + ".scale_inv"
|
||||
scaleInv := st.Get(scaleKey)
|
||||
if scaleInv == nil {
|
||||
st.Free()
|
||||
return tmpPath, nil, nil, fmt.Errorf("missing companion tensor %q for fp8 source tensor %q", scaleKey, inputKey)
|
||||
}
|
||||
arr, err = decodeSourceFP8Tensor(arr, scaleInv)
|
||||
if err != nil {
|
||||
st.Free()
|
||||
return tmpPath, nil, nil, fmt.Errorf("failed to decode fp8 tensor %s: %w", inputKey, err)
|
||||
}
|
||||
mlx.Eval(arr)
|
||||
}
|
||||
|
||||
if quantize == "" {
|
||||
arr = mlx.Contiguous(arr)
|
||||
arr = mlx.Contiguous(arr, false)
|
||||
arrays[name] = arr
|
||||
return tmpPath, []*mlx.Array{arr}, st, nil
|
||||
}
|
||||
|
||||
// Convert to float type if needed (quantize expects float)
|
||||
if arr.Dtype() != mlx.DtypeBFloat16 && arr.Dtype() != mlx.DtypeFloat32 && arr.Dtype() != mlx.DtypeFloat16 {
|
||||
arr = mlx.AsType(arr, mlx.DtypeBFloat16)
|
||||
if arr.DType() != mlx.DTypeBFloat16 && arr.DType() != mlx.DTypeFloat32 && arr.DType() != mlx.DTypeFloat16 {
|
||||
// Convert to float type if needed (quantize expects float)
|
||||
arr = arr.AsType(mlx.DTypeBFloat16)
|
||||
mlx.Eval(arr)
|
||||
}
|
||||
|
||||
params, ok := quantizeParams[quantize]
|
||||
if !ok {
|
||||
st.Free()
|
||||
return tmpPath, nil, nil, fmt.Errorf("unsupported quantization type: %s", quantize)
|
||||
}
|
||||
groupSize, bits, mode := model.QuantizationParams(quantize)
|
||||
qweight, scales, qbiases := mlx.Quantize(arr, groupSize, bits, mode)
|
||||
|
||||
qweight, scales, qbiases := mlx.Quantize(arr, params.groupSize, params.bits, params.mode)
|
||||
|
||||
qweight = mlx.Contiguous(qweight)
|
||||
scales = mlx.Contiguous(scales)
|
||||
qweight = mlx.Contiguous(qweight, false)
|
||||
scales = mlx.Contiguous(scales, false)
|
||||
arrays[name] = qweight
|
||||
arrays[name+".scale"] = scales
|
||||
toEval = append(toEval, qweight, scales)
|
||||
|
||||
if qbiases != nil {
|
||||
qbiases = mlx.Contiguous(qbiases)
|
||||
qbiases = mlx.Contiguous(qbiases, false)
|
||||
arrays[name+".bias"] = qbiases
|
||||
toEval = append(toEval, qbiases)
|
||||
}
|
||||
@@ -101,27 +116,45 @@ func loadAndQuantizeArray(r io.Reader, name, quantize string, arrays map[string]
|
||||
// and returns a single combined safetensors blob with the quantized weight, scale, and optional bias.
|
||||
// Tensor keys use the original tensor name: name, name.scale, name.bias.
|
||||
// The blob includes __metadata__ with quant_type and group_size.
|
||||
// Supported quantization types: "int4", "nvfp4", "int8", "mxfp8".
|
||||
// Supported quantization types: "int4", "nvfp4", "mxfp4", "int8", "mxfp8".
|
||||
func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quantize string) (blobData []byte, err error) {
|
||||
arrays := make(map[string]*mlx.Array)
|
||||
tmpPath, toEval, st, err := loadAndQuantizeArray(r, tensorName, quantize, arrays)
|
||||
if tmpPath != "" {
|
||||
defer os.Remove(tmpPath)
|
||||
}
|
||||
if st != nil {
|
||||
defer st.Free()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
finalArrays := make([]*mlx.Array, 0, len(arrays))
|
||||
for _, arr := range arrays {
|
||||
if arr != nil {
|
||||
finalArrays = append(finalArrays, arr)
|
||||
}
|
||||
}
|
||||
mlx.Pin(finalArrays...)
|
||||
defer func() {
|
||||
if st != nil {
|
||||
st.Free()
|
||||
}
|
||||
mlx.Unpin(finalArrays...)
|
||||
mlx.Sweep()
|
||||
}()
|
||||
|
||||
mlx.Eval(toEval...)
|
||||
mlx.Sweep()
|
||||
// Free early to release mmap; defer guard handles error paths
|
||||
if st != nil {
|
||||
st.Free()
|
||||
st = nil
|
||||
}
|
||||
|
||||
// Build metadata for single-tensor blobs
|
||||
params := quantizeParams[quantize]
|
||||
groupSize, _, _ := model.QuantizationParams(quantize)
|
||||
metadata := map[string]string{
|
||||
"quant_type": quantize,
|
||||
"group_size": strconv.Itoa(params.groupSize),
|
||||
"group_size": strconv.Itoa(groupSize),
|
||||
}
|
||||
|
||||
tmpDir := ensureTempDir()
|
||||
@@ -135,48 +168,81 @@ func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quanti
|
||||
|
||||
// quantizePackedGroup quantizes multiple tensors and saves them all into a single
|
||||
// combined safetensors blob. Used for packing expert groups.
|
||||
// When the inputs are per-expert 2D tensors (e.g., experts.0.gate_proj.weight),
|
||||
// they are stacked into 3D switch_mlp tensors before quantization.
|
||||
// Each tensor may have a different quantization type (mixed-precision).
|
||||
// Returns the blob bytes. No __metadata__ is added because different tensors
|
||||
// may use different quantization types.
|
||||
func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
|
||||
// Returns the blob bytes.
|
||||
func quantizePackedGroup(groupName string, inputs []create.PackedTensorInput) ([]byte, error) {
|
||||
// Check if inputs are per-expert tensors that should be stacked into 3D
|
||||
if projGroups, quantize := parsePerExpertInputs(groupName, inputs); projGroups != nil {
|
||||
return stackAndQuantizeExpertGroup(groupName, projGroups, quantize)
|
||||
}
|
||||
|
||||
allArrays := make(map[string]*mlx.Array)
|
||||
var allToEval []*mlx.Array
|
||||
var tmpPaths []string
|
||||
var handles []*mlx.SafetensorsFile
|
||||
var pinned []*mlx.Array
|
||||
|
||||
var metadata map[string]string
|
||||
uniformQuantize := ""
|
||||
hasQuantized := false
|
||||
mixedQuantize := false
|
||||
for _, input := range inputs {
|
||||
if input.Quantize == "" {
|
||||
if hasQuantized {
|
||||
mixedQuantize = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
if !hasQuantized {
|
||||
hasQuantized = true
|
||||
uniformQuantize = input.Quantize
|
||||
continue
|
||||
}
|
||||
if input.Quantize != uniformQuantize {
|
||||
mixedQuantize = true
|
||||
}
|
||||
}
|
||||
if hasQuantized && !mixedQuantize {
|
||||
if groupSize, _, _ := model.QuantizationParams(uniformQuantize); groupSize > 0 {
|
||||
metadata = map[string]string{
|
||||
"quant_type": uniformQuantize,
|
||||
"group_size": strconv.Itoa(groupSize),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, input := range inputs {
|
||||
tmpPath, toEval, st, err := loadAndQuantizeArray(input.Reader, input.Name, input.Quantize, allArrays)
|
||||
if tmpPath != "" {
|
||||
tmpPaths = append(tmpPaths, tmpPath)
|
||||
}
|
||||
if st != nil {
|
||||
handles = append(handles, st)
|
||||
}
|
||||
if err != nil {
|
||||
// Cleanup on error
|
||||
for _, h := range handles {
|
||||
h.Free()
|
||||
}
|
||||
for _, p := range tmpPaths {
|
||||
os.Remove(p)
|
||||
}
|
||||
mlx.Unpin(pinned...)
|
||||
mlx.Sweep()
|
||||
return nil, err
|
||||
}
|
||||
allToEval = append(allToEval, toEval...)
|
||||
|
||||
mlx.Eval(toEval...)
|
||||
|
||||
finalArrays := arraysForPackedInput(allArrays, input)
|
||||
mlx.Pin(finalArrays...)
|
||||
pinned = append(pinned, finalArrays...)
|
||||
|
||||
if st != nil {
|
||||
st.Free()
|
||||
}
|
||||
if tmpPath != "" {
|
||||
os.Remove(tmpPath)
|
||||
}
|
||||
mlx.Sweep()
|
||||
}
|
||||
defer func() {
|
||||
mlx.Unpin(pinned...)
|
||||
mlx.Sweep()
|
||||
}()
|
||||
|
||||
mlx.Eval(allToEval...)
|
||||
|
||||
// Free native handles after eval
|
||||
for _, h := range handles {
|
||||
h.Free()
|
||||
}
|
||||
|
||||
// Save combined blob (no global metadata for mixed-precision packed blobs)
|
||||
// Save combined blob. Add global metadata only when every packed tensor uses
|
||||
// the same quantization mode and group size.
|
||||
tmpDir := ensureTempDir()
|
||||
outPath := filepath.Join(tmpDir, "packed-combined.safetensors")
|
||||
defer os.Remove(outPath)
|
||||
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, nil); err != nil {
|
||||
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
|
||||
return nil, fmt.Errorf("failed to save packed blob: %w", err)
|
||||
}
|
||||
|
||||
@@ -185,17 +251,193 @@ func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
|
||||
return nil, fmt.Errorf("failed to read packed blob: %w", err)
|
||||
}
|
||||
|
||||
for _, p := range tmpPaths {
|
||||
os.Remove(p)
|
||||
return blobData, nil
|
||||
}
|
||||
|
||||
func arraysForPackedInput(allArrays map[string]*mlx.Array, input create.PackedTensorInput) []*mlx.Array {
|
||||
keys := []string{input.Name}
|
||||
if input.Quantize != "" {
|
||||
keys = append(keys, input.Name+".scale", input.Name+".bias")
|
||||
}
|
||||
|
||||
out := make([]*mlx.Array, 0, len(keys))
|
||||
for _, key := range keys {
|
||||
if arr := allArrays[key]; arr != nil {
|
||||
out = append(out, arr)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// perExpertSuffix matches ".{index}.{proj_and_suffix}" after the group prefix.
|
||||
var perExpertSuffix = regexp.MustCompile(`^\.(\d+)\.(.+)$`)
|
||||
|
||||
type expertTensorInfo struct {
|
||||
index int
|
||||
proj string // e.g., "gate_proj.weight"
|
||||
input create.PackedTensorInput
|
||||
}
|
||||
|
||||
// parsePerExpertInputs groups per-expert 2D tensor inputs by projection type
|
||||
// and returns the uniform quantization type shared by all inputs.
|
||||
// Returns nil if the inputs are not per-expert tensors (e.g., already stacked 3D)
|
||||
// or if the inputs have mixed quantization types.
|
||||
// Only handles ".experts" groups; ".shared_experts" groups are left unpacked.
|
||||
func parsePerExpertInputs(groupName string, inputs []create.PackedTensorInput) (map[string][]expertTensorInfo, string) {
|
||||
if !strings.HasSuffix(groupName, ".experts") {
|
||||
return nil, ""
|
||||
}
|
||||
|
||||
quantize := inputs[0].Quantize
|
||||
groups := make(map[string][]expertTensorInfo)
|
||||
for _, input := range inputs {
|
||||
if input.Quantize != quantize {
|
||||
return nil, "" // mixed quantization types
|
||||
}
|
||||
suffix := strings.TrimPrefix(input.Name, groupName)
|
||||
m := perExpertSuffix.FindStringSubmatch(suffix)
|
||||
if m == nil {
|
||||
return nil, "" // not a per-expert pattern
|
||||
}
|
||||
index, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
return nil, ""
|
||||
}
|
||||
groups[m[2]] = append(groups[m[2]], expertTensorInfo{
|
||||
index: index,
|
||||
proj: m[2],
|
||||
input: input,
|
||||
})
|
||||
}
|
||||
if len(groups) == 0 {
|
||||
return nil, ""
|
||||
}
|
||||
return groups, quantize
|
||||
}
|
||||
|
||||
// stackAndQuantizeExpertGroup decodes per-expert tensors, stacks them into 3D
|
||||
// switch_mlp tensors, quantizes, and returns the combined safetensors blob.
|
||||
func stackAndQuantizeExpertGroup(groupName string, projGroups map[string][]expertTensorInfo, quantize string) ([]byte, error) {
|
||||
groupBase := strings.TrimSuffix(groupName, ".experts")
|
||||
|
||||
allArrays := make(map[string]*mlx.Array)
|
||||
var pinned []*mlx.Array
|
||||
|
||||
var metadata map[string]string
|
||||
if groupSize, _, _ := model.QuantizationParams(quantize); groupSize > 0 && quantize != "" {
|
||||
metadata = map[string]string{
|
||||
"quant_type": quantize,
|
||||
"group_size": strconv.Itoa(groupSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Sort projection names for deterministic output
|
||||
projNames := make([]string, 0, len(projGroups))
|
||||
for proj := range projGroups {
|
||||
projNames = append(projNames, proj)
|
||||
}
|
||||
sort.Strings(projNames)
|
||||
|
||||
cleanup := func() {
|
||||
mlx.Unpin(pinned...)
|
||||
mlx.Sweep()
|
||||
}
|
||||
|
||||
for _, proj := range projNames {
|
||||
experts := projGroups[proj]
|
||||
|
||||
// Sort by expert index
|
||||
sort.Slice(experts, func(i, j int) bool {
|
||||
return experts[i].index < experts[j].index
|
||||
})
|
||||
|
||||
// Load and decode each expert tensor
|
||||
var decoded []*mlx.Array
|
||||
for _, expert := range experts {
|
||||
dummyArrays := make(map[string]*mlx.Array)
|
||||
tmpPath, toEval, st, err := loadAndQuantizeArray(expert.input.Reader, expert.input.Name, "", dummyArrays)
|
||||
if err != nil {
|
||||
cleanup()
|
||||
return nil, fmt.Errorf("failed to decode expert tensor %s: %w", expert.input.Name, err)
|
||||
}
|
||||
mlx.Eval(toEval...)
|
||||
|
||||
arr := dummyArrays[expert.input.Name]
|
||||
mlx.Pin(arr)
|
||||
pinned = append(pinned, arr)
|
||||
decoded = append(decoded, arr)
|
||||
|
||||
if st != nil {
|
||||
st.Free()
|
||||
}
|
||||
if tmpPath != "" {
|
||||
os.Remove(tmpPath)
|
||||
}
|
||||
mlx.Sweep()
|
||||
}
|
||||
|
||||
// Stack into 3D along axis 0: [numExperts, rows, cols]
|
||||
stacked := mlx.Stack(decoded, 0)
|
||||
mlx.Eval(stacked)
|
||||
mlx.Pin(stacked)
|
||||
pinned = append(pinned, stacked)
|
||||
|
||||
// Free individual decoded arrays
|
||||
mlx.Unpin(decoded...)
|
||||
mlx.Sweep()
|
||||
|
||||
stackedName := groupBase + ".switch_mlp." + proj
|
||||
|
||||
// Quantize the stacked tensor
|
||||
if quantize != "" {
|
||||
groupSize, bits, mode := model.QuantizationParams(quantize)
|
||||
|
||||
qweight, scales, qbiases := mlx.Quantize(stacked, groupSize, bits, mode)
|
||||
|
||||
qweight = mlx.Contiguous(qweight, false)
|
||||
scales = mlx.Contiguous(scales, false)
|
||||
allArrays[stackedName] = qweight
|
||||
allArrays[stackedName+".scale"] = scales
|
||||
|
||||
toEval := []*mlx.Array{qweight, scales}
|
||||
if qbiases != nil {
|
||||
qbiases = mlx.Contiguous(qbiases, false)
|
||||
allArrays[stackedName+".bias"] = qbiases
|
||||
toEval = append(toEval, qbiases)
|
||||
}
|
||||
mlx.Eval(toEval...)
|
||||
mlx.Pin(toEval...)
|
||||
pinned = append(pinned, toEval...)
|
||||
|
||||
// Free stacked source array
|
||||
mlx.Unpin(stacked)
|
||||
mlx.Sweep()
|
||||
} else {
|
||||
stacked = mlx.Contiguous(stacked, false)
|
||||
mlx.Eval(stacked)
|
||||
allArrays[stackedName] = stacked
|
||||
}
|
||||
}
|
||||
|
||||
defer cleanup()
|
||||
|
||||
tmpDir := ensureTempDir()
|
||||
outPath := filepath.Join(tmpDir, "stacked-combined.safetensors")
|
||||
defer os.Remove(outPath)
|
||||
if err := mlx.SaveSafetensorsWithMetadata(outPath, allArrays, metadata); err != nil {
|
||||
return nil, fmt.Errorf("failed to save stacked blob: %w", err)
|
||||
}
|
||||
|
||||
blobData, err := os.ReadFile(outPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read stacked blob: %w", err)
|
||||
}
|
||||
return blobData, nil
|
||||
}
|
||||
|
||||
// QuantizeSupported returns true if quantization is supported (MLX library available)
|
||||
func QuantizeSupported() bool {
|
||||
mlx.InitMLX()
|
||||
return mlx.IsMLXAvailable()
|
||||
return mlx.CheckInit() == nil
|
||||
}
|
||||
|
||||
// ensureTempDir creates the temp directory for quantization if it doesn't exist
|
||||
@@ -205,32 +447,97 @@ func ensureTempDir() string {
|
||||
return tmpDir
|
||||
}
|
||||
|
||||
// findSafetensorsKey reads the first non-metadata tensor key from a safetensors file.
|
||||
func findSafetensorsKey(path string) (string, error) {
|
||||
type safetensorsHeaderEntry struct {
|
||||
Dtype string `json:"dtype"`
|
||||
Shape []int32 `json:"shape"`
|
||||
}
|
||||
|
||||
func readSafetensorsHeader(path string) (map[string]safetensorsHeaderEntry, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var headerSize uint64
|
||||
if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
headerBytes := make([]byte, headerSize)
|
||||
if _, err := io.ReadFull(f, headerBytes); err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var header map[string]json.RawMessage
|
||||
var header map[string]safetensorsHeaderEntry
|
||||
if err := json.Unmarshal(headerBytes, &header); err != nil {
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
return header, nil
|
||||
}
|
||||
|
||||
for k := range header {
|
||||
if k != "__metadata__" {
|
||||
return k, nil
|
||||
// safetensorsKey resolves the primary tensor key from a header.
|
||||
func safetensorsKey(preferred string, header map[string]safetensorsHeaderEntry) (string, error) {
|
||||
if preferred != "" {
|
||||
if _, ok := header[preferred]; ok {
|
||||
return preferred, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("no tensor found in safetensors header")
|
||||
|
||||
keys := make([]string, 0, len(header))
|
||||
for k := range header {
|
||||
if k == "__metadata__" || strings.HasSuffix(k, ".scale_inv") {
|
||||
continue
|
||||
}
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
if len(keys) == 0 {
|
||||
return "", fmt.Errorf("no tensor found in safetensors header")
|
||||
}
|
||||
return keys[0], nil
|
||||
}
|
||||
|
||||
func decodeSourceFP8Tensor(weight, scaleInv *mlx.Array) (*mlx.Array, error) {
|
||||
if weight == nil || scaleInv == nil {
|
||||
return nil, fmt.Errorf("fp8 weight and scale tensors are required")
|
||||
}
|
||||
|
||||
weightShape := weight.Dims()
|
||||
scaleShape := scaleInv.Dims()
|
||||
if len(weightShape) != 2 || len(scaleShape) != 2 {
|
||||
return nil, fmt.Errorf("expected 2D fp8 weight and scale tensors, got %v and %v", weightShape, scaleShape)
|
||||
}
|
||||
|
||||
// These must match the block size validated by resolveEffectiveQuantization
|
||||
// in create.go, which rejects any source model with a different block size.
|
||||
const blockRows = 128
|
||||
const blockCols = 128
|
||||
rows, cols := weightShape[0], weightShape[1]
|
||||
expectedScaleRows := (rows + blockRows - 1) / blockRows
|
||||
expectedScaleCols := (cols + blockCols - 1) / blockCols
|
||||
if scaleShape[0] != expectedScaleRows || scaleShape[1] != expectedScaleCols {
|
||||
return nil, fmt.Errorf(
|
||||
"unexpected fp8 scale shape %v for weight shape %v; want [%d %d]",
|
||||
scaleShape,
|
||||
weightShape,
|
||||
expectedScaleRows,
|
||||
expectedScaleCols,
|
||||
)
|
||||
}
|
||||
|
||||
decoded := mlx.FromFP8(weight, mlx.DTypeBFloat16)
|
||||
padBottom := blockRows*scaleShape[0] - rows
|
||||
padSide := blockCols*scaleShape[1] - cols
|
||||
if padBottom > 0 || padSide > 0 {
|
||||
decoded = mlx.Pad(decoded, []int32{0, int32(padBottom), 0, int32(padSide)})
|
||||
}
|
||||
|
||||
decoded = mlx.Reshape(decoded, int32(scaleShape[0]), int32(blockRows), int32(scaleShape[1]), int32(blockCols))
|
||||
decoded = mlx.Mul(decoded, mlx.ExpandDims(mlx.ExpandDims(scaleInv, 1), 3))
|
||||
decoded = mlx.Reshape(decoded, int32(rows+padBottom), int32(cols+padSide))
|
||||
if padBottom > 0 || padSide > 0 {
|
||||
decoded = mlx.SliceStartStop(decoded, []int32{0, 0}, []int32{int32(rows), int32(cols)})
|
||||
}
|
||||
|
||||
return decoded, nil
|
||||
}
|
||||
|
||||
@@ -267,13 +267,13 @@ func ShouldQuantize(name, component string) bool {
|
||||
|
||||
// ShouldQuantizeTensor returns true if a tensor should be quantized based on name, shape, and quantize type.
|
||||
// This is a more detailed check that also considers tensor dimensions.
|
||||
// The quantize parameter specifies the quantization type (e.g., "int4", "nvfp4", "int8", "mxfp8").
|
||||
// The quantize parameter specifies the quantization type (e.g., "int4", "nvfp4", "mxfp4", "int8", "mxfp8").
|
||||
func ShouldQuantizeTensor(name string, shape []int32, quantize string) bool {
|
||||
return GetTensorQuantization(name, shape, quantize) != ""
|
||||
}
|
||||
|
||||
// normalizeQuantType converts various quantization type aliases to canonical forms.
|
||||
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp8/MXFP8
|
||||
// Supports: q4/Q4/int4/INT4/fp4/FP4 -> int4, q8/Q8/int8/INT8/fp8/FP8 -> int8, nvfp4/NVFP4, mxfp4/MXFP4, mxfp8/MXFP8
|
||||
func normalizeQuantType(quantize string) string {
|
||||
switch strings.ToUpper(quantize) {
|
||||
case "Q4", "INT4", "FP4":
|
||||
@@ -282,6 +282,8 @@ func normalizeQuantType(quantize string) string {
|
||||
return "int8"
|
||||
case "NVFP4":
|
||||
return "nvfp4"
|
||||
case "MXFP4":
|
||||
return "mxfp4"
|
||||
case "MXFP8":
|
||||
return "mxfp8"
|
||||
default:
|
||||
@@ -335,7 +337,7 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
|
||||
quantNorm := normalizeQuantType(quantize)
|
||||
|
||||
// MLX quantization requires last dimension to be divisible by group size
|
||||
// nvfp4: 16, mxfp8: 32, int4/int8: 64
|
||||
// nvfp4: 16, mxfp4/mxfp8: 32, int4/int8: 64
|
||||
groupSize := int32(32)
|
||||
switch quantNorm {
|
||||
case "nvfp4":
|
||||
@@ -353,8 +355,8 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// For NVFP4 or MXFP8, use the same quantization for all (no mixed precision)
|
||||
if quantNorm == "nvfp4" || quantNorm == "mxfp8" {
|
||||
// For non-affine modes, use the same quantization for all eligible tensors.
|
||||
if quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8" {
|
||||
return quantNorm
|
||||
}
|
||||
|
||||
@@ -391,23 +393,39 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
|
||||
return quantNorm
|
||||
}
|
||||
|
||||
// expertGroupRegexp matches expert tensor names and captures the group prefix.
|
||||
// Matches: model.layers.{L}.mlp.experts.{E}.{proj}.weight (and .scale, .bias suffixes)
|
||||
// Captures: model.layers.{L}.mlp.experts
|
||||
var expertGroupRegexp = regexp.MustCompile(`^(model\.layers\.\d+\.mlp\.(?:shared_)?experts)\..*\.weight`)
|
||||
var expertLayerPrefixRegexp = regexp.MustCompile(`^(?:model\.language_model\.|language_model(?:\.model)?\.|model\.)?layers\.\d+$`)
|
||||
|
||||
// ExpertGroupPrefix returns the group prefix for expert tensors that should be packed together.
|
||||
// For example:
|
||||
// - "model.layers.1.mlp.experts.0.down_proj.weight" -> "model.layers.1.mlp.experts"
|
||||
// - "model.layers.1.mlp.shared_experts.down_proj.weight" -> "model.layers.1.mlp.shared_experts"
|
||||
// - "language_model.model.layers.1.mlp.switch_mlp.down_proj.weight" -> "language_model.model.layers.1.mlp.switch_mlp"
|
||||
// - "model.layers.0.mlp.down_proj.weight" -> "" (dense layer, no experts)
|
||||
// - "model.layers.1.mlp.gate.weight" -> "" (routing gate, not an expert)
|
||||
func ExpertGroupPrefix(tensorName string) string {
|
||||
m := expertGroupRegexp.FindStringSubmatch(tensorName)
|
||||
if m == nil {
|
||||
if !strings.HasSuffix(tensorName, ".weight") {
|
||||
return ""
|
||||
}
|
||||
return m[1]
|
||||
|
||||
for _, marker := range []string{
|
||||
".mlp.experts.",
|
||||
".mlp.shared_experts.",
|
||||
".mlp.switch_mlp.",
|
||||
} {
|
||||
idx := strings.Index(tensorName, marker)
|
||||
if idx == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
layerPrefix := tensorName[:idx]
|
||||
if !expertLayerPrefixRegexp.MatchString(layerPrefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
return layerPrefix + strings.TrimSuffix(marker, ".")
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// PackedTensorInput holds metadata for a tensor that will be packed into a multi-tensor blob.
|
||||
@@ -424,9 +442,11 @@ type PackedTensorInput struct {
|
||||
type PackedTensorLayerCreator func(groupName string, tensors []PackedTensorInput) (LayerInfo, error)
|
||||
|
||||
type sourceQuantization struct {
|
||||
Bits int `json:"bits"`
|
||||
GroupSize int `json:"group_size"`
|
||||
Mode string `json:"mode"`
|
||||
Bits int `json:"bits"`
|
||||
GroupSize int `json:"group_size"`
|
||||
Mode string `json:"mode"`
|
||||
QuantMethod string `json:"quant_method"`
|
||||
WeightBlockSize []int32 `json:"weight_block_size"`
|
||||
}
|
||||
|
||||
type sourceModelConfig struct {
|
||||
@@ -493,6 +513,98 @@ func (cfg sourceModelConfig) QuantMetadata() map[string]string {
|
||||
return metadata
|
||||
}
|
||||
|
||||
type sourceQuantizedKind string
|
||||
|
||||
const (
|
||||
sourceQuantizedKindNone sourceQuantizedKind = ""
|
||||
sourceQuantizedKindPrequantized sourceQuantizedKind = "prequantized"
|
||||
sourceQuantizedKindHFFP8 sourceQuantizedKind = "hf_fp8"
|
||||
)
|
||||
|
||||
func (cfg sourceModelConfig) quantizationConfigs() []sourceQuantization {
|
||||
return []sourceQuantization{
|
||||
cfg.Quantization,
|
||||
cfg.QuantizationConfig,
|
||||
cfg.TextConfig.Quantization,
|
||||
cfg.TextConfig.QuantizationConfig,
|
||||
}
|
||||
}
|
||||
|
||||
func (cfg sourceModelConfig) HFFP8WeightBlockSize() (rows, cols int32, ok bool) {
|
||||
for _, q := range cfg.quantizationConfigs() {
|
||||
if !strings.EqualFold(q.QuantMethod, "fp8") || len(q.WeightBlockSize) != 2 {
|
||||
continue
|
||||
}
|
||||
return q.WeightBlockSize[0], q.WeightBlockSize[1], true
|
||||
}
|
||||
return 0, 0, false
|
||||
}
|
||||
|
||||
func inspectSourceQuantization(modelDir string, cfg sourceModelConfig) (sourceQuantizedKind, error) {
|
||||
entries, err := os.ReadDir(modelDir)
|
||||
if err != nil {
|
||||
return sourceQuantizedKindNone, err
|
||||
}
|
||||
|
||||
hasScaleInv := false
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".safetensors") {
|
||||
continue
|
||||
}
|
||||
|
||||
extractor, err := safetensors.OpenForExtraction(filepath.Join(modelDir, entry.Name()))
|
||||
if err != nil {
|
||||
return sourceQuantizedKindNone, err
|
||||
}
|
||||
|
||||
for _, name := range extractor.ListTensors() {
|
||||
switch {
|
||||
case strings.HasSuffix(name, ".scales"):
|
||||
extractor.Close()
|
||||
return sourceQuantizedKindPrequantized, nil
|
||||
case strings.HasSuffix(name, ".weight_scale_inv"):
|
||||
hasScaleInv = true
|
||||
}
|
||||
}
|
||||
|
||||
extractor.Close()
|
||||
}
|
||||
|
||||
if hasScaleInv {
|
||||
if _, _, ok := cfg.HFFP8WeightBlockSize(); ok {
|
||||
return sourceQuantizedKindHFFP8, nil
|
||||
}
|
||||
}
|
||||
|
||||
return sourceQuantizedKindNone, nil
|
||||
}
|
||||
|
||||
func resolveEffectiveQuantization(cfg sourceModelConfig, sourceKind sourceQuantizedKind, requested string) (string, error) {
|
||||
switch sourceKind {
|
||||
case sourceQuantizedKindNone:
|
||||
return requested, nil
|
||||
case sourceQuantizedKindPrequantized:
|
||||
if requested != "" {
|
||||
return "", fmt.Errorf("cannot requantize already-quantized source model with --quantize %q", requested)
|
||||
}
|
||||
return "", nil
|
||||
case sourceQuantizedKindHFFP8:
|
||||
if requested != "" {
|
||||
return "", fmt.Errorf("cannot requantize already-quantized fp8 source model with --quantize %q", requested)
|
||||
}
|
||||
rows, cols, ok := cfg.HFFP8WeightBlockSize()
|
||||
if !ok {
|
||||
return "", fmt.Errorf("fp8 source model missing weight_block_size metadata")
|
||||
}
|
||||
if rows != 128 || cols != 128 {
|
||||
return "", fmt.Errorf("unsupported fp8 source block size %dx%d", rows, cols)
|
||||
}
|
||||
return "mxfp8", nil
|
||||
default:
|
||||
return "", fmt.Errorf("unsupported source quantization kind %q", sourceKind)
|
||||
}
|
||||
}
|
||||
|
||||
type tensorImportTransform interface {
|
||||
skipTensor(name string) bool
|
||||
transformTensor(td *safetensors.TensorData) ([]*safetensors.TensorData, error)
|
||||
@@ -546,6 +658,14 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read source config.json: %w", err)
|
||||
}
|
||||
sourceQuantKind, err := inspectSourceQuantization(modelDir, sourceConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to inspect source quantization: %w", err)
|
||||
}
|
||||
effectiveQuantize, err := resolveEffectiveQuantization(sourceConfig, sourceQuantKind, quantize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sourceQuantMetadata := sourceConfig.QuantMetadata()
|
||||
importTransform, err := newTensorImportTransform(modelDir, sourceConfig)
|
||||
if err != nil {
|
||||
@@ -557,7 +677,6 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
if len(createPackedLayer) > 0 {
|
||||
packedCreator = createPackedLayer[0]
|
||||
}
|
||||
|
||||
// Accumulate expert tensors by group prefix for packing.
|
||||
// Readers reference file-backed SectionReaders, so we keep extractors
|
||||
// open until each group is flushed to avoid buffering tensor data in memory.
|
||||
@@ -600,8 +719,8 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
tensorSet[name] = struct{}{}
|
||||
}
|
||||
quantizeMsg := ""
|
||||
if quantize != "" {
|
||||
quantizeMsg = fmt.Sprintf(", quantizing to %s", quantize)
|
||||
if effectiveQuantize != "" {
|
||||
quantizeMsg = fmt.Sprintf(", quantizing to %s", effectiveQuantize)
|
||||
}
|
||||
fn(fmt.Sprintf("importing %s (%d tensors%s)", entry.Name(), len(tensorNames), quantizeMsg))
|
||||
|
||||
@@ -612,9 +731,10 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
if importTransform.skipTensor(tensorName) {
|
||||
continue
|
||||
}
|
||||
if shouldSkipPrequantizedCompanion(tensorName, tensorSet) {
|
||||
if shouldSkipSourceCompanion(tensorName, tensorSet) {
|
||||
continue
|
||||
}
|
||||
sourceFP8ScaleName, hasSourceFP8Scale := sourceFP8Companion(tensorName, tensorSet)
|
||||
|
||||
td, err := extractor.GetTensor(tensorName)
|
||||
if err != nil {
|
||||
@@ -623,7 +743,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
return fmt.Errorf("failed to get tensor %s: %w", tensorName, err)
|
||||
}
|
||||
|
||||
if quantize == "" {
|
||||
if effectiveQuantize == "" {
|
||||
layer, ok, err := createPrequantizedLayer(extractor, td, tensorName, tensorSet, sourceQuantMetadata, createLayer)
|
||||
if err != nil {
|
||||
extractor.Close()
|
||||
@@ -647,8 +767,33 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
// Determine quantization type for this tensor (empty string if not quantizing)
|
||||
// GetTensorQuantization handles mixed-precision (e.g., Q8 for attention, Q4 for FFN)
|
||||
quantizeType := ""
|
||||
if quantize != "" {
|
||||
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, quantize)
|
||||
switch {
|
||||
case sourceQuantKind == sourceQuantizedKindHFFP8 && hasSourceFP8Scale:
|
||||
quantizeType = "mxfp8"
|
||||
case sourceQuantKind == sourceQuantizedKindHFFP8:
|
||||
quantizeType = ""
|
||||
case effectiveQuantize != "":
|
||||
quantizeType = importTransform.quantizationType(outTD.Name, outTD.Shape, effectiveQuantize)
|
||||
}
|
||||
reader := outTD.SafetensorsReader()
|
||||
if hasSourceFP8Scale {
|
||||
if len(outputTensors) != 1 {
|
||||
extractor.Close()
|
||||
closeExtractors()
|
||||
return fmt.Errorf("source fp8 tensor %s rewrote into %d tensors; only 1:1 rewrites are supported", tensorName, len(outputTensors))
|
||||
}
|
||||
if quantizeType == "" {
|
||||
extractor.Close()
|
||||
closeExtractors()
|
||||
return fmt.Errorf("source fp8 tensor %s was not scheduled for mxfp8 conversion", tensorName)
|
||||
}
|
||||
scaleTD, err := extractor.GetTensor(sourceFP8ScaleName)
|
||||
if err != nil {
|
||||
extractor.Close()
|
||||
closeExtractors()
|
||||
return fmt.Errorf("failed to get fp8 scale tensor %s: %w", sourceFP8ScaleName, err)
|
||||
}
|
||||
reader = buildSourceFP8Reader(outTD, scaleTD.WithName(outTD.Name+".scale_inv"))
|
||||
}
|
||||
|
||||
// Check if this tensor belongs to an expert group for packing
|
||||
@@ -670,13 +815,13 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
Dtype: outTD.Dtype,
|
||||
Shape: outTD.Shape,
|
||||
Quantize: quantizeType,
|
||||
Reader: outTD.SafetensorsReader(),
|
||||
Reader: reader,
|
||||
})
|
||||
} else {
|
||||
// Store as minimal safetensors format (88 bytes header overhead)
|
||||
// This enables native mmap loading via mlx_load_safetensors
|
||||
// createTensorLayer returns multiple layers if quantizing (weight + scales)
|
||||
newLayers, err := createTensorLayer(outTD.SafetensorsReader(), outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
|
||||
newLayers, err := createTensorLayer(reader, outTD.Name, outTD.Dtype, outTD.Shape, quantizeType)
|
||||
if err != nil {
|
||||
extractor.Close()
|
||||
closeExtractors()
|
||||
@@ -760,7 +905,7 @@ func CreateSafetensorsModel(modelName, modelDir, quantize string, createLayer La
|
||||
return nil
|
||||
}
|
||||
|
||||
func shouldSkipPrequantizedCompanion(name string, tensorSet map[string]struct{}) bool {
|
||||
func shouldSkipSourceCompanion(name string, tensorSet map[string]struct{}) bool {
|
||||
switch {
|
||||
case strings.HasSuffix(name, ".scales"):
|
||||
_, ok := tensorSet[strings.TrimSuffix(name, ".scales")+".weight"]
|
||||
@@ -768,11 +913,28 @@ func shouldSkipPrequantizedCompanion(name string, tensorSet map[string]struct{})
|
||||
case strings.HasSuffix(name, ".biases"):
|
||||
_, ok := tensorSet[strings.TrimSuffix(name, ".biases")+".weight"]
|
||||
return ok
|
||||
case strings.HasSuffix(name, ".weight_scale_inv"):
|
||||
_, ok := tensorSet[strings.TrimSuffix(name, "_scale_inv")]
|
||||
return ok
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func sourceFP8Companion(weightName string, tensorSet map[string]struct{}) (scaleName string, ok bool) {
|
||||
if !strings.HasSuffix(weightName, ".weight") {
|
||||
return "", false
|
||||
}
|
||||
|
||||
scaleName = weightName + "_scale_inv"
|
||||
_, ok = tensorSet[scaleName]
|
||||
return scaleName, ok
|
||||
}
|
||||
|
||||
func buildSourceFP8Reader(weightTD, scaleTD *safetensors.TensorData) io.Reader {
|
||||
return safetensors.BuildPackedSafetensorsReader([]*safetensors.TensorData{weightTD, scaleTD})
|
||||
}
|
||||
|
||||
func createPrequantizedLayer(
|
||||
extractor *safetensors.TensorExtractor,
|
||||
td *safetensors.TensorData,
|
||||
|
||||
@@ -246,6 +246,30 @@ func readSingleTensorRaw(t *testing.T, data []byte) []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readSafetensorsHeaderNames(t *testing.T, data []byte) []string {
|
||||
t.Helper()
|
||||
|
||||
var headerSize uint64
|
||||
if err := binary.Read(bytes.NewReader(data[:8]), binary.LittleEndian, &headerSize); err != nil {
|
||||
t.Fatalf("failed to read header size: %v", err)
|
||||
}
|
||||
|
||||
var header map[string]json.RawMessage
|
||||
if err := json.Unmarshal(data[8:8+headerSize], &header); err != nil {
|
||||
t.Fatalf("failed to parse header: %v", err)
|
||||
}
|
||||
|
||||
names := make([]string, 0, len(header))
|
||||
for name := range header {
|
||||
if name == "__metadata__" {
|
||||
continue
|
||||
}
|
||||
names = append(names, name)
|
||||
}
|
||||
slices.Sort(names)
|
||||
return names
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
@@ -546,6 +570,215 @@ func TestCreateSafetensorsModel_PacksPrequantizedTensorTriplets(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_HFFP8AutoConvertsToMXFP8(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
configJSON := `{
|
||||
"model_type": "test",
|
||||
"architectures": ["TestModel"],
|
||||
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
|
||||
}`
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
|
||||
t.Fatalf("failed to write config.json: %v", err)
|
||||
}
|
||||
|
||||
createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
|
||||
st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("dense.weight", "BF16", []int32{128, 128}, make([]byte, 128*128*2)),
|
||||
st.NewTensorDataFromBytes("norm.weight", "BF16", []int32{2}, make([]byte, 4)),
|
||||
})
|
||||
|
||||
quantizeByName := make(map[string]string)
|
||||
headerNamesByName := make(map[string][]string)
|
||||
|
||||
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||
_, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return LayerInfo{}, err
|
||||
}
|
||||
return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
|
||||
}
|
||||
|
||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
quantizeByName[name] = quantize
|
||||
headerNamesByName[name] = readSafetensorsHeaderNames(t, data)
|
||||
return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
|
||||
}
|
||||
|
||||
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }
|
||||
|
||||
if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}); err != nil {
|
||||
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||
}
|
||||
|
||||
if got := quantizeByName["linear.weight"]; got != "mxfp8" {
|
||||
t.Fatalf("linear.weight quantization = %q, want %q", got, "mxfp8")
|
||||
}
|
||||
|
||||
if got := quantizeByName["norm.weight"]; got != "" {
|
||||
t.Fatalf("norm.weight quantization = %q, want empty", got)
|
||||
}
|
||||
if got := quantizeByName["dense.weight"]; got != "" {
|
||||
t.Fatalf("dense.weight quantization = %q, want empty", got)
|
||||
}
|
||||
|
||||
if _, ok := quantizeByName["linear.weight_scale_inv"]; ok {
|
||||
t.Fatal("linear.weight_scale_inv should not be imported as a standalone tensor")
|
||||
}
|
||||
|
||||
if got := headerNamesByName["linear.weight"]; !slices.Equal(got, []string{"linear.weight", "linear.weight.scale_inv"}) {
|
||||
t.Fatalf("linear.weight blob tensors = %v, want %v", got, []string{"linear.weight", "linear.weight.scale_inv"})
|
||||
}
|
||||
|
||||
if got := headerNamesByName["norm.weight"]; !slices.Equal(got, []string{"norm.weight"}) {
|
||||
t.Fatalf("norm.weight blob tensors = %v, want %v", got, []string{"norm.weight"})
|
||||
}
|
||||
if got := headerNamesByName["dense.weight"]; !slices.Equal(got, []string{"dense.weight"}) {
|
||||
t.Fatalf("dense.weight blob tensors = %v, want %v", got, []string{"dense.weight"})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_RejectsRequantizingQuantizedSources(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
configJSON string
|
||||
tensors []*st.TensorData
|
||||
wantErr string
|
||||
}{
|
||||
{
|
||||
name: "prequantized affine",
|
||||
configJSON: `{"model_type": "test", "architectures": ["TestModel"]}`,
|
||||
tensors: []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("linear.weight", "U32", []int32{4, 4}, make([]byte, 16)),
|
||||
st.NewTensorDataFromBytes("linear.scales", "BF16", []int32{4, 1}, make([]byte, 8)),
|
||||
},
|
||||
wantErr: `cannot requantize already-quantized source model with --quantize "int4"`,
|
||||
},
|
||||
{
|
||||
name: "hf fp8 source",
|
||||
configJSON: `{
|
||||
"model_type": "test",
|
||||
"architectures": ["TestModel"],
|
||||
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
|
||||
}`,
|
||||
tensors: []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("linear.weight", "F8_E4M3", []int32{2, 2}, []byte{1, 2, 3, 4}),
|
||||
st.NewTensorDataFromBytes("linear.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
},
|
||||
wantErr: `cannot requantize already-quantized fp8 source model with --quantize "int4"`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(tt.configJSON), 0o644); err != nil {
|
||||
t.Fatalf("failed to write config.json: %v", err)
|
||||
}
|
||||
createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), tt.tensors)
|
||||
|
||||
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||
return LayerInfo{}, nil
|
||||
}
|
||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }
|
||||
|
||||
err := CreateSafetensorsModel("test-model", dir, "int4", createLayer, createTensorLayer, writeManifest, func(string) {})
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), tt.wantErr) {
|
||||
t.Fatalf("error = %q, want substring %q", err, tt.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_HFFP8PacksExperts(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
configJSON := `{
|
||||
"model_type": "test",
|
||||
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
|
||||
"quantization_config": {"quant_method": "fp8", "weight_block_size": [128, 128]}
|
||||
}`
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
|
||||
t.Fatalf("failed to write config.json: %v", err)
|
||||
}
|
||||
|
||||
// Create 2 experts so stacking produces a [2, 128, 128] tensor
|
||||
createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.gate_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.gate_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.up_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.up_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.down_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.0.down_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.gate_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.gate_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.up_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.up_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.down_proj.weight", "F8_E4M3", []int32{128, 128}, make([]byte, 128*128)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.1.down_proj.weight_scale_inv", "BF16", []int32{1, 1}, make([]byte, 2)),
|
||||
})
|
||||
|
||||
var packedLayerNames []string
|
||||
var packedLayerTensors [][]PackedTensorInput
|
||||
|
||||
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||
if _, err := io.ReadAll(r); err != nil {
|
||||
return LayerInfo{}, err
|
||||
}
|
||||
return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
|
||||
}
|
||||
|
||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||
if _, err := io.ReadAll(r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return []LayerInfo{{Name: name, Digest: "sha256:tensor_" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
|
||||
}
|
||||
|
||||
createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
|
||||
packedLayerNames = append(packedLayerNames, groupName)
|
||||
packedLayerTensors = append(packedLayerTensors, tensors)
|
||||
return LayerInfo{Name: groupName, Digest: "sha256:packed_" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
|
||||
}
|
||||
|
||||
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error { return nil }
|
||||
|
||||
if err := CreateSafetensorsModel("test-model", dir, "", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
|
||||
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||
}
|
||||
|
||||
if len(packedLayerNames) != 1 {
|
||||
t.Fatalf("expected 1 packed layer, got %d: %v", len(packedLayerNames), packedLayerNames)
|
||||
}
|
||||
if packedLayerNames[0] != "language_model.model.layers.0.mlp.experts" {
|
||||
t.Fatalf("unexpected packed layer name: %s", packedLayerNames[0])
|
||||
}
|
||||
|
||||
// Verify all 6 expert tensors (2 experts × 3 proj types) were accumulated
|
||||
tensors := packedLayerTensors[0]
|
||||
if len(tensors) != 6 {
|
||||
t.Fatalf("expected 6 tensors in packed group, got %d", len(tensors))
|
||||
}
|
||||
|
||||
// All should be marked for mxfp8 quantization
|
||||
for _, tensor := range tensors {
|
||||
if tensor.Quantize != "mxfp8" {
|
||||
t.Fatalf("expected mxfp8 quantize for %s, got %q", tensor.Name, tensor.Quantize)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
@@ -693,6 +926,113 @@ func TestCreateSafetensorsModel_Qwen35Transforms(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_Qwen35DirectNonAffineKeepsSensitiveWeightsBF16(t *testing.T) {
|
||||
for _, quantize := range []string{"nvfp4", "mxfp8", "mxfp4"} {
|
||||
t.Run(quantize, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
configJSON := `{
|
||||
"model_type": "test",
|
||||
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
|
||||
"text_config": {"dtype": "bfloat16"}
|
||||
}`
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
|
||||
t.Fatalf("failed to write config.json: %v", err)
|
||||
}
|
||||
|
||||
gateUpValues := make([]float32, 2*128*64)
|
||||
for expert := range 2 {
|
||||
base := expert * 128 * 64
|
||||
for i := range 64 * 64 {
|
||||
gateUpValues[base+i] = 1
|
||||
gateUpValues[base+64*64+i] = 2
|
||||
}
|
||||
}
|
||||
|
||||
createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("lm_head.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_a.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.linear_attn.in_proj_b.weight", "BF16", []int32{32, 64}, make([]byte, 32*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.shared_expert_gate.weight", "BF16", []int32{1, 64}, make([]byte, 64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.self_attn.q_proj.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
|
||||
})
|
||||
|
||||
type tensorCall struct {
|
||||
quantize string
|
||||
}
|
||||
type packedTensorCall struct {
|
||||
Name string
|
||||
Quantize string
|
||||
}
|
||||
|
||||
tensorCalls := make(map[string]tensorCall)
|
||||
packedCalls := make(map[string][]packedTensorCall)
|
||||
|
||||
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||
_, _ = io.ReadAll(r)
|
||||
return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
|
||||
}
|
||||
|
||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantizeType string) ([]LayerInfo, error) {
|
||||
_, _ = io.ReadAll(r)
|
||||
tensorCalls[name] = tensorCall{quantize: quantizeType}
|
||||
return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
|
||||
}
|
||||
|
||||
createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
|
||||
group := make([]packedTensorCall, 0, len(tensors))
|
||||
for _, tensor := range tensors {
|
||||
group = append(group, packedTensorCall{
|
||||
Name: tensor.Name,
|
||||
Quantize: tensor.Quantize,
|
||||
})
|
||||
}
|
||||
packedCalls[groupName] = group
|
||||
return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
|
||||
}
|
||||
|
||||
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := CreateSafetensorsModel("test-model", dir, quantize, createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
|
||||
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||
}
|
||||
|
||||
for _, name := range []string{
|
||||
"language_model.model.embed_tokens.weight",
|
||||
"language_model.lm_head.weight",
|
||||
"language_model.model.layers.0.linear_attn.in_proj_a.weight",
|
||||
"language_model.model.layers.0.linear_attn.in_proj_b.weight",
|
||||
"language_model.model.layers.0.mlp.gate.weight",
|
||||
"language_model.model.layers.0.mlp.shared_expert_gate.weight",
|
||||
} {
|
||||
if got := tensorCalls[name].quantize; got != "" {
|
||||
t.Fatalf("%s quantize = %q, want empty", name, got)
|
||||
}
|
||||
}
|
||||
|
||||
if got := tensorCalls["language_model.model.layers.0.self_attn.q_proj.weight"].quantize; got != quantize {
|
||||
t.Fatalf("q_proj quantize = %q, want %q", got, quantize)
|
||||
}
|
||||
|
||||
group := packedCalls["language_model.model.layers.0.mlp.switch_mlp"]
|
||||
if len(group) != 3 {
|
||||
t.Fatalf("packed switch_mlp tensor count = %d, want 3", len(group))
|
||||
}
|
||||
for _, tensor := range group {
|
||||
if tensor.Quantize != quantize {
|
||||
t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, quantize)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveManifestPath(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -865,6 +1205,7 @@ func TestShouldQuantizeTensor(t *testing.T) {
|
||||
{"large 2D weight fp8", "q_proj.weight", []int32{4096, 4096}, "fp8", true},
|
||||
{"medium 2D weight fp8", "small_proj.weight", []int32{128, 128}, "fp8", true},
|
||||
{"large 2D weight nvfp4", "q_proj.weight", []int32{4096, 4096}, "nvfp4", true},
|
||||
{"large 2D weight mxfp4", "q_proj.weight", []int32{4096, 4096}, "mxfp4", true},
|
||||
|
||||
// Small tensors should not be quantized (< 1024 elements)
|
||||
{"tiny 2D weight", "tiny.weight", []int32{16, 16}, "fp8", false},
|
||||
@@ -891,9 +1232,11 @@ func TestShouldQuantizeTensor(t *testing.T) {
|
||||
{"bias 2D", "proj.bias", []int32{4096, 1}, "fp8", false},
|
||||
|
||||
// Group size divisibility tests
|
||||
// FP8/FP4 require divisible by 32
|
||||
// FP8/FP4/MXFP4 require divisible by 32
|
||||
{"not divisible by 32 fp8", "proj.weight", []int32{128, 48}, "fp8", false},
|
||||
{"divisible by 32 fp8", "proj.weight", []int32{128, 64}, "fp8", true},
|
||||
{"not divisible by 32 mxfp4", "proj.weight", []int32{128, 48}, "mxfp4", false},
|
||||
{"divisible by 32 mxfp4", "proj.weight", []int32{128, 64}, "mxfp4", true},
|
||||
// NVFP4 requires divisible by 16
|
||||
{"not divisible by 16 nvfp4", "proj.weight", []int32{128, 24}, "nvfp4", false},
|
||||
{"divisible by 16 nvfp4", "proj.weight", []int32{128, 48}, "nvfp4", true},
|
||||
@@ -919,10 +1262,20 @@ func TestExpertGroupPrefix(t *testing.T) {
|
||||
{"model.layers.1.mlp.experts.63.gate_proj.weight", "model.layers.1.mlp.experts"},
|
||||
{"model.layers.0.mlp.experts.0.up_proj.weight", "model.layers.0.mlp.experts"},
|
||||
|
||||
// Expert tensors with language_model prefix should also match
|
||||
{"language_model.model.layers.0.mlp.experts.0.gate_proj.weight", "language_model.model.layers.0.mlp.experts"},
|
||||
{"language_model.model.layers.1.mlp.experts.255.down_proj.weight", "language_model.model.layers.1.mlp.experts"},
|
||||
|
||||
// Shared expert tensors should return their own group prefix
|
||||
{"model.layers.1.mlp.shared_experts.down_proj.weight", "model.layers.1.mlp.shared_experts"},
|
||||
{"model.layers.2.mlp.shared_experts.gate_proj.weight", "model.layers.2.mlp.shared_experts"},
|
||||
|
||||
// Rewritten Qwen switch_mlp tensors should also be packed per-layer.
|
||||
{"model.layers.1.mlp.switch_mlp.down_proj.weight", "model.layers.1.mlp.switch_mlp"},
|
||||
{"language_model.layers.2.mlp.switch_mlp.gate_proj.weight", "language_model.layers.2.mlp.switch_mlp"},
|
||||
{"language_model.model.layers.3.mlp.switch_mlp.up_proj.weight", "language_model.model.layers.3.mlp.switch_mlp"},
|
||||
{"model.language_model.layers.4.mlp.switch_mlp.gate_proj.weight", "model.language_model.layers.4.mlp.switch_mlp"},
|
||||
|
||||
// Non-expert tensors should return empty string
|
||||
{"model.layers.0.mlp.down_proj.weight", ""}, // dense layer, no experts
|
||||
{"model.layers.1.mlp.gate.weight", ""}, // routing gate, not an expert
|
||||
@@ -978,6 +1331,161 @@ func TestGetTensorQuantization_StackedExpert3D(t *testing.T) {
|
||||
if combinedDown != "int8" {
|
||||
t.Fatalf("combined down_proj quantization = %q, want %q", combinedDown, "int8")
|
||||
}
|
||||
|
||||
nvfp4GateUp := GetTensorQuantization(
|
||||
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
|
||||
[]int32{64, 11008, 4096},
|
||||
"nvfp4",
|
||||
)
|
||||
if nvfp4GateUp != "nvfp4" {
|
||||
t.Fatalf("nvfp4 gate_proj quantization = %q, want %q", nvfp4GateUp, "nvfp4")
|
||||
}
|
||||
|
||||
nvfp4Down := GetTensorQuantization(
|
||||
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
|
||||
[]int32{64, 4096, 11008},
|
||||
"nvfp4",
|
||||
)
|
||||
if nvfp4Down != "nvfp4" {
|
||||
t.Fatalf("nvfp4 down_proj quantization = %q, want %q", nvfp4Down, "nvfp4")
|
||||
}
|
||||
|
||||
mxfp4GateUp := GetTensorQuantization(
|
||||
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
|
||||
[]int32{64, 11008, 4096},
|
||||
"mxfp4",
|
||||
)
|
||||
if mxfp4GateUp != "mxfp4" {
|
||||
t.Fatalf("mxfp4 gate_proj quantization = %q, want %q", mxfp4GateUp, "mxfp4")
|
||||
}
|
||||
|
||||
mxfp4Down := GetTensorQuantization(
|
||||
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
|
||||
[]int32{64, 4096, 11008},
|
||||
"mxfp4",
|
||||
)
|
||||
if mxfp4Down != "mxfp4" {
|
||||
t.Fatalf("mxfp4 down_proj quantization = %q, want %q", mxfp4Down, "mxfp4")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_Qwen35NVFP4PacksSwitchMLPExperts(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
configJSON := `{
|
||||
"model_type": "test",
|
||||
"architectures": ["Qwen3_5MoeForConditionalGeneration"],
|
||||
"text_config": {"dtype": "bfloat16"}
|
||||
}`
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.json"), []byte(configJSON), 0o644); err != nil {
|
||||
t.Fatalf("failed to write config.json: %v", err)
|
||||
}
|
||||
|
||||
gateUpValues := make([]float32, 2*128*64)
|
||||
for expert := range 2 {
|
||||
base := expert * 128 * 64
|
||||
for i := range 64 * 64 {
|
||||
gateUpValues[base+i] = 1
|
||||
gateUpValues[base+64*64+i] = 2
|
||||
}
|
||||
}
|
||||
|
||||
createTestSafetensors(t, filepath.Join(dir, "model.safetensors"), []*st.TensorData{
|
||||
st.NewTensorDataFromBytes("model.language_model.embed_tokens.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.gate.weight", "BF16", []int32{64, 64}, make([]byte, 64*64*2)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.gate_up_proj", "BF16", []int32{2, 128, 64}, bfloat16.EncodeFloat32(gateUpValues)),
|
||||
st.NewTensorDataFromBytes("model.language_model.layers.0.mlp.experts.down_proj", "BF16", []int32{2, 64, 64}, bfloat16.EncodeFloat32(make([]float32, 2*64*64))),
|
||||
})
|
||||
|
||||
type tensorCall struct {
|
||||
quantize string
|
||||
}
|
||||
type packedTensorCall struct {
|
||||
Name string
|
||||
Dtype string
|
||||
Shape []int32
|
||||
Quantize string
|
||||
}
|
||||
|
||||
tensorCalls := make(map[string]tensorCall)
|
||||
packedCalls := make(map[string][]packedTensorCall)
|
||||
|
||||
createLayer := func(r io.Reader, mediaType, name string) (LayerInfo, error) {
|
||||
_, _ = io.ReadAll(r)
|
||||
return LayerInfo{Name: name, Digest: "sha256:" + name, MediaType: mediaType}, nil
|
||||
}
|
||||
|
||||
createTensorLayer := func(r io.Reader, name, dtype string, shape []int32, quantize string) ([]LayerInfo, error) {
|
||||
_, _ = io.ReadAll(r)
|
||||
tensorCalls[name] = tensorCall{quantize: quantize}
|
||||
return []LayerInfo{{Name: name, Digest: "sha256:" + name, MediaType: "application/vnd.ollama.image.tensor"}}, nil
|
||||
}
|
||||
|
||||
createPackedLayer := func(groupName string, tensors []PackedTensorInput) (LayerInfo, error) {
|
||||
group := make([]packedTensorCall, 0, len(tensors))
|
||||
for _, tensor := range tensors {
|
||||
group = append(group, packedTensorCall{
|
||||
Name: tensor.Name,
|
||||
Dtype: tensor.Dtype,
|
||||
Shape: append([]int32(nil), tensor.Shape...),
|
||||
Quantize: tensor.Quantize,
|
||||
})
|
||||
}
|
||||
packedCalls[groupName] = group
|
||||
return LayerInfo{Name: groupName, Digest: "sha256:" + groupName, MediaType: "application/vnd.ollama.image.tensor"}, nil
|
||||
}
|
||||
|
||||
writeManifest := func(modelName string, config LayerInfo, layers []LayerInfo) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := CreateSafetensorsModel("test-model", dir, "nvfp4", createLayer, createTensorLayer, writeManifest, func(string) {}, createPackedLayer); err != nil {
|
||||
t.Fatalf("CreateSafetensorsModel failed: %v", err)
|
||||
}
|
||||
|
||||
groupName := "language_model.model.layers.0.mlp.switch_mlp"
|
||||
group, ok := packedCalls[groupName]
|
||||
if !ok {
|
||||
t.Fatalf("missing packed group %q: %v", groupName, packedCalls)
|
||||
}
|
||||
|
||||
if len(group) != 3 {
|
||||
t.Fatalf("packed group %q has %d tensors, want 3", groupName, len(group))
|
||||
}
|
||||
|
||||
gotNames := make([]string, 0, len(group))
|
||||
for _, tensor := range group {
|
||||
gotNames = append(gotNames, tensor.Name)
|
||||
if tensor.Quantize != "nvfp4" {
|
||||
t.Fatalf("packed tensor %q quantize = %q, want %q", tensor.Name, tensor.Quantize, "nvfp4")
|
||||
}
|
||||
if tensor.Dtype != "BF16" {
|
||||
t.Fatalf("packed tensor %q dtype = %q, want %q", tensor.Name, tensor.Dtype, "BF16")
|
||||
}
|
||||
}
|
||||
slices.Sort(gotNames)
|
||||
|
||||
wantNames := []string{
|
||||
"language_model.model.layers.0.mlp.switch_mlp.down_proj.weight",
|
||||
"language_model.model.layers.0.mlp.switch_mlp.gate_proj.weight",
|
||||
"language_model.model.layers.0.mlp.switch_mlp.up_proj.weight",
|
||||
}
|
||||
if !slices.Equal(gotNames, wantNames) {
|
||||
t.Fatalf("packed tensor names = %v, want %v", gotNames, wantNames)
|
||||
}
|
||||
|
||||
for _, name := range wantNames {
|
||||
if _, ok := tensorCalls[name]; ok {
|
||||
t.Fatalf("packed expert tensor %q unexpectedly handled by createTensorLayer", name)
|
||||
}
|
||||
}
|
||||
|
||||
if got := tensorCalls["language_model.model.embed_tokens.weight"].quantize; got != "" {
|
||||
t.Fatalf("embed_tokens quantize = %q, want empty", got)
|
||||
}
|
||||
if got := tensorCalls["language_model.model.layers.0.mlp.gate.weight"].quantize; got != "" {
|
||||
t.Fatalf("mlp.gate quantize = %q, want empty", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateSafetensorsModel_WithQuantize(t *testing.T) {
|
||||
|
||||
@@ -87,6 +87,27 @@ func (t qwen35ImportTransform) skipTensor(name string) bool {
|
||||
return strings.Contains(name, "mtp.")
|
||||
}
|
||||
|
||||
func qwen35ShouldKeepBF16ForDirectNonAffine(name string) bool {
|
||||
switch {
|
||||
case strings.HasSuffix(name, "embed_tokens.weight"):
|
||||
return true
|
||||
case strings.HasSuffix(name, "lm_head.weight"):
|
||||
return true
|
||||
case strings.HasSuffix(name, ".linear_attn.in_proj_a.weight"):
|
||||
return true
|
||||
case strings.HasSuffix(name, ".linear_attn.in_proj_b.weight"):
|
||||
return true
|
||||
case strings.HasSuffix(name, ".linear_attn.in_proj_ba.weight"):
|
||||
return true
|
||||
case strings.HasSuffix(name, ".mlp.gate.weight") && !strings.Contains(name, "_proj"):
|
||||
return true
|
||||
case strings.HasSuffix(name, ".mlp.shared_expert_gate.weight"):
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quantize string) string {
|
||||
if strings.HasPrefix(name, "vision_tower.") {
|
||||
return ""
|
||||
@@ -127,6 +148,13 @@ func (t qwen35ImportTransform) quantizationType(name string, shape []int32, quan
|
||||
return ""
|
||||
}
|
||||
|
||||
// Match the working HF-FP8 import policy for direct NVFP4/MXFP4/MXFP8 imports:
|
||||
// keep embeddings, LM head, low-rank linear_attn projections, and routing
|
||||
// gates in BF16 rather than forcing them into a non-affine quantized format.
|
||||
if (quantNorm == "nvfp4" || quantNorm == "mxfp4" || quantNorm == "mxfp8") && qwen35ShouldKeepBF16ForDirectNonAffine(name) {
|
||||
return ""
|
||||
}
|
||||
|
||||
return quantNorm
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
include(FetchContent)
|
||||
|
||||
# Read MLX version from top-level file (shared with Dockerfile)
|
||||
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_C_GIT_TAG)
|
||||
# Read MLX-C version from top-level file (shared with Dockerfile)
|
||||
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
|
||||
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
|
||||
|
||||
# Read MLX core version from top-level file
|
||||
file(READ "${CMAKE_SOURCE_DIR}/MLX_CORE_VERSION" MLX_GIT_TAG)
|
||||
# Read MLX version from top-level file
|
||||
file(READ "${CMAKE_SOURCE_DIR}/MLX_VERSION" MLX_GIT_TAG)
|
||||
string(STRIP "${MLX_GIT_TAG}" MLX_GIT_TAG)
|
||||
|
||||
set(MLX_C_BUILD_EXAMPLES OFF)
|
||||
@@ -98,6 +98,28 @@ FetchContent_MakeAvailable(mlx-c)
|
||||
file(GLOB _mlx_c_hdrs "${mlx-c_SOURCE_DIR}/mlx/c/*.h")
|
||||
file(COPY ${_mlx_c_hdrs} DESTINATION "${CMAKE_SOURCE_DIR}/x/mlxrunner/mlx/include/mlx/c/")
|
||||
|
||||
# Regenerate Go/C shim wrappers from the (possibly updated) headers.
|
||||
find_program(GO_EXECUTABLE go REQUIRED)
|
||||
message(STATUS "Regenerating MLX Go wrappers")
|
||||
|
||||
# Go's cgo splits CC on whitespace, so a CC like "C:/Program Files/…/cl.exe"
|
||||
# (set by cmake on Windows) breaks with "C:/Program" not found. Clear CC
|
||||
# when it contains spaces so cgo falls back to its default (gcc).
|
||||
if(WIN32 AND "$ENV{CC}" MATCHES " ")
|
||||
set(_SAVE_CC "$ENV{CC}")
|
||||
set(ENV{CC} "")
|
||||
endif()
|
||||
|
||||
execute_process(
|
||||
COMMAND ${GO_EXECUTABLE} generate ./x/...
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
COMMAND_ERROR_IS_FATAL ANY
|
||||
)
|
||||
|
||||
if(DEFINED _SAVE_CC)
|
||||
set(ENV{CC} "${_SAVE_CC}")
|
||||
endif()
|
||||
|
||||
# For local dev builds, override MLX_VERSION with git describe output
|
||||
if(TARGET mlx_version AND DEFINED FETCHCONTENT_SOURCE_DIR_MLX)
|
||||
execute_process(
|
||||
|
||||
@@ -165,8 +165,8 @@ int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x, const
|
||||
int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group) = NULL;
|
||||
int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key) = NULL;
|
||||
bool (*mlx_distributed_is_available_ptr)(void) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict) = NULL;
|
||||
bool (*mlx_distributed_is_available_ptr)(const char* bk) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk) = NULL;
|
||||
void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) = NULL;
|
||||
void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...) = NULL;
|
||||
int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless) = NULL;
|
||||
@@ -319,10 +319,12 @@ int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype, const
|
||||
int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) = NULL;
|
||||
int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s) = NULL;
|
||||
int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s) = NULL;
|
||||
@@ -348,7 +350,7 @@ int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse
|
||||
int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s) = NULL;
|
||||
int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies) = NULL;
|
||||
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) = NULL;
|
||||
int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) = NULL;
|
||||
int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
|
||||
int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s) = NULL;
|
||||
int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
@@ -375,6 +377,8 @@ int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w,
|
||||
int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s) = NULL;
|
||||
int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
|
||||
int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) = NULL;
|
||||
@@ -434,8 +438,8 @@ int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* axes, siz
|
||||
int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s) = NULL;
|
||||
int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s) = NULL;
|
||||
int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s) = NULL;
|
||||
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
|
||||
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
|
||||
int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) = NULL;
|
||||
int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) = NULL;
|
||||
int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) = NULL;
|
||||
int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
@@ -2101,6 +2105,11 @@ int mlx_load_functions(void* handle) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_atleast_3d\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_bartlett_ptr = GET_SYM(handle, "mlx_bartlett");
|
||||
if (mlx_bartlett_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_bartlett\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_bitwise_and_ptr = GET_SYM(handle, "mlx_bitwise_and");
|
||||
if (mlx_bitwise_and_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_and\n");
|
||||
@@ -2121,6 +2130,11 @@ int mlx_load_functions(void* handle) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_bitwise_xor\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_blackman_ptr = GET_SYM(handle, "mlx_blackman");
|
||||
if (mlx_blackman_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_blackman\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_block_masked_mm_ptr = GET_SYM(handle, "mlx_block_masked_mm");
|
||||
if (mlx_block_masked_mm_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_block_masked_mm\n");
|
||||
@@ -2381,6 +2395,16 @@ int mlx_load_functions(void* handle) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_hadamard_transform\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_hamming_ptr = GET_SYM(handle, "mlx_hamming");
|
||||
if (mlx_hamming_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_hamming\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_hanning_ptr = GET_SYM(handle, "mlx_hanning");
|
||||
if (mlx_hanning_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_hanning\n");
|
||||
return -1;
|
||||
}
|
||||
mlx_identity_ptr = GET_SYM(handle, "mlx_identity");
|
||||
if (mlx_identity_ptr == NULL) {
|
||||
fprintf(stderr, "MLX: Failed to load symbol: mlx_identity\n");
|
||||
@@ -4132,12 +4156,12 @@ mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, i
|
||||
return mlx_distributed_group_split_ptr(group, color, key);
|
||||
}
|
||||
|
||||
bool mlx_distributed_is_available(void) {
|
||||
return mlx_distributed_is_available_ptr();
|
||||
bool mlx_distributed_is_available(const char* bk) {
|
||||
return mlx_distributed_is_available_ptr(bk);
|
||||
}
|
||||
|
||||
mlx_distributed_group mlx_distributed_init(bool strict) {
|
||||
return mlx_distributed_init_ptr(strict);
|
||||
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk) {
|
||||
return mlx_distributed_init_ptr(strict, bk);
|
||||
}
|
||||
|
||||
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*)) {
|
||||
@@ -4748,6 +4772,10 @@ int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
|
||||
return mlx_atleast_3d_ptr(res, a, s);
|
||||
}
|
||||
|
||||
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_bartlett_ptr(res, M, s);
|
||||
}
|
||||
|
||||
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s) {
|
||||
return mlx_bitwise_and_ptr(res, a, b, s);
|
||||
}
|
||||
@@ -4764,6 +4792,10 @@ int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const
|
||||
return mlx_bitwise_xor_ptr(res, a, b, s);
|
||||
}
|
||||
|
||||
int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_blackman_ptr(res, M, s);
|
||||
}
|
||||
|
||||
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s) {
|
||||
return mlx_block_masked_mm_ptr(res, a, b, block_size, mask_out, mask_lhs, mask_rhs, s);
|
||||
}
|
||||
@@ -4864,8 +4896,8 @@ int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_
|
||||
return mlx_depends_ptr(res, inputs, dependencies);
|
||||
}
|
||||
|
||||
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s) {
|
||||
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, dtype, s);
|
||||
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s) {
|
||||
return mlx_dequantize_ptr(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
|
||||
}
|
||||
|
||||
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
|
||||
@@ -4972,6 +5004,14 @@ int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float
|
||||
return mlx_hadamard_transform_ptr(res, a, scale, s);
|
||||
}
|
||||
|
||||
int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_hamming_ptr(res, M, s);
|
||||
}
|
||||
|
||||
int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_hanning_ptr(res, M, s);
|
||||
}
|
||||
|
||||
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
|
||||
return mlx_identity_ptr(res, n, dtype, s);
|
||||
}
|
||||
@@ -5208,12 +5248,12 @@ int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indice
|
||||
return mlx_put_along_axis_ptr(res, a, indices, values, axis, s);
|
||||
}
|
||||
|
||||
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
|
||||
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, s);
|
||||
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s) {
|
||||
return mlx_qqmm_ptr(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
|
||||
}
|
||||
|
||||
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
|
||||
return mlx_quantize_ptr(res, w, group_size, bits, mode, s);
|
||||
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s) {
|
||||
return mlx_quantize_ptr(res, w, group_size, bits, mode, global_scale, s);
|
||||
}
|
||||
|
||||
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s) {
|
||||
|
||||
@@ -2125,7 +2125,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
|
||||
optGroupSize := C.mlx_optional_int{value: C.int(groupSize), has_value: true}
|
||||
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
|
||||
res := C.mlx_vector_array_new()
|
||||
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, C.default_stream())
|
||||
var globalScale C.mlx_array
|
||||
C.mlx_quantize(&res, w.c, optGroupSize, optBits, cMode, globalScale, C.default_stream())
|
||||
|
||||
// Result is a vector of arrays: [weights, scales, biases?]
|
||||
// mxfp8 mode returns only 2 elements (no biases)
|
||||
@@ -2161,7 +2162,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
|
||||
}
|
||||
|
||||
res := C.mlx_array_new()
|
||||
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, optDtype, C.default_stream())
|
||||
var globalScale C.mlx_array
|
||||
C.mlx_dequantize(&res, w.c, scales.c, b, optGroupSize, optBits, cMode, globalScale, optDtype, C.default_stream())
|
||||
return newArray(res)
|
||||
}
|
||||
|
||||
|
||||
@@ -309,10 +309,12 @@
|
||||
#undef mlx_atleast_1d
|
||||
#undef mlx_atleast_2d
|
||||
#undef mlx_atleast_3d
|
||||
#undef mlx_bartlett
|
||||
#undef mlx_bitwise_and
|
||||
#undef mlx_bitwise_invert
|
||||
#undef mlx_bitwise_or
|
||||
#undef mlx_bitwise_xor
|
||||
#undef mlx_blackman
|
||||
#undef mlx_block_masked_mm
|
||||
#undef mlx_broadcast_arrays
|
||||
#undef mlx_broadcast_to
|
||||
@@ -365,6 +367,8 @@
|
||||
#undef mlx_greater
|
||||
#undef mlx_greater_equal
|
||||
#undef mlx_hadamard_transform
|
||||
#undef mlx_hamming
|
||||
#undef mlx_hanning
|
||||
#undef mlx_identity
|
||||
#undef mlx_imag
|
||||
#undef mlx_inner
|
||||
@@ -751,8 +755,8 @@ extern int (*mlx_distributed_sum_scatter_ptr)(mlx_array* res, const mlx_array x,
|
||||
extern int (*mlx_distributed_group_rank_ptr)(mlx_distributed_group group);
|
||||
extern int (*mlx_distributed_group_size_ptr)(mlx_distributed_group group);
|
||||
extern mlx_distributed_group (*mlx_distributed_group_split_ptr)(mlx_distributed_group group, int color, int key);
|
||||
extern bool (*mlx_distributed_is_available_ptr)(void);
|
||||
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict);
|
||||
extern bool (*mlx_distributed_is_available_ptr)(const char* bk);
|
||||
extern mlx_distributed_group (*mlx_distributed_init_ptr)(bool strict, const char* bk);
|
||||
extern void (*mlx_set_error_handler_ptr)(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
|
||||
extern void (*_mlx_error_ptr)(const char* file, const int line, const char* fmt, ...);
|
||||
extern int (*mlx_export_function_ptr)(const char* file, const mlx_closure fun, const mlx_vector_array args, bool shapeless);
|
||||
@@ -905,10 +909,12 @@ extern int (*mlx_astype_ptr)(mlx_array* res, const mlx_array a, mlx_dtype dtype,
|
||||
extern int (*mlx_atleast_1d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_atleast_2d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_atleast_3d_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_bartlett_ptr)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_bitwise_and_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
extern int (*mlx_bitwise_invert_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_bitwise_or_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
extern int (*mlx_bitwise_xor_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
extern int (*mlx_blackman_ptr)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_block_masked_mm_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
|
||||
extern int (*mlx_broadcast_arrays_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
|
||||
extern int (*mlx_broadcast_to_ptr)(mlx_array* res, const mlx_array a, const int* shape, size_t shape_num, const mlx_stream s);
|
||||
@@ -934,7 +940,7 @@ extern int (*mlx_cumprod_ptr)(mlx_array* res, const mlx_array a, int axis, bool
|
||||
extern int (*mlx_cumsum_ptr)(mlx_array* res, const mlx_array a, int axis, bool reverse, bool inclusive, const mlx_stream s);
|
||||
extern int (*mlx_degrees_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_depends_ptr)(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
|
||||
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
|
||||
extern int (*mlx_dequantize_ptr)(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
|
||||
extern int (*mlx_diag_ptr)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
|
||||
extern int (*mlx_diagonal_ptr)(mlx_array* res, const mlx_array a, int offset, int axis1, int axis2, const mlx_stream s);
|
||||
extern int (*mlx_divide_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
@@ -961,6 +967,8 @@ extern int (*mlx_gather_qmm_ptr)(mlx_array* res, const mlx_array x, const mlx_ar
|
||||
extern int (*mlx_greater_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
extern int (*mlx_greater_equal_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
extern int (*mlx_hadamard_transform_ptr)(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
|
||||
extern int (*mlx_hamming_ptr)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_hanning_ptr)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_identity_ptr)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
|
||||
extern int (*mlx_imag_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_inner_ptr)(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
@@ -1020,8 +1028,8 @@ extern int (*mlx_prod_axes_ptr)(mlx_array* res, const mlx_array a, const int* ax
|
||||
extern int (*mlx_prod_axis_ptr)(mlx_array* res, const mlx_array a, int axis, bool keepdims, const mlx_stream s);
|
||||
extern int (*mlx_prod_ptr)(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream s);
|
||||
extern int (*mlx_put_along_axis_ptr)(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
|
||||
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
extern int (*mlx_qqmm_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
|
||||
extern int (*mlx_quantize_ptr)(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
|
||||
extern int (*mlx_quantized_matmul_ptr)(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
extern int (*mlx_radians_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_real_ptr)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
@@ -1492,9 +1500,9 @@ int mlx_distributed_group_size(mlx_distributed_group group);
|
||||
|
||||
mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
|
||||
|
||||
bool mlx_distributed_is_available(void);
|
||||
bool mlx_distributed_is_available(const char* bk);
|
||||
|
||||
mlx_distributed_group mlx_distributed_init(bool strict);
|
||||
mlx_distributed_group mlx_distributed_init(bool strict, const char* bk);
|
||||
|
||||
void mlx_set_error_handler(mlx_error_handler_func handler, void* data, void (*dtor)(void*));
|
||||
|
||||
@@ -1800,6 +1808,8 @@ int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
|
||||
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
|
||||
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
|
||||
|
||||
int mlx_bitwise_and(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
|
||||
int mlx_bitwise_invert(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
@@ -1808,6 +1818,8 @@ int mlx_bitwise_or(mlx_array* res, const mlx_array a, const mlx_array b, const m
|
||||
|
||||
int mlx_bitwise_xor(mlx_array* res, const mlx_array a, const mlx_array b, const mlx_stream s);
|
||||
|
||||
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
|
||||
|
||||
int mlx_block_masked_mm(mlx_array* res, const mlx_array a, const mlx_array b, int block_size, const mlx_array mask_out , const mlx_array mask_lhs , const mlx_array mask_rhs , const mlx_stream s);
|
||||
|
||||
int mlx_broadcast_arrays(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_stream s);
|
||||
@@ -1858,7 +1870,7 @@ int mlx_degrees(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
|
||||
int mlx_depends(mlx_vector_array* res, const mlx_vector_array inputs, const mlx_vector_array dependencies);
|
||||
|
||||
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, mlx_optional_dtype dtype, const mlx_stream s);
|
||||
int mlx_dequantize(mlx_array* res, const mlx_array w, const mlx_array scales, const mlx_array biases , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , mlx_optional_dtype dtype, const mlx_stream s);
|
||||
|
||||
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
|
||||
|
||||
@@ -1912,6 +1924,10 @@ int mlx_greater_equal(mlx_array* res, const mlx_array a, const mlx_array b, cons
|
||||
|
||||
int mlx_hadamard_transform(mlx_array* res, const mlx_array a, mlx_optional_float scale, const mlx_stream s);
|
||||
|
||||
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
|
||||
|
||||
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
|
||||
|
||||
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
|
||||
|
||||
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
@@ -2030,9 +2046,9 @@ int mlx_prod(mlx_array* res, const mlx_array a, bool keepdims, const mlx_stream
|
||||
|
||||
int mlx_put_along_axis(mlx_array* res, const mlx_array a, const mlx_array indices, const mlx_array values, int axis, const mlx_stream s);
|
||||
|
||||
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
int mlx_qqmm(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array w_scales , mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale_x , const mlx_array global_scale_w , const mlx_stream s);
|
||||
|
||||
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
int mlx_quantize(mlx_vector_array* res, const mlx_array w, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_array global_scale , const mlx_stream s);
|
||||
|
||||
int mlx_quantized_matmul(mlx_array* res, const mlx_array x, const mlx_array w, const mlx_array scales, const mlx_array biases , bool transpose, mlx_optional_int group_size, mlx_optional_int bits, const char* mode, const mlx_stream s);
|
||||
|
||||
|
||||
@@ -93,21 +93,8 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
|
||||
matchPath, matched = findBestMatch(c.root, inputs[:len(inputs)-1])
|
||||
}
|
||||
|
||||
// Check for partial match within a node's edge — truncate path
|
||||
// to the parent boundary. snapshot() will split the node and
|
||||
// create the branch point during prefill when caches are ready.
|
||||
partialMatch := false
|
||||
if len(matchPath) > 1 {
|
||||
lastNode := matchPath[len(matchPath)-1]
|
||||
matchedInEdge := matched - lastNode.startOffset()
|
||||
if matchedInEdge > 0 && matchedInEdge < len(lastNode.tokens) {
|
||||
matchPath = matchPath[:len(matchPath)-1]
|
||||
partialMatch = true
|
||||
}
|
||||
}
|
||||
|
||||
// Switch to the matched path, paging in/out as needed.
|
||||
c.switchToPath(matchPath)
|
||||
c.switchToPath(matchPath, matched)
|
||||
|
||||
// switchToPath aligns caches to a common offset
|
||||
prefix := c.minCacheOffset()
|
||||
@@ -116,7 +103,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
|
||||
// Schedule a snapshot at the branch point during prefill so future
|
||||
// requests diverging here can restore instead of re-evaluating.
|
||||
var snapshotAt int
|
||||
if partialMatch || (prefix == 0 && matched > 0) {
|
||||
if prefix < matched {
|
||||
snapshotAt = matched
|
||||
}
|
||||
|
||||
@@ -142,7 +129,7 @@ func (c *kvCache) begin(m base.Model, inputs []int32) *cacheSession {
|
||||
|
||||
// switchToPath transitions from the current active path to a new path,
|
||||
// paging out diverging segments and paging in the new path.
|
||||
func (c *kvCache) switchToPath(newPath []*trieNode) {
|
||||
func (c *kvCache) switchToPath(newPath []*trieNode, matched int) {
|
||||
defer c.enforceEvictionPolicy()
|
||||
|
||||
// Find common ancestor index.
|
||||
@@ -167,7 +154,10 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
|
||||
// non-leaf nodes here would produce wrong results for non-rewindable
|
||||
// caches (e.g. RecurrentCache) whose state reflects the leaf, not
|
||||
// the intermediate boundary.
|
||||
if leaf := len(c.activePath) - 1; leaf >= commonLen {
|
||||
leaf := len(c.activePath) - 1
|
||||
leafDiverges := leaf >= commonLen
|
||||
leafNeedsRewind := matched < c.activePath[leaf].endOffset
|
||||
if leafDiverges || leafNeedsRewind {
|
||||
node := c.activePath[leaf]
|
||||
if !node.hasAllSnapshots() {
|
||||
fromOffset := node.startOffset()
|
||||
@@ -184,14 +174,16 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
|
||||
}
|
||||
}
|
||||
|
||||
// Rewind each cache to the ancestor offset or free it. Freed
|
||||
// caches (e.g. RecurrentCache that can't rewind) will be restored
|
||||
// from snapshots during page-in.
|
||||
// Rewind each cache to the target offset or free it. When matched
|
||||
// falls within the ancestor's range (same-path case), we rewind
|
||||
// directly to the match point. Otherwise we rewind to the ancestor
|
||||
// and let page-in bring us forward to matched.
|
||||
rewindTarget := min(ancestorOffset, matched)
|
||||
for _, kv := range c.caches {
|
||||
if kv == nil {
|
||||
continue
|
||||
}
|
||||
if !kv.Restore(nil, ancestorOffset) {
|
||||
if !kv.Restore(nil, rewindTarget) {
|
||||
kv.Free()
|
||||
}
|
||||
}
|
||||
@@ -199,10 +191,12 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
|
||||
// Page in — walk the full new path, restoring from snapshots.
|
||||
// Freed caches naturally pick up the first available snapshot.
|
||||
// Caches already past a node skip it via offset check.
|
||||
pageIn:
|
||||
for _, node := range newPath {
|
||||
if len(node.snapshots) == 0 {
|
||||
if !node.hasSnapshots() {
|
||||
continue
|
||||
}
|
||||
nodeTarget := min(node.endOffset, matched)
|
||||
for j, kv := range c.caches {
|
||||
if kv == nil {
|
||||
continue
|
||||
@@ -210,19 +204,18 @@ func (c *kvCache) switchToPath(newPath []*trieNode) {
|
||||
if j >= len(node.snapshots) || node.snapshots[j] == nil {
|
||||
continue
|
||||
}
|
||||
if kv.Offset() >= node.endOffset {
|
||||
if kv.Offset() >= nodeTarget {
|
||||
continue
|
||||
}
|
||||
if !kv.Restore(node.snapshots[j], node.endOffset) {
|
||||
slog.Warn("cache restore failure during page-in, freeing all caches", "layer", j, "offset", node.startOffset())
|
||||
c.freeAll()
|
||||
c.activePath = []*trieNode{c.root}
|
||||
return
|
||||
if !kv.Restore(node.snapshots[j], nodeTarget) {
|
||||
// Restore failed — stop page-in and let alignment
|
||||
// bring all caches to a consistent offset.
|
||||
break pageIn
|
||||
}
|
||||
}
|
||||
if node.endOffset > ancestorOffset {
|
||||
pageInCount++
|
||||
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), node.endOffset))
|
||||
logutil.Trace(fmt.Sprintf("page in: [%d, %d)", node.startOffset(), nodeTarget))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -536,6 +529,9 @@ func (c *kvCache) dumpTree() {
|
||||
if nodeBytes > 0 {
|
||||
label += " " + mlx.PrettyBytes(int(nodeBytes)).String()
|
||||
}
|
||||
if !n.lastUsed.IsZero() {
|
||||
label += fmt.Sprintf(" %s ago", time.Since(n.lastUsed).Truncate(time.Millisecond))
|
||||
}
|
||||
var flags []string
|
||||
if n.user {
|
||||
flags = append(flags, "user")
|
||||
|
||||
40
x/mlxrunner/cache/cache.go
vendored
40
x/mlxrunner/cache/cache.go
vendored
@@ -17,7 +17,8 @@ type Cache interface {
|
||||
Snapshot(fromOffset int) Snapshot
|
||||
|
||||
// Restore brings the cache to target. If snapshot is nil, rewinds
|
||||
// using the cache's own live state.
|
||||
// using the cache's own live state. Returns false if the target is
|
||||
// unreachable (e.g. target > current offset, or negative).
|
||||
Restore(snapshot Snapshot, target int) bool
|
||||
|
||||
// Merge combines two sequential snapshots [a,b) and [b,c) into [a,c).
|
||||
@@ -108,8 +109,8 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
|
||||
|
||||
kSlice := c.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
|
||||
vSlice := c.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(from, to), mlx.Slice())
|
||||
kCopy := mlx.Copy(kSlice)
|
||||
vCopy := mlx.Copy(vSlice)
|
||||
kCopy := mlx.Contiguous(kSlice, false)
|
||||
vCopy := mlx.Contiguous(vSlice, false)
|
||||
mlx.Pin(kCopy, vCopy)
|
||||
mlx.AsyncEval(kCopy, vCopy)
|
||||
|
||||
@@ -122,17 +123,21 @@ func (c *KVCache) Snapshot(fromOffset int) Snapshot {
|
||||
}
|
||||
|
||||
func (c *KVCache) Restore(snapshot Snapshot, target int) bool {
|
||||
if target < 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if snapshot == nil {
|
||||
// Rewind using live state — just clamp offset.
|
||||
target = max(0, min(target, c.offset))
|
||||
if target > c.offset {
|
||||
return false
|
||||
}
|
||||
c.offset = target
|
||||
return true
|
||||
}
|
||||
|
||||
snap := snapshot.(*kvSnapshot)
|
||||
|
||||
// Check that the cache has data up to the snapshot's starting point.
|
||||
if c.offset < snap.fromOffset {
|
||||
if target > snap.toOffset || c.offset < snap.fromOffset {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -191,10 +196,10 @@ func (c *KVCache) Split(snapshot Snapshot, at int) (Snapshot, Snapshot) {
|
||||
return snapshot, nil
|
||||
}
|
||||
|
||||
pk := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
|
||||
pv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()))
|
||||
ck := mlx.Copy(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
|
||||
cv := mlx.Copy(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()))
|
||||
pk := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
|
||||
pv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(0, splitIdx), mlx.Slice()), false)
|
||||
ck := mlx.Contiguous(snap.keys.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
|
||||
cv := mlx.Contiguous(snap.values.Slice(mlx.Slice(), mlx.Slice(), mlx.Slice(splitIdx, seqLen), mlx.Slice()), false)
|
||||
mlx.Pin(pk, pv, ck, cv)
|
||||
mlx.AsyncEval(pk, pv, ck, cv)
|
||||
|
||||
@@ -354,7 +359,14 @@ func (c *RotatingKVCache) Snapshot(fromOffset int) Snapshot {
|
||||
}
|
||||
|
||||
func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
|
||||
if target < 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if snapshot == nil {
|
||||
if target >= c.offset {
|
||||
return target == c.offset
|
||||
}
|
||||
// Live rewind is only safe when the buffer hasn't filled yet
|
||||
// (offset <= maxSize). Once the window has shifted, rewinding
|
||||
// leaves fewer than maxSize trailing tokens to attend to —
|
||||
@@ -362,7 +374,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
|
||||
if c.offset > c.maxSize {
|
||||
return false
|
||||
}
|
||||
target = max(0, min(target, c.offset))
|
||||
c.offset = target
|
||||
c.idx = target
|
||||
return true
|
||||
@@ -370,6 +381,10 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
|
||||
|
||||
snap := snapshot.(*rotatingSnapshot)
|
||||
|
||||
if target > snap.toOffset {
|
||||
return false
|
||||
}
|
||||
|
||||
// Reject if clamping would leave an incomplete window.
|
||||
if target < snap.toOffset && snap.toOffset > c.maxSize {
|
||||
return false
|
||||
@@ -388,7 +403,6 @@ func (c *RotatingKVCache) Restore(snapshot Snapshot, target int) bool {
|
||||
|
||||
// Clamp to target if needed.
|
||||
if target < c.offset {
|
||||
target = max(0, target)
|
||||
c.offset = target
|
||||
c.idx = target
|
||||
}
|
||||
|
||||
22
x/mlxrunner/cache/recurrent.go
vendored
22
x/mlxrunner/cache/recurrent.go
vendored
@@ -22,14 +22,9 @@ func (c *RecurrentCache) setStateRaw(old, v *mlx.Array) *mlx.Array {
|
||||
if v == nil || !v.Valid() {
|
||||
return old
|
||||
}
|
||||
if old == v {
|
||||
return old
|
||||
}
|
||||
|
||||
mlx.Pin(v)
|
||||
if old != nil && old != v {
|
||||
mlx.Unpin(old)
|
||||
}
|
||||
mlx.Unpin(old)
|
||||
|
||||
return v
|
||||
}
|
||||
@@ -38,9 +33,6 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
|
||||
if v == nil || !v.Valid() {
|
||||
return old
|
||||
}
|
||||
if old == v {
|
||||
return old
|
||||
}
|
||||
|
||||
root := v
|
||||
if ensureContiguous {
|
||||
@@ -49,9 +41,7 @@ func (c *RecurrentCache) setStateDetached(old, v *mlx.Array, ensureContiguous bo
|
||||
detached := root.Clone()
|
||||
|
||||
mlx.Pin(detached)
|
||||
if old != nil && old != detached {
|
||||
mlx.Unpin(old)
|
||||
}
|
||||
mlx.Unpin(old)
|
||||
|
||||
return detached
|
||||
}
|
||||
@@ -150,10 +140,10 @@ func (c *RecurrentCache) Restore(snapshot Snapshot, target int) bool {
|
||||
|
||||
snap := snapshot.(*recurrentSnapshot)
|
||||
|
||||
// Recurrent state encodes all tokens up to snap.offset. Restoring
|
||||
// to a target before that would leave stale state from tokens
|
||||
// [target, snap.offset) baked in. Only allow restoring forward.
|
||||
if target < snap.offset {
|
||||
// Recurrent snapshots encode cumulative state up to exactly
|
||||
// snap.offset. Target must match — rewinding would leave stale
|
||||
// state, and advancing isn't possible without feeding tokens.
|
||||
if target != snap.offset {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
34
x/mlxrunner/cache/recurrent_test.go
vendored
34
x/mlxrunner/cache/recurrent_test.go
vendored
@@ -6,39 +6,35 @@ import (
|
||||
"github.com/ollama/ollama/x/mlxrunner/mlx"
|
||||
)
|
||||
|
||||
// TestRecurrentCacheRestoreDirectionality verifies that RecurrentCache only
|
||||
// allows restoring forward (target >= snapshot offset), never backward.
|
||||
func TestRecurrentCacheRestoreDirectionality(t *testing.T) {
|
||||
// TestRecurrentCacheRestoreExactOffset verifies that RecurrentCache restore
|
||||
// only succeeds when target exactly matches the snapshot's offset. Recurrent
|
||||
// state is cumulative, so it can't be rewound or fast-forwarded.
|
||||
func TestRecurrentCacheRestoreExactOffset(t *testing.T) {
|
||||
skipIfNoMLX(t)
|
||||
c := NewRecurrentCache(3, 12, 4, 8, 8)
|
||||
_ = c.ConvState(1, mlx.DTypeFloat16)
|
||||
_ = c.DeltaState(1, mlx.DTypeFloat16)
|
||||
c.Advance(10)
|
||||
|
||||
snap := c.Snapshot(0)
|
||||
snap := c.Snapshot(0) // snap.offset == 10
|
||||
|
||||
c.Advance(5) // now at 15
|
||||
c.Advance(5) // cache now at 15
|
||||
|
||||
// Restore backward should fail.
|
||||
// target < snap.offset: fails (can't rewind past snapshot)
|
||||
if c.Restore(snap, 5) {
|
||||
t.Fatal("Restore(snap, 5) should fail — target < snap.offset")
|
||||
t.Fatal("Restore(snap, 5) should fail — target != snap.offset")
|
||||
}
|
||||
|
||||
// Restore to exact snap offset should succeed.
|
||||
// target > snap.offset: fails (can't advance without feeding tokens)
|
||||
if c.Restore(snap, 15) {
|
||||
t.Fatal("Restore(snap, 15) should fail — target != snap.offset")
|
||||
}
|
||||
|
||||
// target == snap.offset: succeeds
|
||||
if !c.Restore(snap, 10) {
|
||||
t.Fatal("Restore(snap, 10) should succeed")
|
||||
t.Fatal("Restore(snap, 10) should succeed — target == snap.offset")
|
||||
}
|
||||
if c.Offset() != 10 {
|
||||
t.Fatalf("offset = %d, want 10", c.Offset())
|
||||
}
|
||||
|
||||
// Restore forward (target > snap offset) should succeed, offset = snap.offset.
|
||||
snap2 := c.Snapshot(0)
|
||||
if !c.Restore(snap2, 15) {
|
||||
t.Fatal("Restore(snap, 15) should succeed")
|
||||
}
|
||||
// Recurrent state is at snap.offset (10), not target (15).
|
||||
if c.Offset() != 10 {
|
||||
t.Fatalf("offset = %d, want 10 (snap offset)", c.Offset())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,20 +79,20 @@ func (c *fakeRewindableCache) Snapshot(fromOffset int) cache.Snapshot {
|
||||
}
|
||||
|
||||
func (c *fakeRewindableCache) Restore(snapshot cache.Snapshot, target int) bool {
|
||||
if target < 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if snapshot == nil {
|
||||
// Rewind live state.
|
||||
if target < 0 {
|
||||
target = 0
|
||||
}
|
||||
if target > len(c.tokens) {
|
||||
target = len(c.tokens)
|
||||
return false
|
||||
}
|
||||
c.tokens = c.tokens[:target]
|
||||
return true
|
||||
}
|
||||
s := snapshot.(*fakeSnapshot)
|
||||
if len(c.tokens) < s.from {
|
||||
return false // don't have base data up to snapshot start
|
||||
if target > s.to || len(c.tokens) < s.from {
|
||||
return false
|
||||
}
|
||||
c.tokens = append(c.tokens[:s.from], s.tokens...)
|
||||
if target < len(c.tokens) {
|
||||
@@ -196,9 +196,13 @@ func (c *fakeSlidingWindowCache) Snapshot(fromOffset int) cache.Snapshot {
|
||||
}
|
||||
|
||||
func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bool {
|
||||
if target < 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if snapshot == nil {
|
||||
if target == len(c.tokens) {
|
||||
return true
|
||||
if target >= len(c.tokens) {
|
||||
return target == len(c.tokens)
|
||||
}
|
||||
// Live rewind only works when buffer hasn't filled (offset <= maxSize).
|
||||
if len(c.tokens) > c.maxSize {
|
||||
@@ -208,6 +212,14 @@ func (c *fakeSlidingWindowCache) Restore(snapshot cache.Snapshot, target int) bo
|
||||
return true
|
||||
}
|
||||
s := snapshot.(*fakeSnapshot)
|
||||
if target > s.to {
|
||||
return false
|
||||
}
|
||||
// Reject if clamping would leave an incomplete window
|
||||
// (matches RotatingKVCache behavior).
|
||||
if target < s.to && s.to > c.maxSize {
|
||||
return false
|
||||
}
|
||||
c.tokens = slices.Clone(s.tokens)
|
||||
if target < len(c.tokens) {
|
||||
c.tokens = c.tokens[:target]
|
||||
@@ -268,8 +280,8 @@ func (c *fakeRecurrentCache) Restore(snapshot cache.Snapshot, target int) bool {
|
||||
return target == len(c.tokens) // can only no-op
|
||||
}
|
||||
s := snapshot.(*fakeSnapshot)
|
||||
if target < s.to {
|
||||
return false // can't go backward
|
||||
if target != s.to {
|
||||
return false // cumulative state requires exact match
|
||||
}
|
||||
c.tokens = slices.Clone(s.tokens)
|
||||
return true
|
||||
@@ -294,9 +306,10 @@ type feedableCache interface {
|
||||
|
||||
// testEnv encapsulates a kvCache and its fake caches for a test scenario.
|
||||
type testEnv struct {
|
||||
kvc *kvCache
|
||||
caches []cache.Cache // typed references for assertions
|
||||
tracker *snapshotTracker
|
||||
kvc *kvCache
|
||||
caches []cache.Cache // typed references for assertions
|
||||
tracker *snapshotTracker
|
||||
rewindable bool // true when all caches support arbitrary Restore(nil, target)
|
||||
}
|
||||
|
||||
// newTransformerEnv creates a test environment with a single rewindable cache
|
||||
@@ -305,23 +318,28 @@ func newTransformerEnv() *testEnv {
|
||||
tracker := &snapshotTracker{}
|
||||
caches := []cache.Cache{&fakeRewindableCache{tracker: tracker}}
|
||||
return &testEnv{
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tracker,
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tracker,
|
||||
rewindable: true,
|
||||
}
|
||||
}
|
||||
|
||||
// newSlidingWindowEnv creates a test environment with one rewindable cache and
|
||||
// one sliding window cache (Mistral-style architecture).
|
||||
// one sliding window cache (Mistral-style architecture). The sliding window
|
||||
// maxSize is set small enough that test sequences fill it, making
|
||||
// Restore(nil, target) fail — the same behavior as production models where
|
||||
// the window fills after a few turns.
|
||||
func newSlidingWindowEnv() *testEnv {
|
||||
tr := &snapshotTracker{}
|
||||
rc := &fakeRewindableCache{tracker: tr}
|
||||
sw := &fakeSlidingWindowCache{maxSize: 32, tracker: tr}
|
||||
sw := &fakeSlidingWindowCache{maxSize: 4, tracker: tr}
|
||||
caches := []cache.Cache{rc, sw}
|
||||
return &testEnv{
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tr,
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tr,
|
||||
rewindable: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,9 +351,10 @@ func newRecurrentEnv() *testEnv {
|
||||
nrc := &fakeRecurrentCache{tracker: tr}
|
||||
caches := []cache.Cache{rc, nrc}
|
||||
return &testEnv{
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tr,
|
||||
kvc: &kvCache{caches: caches},
|
||||
caches: caches,
|
||||
tracker: tr,
|
||||
rewindable: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -590,15 +609,24 @@ func TestBranchCreationAndReuse(t *testing.T) {
|
||||
}
|
||||
|
||||
// Request B: [1,2,3,4,5,10,11,12] — shares 5-token prefix with A.
|
||||
// Partial match in A's edge triggers snapshotOffset.
|
||||
// For rewindable caches, switchToPath rewinds to the match point
|
||||
// so only the non-matching suffix needs evaluation. For non-rewindable
|
||||
// caches (RecurrentCache), the rewind fails and freeAll fires.
|
||||
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5, 10, 11, 12}, []int32{30, 31})
|
||||
if resB.snapshotOffset != 5 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
|
||||
}
|
||||
// Cache was rewound to 0 (partial match truncates path to root),
|
||||
// so all tokens were re-evaluated.
|
||||
if len(resB.remaining) != 8 {
|
||||
t.Fatalf("B: remaining = %d, want 8", len(resB.remaining))
|
||||
if env.rewindable {
|
||||
if resB.snapshotOffset != 0 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
|
||||
}
|
||||
if len(resB.remaining) != 3 {
|
||||
t.Fatalf("B: remaining = %d, want 3 (rewind to match point)", len(resB.remaining))
|
||||
}
|
||||
} else {
|
||||
if resB.snapshotOffset != 5 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 5", resB.snapshotOffset)
|
||||
}
|
||||
if len(resB.remaining) != 8 {
|
||||
t.Fatalf("B: remaining = %d, want 8 (freeAll fallback)", len(resB.remaining))
|
||||
}
|
||||
}
|
||||
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 10, 11, 12, 30, 31})
|
||||
|
||||
@@ -635,14 +663,24 @@ func TestExactMatchSeedBehavior(t *testing.T) {
|
||||
simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{10, 11})
|
||||
|
||||
// Request B: identical prompt. Holdback means matched=4, partial in
|
||||
// the 5-token edge, so path truncates to root and all tokens are
|
||||
// re-evaluated. snapshotOffset should be set at the holdback point.
|
||||
// the 5-token edge. For rewindable caches, switchToPath rewinds to
|
||||
// offset 4, so only the held-back token needs re-evaluation. For
|
||||
// non-rewindable caches, the rewind fails and freeAll fires.
|
||||
resB := simulateRequest(t, kvc, []int32{1, 2, 3, 4, 5}, []int32{20, 21})
|
||||
if len(resB.remaining) != 5 {
|
||||
t.Fatalf("B: remaining = %d, want 5 (full re-eval due to holdback)", len(resB.remaining))
|
||||
}
|
||||
if resB.snapshotOffset != 4 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
|
||||
if env.rewindable {
|
||||
if len(resB.remaining) != 1 {
|
||||
t.Fatalf("B: remaining = %d, want 1 (rewind to holdback point)", len(resB.remaining))
|
||||
}
|
||||
if resB.snapshotOffset != 0 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 0 (rewind succeeded)", resB.snapshotOffset)
|
||||
}
|
||||
} else {
|
||||
if len(resB.remaining) != 5 {
|
||||
t.Fatalf("B: remaining = %d, want 5 (freeAll fallback)", len(resB.remaining))
|
||||
}
|
||||
if resB.snapshotOffset != 4 {
|
||||
t.Fatalf("B: snapshotOffset = %d, want 4", resB.snapshotOffset)
|
||||
}
|
||||
}
|
||||
env.assertAllTokens(t, "after B", []int32{1, 2, 3, 4, 5, 20, 21})
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package mlxrunner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@@ -36,14 +37,69 @@ type Client struct {
|
||||
modelName string
|
||||
contextLength atomic.Int64
|
||||
memory atomic.Uint64
|
||||
done chan error
|
||||
done chan struct{}
|
||||
doneErr error // valid after done is closed
|
||||
client *http.Client
|
||||
lastErr string
|
||||
lastErrLock sync.Mutex
|
||||
status *statusWriter
|
||||
mu sync.Mutex
|
||||
cmd *exec.Cmd
|
||||
}
|
||||
|
||||
// statusWriter captures the last stderr line from the subprocess while
|
||||
// forwarding all output to os.Stderr. Lines longer than maxStatusLen are
|
||||
// truncated to the first maxStatusLen bytes.
|
||||
type statusWriter struct {
|
||||
lastErrMsg string
|
||||
buf []byte
|
||||
discarding bool
|
||||
mu sync.Mutex
|
||||
out *os.File
|
||||
}
|
||||
|
||||
const maxStatusLen = 256
|
||||
|
||||
func (w *statusWriter) Write(b []byte) (int, error) {
|
||||
n, err := w.out.Write(b)
|
||||
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
|
||||
w.buf = append(w.buf, b...)
|
||||
for {
|
||||
i := bytes.IndexByte(w.buf, '\n')
|
||||
if i < 0 {
|
||||
break
|
||||
}
|
||||
if !w.discarding {
|
||||
line := bytes.TrimSpace(w.buf[:i])
|
||||
if len(line) > 0 {
|
||||
if len(line) > maxStatusLen {
|
||||
line = line[:maxStatusLen]
|
||||
}
|
||||
w.lastErrMsg = string(line)
|
||||
}
|
||||
}
|
||||
w.buf = w.buf[i+1:]
|
||||
w.discarding = false
|
||||
}
|
||||
// if the buffer grows past maxStatusLen without a newline, keep the front
|
||||
if len(w.buf) > maxStatusLen {
|
||||
if !w.discarding {
|
||||
w.lastErrMsg = string(bytes.TrimSpace(w.buf[:maxStatusLen]))
|
||||
w.discarding = true
|
||||
}
|
||||
w.buf = w.buf[:0]
|
||||
}
|
||||
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (w *statusWriter) getLastErr() string {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
return w.lastErrMsg
|
||||
}
|
||||
|
||||
// NewClient prepares a new MLX runner client for LLM models.
|
||||
// The subprocess is not started until Load() is called.
|
||||
func NewClient(modelName string) (*Client, error) {
|
||||
@@ -53,7 +109,7 @@ func NewClient(modelName string) (*Client, error) {
|
||||
|
||||
c := &Client{
|
||||
modelName: modelName,
|
||||
done: make(chan error, 1),
|
||||
done: make(chan struct{}),
|
||||
client: &http.Client{Timeout: 10 * time.Minute},
|
||||
}
|
||||
|
||||
@@ -66,12 +122,6 @@ func NewClient(modelName string) (*Client, error) {
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *Client) getLastErr() string {
|
||||
c.lastErrLock.Lock()
|
||||
defer c.lastErrLock.Unlock()
|
||||
return c.lastErr
|
||||
}
|
||||
|
||||
// WaitUntilRunning waits for the subprocess to be ready.
|
||||
func (c *Client) WaitUntilRunning(ctx context.Context) error {
|
||||
timeout := time.After(2 * time.Minute)
|
||||
@@ -82,16 +132,14 @@ func (c *Client) WaitUntilRunning(ctx context.Context) error {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case err := <-c.done:
|
||||
errMsg := c.getLastErr()
|
||||
if errMsg != "" {
|
||||
return fmt.Errorf("mlx runner failed: %s (exit: %v)", errMsg, err)
|
||||
case <-c.done:
|
||||
if msg := c.status.getLastErr(); msg != "" {
|
||||
return fmt.Errorf("mlx runner failed: %s (exit: %v)", msg, c.doneErr)
|
||||
}
|
||||
return fmt.Errorf("mlx runner exited unexpectedly: %w", err)
|
||||
return fmt.Errorf("mlx runner exited unexpectedly: %w", c.doneErr)
|
||||
case <-timeout:
|
||||
errMsg := c.getLastErr()
|
||||
if errMsg != "" {
|
||||
return fmt.Errorf("timeout waiting for mlx runner: %s", errMsg)
|
||||
if msg := c.status.getLastErr(); msg != "" {
|
||||
return fmt.Errorf("timeout waiting for mlx runner: %s", msg)
|
||||
}
|
||||
return errors.New("timeout waiting for mlx runner to start")
|
||||
case <-ticker.C:
|
||||
@@ -182,6 +230,9 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
|
||||
resp, err := c.client.Do(httpReq)
|
||||
if err != nil {
|
||||
if errMsg := c.status.getLastErr(); errMsg != "" {
|
||||
return fmt.Errorf("mlx runner failed: %s", errMsg)
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
@@ -219,7 +270,13 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
}
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
if err := scanner.Err(); err != nil {
|
||||
if errMsg := c.status.getLastErr(); errMsg != "" {
|
||||
return fmt.Errorf("mlx runner failed: %s", errMsg)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) ContextLength() int {
|
||||
@@ -348,18 +405,13 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
|
||||
// Forward subprocess stdout/stderr to server logs
|
||||
stdout, _ := cmd.StdoutPipe()
|
||||
stderr, _ := cmd.StderrPipe()
|
||||
status := &statusWriter{out: os.Stderr}
|
||||
c.status = status
|
||||
go func() {
|
||||
io.Copy(os.Stderr, stdout) //nolint:errcheck
|
||||
}()
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
fmt.Fprintln(os.Stderr, line)
|
||||
c.lastErrLock.Lock()
|
||||
c.lastErr = line
|
||||
c.lastErrLock.Unlock()
|
||||
}
|
||||
io.Copy(status, stderr) //nolint:errcheck
|
||||
}()
|
||||
|
||||
slog.Info("starting mlx runner subprocess", "model", c.modelName, "port", c.port)
|
||||
@@ -369,8 +421,8 @@ func (c *Client) Load(ctx context.Context, _ ml.SystemInfo, gpus []ml.DeviceInfo
|
||||
|
||||
// Reap subprocess when it exits
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
c.done <- err
|
||||
c.doneErr = cmd.Wait()
|
||||
close(c.done)
|
||||
}()
|
||||
|
||||
return nil, nil
|
||||
|
||||
@@ -15,7 +15,9 @@ set(CMAKE_INSTALL_RPATH "@loader_path")
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
set(MLX_C_GIT_TAG "v0.5.0" CACHE STRING "")
|
||||
# Read MLX-C version from top-level file (shared with imagegen CMakeLists)
|
||||
file(READ "${CMAKE_SOURCE_DIR}/MLX_C_VERSION" MLX_C_GIT_TAG)
|
||||
string(STRIP "${MLX_C_GIT_TAG}" MLX_C_GIT_TAG)
|
||||
|
||||
FetchContent_Declare(
|
||||
mlx-c
|
||||
|
||||
@@ -137,6 +137,9 @@ func Unpin(s ...*Array) {
|
||||
for _, t := range s {
|
||||
if t != nil {
|
||||
t.pinned--
|
||||
if t.pinned < 0 {
|
||||
panic(fmt.Sprintf("mlx.Unpin: negative pin count on array %q", t.name))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -259,9 +262,11 @@ func LogArrays() {
|
||||
return arrays[i].NumBytes() > arrays[j].NumBytes()
|
||||
})
|
||||
|
||||
var total int
|
||||
for _, t := range arrays {
|
||||
nb := t.NumBytes()
|
||||
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s %v", t.name, t.DType(), PrettyBytes(nb), t.Dims()))
|
||||
total += nb
|
||||
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned, t.Dims()))
|
||||
}
|
||||
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s", len(arrays), PrettyBytes(ActiveMemory())))
|
||||
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s, active: %s", len(arrays), PrettyBytes(total), PrettyBytes(ActiveMemory())))
|
||||
}
|
||||
|
||||
@@ -13,6 +13,10 @@ var (
|
||||
gatedDeltaMetalKernelOnce sync.Once
|
||||
gatedDeltaMetalKernel C.mlx_fast_metal_kernel
|
||||
gatedDeltaMetalDisabled bool
|
||||
|
||||
gatedDeltaCUDAKernelOnce sync.Once
|
||||
gatedDeltaCUDAKernel C.mlx_fast_cuda_kernel
|
||||
gatedDeltaCUDADisabled bool
|
||||
)
|
||||
|
||||
const gatedDeltaMetalKernelSource = `
|
||||
@@ -83,6 +87,86 @@ for (int i = 0; i < n_per_t; ++i) {
|
||||
}
|
||||
`
|
||||
|
||||
const gatedDeltaCUDAKernelSource = `
|
||||
auto tid_x = threadIdx.x;
|
||||
auto tid_y = threadIdx.y;
|
||||
auto grid_y = blockIdx.y * blockDim.y + tid_y;
|
||||
auto grid_z = blockIdx.z;
|
||||
|
||||
int T_val = static_cast<int>(*T);
|
||||
|
||||
auto n = grid_z;
|
||||
auto b_idx = n / Hv;
|
||||
auto hv_idx = n % Hv;
|
||||
auto hk_idx = hv_idx / (Hv / Hk);
|
||||
constexpr int n_per_t = Dk / 32;
|
||||
|
||||
// q, k: [B, T, Hk, Dk]
|
||||
auto q_ = q + b_idx * T_val * Hk * Dk + hk_idx * Dk;
|
||||
auto k_ = k + b_idx * T_val * Hk * Dk + hk_idx * Dk;
|
||||
|
||||
// v, y: [B, T, Hv, Dv]
|
||||
auto dv_idx = grid_y;
|
||||
auto v_ = v + b_idx * T_val * Hv * Dv + hv_idx * Dv;
|
||||
y += b_idx * T_val * Hv * Dv + hv_idx * Dv;
|
||||
|
||||
auto dk_idx = tid_x;
|
||||
|
||||
// state_in, state_out: [B, Hv, Dv, Dk]
|
||||
auto i_state = state_in + (n * Dv + dv_idx) * Dk;
|
||||
auto o_state = state_out + (n * Dv + dv_idx) * Dk;
|
||||
|
||||
float state[n_per_t];
|
||||
for (int i = 0; i < n_per_t; ++i) {
|
||||
auto s_idx = n_per_t * dk_idx + i;
|
||||
state[i] = static_cast<float>(i_state[s_idx]);
|
||||
}
|
||||
|
||||
// g: [B, T, Hv]
|
||||
auto g_ = g + b_idx * T_val * Hv;
|
||||
auto beta_ = beta + b_idx * T_val * Hv;
|
||||
|
||||
for (int t = 0; t < T_val; ++t) {
|
||||
float kv_mem = 0.0f;
|
||||
for (int i = 0; i < n_per_t; ++i) {
|
||||
auto s_idx = n_per_t * dk_idx + i;
|
||||
state[i] = state[i] * static_cast<float>(g_[hv_idx]);
|
||||
kv_mem += state[i] * static_cast<float>(k_[s_idx]);
|
||||
}
|
||||
// Warp reduction (full warp, 32 threads in x)
|
||||
for (int offset = 16; offset > 0; offset >>= 1)
|
||||
kv_mem += __shfl_down_sync(0xffffffff, kv_mem, offset);
|
||||
kv_mem = __shfl_sync(0xffffffff, kv_mem, 0);
|
||||
|
||||
auto delta = (static_cast<float>(v_[dv_idx]) - kv_mem) * static_cast<float>(beta_[hv_idx]);
|
||||
|
||||
float out = 0.0f;
|
||||
for (int i = 0; i < n_per_t; ++i) {
|
||||
auto s_idx = n_per_t * dk_idx + i;
|
||||
state[i] = state[i] + static_cast<float>(k_[s_idx]) * delta;
|
||||
out += state[i] * static_cast<float>(q_[s_idx]);
|
||||
}
|
||||
// Warp reduction
|
||||
for (int offset = 16; offset > 0; offset >>= 1)
|
||||
out += __shfl_down_sync(0xffffffff, out, offset);
|
||||
if (tid_x == 0) {
|
||||
y[dv_idx] = static_cast<InT>(out);
|
||||
}
|
||||
|
||||
q_ += Hk * Dk;
|
||||
k_ += Hk * Dk;
|
||||
v_ += Hv * Dv;
|
||||
y += Hv * Dv;
|
||||
g_ += Hv;
|
||||
beta_ += Hv;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_per_t; ++i) {
|
||||
auto s_idx = n_per_t * dk_idx + i;
|
||||
o_state[s_idx] = static_cast<InT>(state[i]);
|
||||
}
|
||||
`
|
||||
|
||||
func cStringVector(values []string) (C.mlx_vector_string, func(), bool) {
|
||||
vec := C.mlx_vector_string_new()
|
||||
ok := true
|
||||
@@ -352,11 +436,184 @@ func gatedDeltaFallback(q, k, v, g, beta, state *Array) (y, nextState *Array) {
|
||||
return Concatenate(outs, 1), nextState
|
||||
}
|
||||
|
||||
func initGatedDeltaCUDAKernel() {
|
||||
var cudaAvail C.bool
|
||||
if C.mlx_cuda_is_available(&cudaAvail) != 0 || !bool(cudaAvail) {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return
|
||||
}
|
||||
|
||||
inputs, freeInputs, ok := cStringVector([]string{"q", "k", "v", "g", "beta", "state_in", "T"})
|
||||
if !ok {
|
||||
gatedDeltaCUDADisabled = true
|
||||
freeInputs()
|
||||
return
|
||||
}
|
||||
defer freeInputs()
|
||||
|
||||
outputs, freeOutputs, ok := cStringVector([]string{"y", "state_out"})
|
||||
if !ok {
|
||||
gatedDeltaCUDADisabled = true
|
||||
freeOutputs()
|
||||
return
|
||||
}
|
||||
defer freeOutputs()
|
||||
|
||||
cName := C.CString("gated_delta_step")
|
||||
defer C.free(unsafe.Pointer(cName))
|
||||
cSource := C.CString(gatedDeltaCUDAKernelSource)
|
||||
defer C.free(unsafe.Pointer(cSource))
|
||||
cHeader := C.CString("")
|
||||
defer C.free(unsafe.Pointer(cHeader))
|
||||
|
||||
gatedDeltaCUDAKernel = C.mlx_fast_cuda_kernel_new(
|
||||
cName,
|
||||
inputs,
|
||||
outputs,
|
||||
cSource,
|
||||
cHeader,
|
||||
C.bool(true),
|
||||
C.int(0),
|
||||
)
|
||||
}
|
||||
|
||||
func gatedDeltaCUDAKernelApply(q, k, v, g, beta, state *Array) (y, nextState *Array, ok bool) {
|
||||
if gatedDeltaCUDADisabled {
|
||||
return nil, nil, false
|
||||
}
|
||||
if q == nil || k == nil || v == nil || g == nil || beta == nil || state == nil {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
qd := q.Dims()
|
||||
kd := k.Dims()
|
||||
vd := v.Dims()
|
||||
gd := g.Dims()
|
||||
bd := beta.Dims()
|
||||
sd := state.Dims()
|
||||
if len(qd) != 4 || len(kd) != 4 || len(vd) != 4 || len(gd) != 3 || len(bd) != 3 || len(sd) != 4 {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
B, T, Hk, Dk := qd[0], qd[1], qd[2], qd[3]
|
||||
if T <= 0 || Hk <= 0 || Dk <= 0 || Dk%32 != 0 {
|
||||
return nil, nil, false
|
||||
}
|
||||
if kd[0] != B || kd[1] != T || kd[2] != Hk || kd[3] != Dk {
|
||||
return nil, nil, false
|
||||
}
|
||||
Hv, Dv := vd[2], vd[3]
|
||||
if vd[0] != B || vd[1] != T || Hv <= 0 || Dv <= 0 || Hv%Hk != 0 {
|
||||
return nil, nil, false
|
||||
}
|
||||
if gd[0] != B || gd[1] != T || gd[2] != Hv {
|
||||
return nil, nil, false
|
||||
}
|
||||
if bd[0] != B || bd[1] != T || bd[2] != Hv {
|
||||
return nil, nil, false
|
||||
}
|
||||
if sd[0] != B || sd[1] != Hv || sd[2] != Dv || sd[3] != Dk {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
dtype := q.DType()
|
||||
if k.DType() != dtype || v.DType() != dtype || g.DType() != dtype || beta.DType() != dtype || state.DType() != dtype {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
gatedDeltaCUDAKernelOnce.Do(initGatedDeltaCUDAKernel)
|
||||
if gatedDeltaCUDADisabled {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
cfg := C.mlx_fast_cuda_kernel_config_new()
|
||||
defer C.mlx_fast_cuda_kernel_config_free(cfg)
|
||||
|
||||
cInT := C.CString("InT")
|
||||
defer C.free(unsafe.Pointer(cInT))
|
||||
if C.mlx_fast_cuda_kernel_config_add_template_arg_dtype(cfg, cInT, C.mlx_dtype(dtype)) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
for _, tpl := range []struct {
|
||||
name string
|
||||
value int
|
||||
}{
|
||||
{name: "Dk", value: Dk},
|
||||
{name: "Dv", value: Dv},
|
||||
{name: "Hk", value: Hk},
|
||||
{name: "Hv", value: Hv},
|
||||
} {
|
||||
cn := C.CString(tpl.name)
|
||||
rc := C.mlx_fast_cuda_kernel_config_add_template_arg_int(cfg, cn, C.int(tpl.value))
|
||||
C.free(unsafe.Pointer(cn))
|
||||
if rc != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
}
|
||||
|
||||
yShape := []C.int{C.int(B), C.int(T), C.int(Hv), C.int(Dv)}
|
||||
stateShape := []C.int{C.int(B), C.int(Hv), C.int(Dv), C.int(Dk)}
|
||||
if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(yShape), C.size_t(len(yShape)), C.mlx_dtype(dtype)) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
if C.mlx_fast_cuda_kernel_config_add_output_arg(cfg, unsafe.SliceData(stateShape), C.size_t(len(stateShape)), C.mlx_dtype(dtype)) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
if C.mlx_fast_cuda_kernel_config_set_grid(cfg, 32, C.int(Dv), C.int(B*Hv)) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
threadY := Dv
|
||||
if threadY > 4 {
|
||||
threadY = 4
|
||||
}
|
||||
if C.mlx_fast_cuda_kernel_config_set_thread_group(cfg, 32, C.int(threadY), 1) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
tScalar := FromValue(T)
|
||||
inputs := []C.mlx_array{
|
||||
q.ctx,
|
||||
k.ctx,
|
||||
v.ctx,
|
||||
g.ctx,
|
||||
beta.ctx,
|
||||
state.ctx,
|
||||
tScalar.ctx,
|
||||
}
|
||||
inVec := C.mlx_vector_array_new_data(unsafe.SliceData(inputs), C.size_t(len(inputs)))
|
||||
defer C.mlx_vector_array_free(inVec)
|
||||
|
||||
outVec := C.mlx_vector_array_new()
|
||||
defer C.mlx_vector_array_free(outVec)
|
||||
if C.mlx_fast_cuda_kernel_apply(&outVec, gatedDeltaCUDAKernel, inVec, cfg, DefaultStream().ctx) != 0 {
|
||||
gatedDeltaCUDADisabled = true
|
||||
return nil, nil, false
|
||||
}
|
||||
if int(C.mlx_vector_array_size(outVec)) < 2 {
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
y = New("GATED_DELTA_CUDA_Y")
|
||||
nextState = New("GATED_DELTA_CUDA_STATE")
|
||||
C.mlx_vector_array_get(&y.ctx, outVec, 0)
|
||||
C.mlx_vector_array_get(&nextState.ctx, outVec, 1)
|
||||
return y, nextState, true
|
||||
}
|
||||
|
||||
// GatedDelta runs the recurrent update operation.
|
||||
//
|
||||
// It uses the fused Metal kernel when available and otherwise falls back to a
|
||||
// It tries the fused CUDA kernel first, then Metal, then falls back to a
|
||||
// backend-agnostic MLX implementation with identical inputs/outputs.
|
||||
func GatedDelta(q, k, v, g, beta, state *Array) (y, nextState *Array) {
|
||||
if y, nextState, ok := gatedDeltaCUDAKernelApply(q, k, v, g, beta, state); ok {
|
||||
return y, nextState
|
||||
}
|
||||
if y, nextState, ok := gatedDeltaKernel(q, k, v, g, beta, state); ok {
|
||||
return y, nextState
|
||||
}
|
||||
|
||||
@@ -326,8 +326,10 @@ int (*mlx_distributed_sum_scatter_)(
|
||||
int (*mlx_distributed_group_rank_)(mlx_distributed_group group) = NULL;
|
||||
int (*mlx_distributed_group_size_)(mlx_distributed_group group) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key) = NULL;
|
||||
bool (*mlx_distributed_is_available_)(void) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_init_)(bool strict) = NULL;
|
||||
bool (*mlx_distributed_is_available_)(const char* bk /* may be null */) = NULL;
|
||||
mlx_distributed_group (*mlx_distributed_init_)(
|
||||
bool strict,
|
||||
const char* bk /* may be null */) = NULL;
|
||||
void (*mlx_set_error_handler_)(
|
||||
mlx_error_handler_func handler,
|
||||
void* data,
|
||||
@@ -924,6 +926,7 @@ int (*mlx_astype_)(
|
||||
int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_bitwise_and_)(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -940,6 +943,7 @@ int (*mlx_bitwise_xor_)(
|
||||
const mlx_array a,
|
||||
const mlx_array b,
|
||||
const mlx_stream s) = NULL;
|
||||
int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_block_masked_mm_)(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -1120,6 +1124,7 @@ int (*mlx_dequantize_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
mlx_optional_dtype dtype,
|
||||
const mlx_stream s) = NULL;
|
||||
int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s) = NULL;
|
||||
@@ -1256,6 +1261,8 @@ int (*mlx_hadamard_transform_)(
|
||||
const mlx_array a,
|
||||
mlx_optional_float scale,
|
||||
const mlx_stream s) = NULL;
|
||||
int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s) = NULL;
|
||||
int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) = NULL;
|
||||
int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s) = NULL;
|
||||
int (*mlx_inner_)(
|
||||
@@ -1548,6 +1555,8 @@ int (*mlx_qqmm_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale_x /* may be null */,
|
||||
const mlx_array global_scale_w /* may be null */,
|
||||
const mlx_stream s) = NULL;
|
||||
int (*mlx_quantize_)(
|
||||
mlx_vector_array* res,
|
||||
@@ -1555,6 +1564,7 @@ int (*mlx_quantize_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
const mlx_stream s) = NULL;
|
||||
int (*mlx_quantized_matmul_)(
|
||||
mlx_array* res,
|
||||
@@ -2550,10 +2560,12 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
|
||||
CHECK_LOAD(handle, mlx_atleast_1d);
|
||||
CHECK_LOAD(handle, mlx_atleast_2d);
|
||||
CHECK_LOAD(handle, mlx_atleast_3d);
|
||||
CHECK_LOAD(handle, mlx_bartlett);
|
||||
CHECK_LOAD(handle, mlx_bitwise_and);
|
||||
CHECK_LOAD(handle, mlx_bitwise_invert);
|
||||
CHECK_LOAD(handle, mlx_bitwise_or);
|
||||
CHECK_LOAD(handle, mlx_bitwise_xor);
|
||||
CHECK_LOAD(handle, mlx_blackman);
|
||||
CHECK_LOAD(handle, mlx_block_masked_mm);
|
||||
CHECK_LOAD(handle, mlx_broadcast_arrays);
|
||||
CHECK_LOAD(handle, mlx_broadcast_to);
|
||||
@@ -2606,6 +2618,8 @@ int mlx_dynamic_load_symbols(mlx_dynamic_handle handle) {
|
||||
CHECK_LOAD(handle, mlx_greater);
|
||||
CHECK_LOAD(handle, mlx_greater_equal);
|
||||
CHECK_LOAD(handle, mlx_hadamard_transform);
|
||||
CHECK_LOAD(handle, mlx_hamming);
|
||||
CHECK_LOAD(handle, mlx_hanning);
|
||||
CHECK_LOAD(handle, mlx_identity);
|
||||
CHECK_LOAD(handle, mlx_imag);
|
||||
CHECK_LOAD(handle, mlx_inner);
|
||||
|
||||
@@ -300,10 +300,12 @@
|
||||
#define mlx_atleast_1d mlx_atleast_1d_mlx_gen_orig_
|
||||
#define mlx_atleast_2d mlx_atleast_2d_mlx_gen_orig_
|
||||
#define mlx_atleast_3d mlx_atleast_3d_mlx_gen_orig_
|
||||
#define mlx_bartlett mlx_bartlett_mlx_gen_orig_
|
||||
#define mlx_bitwise_and mlx_bitwise_and_mlx_gen_orig_
|
||||
#define mlx_bitwise_invert mlx_bitwise_invert_mlx_gen_orig_
|
||||
#define mlx_bitwise_or mlx_bitwise_or_mlx_gen_orig_
|
||||
#define mlx_bitwise_xor mlx_bitwise_xor_mlx_gen_orig_
|
||||
#define mlx_blackman mlx_blackman_mlx_gen_orig_
|
||||
#define mlx_block_masked_mm mlx_block_masked_mm_mlx_gen_orig_
|
||||
#define mlx_broadcast_arrays mlx_broadcast_arrays_mlx_gen_orig_
|
||||
#define mlx_broadcast_to mlx_broadcast_to_mlx_gen_orig_
|
||||
@@ -356,6 +358,8 @@
|
||||
#define mlx_greater mlx_greater_mlx_gen_orig_
|
||||
#define mlx_greater_equal mlx_greater_equal_mlx_gen_orig_
|
||||
#define mlx_hadamard_transform mlx_hadamard_transform_mlx_gen_orig_
|
||||
#define mlx_hamming mlx_hamming_mlx_gen_orig_
|
||||
#define mlx_hanning mlx_hanning_mlx_gen_orig_
|
||||
#define mlx_identity mlx_identity_mlx_gen_orig_
|
||||
#define mlx_imag mlx_imag_mlx_gen_orig_
|
||||
#define mlx_inner mlx_inner_mlx_gen_orig_
|
||||
@@ -889,10 +893,12 @@
|
||||
#undef mlx_atleast_1d
|
||||
#undef mlx_atleast_2d
|
||||
#undef mlx_atleast_3d
|
||||
#undef mlx_bartlett
|
||||
#undef mlx_bitwise_and
|
||||
#undef mlx_bitwise_invert
|
||||
#undef mlx_bitwise_or
|
||||
#undef mlx_bitwise_xor
|
||||
#undef mlx_blackman
|
||||
#undef mlx_block_masked_mm
|
||||
#undef mlx_broadcast_arrays
|
||||
#undef mlx_broadcast_to
|
||||
@@ -945,6 +951,8 @@
|
||||
#undef mlx_greater
|
||||
#undef mlx_greater_equal
|
||||
#undef mlx_hadamard_transform
|
||||
#undef mlx_hamming
|
||||
#undef mlx_hanning
|
||||
#undef mlx_identity
|
||||
#undef mlx_imag
|
||||
#undef mlx_inner
|
||||
@@ -1501,8 +1509,10 @@ extern int (*mlx_distributed_sum_scatter_)(
|
||||
extern int (*mlx_distributed_group_rank_)(mlx_distributed_group group);
|
||||
extern int (*mlx_distributed_group_size_)(mlx_distributed_group group);
|
||||
extern mlx_distributed_group (*mlx_distributed_group_split_)(mlx_distributed_group group, int color, int key);
|
||||
extern bool (*mlx_distributed_is_available_)(void);
|
||||
extern mlx_distributed_group (*mlx_distributed_init_)(bool strict);
|
||||
extern bool (*mlx_distributed_is_available_)(const char* bk /* may be null */);
|
||||
extern mlx_distributed_group (*mlx_distributed_init_)(
|
||||
bool strict,
|
||||
const char* bk /* may be null */);
|
||||
extern void (*mlx_set_error_handler_)(
|
||||
mlx_error_handler_func handler,
|
||||
void* data,
|
||||
@@ -2099,6 +2109,7 @@ extern int (*mlx_astype_)(
|
||||
extern int (*mlx_atleast_1d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_atleast_2d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_atleast_3d_)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_bartlett_)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_bitwise_and_)(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -2115,6 +2126,7 @@ extern int (*mlx_bitwise_xor_)(
|
||||
const mlx_array a,
|
||||
const mlx_array b,
|
||||
const mlx_stream s);
|
||||
extern int (*mlx_blackman_)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_block_masked_mm_)(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -2295,6 +2307,7 @@ extern int (*mlx_dequantize_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
mlx_optional_dtype dtype,
|
||||
const mlx_stream s);
|
||||
extern int (*mlx_diag_)(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
|
||||
@@ -2431,6 +2444,8 @@ extern int (*mlx_hadamard_transform_)(
|
||||
const mlx_array a,
|
||||
mlx_optional_float scale,
|
||||
const mlx_stream s);
|
||||
extern int (*mlx_hamming_)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_hanning_)(mlx_array* res, int M, const mlx_stream s);
|
||||
extern int (*mlx_identity_)(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
|
||||
extern int (*mlx_imag_)(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
extern int (*mlx_inner_)(
|
||||
@@ -2723,6 +2738,8 @@ extern int (*mlx_qqmm_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale_x /* may be null */,
|
||||
const mlx_array global_scale_w /* may be null */,
|
||||
const mlx_stream s);
|
||||
extern int (*mlx_quantize_)(
|
||||
mlx_vector_array* res,
|
||||
@@ -2730,6 +2747,7 @@ extern int (*mlx_quantize_)(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
const mlx_stream s);
|
||||
extern int (*mlx_quantized_matmul_)(
|
||||
mlx_array* res,
|
||||
@@ -4033,11 +4051,13 @@ static inline int mlx_distributed_group_size(mlx_distributed_group group) {
|
||||
static inline mlx_distributed_group mlx_distributed_group_split(mlx_distributed_group group, int color, int key) {
|
||||
return mlx_distributed_group_split_(group, color, key);
|
||||
}
|
||||
static inline bool mlx_distributed_is_available(void) {
|
||||
return mlx_distributed_is_available_();
|
||||
static inline bool mlx_distributed_is_available(const char* bk /* may be null */) {
|
||||
return mlx_distributed_is_available_(bk);
|
||||
}
|
||||
static inline mlx_distributed_group mlx_distributed_init(bool strict) {
|
||||
return mlx_distributed_init_(strict);
|
||||
static inline mlx_distributed_group mlx_distributed_init(
|
||||
bool strict,
|
||||
const char* bk /* may be null */) {
|
||||
return mlx_distributed_init_(strict, bk);
|
||||
}
|
||||
static inline void mlx_set_error_handler(
|
||||
mlx_error_handler_func handler,
|
||||
@@ -4939,6 +4959,9 @@ static inline int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_st
|
||||
static inline int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s) {
|
||||
return mlx_atleast_3d_(res, a, s);
|
||||
}
|
||||
static inline int mlx_bartlett(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_bartlett_(res, M, s);
|
||||
}
|
||||
static inline int mlx_bitwise_and(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -4963,6 +4986,9 @@ static inline int mlx_bitwise_xor(
|
||||
const mlx_stream s) {
|
||||
return mlx_bitwise_xor_(res, a, b, s);
|
||||
}
|
||||
static inline int mlx_blackman(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_blackman_(res, M, s);
|
||||
}
|
||||
static inline int mlx_block_masked_mm(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -5193,9 +5219,10 @@ static inline int mlx_dequantize(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
mlx_optional_dtype dtype,
|
||||
const mlx_stream s) {
|
||||
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, dtype, s);
|
||||
return mlx_dequantize_(res, w, scales, biases, group_size, bits, mode, global_scale, dtype, s);
|
||||
}
|
||||
static inline int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s) {
|
||||
return mlx_diag_(res, a, k, s);
|
||||
@@ -5383,6 +5410,12 @@ static inline int mlx_hadamard_transform(
|
||||
const mlx_stream s) {
|
||||
return mlx_hadamard_transform_(res, a, scale, s);
|
||||
}
|
||||
static inline int mlx_hamming(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_hamming_(res, M, s);
|
||||
}
|
||||
static inline int mlx_hanning(mlx_array* res, int M, const mlx_stream s) {
|
||||
return mlx_hanning_(res, M, s);
|
||||
}
|
||||
static inline int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s) {
|
||||
return mlx_identity_(res, n, dtype, s);
|
||||
}
|
||||
@@ -5793,8 +5826,10 @@ static inline int mlx_qqmm(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale_x /* may be null */,
|
||||
const mlx_array global_scale_w /* may be null */,
|
||||
const mlx_stream s) {
|
||||
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, s);
|
||||
return mlx_qqmm_(res, x, w, w_scales, group_size, bits, mode, global_scale_x, global_scale_w, s);
|
||||
}
|
||||
static inline int mlx_quantize(
|
||||
mlx_vector_array* res,
|
||||
@@ -5802,8 +5837,9 @@ static inline int mlx_quantize(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
const mlx_stream s) {
|
||||
return mlx_quantize_(res, w, group_size, bits, mode, s);
|
||||
return mlx_quantize_(res, w, group_size, bits, mode, global_scale, s);
|
||||
}
|
||||
static inline int mlx_quantized_matmul(
|
||||
mlx_array* res,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Vendored MLX-C Headers
|
||||
|
||||
These header files are vendored from [mlx-c](https://github.com/ml-explore/mlx-c).
|
||||
The pinned version is in `MLX_VERSION` at the repo root.
|
||||
The pinned version is in `MLX_C_VERSION` at the repo root.
|
||||
|
||||
Headers are automatically refreshed when you run a CMake build:
|
||||
|
||||
|
||||
@@ -42,12 +42,14 @@ mlx_distributed_group_split(mlx_distributed_group group, int color, int key);
|
||||
/**
|
||||
* Check if distributed is available.
|
||||
*/
|
||||
bool mlx_distributed_is_available(void);
|
||||
bool mlx_distributed_is_available(const char* bk /* may be null */);
|
||||
|
||||
/**
|
||||
* Initialize distributed.
|
||||
*/
|
||||
mlx_distributed_group mlx_distributed_init(bool strict);
|
||||
mlx_distributed_group mlx_distributed_init(
|
||||
bool strict,
|
||||
const char* bk /* may be null */);
|
||||
|
||||
/**@}*/
|
||||
|
||||
|
||||
@@ -166,6 +166,7 @@ int mlx_astype(
|
||||
int mlx_atleast_1d(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
int mlx_atleast_2d(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
int mlx_atleast_3d(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
int mlx_bartlett(mlx_array* res, int M, const mlx_stream s);
|
||||
int mlx_bitwise_and(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -182,6 +183,7 @@ int mlx_bitwise_xor(
|
||||
const mlx_array a,
|
||||
const mlx_array b,
|
||||
const mlx_stream s);
|
||||
int mlx_blackman(mlx_array* res, int M, const mlx_stream s);
|
||||
int mlx_block_masked_mm(
|
||||
mlx_array* res,
|
||||
const mlx_array a,
|
||||
@@ -362,6 +364,7 @@ int mlx_dequantize(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
mlx_optional_dtype dtype,
|
||||
const mlx_stream s);
|
||||
int mlx_diag(mlx_array* res, const mlx_array a, int k, const mlx_stream s);
|
||||
@@ -498,6 +501,8 @@ int mlx_hadamard_transform(
|
||||
const mlx_array a,
|
||||
mlx_optional_float scale,
|
||||
const mlx_stream s);
|
||||
int mlx_hamming(mlx_array* res, int M, const mlx_stream s);
|
||||
int mlx_hanning(mlx_array* res, int M, const mlx_stream s);
|
||||
int mlx_identity(mlx_array* res, int n, mlx_dtype dtype, const mlx_stream s);
|
||||
int mlx_imag(mlx_array* res, const mlx_array a, const mlx_stream s);
|
||||
int mlx_inner(
|
||||
@@ -790,6 +795,8 @@ int mlx_qqmm(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale_x /* may be null */,
|
||||
const mlx_array global_scale_w /* may be null */,
|
||||
const mlx_stream s);
|
||||
int mlx_quantize(
|
||||
mlx_vector_array* res,
|
||||
@@ -797,6 +804,7 @@ int mlx_quantize(
|
||||
mlx_optional_int group_size,
|
||||
mlx_optional_int bits,
|
||||
const char* mode,
|
||||
const mlx_array global_scale /* may be null */,
|
||||
const mlx_stream s);
|
||||
int mlx_quantized_matmul(
|
||||
mlx_array* res,
|
||||
|
||||
@@ -4,35 +4,91 @@ package mlx
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"iter"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// SafetensorsFile represents a loaded safetensors file.
|
||||
type SafetensorsFile struct {
|
||||
arrays C.mlx_map_string_to_array
|
||||
metadata C.mlx_map_string_to_string
|
||||
}
|
||||
|
||||
func loadSafetensorsStream() C.mlx_stream {
|
||||
if runtime.GOOS == "darwin" {
|
||||
return C.mlx_default_cpu_stream_new()
|
||||
}
|
||||
return C.mlx_default_gpu_stream_new()
|
||||
}
|
||||
|
||||
// LoadSafetensorsNative loads a safetensors file using MLX's native loader.
|
||||
func LoadSafetensorsNative(path string) (*SafetensorsFile, error) {
|
||||
var arrays C.mlx_map_string_to_array
|
||||
var metadata C.mlx_map_string_to_string
|
||||
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
|
||||
stream := loadSafetensorsStream()
|
||||
defer C.mlx_stream_free(stream)
|
||||
|
||||
if C.mlx_load_safetensors(&arrays, &metadata, cPath, stream) != 0 {
|
||||
return nil, fmt.Errorf("failed to load safetensors: %s", path)
|
||||
}
|
||||
|
||||
return &SafetensorsFile{arrays: arrays, metadata: metadata}, nil
|
||||
}
|
||||
|
||||
// Get retrieves a tensor by name.
|
||||
func (s *SafetensorsFile) Get(name string) *Array {
|
||||
cName := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cName))
|
||||
|
||||
value := C.mlx_array_new()
|
||||
if C.mlx_map_string_to_array_get(&value, s.arrays, cName) != 0 {
|
||||
return nil
|
||||
}
|
||||
if value.ctx == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
arr := New(name)
|
||||
arr.ctx = value
|
||||
return arr
|
||||
}
|
||||
|
||||
// GetMetadata retrieves a metadata value by key.
|
||||
func (s *SafetensorsFile) GetMetadata(key string) string {
|
||||
cKey := C.CString(key)
|
||||
defer C.free(unsafe.Pointer(cKey))
|
||||
|
||||
var cValue *C.char
|
||||
if C.mlx_map_string_to_string_get(&cValue, s.metadata, cKey) != 0 {
|
||||
return ""
|
||||
}
|
||||
return C.GoString(cValue)
|
||||
}
|
||||
|
||||
// Free releases the loaded safetensors maps.
|
||||
func (s *SafetensorsFile) Free() {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
C.mlx_map_string_to_array_free(s.arrays)
|
||||
C.mlx_map_string_to_string_free(s.metadata)
|
||||
}
|
||||
|
||||
func Load(path string) iter.Seq2[string, *Array] {
|
||||
return func(yield func(string, *Array) bool) {
|
||||
string2array := C.mlx_map_string_to_array_new()
|
||||
defer C.mlx_map_string_to_array_free(string2array)
|
||||
|
||||
string2string := C.mlx_map_string_to_string_new()
|
||||
defer C.mlx_map_string_to_string_free(string2string)
|
||||
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
|
||||
// Use GPU stream so tensors load directly to GPU memory (CUDA has Load::eval_gpu).
|
||||
// macOS Metal doesn't implement eval_gpu for Load, so fall back to CPU stream.
|
||||
var stream C.mlx_stream
|
||||
if runtime.GOOS == "darwin" {
|
||||
stream = C.mlx_default_cpu_stream_new()
|
||||
} else {
|
||||
stream = C.mlx_default_gpu_stream_new()
|
||||
sf, err := LoadSafetensorsNative(path)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer C.mlx_stream_free(stream)
|
||||
defer sf.Free()
|
||||
|
||||
C.mlx_load_safetensors(&string2array, &string2string, cPath, stream)
|
||||
|
||||
it := C.mlx_map_string_to_array_iterator_new(string2array)
|
||||
it := C.mlx_map_string_to_array_iterator_new(sf.arrays)
|
||||
defer C.mlx_map_string_to_array_iterator_free(it)
|
||||
|
||||
for {
|
||||
@@ -51,3 +107,43 @@ func Load(path string) iter.Seq2[string, *Array] {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SaveSafetensors saves arrays to a safetensors file without metadata.
|
||||
func SaveSafetensors(path string, arrays map[string]*Array) error {
|
||||
return SaveSafetensorsWithMetadata(path, arrays, nil)
|
||||
}
|
||||
|
||||
// SaveSafetensorsWithMetadata saves arrays to a safetensors file with metadata.
|
||||
func SaveSafetensorsWithMetadata(path string, arrays map[string]*Array, metadata map[string]string) error {
|
||||
cPath := C.CString(path)
|
||||
defer C.free(unsafe.Pointer(cPath))
|
||||
|
||||
cArrays := C.mlx_map_string_to_array_new()
|
||||
defer C.mlx_map_string_to_array_free(cArrays)
|
||||
|
||||
for name, arr := range arrays {
|
||||
if arr == nil {
|
||||
continue
|
||||
}
|
||||
cName := C.CString(name)
|
||||
C.mlx_map_string_to_array_insert(cArrays, cName, arr.ctx)
|
||||
C.free(unsafe.Pointer(cName))
|
||||
}
|
||||
|
||||
cMetadata := C.mlx_map_string_to_string_new()
|
||||
defer C.mlx_map_string_to_string_free(cMetadata)
|
||||
|
||||
for key, value := range metadata {
|
||||
cKey := C.CString(key)
|
||||
cValue := C.CString(value)
|
||||
C.mlx_map_string_to_string_insert(cMetadata, cKey, cValue)
|
||||
C.free(unsafe.Pointer(cKey))
|
||||
C.free(unsafe.Pointer(cValue))
|
||||
}
|
||||
|
||||
if C.mlx_save_safetensors(cPath, cArrays, cMetadata) != 0 {
|
||||
return fmt.Errorf("failed to save safetensors: %s", path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -7,8 +7,44 @@ package mlx
|
||||
// #cgo LDFLAGS: -lstdc++
|
||||
// #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework Accelerate
|
||||
// #include "generated.h"
|
||||
// #include <string.h>
|
||||
//
|
||||
// static char _mlx_last_error_msg[1024] = {0};
|
||||
// static int _mlx_last_error_flag = 0;
|
||||
//
|
||||
// static void _mlx_capture_error_handler(const char* msg, void* data) {
|
||||
// (void)data;
|
||||
// strncpy(_mlx_last_error_msg, msg, sizeof(_mlx_last_error_msg) - 1);
|
||||
// _mlx_last_error_msg[sizeof(_mlx_last_error_msg) - 1] = '\0';
|
||||
// _mlx_last_error_flag = 1;
|
||||
// }
|
||||
//
|
||||
// static void mlx_install_capture_handler(void) {
|
||||
// if (mlx_set_error_handler_) {
|
||||
// mlx_set_error_handler_(_mlx_capture_error_handler, NULL, NULL);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// static void mlx_clear_last_error(void) {
|
||||
// _mlx_last_error_flag = 0;
|
||||
// _mlx_last_error_msg[0] = '\0';
|
||||
// }
|
||||
//
|
||||
// static int mlx_had_last_error(void) {
|
||||
// return _mlx_last_error_flag;
|
||||
// }
|
||||
//
|
||||
// static const char* mlx_get_last_error(void) {
|
||||
// return _mlx_last_error_flag ? _mlx_last_error_msg : NULL;
|
||||
// }
|
||||
import "C"
|
||||
|
||||
func init() {
|
||||
// Replace the default exit(-1) error handler with one that captures
|
||||
// the error message so we can surface it in Go.
|
||||
C.mlx_install_capture_handler()
|
||||
}
|
||||
|
||||
// Version returns the MLX core library version string.
|
||||
func Version() string {
|
||||
str := C.mlx_string_new()
|
||||
@@ -31,10 +67,19 @@ func doEval(outputs []*Array, async bool) {
|
||||
}
|
||||
}
|
||||
|
||||
C.mlx_clear_last_error()
|
||||
var rc C.int
|
||||
if async {
|
||||
C.mlx_async_eval(vector)
|
||||
rc = C.mlx_async_eval(vector)
|
||||
} else {
|
||||
C.mlx_eval(vector)
|
||||
rc = C.mlx_eval(vector)
|
||||
}
|
||||
if rc != 0 {
|
||||
msg := "mlx eval failed"
|
||||
if C.mlx_had_last_error() != 0 {
|
||||
msg = C.GoString(C.mlx_get_last_error())
|
||||
}
|
||||
panic("mlx: " + msg)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,8 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
|
||||
optBits := C.mlx_optional_int{value: C.int(bits), has_value: true}
|
||||
res := C.mlx_vector_array_new()
|
||||
defer C.mlx_vector_array_free(res)
|
||||
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, DefaultStream().ctx)
|
||||
var globalScale C.mlx_array
|
||||
C.mlx_quantize(&res, w.ctx, optGroupSize, optBits, cMode, globalScale, DefaultStream().ctx)
|
||||
|
||||
vecSize := int(C.mlx_vector_array_size(res))
|
||||
w0 := New("QUANTIZE_W")
|
||||
@@ -32,6 +33,18 @@ func Quantize(w *Array, groupSize, bits int, mode string) (weights, scales, bias
|
||||
return w0, w1, nil
|
||||
}
|
||||
|
||||
func FromFP8(x *Array, dtype DType) *Array {
|
||||
out := New("FROM_FP8")
|
||||
C.mlx_from_fp8(&out.ctx, x.ctx, C.mlx_dtype(dtype), DefaultStream().ctx)
|
||||
return out
|
||||
}
|
||||
|
||||
func ToFP8(x *Array) *Array {
|
||||
out := New("TO_FP8")
|
||||
C.mlx_to_fp8(&out.ctx, x.ctx, DefaultStream().ctx)
|
||||
return out
|
||||
}
|
||||
|
||||
func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Array {
|
||||
cMode := C.CString(mode)
|
||||
defer C.free(unsafe.Pointer(cMode))
|
||||
@@ -45,7 +58,8 @@ func Dequantize(w, scales, biases *Array, groupSize, bits int, mode string) *Arr
|
||||
}
|
||||
|
||||
out := New("DEQUANTIZE")
|
||||
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, optDtype, DefaultStream().ctx)
|
||||
var globalScale C.mlx_array
|
||||
C.mlx_dequantize(&out.ctx, w.ctx, scales.ctx, b, optGroupSize, optBits, cMode, globalScale, optDtype, DefaultStream().ctx)
|
||||
return out
|
||||
}
|
||||
|
||||
@@ -135,6 +149,40 @@ func Contiguous(a *Array, allowColMajor bool) *Array {
|
||||
return out
|
||||
}
|
||||
|
||||
func Pad(a *Array, paddings []int32) *Array {
|
||||
numAxes := len(paddings) / 2
|
||||
axes := make([]C.int, numAxes)
|
||||
lowPad := make([]C.int, numAxes)
|
||||
highPad := make([]C.int, numAxes)
|
||||
for i := range numAxes {
|
||||
axes[i] = C.int(i)
|
||||
lowPad[i] = C.int(paddings[i*2])
|
||||
highPad[i] = C.int(paddings[i*2+1])
|
||||
}
|
||||
|
||||
padValue := C.mlx_array_new_float(C.float(0))
|
||||
defer C.mlx_array_free(padValue)
|
||||
|
||||
cMode := C.CString("constant")
|
||||
defer C.free(unsafe.Pointer(cMode))
|
||||
|
||||
out := New("PAD")
|
||||
C.mlx_pad(
|
||||
&out.ctx,
|
||||
a.ctx,
|
||||
unsafe.SliceData(axes),
|
||||
C.size_t(len(axes)),
|
||||
unsafe.SliceData(lowPad),
|
||||
C.size_t(len(lowPad)),
|
||||
unsafe.SliceData(highPad),
|
||||
C.size_t(len(highPad)),
|
||||
padValue,
|
||||
cMode,
|
||||
DefaultStream().ctx,
|
||||
)
|
||||
return out
|
||||
}
|
||||
|
||||
func DepthwiseConv1d(x, weight *Array, bias *Array) *Array {
|
||||
groups := int32(x.Dim(x.NumDims() - 1))
|
||||
return Conv1d(x, weight, bias, 1, 0, 1, groups)
|
||||
@@ -446,15 +494,6 @@ func Collect(v any) []*Array {
|
||||
return arrays
|
||||
}
|
||||
|
||||
func Copy(a *Array) *Array {
|
||||
if a == nil || !a.Valid() {
|
||||
return a
|
||||
}
|
||||
out := New("COPY")
|
||||
C.mlx_copy(&out.ctx, a.ctx, DefaultStream().ctx)
|
||||
return out
|
||||
}
|
||||
|
||||
func collect(v reflect.Value, arrays *[]*Array, seen map[uintptr]bool) {
|
||||
if !v.IsValid() {
|
||||
return
|
||||
|
||||
@@ -11,8 +11,10 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
|
||||
switch strings.ToUpper(quantization) {
|
||||
case "NVFP4":
|
||||
return 16, 4, "nvfp4"
|
||||
case "MXFP4":
|
||||
return 32, 4, "mxfp4"
|
||||
case "FP4", "Q4", "INT4":
|
||||
return 32, 4, "affine"
|
||||
return 64, 4, "affine"
|
||||
case "MXFP8":
|
||||
return 32, 8, "mxfp8"
|
||||
case "FP8", "Q8", "INT8":
|
||||
|
||||
@@ -144,3 +144,44 @@ func TestLayerNormDefaultEps(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuantizedLinearMXFP4MatchesDequantizedWeight(t *testing.T) {
|
||||
skipIfNoMLX(t)
|
||||
|
||||
weightVals := make([]float32, 3*32)
|
||||
for i := range weightVals {
|
||||
weightVals[i] = float32((i%11)-5) / 7
|
||||
}
|
||||
inputVals := make([]float32, 2*32)
|
||||
for i := range inputVals {
|
||||
inputVals[i] = float32((i%7)-3) / 5
|
||||
}
|
||||
|
||||
weight := mlx.FromValues(weightVals, 3, 32).AsType(mlx.DTypeBFloat16)
|
||||
input := mlx.FromValues(inputVals, 2, 32).AsType(mlx.DTypeBFloat16)
|
||||
mlx.Eval(weight, input)
|
||||
|
||||
ql := NewQuantizedLinear(weight, nil, 32, 4, "mxfp4")
|
||||
if ql.QBiases != nil {
|
||||
t.Fatalf("mxfp4 qbiases = %v, want nil", ql.QBiases)
|
||||
}
|
||||
|
||||
dequantizedWeight := mlx.Dequantize(ql.Weight, ql.Scales, ql.QBiases, 32, 4, "mxfp4")
|
||||
mlx.Eval(dequantizedWeight)
|
||||
|
||||
qOut := ql.Forward(input)
|
||||
dOut := NewLinear(dequantizedWeight, nil).Forward(input)
|
||||
mlx.Eval(qOut, dOut)
|
||||
|
||||
got := qOut.Floats()
|
||||
want := dOut.Floats()
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("output length = %d, want %d", len(got), len(want))
|
||||
}
|
||||
|
||||
for i := range got {
|
||||
if !approxEqual(got[i], want[i], 1e-3) {
|
||||
t.Fatalf("output[%d] = %.6f, want %.6f", i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -420,7 +420,16 @@ func tensorByBase(tensors map[string]*mlx.Array, base string) (*mlx.Array, strin
|
||||
}
|
||||
|
||||
func supportsGatherQMM(mode string, bits int) bool {
|
||||
return mode == "affine" && (bits == 4 || bits == 8)
|
||||
switch mode {
|
||||
case "affine":
|
||||
return bits == 4 || bits == 8
|
||||
case "mxfp8":
|
||||
return bits == 8
|
||||
case "nvfp4", "mxfp4":
|
||||
return bits == 4
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func freeTensorKeys(tensors map[string]*mlx.Array, keys ...string) {
|
||||
|
||||
@@ -83,6 +83,28 @@ func TestLayerSelectionHelpers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSupportsGatherQMM(t *testing.T) {
|
||||
tests := []struct {
|
||||
mode string
|
||||
bits int
|
||||
want bool
|
||||
}{
|
||||
{mode: "affine", bits: 4, want: true},
|
||||
{mode: "affine", bits: 8, want: true},
|
||||
{mode: "mxfp8", bits: 8, want: true},
|
||||
{mode: "nvfp4", bits: 4, want: true},
|
||||
{mode: "mxfp4", bits: 4, want: true},
|
||||
{mode: "mxfp8", bits: 4, want: false},
|
||||
{mode: "affine", bits: 3, want: false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := supportsGatherQMM(tt.mode, tt.bits); got != tt.want {
|
||||
t.Fatalf("supportsGatherQMM(%q, %d) = %v, want %v", tt.mode, tt.bits, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveTensorPathLayout(t *testing.T) {
|
||||
dummy := mlx.New("dummy")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user