Compare commits

...

7 Commits

Author SHA1 Message Date
Parth Sareen
160660e572 launch: use bundled OpenClaw ollama web search (#15757) 2026-04-22 16:34:19 -07:00
madflow
3b43b9bc4b docs: update structured outputs doc for cloud (#15733)
---------

Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2026-04-22 00:42:39 -07:00
Parth Sareen
21883571b7 launch: replace kimi-k2.5 with k2.6 as top recommended model (#15737) 2026-04-21 15:13:20 -07:00
Jesse Gross
ce99f24731 mlxrunner: tokenize prompts in request handler goroutines
Move tokenization out of the single GPU processing goroutine and
into each request's HTTP handler goroutine. This allows the next
request's prompt to be tokenized on the CPU while the current
request is executing on the GPU.
2026-04-21 14:38:49 -07:00
Jesse Gross
04f5f0cdb4 mlx: improve thread safety of array management
Use atomic.Int32 for Array.pinned and a sync.Mutex for the global
arrays slice so MLX arrays can be created and pinned from multiple
goroutines without racing on those structures. Convert Array value
receivers to pointer receivers and struct fields from Array to
*Array to avoid copying the atomic.

This does not fully achieve thread safety even when building
completely independent graphs. The tracing flag and traceScratch
slice in compile.go are unprotected, so concurrent Compile calls
will race. MLX itself is not fully thread-safe either, although
upstream work is underway to improve this.
2026-04-21 14:38:49 -07:00
Matteo Celani
fb36a01ffe app/ui: fix model picker showing stale model after switching chats (#15280)
* app/ui: fix model picker showing stale model after switching chats

Optimistic messages created during streaming were storing the full
Model object instead of the model name string. When switching back
to a chat with cached streaming data, the restore effect read an
object where it expected a string, causing the model picker to fail
matching and remain stuck on the previous chat's model.

* app/ui: fix two more instances of Model object passed as model name

Fix the same bug at lines 523 and 536 in the assistant_with_tools
event handler, where selectedModel (object) was used instead of
selectedModel.model (string).
2026-04-21 15:08:06 -04:00
Michael Verrilli
0c65ed33bc cmd: populate model capabilities in launchInteractiveModel (#15712)
launchInteractiveModel was introduced in PR #14609 without the
client.Show() capability-detection block that RunHandler uses.
This left opts.MultiModal always false in the TUI path, causing
image/audio file paths to always be treated as unknown commands
instead of being loaded as multimodal attachments.

Mirror the Show() call, pull-on-404 fallback, cloud auth handling,
and MultiModal/Think population from RunHandler into
launchInteractiveModel.

Fixes #15711
2026-04-21 14:37:36 -04:00
16 changed files with 342 additions and 362 deletions

View File

@@ -381,7 +381,7 @@ export const useSendMessage = (chatId: string) => {
role: "assistant",
content: "",
thinking: "",
model: effectiveModel,
model: effectiveModel.model,
}),
);
lastMessage = newMessages[newMessages.length - 1];
@@ -433,7 +433,7 @@ export const useSendMessage = (chatId: string) => {
role: "assistant",
content: "",
thinking: "",
model: effectiveModel,
model: effectiveModel.model,
}),
);
lastMessage = newMessages[newMessages.length - 1];
@@ -520,7 +520,7 @@ export const useSendMessage = (chatId: string) => {
thinkingTimeStart:
lastMessage.thinkingTimeStart || event.thinkingTimeStart,
thinkingTimeEnd: event.thinkingTimeEnd,
model: selectedModel,
model: selectedModel.model,
});
newMessages[newMessages.length - 1] = updatedMessage;
} else {
@@ -533,7 +533,7 @@ export const useSendMessage = (chatId: string) => {
tool_calls: event.toolCalls,
thinkingTimeStart: event.thinkingTimeStart,
thinkingTimeEnd: event.thinkingTimeEnd,
model: selectedModel,
model: selectedModel.model,
}),
);
}
@@ -699,7 +699,7 @@ export const useSendMessage = (chatId: string) => {
queryClient.setQueryData(["chat", newId], {
chat: new Chat({
id: newId,
model: effectiveModel,
model: effectiveModel.model,
messages: [
new Message({
role: "user",

View File

@@ -1975,8 +1975,61 @@ func launchInteractiveModel(cmd *cobra.Command, modelName string) error {
Options: map[string]any{},
ShowConnect: true,
}
// loadOrUnloadModel is cloud-safe here: remote/cloud models skip local preload
// and only validate auth/connectivity before interactive chat starts.
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
requestedCloud := modelref.HasExplicitCloudSource(modelName)
info, err := func() (*api.ShowResponse, error) {
showReq := &api.ShowRequest{Name: modelName}
info, err := client.Show(cmd.Context(), showReq)
var se api.StatusError
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
if requestedCloud {
return nil, err
}
if err := PullHandler(cmd, []string{modelName}); err != nil {
return nil, err
}
return client.Show(cmd.Context(), &api.ShowRequest{Name: modelName})
}
return info, err
}()
if err != nil {
if handleCloudAuthorizationError(err) {
return nil
}
return err
}
ensureCloudStub(cmd.Context(), client, modelName)
opts.Think, err = inferThinkingOption(&info.Capabilities, &opts, false)
if err != nil {
return err
}
audioCapable := slices.Contains(info.Capabilities, model.CapabilityAudio)
opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision) || audioCapable
// TODO: remove the projector info and vision info checks below,
// these are left in for backwards compatibility with older servers
// that don't have the capabilities field in the model info
if len(info.ProjectorInfo) != 0 {
opts.MultiModal = true
}
for k := range info.ModelInfo {
if strings.Contains(k, ".vision.") {
opts.MultiModal = true
break
}
}
applyShowResponseToRunOptions(&opts, info)
if err := loadOrUnloadModel(cmd, &opts); err != nil {
return fmt.Errorf("error loading model: %w", err)
}

View File

@@ -301,7 +301,7 @@ func TestParseArgs(t *testing.T) {
func TestIsCloudModel(t *testing.T) {
// isCloudModel now only uses Show API, so nil client always returns false
t.Run("nil client returns false", func(t *testing.T) {
models := []string{"glm-5.1:cloud", "kimi-k2.5:cloud", "local-model"}
models := []string{"glm-5.1:cloud", "kimi-k2.6:cloud", "local-model"}
for _, model := range models {
if isCloudModel(context.Background(), nil, model) {
t.Errorf("isCloudModel(%q) with nil client should return false", model)
@@ -321,7 +321,7 @@ func names(items []ModelItem) []string {
func TestBuildModelList_NoExistingModels(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
want := []string{"kimi-k2.6:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
if diff := cmp.Diff(want, names(items)); diff != "" {
t.Errorf("with no existing models, items should be recommended in order (-want +got):\n%s", diff)
}
@@ -350,7 +350,7 @@ func TestBuildModelList_OnlyLocalModels_CloudRecsStillFirst(t *testing.T) {
// Cloud recs always come first among recommended, regardless of installed inventory.
// Cloud disablement is handled upstream in loadSelectableModels via filterCloudItems.
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2", "qwen2.5"}
want := []string{"kimi-k2.6:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2", "qwen2.5"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("cloud recs pinned first even when no cloud models installed (-want +got):\n%s", diff)
}
@@ -366,7 +366,7 @@ func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
got := names(items)
// All recs pinned at top (cloud before local in mixed case), then non-recs
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
want := []string{"kimi-k2.6:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("recs pinned at top, cloud recs first in mixed case (-want +got):\n%s", diff)
}
@@ -437,7 +437,7 @@ func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
if !strings.HasSuffix(item.Description, "(not downloaded)") {
t.Errorf("non-installed recommended %q should have '(not downloaded)' suffix, got %q", item.Name, item.Description)
}
case "minimax-m2.7:cloud", "kimi-k2.5:cloud", "qwen3.5:cloud":
case "minimax-m2.7:cloud", "kimi-k2.6:cloud", "qwen3.5:cloud":
if strings.HasSuffix(item.Description, "(not downloaded)") {
t.Errorf("cloud model %q should not have '(not downloaded)' suffix, got %q", item.Name, item.Description)
}
@@ -455,9 +455,9 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
got := names(items)
// gemma4 and glm-5.1:cloud are installed so they sort normally;
// kimi-k2.5:cloud, qwen3.5:cloud, and qwen3.5 are not installed so they go to the bottom
// qwen3.5:cloud and qwen3.5 are not installed so they go to the bottom
// All recs: cloud first in mixed case, then local, in rec order within each
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
want := []string{"kimi-k2.6:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("all recs, cloud first in mixed case (-want +got):\n%s", diff)
}
@@ -466,23 +466,23 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *testing.T) {
existing := []modelInfo{
{Name: "llama3.2:latest", Remote: false},
{Name: "kimi-k2.5:cloud", Remote: true},
{Name: "kimi-k2.6:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
got := names(items)
// kimi-k2.5:cloud is installed so it sorts normally;
// kimi-k2.6:cloud is installed so it sorts normally;
// the rest of the recommendations are not installed so they go to the bottom
// All recs pinned at top (cloud first in mixed case), then non-recs
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
want := []string{"kimi-k2.6:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
if diff := cmp.Diff(want, got); diff != "" {
t.Errorf("recs pinned at top, cloud first in mixed case (-want +got):\n%s", diff)
}
for _, item := range items {
isCloud := strings.HasSuffix(item.Name, ":cloud")
isInstalled := slices.Contains([]string{"kimi-k2.5:cloud", "llama3.2"}, item.Name)
isInstalled := slices.Contains([]string{"kimi-k2.6:cloud", "llama3.2"}, item.Name)
if isInstalled || isCloud {
if strings.HasSuffix(item.Description, "(not downloaded)") {
t.Errorf("installed or cloud model %q should not have '(not downloaded)' suffix, got %q", item.Name, item.Description)
@@ -549,8 +549,8 @@ func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
if !cloudModels["glm-5.1:cloud"] {
t.Error("glm-5.1:cloud should be in cloudModels")
}
if !cloudModels["kimi-k2.5:cloud"] {
t.Error("kimi-k2.5:cloud should be in cloudModels (recommended cloud)")
if !cloudModels["kimi-k2.6:cloud"] {
t.Error("kimi-k2.6:cloud should be in cloudModels (recommended cloud)")
}
if !cloudModels["qwen3.5:cloud"] {
t.Error("qwen3.5:cloud should be in cloudModels (recommended cloud)")
@@ -570,7 +570,7 @@ func TestBuildModelList_RecommendedFieldSet(t *testing.T) {
for _, item := range items {
switch item.Name {
case "gemma4", "qwen3.5", "glm-5.1:cloud", "kimi-k2.5:cloud", "qwen3.5:cloud":
case "gemma4", "qwen3.5", "glm-5.1:cloud", "kimi-k2.6:cloud", "qwen3.5:cloud":
if !item.Recommended {
t.Errorf("%q should have Recommended=true", item.Name)
}
@@ -628,7 +628,7 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
lastRecIdx := -1
firstNonRecIdx := len(got)
for i, name := range got {
isRec := name == "gemma4" || name == "qwen3.5" || name == "minimax-m2.7:cloud" || name == "glm-5.1:cloud" || name == "kimi-k2.5:cloud" || name == "qwen3.5:cloud"
isRec := name == "gemma4" || name == "qwen3.5" || name == "minimax-m2.7:cloud" || name == "glm-5.1:cloud" || name == "kimi-k2.6:cloud" || name == "qwen3.5:cloud"
if isRec && i > lastRecIdx {
lastRecIdx = i
}

View File

@@ -21,7 +21,7 @@ import (
)
var recommendedModels = []ModelItem{
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
{Name: "kimi-k2.6:cloud", Description: "State-of-the-art coding, long-horizon execution, and multimodal agent swarm capability", Recommended: true},
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
@@ -56,6 +56,7 @@ var cloudModelLimits = map[string]cloudModelLimit{
"gpt-oss:20b": {Context: 131_072, Output: 131_072},
"kimi-k2:1t": {Context: 262_144, Output: 262_144},
"kimi-k2.5": {Context: 262_144, Output: 262_144},
"kimi-k2.6": {Context: 262_144, Output: 262_144},
"kimi-k2-thinking": {Context: 262_144, Output: 262_144},
"nemotron-3-nano:30b": {Context: 1_048_576, Output: 131_072},
"qwen3-coder:480b": {Context: 262_144, Output: 65_536},

View File

@@ -14,8 +14,6 @@ import (
"strings"
"time"
"golang.org/x/mod/semver"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/cmd/internal/fileutil"
"github.com/ollama/ollama/envconfig"
@@ -98,9 +96,7 @@ func (c *Openclaw) Run(model string, args []string) error {
patchDeviceScopes()
}
if ensureWebSearchPlugin() {
registerWebSearchPlugin()
}
configureOllamaWebSearch()
// When extra args are passed through, run exactly what the user asked for
// after setup and skip the built-in gateway+TUI convenience flow.
@@ -738,89 +734,13 @@ func clearSessionModelOverride(primary string) {
_ = os.WriteFile(path, out, 0o600)
}
const (
webSearchNpmPackage = "@ollama/openclaw-web-search"
webSearchMinVersion = "0.2.1"
)
// ensureWebSearchPlugin installs the openclaw-web-search extension into the
// user-level extensions directory (~/.openclaw/extensions/) if it isn't already
// present, or re-installs if the installed version is older than webSearchMinVersion.
// Best-effort: most failure paths return false silently; download/extract
// failures additionally print a yellow warning to stderr.
// Returns true if the extension is available.
func ensureWebSearchPlugin() bool {
home, err := os.UserHomeDir()
if err != nil {
return false
}
pluginDir := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
// Fast path: already installed at >= webSearchMinVersion, nothing to do.
if webSearchPluginUpToDate(pluginDir) {
return true
}
// npm is required to fetch the package tarball; bail quietly if absent.
npmBin, err := exec.LookPath("npm")
if err != nil {
return false
}
if err := os.MkdirAll(pluginDir, 0o755); err != nil {
return false
}
// Download the tarball via `npm pack`, extract it flat into the plugin dir.
pack := exec.Command(npmBin, "pack", webSearchNpmPackage, "--pack-destination", pluginDir)
out, err := pack.Output()
if err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not download web search plugin: %v%s\n", ansiYellow, err, ansiReset)
return false
}
// `npm pack` prints the generated tarball filename on stdout.
tgzName := strings.TrimSpace(string(out))
tgzPath := filepath.Join(pluginDir, tgzName)
defer os.Remove(tgzPath)
// --strip-components=1 drops the tarball's top-level "package/" directory so
// the extension's files land directly in pluginDir.
tar := exec.Command("tar", "xzf", tgzPath, "--strip-components=1", "-C", pluginDir)
if err := tar.Run(); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not extract web search plugin: %v%s\n", ansiYellow, err, ansiReset)
return false
}
fmt.Fprintf(os.Stderr, "%s ✓ Installed Ollama web search %s\n", ansiGreen, ansiReset)
return true
}
// webSearchPluginUpToDate reports whether the plugin at pluginDir is installed
// and its package.json declares a version of at least webSearchMinVersion.
// Any read/parse failure, or a missing version field, counts as not up to date.
func webSearchPluginUpToDate(pluginDir string) bool {
	manifest := filepath.Join(pluginDir, "package.json")
	raw, readErr := os.ReadFile(manifest)
	if readErr != nil {
		return false
	}
	pkg := struct {
		Version string `json:"version"`
	}{}
	if err := json.Unmarshal(raw, &pkg); err != nil {
		return false
	}
	if pkg.Version == "" {
		return false
	}
	return !versionLessThan(pkg.Version, webSearchMinVersion)
}
// versionLessThan reports whether semver string a is strictly older than b
// (major.minor.patch). Either input may omit the "v" prefix that
// semver.Compare requires; it is prepended as needed.
func versionLessThan(a, b string) bool {
	va, vb := a, b
	if !strings.HasPrefix(va, "v") {
		va = "v" + va
	}
	if !strings.HasPrefix(vb, "v") {
		vb = "v" + vb
	}
	return semver.Compare(va, vb) == -1
}
// registerWebSearchPlugin adds plugins.entries.openclaw-web-search to the OpenClaw
// config so the gateway activates it on next start. Best-effort; silently returns
// on any error.
func registerWebSearchPlugin() {
// configureOllamaWebSearch keeps launch-managed OpenClaw installs on the
// bundled Ollama web_search provider. Older launch builds installed an
// external openclaw-web-search plugin that added custom ollama_web_search and
// ollama_web_fetch tools. Current OpenClaw versions ship Ollama web_search as
// the bundled "ollama" plugin instead, so we migrate stale config and ensure
// fresh installs select the bundled provider.
func configureOllamaWebSearch() {
home, err := os.UserHomeDir()
if err != nil {
return
@@ -835,6 +755,8 @@ func registerWebSearchPlugin() {
return
}
stalePluginConfigured := false
plugins, _ := config["plugins"].(map[string]any)
if plugins == nil {
plugins = make(map[string]any)
@@ -843,68 +765,100 @@ func registerWebSearchPlugin() {
if entries == nil {
entries = make(map[string]any)
}
entries["openclaw-web-search"] = map[string]any{"enabled": true}
plugins["entries"] = entries
// Pin trust so the gateway doesn't warn about untracked plugins.
allow, _ := plugins["allow"].([]any)
hasAllow := false
for _, v := range allow {
if s, ok := v.(string); ok && s == "openclaw-web-search" {
hasAllow = true
break
}
}
if !hasAllow {
allow = append(allow, "openclaw-web-search")
}
plugins["allow"] = allow
// Record install provenance so the loader can verify the plugin origin.
installs, _ := plugins["installs"].(map[string]any)
if installs == nil {
installs = make(map[string]any)
}
pluginDir := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
installs["openclaw-web-search"] = map[string]any{
"source": "npm",
"spec": webSearchNpmPackage,
"installPath": pluginDir,
}
plugins["installs"] = installs
config["plugins"] = plugins
// Add plugin tools to tools.alsoAllow so they survive the coding profile's
// policy pipeline (which has an explicit allow list of core tools only).
tools, _ := config["tools"].(map[string]any)
if tools == nil {
tools = make(map[string]any)
}
alsoAllow, _ := tools["alsoAllow"].([]any)
needed := []string{"ollama_web_search", "ollama_web_fetch"}
have := make(map[string]bool, len(alsoAllow))
for _, v := range alsoAllow {
if s, ok := v.(string); ok {
have[s] = true
}
}
for _, name := range needed {
if !have[name] {
alsoAllow = append(alsoAllow, name)
}
}
tools["alsoAllow"] = alsoAllow
// Disable built-in web search/fetch since our plugin replaces them.
web, _ := tools["web"].(map[string]any)
if web == nil {
web = make(map[string]any)
}
web["search"] = map[string]any{"enabled": false}
web["fetch"] = map[string]any{"enabled": false}
search, _ := web["search"].(map[string]any)
if search == nil {
search = make(map[string]any)
}
fetch, _ := web["fetch"].(map[string]any)
if fetch == nil {
fetch = make(map[string]any)
}
alsoAllow, _ := tools["alsoAllow"].([]any)
var filteredAlsoAllow []any
for _, v := range alsoAllow {
s, ok := v.(string)
if !ok {
filteredAlsoAllow = append(filteredAlsoAllow, v)
continue
}
if s == "ollama_web_search" || s == "ollama_web_fetch" {
stalePluginConfigured = true
continue
}
filteredAlsoAllow = append(filteredAlsoAllow, v)
}
if len(filteredAlsoAllow) > 0 {
tools["alsoAllow"] = filteredAlsoAllow
} else {
delete(tools, "alsoAllow")
}
if _, ok := entries["openclaw-web-search"]; ok {
delete(entries, "openclaw-web-search")
stalePluginConfigured = true
}
ollamaEntry, _ := entries["ollama"].(map[string]any)
if ollamaEntry == nil {
ollamaEntry = make(map[string]any)
}
ollamaEntry["enabled"] = true
entries["ollama"] = ollamaEntry
plugins["entries"] = entries
if allow, ok := plugins["allow"].([]any); ok {
var nextAllow []any
hasOllama := false
for _, v := range allow {
s, ok := v.(string)
if ok && s == "openclaw-web-search" {
stalePluginConfigured = true
continue
}
if ok && s == "ollama" {
hasOllama = true
}
nextAllow = append(nextAllow, v)
}
if !hasOllama {
nextAllow = append(nextAllow, "ollama")
}
plugins["allow"] = nextAllow
}
if installs, ok := plugins["installs"].(map[string]any); ok {
if _, exists := installs["openclaw-web-search"]; exists {
delete(installs, "openclaw-web-search")
stalePluginConfigured = true
}
if len(installs) > 0 {
plugins["installs"] = installs
} else {
delete(plugins, "installs")
}
}
if stalePluginConfigured || search["provider"] == nil {
search["provider"] = "ollama"
}
if stalePluginConfigured {
fetch["enabled"] = true
}
search["enabled"] = true
web["search"] = search
if len(fetch) > 0 {
web["fetch"] = fetch
}
tools["web"] = web
config["plugins"] = plugins
config["tools"] = tools
out, err := json.MarshalIndent(config, "", " ")

View File

@@ -2242,95 +2242,7 @@ func TestIntegrationOnboarded(t *testing.T) {
})
}
// TestVersionLessThan covers strict ordering, equality, and tolerance of an
// optional "v" prefix on either argument.
func TestVersionLessThan(t *testing.T) {
	cases := []struct {
		a, b string
		want bool
	}{
		{"0.1.7", "0.2.1", true},
		{"0.2.0", "0.2.1", true},
		{"0.2.1", "0.2.1", false},
		{"0.2.2", "0.2.1", false},
		{"1.0.0", "0.2.1", false},
		{"0.2.1", "1.0.0", true},
		{"v0.1.7", "0.2.1", true},
		{"0.2.1", "v0.2.1", false},
	}
	for _, tc := range cases {
		t.Run(tc.a+"_vs_"+tc.b, func(t *testing.T) {
			got := versionLessThan(tc.a, tc.b)
			if got != tc.want {
				t.Errorf("versionLessThan(%q, %q) = %v, want %v", tc.a, tc.b, got, tc.want)
			}
		})
	}
}
// TestWebSearchPluginUpToDate exercises webSearchPluginUpToDate against every
// manifest state it must handle: missing directory, missing package.json,
// stale version, exact minimum, newer version, malformed JSON, and an empty
// version field. Only the exact-minimum and newer cases report up to date.
func TestWebSearchPluginUpToDate(t *testing.T) {
t.Run("missing directory", func(t *testing.T) {
if webSearchPluginUpToDate(filepath.Join(t.TempDir(), "nonexistent")) {
t.Error("expected false for missing directory")
}
})
t.Run("missing package.json", func(t *testing.T) {
// Directory exists but holds no manifest at all.
dir := t.TempDir()
if webSearchPluginUpToDate(dir) {
t.Error("expected false for missing package.json")
}
})
t.Run("old version", func(t *testing.T) {
// 0.1.7 < webSearchMinVersion (0.2.1), so a reinstall is required.
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"0.1.7"}`), 0o644); err != nil {
t.Fatal(err)
}
if webSearchPluginUpToDate(dir) {
t.Error("expected false for old version 0.1.7")
}
})
t.Run("exact minimum version", func(t *testing.T) {
// The comparison is >=, so the minimum itself is acceptable.
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"0.2.1"}`), 0o644); err != nil {
t.Fatal(err)
}
if !webSearchPluginUpToDate(dir) {
t.Error("expected true for exact minimum version 0.2.1")
}
})
t.Run("newer version", func(t *testing.T) {
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"1.0.0"}`), 0o644); err != nil {
t.Fatal(err)
}
if !webSearchPluginUpToDate(dir) {
t.Error("expected true for newer version 1.0.0")
}
})
t.Run("invalid json", func(t *testing.T) {
// Unparseable manifest must be treated as not installed, not a crash.
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`not json`), 0o644); err != nil {
t.Fatal(err)
}
if webSearchPluginUpToDate(dir) {
t.Error("expected false for invalid json")
}
})
t.Run("empty version", func(t *testing.T) {
// Valid JSON but no usable version string still means "reinstall".
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":""}`), 0o644); err != nil {
t.Fatal(err)
}
if webSearchPluginUpToDate(dir) {
t.Error("expected false for empty version")
}
})
}
func TestRegisterWebSearchPlugin(t *testing.T) {
func TestConfigureOllamaWebSearch(t *testing.T) {
home := t.TempDir()
setTestHome(t, home)
@@ -2345,7 +2257,7 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
t.Fatal(err)
}
registerWebSearchPlugin()
configureOllamaWebSearch()
data, err := os.ReadFile(configPath)
if err != nil {
@@ -2361,40 +2273,30 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
t.Fatal("plugins section missing")
}
// Check entries
entries, _ := plugins["entries"].(map[string]any)
entry, _ := entries["openclaw-web-search"].(map[string]any)
entry, _ := entries["ollama"].(map[string]any)
if enabled, _ := entry["enabled"].(bool); !enabled {
t.Error("expected entries.openclaw-web-search.enabled = true")
t.Error("expected entries.ollama.enabled = true")
}
if _, ok := entries["openclaw-web-search"]; ok {
t.Error("expected stale openclaw-web-search entry to be absent")
}
// Check allow list
allow, _ := plugins["allow"].([]any)
found := false
for _, v := range allow {
if s, ok := v.(string); ok && s == "openclaw-web-search" {
found = true
}
if _, ok := plugins["allow"]; ok {
t.Error("did not expect plugins.allow to be created when no allowlist exists")
}
if !found {
t.Error("expected plugins.allow to contain openclaw-web-search")
if _, ok := plugins["installs"]; ok {
t.Error("did not expect plugins.installs to be created")
}
// Check install provenance
installs, _ := plugins["installs"].(map[string]any)
record, _ := installs["openclaw-web-search"].(map[string]any)
if record == nil {
t.Fatal("expected plugins.installs.openclaw-web-search")
tools, _ := config["tools"].(map[string]any)
web, _ := tools["web"].(map[string]any)
search, _ := web["search"].(map[string]any)
if got, _ := search["provider"].(string); got != "ollama" {
t.Errorf("search provider = %q, want %q", got, "ollama")
}
if source, _ := record["source"].(string); source != "npm" {
t.Errorf("install source = %q, want %q", source, "npm")
}
if spec, _ := record["spec"].(string); spec != webSearchNpmPackage {
t.Errorf("install spec = %q, want %q", spec, webSearchNpmPackage)
}
expectedPath := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
if installPath, _ := record["installPath"].(string); installPath != expectedPath {
t.Errorf("installPath = %q, want %q", installPath, expectedPath)
if enabled, _ := search["enabled"].(bool); !enabled {
t.Error("expected tools.web.search.enabled = true")
}
})
@@ -2403,8 +2305,8 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
t.Fatal(err)
}
registerWebSearchPlugin()
registerWebSearchPlugin()
configureOllamaWebSearch()
configureOllamaWebSearch()
data, err := os.ReadFile(configPath)
if err != nil {
@@ -2416,30 +2318,39 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
}
plugins, _ := config["plugins"].(map[string]any)
allow, _ := plugins["allow"].([]any)
count := 0
for _, v := range allow {
if s, ok := v.(string); ok && s == "openclaw-web-search" {
count++
}
entries, _ := plugins["entries"].(map[string]any)
if len(entries) != 1 {
t.Fatalf("expected only bundled ollama entry, got %v", entries)
}
if count != 1 {
t.Errorf("expected exactly 1 openclaw-web-search in allow, got %d", count)
if _, ok := entries["ollama"]; !ok {
t.Fatalf("expected entries.ollama to exist, got %v", entries)
}
})
t.Run("preserves existing config", func(t *testing.T) {
t.Run("migrates stale plugin config and preserves unrelated settings", func(t *testing.T) {
initial := map[string]any{
"plugins": map[string]any{
"allow": []any{"some-other-plugin"},
"allow": []any{"some-other-plugin", "openclaw-web-search"},
"entries": map[string]any{
"some-other-plugin": map[string]any{"enabled": true},
"some-other-plugin": map[string]any{"enabled": true},
"openclaw-web-search": map[string]any{"enabled": true},
},
"installs": map[string]any{
"some-other-plugin": map[string]any{
"source": "npm",
"installPath": "/some/path",
},
"openclaw-web-search": map[string]any{
"source": "npm",
"installPath": "/old/path",
},
},
},
"tools": map[string]any{
"alsoAllow": []any{"ollama_web_search", "ollama_web_fetch", "browser"},
"web": map[string]any{
"search": map[string]any{"enabled": false},
"fetch": map[string]any{"enabled": false},
},
},
"customField": "preserved",
@@ -2449,7 +2360,7 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
t.Fatal(err)
}
registerWebSearchPlugin()
configureOllamaWebSearch()
out, err := os.ReadFile(configPath)
if err != nil {
@@ -2469,28 +2380,61 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
if entries["some-other-plugin"] == nil {
t.Error("existing plugin entry was lost")
}
if entries["openclaw-web-search"] != nil {
t.Error("stale openclaw-web-search entry should be removed")
}
if ollamaEntry, _ := entries["ollama"].(map[string]any); ollamaEntry == nil {
t.Fatal("expected bundled ollama entry to be enabled")
}
installs, _ := plugins["installs"].(map[string]any)
if installs["some-other-plugin"] == nil {
t.Error("existing install record was lost")
}
if installs["openclaw-web-search"] != nil {
t.Error("stale openclaw-web-search install record should be removed")
}
allow, _ := plugins["allow"].([]any)
hasOther, hasWebSearch := false, false
hasOther, hasStalePlugin, hasOllama := false, false, false
for _, v := range allow {
s, _ := v.(string)
if s == "some-other-plugin" {
hasOther = true
}
if s == "openclaw-web-search" {
hasWebSearch = true
hasStalePlugin = true
}
if s == "ollama" {
hasOllama = true
}
}
if !hasOther {
t.Error("existing allow entry was lost")
}
if !hasWebSearch {
t.Error("openclaw-web-search not added to allow")
if hasStalePlugin {
t.Error("stale openclaw-web-search allow entry should be removed")
}
if !hasOllama {
t.Error("expected plugins.allow to contain bundled ollama plugin")
}
tools, _ := config["tools"].(map[string]any)
alsoAllow, _ := tools["alsoAllow"].([]any)
if len(alsoAllow) != 1 || alsoAllow[0] != "browser" {
t.Errorf("expected stale custom web tools to be removed, got %v", alsoAllow)
}
web, _ := tools["web"].(map[string]any)
search, _ := web["search"].(map[string]any)
fetch, _ := web["fetch"].(map[string]any)
if got, _ := search["provider"].(string); got != "ollama" {
t.Errorf("search provider = %q, want %q", got, "ollama")
}
if enabled, _ := search["enabled"].(bool); !enabled {
t.Error("expected migrated tools.web.search.enabled = true")
}
if enabled, _ := fetch["enabled"].(bool); !enabled {
t.Error("expected migrated tools.web.fetch.enabled = true")
}
})
}

View File

@@ -2,6 +2,10 @@
title: Structured Outputs
---
<Note>
Ollama's Cloud currently does not support structured outputs.
</Note>
Structured outputs let you enforce a JSON schema on model responses so you can reliably extract structured data, describe images, or keep every reply consistent.
## Generating structured JSON

View File

@@ -15,7 +15,7 @@ Ollama handles everything automatically:
1. **Install** — If OpenClaw isn't installed, Ollama prompts to install it via npm
2. **Security** — On the first launch, a security notice explains the risks of tool access
3. **Model** — Pick a model from the selector (local or cloud)
4. **Onboarding** — Ollama configures the provider, installs the gateway daemon, sets your model as the primary, and installs the web search and fetch plugin
4. **Onboarding** — Ollama configures the provider, installs the gateway daemon, sets your model as the primary, and enables OpenClaw's bundled Ollama web search
5. **Gateway** — Starts in the background and opens the OpenClaw TUI
<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens if using local models. See [Context length](/context-length) for more information.</Note>
@@ -24,19 +24,19 @@ Ollama handles everything automatically:
## Web search and fetch
OpenClaw ships with a web search and fetch plugin that gives local or cloud models the ability to search the web and extract readable page content.
OpenClaw ships with a bundled Ollama `web_search` provider that lets local or cloud-backed Ollama setups search the web through the configured Ollama host.
```bash
ollama launch openclaw
```
Web search and fetch is enabled automatically when launching OpenClaw through Ollama. To install the plugin directly:
Ollama web search is enabled automatically when launching OpenClaw through Ollama. To configure it manually:
```bash
openclaw plugins install @ollama/openclaw-web-search
openclaw configure --section web
```
<Note>Web search for local models requires `ollama signin`.</Note>
<Note>Ollama web search for local models requires `ollama signin`.</Note>
## Configure without launching
@@ -93,4 +93,3 @@ Link WhatsApp, Telegram, Slack, Discord, or iMessage to chat with your local mod
```bash
openclaw gateway stop
```

View File

@@ -226,7 +226,7 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
return fmt.Errorf("%s", strings.TrimSpace(string(respBody)))
return api.StatusError{StatusCode: resp.StatusCode, ErrorMessage: strings.TrimSpace(string(respBody))}
}
scanner := bufio.NewScanner(resp.Body)

View File

@@ -10,6 +10,8 @@ import (
"reflect"
"sort"
"strings"
"sync"
"sync/atomic"
"unsafe"
"github.com/ollama/ollama/logutil"
@@ -18,20 +20,28 @@ import (
type Array struct {
ctx C.mlx_array
name string
pinned int
pinned atomic.Int32
}
var arrays []*Array
var (
arrays []*Array
arraysMu sync.Mutex
)
// constructor utilities
func New(name string) *Array {
t := &Array{name: name}
if tracing {
traceScratch = append(traceScratch, t)
} else {
arraysMu.Lock()
defer arraysMu.Unlock()
arrays = append(arrays, t)
}
return t
}
@@ -131,7 +141,7 @@ func (t *Array) Clone() *Array {
func Pin(s ...*Array) {
for _, t := range s {
if t != nil {
t.pinned++
t.pinned.Add(1)
}
}
}
@@ -140,8 +150,7 @@ func Pin(s ...*Array) {
func Unpin(s ...*Array) {
for _, t := range s {
if t != nil {
t.pinned--
if t.pinned < 0 {
if t.pinned.Add(-1) < 0 {
panic(fmt.Sprintf("mlx.Unpin: negative pin count on array %q", t.name))
}
}
@@ -151,9 +160,11 @@ func Unpin(s ...*Array) {
// Sweep releases all unpinned arrays, primarily intermediate tensors. MLX will truly
// free them when there are no other references, including dependencies in the graph.
func Sweep() {
arraysMu.Lock()
defer arraysMu.Unlock()
n := 0
for _, t := range arrays {
if t.pinned > 0 && t.Valid() {
if t.pinned.Load() > 0 && t.Valid() {
arrays[n] = t
n++
} else if t.Valid() {
@@ -180,7 +191,7 @@ func (t *Array) String() string {
func (t *Array) LogValue() slog.Value {
attrs := []slog.Attr{
slog.String("name", t.name),
slog.Int("pinned", t.pinned),
slog.Int("pinned", int(t.pinned.Load())),
}
if t.Valid() {
attrs = append(attrs,
@@ -194,19 +205,19 @@ func (t *Array) LogValue() slog.Value {
// shape utilities
func (t Array) Size() int {
func (t *Array) Size() int {
return int(C.mlx_array_size(t.ctx))
}
func (t Array) NumBytes() int {
func (t *Array) NumBytes() int {
return int(C.mlx_array_nbytes(t.ctx))
}
func (t Array) NumDims() int {
func (t *Array) NumDims() int {
return int(C.mlx_array_ndim(t.ctx))
}
func (t Array) Dims() []int {
func (t *Array) Dims() []int {
dims := make([]int, t.NumDims())
for i := range dims {
dims[i] = t.Dim(i)
@@ -215,29 +226,29 @@ func (t Array) Dims() []int {
return dims
}
func (t Array) Dim(dim int) int {
func (t *Array) Dim(dim int) int {
return int(C.mlx_array_dim(t.ctx, C.int(dim)))
}
func (t Array) DType() DType {
func (t *Array) DType() DType {
return DType(C.mlx_array_dtype(t.ctx))
}
// data utilities
func (t Array) Int() int {
func (t *Array) Int() int {
var item C.int64_t
C.mlx_array_item_int64(&item, t.ctx)
return int(item)
}
func (t Array) Float() float64 {
func (t *Array) Float() float64 {
var item C.double
C.mlx_array_item_float64(&item, t.ctx)
return float64(item)
}
func (t Array) Ints() []int {
func (t *Array) Ints() []int {
if dt := t.DType(); dt != DTypeInt32 {
panic(fmt.Sprintf("mlx: Ints requires DTypeInt32, got %v", dt))
}
@@ -248,7 +259,7 @@ func (t Array) Ints() []int {
return ints
}
func (t Array) Floats() []float32 {
func (t *Array) Floats() []float32 {
if dt := t.DType(); dt != DTypeFloat32 {
panic(fmt.Sprintf("mlx: Floats requires DTypeFloat32, got %v", dt))
}
@@ -259,7 +270,7 @@ func (t Array) Floats() []float32 {
return floats
}
func (t Array) Save(name string) error {
func (t *Array) Save(name string) error {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
C.mlx_save(cName, t.ctx)
@@ -268,6 +279,8 @@ func (t Array) Save(name string) error {
// LogArrays logs all live arrays, sorted by size
func LogArrays() {
arraysMu.Lock()
defer arraysMu.Unlock()
sort.Slice(arrays, func(i, j int) bool {
return arrays[i].NumBytes() > arrays[j].NumBytes()
})
@@ -276,7 +289,7 @@ func LogArrays() {
for _, t := range arrays {
nb := t.NumBytes()
total += nb
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned, t.Dims()))
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned.Load(), t.Dims()))
}
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s, active: %s", len(arrays), PrettyBytes(total), PrettyBytes(ActiveMemory())))
}

View File

@@ -150,7 +150,7 @@ func closureCallback(res *C.mlx_vector_array, input C.mlx_vector_array, payload
traceScratch = nil
defer func() {
for _, a := range traceScratch {
if a.pinned > 0 {
if a.pinned.Load() > 0 {
panic("mlx: traced array was pinned during compilation")
}
if a.Valid() {

View File

@@ -24,8 +24,8 @@ func ScaledDotProductAttention(query, key, value, mask *Array, scale float32) *A
}
type LayerNorm struct {
Weight Array `weight:"weight"`
Bias Array `weight:"bias"`
Weight *Array `weight:"weight"`
Bias *Array `weight:"bias"`
}
func (r *LayerNorm) Forward(x *Array, eps float32) *Array {
@@ -35,10 +35,10 @@ func (r *LayerNorm) Forward(x *Array, eps float32) *Array {
}
type RMSNorm struct {
Weight Array `weight:"weight"`
Weight *Array `weight:"weight"`
}
func (r RMSNorm) Forward(x *Array, eps float32) *Array {
func (r *RMSNorm) Forward(x *Array, eps float32) *Array {
out := New("FAST_RMSNORM")
C.mlx_fast_rms_norm(&out.ctx, x.ctx, r.Weight.ctx, C.float(eps), DefaultStream().ctx)
return out

View File

@@ -1,12 +1,12 @@
package mlx
type Linear struct {
Weight Array `weight:"weight"`
Bias Array `weight:"bias"`
Weight *Array `weight:"weight"`
Bias *Array `weight:"bias"`
}
// Forward computes the linear transformation: x @ Weight.T + Bias
func (m Linear) Forward(x *Array) *Array {
func (m *Linear) Forward(x *Array) *Array {
w := m.Weight.Transpose(1, 0)
if m.Bias.Valid() {
return m.Bias.Addmm(x, w, 1.0, 1.0)
@@ -15,14 +15,14 @@ func (m Linear) Forward(x *Array) *Array {
return x.Matmul(w)
}
func (m Linear) Gather(x, lhs, rhs *Array, sorted bool) *Array {
func (m *Linear) Gather(x, lhs, rhs *Array, sorted bool) *Array {
w := m.Weight.Transpose(0, 2, 1)
// TODO: bias
return x.GatherMM(w, lhs, rhs, sorted)
}
type Embedding struct {
Weight Array `weight:"weight"`
Weight *Array `weight:"weight"`
}
func (e *Embedding) Forward(indices *Array) *Array {

View File

@@ -6,11 +6,9 @@ import (
"errors"
"fmt"
"log/slog"
"net/http"
"sort"
"time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/logutil"
"github.com/ollama/ollama/x/mlxrunner/mlx"
@@ -22,13 +20,37 @@ func prefillChunkSize() int {
return 2 << 10
}
func (r *Runner) TextGenerationPipeline(request Request) error {
// Prepare tokenizes the prompt and validates it against the model's
// context length. It is safe to call from any goroutine. On success it
// populates request.Tokens and adjusts request.Options.NumPredict.
func (r *Runner) Prepare(request *Request) error {
if r.Model == nil {
return errors.New("model not loaded")
}
tokens := r.Tokenizer.Encode(request.Prompt, r.Tokenizer.AddBOS())
if len(tokens) == 0 {
return errors.New("empty prompt")
}
if len(tokens) >= r.contextLength {
return fmt.Errorf("input length (%d tokens) exceeds the model's maximum context length (%d tokens)", len(tokens), r.contextLength)
}
// Cap generation to stay within the model's context length
maxGenerate := r.contextLength - len(tokens)
if request.Options.NumPredict <= 0 {
request.Options.NumPredict = maxGenerate
} else {
request.Options.NumPredict = min(request.Options.NumPredict, maxGenerate)
}
request.Tokens = tokens
return nil
}
func (r *Runner) TextGenerationPipeline(ctx context.Context, request Request) error {
mlx.ResetPeakMemory()
ctx := request.Ctx
var sample, nextSample sampler.Result
defer func() {
@@ -47,26 +69,7 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
slog.Info("peak memory", "size", mlx.PrettyBytes(mlx.PeakMemory()))
}()
inputs := r.Tokenizer.Encode(request.Prompt, r.Tokenizer.AddBOS())
if len(inputs) == 0 {
return errors.New("empty prompt")
}
if len(inputs) >= r.contextLength {
return api.StatusError{
StatusCode: http.StatusBadRequest,
ErrorMessage: fmt.Sprintf("input length (%d tokens) exceeds the model's maximum context length (%d tokens)", len(inputs), r.contextLength),
}
}
// Cap generation to stay within the model's context length
maxGenerate := r.contextLength - len(inputs)
if request.Options.NumPredict <= 0 {
request.Options.NumPredict = maxGenerate
} else {
request.Options.NumPredict = min(request.Options.NumPredict, maxGenerate)
}
inputs := request.Tokens
request.Sampler.ResetHistory(inputs)
session := r.cache.begin(r.Model, inputs)

View File

@@ -18,13 +18,17 @@ import (
"github.com/ollama/ollama/x/tokenizer"
)
// Request is a short-lived struct that carries a completion request through
// a channel from the HTTP handler to the runner goroutine. The ctx field
// must travel with the request so that cancellation propagates across the
// channel boundary.
type Request struct {
CompletionRequest
Responses chan CompletionResponse
Pipeline func(Request) error
Ctx context.Context
Pipeline func(context.Context, Request) error
Ctx context.Context //nolint:containedctx
Tokens []int32
Sampler *sample.Sampler
}
@@ -131,7 +135,7 @@ func (r *Runner) Run(host, port string, mux http.Handler) error {
case <-ctx.Done():
return nil
case request := <-r.Requests:
if err := request.Pipeline(request); err != nil {
if err := request.Pipeline(request.Ctx, request); err != nil {
slog.Info("Request terminated", "error", err)
var statusErr api.StatusError
if !errors.As(err, &statusErr) {

View File

@@ -106,6 +106,11 @@ func Execute(args []string) error {
TopLogprobs: request.TopLogprobs,
})
if err := runner.Prepare(&request); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var cancel context.CancelFunc
request.Ctx, cancel = context.WithCancel(r.Context())
defer cancel()