mirror of
https://github.com/ollama/ollama.git
synced 2026-04-26 02:36:09 +02:00
Compare commits
15 Commits
v0.21.1-rc
...
v0.21.3-rc
| Author | SHA1 | Date | |
|---|---|---|---|
| | ea01af6f76 | | |
| | c2ebb4d57c | | |
| | 590109c835 | | |
| | b4442c6d17 | | |
| | 85ff8e4a21 | | |
| | 160660e572 | | |
| | 3b43b9bc4b | | |
| | 21883571b7 | | |
| | ce99f24731 | | |
| | 04f5f0cdb4 | | |
| | fb36a01ffe | | |
| | 0c65ed33bc | | |
| | 22d6c817f8 | | |
| | ca01373b28 | | |
| | 24e038d56a | | |
14
api/types.go
14
api/types.go
@@ -1080,7 +1080,7 @@ func DefaultOptions() Options {
|
||||
}
|
||||
}
|
||||
|
||||
// ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low")
|
||||
// ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low", "max")
|
||||
type ThinkValue struct {
|
||||
// Value can be a bool or string
|
||||
Value interface{}
|
||||
@@ -1096,7 +1096,7 @@ func (t *ThinkValue) IsValid() bool {
|
||||
case bool:
|
||||
return true
|
||||
case string:
|
||||
return v == "high" || v == "medium" || v == "low"
|
||||
return v == "high" || v == "medium" || v == "low" || v == "max"
|
||||
default:
|
||||
return false
|
||||
}
|
||||
@@ -1130,8 +1130,8 @@ func (t *ThinkValue) Bool() bool {
|
||||
case bool:
|
||||
return v
|
||||
case string:
|
||||
// Any string value ("high", "medium", "low") means thinking is enabled
|
||||
return v == "high" || v == "medium" || v == "low"
|
||||
// Any string value ("high", "medium", "low", "max") means thinking is enabled
|
||||
return v == "high" || v == "medium" || v == "low" || v == "max"
|
||||
default:
|
||||
return false
|
||||
}
|
||||
@@ -1169,14 +1169,14 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
|
||||
var s string
|
||||
if err := json.Unmarshal(data, &s); err == nil {
|
||||
// Validate string values
|
||||
if s != "high" && s != "medium" && s != "low" {
|
||||
return fmt.Errorf("invalid think value: %q (must be \"high\", \"medium\", \"low\", true, or false)", s)
|
||||
if s != "high" && s != "medium" && s != "low" && s != "max" {
|
||||
return fmt.Errorf("invalid think value: %q (must be \"high\", \"medium\", \"low\", \"max\", true, or false)", s)
|
||||
}
|
||||
t.Value = s
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
|
||||
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", \"max\", true, or false)")
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler
|
||||
|
||||
@@ -495,6 +495,11 @@ func TestThinking_UnmarshalJSON(t *testing.T) {
|
||||
input: `{ "think": "low" }`,
|
||||
expectedThinking: &ThinkValue{Value: "low"},
|
||||
},
|
||||
{
|
||||
name: "string_max",
|
||||
input: `{ "think": "max" }`,
|
||||
expectedThinking: &ThinkValue{Value: "max"},
|
||||
},
|
||||
{
|
||||
name: "invalid_string",
|
||||
input: `{ "think": "invalid" }`,
|
||||
|
||||
@@ -381,7 +381,7 @@ export const useSendMessage = (chatId: string) => {
|
||||
role: "assistant",
|
||||
content: "",
|
||||
thinking: "",
|
||||
model: effectiveModel,
|
||||
model: effectiveModel.model,
|
||||
}),
|
||||
);
|
||||
lastMessage = newMessages[newMessages.length - 1];
|
||||
@@ -433,7 +433,7 @@ export const useSendMessage = (chatId: string) => {
|
||||
role: "assistant",
|
||||
content: "",
|
||||
thinking: "",
|
||||
model: effectiveModel,
|
||||
model: effectiveModel.model,
|
||||
}),
|
||||
);
|
||||
lastMessage = newMessages[newMessages.length - 1];
|
||||
@@ -520,7 +520,7 @@ export const useSendMessage = (chatId: string) => {
|
||||
thinkingTimeStart:
|
||||
lastMessage.thinkingTimeStart || event.thinkingTimeStart,
|
||||
thinkingTimeEnd: event.thinkingTimeEnd,
|
||||
model: selectedModel,
|
||||
model: selectedModel.model,
|
||||
});
|
||||
newMessages[newMessages.length - 1] = updatedMessage;
|
||||
} else {
|
||||
@@ -533,7 +533,7 @@ export const useSendMessage = (chatId: string) => {
|
||||
tool_calls: event.toolCalls,
|
||||
thinkingTimeStart: event.thinkingTimeStart,
|
||||
thinkingTimeEnd: event.thinkingTimeEnd,
|
||||
model: selectedModel,
|
||||
model: selectedModel.model,
|
||||
}),
|
||||
);
|
||||
}
|
||||
@@ -699,7 +699,7 @@ export const useSendMessage = (chatId: string) => {
|
||||
queryClient.setQueryData(["chat", newId], {
|
||||
chat: new Chat({
|
||||
id: newId,
|
||||
model: effectiveModel,
|
||||
model: effectiveModel.model,
|
||||
messages: [
|
||||
new Message({
|
||||
role: "user",
|
||||
|
||||
61
cmd/cmd.go
61
cmd/cmd.go
@@ -582,10 +582,10 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
opts.Think = &api.ThinkValue{Value: true}
|
||||
case "false":
|
||||
opts.Think = &api.ThinkValue{Value: false}
|
||||
case "high", "medium", "low":
|
||||
case "high", "medium", "low", "max":
|
||||
opts.Think = &api.ThinkValue{Value: thinkStr}
|
||||
default:
|
||||
return fmt.Errorf("invalid value for --think: %q (must be true, false, high, medium, or low)", thinkStr)
|
||||
return fmt.Errorf("invalid value for --think: %q (must be true, false, high, medium, low, or max)", thinkStr)
|
||||
}
|
||||
} else {
|
||||
opts.Think = nil
|
||||
@@ -1975,8 +1975,61 @@ func launchInteractiveModel(cmd *cobra.Command, modelName string) error {
|
||||
Options: map[string]any{},
|
||||
ShowConnect: true,
|
||||
}
|
||||
// loadOrUnloadModel is cloud-safe here: remote/cloud models skip local preload
|
||||
// and only validate auth/connectivity before interactive chat starts.
|
||||
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
requestedCloud := modelref.HasExplicitCloudSource(modelName)
|
||||
|
||||
info, err := func() (*api.ShowResponse, error) {
|
||||
showReq := &api.ShowRequest{Name: modelName}
|
||||
info, err := client.Show(cmd.Context(), showReq)
|
||||
var se api.StatusError
|
||||
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
|
||||
if requestedCloud {
|
||||
return nil, err
|
||||
}
|
||||
if err := PullHandler(cmd, []string{modelName}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: modelName})
|
||||
}
|
||||
return info, err
|
||||
}()
|
||||
if err != nil {
|
||||
if handleCloudAuthorizationError(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
ensureCloudStub(cmd.Context(), client, modelName)
|
||||
|
||||
opts.Think, err = inferThinkingOption(&info.Capabilities, &opts, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
audioCapable := slices.Contains(info.Capabilities, model.CapabilityAudio)
|
||||
opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision) || audioCapable
|
||||
|
||||
// TODO: remove the projector info and vision info checks below,
|
||||
// these are left in for backwards compatibility with older servers
|
||||
// that don't have the capabilities field in the model info
|
||||
if len(info.ProjectorInfo) != 0 {
|
||||
opts.MultiModal = true
|
||||
}
|
||||
for k := range info.ModelInfo {
|
||||
if strings.Contains(k, ".vision.") {
|
||||
opts.MultiModal = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
applyShowResponseToRunOptions(&opts, info)
|
||||
|
||||
if err := loadOrUnloadModel(cmd, &opts); err != nil {
|
||||
return fmt.Errorf("error loading model: %w", err)
|
||||
}
|
||||
|
||||
@@ -301,7 +301,7 @@ func TestParseArgs(t *testing.T) {
|
||||
func TestIsCloudModel(t *testing.T) {
|
||||
// isCloudModel now only uses Show API, so nil client always returns false
|
||||
t.Run("nil client returns false", func(t *testing.T) {
|
||||
models := []string{"glm-5.1:cloud", "kimi-k2.5:cloud", "local-model"}
|
||||
models := []string{"glm-5.1:cloud", "kimi-k2.6:cloud", "local-model"}
|
||||
for _, model := range models {
|
||||
if isCloudModel(context.Background(), nil, model) {
|
||||
t.Errorf("isCloudModel(%q) with nil client should return false", model)
|
||||
@@ -318,10 +318,18 @@ func names(items []ModelItem) []string {
|
||||
return out
|
||||
}
|
||||
|
||||
func recommendedNames(extra ...string) []string {
|
||||
out := make([]string, 0, len(recommendedModels)+len(extra))
|
||||
for _, item := range recommendedModels {
|
||||
out = append(out, item.Name)
|
||||
}
|
||||
return append(out, extra...)
|
||||
}
|
||||
|
||||
func TestBuildModelList_NoExistingModels(t *testing.T) {
|
||||
items, _, _, _ := buildModelList(nil, nil, "")
|
||||
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
|
||||
want := recommendedNames()
|
||||
if diff := cmp.Diff(want, names(items)); diff != "" {
|
||||
t.Errorf("with no existing models, items should be recommended in order (-want +got):\n%s", diff)
|
||||
}
|
||||
@@ -350,7 +358,7 @@ func TestBuildModelList_OnlyLocalModels_CloudRecsStillFirst(t *testing.T) {
|
||||
|
||||
// Cloud recs always come first among recommended, regardless of installed inventory.
|
||||
// Cloud disablement is handled upstream in loadSelectableModels via filterCloudItems.
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2", "qwen2.5"}
|
||||
want := recommendedNames("llama3.2", "qwen2.5")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("cloud recs pinned first even when no cloud models installed (-want +got):\n%s", diff)
|
||||
}
|
||||
@@ -366,13 +374,13 @@ func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
|
||||
got := names(items)
|
||||
|
||||
// All recs pinned at top (cloud before local in mixed case), then non-recs
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
|
||||
want := recommendedNames("llama3.2")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("recs pinned at top, cloud recs first in mixed case (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildModelList_PreCheckedFirst(t *testing.T) {
|
||||
func TestBuildModelList_PreCheckedNonRecommendedFirstInMore(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
@@ -381,8 +389,9 @@ func TestBuildModelList_PreCheckedFirst(t *testing.T) {
|
||||
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
|
||||
got := names(items)
|
||||
|
||||
if got[0] != "llama3.2" {
|
||||
t.Errorf("pre-checked model should be first, got %v", got)
|
||||
want := recommendedNames("llama3.2")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("recommended block should stay fixed while checked non-recommended models lead More (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -437,7 +446,7 @@ func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
|
||||
if !strings.HasSuffix(item.Description, "(not downloaded)") {
|
||||
t.Errorf("non-installed recommended %q should have '(not downloaded)' suffix, got %q", item.Name, item.Description)
|
||||
}
|
||||
case "minimax-m2.7:cloud", "kimi-k2.5:cloud", "qwen3.5:cloud":
|
||||
case "minimax-m2.7:cloud", "kimi-k2.6:cloud", "qwen3.5:cloud":
|
||||
if strings.HasSuffix(item.Description, "(not downloaded)") {
|
||||
t.Errorf("cloud model %q should not have '(not downloaded)' suffix, got %q", item.Name, item.Description)
|
||||
}
|
||||
@@ -455,9 +464,9 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
|
||||
got := names(items)
|
||||
|
||||
// gemma4 and glm-5.1:cloud are installed so they sort normally;
|
||||
// kimi-k2.5:cloud, qwen3.5:cloud, and qwen3.5 are not installed so they go to the bottom
|
||||
// qwen3.5:cloud and qwen3.5 are not installed so they go to the bottom
|
||||
// All recs: cloud first in mixed case, then local, in rec order within each
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
|
||||
want := recommendedNames()
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("all recs, cloud first in mixed case (-want +got):\n%s", diff)
|
||||
}
|
||||
@@ -466,23 +475,23 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
|
||||
func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
{Name: "kimi-k2.5:cloud", Remote: true},
|
||||
{Name: "kimi-k2.6:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
got := names(items)
|
||||
|
||||
// kimi-k2.5:cloud is installed so it sorts normally;
|
||||
// kimi-k2.6:cloud is installed so it sorts normally;
|
||||
// the rest of the recommendations are not installed so they go to the bottom
|
||||
// All recs pinned at top (cloud first in mixed case), then non-recs
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5", "llama3.2"}
|
||||
want := recommendedNames("llama3.2")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("recs pinned at top, cloud first in mixed case (-want +got):\n%s", diff)
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
isCloud := strings.HasSuffix(item.Name, ":cloud")
|
||||
isInstalled := slices.Contains([]string{"kimi-k2.5:cloud", "llama3.2"}, item.Name)
|
||||
isInstalled := slices.Contains([]string{"kimi-k2.6:cloud", "llama3.2"}, item.Name)
|
||||
if isInstalled || isCloud {
|
||||
if strings.HasSuffix(item.Description, "(not downloaded)") {
|
||||
t.Errorf("installed or cloud model %q should not have '(not downloaded)' suffix, got %q", item.Name, item.Description)
|
||||
@@ -549,8 +558,8 @@ func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
|
||||
if !cloudModels["glm-5.1:cloud"] {
|
||||
t.Error("glm-5.1:cloud should be in cloudModels")
|
||||
}
|
||||
if !cloudModels["kimi-k2.5:cloud"] {
|
||||
t.Error("kimi-k2.5:cloud should be in cloudModels (recommended cloud)")
|
||||
if !cloudModels["kimi-k2.6:cloud"] {
|
||||
t.Error("kimi-k2.6:cloud should be in cloudModels (recommended cloud)")
|
||||
}
|
||||
if !cloudModels["qwen3.5:cloud"] {
|
||||
t.Error("qwen3.5:cloud should be in cloudModels (recommended cloud)")
|
||||
@@ -570,7 +579,7 @@ func TestBuildModelList_RecommendedFieldSet(t *testing.T) {
|
||||
|
||||
for _, item := range items {
|
||||
switch item.Name {
|
||||
case "gemma4", "qwen3.5", "glm-5.1:cloud", "kimi-k2.5:cloud", "qwen3.5:cloud":
|
||||
case "gemma4", "qwen3.5", "glm-5.1:cloud", "kimi-k2.6:cloud", "qwen3.5:cloud":
|
||||
if !item.Recommended {
|
||||
t.Errorf("%q should have Recommended=true", item.Name)
|
||||
}
|
||||
@@ -628,7 +637,7 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
|
||||
lastRecIdx := -1
|
||||
firstNonRecIdx := len(got)
|
||||
for i, name := range got {
|
||||
isRec := name == "gemma4" || name == "qwen3.5" || name == "minimax-m2.7:cloud" || name == "glm-5.1:cloud" || name == "kimi-k2.5:cloud" || name == "qwen3.5:cloud"
|
||||
isRec := name == "gemma4" || name == "qwen3.5" || name == "minimax-m2.7:cloud" || name == "glm-5.1:cloud" || name == "kimi-k2.6:cloud" || name == "qwen3.5:cloud"
|
||||
if isRec && i > lastRecIdx {
|
||||
lastRecIdx = i
|
||||
}
|
||||
@@ -641,17 +650,32 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildModelList_CheckedBeforeRecs(t *testing.T) {
|
||||
func TestBuildModelList_CheckedRecommendedDoesNotReshuffleRecommendedOrder(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
|
||||
items, _, _, _ := buildModelList(existing, []string{"qwen3.5:cloud", "glm-5.1:cloud"}, "")
|
||||
got := names(items)
|
||||
|
||||
if got[0] != "llama3.2" {
|
||||
t.Errorf("checked model should be first even before recs, got %v", got)
|
||||
want := recommendedNames("llama3.2")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("checked recommended models should not reshuffle the fixed recommended order (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildModelList_StaleSavedKimiK25DoesNotReshuffleRecommendedOrder(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "kimi-k2.5:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud"}, "kimi-k2.5:cloud")
|
||||
got := names(items)
|
||||
|
||||
want := recommendedNames("kimi-k2.5:cloud")
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Errorf("stale saved kimi-k2.5 should stay in More without reshuffling the fixed recommended order (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -588,7 +588,7 @@ func (c *launcherClient) launchManagedSingleIntegration(ctx context.Context, nam
|
||||
return nil
|
||||
}
|
||||
|
||||
if (current == "" || needsConfigure || req.ModelOverride != "" || target != current) && !savedMatchesModels(saved, []string{target}) {
|
||||
if needsConfigure || req.ModelOverride != "" || (current != "" && target != current) || !savedMatchesModels(saved, []string{target}) {
|
||||
if err := prepareManagedSingleIntegration(name, runner, managed, target); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ollama/ollama/cmd/config"
|
||||
)
|
||||
|
||||
@@ -511,6 +512,65 @@ func TestLaunchIntegration_ManagedSingleIntegrationRewritesWhenSavedDiffers(t *t
|
||||
}
|
||||
}
|
||||
|
||||
func TestLaunchIntegration_ManagedSingleIntegrationRewritesWhenLiveConfigDrifts(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setLaunchTestHome(t, tmpDir)
|
||||
withInteractiveSession(t, true)
|
||||
withLauncherHooks(t)
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/api/tags":
|
||||
fmt.Fprint(w, `{"models":[{"name":"gemma4"},{"name":"qwen3:8b"}]}`)
|
||||
case "/api/show":
|
||||
fmt.Fprint(w, `{"model_info":{"general.context_length":131072}}`)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer srv.Close()
|
||||
t.Setenv("OLLAMA_HOST", srv.URL)
|
||||
|
||||
if err := config.SaveIntegration("stubmanaged", []string{"gemma4"}); err != nil {
|
||||
t.Fatalf("failed to save managed integration config: %v", err)
|
||||
}
|
||||
|
||||
runner := &launcherManagedRunner{
|
||||
currentModel: "qwen3:8b",
|
||||
}
|
||||
withIntegrationOverride(t, "stubmanaged", runner)
|
||||
|
||||
DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
|
||||
t.Fatal("selector should not be called when live config already provides the target")
|
||||
return "", nil
|
||||
}
|
||||
DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if err := LaunchIntegration(context.Background(), IntegrationLaunchRequest{Name: "stubmanaged"}); err != nil {
|
||||
t.Fatalf("LaunchIntegration returned error: %v", err)
|
||||
}
|
||||
|
||||
if diff := compareStrings(runner.configured, []string{"qwen3:8b"}); diff != "" {
|
||||
t.Fatalf("expected Configure to reconcile stale saved config to live target: %s", diff)
|
||||
}
|
||||
if runner.refreshCalls != 1 {
|
||||
t.Fatalf("expected runtime refresh once after drift reconciliation, got %d", runner.refreshCalls)
|
||||
}
|
||||
if runner.ranModel != "qwen3:8b" {
|
||||
t.Fatalf("expected launch to run live configured model, got %q", runner.ranModel)
|
||||
}
|
||||
|
||||
saved, err := config.LoadIntegration("stubmanaged")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to reload managed integration config: %v", err)
|
||||
}
|
||||
if diff := compareStrings(saved.Models, []string{"qwen3:8b"}); diff != "" {
|
||||
t.Fatalf("saved models mismatch after drift reconciliation: %s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLaunchIntegration_ManagedSingleIntegrationStopsWhenRuntimeRefreshFails(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setLaunchTestHome(t, tmpDir)
|
||||
@@ -1219,8 +1279,9 @@ func TestLaunchIntegration_EditorForceConfigure_FloatsCheckedModelsInPicker(t *t
|
||||
if len(gotItems) == 0 {
|
||||
t.Fatal("expected multi selector to receive items")
|
||||
}
|
||||
if gotItems[0] != "qwen3.5:cloud" {
|
||||
t.Fatalf("expected checked models floated to top with qwen3.5:cloud first, got %v", gotItems)
|
||||
wantItems := recommendedNames()
|
||||
if diff := cmp.Diff(wantItems, gotItems); diff != "" {
|
||||
t.Fatalf("expected fixed recommended order in selector items (-want +got):\n%s", diff)
|
||||
}
|
||||
if len(gotPreChecked) < 2 {
|
||||
t.Fatalf("expected prechecked models to be preserved, got %v", gotPreChecked)
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
)
|
||||
|
||||
var recommendedModels = []ModelItem{
|
||||
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
|
||||
{Name: "kimi-k2.6:cloud", Description: "State-of-the-art coding, long-horizon execution, and multimodal agent swarm capability", Recommended: true},
|
||||
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
|
||||
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
|
||||
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
|
||||
@@ -56,6 +56,7 @@ var cloudModelLimits = map[string]cloudModelLimit{
|
||||
"gpt-oss:20b": {Context: 131_072, Output: 131_072},
|
||||
"kimi-k2:1t": {Context: 262_144, Output: 262_144},
|
||||
"kimi-k2.5": {Context: 262_144, Output: 262_144},
|
||||
"kimi-k2.6": {Context: 262_144, Output: 262_144},
|
||||
"kimi-k2-thinking": {Context: 262_144, Output: 262_144},
|
||||
"nemotron-3-nano:30b": {Context: 1_048_576, Output: 131_072},
|
||||
"qwen3-coder:480b": {Context: 262_144, Output: 65_536},
|
||||
@@ -360,18 +361,12 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
|
||||
}
|
||||
|
||||
if hasLocalModel || hasCloudModel {
|
||||
// Keep the Recommended section pinned to recommendedModels order. Checked
|
||||
// and default-model priority only apply within the More section.
|
||||
slices.SortStableFunc(items, func(a, b ModelItem) int {
|
||||
ac, bc := checked[a.Name], checked[b.Name]
|
||||
aNew, bNew := notInstalled[a.Name], notInstalled[b.Name]
|
||||
aRec, bRec := recRank[a.Name] > 0, recRank[b.Name] > 0
|
||||
aCloud, bCloud := cloudModels[a.Name], cloudModels[b.Name]
|
||||
|
||||
if ac != bc {
|
||||
if ac {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
if aRec != bRec {
|
||||
if aRec {
|
||||
return -1
|
||||
@@ -379,14 +374,14 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
|
||||
return 1
|
||||
}
|
||||
if aRec && bRec {
|
||||
if aCloud != bCloud {
|
||||
if aCloud {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
return recRank[a.Name] - recRank[b.Name]
|
||||
}
|
||||
if ac != bc {
|
||||
if ac {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
}
|
||||
// Among checked non-recommended items - put the default first
|
||||
if ac && !aRec && current != "" {
|
||||
aCurrent := a.Name == current
|
||||
|
||||
@@ -14,8 +14,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/mod/semver"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/cmd/internal/fileutil"
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
@@ -30,6 +28,8 @@ var openclawModelShowTimeout = 5 * time.Second
|
||||
// openclawFreshInstall is set to true when ensureOpenclawInstalled performs an install
|
||||
var openclawFreshInstall bool
|
||||
|
||||
var openclawCanInstallDaemon = canInstallDaemon
|
||||
|
||||
type Openclaw struct{}
|
||||
|
||||
func (c *Openclaw) String() string { return "OpenClaw" }
|
||||
@@ -60,6 +60,7 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
// the newest wizard flags (e.g. --auth-choice ollama).
|
||||
if !openclawFreshInstall {
|
||||
update := exec.Command(bin, "update")
|
||||
update.Env = openclawInstallEnv()
|
||||
update.Stdout = os.Stdout
|
||||
update.Stderr = os.Stderr
|
||||
_ = update.Run() // best-effort; continue even if update fails
|
||||
@@ -75,19 +76,18 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
"--auth-choice", "ollama",
|
||||
"--custom-base-url", envconfig.Host().String(),
|
||||
"--custom-model-id", model,
|
||||
// Launch owns the first real gateway startup immediately after onboarding,
|
||||
// so don't let OpenClaw fail the whole first-run flow on a transient
|
||||
// daemon health probe.
|
||||
"--skip-health",
|
||||
"--skip-channels",
|
||||
"--skip-skills",
|
||||
}
|
||||
if canInstallDaemon() {
|
||||
if openclawCanInstallDaemon() {
|
||||
onboardArgs = append(onboardArgs, "--install-daemon")
|
||||
} else {
|
||||
// When we can't install a daemon (e.g. no systemd, sudo dropped
|
||||
// XDG_RUNTIME_DIR, or container environment), skip the gateway
|
||||
// health check so non-interactive onboarding completes. The
|
||||
// gateway is started as a foreground child process after onboarding.
|
||||
onboardArgs = append(onboardArgs, "--skip-health")
|
||||
}
|
||||
cmd := exec.Command(bin, onboardArgs...)
|
||||
cmd.Env = openclawInstallEnv()
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
@@ -98,13 +98,23 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
patchDeviceScopes()
|
||||
}
|
||||
|
||||
if ensureWebSearchPlugin() {
|
||||
registerWebSearchPlugin()
|
||||
}
|
||||
configureOllamaWebSearch()
|
||||
|
||||
// When extra args are passed through, run exactly what the user asked for
|
||||
// after setup and skip the built-in gateway+TUI convenience flow.
|
||||
if len(args) > 0 {
|
||||
cleanup := func() {}
|
||||
if shouldEnsureGatewayForArgs(args) {
|
||||
cleanupFn, _, _, err := c.ensureGatewayReady(bin)
|
||||
if err != nil {
|
||||
return windowsHint(err)
|
||||
}
|
||||
if cleanupFn != nil {
|
||||
cleanup = cleanupFn
|
||||
}
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
cmd := exec.Command(bin, args...)
|
||||
cmd.Env = openclawEnv()
|
||||
cmd.Stdin = os.Stdin
|
||||
@@ -125,41 +135,11 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
|
||||
fmt.Fprintf(os.Stderr, "\n%sStarting your assistant — this may take a moment...%s\n\n", ansiGray, ansiReset)
|
||||
|
||||
token, port := c.gatewayInfo()
|
||||
addr := fmt.Sprintf("localhost:%d", port)
|
||||
|
||||
// If the gateway is already running (e.g. via the daemon), restart it
|
||||
// so it picks up any config changes (model, provider, etc.).
|
||||
if portOpen(addr) {
|
||||
restart := exec.Command(bin, "daemon", "restart")
|
||||
restart.Env = openclawEnv()
|
||||
if err := restart.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: daemon restart failed: %v%s\n", ansiYellow, err, ansiReset)
|
||||
}
|
||||
if !waitForPort(addr, 10*time.Second) {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: gateway did not come back after restart%s\n", ansiYellow, ansiReset)
|
||||
}
|
||||
}
|
||||
|
||||
// If the gateway isn't running, start it as a background child process.
|
||||
if !portOpen(addr) {
|
||||
gw := exec.Command(bin, "gateway", "run", "--force")
|
||||
gw.Env = openclawEnv()
|
||||
if err := gw.Start(); err != nil {
|
||||
return windowsHint(fmt.Errorf("failed to start gateway: %w", err))
|
||||
}
|
||||
defer func() {
|
||||
if gw.Process != nil {
|
||||
_ = gw.Process.Kill()
|
||||
_ = gw.Wait()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%sStarting gateway...%s\n", ansiGray, ansiReset)
|
||||
if !waitForPort(addr, 30*time.Second) {
|
||||
return windowsHint(fmt.Errorf("gateway did not start on %s", addr))
|
||||
cleanup, token, port, err := c.ensureGatewayReady(bin)
|
||||
if err != nil {
|
||||
return windowsHint(err)
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
printOpenclawReady(bin, token, port, firstLaunch)
|
||||
|
||||
@@ -179,6 +159,66 @@ func (c *Openclaw) Run(model string, args []string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// shouldEnsureGatewayForArgs reports whether the first pass-through argument
// is exactly "tui". It returns false for an empty or nil argument list.
func shouldEnsureGatewayForArgs(args []string) bool {
	if len(args) == 0 {
		return false
	}
	return args[0] == "tui"
}
|
||||
|
||||
func (c *Openclaw) ensureGatewayReady(bin string) (func(), string, int, error) {
|
||||
token, port := c.gatewayInfo()
|
||||
addr := fmt.Sprintf("localhost:%d", port)
|
||||
|
||||
// If the gateway is already running (e.g. via the daemon), restart it
|
||||
// so it picks up any config changes (model, provider, etc.).
|
||||
if portOpen(addr) {
|
||||
restart := exec.Command(bin, "daemon", "restart")
|
||||
restart.Env = openclawEnv()
|
||||
if err := restart.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: daemon restart failed: %v%s\n", ansiYellow, err, ansiReset)
|
||||
}
|
||||
if !waitForPort(addr, 10*time.Second) {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: gateway did not come back after restart%s\n", ansiYellow, ansiReset)
|
||||
}
|
||||
}
|
||||
|
||||
// If the daemon is installed but not currently listening, try to bring it
|
||||
// up before falling back to a foreground child process.
|
||||
if openclawCanInstallDaemon() && !portOpen(addr) {
|
||||
start := exec.Command(bin, "daemon", "start")
|
||||
start.Env = openclawEnv()
|
||||
if err := start.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: daemon start failed: %v%s\n", ansiYellow, err, ansiReset)
|
||||
} else if waitForPort(addr, 10*time.Second) {
|
||||
fmt.Fprintf(os.Stderr, "%sStarting gateway...%s\n", ansiGray, ansiReset)
|
||||
return func() {}, token, port, nil
|
||||
}
|
||||
}
|
||||
|
||||
cleanup := func() {}
|
||||
|
||||
// If the gateway still isn't running, start it as a background child process.
|
||||
if !portOpen(addr) {
|
||||
gw := exec.Command(bin, "gateway", "run", "--force")
|
||||
gw.Env = openclawEnv()
|
||||
if err := gw.Start(); err != nil {
|
||||
return nil, "", 0, fmt.Errorf("failed to start gateway: %w", err)
|
||||
}
|
||||
cleanup = func() {
|
||||
if gw.Process != nil {
|
||||
_ = gw.Process.Kill()
|
||||
_ = gw.Wait()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%sStarting gateway...%s\n", ansiGray, ansiReset)
|
||||
if !waitForPort(addr, 30*time.Second) {
|
||||
cleanup()
|
||||
return nil, "", 0, fmt.Errorf("gateway did not start on %s", addr)
|
||||
}
|
||||
|
||||
return cleanup, token, port, nil
|
||||
}
|
||||
|
||||
// runChannelSetupPreflight prompts users to connect a messaging channel before
|
||||
// starting the built-in gateway+TUI flow. In interactive sessions, it loops
|
||||
// until a channel is configured, unless the user chooses "Set up later".
|
||||
@@ -339,9 +379,30 @@ func openclawEnv() []string {
|
||||
env = append(env, e)
|
||||
}
|
||||
}
|
||||
if _, ok := os.LookupEnv("OPENCLAW_PLUGIN_STAGE_DIR"); !ok {
|
||||
if dir := openclawPluginStageDir(); dir != "" {
|
||||
env = append(env, "OPENCLAW_PLUGIN_STAGE_DIR="+dir)
|
||||
}
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
func openclawInstallEnv() []string {
|
||||
env := openclawEnv()
|
||||
if _, ok := os.LookupEnv("OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"); !ok {
|
||||
env = append(env, "OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS=1")
|
||||
}
|
||||
return env
|
||||
}
|
||||
|
||||
// openclawPluginStageDir returns <home>/.openclaw/plugin-runtime-deps, where
// <home> comes from os.UserHomeDir. It returns "" when the home directory
// cannot be determined.
func openclawPluginStageDir() string {
	if homeDir, err := os.UserHomeDir(); err == nil {
		return filepath.Join(homeDir, ".openclaw", "plugin-runtime-deps")
	}
	return ""
}
|
||||
|
||||
// portOpen checks if a TCP port is currently accepting connections.
|
||||
func portOpen(addr string) bool {
|
||||
conn, err := net.DialTimeout("tcp", addr, 500*time.Millisecond)
|
||||
@@ -565,6 +626,7 @@ func ensureOpenclawInstalled() (string, error) {
|
||||
|
||||
fmt.Fprintf(os.Stderr, "\nInstalling OpenClaw...\n")
|
||||
cmd := exec.Command("npm", "install", "-g", "openclaw@latest")
|
||||
cmd.Env = openclawInstallEnv()
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
@@ -738,89 +800,13 @@ func clearSessionModelOverride(primary string) {
|
||||
_ = os.WriteFile(path, out, 0o600)
|
||||
}
|
||||
|
||||
const (
|
||||
webSearchNpmPackage = "@ollama/openclaw-web-search"
|
||||
webSearchMinVersion = "0.2.1"
|
||||
)
|
||||
|
||||
// ensureWebSearchPlugin installs the openclaw-web-search extension into the
|
||||
// user-level extensions directory (~/.openclaw/extensions/) if it isn't already
|
||||
// present, or re-installs if the installed version is older than webSearchMinVersion.
|
||||
// Returns true if the extension is available.
|
||||
func ensureWebSearchPlugin() bool {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
pluginDir := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
|
||||
if webSearchPluginUpToDate(pluginDir) {
|
||||
return true
|
||||
}
|
||||
|
||||
npmBin, err := exec.LookPath("npm")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(pluginDir, 0o755); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Download the tarball via `npm pack`, extract it flat into the plugin dir.
|
||||
pack := exec.Command(npmBin, "pack", webSearchNpmPackage, "--pack-destination", pluginDir)
|
||||
out, err := pack.Output()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: could not download web search plugin: %v%s\n", ansiYellow, err, ansiReset)
|
||||
return false
|
||||
}
|
||||
|
||||
tgzName := strings.TrimSpace(string(out))
|
||||
tgzPath := filepath.Join(pluginDir, tgzName)
|
||||
defer os.Remove(tgzPath)
|
||||
|
||||
tar := exec.Command("tar", "xzf", tgzPath, "--strip-components=1", "-C", pluginDir)
|
||||
if err := tar.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s Warning: could not extract web search plugin: %v%s\n", ansiYellow, err, ansiReset)
|
||||
return false
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%s ✓ Installed Ollama web search %s\n", ansiGreen, ansiReset)
|
||||
return true
|
||||
}
|
||||
|
||||
// webSearchPluginUpToDate returns true if the plugin is installed and its
|
||||
// package.json version is >= webSearchMinVersion.
|
||||
func webSearchPluginUpToDate(pluginDir string) bool {
|
||||
data, err := os.ReadFile(filepath.Join(pluginDir, "package.json"))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
var pkg struct {
|
||||
Version string `json:"version"`
|
||||
}
|
||||
if json.Unmarshal(data, &pkg) != nil || pkg.Version == "" {
|
||||
return false
|
||||
}
|
||||
return !versionLessThan(pkg.Version, webSearchMinVersion)
|
||||
}
|
||||
|
||||
// versionLessThan compares two semver version strings (major.minor.patch).
|
||||
// Inputs may omit the "v" prefix; it is added automatically for semver.Compare.
|
||||
func versionLessThan(a, b string) bool {
|
||||
if !strings.HasPrefix(a, "v") {
|
||||
a = "v" + a
|
||||
}
|
||||
if !strings.HasPrefix(b, "v") {
|
||||
b = "v" + b
|
||||
}
|
||||
return semver.Compare(a, b) < 0
|
||||
}
|
||||
|
||||
// registerWebSearchPlugin adds plugins.entries.openclaw-web-search to the OpenClaw
|
||||
// config so the gateway activates it on next start. Best-effort; silently returns
|
||||
// on any error.
|
||||
func registerWebSearchPlugin() {
|
||||
// configureOllamaWebSearch keeps launch-managed OpenClaw installs on the
|
||||
// bundled Ollama web_search provider. Older launch builds installed an
|
||||
// external openclaw-web-search plugin that added custom ollama_web_search and
|
||||
// ollama_web_fetch tools. Current OpenClaw versions ship Ollama web_search as
|
||||
// the bundled "ollama" plugin instead, so we migrate stale config and ensure
|
||||
// fresh installs select the bundled provider.
|
||||
func configureOllamaWebSearch() {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return
|
||||
@@ -835,6 +821,8 @@ func registerWebSearchPlugin() {
|
||||
return
|
||||
}
|
||||
|
||||
stalePluginConfigured := false
|
||||
|
||||
plugins, _ := config["plugins"].(map[string]any)
|
||||
if plugins == nil {
|
||||
plugins = make(map[string]any)
|
||||
@@ -843,68 +831,100 @@ func registerWebSearchPlugin() {
|
||||
if entries == nil {
|
||||
entries = make(map[string]any)
|
||||
}
|
||||
entries["openclaw-web-search"] = map[string]any{"enabled": true}
|
||||
plugins["entries"] = entries
|
||||
|
||||
// Pin trust so the gateway doesn't warn about untracked plugins.
|
||||
allow, _ := plugins["allow"].([]any)
|
||||
hasAllow := false
|
||||
for _, v := range allow {
|
||||
if s, ok := v.(string); ok && s == "openclaw-web-search" {
|
||||
hasAllow = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasAllow {
|
||||
allow = append(allow, "openclaw-web-search")
|
||||
}
|
||||
plugins["allow"] = allow
|
||||
|
||||
// Record install provenance so the loader can verify the plugin origin.
|
||||
installs, _ := plugins["installs"].(map[string]any)
|
||||
if installs == nil {
|
||||
installs = make(map[string]any)
|
||||
}
|
||||
pluginDir := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
|
||||
installs["openclaw-web-search"] = map[string]any{
|
||||
"source": "npm",
|
||||
"spec": webSearchNpmPackage,
|
||||
"installPath": pluginDir,
|
||||
}
|
||||
plugins["installs"] = installs
|
||||
|
||||
config["plugins"] = plugins
|
||||
|
||||
// Add plugin tools to tools.alsoAllow so they survive the coding profile's
|
||||
// policy pipeline (which has an explicit allow list of core tools only).
|
||||
tools, _ := config["tools"].(map[string]any)
|
||||
if tools == nil {
|
||||
tools = make(map[string]any)
|
||||
}
|
||||
|
||||
alsoAllow, _ := tools["alsoAllow"].([]any)
|
||||
needed := []string{"ollama_web_search", "ollama_web_fetch"}
|
||||
have := make(map[string]bool, len(alsoAllow))
|
||||
for _, v := range alsoAllow {
|
||||
if s, ok := v.(string); ok {
|
||||
have[s] = true
|
||||
}
|
||||
}
|
||||
for _, name := range needed {
|
||||
if !have[name] {
|
||||
alsoAllow = append(alsoAllow, name)
|
||||
}
|
||||
}
|
||||
tools["alsoAllow"] = alsoAllow
|
||||
|
||||
// Disable built-in web search/fetch since our plugin replaces them.
|
||||
web, _ := tools["web"].(map[string]any)
|
||||
if web == nil {
|
||||
web = make(map[string]any)
|
||||
}
|
||||
web["search"] = map[string]any{"enabled": false}
|
||||
web["fetch"] = map[string]any{"enabled": false}
|
||||
search, _ := web["search"].(map[string]any)
|
||||
if search == nil {
|
||||
search = make(map[string]any)
|
||||
}
|
||||
fetch, _ := web["fetch"].(map[string]any)
|
||||
if fetch == nil {
|
||||
fetch = make(map[string]any)
|
||||
}
|
||||
|
||||
alsoAllow, _ := tools["alsoAllow"].([]any)
|
||||
var filteredAlsoAllow []any
|
||||
for _, v := range alsoAllow {
|
||||
s, ok := v.(string)
|
||||
if !ok {
|
||||
filteredAlsoAllow = append(filteredAlsoAllow, v)
|
||||
continue
|
||||
}
|
||||
if s == "ollama_web_search" || s == "ollama_web_fetch" {
|
||||
stalePluginConfigured = true
|
||||
continue
|
||||
}
|
||||
filteredAlsoAllow = append(filteredAlsoAllow, v)
|
||||
}
|
||||
if len(filteredAlsoAllow) > 0 {
|
||||
tools["alsoAllow"] = filteredAlsoAllow
|
||||
} else {
|
||||
delete(tools, "alsoAllow")
|
||||
}
|
||||
|
||||
if _, ok := entries["openclaw-web-search"]; ok {
|
||||
delete(entries, "openclaw-web-search")
|
||||
stalePluginConfigured = true
|
||||
}
|
||||
ollamaEntry, _ := entries["ollama"].(map[string]any)
|
||||
if ollamaEntry == nil {
|
||||
ollamaEntry = make(map[string]any)
|
||||
}
|
||||
ollamaEntry["enabled"] = true
|
||||
entries["ollama"] = ollamaEntry
|
||||
plugins["entries"] = entries
|
||||
|
||||
if allow, ok := plugins["allow"].([]any); ok {
|
||||
var nextAllow []any
|
||||
hasOllama := false
|
||||
for _, v := range allow {
|
||||
s, ok := v.(string)
|
||||
if ok && s == "openclaw-web-search" {
|
||||
stalePluginConfigured = true
|
||||
continue
|
||||
}
|
||||
if ok && s == "ollama" {
|
||||
hasOllama = true
|
||||
}
|
||||
nextAllow = append(nextAllow, v)
|
||||
}
|
||||
if !hasOllama {
|
||||
nextAllow = append(nextAllow, "ollama")
|
||||
}
|
||||
plugins["allow"] = nextAllow
|
||||
}
|
||||
|
||||
if installs, ok := plugins["installs"].(map[string]any); ok {
|
||||
if _, exists := installs["openclaw-web-search"]; exists {
|
||||
delete(installs, "openclaw-web-search")
|
||||
stalePluginConfigured = true
|
||||
}
|
||||
if len(installs) > 0 {
|
||||
plugins["installs"] = installs
|
||||
} else {
|
||||
delete(plugins, "installs")
|
||||
}
|
||||
}
|
||||
|
||||
if stalePluginConfigured || search["provider"] == nil {
|
||||
search["provider"] = "ollama"
|
||||
}
|
||||
if stalePluginConfigured {
|
||||
fetch["enabled"] = true
|
||||
}
|
||||
search["enabled"] = true
|
||||
web["search"] = search
|
||||
if len(fetch) > 0 {
|
||||
web["fetch"] = fetch
|
||||
}
|
||||
tools["web"] = web
|
||||
config["plugins"] = plugins
|
||||
config["tools"] = tools
|
||||
|
||||
out, err := json.MarshalIndent(config, "", " ")
|
||||
|
||||
@@ -251,6 +251,359 @@ func TestOpenclawRun_SetupLaterContinuesToGatewayAndTUI(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawRun_FirstLaunchOnboardUsesLaunchManagedHealthFlow(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("uses a POSIX shell test binary")
|
||||
}
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("PATH", tmpDir)
|
||||
|
||||
bin := filepath.Join(tmpDir, "openclaw")
|
||||
script := fmt.Sprintf(`#!/bin/sh
|
||||
printf '%%s\n' "$*" >> "$HOME/invocations.log"
|
||||
if [ "$1" = "onboard" ]; then
|
||||
/usr/bin/env | /usr/bin/sort > "$HOME/onboard-env.log"
|
||||
/bin/mkdir -p "$HOME/.openclaw"
|
||||
/bin/cat > "$HOME/.openclaw/openclaw.json" <<'EOF'
|
||||
{"wizard":{"lastRunAt":"2026-01-01T00:00:00Z"},"gateway":{"port":18789,"mode":"local"}}
|
||||
EOF
|
||||
fi
|
||||
exit 0
|
||||
`)
|
||||
if err := os.WriteFile(bin, []byte(script), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
oldConfirmPrompt := DefaultConfirmPrompt
|
||||
DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
|
||||
if prompt != "I understand the risks. Continue?" {
|
||||
t.Fatalf("unexpected prompt: %q", prompt)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
defer func() { DefaultConfirmPrompt = oldConfirmPrompt }()
|
||||
|
||||
c := &Openclaw{}
|
||||
if err := c.Run("llama3.2", []string{"status"}); err != nil {
|
||||
t.Fatalf("Run() error = %v", err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(filepath.Join(tmpDir, "invocations.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lines := strings.Split(strings.TrimSpace(string(data)), "\n")
|
||||
if len(lines) < 2 {
|
||||
t.Fatalf("expected onboard + passthrough invocations, got %v", lines)
|
||||
}
|
||||
onboardInvocation := ""
|
||||
for _, line := range lines {
|
||||
if strings.HasPrefix(line, "onboard ") {
|
||||
onboardInvocation = line
|
||||
break
|
||||
}
|
||||
}
|
||||
if onboardInvocation == "" {
|
||||
t.Fatalf("expected onboard invocation, got %v", lines)
|
||||
}
|
||||
if !strings.Contains(onboardInvocation, "--skip-health") {
|
||||
t.Fatalf("expected onboard invocation to include --skip-health, got %q", onboardInvocation)
|
||||
}
|
||||
|
||||
envData, err := os.ReadFile(filepath.Join(tmpDir, "onboard-env.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
env := envSliceToMap(strings.Split(strings.TrimSpace(string(envData)), "\n"))
|
||||
if env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"] != "1" {
|
||||
t.Fatalf("OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS = %q, want %q", env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"], "1")
|
||||
}
|
||||
if env["OPENCLAW_PLUGIN_STAGE_DIR"] != filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps") {
|
||||
t.Fatalf("OPENCLAW_PLUGIN_STAGE_DIR = %q, want %q", env["OPENCLAW_PLUGIN_STAGE_DIR"], filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawRun_FirstLaunchTUIArgsEnsureGatewayBeforePassthrough(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("uses a POSIX shell test binary")
|
||||
}
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("PATH", tmpDir)
|
||||
|
||||
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer ln.Close()
|
||||
port := ln.Addr().(*net.TCPAddr).Port
|
||||
|
||||
bin := filepath.Join(tmpDir, "openclaw")
|
||||
script := fmt.Sprintf(`#!/bin/sh
|
||||
printf '%%s\n' "$*" >> "$HOME/invocations.log"
|
||||
if [ "$1" = "onboard" ]; then
|
||||
/bin/mkdir -p "$HOME/.openclaw"
|
||||
/bin/cat > "$HOME/.openclaw/openclaw.json" <<'EOF'
|
||||
{"wizard":{"lastRunAt":"2026-01-01T00:00:00Z"},"gateway":{"port":%d,"mode":"local"}}
|
||||
EOF
|
||||
fi
|
||||
exit 0
|
||||
`, port)
|
||||
if err := os.WriteFile(bin, []byte(script), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
oldConfirmPrompt := DefaultConfirmPrompt
|
||||
DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
|
||||
if prompt != "I understand the risks. Continue?" {
|
||||
t.Fatalf("unexpected prompt: %q", prompt)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
defer func() { DefaultConfirmPrompt = oldConfirmPrompt }()
|
||||
|
||||
c := &Openclaw{}
|
||||
if err := c.Run("llama3.2", []string{"tui"}); err != nil {
|
||||
t.Fatalf("Run() error = %v", err)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(filepath.Join(tmpDir, "invocations.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lines := strings.Split(strings.TrimSpace(string(data)), "\n")
|
||||
if len(lines) < 3 {
|
||||
t.Fatalf("expected at least 3 invocations (update, onboard, daemon restart, tui), got %v", lines)
|
||||
}
|
||||
onboardIdx, daemonRestartIdx, tuiIdx := -1, -1, -1
|
||||
for i, line := range lines {
|
||||
if onboardIdx == -1 && strings.HasPrefix(line, "onboard ") {
|
||||
onboardIdx = i
|
||||
}
|
||||
if daemonRestartIdx == -1 && line == "daemon restart" {
|
||||
daemonRestartIdx = i
|
||||
}
|
||||
if tuiIdx == -1 && line == "tui" {
|
||||
tuiIdx = i
|
||||
}
|
||||
}
|
||||
if onboardIdx == -1 {
|
||||
t.Fatalf("expected an onboarding invocation, got %v", lines)
|
||||
}
|
||||
if daemonRestartIdx == -1 {
|
||||
t.Fatalf("expected a daemon restart before tui, got %v", lines)
|
||||
}
|
||||
if tuiIdx == -1 {
|
||||
t.Fatalf("expected a tui invocation, got %v", lines)
|
||||
}
|
||||
if !(onboardIdx < daemonRestartIdx && daemonRestartIdx < tuiIdx) {
|
||||
t.Fatalf("expected onboarding, then daemon restart, then tui; got %v", lines)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawEnsureGatewayReady_UsesDaemonStartFallback(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("uses a POSIX shell test binary")
|
||||
}
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("PATH", tmpDir)
|
||||
|
||||
portProbe, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
port := portProbe.Addr().(*net.TCPAddr).Port
|
||||
_ = portProbe.Close()
|
||||
|
||||
configDir := filepath.Join(tmpDir, ".openclaw")
|
||||
if err := os.MkdirAll(configDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(configDir, "openclaw.json"), []byte(fmt.Sprintf(`{
|
||||
"wizard": {"lastRunAt": "2026-01-01T00:00:00Z"},
|
||||
"gateway": {"port": %d, "mode": "local"}
|
||||
}`, port)), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
bin := filepath.Join(tmpDir, "openclaw")
|
||||
if err := os.WriteFile(bin, []byte("#!/bin/sh\nprintf '%s\\n' \"$*\" >> \"$HOME/invocations.log\"\n"), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
oldCanInstallDaemon := openclawCanInstallDaemon
|
||||
openclawCanInstallDaemon = func() bool { return true }
|
||||
defer func() { openclawCanInstallDaemon = oldCanInstallDaemon }()
|
||||
|
||||
triggeredBy := make(chan string, 1)
|
||||
listenerReady := make(chan net.Listener, 1)
|
||||
go func() {
|
||||
invocationsPath := filepath.Join(tmpDir, "invocations.log")
|
||||
deadline := time.Now().Add(5 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
data, err := os.ReadFile(invocationsPath)
|
||||
if err == nil {
|
||||
lines := strings.Split(strings.TrimSpace(string(data)), "\n")
|
||||
for _, line := range lines {
|
||||
if line != "daemon start" && line != "gateway run --force" {
|
||||
continue
|
||||
}
|
||||
ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_ = conn.Close()
|
||||
}
|
||||
}()
|
||||
triggeredBy <- line
|
||||
listenerReady <- ln
|
||||
return
|
||||
}
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
c := &Openclaw{}
|
||||
cleanup, _, gotPort, err := c.ensureGatewayReady(bin)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureGatewayReady() error = %v", err)
|
||||
}
|
||||
defer cleanup()
|
||||
if gotPort != port {
|
||||
t.Fatalf("ensureGatewayReady() port = %d, want %d", gotPort, port)
|
||||
}
|
||||
|
||||
var ln net.Listener
|
||||
select {
|
||||
case which := <-triggeredBy:
|
||||
if which != "daemon start" {
|
||||
t.Fatalf("expected daemon start fallback, got %q", which)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("timed out waiting for gateway startup trigger")
|
||||
}
|
||||
select {
|
||||
case ln = <-listenerReady:
|
||||
defer ln.Close()
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("timed out waiting for test listener")
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(filepath.Join(tmpDir, "invocations.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
lines := strings.Split(strings.TrimSpace(string(data)), "\n")
|
||||
if len(lines) == 0 || lines[0] != "daemon start" {
|
||||
t.Fatalf("expected daemon start invocation, got %v", lines)
|
||||
}
|
||||
for _, line := range lines {
|
||||
if line == "gateway run --force" {
|
||||
t.Fatalf("did not expect gateway run fallback when daemon start succeeds, got %v", lines)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawEnv_StagesBundledPluginRuntimeDeps(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("OPENAI_API_KEY", "should-be-cleared")
|
||||
|
||||
env := envSliceToMap(openclawEnv())
|
||||
|
||||
if env["OPENCLAW_PLUGIN_STAGE_DIR"] != filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps") {
|
||||
t.Fatalf("OPENCLAW_PLUGIN_STAGE_DIR = %q, want %q", env["OPENCLAW_PLUGIN_STAGE_DIR"], filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps"))
|
||||
}
|
||||
if _, ok := env["OPENAI_API_KEY"]; ok {
|
||||
t.Fatal("expected OPENAI_API_KEY to be cleared from openclaw environment")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawInstallEnv_PreservesExplicitStageDirAndAddsEagerDeps(t *testing.T) {
|
||||
t.Setenv("OPENCLAW_PLUGIN_STAGE_DIR", "/tmp/custom-stage")
|
||||
|
||||
env := envSliceToMap(openclawInstallEnv())
|
||||
|
||||
if env["OPENCLAW_PLUGIN_STAGE_DIR"] != "/tmp/custom-stage" {
|
||||
t.Fatalf("OPENCLAW_PLUGIN_STAGE_DIR = %q, want %q", env["OPENCLAW_PLUGIN_STAGE_DIR"], "/tmp/custom-stage")
|
||||
}
|
||||
if env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"] != "1" {
|
||||
t.Fatalf("OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS = %q, want %q", env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"], "1")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureOpenclawInstalled_UsesBundledPluginInstallEnv(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("uses a POSIX shell test binary")
|
||||
}
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
setTestHome(t, tmpDir)
|
||||
t.Setenv("PATH", tmpDir)
|
||||
|
||||
writeScript := func(path, content string) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, []byte(content), 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
openclawPath := filepath.Join(tmpDir, "openclaw")
|
||||
npmScript := fmt.Sprintf(`#!/bin/sh
|
||||
/usr/bin/env | /usr/bin/sort > "$HOME/npm-env.log"
|
||||
/bin/cat > %q <<'EOF'
|
||||
#!/bin/sh
|
||||
exit 0
|
||||
EOF
|
||||
/bin/chmod +x %q
|
||||
exit 0
|
||||
`, openclawPath, openclawPath)
|
||||
writeScript(filepath.Join(tmpDir, "npm"), npmScript)
|
||||
writeScript(filepath.Join(tmpDir, "git"), "#!/bin/sh\nexit 0\n")
|
||||
|
||||
oldConfirmPrompt := DefaultConfirmPrompt
|
||||
DefaultConfirmPrompt = func(prompt string, options ConfirmOptions) (bool, error) {
|
||||
if prompt != "OpenClaw is not installed. Install with npm?" {
|
||||
t.Fatalf("unexpected prompt: %q", prompt)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
defer func() { DefaultConfirmPrompt = oldConfirmPrompt }()
|
||||
|
||||
openclawFreshInstall = false
|
||||
bin, err := ensureOpenclawInstalled()
|
||||
if err != nil {
|
||||
t.Fatalf("ensureOpenclawInstalled() error = %v", err)
|
||||
}
|
||||
if bin != "openclaw" {
|
||||
t.Fatalf("ensureOpenclawInstalled() bin = %q, want %q", bin, "openclaw")
|
||||
}
|
||||
|
||||
envData, err := os.ReadFile(filepath.Join(tmpDir, "npm-env.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
env := envSliceToMap(strings.Split(strings.TrimSpace(string(envData)), "\n"))
|
||||
if env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"] != "1" {
|
||||
t.Fatalf("OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS = %q, want %q", env["OPENCLAW_EAGER_BUNDLED_PLUGIN_DEPS"], "1")
|
||||
}
|
||||
if env["OPENCLAW_PLUGIN_STAGE_DIR"] != filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps") {
|
||||
t.Fatalf("OPENCLAW_PLUGIN_STAGE_DIR = %q, want %q", env["OPENCLAW_PLUGIN_STAGE_DIR"], filepath.Join(tmpDir, ".openclaw", "plugin-runtime-deps"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenclawEdit(t *testing.T) {
|
||||
c := &Openclaw{}
|
||||
tmpDir := t.TempDir()
|
||||
@@ -1227,6 +1580,18 @@ func TestOpenclawChannelsConfigured(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// envSliceToMap turns KEY=VALUE strings into a map. Each entry is split at its
// first "="; entries without "=" are ignored, and when a key repeats the last
// occurrence wins.
func envSliceToMap(entries []string) map[string]string {
	result := make(map[string]string, len(entries))
	for i := range entries {
		sep := strings.IndexByte(entries[i], '=')
		if sep < 0 {
			continue
		}
		result[entries[i][:sep]] = entries[i][sep+1:]
	}
	return result
}
|
||||
|
||||
func TestOpenclawChannelSetupPreflight(t *testing.T) {
|
||||
if runtime.GOOS == "windows" {
|
||||
t.Skip("uses a POSIX shell test binary")
|
||||
@@ -2242,95 +2607,7 @@ func TestIntegrationOnboarded(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestVersionLessThan(t *testing.T) {
|
||||
tests := []struct {
|
||||
a, b string
|
||||
want bool
|
||||
}{
|
||||
{"0.1.7", "0.2.1", true},
|
||||
{"0.2.0", "0.2.1", true},
|
||||
{"0.2.1", "0.2.1", false},
|
||||
{"0.2.2", "0.2.1", false},
|
||||
{"1.0.0", "0.2.1", false},
|
||||
{"0.2.1", "1.0.0", true},
|
||||
{"v0.1.7", "0.2.1", true},
|
||||
{"0.2.1", "v0.2.1", false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) {
|
||||
if got := versionLessThan(tt.a, tt.b); got != tt.want {
|
||||
t.Errorf("versionLessThan(%q, %q) = %v, want %v", tt.a, tt.b, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebSearchPluginUpToDate(t *testing.T) {
|
||||
t.Run("missing directory", func(t *testing.T) {
|
||||
if webSearchPluginUpToDate(filepath.Join(t.TempDir(), "nonexistent")) {
|
||||
t.Error("expected false for missing directory")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("missing package.json", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected false for missing package.json")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("old version", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"0.1.7"}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected false for old version 0.1.7")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("exact minimum version", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"0.2.1"}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected true for exact minimum version 0.2.1")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("newer version", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":"1.0.0"}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected true for newer version 1.0.0")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid json", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`not json`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected false for invalid json")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty version", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "package.json"), []byte(`{"version":""}`), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if webSearchPluginUpToDate(dir) {
|
||||
t.Error("expected false for empty version")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
func TestConfigureOllamaWebSearch(t *testing.T) {
|
||||
home := t.TempDir()
|
||||
setTestHome(t, home)
|
||||
|
||||
@@ -2345,7 +2622,7 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
registerWebSearchPlugin()
|
||||
configureOllamaWebSearch()
|
||||
|
||||
data, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
@@ -2361,40 +2638,30 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
t.Fatal("plugins section missing")
|
||||
}
|
||||
|
||||
// Check entries
|
||||
entries, _ := plugins["entries"].(map[string]any)
|
||||
entry, _ := entries["openclaw-web-search"].(map[string]any)
|
||||
entry, _ := entries["ollama"].(map[string]any)
|
||||
if enabled, _ := entry["enabled"].(bool); !enabled {
|
||||
t.Error("expected entries.openclaw-web-search.enabled = true")
|
||||
t.Error("expected entries.ollama.enabled = true")
|
||||
}
|
||||
if _, ok := entries["openclaw-web-search"]; ok {
|
||||
t.Error("expected stale openclaw-web-search entry to be absent")
|
||||
}
|
||||
|
||||
// Check allow list
|
||||
allow, _ := plugins["allow"].([]any)
|
||||
found := false
|
||||
for _, v := range allow {
|
||||
if s, ok := v.(string); ok && s == "openclaw-web-search" {
|
||||
found = true
|
||||
}
|
||||
if _, ok := plugins["allow"]; ok {
|
||||
t.Error("did not expect plugins.allow to be created when no allowlist exists")
|
||||
}
|
||||
if !found {
|
||||
t.Error("expected plugins.allow to contain openclaw-web-search")
|
||||
if _, ok := plugins["installs"]; ok {
|
||||
t.Error("did not expect plugins.installs to be created")
|
||||
}
|
||||
|
||||
// Check install provenance
|
||||
installs, _ := plugins["installs"].(map[string]any)
|
||||
record, _ := installs["openclaw-web-search"].(map[string]any)
|
||||
if record == nil {
|
||||
t.Fatal("expected plugins.installs.openclaw-web-search")
|
||||
tools, _ := config["tools"].(map[string]any)
|
||||
web, _ := tools["web"].(map[string]any)
|
||||
search, _ := web["search"].(map[string]any)
|
||||
if got, _ := search["provider"].(string); got != "ollama" {
|
||||
t.Errorf("search provider = %q, want %q", got, "ollama")
|
||||
}
|
||||
if source, _ := record["source"].(string); source != "npm" {
|
||||
t.Errorf("install source = %q, want %q", source, "npm")
|
||||
}
|
||||
if spec, _ := record["spec"].(string); spec != webSearchNpmPackage {
|
||||
t.Errorf("install spec = %q, want %q", spec, webSearchNpmPackage)
|
||||
}
|
||||
expectedPath := filepath.Join(home, ".openclaw", "extensions", "openclaw-web-search")
|
||||
if installPath, _ := record["installPath"].(string); installPath != expectedPath {
|
||||
t.Errorf("installPath = %q, want %q", installPath, expectedPath)
|
||||
if enabled, _ := search["enabled"].(bool); !enabled {
|
||||
t.Error("expected tools.web.search.enabled = true")
|
||||
}
|
||||
})
|
||||
|
||||
@@ -2403,8 +2670,8 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
registerWebSearchPlugin()
|
||||
registerWebSearchPlugin()
|
||||
configureOllamaWebSearch()
|
||||
configureOllamaWebSearch()
|
||||
|
||||
data, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
@@ -2416,30 +2683,39 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
}
|
||||
|
||||
plugins, _ := config["plugins"].(map[string]any)
|
||||
allow, _ := plugins["allow"].([]any)
|
||||
count := 0
|
||||
for _, v := range allow {
|
||||
if s, ok := v.(string); ok && s == "openclaw-web-search" {
|
||||
count++
|
||||
}
|
||||
entries, _ := plugins["entries"].(map[string]any)
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("expected only bundled ollama entry, got %v", entries)
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("expected exactly 1 openclaw-web-search in allow, got %d", count)
|
||||
if _, ok := entries["ollama"]; !ok {
|
||||
t.Fatalf("expected entries.ollama to exist, got %v", entries)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("preserves existing config", func(t *testing.T) {
|
||||
t.Run("migrates stale plugin config and preserves unrelated settings", func(t *testing.T) {
|
||||
initial := map[string]any{
|
||||
"plugins": map[string]any{
|
||||
"allow": []any{"some-other-plugin"},
|
||||
"allow": []any{"some-other-plugin", "openclaw-web-search"},
|
||||
"entries": map[string]any{
|
||||
"some-other-plugin": map[string]any{"enabled": true},
|
||||
"some-other-plugin": map[string]any{"enabled": true},
|
||||
"openclaw-web-search": map[string]any{"enabled": true},
|
||||
},
|
||||
"installs": map[string]any{
|
||||
"some-other-plugin": map[string]any{
|
||||
"source": "npm",
|
||||
"installPath": "/some/path",
|
||||
},
|
||||
"openclaw-web-search": map[string]any{
|
||||
"source": "npm",
|
||||
"installPath": "/old/path",
|
||||
},
|
||||
},
|
||||
},
|
||||
"tools": map[string]any{
|
||||
"alsoAllow": []any{"ollama_web_search", "ollama_web_fetch", "browser"},
|
||||
"web": map[string]any{
|
||||
"search": map[string]any{"enabled": false},
|
||||
"fetch": map[string]any{"enabled": false},
|
||||
},
|
||||
},
|
||||
"customField": "preserved",
|
||||
@@ -2449,7 +2725,7 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
registerWebSearchPlugin()
|
||||
configureOllamaWebSearch()
|
||||
|
||||
out, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
@@ -2469,28 +2745,61 @@ func TestRegisterWebSearchPlugin(t *testing.T) {
|
||||
if entries["some-other-plugin"] == nil {
|
||||
t.Error("existing plugin entry was lost")
|
||||
}
|
||||
if entries["openclaw-web-search"] != nil {
|
||||
t.Error("stale openclaw-web-search entry should be removed")
|
||||
}
|
||||
if ollamaEntry, _ := entries["ollama"].(map[string]any); ollamaEntry == nil {
|
||||
t.Fatal("expected bundled ollama entry to be enabled")
|
||||
}
|
||||
|
||||
installs, _ := plugins["installs"].(map[string]any)
|
||||
if installs["some-other-plugin"] == nil {
|
||||
t.Error("existing install record was lost")
|
||||
}
|
||||
if installs["openclaw-web-search"] != nil {
|
||||
t.Error("stale openclaw-web-search install record should be removed")
|
||||
}
|
||||
|
||||
allow, _ := plugins["allow"].([]any)
|
||||
hasOther, hasWebSearch := false, false
|
||||
hasOther, hasStalePlugin, hasOllama := false, false, false
|
||||
for _, v := range allow {
|
||||
s, _ := v.(string)
|
||||
if s == "some-other-plugin" {
|
||||
hasOther = true
|
||||
}
|
||||
if s == "openclaw-web-search" {
|
||||
hasWebSearch = true
|
||||
hasStalePlugin = true
|
||||
}
|
||||
if s == "ollama" {
|
||||
hasOllama = true
|
||||
}
|
||||
}
|
||||
if !hasOther {
|
||||
t.Error("existing allow entry was lost")
|
||||
}
|
||||
if !hasWebSearch {
|
||||
t.Error("openclaw-web-search not added to allow")
|
||||
if hasStalePlugin {
|
||||
t.Error("stale openclaw-web-search allow entry should be removed")
|
||||
}
|
||||
if !hasOllama {
|
||||
t.Error("expected plugins.allow to contain bundled ollama plugin")
|
||||
}
|
||||
|
||||
tools, _ := config["tools"].(map[string]any)
|
||||
alsoAllow, _ := tools["alsoAllow"].([]any)
|
||||
if len(alsoAllow) != 1 || alsoAllow[0] != "browser" {
|
||||
t.Errorf("expected stale custom web tools to be removed, got %v", alsoAllow)
|
||||
}
|
||||
web, _ := tools["web"].(map[string]any)
|
||||
search, _ := web["search"].(map[string]any)
|
||||
fetch, _ := web["fetch"].(map[string]any)
|
||||
if got, _ := search["provider"].(string); got != "ollama" {
|
||||
t.Errorf("search provider = %q, want %q", got, "ollama")
|
||||
}
|
||||
if enabled, _ := search["enabled"].(bool); !enabled {
|
||||
t.Error("expected migrated tools.web.search.enabled = true")
|
||||
}
|
||||
if enabled, _ := fetch["enabled"].(bool); !enabled {
|
||||
t.Error("expected migrated tools.web.fetch.enabled = true")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
title: Structured Outputs
|
||||
---
|
||||
|
||||
<Note>
|
||||
Ollama's Cloud currently does not support structured outputs.
|
||||
</Note>
|
||||
|
||||
Structured outputs let you enforce a JSON schema on model responses so you can reliably extract structured data, describe images, or keep every reply consistent.
|
||||
|
||||
## Generating structured JSON
|
||||
|
||||
@@ -15,7 +15,7 @@ Ollama handles everything automatically:
|
||||
1. **Install** — If OpenClaw isn't installed, Ollama prompts to install it via npm
|
||||
2. **Security** — On the first launch, a security notice explains the risks of tool access
|
||||
3. **Model** — Pick a model from the selector (local or cloud)
|
||||
4. **Onboarding** — Ollama configures the provider, installs the gateway daemon, sets your model as the primary, and installs the web search and fetch plugin
|
||||
4. **Onboarding** — Ollama configures the provider, installs the gateway daemon, sets your model as the primary, and enables OpenClaw's bundled Ollama web search
|
||||
5. **Gateway** — Starts in the background and opens the OpenClaw TUI
|
||||
|
||||
<Note>OpenClaw requires a larger context window. It is recommended to use a context window of at least 64k tokens if using local models. See [Context length](/context-length) for more information.</Note>
|
||||
@@ -24,19 +24,19 @@ Ollama handles everything automatically:
|
||||
|
||||
## Web search and fetch
|
||||
|
||||
OpenClaw ships with a web search and fetch plugin that gives local or cloud models the ability to search the web and extract readable page content.
|
||||
OpenClaw ships with a bundled Ollama `web_search` provider that lets local or cloud-backed Ollama setups search the web through the configured Ollama host.
|
||||
|
||||
```bash
|
||||
ollama launch openclaw
|
||||
```
|
||||
|
||||
Web search and fetch is enabled automatically when launching OpenClaw through Ollama. To install the plugin directly:
|
||||
Ollama web search is enabled automatically when launching OpenClaw through Ollama. To configure it manually:
|
||||
|
||||
```bash
|
||||
openclaw plugins install @ollama/openclaw-web-search
|
||||
openclaw configure --section web
|
||||
```
|
||||
|
||||
<Note>Web search for local models requires `ollama signin`.</Note>
|
||||
<Note>Ollama web search for local models requires `ollama signin`.</Note>
|
||||
|
||||
## Configure without launching
|
||||
|
||||
@@ -93,4 +93,3 @@ Link WhatsApp, Telegram, Slack, Discord, or iMessage to chat with your local mod
|
||||
```bash
|
||||
openclaw gateway stop
|
||||
```
|
||||
|
||||
|
||||
@@ -406,10 +406,6 @@ func TestAPIShowModel(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAPIGenerateLogprobs(t *testing.T) {
|
||||
if testModel != "" {
|
||||
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
|
||||
t.Skip("logprobs not supported by all runners")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
@@ -523,10 +519,6 @@ func TestAPIGenerateLogprobs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestAPIChatLogprobs(t *testing.T) {
|
||||
if testModel != "" {
|
||||
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
|
||||
t.Skip("logprobs not supported by all runners")
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
|
||||
@@ -632,8 +632,8 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
|
||||
}
|
||||
|
||||
if effort != "" {
|
||||
if !slices.Contains([]string{"high", "medium", "low", "none"}, effort) {
|
||||
return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", or \"none\")", effort)
|
||||
if !slices.Contains([]string{"high", "medium", "low", "max", "none"}, effort) {
|
||||
return nil, fmt.Errorf("invalid reasoning value: '%s' (must be \"high\", \"medium\", \"low\", \"max\", or \"none\")", effort)
|
||||
}
|
||||
|
||||
if effort == "none" {
|
||||
|
||||
@@ -55,6 +55,57 @@ func TestFromChatRequest_Basic(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromChatRequest_ReasoningEffort(t *testing.T) {
|
||||
effort := func(s string) *string { return &s }
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
effort *string
|
||||
want any // expected ThinkValue.Value; nil means req.Think should be nil
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "unset", effort: nil, want: nil},
|
||||
{name: "high", effort: effort("high"), want: "high"},
|
||||
{name: "medium", effort: effort("medium"), want: "medium"},
|
||||
{name: "low", effort: effort("low"), want: "low"},
|
||||
{name: "max", effort: effort("max"), want: "max"},
|
||||
{name: "none disables", effort: effort("none"), want: false},
|
||||
{name: "invalid", effort: effort("extreme"), wantErr: true},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := ChatCompletionRequest{
|
||||
Model: "test-model",
|
||||
Messages: []Message{{Role: "user", Content: "hi"}},
|
||||
ReasoningEffort: tc.effort,
|
||||
}
|
||||
result, err := FromChatRequest(req)
|
||||
if tc.wantErr {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for effort=%v, got none", *tc.effort)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if tc.want == nil {
|
||||
if result.Think != nil {
|
||||
t.Fatalf("expected nil Think, got %+v", result.Think)
|
||||
}
|
||||
return
|
||||
}
|
||||
if result.Think == nil {
|
||||
t.Fatalf("expected Think=%v, got nil", tc.want)
|
||||
}
|
||||
if result.Think.Value != tc.want {
|
||||
t.Fatalf("got Think.Value=%v, want %v", result.Think.Value, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromChatRequest_WithImage(t *testing.T) {
|
||||
imgData, _ := base64.StdEncoding.DecodeString(image)
|
||||
|
||||
|
||||
@@ -525,6 +525,18 @@ func FromResponsesRequest(r ResponsesRequest) (*api.ChatRequest, error) {
|
||||
options["num_predict"] = *r.MaxOutputTokens
|
||||
}
|
||||
|
||||
var think *api.ThinkValue
|
||||
if effort := r.Reasoning.Effort; effort != "" {
|
||||
switch effort {
|
||||
case "none":
|
||||
think = &api.ThinkValue{Value: false}
|
||||
case "low", "medium", "high", "max":
|
||||
think = &api.ThinkValue{Value: effort}
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid reasoning value: %q (must be \"high\", \"medium\", \"low\", \"max\", or \"none\")", effort)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert tools from Responses API format to api.Tool format
|
||||
var tools []api.Tool
|
||||
for _, t := range r.Tools {
|
||||
@@ -552,6 +564,7 @@ func FromResponsesRequest(r ResponsesRequest) (*api.ChatRequest, error) {
|
||||
Options: options,
|
||||
Tools: tools,
|
||||
Format: format,
|
||||
Think: think,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -415,6 +415,86 @@ func TestFromResponsesRequest_Tools(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromResponsesRequest_ReasoningEffort(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
effort string
|
||||
wantThink any
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "unset",
|
||||
},
|
||||
{
|
||||
name: "low",
|
||||
effort: "low",
|
||||
wantThink: "low",
|
||||
},
|
||||
{
|
||||
name: "medium",
|
||||
effort: "medium",
|
||||
wantThink: "medium",
|
||||
},
|
||||
{
|
||||
name: "high",
|
||||
effort: "high",
|
||||
wantThink: "high",
|
||||
},
|
||||
{
|
||||
name: "max",
|
||||
effort: "max",
|
||||
wantThink: "max",
|
||||
},
|
||||
{
|
||||
name: "none",
|
||||
effort: "none",
|
||||
wantThink: false,
|
||||
},
|
||||
{
|
||||
name: "invalid",
|
||||
effort: "extreme",
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
req := ResponsesRequest{
|
||||
Model: "deepseek-v4-flash",
|
||||
Input: ResponsesInput{Text: "hi"},
|
||||
}
|
||||
if tt.effort != "" {
|
||||
req.Reasoning.Effort = tt.effort
|
||||
}
|
||||
|
||||
chatReq, err := FromResponsesRequest(req)
|
||||
if tt.wantErr {
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if tt.wantThink == nil {
|
||||
if chatReq.Think != nil {
|
||||
t.Fatalf("Think = %#v, want nil", chatReq.Think)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if chatReq.Think == nil {
|
||||
t.Fatalf("Think = nil, want %v", tt.wantThink)
|
||||
}
|
||||
if chatReq.Think.Value != tt.wantThink {
|
||||
t.Errorf("Think.Value = %v, want %v", chatReq.Think.Value, tt.wantThink)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromResponsesRequest_FunctionCallOutput(t *testing.T) {
|
||||
// Test a complete tool call round-trip:
|
||||
// 1. User message asking about weather
|
||||
|
||||
@@ -375,8 +375,16 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
var builtinParser parsers.Parser
|
||||
if shouldUseHarmony(m) && m.Config.Parser == "" {
|
||||
m.Config.Parser = "harmony"
|
||||
if shouldUseHarmony(m) {
|
||||
// harmony's Reasoning field only understands low/medium/high; map "max" to "high"
|
||||
if req.Think != nil {
|
||||
if s, ok := req.Think.Value.(string); ok && s == "max" {
|
||||
req.Think.Value = "high"
|
||||
}
|
||||
}
|
||||
if m.Config.Parser == "" {
|
||||
m.Config.Parser = "harmony"
|
||||
}
|
||||
}
|
||||
|
||||
if !req.Raw && m.Config.Parser != "" {
|
||||
@@ -2320,8 +2328,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
||||
}
|
||||
msgs = filterThinkTags(msgs, m)
|
||||
|
||||
if shouldUseHarmony(m) && m.Config.Parser == "" {
|
||||
m.Config.Parser = "harmony"
|
||||
if shouldUseHarmony(m) {
|
||||
// harmony's Reasoning field only understands low/medium/high; map "max" to "high"
|
||||
if req.Think != nil {
|
||||
if s, ok := req.Think.Value.(string); ok && s == "max" {
|
||||
req.Think.Value = "high"
|
||||
}
|
||||
}
|
||||
if m.Config.Parser == "" {
|
||||
m.Config.Parser = "harmony"
|
||||
}
|
||||
}
|
||||
|
||||
var builtinParser parsers.Parser
|
||||
|
||||
@@ -151,22 +151,11 @@ func (c *Client) WaitUntilRunning(ctx context.Context) error {
|
||||
}
|
||||
}
|
||||
|
||||
// completionRequest is a properly-tagged version of llm.CompletionRequest for JSON serialization.
|
||||
type completionRequest struct {
|
||||
Prompt string `json:"prompt"`
|
||||
Options *completionOpts `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
type completionOpts struct {
|
||||
Temperature float32 `json:"temperature,omitempty"`
|
||||
TopP float32 `json:"top_p,omitempty"`
|
||||
MinP float32 `json:"min_p,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
RepeatLastN int `json:"repeat_last_n,omitempty"`
|
||||
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
|
||||
PresencePenalty float32 `json:"presence_penalty,omitempty"`
|
||||
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
|
||||
NumPredict int `json:"num_predict,omitempty"`
|
||||
type CompletionRequest struct {
|
||||
Prompt string
|
||||
Options api.Options
|
||||
Logprobs bool
|
||||
TopLogprobs int
|
||||
}
|
||||
|
||||
type CompletionResponse struct {
|
||||
@@ -179,6 +168,8 @@ type CompletionResponse struct {
|
||||
EvalCount int
|
||||
EvalDuration time.Duration
|
||||
|
||||
Logprobs []llm.Logprob
|
||||
|
||||
Error *api.StatusError
|
||||
}
|
||||
|
||||
@@ -203,21 +194,13 @@ func (c *Client) Close() error {
|
||||
|
||||
// Completion implements llm.LlamaServer.
|
||||
func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
|
||||
creq := completionRequest{
|
||||
Prompt: req.Prompt,
|
||||
creq := CompletionRequest{
|
||||
Prompt: req.Prompt,
|
||||
Logprobs: req.Logprobs,
|
||||
TopLogprobs: req.TopLogprobs,
|
||||
}
|
||||
if req.Options != nil {
|
||||
creq.Options = &completionOpts{
|
||||
Temperature: req.Options.Temperature,
|
||||
TopP: req.Options.TopP,
|
||||
MinP: req.Options.MinP,
|
||||
TopK: req.Options.TopK,
|
||||
RepeatLastN: req.Options.RepeatLastN,
|
||||
RepeatPenalty: req.Options.RepeatPenalty,
|
||||
PresencePenalty: req.Options.PresencePenalty,
|
||||
FrequencyPenalty: req.Options.FrequencyPenalty,
|
||||
NumPredict: req.Options.NumPredict,
|
||||
}
|
||||
creq.Options = *req.Options
|
||||
}
|
||||
|
||||
body, err := json.Marshal(creq)
|
||||
@@ -243,7 +226,7 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
respBody, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("%s", strings.TrimSpace(string(respBody)))
|
||||
return api.StatusError{StatusCode: resp.StatusCode, ErrorMessage: strings.TrimSpace(string(respBody))}
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
@@ -266,6 +249,7 @@ func (c *Client) Completion(ctx context.Context, req llm.CompletionRequest, fn f
|
||||
PromptEvalDuration: raw.PromptEvalDuration,
|
||||
EvalCount: raw.EvalCount,
|
||||
EvalDuration: raw.EvalDuration,
|
||||
Logprobs: raw.Logprobs,
|
||||
}
|
||||
|
||||
fn(cresp)
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ollama/ollama/logutil"
|
||||
@@ -18,20 +20,28 @@ import (
|
||||
type Array struct {
|
||||
ctx C.mlx_array
|
||||
name string
|
||||
pinned int
|
||||
pinned atomic.Int32
|
||||
}
|
||||
|
||||
var arrays []*Array
|
||||
var (
|
||||
arrays []*Array
|
||||
arraysMu sync.Mutex
|
||||
)
|
||||
|
||||
// constructor utilities
|
||||
|
||||
func New(name string) *Array {
|
||||
t := &Array{name: name}
|
||||
|
||||
if tracing {
|
||||
traceScratch = append(traceScratch, t)
|
||||
} else {
|
||||
arraysMu.Lock()
|
||||
defer arraysMu.Unlock()
|
||||
|
||||
arrays = append(arrays, t)
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
@@ -131,7 +141,7 @@ func (t *Array) Clone() *Array {
|
||||
func Pin(s ...*Array) {
|
||||
for _, t := range s {
|
||||
if t != nil {
|
||||
t.pinned++
|
||||
t.pinned.Add(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -140,8 +150,7 @@ func Pin(s ...*Array) {
|
||||
func Unpin(s ...*Array) {
|
||||
for _, t := range s {
|
||||
if t != nil {
|
||||
t.pinned--
|
||||
if t.pinned < 0 {
|
||||
if t.pinned.Add(-1) < 0 {
|
||||
panic(fmt.Sprintf("mlx.Unpin: negative pin count on array %q", t.name))
|
||||
}
|
||||
}
|
||||
@@ -151,9 +160,11 @@ func Unpin(s ...*Array) {
|
||||
// Sweep releases all unpinned arrays, primarily intermediate tensors. MLX will truly
|
||||
// free them when there are no other references, including dependencies in the graph.
|
||||
func Sweep() {
|
||||
arraysMu.Lock()
|
||||
defer arraysMu.Unlock()
|
||||
n := 0
|
||||
for _, t := range arrays {
|
||||
if t.pinned > 0 && t.Valid() {
|
||||
if t.pinned.Load() > 0 && t.Valid() {
|
||||
arrays[n] = t
|
||||
n++
|
||||
} else if t.Valid() {
|
||||
@@ -180,7 +191,7 @@ func (t *Array) String() string {
|
||||
func (t *Array) LogValue() slog.Value {
|
||||
attrs := []slog.Attr{
|
||||
slog.String("name", t.name),
|
||||
slog.Int("pinned", t.pinned),
|
||||
slog.Int("pinned", int(t.pinned.Load())),
|
||||
}
|
||||
if t.Valid() {
|
||||
attrs = append(attrs,
|
||||
@@ -194,19 +205,19 @@ func (t *Array) LogValue() slog.Value {
|
||||
|
||||
// shape utilities
|
||||
|
||||
func (t Array) Size() int {
|
||||
func (t *Array) Size() int {
|
||||
return int(C.mlx_array_size(t.ctx))
|
||||
}
|
||||
|
||||
func (t Array) NumBytes() int {
|
||||
func (t *Array) NumBytes() int {
|
||||
return int(C.mlx_array_nbytes(t.ctx))
|
||||
}
|
||||
|
||||
func (t Array) NumDims() int {
|
||||
func (t *Array) NumDims() int {
|
||||
return int(C.mlx_array_ndim(t.ctx))
|
||||
}
|
||||
|
||||
func (t Array) Dims() []int {
|
||||
func (t *Array) Dims() []int {
|
||||
dims := make([]int, t.NumDims())
|
||||
for i := range dims {
|
||||
dims[i] = t.Dim(i)
|
||||
@@ -215,29 +226,32 @@ func (t Array) Dims() []int {
|
||||
return dims
|
||||
}
|
||||
|
||||
func (t Array) Dim(dim int) int {
|
||||
func (t *Array) Dim(dim int) int {
|
||||
return int(C.mlx_array_dim(t.ctx, C.int(dim)))
|
||||
}
|
||||
|
||||
func (t Array) DType() DType {
|
||||
func (t *Array) DType() DType {
|
||||
return DType(C.mlx_array_dtype(t.ctx))
|
||||
}
|
||||
|
||||
// data utilities
|
||||
|
||||
func (t Array) Int() int {
|
||||
func (t *Array) Int() int {
|
||||
var item C.int64_t
|
||||
C.mlx_array_item_int64(&item, t.ctx)
|
||||
return int(item)
|
||||
}
|
||||
|
||||
func (t Array) Float() float64 {
|
||||
func (t *Array) Float() float64 {
|
||||
var item C.double
|
||||
C.mlx_array_item_float64(&item, t.ctx)
|
||||
return float64(item)
|
||||
}
|
||||
|
||||
func (t Array) Ints() []int {
|
||||
func (t *Array) Ints() []int {
|
||||
if dt := t.DType(); dt != DTypeInt32 {
|
||||
panic(fmt.Sprintf("mlx: Ints requires DTypeInt32, got %v", dt))
|
||||
}
|
||||
ints := make([]int, t.Size())
|
||||
for i, f := range unsafe.Slice(C.mlx_array_data_int32(t.ctx), len(ints)) {
|
||||
ints[i] = int(f)
|
||||
@@ -245,7 +259,10 @@ func (t Array) Ints() []int {
|
||||
return ints
|
||||
}
|
||||
|
||||
func (t Array) Floats() []float32 {
|
||||
func (t *Array) Floats() []float32 {
|
||||
if dt := t.DType(); dt != DTypeFloat32 {
|
||||
panic(fmt.Sprintf("mlx: Floats requires DTypeFloat32, got %v", dt))
|
||||
}
|
||||
floats := make([]float32, t.Size())
|
||||
for i, f := range unsafe.Slice(C.mlx_array_data_float32(t.ctx), len(floats)) {
|
||||
floats[i] = float32(f)
|
||||
@@ -253,7 +270,7 @@ func (t Array) Floats() []float32 {
|
||||
return floats
|
||||
}
|
||||
|
||||
func (t Array) Save(name string) error {
|
||||
func (t *Array) Save(name string) error {
|
||||
cName := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cName))
|
||||
C.mlx_save(cName, t.ctx)
|
||||
@@ -262,6 +279,8 @@ func (t Array) Save(name string) error {
|
||||
|
||||
// LogArrays logs all live arrays, sorted by size
|
||||
func LogArrays() {
|
||||
arraysMu.Lock()
|
||||
defer arraysMu.Unlock()
|
||||
sort.Slice(arrays, func(i, j int) bool {
|
||||
return arrays[i].NumBytes() > arrays[j].NumBytes()
|
||||
})
|
||||
@@ -270,7 +289,7 @@ func LogArrays() {
|
||||
for _, t := range arrays {
|
||||
nb := t.NumBytes()
|
||||
total += nb
|
||||
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned, t.Dims()))
|
||||
logutil.Trace(fmt.Sprintf("tensor %-60s %5s %5s pinned=%d %v", t.name, t.DType(), PrettyBytes(nb), t.pinned.Load(), t.Dims()))
|
||||
}
|
||||
logutil.Trace(fmt.Sprintf("tensors total: %d, size: %s, active: %s", len(arrays), PrettyBytes(total), PrettyBytes(ActiveMemory())))
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ func closureCallback(res *C.mlx_vector_array, input C.mlx_vector_array, payload
|
||||
traceScratch = nil
|
||||
defer func() {
|
||||
for _, a := range traceScratch {
|
||||
if a.pinned > 0 {
|
||||
if a.pinned.Load() > 0 {
|
||||
panic("mlx: traced array was pinned during compilation")
|
||||
}
|
||||
if a.Valid() {
|
||||
|
||||
@@ -24,8 +24,8 @@ func ScaledDotProductAttention(query, key, value, mask *Array, scale float32) *A
|
||||
}
|
||||
|
||||
type LayerNorm struct {
|
||||
Weight Array `weight:"weight"`
|
||||
Bias Array `weight:"bias"`
|
||||
Weight *Array `weight:"weight"`
|
||||
Bias *Array `weight:"bias"`
|
||||
}
|
||||
|
||||
func (r *LayerNorm) Forward(x *Array, eps float32) *Array {
|
||||
@@ -35,10 +35,10 @@ func (r *LayerNorm) Forward(x *Array, eps float32) *Array {
|
||||
}
|
||||
|
||||
type RMSNorm struct {
|
||||
Weight Array `weight:"weight"`
|
||||
Weight *Array `weight:"weight"`
|
||||
}
|
||||
|
||||
func (r RMSNorm) Forward(x *Array, eps float32) *Array {
|
||||
func (r *RMSNorm) Forward(x *Array, eps float32) *Array {
|
||||
out := New("FAST_RMSNORM")
|
||||
C.mlx_fast_rms_norm(&out.ctx, x.ctx, r.Weight.ctx, C.float(eps), DefaultStream().ctx)
|
||||
return out
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
package mlx
|
||||
|
||||
type Linear struct {
|
||||
Weight Array `weight:"weight"`
|
||||
Bias Array `weight:"bias"`
|
||||
Weight *Array `weight:"weight"`
|
||||
Bias *Array `weight:"bias"`
|
||||
}
|
||||
|
||||
// Forward computes the linear transformation: x @ Weight.T + Bias
|
||||
func (m Linear) Forward(x *Array) *Array {
|
||||
func (m *Linear) Forward(x *Array) *Array {
|
||||
w := m.Weight.Transpose(1, 0)
|
||||
if m.Bias.Valid() {
|
||||
return m.Bias.Addmm(x, w, 1.0, 1.0)
|
||||
@@ -15,14 +15,14 @@ func (m Linear) Forward(x *Array) *Array {
|
||||
return x.Matmul(w)
|
||||
}
|
||||
|
||||
func (m Linear) Gather(x, lhs, rhs *Array, sorted bool) *Array {
|
||||
func (m *Linear) Gather(x, lhs, rhs *Array, sorted bool) *Array {
|
||||
w := m.Weight.Transpose(0, 2, 1)
|
||||
// TODO: bias
|
||||
return x.GatherMM(w, lhs, rhs, sorted)
|
||||
}
|
||||
|
||||
type Embedding struct {
|
||||
Weight Array `weight:"weight"`
|
||||
Weight *Array `weight:"weight"`
|
||||
}
|
||||
|
||||
func (e *Embedding) Forward(indices *Array) *Array {
|
||||
|
||||
@@ -139,6 +139,12 @@ func (t *Array) Less(other *Array) *Array {
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *Array) MaxAxis(axis int, keepDims bool) *Array {
|
||||
out := New("MAX_AXIS")
|
||||
C.mlx_max_axis(&out.ctx, t.ctx, C.int(axis), C.bool(keepDims), DefaultStream().ctx)
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *Array) Matmul(other *Array) *Array {
|
||||
out := New("MATMUL")
|
||||
C.mlx_matmul(&out.ctx, t.ctx, other.ctx, DefaultStream().ctx)
|
||||
|
||||
@@ -6,36 +6,59 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
"github.com/ollama/ollama/x/mlxrunner/mlx"
|
||||
sampler "github.com/ollama/ollama/x/mlxrunner/sample"
|
||||
"github.com/ollama/ollama/x/tokenizer"
|
||||
)
|
||||
|
||||
// prefillChunkSize returns the fixed chunk size used for prefill, 2048
// (2 << 10). NOTE(review): the unit is presumably prompt tokens per
// prefill step; confirm against the caller.
func prefillChunkSize() int {
	const chunk = 2 << 10
	return chunk
}
|
||||
|
||||
func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
// Prepare tokenizes the prompt and validates it against the model's
|
||||
// context length. It is safe to call from any goroutine. On success it
|
||||
// populates request.Tokens and adjusts request.Options.NumPredict.
|
||||
func (r *Runner) Prepare(request *Request) error {
|
||||
if r.Model == nil {
|
||||
return errors.New("model not loaded")
|
||||
}
|
||||
|
||||
tokens := r.Tokenizer.Encode(request.Prompt, r.Tokenizer.AddBOS())
|
||||
if len(tokens) == 0 {
|
||||
return errors.New("empty prompt")
|
||||
}
|
||||
|
||||
if len(tokens) >= r.contextLength {
|
||||
return fmt.Errorf("input length (%d tokens) exceeds the model's maximum context length (%d tokens)", len(tokens), r.contextLength)
|
||||
}
|
||||
|
||||
// Cap generation to stay within the model's context length
|
||||
maxGenerate := r.contextLength - len(tokens)
|
||||
if request.Options.NumPredict <= 0 {
|
||||
request.Options.NumPredict = maxGenerate
|
||||
} else {
|
||||
request.Options.NumPredict = min(request.Options.NumPredict, maxGenerate)
|
||||
}
|
||||
|
||||
request.Tokens = tokens
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Runner) TextGenerationPipeline(ctx context.Context, request Request) error {
|
||||
mlx.ResetPeakMemory()
|
||||
ctx := request.Ctx
|
||||
var (
|
||||
sample *mlx.Array
|
||||
nextSample *mlx.Array
|
||||
)
|
||||
var sample, nextSample sampler.Result
|
||||
|
||||
defer func() {
|
||||
if request.Sampler != nil {
|
||||
request.Sampler.Free()
|
||||
}
|
||||
mlx.Unpin(sample)
|
||||
mlx.Unpin(nextSample)
|
||||
mlx.Unpin(sample.Arrays()...)
|
||||
mlx.Unpin(nextSample.Arrays()...)
|
||||
mlx.Sweep()
|
||||
mlx.ClearCache()
|
||||
|
||||
@@ -46,26 +69,7 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
slog.Info("peak memory", "size", mlx.PrettyBytes(mlx.PeakMemory()))
|
||||
}()
|
||||
|
||||
inputs := r.Tokenizer.Encode(request.Prompt, r.Tokenizer.AddBOS())
|
||||
if len(inputs) == 0 {
|
||||
return errors.New("empty prompt")
|
||||
}
|
||||
|
||||
if len(inputs) >= r.contextLength {
|
||||
return api.StatusError{
|
||||
StatusCode: http.StatusBadRequest,
|
||||
ErrorMessage: fmt.Sprintf("input length (%d tokens) exceeds the model's maximum context length (%d tokens)", len(inputs), r.contextLength),
|
||||
}
|
||||
}
|
||||
|
||||
// Cap generation to stay within the model's context length
|
||||
maxGenerate := r.contextLength - len(inputs)
|
||||
if request.Options.MaxTokens <= 0 {
|
||||
request.Options.MaxTokens = maxGenerate
|
||||
} else {
|
||||
request.Options.MaxTokens = min(request.Options.MaxTokens, maxGenerate)
|
||||
}
|
||||
|
||||
inputs := request.Tokens
|
||||
request.Sampler.ResetHistory(inputs)
|
||||
|
||||
session := r.cache.begin(r.Model, inputs)
|
||||
@@ -135,40 +139,38 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
mlx.ClearCache()
|
||||
}
|
||||
|
||||
step := func(token *mlx.Array) *mlx.Array {
|
||||
step := func(token *mlx.Array) sampler.Result {
|
||||
fwd := r.Model.Forward(token.ExpandDims(0), caches)
|
||||
logits := r.Model.Unembed(fwd)
|
||||
logits = logits.Slice(mlx.Slice(), mlx.Slice(logits.Dim(1)-1), mlx.Slice()).Squeeze(1)
|
||||
|
||||
sample := request.Sampler.Sample(logits)
|
||||
|
||||
mlx.Pin(sample)
|
||||
mlx.Pin(sample.Arrays()...)
|
||||
mlx.Sweep()
|
||||
mlx.AsyncEval(sample)
|
||||
|
||||
mlx.AsyncEval(sample.Arrays()...)
|
||||
return sample
|
||||
}
|
||||
|
||||
sample = step(mlx.FromValues(tokens[processed:], total-processed))
|
||||
|
||||
var b bytes.Buffer
|
||||
dec := decoder{tokenizer: r.Tokenizer}
|
||||
|
||||
final := CompletionResponse{Done: true, PromptEvalCount: len(inputs), EvalCount: request.Options.MaxTokens, DoneReason: 1}
|
||||
for i := range request.Options.MaxTokens {
|
||||
final := CompletionResponse{Done: true, PromptEvalCount: len(inputs), EvalCount: request.Options.NumPredict, DoneReason: 1}
|
||||
for i := range request.Options.NumPredict {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
request.Sampler.AppendToken(sample)
|
||||
nextSample = step(sample)
|
||||
request.Sampler.AppendToken(sample.Token)
|
||||
nextSample = step(sample.Token)
|
||||
|
||||
if i == 0 {
|
||||
mlx.Eval(sample)
|
||||
mlx.Eval(sample.Arrays()...)
|
||||
final.PromptEvalDuration = time.Since(now)
|
||||
now = time.Now()
|
||||
}
|
||||
|
||||
output := int32(sample.Int())
|
||||
output := int32(sample.Token.Int())
|
||||
session.outputs = append(session.outputs, output)
|
||||
|
||||
if r.Tokenizer.IsEOS(output) {
|
||||
@@ -177,17 +179,16 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
break
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case request.Responses <- CompletionResponse{
|
||||
Content: r.Decode(output, &b),
|
||||
}:
|
||||
if resp, ok := dec.decode(sample); ok {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case request.Responses <- resp:
|
||||
}
|
||||
}
|
||||
|
||||
mlx.Unpin(sample)
|
||||
sample = nextSample
|
||||
nextSample = nil
|
||||
mlx.Unpin(sample.Arrays()...)
|
||||
sample, nextSample = nextSample, sampler.Result{}
|
||||
|
||||
if i%256 == 0 {
|
||||
mlx.ClearCache()
|
||||
@@ -203,13 +204,57 @@ func (r *Runner) TextGenerationPipeline(request Request) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (r Runner) Decode(sample int32, b *bytes.Buffer) string {
|
||||
token := r.Tokenizer.Decode([]int32{sample})
|
||||
// decoder serializes sampled tokens into response chunks, holding bytes
|
||||
// whose UTF-8 sequence hasn't completed yet and the logprobs that belong
|
||||
// with those bytes so Content and Logprobs stay aligned when a chunk does
|
||||
// flush.
|
||||
type decoder struct {
|
||||
tokenizer *tokenizer.Tokenizer
|
||||
buf bytes.Buffer
|
||||
logprobs []llm.Logprob
|
||||
}
|
||||
|
||||
if _, err := b.WriteString(token); err != nil {
|
||||
slog.Error("Failed to write token to buffer", "error", err)
|
||||
return ""
|
||||
func (d *decoder) decode(res sampler.Result) (CompletionResponse, bool) {
|
||||
output := int32(res.Token.Int())
|
||||
d.buf.WriteString(d.tokenizer.Decode([]int32{output}))
|
||||
d.logprobs = append(d.logprobs, buildLogprob(res, d.tokenizer.Decode)...)
|
||||
|
||||
content := flushValidUTF8Prefix(&d.buf)
|
||||
if content == "" {
|
||||
return CompletionResponse{}, false
|
||||
}
|
||||
resp := CompletionResponse{Content: content, Logprobs: d.logprobs}
|
||||
d.logprobs = nil
|
||||
return resp, true
|
||||
}
|
||||
|
||||
func buildLogprob(sample sampler.Result, decode func([]int32) string) []llm.Logprob {
|
||||
if sample.Logprob == nil {
|
||||
return nil
|
||||
}
|
||||
tok := func(id int32) string { return decode([]int32{id}) }
|
||||
|
||||
out := llm.Logprob{
|
||||
TokenLogprob: llm.TokenLogprob{
|
||||
Token: tok(int32(sample.Token.Int())),
|
||||
Logprob: float64(sample.Logprob.Floats()[0]),
|
||||
},
|
||||
}
|
||||
|
||||
return flushValidUTF8Prefix(b)
|
||||
if sample.TopTokens != nil {
|
||||
ids := sample.TopTokens.Ints()
|
||||
vals := sample.TopLogprobs.Floats()
|
||||
pairs := make([]llm.TokenLogprob, len(ids))
|
||||
for i, id := range ids {
|
||||
pairs[i] = llm.TokenLogprob{
|
||||
Token: tok(int32(id)),
|
||||
Logprob: float64(vals[i]),
|
||||
}
|
||||
}
|
||||
sort.Slice(pairs, func(i, j int) bool {
|
||||
return pairs[i].Logprob > pairs[j].Logprob
|
||||
})
|
||||
out.TopLogprobs = pairs
|
||||
}
|
||||
return []llm.Logprob{out}
|
||||
}
|
||||
|
||||
@@ -18,34 +18,20 @@ import (
|
||||
"github.com/ollama/ollama/x/tokenizer"
|
||||
)
|
||||
|
||||
// Request is a short-lived struct that carries a completion request through
|
||||
// a channel from the HTTP handler to the runner goroutine. The ctx field
|
||||
// must travel with the request so that cancellation propagates across the
|
||||
// channel boundary.
|
||||
type Request struct {
|
||||
TextCompletionsRequest
|
||||
CompletionRequest
|
||||
Responses chan CompletionResponse
|
||||
Pipeline func(Request) error
|
||||
|
||||
Ctx context.Context
|
||||
Pipeline func(context.Context, Request) error
|
||||
|
||||
Ctx context.Context //nolint:containedctx
|
||||
Tokens []int32
|
||||
Sampler *sample.Sampler
|
||||
}
|
||||
|
||||
type TextCompletionsRequest struct {
|
||||
Prompt string `json:"prompt"`
|
||||
Options struct {
|
||||
Temperature float32 `json:"temperature"`
|
||||
TopP float32 `json:"top_p"`
|
||||
MinP float32 `json:"min_p"`
|
||||
TopK int `json:"top_k"`
|
||||
RepeatLastN int `json:"repeat_last_n"`
|
||||
RepeatPenalty float32 `json:"repeat_penalty"`
|
||||
PresencePenalty float32 `json:"presence_penalty"`
|
||||
FrequencyPenalty float32 `json:"frequency_penalty"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
|
||||
// Deprecated: use MaxTokens instead
|
||||
NumPredict int `json:"num_predict"`
|
||||
} `json:"options"`
|
||||
}
|
||||
|
||||
type Runner struct {
|
||||
Model base.Model
|
||||
Tokenizer *tokenizer.Tokenizer
|
||||
@@ -149,7 +135,7 @@ func (r *Runner) Run(host, port string, mux http.Handler) error {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
case request := <-r.Requests:
|
||||
if err := request.Pipeline(request); err != nil {
|
||||
if err := request.Pipeline(request.Ctx, request); err != nil {
|
||||
slog.Info("Request terminated", "error", err)
|
||||
var statusErr api.StatusError
|
||||
if !errors.As(err, &statusErr) {
|
||||
|
||||
249
x/mlxrunner/sample/logprob_test.go
Normal file
249
x/mlxrunner/sample/logprob_test.go
Normal file
@@ -0,0 +1,249 @@
|
||||
//go:build mlx
|
||||
|
||||
package sample
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/x/mlxrunner/mlx"
|
||||
)
|
||||
|
||||
// logprobEntry pairs a token id with its logprob as produced by the
// sampler's top-K extraction. The tests work with these after sorting them
// in descending logprob order.
type logprobEntry struct {
	id      int     // token id
	logprob float64 // log-probability reported for id
}
|
||||
|
||||
// runSampleLogprobs drives Sample on a fresh Sampler configured for logprobs
|
||||
// and returns the greedily-sampled token id, its logprob, and the top-K
|
||||
// entries sorted descending by logprob. Logits must be a [vocab]-shaped
|
||||
// slice; the helper reshapes it to [1, vocab] before calling the sampler.
|
||||
func runSampleLogprobs(t *testing.T, logits []float32, topK int) (int, float64, []logprobEntry) {
|
||||
t.Helper()
|
||||
|
||||
s := New(Options{Logprobs: true, TopLogprobs: topK})
|
||||
defer func() {
|
||||
s.Free()
|
||||
mlx.Sweep()
|
||||
}()
|
||||
|
||||
tensor := mlx.FromValues(logits, 1, len(logits))
|
||||
res := s.Sample(tensor)
|
||||
|
||||
mlx.Pin(res.Arrays()...)
|
||||
defer mlx.Unpin(res.Arrays()...)
|
||||
mlx.Sweep()
|
||||
mlx.Eval(res.Arrays()...)
|
||||
|
||||
selected := res.Token.Int()
|
||||
selLP := float64(res.Logprob.Floats()[0])
|
||||
|
||||
var top []logprobEntry
|
||||
if topK > 0 && res.TopTokens != nil {
|
||||
ids := res.TopTokens.Ints()
|
||||
vals := res.TopLogprobs.Floats()
|
||||
top = make([]logprobEntry, len(ids))
|
||||
for i, id := range ids {
|
||||
top[i] = logprobEntry{id: id, logprob: float64(vals[i])}
|
||||
}
|
||||
sort.Slice(top, func(i, j int) bool { return top[i].logprob > top[j].logprob })
|
||||
}
|
||||
return selected, selLP, top
|
||||
}
|
||||
|
||||
func TestSampleLogprobsBasic(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
logits []float32
|
||||
topK int
|
||||
wantSelectedID int
|
||||
wantTopLen int
|
||||
}{
|
||||
{
|
||||
name: "single token without top logprobs",
|
||||
logits: []float32{1.0, 0.5, 0.3, 0.1},
|
||||
topK: 0,
|
||||
wantSelectedID: 0,
|
||||
wantTopLen: 0,
|
||||
},
|
||||
{
|
||||
name: "single token with top logprobs",
|
||||
logits: []float32{1.0, 0.5, 0.3, 0.1},
|
||||
topK: 3,
|
||||
wantSelectedID: 0,
|
||||
wantTopLen: 3,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
selected, _, top := runSampleLogprobs(t, tt.logits, tt.topK)
|
||||
if selected != tt.wantSelectedID {
|
||||
t.Errorf("selected = %d, want %d", selected, tt.wantSelectedID)
|
||||
}
|
||||
if len(top) != tt.wantTopLen {
|
||||
t.Errorf("top-K length = %d, want %d", len(top), tt.wantTopLen)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleLogprobsNumericalStability(t *testing.T) {
|
||||
logits := []float32{1000.0, 999.0, 998.0}
|
||||
_, selLP, top := runSampleLogprobs(t, logits, 3)
|
||||
|
||||
if math.IsInf(selLP, 0) || math.IsNaN(selLP) {
|
||||
t.Errorf("selected logprob is not finite: %f", selLP)
|
||||
}
|
||||
for i, e := range top {
|
||||
if math.IsInf(e.logprob, 0) || math.IsNaN(e.logprob) {
|
||||
t.Errorf("top[%d] logprob is not finite: %f", i, e.logprob)
|
||||
}
|
||||
}
|
||||
for i := 1; i < len(top); i++ {
|
||||
if top[i].logprob > top[i-1].logprob {
|
||||
t.Errorf("top logprobs not descending: %f > %f", top[i].logprob, top[i-1].logprob)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleLogprobsProbabilityCorrectness(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
logits []float32
|
||||
}{
|
||||
{"uniform", []float32{1.0, 1.0, 1.0, 1.0}},
|
||||
{"different", []float32{2.0, 1.0, 0.5, 0.1}},
|
||||
{"negative", []float32{-1.0, -2.0, -3.0, -4.0}},
|
||||
{"mixed", []float32{5.0, -5.0, 0.0, 2.5}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
selected, selLP, top := runSampleLogprobs(t, tt.logits, len(tt.logits))
|
||||
|
||||
if selLP > 0 {
|
||||
t.Errorf("selected logprob should be <= 0, got %f", selLP)
|
||||
}
|
||||
for i, e := range top {
|
||||
if e.logprob > 0 {
|
||||
t.Errorf("top[%d] logprob should be <= 0, got %f", i, e.logprob)
|
||||
}
|
||||
}
|
||||
|
||||
if tt.name == "uniform" {
|
||||
want := 1.0 / float64(len(tt.logits))
|
||||
got := math.Exp(selLP)
|
||||
if math.Abs(got-want) > 1e-6 {
|
||||
t.Errorf("uniform logits: selected prob = %f, want %f", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 1; i < len(top); i++ {
|
||||
if top[i].logprob > top[i-1].logprob {
|
||||
t.Errorf("top logprobs not descending at %d: %f > %f",
|
||||
i, top[i].logprob, top[i-1].logprob)
|
||||
}
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, e := range top {
|
||||
if e.id == selected {
|
||||
found = true
|
||||
if math.Abs(e.logprob-selLP) > 1e-6 {
|
||||
t.Errorf("selected logprob mismatch: selLP=%f top=%f", selLP, e.logprob)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("selected token %d not present in top-K", selected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleLogprobsSoftmaxCorrectness(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
logits []float32
|
||||
}{
|
||||
{"small vocabulary", []float32{1.0, 2.0, 3.0}},
|
||||
{"large differences", []float32{10.0, 0.0, -10.0}},
|
||||
{"all equal", []float32{5.0, 5.0, 5.0, 5.0, 5.0}},
|
||||
{"very large values", []float32{500.0, 499.0, 498.0}},
|
||||
{"very small values", []float32{-500.0, -499.0, -498.0}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, _, top := runSampleLogprobs(t, tt.logits, len(tt.logits))
|
||||
if len(top) != len(tt.logits) {
|
||||
t.Fatalf("top-K length = %d, want %d", len(top), len(tt.logits))
|
||||
}
|
||||
|
||||
var sum float64
|
||||
for _, e := range top {
|
||||
p := math.Exp(e.logprob)
|
||||
if p < 0 || p > 1 {
|
||||
t.Errorf("token %d: probability %f out of [0,1]", e.id, p)
|
||||
}
|
||||
sum += p
|
||||
}
|
||||
|
||||
if math.Abs(sum-1.0) > 1e-5 {
|
||||
t.Errorf("probabilities sum = %f, want 1.0", sum)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleLogprobsSelectedTokenCorrectness(t *testing.T) {
|
||||
logits := []float32{3.0, 1.0, 2.0, 0.5}
|
||||
|
||||
maxIdx := 0
|
||||
for i, v := range logits[1:] {
|
||||
if v > logits[maxIdx] {
|
||||
maxIdx = i + 1
|
||||
}
|
||||
}
|
||||
|
||||
selected, selLP, top := runSampleLogprobs(t, logits, len(logits))
|
||||
|
||||
if selected != maxIdx {
|
||||
t.Errorf("selected = %d, want argmax %d", selected, maxIdx)
|
||||
}
|
||||
|
||||
if top[0].id != maxIdx {
|
||||
t.Errorf("top[0].id = %d, want argmax %d", top[0].id, maxIdx)
|
||||
}
|
||||
if math.Abs(top[0].logprob-selLP) > 1e-6 {
|
||||
t.Errorf("top[0].logprob = %f, want selected %f", top[0].logprob, selLP)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampleLogprobsTopKOrdering(t *testing.T) {
|
||||
// Logits chosen so argmax order differs from index order.
|
||||
logits := []float32{2.0, 5.0, 1.0, 4.0, 3.0}
|
||||
wantOrder := []int{1, 3, 4, 0, 2}
|
||||
|
||||
_, _, top := runSampleLogprobs(t, logits, len(logits))
|
||||
|
||||
if len(top) != len(wantOrder) {
|
||||
t.Fatalf("top-K length = %d, want %d", len(top), len(wantOrder))
|
||||
}
|
||||
for i, e := range top {
|
||||
if e.id != wantOrder[i] {
|
||||
t.Errorf("top[%d].id = %d, want %d", i, e.id, wantOrder[i])
|
||||
}
|
||||
}
|
||||
for i := 1; i < len(top); i++ {
|
||||
if top[i].logprob > top[i-1].logprob {
|
||||
t.Errorf("top[%d].logprob (%f) > top[%d].logprob (%f)",
|
||||
i, top[i].logprob, i-1, top[i-1].logprob)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
type Transform func(*Sampler, *mlx.Array) *mlx.Array
|
||||
|
||||
type Sampler struct {
|
||||
type Options struct {
|
||||
Temperature float32
|
||||
TopP float32
|
||||
MinP float32
|
||||
@@ -18,45 +18,66 @@ type Sampler struct {
|
||||
PresencePenalty float32
|
||||
FrequencyPenalty float32
|
||||
|
||||
// Logprobs causes Sample to populate Result.Logprob with the selected
|
||||
// token's log-probability. TopLogprobs (when > 0) adds top-K pairs.
|
||||
Logprobs bool
|
||||
TopLogprobs int
|
||||
}
|
||||
|
||||
type Sampler struct {
|
||||
Options
|
||||
|
||||
history *mlx.Array
|
||||
historyLen int
|
||||
transforms []Transform
|
||||
}
|
||||
|
||||
func New(temp, top_p, min_p float32, top_k, repeatLastN int, repeatPenalty, presencePenalty, frequencyPenalty float32) *Sampler {
|
||||
if repeatPenalty <= 0 {
|
||||
repeatPenalty = 1
|
||||
// Result bundles the outputs of one decode step. The logprob tensors are
|
||||
// populated only when the sampler is configured to report them.
|
||||
type Result struct {
|
||||
Token *mlx.Array // sampled token id, shape [B]
|
||||
Logprob *mlx.Array // sampled-token logprob, shape [B,1]; nil unless Logprobs
|
||||
TopTokens *mlx.Array // top-K token ids, shape [B,K]; nil unless TopLogprobs > 0
|
||||
TopLogprobs *mlx.Array // top-K logprobs, shape [B,K]; nil unless TopLogprobs > 0
|
||||
}
|
||||
|
||||
// Arrays returns the tensor fields as a slice so callers can drive the mlx
|
||||
// lifecycle verbs (Pin, Unpin, Eval, AsyncEval) over the whole group. Unset
|
||||
// fields stay nil; the mlx helpers skip them.
|
||||
func (r Result) Arrays() []*mlx.Array {
|
||||
return []*mlx.Array{r.Token, r.Logprob, r.TopTokens, r.TopLogprobs}
|
||||
}
|
||||
|
||||
func New(opts Options) *Sampler {
|
||||
if opts.RepeatPenalty <= 0 {
|
||||
opts.RepeatPenalty = 1
|
||||
}
|
||||
|
||||
s := &Sampler{
|
||||
Temperature: temp,
|
||||
TopP: top_p,
|
||||
MinP: min_p,
|
||||
TopK: top_k,
|
||||
RepeatLastN: repeatLastN,
|
||||
RepeatPenalty: repeatPenalty,
|
||||
PresencePenalty: presencePenalty,
|
||||
FrequencyPenalty: frequencyPenalty,
|
||||
}
|
||||
s := &Sampler{Options: opts}
|
||||
|
||||
var transforms []Transform
|
||||
if s.usesHistory() {
|
||||
transforms = append(transforms, penalty)
|
||||
}
|
||||
|
||||
if top_p > 0 && top_p < 1 {
|
||||
transforms = append(transforms, topP)
|
||||
}
|
||||
|
||||
if min_p != 0 {
|
||||
transforms = append(transforms, minP)
|
||||
}
|
||||
|
||||
if top_k > 0 {
|
||||
hasTopP := opts.TopP > 0 && opts.TopP < 1
|
||||
hasTopK := opts.TopK > 0
|
||||
switch {
|
||||
case hasTopP:
|
||||
// topKTopP always does a full descending sort for the top-P
|
||||
// cumulative mask and opportunistically masks top-K during the
|
||||
// same pass when it is also configured.
|
||||
transforms = append(transforms, topKTopP)
|
||||
case hasTopK:
|
||||
// Argpartition (partial sort) is cheaper than a full sort.
|
||||
transforms = append(transforms, topK)
|
||||
}
|
||||
|
||||
if temp == 0 {
|
||||
if opts.MinP != 0 {
|
||||
transforms = append(transforms, minP)
|
||||
}
|
||||
|
||||
if opts.Temperature == 0 {
|
||||
transforms = append(transforms, greedy)
|
||||
} else {
|
||||
transforms = append(transforms, temperature)
|
||||
@@ -123,76 +144,121 @@ func (s *Sampler) Free() {
|
||||
s.setHistory(nil, 0)
|
||||
}
|
||||
|
||||
func (s *Sampler) Sample(logits *mlx.Array) *mlx.Array {
|
||||
// Sample runs the configured transform chain on the raw per-token logits
|
||||
// and returns the sampled token id plus, when configured, the reported
|
||||
// log-probability tensors for the selected token and the top-K tokens.
|
||||
func (s *Sampler) Sample(logits *mlx.Array) Result {
|
||||
scores := logits
|
||||
for _, transform := range s.transforms {
|
||||
logits = transform(s, logits)
|
||||
scores = transform(s, scores)
|
||||
}
|
||||
return logits
|
||||
}
|
||||
res := Result{Token: scores}
|
||||
|
||||
func greedy(_ *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
return logits.Argmax(-1, false)
|
||||
}
|
||||
|
||||
func temperature(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
return mlx.DivScalar(logits, s.Temperature).Categorical(-1)
|
||||
}
|
||||
|
||||
func topP(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
if s.TopP <= 0 || s.TopP >= 1 {
|
||||
return logits
|
||||
if s.Logprobs {
|
||||
// Compute log_softmax in fp32 and subtract the max before
|
||||
// logsumexp so the final subtraction stays on small values.
|
||||
// Otherwise it cancels two large numbers and loses precision.
|
||||
lp := logits.AsType(mlx.DTypeFloat32)
|
||||
lp = lp.Subtract(lp.MaxAxis(-1, true))
|
||||
lp = lp.Subtract(lp.Logsumexp(true))
|
||||
res.Logprob = lp.TakeAlongAxis(res.Token.ExpandDims(-1), -1)
|
||||
if k := s.TopLogprobs; k > 0 {
|
||||
if vocab := lp.Dim(lp.NumDims() - 1); k > vocab {
|
||||
k = vocab
|
||||
}
|
||||
// Argpartition on the negated values places the K largest
|
||||
// (unsorted) in positions [0:K].
|
||||
idx := lp.Negative().ArgpartitionAxis(k-1, -1).Slice(mlx.Slice(), mlx.Slice(0, k))
|
||||
res.TopTokens = idx.AsType(mlx.DTypeInt32)
|
||||
res.TopLogprobs = lp.TakeAlongAxis(idx, -1)
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
order := logits.Negative().ArgsortAxis(-1)
|
||||
sortedLogits := logits.TakeAlongAxis(order, -1)
|
||||
sortedProbs := mlx.SoftmaxAxis(sortedLogits, -1, true)
|
||||
prevCumProbs := sortedProbs.Cumsum(-1, false, true).Subtract(sortedProbs)
|
||||
func greedy(_ *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
return scores.Argmax(-1, false)
|
||||
}
|
||||
|
||||
func temperature(s *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
return mlx.DivScalar(scores, s.Temperature).Categorical(-1)
|
||||
}
|
||||
|
||||
// topKTopP applies top-P in a descending sort pass and, when top-K is also
|
||||
// configured, masks any surviving value below the K-th largest in the same
|
||||
// pass. Callers dispatch here whenever top-P is enabled — the top-K-only
|
||||
// case uses a cheaper partial sort via the topK transform.
|
||||
func topKTopP(s *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
vocab := scores.Dim(scores.NumDims() - 1)
|
||||
applyTopK := s.TopK > 0 && s.TopK < vocab
|
||||
|
||||
order := scores.Negative().ArgsortAxis(-1)
|
||||
sorted := scores.TakeAlongAxis(order, -1)
|
||||
negInf := mlx.FromValue(float32(math.Inf(-1)))
|
||||
|
||||
// Top-P: in descending order, keep tokens whose exclusive cumulative
|
||||
// probability is still below s.TopP.
|
||||
probs := mlx.SoftmaxAxis(sorted, -1, true)
|
||||
prevCumProbs := probs.Cumsum(-1, false, true).Subtract(probs)
|
||||
keep := prevCumProbs.Less(mlx.FromValue(s.TopP))
|
||||
filtered := mlx.Where(keep, sortedLogits, mlx.FromValue(float32(math.Inf(-1))))
|
||||
return logits.PutAlongAxis(order, filtered, -1)
|
||||
}
|
||||
sorted = mlx.Where(keep, sorted, negInf)
|
||||
|
||||
func minP(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
if s.MinP <= 0 || s.MinP > 1 {
|
||||
return logits
|
||||
out := scores.PutAlongAxis(order, sorted, -1)
|
||||
|
||||
// Top-K: sorted is already in descending order, so positions [K, V)
|
||||
// are the ones to drop. Scatter -inf through their original-layout
|
||||
// indices (order[K:]). Positional (not value-based) so exactly K
|
||||
// tokens survive — ties at the K-th logit get broken by the sort
|
||||
// order rather than promoted through the filter.
|
||||
if applyTopK {
|
||||
dropOrder := order.Slice(mlx.Slice(), mlx.Slice(s.TopK, mlx.End))
|
||||
out = out.PutAlongAxis(dropOrder, negInf, -1)
|
||||
}
|
||||
|
||||
maxLogits := logits.TakeAlongAxis(logits.Argmax(-1, true), -1)
|
||||
minLogits := mlx.AddScalar(maxLogits, float32(math.Log(float64(s.MinP))))
|
||||
return out
|
||||
}
|
||||
|
||||
func minP(s *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
if s.MinP <= 0 || s.MinP > 1 {
|
||||
return scores
|
||||
}
|
||||
|
||||
maxScore := scores.MaxAxis(-1, true)
|
||||
threshold := mlx.AddScalar(maxScore, float32(math.Log(float64(s.MinP))))
|
||||
|
||||
return mlx.Where(
|
||||
logits.Less(minLogits),
|
||||
scores.Less(threshold),
|
||||
mlx.FromValue(float32(math.Inf(-1))),
|
||||
logits,
|
||||
scores,
|
||||
)
|
||||
}
|
||||
|
||||
func topK(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
func topK(s *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
if s.TopK <= 0 {
|
||||
return logits
|
||||
return scores
|
||||
}
|
||||
|
||||
vocab := logits.Dim(logits.NumDims() - 1)
|
||||
vocab := scores.Dim(scores.NumDims() - 1)
|
||||
if s.TopK >= vocab {
|
||||
return logits
|
||||
return scores
|
||||
}
|
||||
|
||||
mask := logits.Negative().ArgpartitionAxis(s.TopK-1, -1).Slice(mlx.Slice(), mlx.Slice(s.TopK, mlx.End))
|
||||
return logits.PutAlongAxis(mask, mlx.FromValue(float32(math.Inf(-1))), -1)
|
||||
mask := scores.Negative().ArgpartitionAxis(s.TopK-1, -1).Slice(mlx.Slice(), mlx.Slice(s.TopK, mlx.End))
|
||||
return scores.PutAlongAxis(mask, mlx.FromValue(float32(math.Inf(-1))), -1)
|
||||
}
|
||||
|
||||
func penalty(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
func penalty(s *Sampler, scores *mlx.Array) *mlx.Array {
|
||||
if s.historyLen == 0 {
|
||||
return logits
|
||||
return scores
|
||||
}
|
||||
|
||||
tokenIndices := s.history
|
||||
if logits.NumDims() > 1 {
|
||||
if scores.NumDims() > 1 {
|
||||
tokenIndices = tokenIndices.ExpandDims(0)
|
||||
}
|
||||
|
||||
if s.RepeatPenalty != 1 || s.PresencePenalty != 0 {
|
||||
adjusted := logits.TakeAlongAxis(tokenIndices, -1)
|
||||
adjusted := scores.TakeAlongAxis(tokenIndices, -1)
|
||||
if s.RepeatPenalty != 1 {
|
||||
factor := mlx.Where(
|
||||
adjusted.Less(mlx.FromValue(float32(0))),
|
||||
@@ -204,12 +270,12 @@ func penalty(s *Sampler, logits *mlx.Array) *mlx.Array {
|
||||
if s.PresencePenalty != 0 {
|
||||
adjusted = mlx.AddScalar(adjusted, -s.PresencePenalty)
|
||||
}
|
||||
logits = logits.PutAlongAxis(tokenIndices, adjusted, -1)
|
||||
scores = scores.PutAlongAxis(tokenIndices, adjusted, -1)
|
||||
}
|
||||
|
||||
if s.FrequencyPenalty != 0 {
|
||||
logits = logits.ScatterAddAxis(tokenIndices, mlx.FromValue(-s.FrequencyPenalty), -1)
|
||||
scores = scores.ScatterAddAxis(tokenIndices, mlx.FromValue(-s.FrequencyPenalty), -1)
|
||||
}
|
||||
|
||||
return logits
|
||||
return scores
|
||||
}
|
||||
|
||||
@@ -10,8 +10,7 @@ import (
|
||||
)
|
||||
|
||||
func TestPresencePenaltyUsesAppendedTokenImmediately(t *testing.T) {
|
||||
// RepeatLastN = 1, PresencePenalty = 6
|
||||
s := New(0, 0, 0, 0, 1, 1, 6, 0)
|
||||
s := New(Options{RepeatLastN: 1, PresencePenalty: 6})
|
||||
defer func() {
|
||||
s.Free()
|
||||
mlx.Sweep()
|
||||
@@ -21,7 +20,7 @@ func TestPresencePenaltyUsesAppendedTokenImmediately(t *testing.T) {
|
||||
s.AppendToken(mlx.NewArrayInt32([]int32{1}, []int32{1}))
|
||||
|
||||
logits := mlx.FromValues([]float32{0, 5, 4}, 3)
|
||||
got := s.Sample(logits)
|
||||
got := s.Sample(logits).Token
|
||||
mlx.Eval(got)
|
||||
|
||||
// logits will be [0, -1, 4] after the penalty
|
||||
@@ -33,7 +32,7 @@ func TestPresencePenaltyUsesAppendedTokenImmediately(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRepeatPenaltyUsesHistoryWithoutPresencePenalty(t *testing.T) {
|
||||
s := New(0, 0, 0, 0, 1, 2, 0, 0)
|
||||
s := New(Options{RepeatLastN: 1, RepeatPenalty: 2})
|
||||
defer func() {
|
||||
s.Free()
|
||||
mlx.Sweep()
|
||||
@@ -42,7 +41,7 @@ func TestRepeatPenaltyUsesHistoryWithoutPresencePenalty(t *testing.T) {
|
||||
s.ResetHistory([]int32{1})
|
||||
|
||||
logits := mlx.FromValues([]float32{0, 5, 4}, 3)
|
||||
got := s.Sample(logits)
|
||||
got := s.Sample(logits).Token
|
||||
mlx.Eval(got)
|
||||
|
||||
// token 1 is repeated and positive, so 5 / 2 falls below token 2.
|
||||
@@ -53,7 +52,7 @@ func TestRepeatPenaltyUsesHistoryWithoutPresencePenalty(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFrequencyPenaltyUsesTokenCounts(t *testing.T) {
|
||||
s := New(0, 0, 0, 0, 4, 1, 0, 2)
|
||||
s := New(Options{RepeatLastN: 4, FrequencyPenalty: 2})
|
||||
defer func() {
|
||||
s.Free()
|
||||
mlx.Sweep()
|
||||
@@ -62,7 +61,7 @@ func TestFrequencyPenaltyUsesTokenCounts(t *testing.T) {
|
||||
s.ResetHistory([]int32{1, 1})
|
||||
|
||||
logits := mlx.FromValues([]float32{0, 5, 4}, 3)
|
||||
got := s.Sample(logits)
|
||||
got := s.Sample(logits).Token
|
||||
mlx.Eval(got)
|
||||
|
||||
// token 1 appears twice, so 5 - (2 * 2) falls below token 2.
|
||||
@@ -73,7 +72,7 @@ func TestFrequencyPenaltyUsesTokenCounts(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestMinPMasksTokensBelowThreshold(t *testing.T) {
|
||||
s := New(0, 0, 0.5, 0, 0, 1, 0, 0)
|
||||
s := New(Options{MinP: 0.5})
|
||||
defer func() {
|
||||
s.Free()
|
||||
mlx.Sweep()
|
||||
|
||||
@@ -2,7 +2,6 @@ package mlxrunner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"cmp"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
@@ -87,25 +86,30 @@ func Execute(args []string) error {
|
||||
mux.HandleFunc("POST /v1/completions", func(w http.ResponseWriter, r *http.Request) {
|
||||
request := Request{Responses: make(chan CompletionResponse)}
|
||||
|
||||
if err := json.NewDecoder(r.Body).Decode(&request.TextCompletionsRequest); err != nil {
|
||||
if err := json.NewDecoder(r.Body).Decode(&request.CompletionRequest); err != nil {
|
||||
slog.Error("Failed to decode request", "error", err)
|
||||
http.Error(w, "Bad Request", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
request.Options.MaxTokens = cmp.Or(request.Options.MaxTokens, request.Options.NumPredict)
|
||||
|
||||
request.Pipeline = runner.TextGenerationPipeline
|
||||
request.Sampler = sample.New(
|
||||
request.Options.Temperature,
|
||||
request.Options.TopP,
|
||||
request.Options.MinP,
|
||||
request.Options.TopK,
|
||||
request.Options.RepeatLastN,
|
||||
request.Options.RepeatPenalty,
|
||||
request.Options.PresencePenalty,
|
||||
request.Options.FrequencyPenalty,
|
||||
)
|
||||
request.Sampler = sample.New(sample.Options{
|
||||
Temperature: request.Options.Temperature,
|
||||
TopP: request.Options.TopP,
|
||||
MinP: request.Options.MinP,
|
||||
TopK: request.Options.TopK,
|
||||
RepeatLastN: request.Options.RepeatLastN,
|
||||
RepeatPenalty: request.Options.RepeatPenalty,
|
||||
PresencePenalty: request.Options.PresencePenalty,
|
||||
FrequencyPenalty: request.Options.FrequencyPenalty,
|
||||
Logprobs: request.Logprobs,
|
||||
TopLogprobs: request.TopLogprobs,
|
||||
})
|
||||
|
||||
if err := runner.Prepare(&request); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
var cancel context.CancelFunc
|
||||
request.Ctx, cancel = context.WithCancel(r.Context())
|
||||
|
||||
@@ -144,6 +144,8 @@ func TestRouterForwardMatchesLegacy(t *testing.T) {
|
||||
|
||||
gotScores, gotInds := r.Forward(x, cfg)
|
||||
wantScores, wantInds := legacyRouterForward(r, x, cfg)
|
||||
gotInds = gotInds.AsType(mlx.DTypeInt32)
|
||||
wantInds = wantInds.AsType(mlx.DTypeInt32)
|
||||
mlx.Eval(gotScores, gotInds, wantScores, wantInds)
|
||||
|
||||
if got, want := gotInds.Ints(), wantInds.Ints(); !intSlicesEqual(got, want) {
|
||||
|
||||
@@ -169,8 +169,8 @@ func TestQuantizedLinearMXFP4MatchesDequantizedWeight(t *testing.T) {
|
||||
dequantizedWeight := mlx.Dequantize(ql.Weight, ql.Scales, ql.QBiases, 32, 4, "mxfp4")
|
||||
mlx.Eval(dequantizedWeight)
|
||||
|
||||
qOut := ql.Forward(input)
|
||||
dOut := NewLinear(dequantizedWeight, nil).Forward(input)
|
||||
qOut := ql.Forward(input).AsType(mlx.DTypeFloat32)
|
||||
dOut := NewLinear(dequantizedWeight, nil).Forward(input).AsType(mlx.DTypeFloat32)
|
||||
mlx.Eval(qOut, dOut)
|
||||
|
||||
got := qOut.Floats()
|
||||
|
||||
Reference in New Issue
Block a user