diff --git a/cmd/config/droid.go b/cmd/config/droid.go
index 340cda4fc..b2a0d9693 100644
--- a/cmd/config/droid.go
+++ b/cmd/config/droid.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"os"
@@ -8,6 +9,7 @@ import (
 	"path/filepath"
 	"slices"
 
+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/envconfig"
 )
 
@@ -112,9 +114,20 @@ func (d *Droid) Edit(models []string) error {
 	}
 
 	// Build new Ollama model entries with sequential indices (0, 1, 2, ...)
+	// Best-effort client: clientErr is checked before any cloud lookup below,
+	// so a misconfigured environment degrades to the local default limit.
+	client, clientErr := api.ClientFromEnvironment()
+
 	var newModels []any
 	var defaultModelID string
 	for i, model := range models {
+		// Default output-token cap for local models; cloud models get their
+		// published limit from cloudModelLimits instead.
+		maxOutput := 64000
+		if clientErr == nil && isCloudModel(context.Background(), client, model) {
+			if l, ok := lookupCloudModelLimit(model); ok {
+				maxOutput = l.Output
+			}
+		}
 		modelID := fmt.Sprintf("custom:%s-%d", model, i)
 		newModels = append(newModels, modelEntry{
 			Model:           model,
@@ -122,7 +135,7 @@ func (d *Droid) Edit(models []string) error {
 			BaseURL:         envconfig.Host().String() + "/v1",
 			APIKey:          "ollama",
 			Provider:        "generic-chat-completion-api",
-			MaxOutputTokens: 64000,
+			MaxOutputTokens: maxOutput,
 			SupportsImages:  false,
 			ID:              modelID,
 			Index:           i,
diff --git a/cmd/config/droid_test.go b/cmd/config/droid_test.go
index 58b4ea9f9..f13c3e936 100644
--- a/cmd/config/droid_test.go
+++ b/cmd/config/droid_test.go
@@ -1251,6 +1251,60 @@ func TestDroidEdit_LargeNumberOfModels(t *testing.T) {
 	}
 }
 
+func TestDroidEdit_LocalModelDefaultMaxOutput(t *testing.T) {
+	d := &Droid{}
+	tmpDir := t.TempDir()
+	setTestHome(t, tmpDir)
+
+	settingsDir := filepath.Join(tmpDir, ".factory")
+	settingsPath := filepath.Join(settingsDir, "settings.json")
+
+	if err := d.Edit([]string{"llama3.2"}); err != nil {
+		t.Fatal(err)
+	}
+
+	data, err := os.ReadFile(settingsPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	var settings map[string]any
+	if err := json.Unmarshal(data, &settings); err != nil {
+		t.Fatal(err)
+	}
+
+	models := settings["customModels"].([]any)
+	entry := models[0].(map[string]any)
+	if entry["maxOutputTokens"] != float64(64000) {
+		t.Errorf("local model maxOutputTokens = %v, want 64000", entry["maxOutputTokens"])
+	}
+}
+
+func TestDroidEdit_CloudModelLimitsUsed(t *testing.T) {
+	// Verify that every cloud model in cloudModelLimits has a valid output
+	// value that would be used for maxOutputTokens when isCloudModel returns true.
+	// :cloud suffix stripping must also work since that's how users specify them.
+	for name, expected := range cloudModelLimits {
+		t.Run(name, func(t *testing.T) {
+			l, ok := lookupCloudModelLimit(name)
+			if !ok {
+				t.Fatalf("lookupCloudModelLimit(%q) returned false", name)
+			}
+			if l.Output != expected.Output {
+				t.Errorf("output = %d, want %d", l.Output, expected.Output)
+			}
+			// Also verify :cloud suffix lookup
+			cloudName := name + ":cloud"
+			l2, ok := lookupCloudModelLimit(cloudName)
+			if !ok {
+				t.Fatalf("lookupCloudModelLimit(%q) returned false", cloudName)
+			}
+			if l2.Output != expected.Output {
+				t.Errorf(":cloud output = %d, want %d", l2.Output, expected.Output)
+			}
+		})
+	}
+}
+
 func TestDroidEdit_ArraysWithMixedTypes(t *testing.T) {
 	d := &Droid{}
 	tmpDir := t.TempDir()
diff --git a/cmd/config/opencode.go b/cmd/config/opencode.go
index 59a6f0119..0fe9f8a8f 100644
--- a/cmd/config/opencode.go
+++ b/cmd/config/opencode.go
@@ -39,6 +39,7 @@ var cloudModelLimits = map[string]cloudModelLimit{
 	"kimi-k2-thinking":    {Context: 262_144, Output: 262_144},
 	"nemotron-3-nano:30b": {Context: 1_048_576, Output: 131_072},
 	"qwen3-coder:480b":    {Context: 262_144, Output: 65_536},
+	"qwen3-coder-next":    {Context: 262_144, Output: 32_768},
 	"qwen3-next:80b":      {Context: 262_144, Output: 32_768},
 }
 
diff --git a/cmd/config/opencode_test.go b/cmd/config/opencode_test.go
index 9dc85dd10..8de174588 100644
--- a/cmd/config/opencode_test.go
+++ b/cmd/config/opencode_test.go
@@ -633,6 +633,7 @@ func TestLookupCloudModelLimit(t *testing.T) {
 		{"deepseek-v3.2", true, 163_840, 65_536},
 		{"deepseek-v3.2:cloud", true, 163_840, 65_536},
 		{"qwen3-coder:480b", true, 262_144, 65_536},
+		{"qwen3-coder-next:cloud", true, 262_144, 32_768},
 		{"llama3.2", false, 0, 0},
 		{"unknown-model:cloud", false, 0, 0},
 	}