mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 15:53:27 +02:00
launch: skip context length warning for MLX models and show model name (#15102)
This commit is contained in:
@@ -1506,6 +1506,7 @@ func TestConfirmLowContextLength(t *testing.T) {
|
||||
statusBody string
|
||||
statusCode int
|
||||
showParams string // Parameters field returned by /api/show
|
||||
showBody string // full JSON body for /api/show (overrides showParams when set)
|
||||
wantWarning bool
|
||||
wantModelfile bool // true if warning should mention Modelfile
|
||||
}{
|
||||
@@ -1596,6 +1597,28 @@ func TestConfirmLowContextLength(t *testing.T) {
|
||||
statusCode: http.StatusOK,
|
||||
showParams: "SHOW_ERROR", // sentinel to make show return 500
|
||||
},
|
||||
{
|
||||
name: "no warning for safetensors model with high context length",
|
||||
models: []string{"qwen3.5"},
|
||||
statusBody: `{"cloud":{},"context_length":32768}`,
|
||||
statusCode: http.StatusOK,
|
||||
showBody: `{"details":{"format":"safetensors"},"model_info":{"qwen3_5_moe.context_length":262144}}`,
|
||||
},
|
||||
{
|
||||
name: "warns for safetensors model with low context length",
|
||||
models: []string{"small-model"},
|
||||
statusBody: `{"cloud":{},"context_length":32768}`,
|
||||
statusCode: http.StatusOK,
|
||||
showBody: `{"details":{"format":"safetensors"},"model_info":{"small.context_length":4096}}`,
|
||||
wantWarning: true,
|
||||
},
|
||||
{
|
||||
name: "no warning for safetensors model even when server context is low",
|
||||
models: []string{"qwen3.5"},
|
||||
statusBody: `{"cloud":{},"context_length":4096}`,
|
||||
statusCode: http.StatusOK,
|
||||
showBody: `{"details":{"format":"safetensors"},"model_info":{"qwen3_5_moe.context_length":262144}}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -1614,7 +1637,11 @@ func TestConfirmLowContextLength(t *testing.T) {
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintf(w, `{"parameters":%q}`, tt.showParams)
|
||||
if tt.showBody != "" {
|
||||
fmt.Fprint(w, tt.showBody)
|
||||
} else {
|
||||
fmt.Fprintf(w, `{"parameters":%q}`, tt.showParams)
|
||||
}
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
@@ -1648,12 +1675,12 @@ func TestConfirmLowContextLength(t *testing.T) {
|
||||
t.Fatalf("expected warning=%v, got output: %q", tt.wantWarning, output)
|
||||
}
|
||||
if tt.wantWarning && tt.wantModelfile {
|
||||
if !strings.Contains(output, "Use the model:") {
|
||||
if !strings.Contains(output, "Use the base model") {
|
||||
t.Fatalf("expected parent model hint in output: %q", output)
|
||||
}
|
||||
}
|
||||
if tt.wantWarning && !tt.wantModelfile {
|
||||
if strings.Contains(output, "Use the model:") {
|
||||
if strings.Contains(output, "Use the base model") {
|
||||
t.Fatalf("expected server hint, not parent model hint in output: %q", output)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -479,16 +479,24 @@ func lowContextLength(ctx context.Context, client *api.Client, models []string)
|
||||
modelfileOverride := false
|
||||
var info *api.ShowResponse
|
||||
if info, err = client.Show(ctx, &api.ShowRequest{Model: m}); err == nil {
|
||||
// Safetensors (MLX) models always load at their full max context
|
||||
// length, so the server default num_ctx doesn't apply.
|
||||
if info.Details.Format == "safetensors" {
|
||||
// Context length check in case models with low context length are added
|
||||
if modelCtx := modelInfoContextLength(info.ModelInfo); modelCtx >= recommendedContextLength {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if numCtx := parseNumCtx(info.Parameters); numCtx > 0 {
|
||||
effectiveCtx = numCtx
|
||||
modelfileOverride = true
|
||||
}
|
||||
}
|
||||
if effectiveCtx < recommendedContextLength {
|
||||
fmt.Fprintf(os.Stderr, "\n%sWarning: context window is %d tokens (recommended: %d+)%s\n", ansiYellow, effectiveCtx, recommendedContextLength, ansiReset)
|
||||
fmt.Fprintf(os.Stderr, "\n%sWarning: %s has a context length of %d tokens, which is below the recommended %d.%s\n", ansiYellow, m, effectiveCtx, recommendedContextLength, ansiReset)
|
||||
if modelfileOverride {
|
||||
parentModel := info.Details.ParentModel
|
||||
fmt.Fprintf(os.Stderr, "%sUse the model: %s and increase the context length to at least %d in Ollama App Settings.%s\n\n", ansiYellow, parentModel, recommendedContextLength, ansiReset)
|
||||
fmt.Fprintf(os.Stderr, "%sUse the base model %s and increase the context length in Ollama App Settings.%s\n\n", ansiYellow, parentModel, ansiReset)
|
||||
} else {
|
||||
if runtime.GOOS == "windows" {
|
||||
fmt.Fprintf(os.Stderr, "%sIncrease it in Ollama App Settings or with $env:OLLAMA_CONTEXT_LENGTH=%d; ollama serve%s\n\n", ansiYellow, recommendedContextLength, ansiReset)
|
||||
@@ -515,6 +523,22 @@ func parseNumCtx(parameters string) int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// modelInfoContextLength extracts the model's architectural context length
// from the ModelInfo map (e.g. "qwen3_5_moe.context_length" → 262144).
//
// Lookup order:
//  1. If the map holds a non-empty string under "general.architecture", the
//     key "<architecture>.context_length" is tried first.
//  2. Otherwise every key ending in ".context_length" with a numeric value is
//     a candidate, and the lexicographically smallest key wins. Map iteration
//     order is randomized in Go, so returning the first match encountered
//     would make the result vary between runs when several keys match.
//
// It returns 0 when modelInfo is nil/empty or no numeric match exists.
func modelInfoContextLength(modelInfo map[string]any) int {
	const suffix = ".context_length"

	// Reading from a nil map is safe and yields the zero value.
	if arch, ok := modelInfo["general.architecture"].(string); ok && arch != "" {
		if n, ok := contextLengthValue(modelInfo[arch+suffix]); ok {
			return n
		}
	}

	var (
		bestKey string
		best    int
		found   bool
	)
	for k, v := range modelInfo {
		if !strings.HasSuffix(k, suffix) {
			continue
		}
		n, ok := contextLengthValue(v)
		if !ok {
			// Non-numeric value under a matching key: keep looking.
			continue
		}
		if !found || k < bestKey {
			bestKey, best, found = k, n, true
		}
	}
	return best
}

// contextLengthValue converts a loosely typed numeric value to int. The
// boolean result is false when v is not one of the built-in numeric types.
func contextLengthValue(v any) (int, bool) {
	switch n := v.(type) {
	case float64:
		return int(n), true
	case float32:
		return int(n), true
	case int:
		return n, true
	case int8:
		return int(n), true
	case int16:
		return int(n), true
	case int32:
		return int(n), true
	case int64:
		return int(n), true
	case uint:
		return int(n), true
	case uint8:
		return int(n), true
	case uint16:
		return int(n), true
	case uint32:
		return int(n), true
	case uint64:
		return int(n), true
	}
	return 0, false
}
|
||||
|
||||
// TODO(parthsareen): this duplicates the pull progress UI in cmd.PullHandler.
|
||||
// Move the shared pull rendering to a small utility once the package boundary settles.
|
||||
func pullModel(ctx context.Context, client *api.Client, model string, insecure bool) error {
|
||||
|
||||
Reference in New Issue
Block a user