launch: skip context length warning for MLX models and show model name (#15102)

2026-04-18 16:23:27 +02:00 · 2026-03-27 15:01:33 -07:00
parent ac83ac20c4
commit b00bd1dfd4
2 changed files with 56 additions and 5 deletions
--- a/cmd/launch/models.go
+++ b/cmd/launch/models.go
@@ -479,16 +479,24 @@ func lowContextLength(ctx context.Context, client *api.Client, models []string)
 		modelfileOverride := false
 		var info *api.ShowResponse
 		if info, err = client.Show(ctx, &api.ShowRequest{Model: m}); err == nil {
+			// Safetensors (MLX) models always load at their full max context
+			// length, so the server default num_ctx doesn't apply.
+			if info.Details.Format == "safetensors" {
+				// Skip the warning only if the model's own context length meets the recommendation; otherwise fall through.
+				if modelCtx := modelInfoContextLength(info.ModelInfo); modelCtx >= recommendedContextLength {
+					continue
+				}
+			}
 			if numCtx := parseNumCtx(info.Parameters); numCtx > 0 {
 				effectiveCtx = numCtx
 				modelfileOverride = true
 			}
 		}
 		if effectiveCtx < recommendedContextLength {
-			fmt.Fprintf(os.Stderr, "\n%sWarning: context window is %d tokens (recommended: %d+)%s\n", ansiYellow, effectiveCtx, recommendedContextLength, ansiReset)
+			fmt.Fprintf(os.Stderr, "\n%sWarning: %s has a context length of %d tokens, which is below the recommended %d.%s\n", ansiYellow, m, effectiveCtx, recommendedContextLength, ansiReset)
 			if modelfileOverride {
 				parentModel := info.Details.ParentModel
-				fmt.Fprintf(os.Stderr, "%sUse the model: %s and increase the context length to at least %d in Ollama App Settings.%s\n\n", ansiYellow, parentModel, recommendedContextLength, ansiReset)
+				fmt.Fprintf(os.Stderr, "%sUse the base model %s and increase the context length in Ollama App Settings.%s\n\n", ansiYellow, parentModel, ansiReset)
 			} else {
 				if runtime.GOOS == "windows" {
 					fmt.Fprintf(os.Stderr, "%sIncrease it in Ollama App Settings or with $env:OLLAMA_CONTEXT_LENGTH=%d; ollama serve%s\n\n", ansiYellow, recommendedContextLength, ansiReset)
@@ -515,6 +523,22 @@ func parseNumCtx(parameters string) int {
 	return 0
 }

+// modelInfoContextLength scans modelInfo for a key ending in ".context_length"
+// (e.g. "qwen3_5_moe.context_length" → 262144); it returns 0 when none is usable.
+func modelInfoContextLength(modelInfo map[string]any) int {
+	for k, v := range modelInfo { // map order is unspecified: with several matching keys, any may win
+		if strings.HasSuffix(k, ".context_length") {
+			switch n := v.(type) { // values of any other type are ignored and the scan continues
+			case float64: // presumably how JSON-decoded numbers arrive — confirm
+				return int(n) // conversion drops any fractional part
+			case int:
+				return n
+			}
+		}
+	}
+	return 0 // no matching key held a float64 or int value
+}
+
 // TODO(parthsareen): this duplicates the pull progress UI in cmd.PullHandler.
 // Move the shared pull rendering to a small utility once the package boundary settles.
 func pullModel(ctx context.Context, client *api.Client, model string, insecure bool) error {