diff --git a/api/types.go b/api/types.go
index 7c03d272a..82caf17dc 100644
--- a/api/types.go
+++ b/api/types.go
@@ -841,8 +841,7 @@ type CloudStatus struct {
 
 // StatusResponse is the response from [Client.CloudStatusExperimental].
 type StatusResponse struct {
-	Cloud         CloudStatus `json:"cloud"`
-	ContextLength int         `json:"context_length,omitempty"`
+	Cloud CloudStatus `json:"cloud"`
 }
 
 // GenerateResponse is the response passed into [GenerateResponseFunc].
diff --git a/cmd/launch/launch.go b/cmd/launch/launch.go
index 8b54796a1..a44569ad4 100644
--- a/cmd/launch/launch.go
+++ b/cmd/launch/launch.go
@@ -472,10 +472,6 @@ func (c *launcherClient) launchSingleIntegration(ctx context.Context, name strin
 		return nil
 	}
 
-	if err := lowContextLength(ctx, c.apiClient, []string{target}); err != nil {
-		return err
-	}
-
 	if target != current {
 		if err := config.SaveIntegration(name, []string{target}); err != nil {
 			return fmt.Errorf("failed to save: %w", err)
@@ -504,10 +500,6 @@ func (c *launcherClient) launchEditorIntegration(ctx context.Context, name strin
 		return nil
 	}
 
-	if err := lowContextLength(ctx, c.apiClient, models); err != nil {
-		return err
-	}
-
 	if needsConfigure || req.ModelOverride != "" {
 		if err := prepareEditorIntegration(name, runner, editor, models); err != nil {
 			return err
diff --git a/cmd/launch/launch_test.go b/cmd/launch/launch_test.go
index 923d8ff8d..98c64cf91 100644
--- a/cmd/launch/launch_test.go
+++ b/cmd/launch/launch_test.go
@@ -1,7 +1,6 @@
 package launch
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -14,7 +13,6 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/cmd/config"
 )
 
@@ -1990,259 +1988,3 @@ func compareStringSlices(got, want [][]string) string {
 	}
 	return ""
 }
-
-func TestConfirmLowContextLength(t *testing.T) {
-	tests := []struct {
-		name          string
-		models        []string
-		statusBody    string
-		statusCode    int
-		showParams    string // Parameters field returned by /api/show
-		showBody      string // full JSON body for /api/show (overrides showParams when set)
-		wantWarning   bool
-		wantModelfile bool // true if warning should mention Modelfile
-	}{
-		{
-			name:       "no warning when server context meets recommended",
-			models:     []string{"llama3.2"},
-			statusBody: `{"cloud":{},"context_length":65536}`,
-			statusCode: http.StatusOK,
-		},
-		{
-			name:       "no warning when server context exceeds recommended",
-			models:     []string{"llama3.2"},
-			statusBody: `{"cloud":{},"context_length":131072}`,
-			statusCode: http.StatusOK,
-		},
-		{
-			name:        "warns when server context is below recommended",
-			models:      []string{"llama3.2"},
-			statusBody:  `{"cloud":{},"context_length":4096}`,
-			statusCode:  http.StatusOK,
-			wantWarning: true,
-		},
-		{
-			name:       "no warning when status endpoint fails",
-			models:     []string{"llama3.2"},
-			statusCode: http.StatusInternalServerError,
-		},
-		{
-			name:       "no warning for cloud-only models even with low context",
-			models:     []string{"gpt-4o:cloud"},
-			statusBody: `{"cloud":{},"context_length":4096}`,
-			statusCode: http.StatusOK,
-		},
-		{
-			name:       "no warning when models list is empty",
-			models:     []string{},
-			statusBody: `{"cloud":{},"context_length":4096}`,
-			statusCode: http.StatusOK,
-		},
-		{
-			name:       "no warning when modelfile num_ctx meets recommended",
-			models:     []string{"llama3.2"},
-			statusBody: `{"cloud":{},"context_length":4096}`,
-			statusCode: http.StatusOK,
-			showParams: "num_ctx                        65536",
-		},
-		{
-			name:       "no warning when modelfile num_ctx exceeds recommended",
-			models:     []string{"llama3.2"},
-			statusBody: `{"cloud":{},"context_length":4096}`,
-			statusCode: http.StatusOK,
-			showParams: "num_ctx                        131072",
-		},
-		{
-			name:          "warns with modelfile hint when modelfile num_ctx is below recommended",
-			models:        []string{"llama3.2"},
-			statusBody:    `{"cloud":{},"context_length":131072}`,
-			statusCode:    http.StatusOK,
-			showParams:    "num_ctx                        4096",
-			wantWarning:   true,
-			wantModelfile: true,
-		},
-		{
-			name:          "warns with modelfile hint when both server and modelfile are low",
-			models:        []string{"llama3.2"},
-			statusBody:    `{"cloud":{},"context_length":2048}`,
-			statusCode:    http.StatusOK,
-			showParams:    "num_ctx                        4096",
-			wantWarning:   true,
-			wantModelfile: true,
-		},
-		{
-			name:       "no warning when status returns malformed JSON",
-			models:     []string{"llama3.2"},
-			statusBody: `{invalid json`,
-			statusCode: http.StatusOK,
-		},
-		{
-			name:       "no warning when status returns empty body with 200",
-			models:     []string{"llama3.2"},
-			statusBody: "",
-			statusCode: http.StatusOK,
-		},
-		{
-			name:       "no warning when show endpoint fails",
-			models:     []string{"llama3.2"},
-			statusBody: `{"cloud":{},"context_length":65536}`,
-			statusCode: http.StatusOK,
-			showParams: "SHOW_ERROR", // sentinel to make show return 500
-		},
-		{
-			name:       "no warning for safetensors model with high context length",
-			models:     []string{"qwen3.5"},
-			statusBody: `{"cloud":{},"context_length":32768}`,
-			statusCode: http.StatusOK,
-			showBody:   `{"details":{"format":"safetensors"},"model_info":{"qwen3_5_moe.context_length":262144}}`,
-		},
-		{
-			name:        "warns for safetensors model with low context length",
-			models:      []string{"small-model"},
-			statusBody:  `{"cloud":{},"context_length":32768}`,
-			statusCode:  http.StatusOK,
-			showBody:    `{"details":{"format":"safetensors"},"model_info":{"small.context_length":4096}}`,
-			wantWarning: true,
-		},
-		{
-			name:       "no warning for safetensors model even when server context is low",
-			models:     []string{"qwen3.5"},
-			statusBody: `{"cloud":{},"context_length":4096}`,
-			statusCode: http.StatusOK,
-			showBody:   `{"details":{"format":"safetensors"},"model_info":{"qwen3_5_moe.context_length":262144}}`,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if r.URL.Path == "/api/status" {
-					w.WriteHeader(tt.statusCode)
-					if tt.statusBody != "" {
-						fmt.Fprint(w, tt.statusBody)
-					}
-					return
-				}
-				if r.URL.Path == "/api/show" {
-					if tt.showParams == "SHOW_ERROR" {
-						w.WriteHeader(http.StatusInternalServerError)
-						return
-					}
-					w.WriteHeader(http.StatusOK)
-					if tt.showBody != "" {
-						fmt.Fprint(w, tt.showBody)
-					} else {
-						fmt.Fprintf(w, `{"parameters":%q}`, tt.showParams)
-					}
-					return
-				}
-				http.NotFound(w, r)
-			}))
-			defer srv.Close()
-			t.Setenv("OLLAMA_HOST", srv.URL)
-
-			client, err := newTestClient(srv.URL)
-			if err != nil {
-				t.Fatalf("failed to create client: %v", err)
-			}
-
-			// capture stderr
-			oldStderr := os.Stderr
-			r, w, _ := os.Pipe()
-			os.Stderr = w
-
-			err = lowContextLength(context.Background(), client, tt.models)
-
-			w.Close()
-			var buf bytes.Buffer
-			buf.ReadFrom(r)
-			os.Stderr = oldStderr
-			output := buf.String()
-
-			if err != nil {
-				t.Fatalf("unexpected error: %v", err)
-			}
-			hasWarning := strings.Contains(output, "Warning:")
-			if hasWarning != tt.wantWarning {
-				t.Fatalf("expected warning=%v, got output: %q", tt.wantWarning, output)
-			}
-			if tt.wantWarning && tt.wantModelfile {
-				if !strings.Contains(output, "Use the base model") {
-					t.Fatalf("expected parent model hint in output: %q", output)
-				}
-			}
-			if tt.wantWarning && !tt.wantModelfile {
-				if strings.Contains(output, "Use the base model") {
-					t.Fatalf("expected server hint, not parent model hint in output: %q", output)
-				}
-			}
-		})
-	}
-}
-
-func TestParseNumCtxFromParameters(t *testing.T) {
-	tests := []struct {
-		name       string
-		parameters string
-		want       int
-	}{
-		{
-			name:       "extracts num_ctx",
-			parameters: "num_ctx                        65536",
-			want:       65536,
-		},
-		{
-			name:       "extracts num_ctx among other parameters",
-			parameters: "temperature                    0.7\nnum_ctx                        131072\nstop                           \"<|end|>\"",
-			want:       131072,
-		},
-		{
-			name:       "returns zero when no num_ctx",
-			parameters: "temperature                    0.7\nstop                           \"<|end|>\"",
-			want:       0,
-		},
-		{
-			name:       "returns zero for empty string",
-			parameters: "",
-			want:       0,
-		},
-		{
-			name:       "handles float representation",
-			parameters: "num_ctx                        65536.0",
-			want:       65536,
-		},
-		{
-			name:       "returns zero when num_ctx value is not a number",
-			parameters: "num_ctx                        abc",
-			want:       0,
-		},
-		{
-			name:       "returns zero for completely garbled input",
-			parameters: "!@#$%^&*()_+{}|:<>?",
-			want:       0,
-		},
-		{
-			name:       "returns zero when num_ctx has no value",
-			parameters: "num_ctx",
-			want:       0,
-		},
-		{
-			name:       "returns zero when num_ctx has extra fields",
-			parameters: "num_ctx 65536 extra_stuff",
-			want:       0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got := parseNumCtx(tt.parameters)
-			if got != tt.want {
-				t.Fatalf("parseNumCtx(%q) = %d, want %d", tt.parameters, got, tt.want)
-			}
-		})
-	}
-}
-
-func newTestClient(url string) (*api.Client, error) {
-	return api.ClientFromEnvironment()
-}
diff --git a/cmd/launch/models.go b/cmd/launch/models.go
index 7452fc2cd..d541eb4ae 100644
--- a/cmd/launch/models.go
+++ b/cmd/launch/models.go
@@ -9,7 +9,6 @@ import (
 	"os/exec"
 	"runtime"
 	"slices"
-	"strconv"
 	"strings"
 	"time"
 
@@ -444,101 +443,6 @@ func cloudStatusDisabled(ctx context.Context, client *api.Client) (disabled bool
 	return status.Cloud.Disabled, true
 }
 
-// TODO(ParthSareen): make this controllable on an integration level as well
-const recommendedContextLength = 64000
-
-func hasLocalModel(models []string) bool {
-	for _, m := range models {
-		if !isCloudModelName(m) {
-			return true
-		}
-	}
-	return false
-}
-
-func lowContextLength(ctx context.Context, client *api.Client, models []string) error {
-	if !hasLocalModel(models) {
-		return nil
-	}
-
-	status, err := client.CloudStatusExperimental(ctx)
-	if err != nil {
-		return nil //nolint:nilerr // best-effort check; ignore if status endpoint is unavailable
-	}
-	serverCtx := status.ContextLength
-	if serverCtx == 0 {
-		return nil // couldn't determine context length, skip check
-	}
-
-	for _, m := range models {
-		if isCloudModelName(m) {
-			continue
-		}
-		// A Modelfile can override num_ctx, which takes precedence over the server default.
-		effectiveCtx := serverCtx
-		modelfileOverride := false
-		var info *api.ShowResponse
-		if info, err = client.Show(ctx, &api.ShowRequest{Model: m}); err == nil {
-			// Safetensors (MLX) models always load at their full max context
-			// length, so the server default num_ctx doesn't apply.
-			if info.Details.Format == "safetensors" {
-				// Context length check in case models with low context length are added
-				if modelCtx := modelInfoContextLength(info.ModelInfo); modelCtx >= recommendedContextLength {
-					continue
-				}
-			}
-			if numCtx := parseNumCtx(info.Parameters); numCtx > 0 {
-				effectiveCtx = numCtx
-				modelfileOverride = true
-			}
-		}
-		if effectiveCtx < recommendedContextLength {
-			fmt.Fprintf(os.Stderr, "\n%sWarning: %s has a context length of %d tokens, which is below the recommended %d.%s\n", ansiYellow, m, effectiveCtx, recommendedContextLength, ansiReset)
-			if modelfileOverride {
-				parentModel := info.Details.ParentModel
-				fmt.Fprintf(os.Stderr, "%sUse the base model %s and increase the context length in Ollama App Settings.%s\n\n", ansiYellow, parentModel, ansiReset)
-			} else {
-				if runtime.GOOS == "windows" {
-					fmt.Fprintf(os.Stderr, "%sIncrease it in Ollama App Settings or with $env:OLLAMA_CONTEXT_LENGTH=%d; ollama serve%s\n\n", ansiYellow, recommendedContextLength, ansiReset)
-				} else {
-					fmt.Fprintf(os.Stderr, "%sIncrease it in Ollama App Settings or with OLLAMA_CONTEXT_LENGTH=%d ollama serve%s\n\n", ansiYellow, recommendedContextLength, ansiReset)
-				}
-			}
-			return nil
-		}
-	}
-	return nil
-}
-
-// parseNumCtx extracts num_ctx from the Show response Parameters string.
-func parseNumCtx(parameters string) int {
-	for _, line := range strings.Split(parameters, "\n") {
-		fields := strings.Fields(line)
-		if len(fields) == 2 && fields[0] == "num_ctx" {
-			if v, err := strconv.ParseFloat(fields[1], 64); err == nil {
-				return int(v)
-			}
-		}
-	}
-	return 0
-}
-
-// modelInfoContextLength extracts the model's architectural context length
-// from the ModelInfo map (e.g. "qwen3_5_moe.context_length" → 262144).
-func modelInfoContextLength(modelInfo map[string]any) int {
-	for k, v := range modelInfo {
-		if strings.HasSuffix(k, ".context_length") {
-			switch n := v.(type) {
-			case float64:
-				return int(n)
-			case int:
-				return n
-			}
-		}
-	}
-	return 0
-}
-
 // TODO(parthsareen): this duplicates the pull progress UI in cmd.PullHandler.
 // Move the shared pull rendering to a small utility once the package boundary settles.
 func pullModel(ctx context.Context, client *api.Client, model string, insecure bool) error {
diff --git a/server/routes.go b/server/routes.go
index 97796b066..28384ed42 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1937,19 +1937,11 @@ func streamResponse(c *gin.Context, ch chan any) {
 
 func (s *Server) StatusHandler(c *gin.Context) {
 	disabled, source := internalcloud.Status()
-
-	contextLength := int(envconfig.ContextLength())
-	if contextLength == 0 {
-		slog.Warn("OLLAMA_CONTEXT_LENGTH is not set, using default", "default", s.defaultNumCtx)
-		contextLength = s.defaultNumCtx
-	}
-
 	c.JSON(http.StatusOK, api.StatusResponse{
 		Cloud: api.CloudStatus{
 			Disabled: disabled,
 			Source:   source,
 		},
-		ContextLength: contextLength,
 	})
 }
 
diff --git a/server/routes_cloud_test.go b/server/routes_cloud_test.go
index eb6050b64..aaaf5b73d 100644
--- a/server/routes_cloud_test.go
+++ b/server/routes_cloud_test.go
@@ -44,62 +44,6 @@ func TestStatusHandler(t *testing.T) {
 	}
 }
 
-func TestStatusHandlerContextLength(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-	setTestHome(t, t.TempDir())
-
-	tests := []struct {
-		name           string
-		envContextLen  string
-		defaultNumCtx  int
-		wantContextLen int
-	}{
-		{
-			name:           "env var takes precedence over VRAM default",
-			envContextLen:  "8192",
-			defaultNumCtx:  32768,
-			wantContextLen: 8192,
-		},
-		{
-			name:           "falls back to VRAM default when env not set",
-			envContextLen:  "",
-			defaultNumCtx:  32768,
-			wantContextLen: 32768,
-		},
-		{
-			name:           "zero when neither is set",
-			envContextLen:  "",
-			defaultNumCtx:  0,
-			wantContextLen: 0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			if tt.envContextLen != "" {
-				t.Setenv("OLLAMA_CONTEXT_LENGTH", tt.envContextLen)
-			} else {
-				t.Setenv("OLLAMA_CONTEXT_LENGTH", "")
-			}
-
-			s := Server{defaultNumCtx: tt.defaultNumCtx}
-			w := createRequest(t, s.StatusHandler, nil)
-			if w.Code != http.StatusOK {
-				t.Fatalf("expected status 200, got %d", w.Code)
-			}
-
-			var resp api.StatusResponse
-			if err := json.NewDecoder(w.Body).Decode(&resp); err != nil {
-				t.Fatal(err)
-			}
-
-			if resp.ContextLength != tt.wantContextLen {
-				t.Fatalf("expected context_length %d, got %d", tt.wantContextLen, resp.ContextLength)
-			}
-		})
-	}
-}
-
 func TestCloudDisabledBlocksRemoteOperations(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	setTestHome(t, t.TempDir())