Compare commits

...

63 Commits

Author SHA1 Message Date
Daniel Hiltgen
8d846fdbc0 Merge pull request #42 from ollama/jmorganca/gemma4-ggml-improvements
gemma4: fix MoE fused gate_up split and multiline tool-call arg parsing
2026-04-02 07:16:06 -07:00
jmorganca
f3536a356e use 4096 kvcache.NewSWAMemCache 2026-04-02 01:56:36 -07:00
jmorganca
c89280fb0c Format Gemma4 MoE block field alignment 2026-04-02 01:43:14 -07:00
jmorganca
eb5434d7fb Remove redundant comments in gemma4 vision model 2026-04-02 01:33:46 -07:00
jmorganca
2b949a11d9 convert: align gemma4 audio tensor renames with llama.cpp 2026-04-02 01:28:10 -07:00
jmorganca
6b013002fc gemma4: initialize clamps after backend load 2026-04-02 01:12:05 -07:00
jmorganca
5e622289c5 gemma4: use full SWA memory for better cache reuse 2026-04-02 00:40:32 -07:00
jmorganca
9c8bcecdb2 cmd: simplify audio input to dropped file attachments 2026-04-02 00:40:27 -07:00
jmorganca
1cbe7950d6 gemma4: fix MoE fused gate_up split and multiline tool-call arg parsing
- Text MoE: split `ffn_gate_up_exps` into contiguous `[gate|up]` halves instead of stride-2 slices.
- Parser: escape control characters in `<|"|>...<|"|>` string literals when converting tool-call args to JSON.
- Fixes warnings like `invalid character '\n' in string literal` for multiline tool arguments.
- Add Gemma4 parser regressions for multiline tool-call args and `gemma4ArgsToJSON`.
2026-04-02 00:01:44 -07:00
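A sketch of the fused split described in the first bullet: the gate projection occupies the first half of the fused tensor and the up projection the second, so the fix takes contiguous halves rather than even/odd stride-2 slices. The layout and names here are illustrative; the real code operates on GGML tensors inside the converter:

```go
package main

import "fmt"

// splitGateUp splits one fused [2*d] row into its gate and up halves.
// Contiguous layout: the first d values are gate, the last d are up.
// A stride-2 (even/odd) split would interleave and corrupt both projections.
func splitGateUp(fused []float32) (gate, up []float32) {
	d := len(fused) / 2
	return fused[:d], fused[d:]
}

func main() {
	fused := []float32{1, 2, 3, 4, 10, 20, 30, 40} // [gate|up] with d=4
	gate, up := splitGateUp(fused)
	fmt.Println(gate) // [1 2 3 4]
	fmt.Println(up)   // [10 20 30 40]
}
```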
Daniel Hiltgen
95073400fc gemma4: rewrite renderer to match HF Jinja2 template exactly
Fix 8 bugs found by building 55 reference tests verified against the
HF Jinja2 chat template (VERIFY_JINJA2=1 shells out to Python):

- Tool responses use separate <|turn>tool turns (not inline tags)
- Tool calls emitted before content in assistant messages
- Thinking content stripped from assistant history (strip_thinking)
- User, tool, and system content trimmed (template does | trim)
- Empty system message still emits system turn (check role, not content)
- Nested object properties rendered recursively with required field
- Array items specification rendered for array-type properties
- OBJECT/ARRAY type-specific rendering comma logic matches template

Also adds Required field to api.ToolProperty for nested object schemas,
replaces old gemma4_test.go with comprehensive gemma4_reference_test.go,
and commits the Jinja2 template as testdata for verification.
2026-04-01 22:36:30 -07:00
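One of the fixed behaviors, strip_thinking, is easy to sketch: thinking content is dropped from assistant messages in the history before rendering. The struct below is a simplified stand-in for api.Message, and this is only a sketch of the behavior named in the commit, not the renderer's actual code:

```go
package main

import "fmt"

// message is a simplified stand-in for api.Message.
type message struct {
	Role     string
	Content  string
	Thinking string
}

// stripThinking drops thinking text from assistant messages in the
// history before rendering, mirroring the template's strip_thinking.
func stripThinking(history []message) []message {
	out := make([]message, len(history))
	copy(out, history)
	for i := range out {
		if out[i].Role == "assistant" {
			out[i].Thinking = ""
		}
	}
	return out
}

func main() {
	h := []message{
		{Role: "user", Content: "hi"},
		{Role: "assistant", Content: "hello", Thinking: "the user greeted me"},
	}
	fmt.Printf("%+v\n", stripThinking(h))
}
```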
Daniel Hiltgen
c29932c631 gemma4: add per_expert_scale to MoE router and fix moe_intermediate_size config 2026-04-01 17:34:20 -07:00
Daniel Hiltgen
1ce101c9a0 gemma4: update converter for new weight drop naming 2026-04-01 15:44:47 -07:00
Daniel Hiltgen
5a7928ed38 gemma4: add OpenAI audio transcription API and input_audio support 2026-04-01 15:34:05 -07:00
Daniel Hiltgen
7fdc051091 gemma4: fix renderer to emit BOS token 2026-04-01 15:33:45 -07:00
Daniel Hiltgen
5bad871241 gemma4: add parser, renderer, and integration test plumbing 2026-04-01 15:31:08 -07:00
Daniel Hiltgen
82437d620a gemma4: add transcribe command (ollama transcribe MODEL)
- Interactive mode with readline prompt and slash commands
- Non-interactive mode for piped audio or record-until-Ctrl+C
- Chunked streaming transcription for long recordings
- Word-wrapped output matching run command style
2026-04-01 15:24:59 -07:00
Daniel Hiltgen
570c53859d gemma4: add audio input support for run command
- /audio toggle in interactive mode for voice chat
- Platform-specific microphone recording (AVFoundation on macOS,
  PulseAudio/ALSA on Linux, WASAPI on Windows)
- Space to start/stop recording, automatic chunking for long audio
2026-04-01 15:24:50 -07:00
Daniel Hiltgen
ebd70f73b7 gemma4: add OpenAI audio API support and capability detection
- Add CapabilityAudio and detect from audio.block_count in GGUF
- Add /v1/audio/transcriptions endpoint with TranscriptionMiddleware
- Add input_audio content type support in /v1/chat/completions
- Add TranscriptionRequest/Response types in openai package
2026-04-01 15:24:28 -07:00
Daniel Hiltgen
eb5df80733 integration: add gemma4 audio tests including OpenAI API coverage
Test audio transcription and response via the Ollama native API, plus
two new tests exercising the OpenAI-compatible endpoints:
- /v1/audio/transcriptions (multipart form upload)
- /v1/chat/completions with input_audio content type

All tests use capability checks and skip models without audio support.
2026-04-01 15:24:22 -07:00
Daniel Hiltgen
356c0b8e34 gemma4: add audio support with USM conformer encoder
Add audio encoding for Gemma 4 using the USM conformer architecture:
- Converter: audio tensor mapping, SSCP/conformer/embedder name replacements,
  softplus repacker for per_dim_scale, F32 enforcement for conv weights
- GGML backend: Conv1DDW and PadExt tensor ops
- Audio encoder: SSCP Conv2D, 12 conformer blocks (FFW + block-local
  attention with relative position embeddings + LightConv1d + FFW),
  output projection, audio-to-text embedding projector
- Audio preprocessing: WAV decode, mel spectrogram, FFT (pure Go)
- Model wiring: WAV detection, audio token handling, unified PostTokenize

Correctly transcribes "why is the sky blue" from test audio.
2026-04-01 15:24:17 -07:00
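The softplus repacker mentioned for per_dim_scale presumably bakes softplus(x) = ln(1 + e^x) into the weights at conversion time so the runtime can read the scales directly. A minimal sketch under that assumption:

```go
package main

import (
	"fmt"
	"math"
)

// softplus(x) = ln(1 + e^x); math.Log1p keeps the result accurate
// when e^x is small. (A production version would special-case large x
// to avoid overflow in Exp.)
func softplus(x float32) float32 {
	return float32(math.Log1p(math.Exp(float64(x))))
}

// repackPerDimScale applies softplus element-wise, assuming the repack
// happens once during GGUF conversion.
func repackPerDimScale(scale []float32) []float32 {
	out := make([]float32, len(scale))
	for i, v := range scale {
		out[i] = softplus(v)
	}
	return out
}

func main() {
	fmt.Println(repackPerDimScale([]float32{-1, 0, 1})) // softplus(0) ≈ 0.693
}
```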
Daniel Hiltgen
ea3c6a3cbe gemma4: add Gemma 4 GGML model support
Add full Gemma 4 model family support (E2B, E4B, 26B MoE, 31B Dense)
for the GGML backend including text, vision, converter, parser, and
renderer.

Text model features:
- Sliding window + full attention with per-layer patterns
- KV sharing across layers with donor map
- Per-layer embeddings (PLE) with learned projections
- MoE routing with RMSNorm + learned scale
- Proportional RoPE with freq_factors for global attention
- Final logit softcapping

Vision model features:
- SigLIP vision encoder with 2D RoPE
- ClippableLinear with input/output clamping via packed v.clamp_data
- Adaptive average pooling with nMerge kernel
- Multi-modal projection with unweighted RMSNorm

Converter:
- Safetensors to GGUF with vision tensor renaming
- Fused MoE gate_up_proj splitting
- Vision patch embedding reshape (HF to Conv2D layout)
- Packed clamp data tensor for ClippableLinear bounds
- Proportional RoPE freq_factors generation

Also includes:
- BackendGet() on ml.Tensor for reading weight tensor data
- Q6_K CUDA get_rows kernel support
- MoE-aware ffn_down quantization layer counting
- Gemma4 parser with tool calling and thinking support
- Gemma4 renderer with structured tool format
- Architecture-based auto-detection of renderer/parser/stop tokens
- Integration test gemma4 model list additions
2026-04-01 15:23:10 -07:00
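Final logit softcapping, listed under the text model features, conventionally computes logits = cap * tanh(logits / cap), bounding values to (-cap, cap) while leaving small logits nearly unchanged. A sketch of that formula; the actual cap value for Gemma 4 is not stated in the commit:

```go
package main

import (
	"fmt"
	"math"
)

// softcap squashes logits into (-c, c) via c * tanh(x / c); values well
// below the cap pass through almost unchanged.
func softcap(logits []float64, c float64) []float64 {
	out := make([]float64, len(logits))
	for i, x := range logits {
		out[i] = c * math.Tanh(x/c)
	}
	return out
}

func main() {
	fmt.Println(softcap([]float64{-100, 0, 15, 100}, 30))
	// approximately [-29.99 0 13.86 29.99]
}
```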
Daniel Hiltgen
f6b69f3f28 integration: improve vision test robustness and add thinking tests
Add skipIfNoVisionOverride() to skip vision tests when OLLAMA_TEST_MODEL
is set to a non-vision model. Add Think:false to context exhaustion test
to prevent thinking models from using all context before the test can
measure it. Add third test image (ollama homepage) and replace OCR test
with ImageDescription test using it. Relax match strings for broader
model compatibility. Add TestThinkingEnabled and TestThinkingSuppressed
to verify thinking output and channel tag handling.
2026-04-01 15:20:43 -07:00
Daniel Hiltgen
e38b606e8b bench: add prompt calibration, context size flag, and NumCtx reporting
Add --num-ctx flag to set context size, and report NumCtx in model info
header. Calibrate tokens-per-word ratio during warmup using actual
tokenization metrics from the model, replacing the fixed 1.3 heuristic.
This produces more accurate prompt token counts for --prompt-tokens.

Also add fetchContextLength() to query running model context via /api/ps.
2026-04-01 15:20:37 -07:00
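A rough sketch of the calibration: divide the prompt token count observed during warmup by the warmup prompt's word count, falling back to the old 1.3 heuristic when there is no data. Names here are illustrative, not the bench tool's real identifiers:

```go
package main

import (
	"fmt"
	"strings"
)

// calibrateRatio derives tokens-per-word from the warmup request's real
// prompt token count instead of assuming 1.3 tokens per word.
func calibrateRatio(warmupPrompt string, promptEvalCount int) float64 {
	words := len(strings.Fields(warmupPrompt))
	if words == 0 || promptEvalCount <= 0 {
		return 1.3 // old fixed heuristic as the fallback
	}
	return float64(promptEvalCount) / float64(words)
}

func main() {
	ratio := calibrateRatio("the quick brown fox jumps over the lazy dog", 12)
	target := 512 // e.g. a --prompt-tokens target
	fmt.Printf("ratio=%.2f, generate ~%d words\n", ratio, int(float64(target)/ratio))
}
```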
Daniel Hiltgen
cb0033598e tokenizer: add SentencePiece-style BPE support (#15162)
* tokenizer: add SentencePiece-style BPE support

Add WithSentencePieceNormalizer option to BytePairEncoding for models
that use BPE with SentencePiece-style space markers (space to/from
U+2581).

NewBytePairEncoding is unchanged; the new NewBytePairEncodingWithOptions
constructor accepts BPEOption functions. Decoding handles the reverse
mapping of U+2581 back to spaces.

* review comments
2026-03-31 17:00:36 -07:00
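The space-marker mapping the option enables can be sketched directly from the commit text: spaces become U+2581 before encoding, and decoding reverses the mapping. Only the normalizer pair is shown here; BPE merges and vocab lookup are omitted:

```go
package main

import (
	"fmt"
	"strings"
)

// SentencePiece-style BPE marks word boundaries with U+2581 ("▁")
// instead of raw spaces; decode reverses the mapping.
const spaceMarker = "\u2581"

func normalize(s string) string   { return strings.ReplaceAll(s, " ", spaceMarker) }
func denormalize(s string) string { return strings.ReplaceAll(s, spaceMarker, " ") }

func main() {
	n := normalize("hello world")
	fmt.Println(n)              // hello▁world
	fmt.Println(denormalize(n)) // hello world
}
```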
Daniel Hiltgen
4d14b0ff92 mlx: respect tokenizer add_bos_token setting in pipeline (#15185)
Replace hardcoded Encode(prompt, true) with
Encode(prompt, r.Tokenizer.AddBOS()) so the pipeline respects each
model's tokenizer configuration.

Models with add_bos_token=true (gemma3, llama): unchanged, tokenizer
still prepends BOS.

Models with bos_token=null (qwen3, qwen3.5): unchanged, the BOS
guard (vocab.BOS >= 0) already prevented prepending regardless of
the flag.

This aligns the pipeline with the /v1/tokenize endpoint which already
uses Tokenizer.AddBOS().
2026-03-31 16:46:30 -07:00
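A sketch of the combined guard described above, with a toy tokenizer standing in for the real one: BOS is prepended only when the config asks for it and the vocab actually defines a BOS id:

```go
package main

import "fmt"

// tokenizer is a toy stand-in; addBOS models add_bos_token from the
// tokenizer config, and bosID is -1 when bos_token is null.
type tokenizer struct {
	addBOS bool
	bosID  int
}

func (t tokenizer) AddBOS() bool { return t.addBOS }

// Encode prepends BOS only if both the flag and a valid BOS id exist,
// matching the vocab.BOS >= 0 guard described above.
func (t tokenizer) Encode(ids []int, addBOS bool) []int {
	if addBOS && t.bosID >= 0 {
		return append([]int{t.bosID}, ids...)
	}
	return ids
}

func main() {
	gemma := tokenizer{addBOS: true, bosID: 2}
	qwen := tokenizer{addBOS: true, bosID: -1} // bos_token: null
	fmt.Println(gemma.Encode([]int{5, 6}, gemma.AddBOS())) // [2 5 6]
	fmt.Println(qwen.Encode([]int{5, 6}, qwen.AddBOS()))   // [5 6]
}
```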
Parth Sareen
d9cb70c270 docs: update pi docs (#15152) 2026-03-31 16:37:55 -07:00
Jeffrey Morgan
31f968fe1f cmd: set OpenCode default model in config (#15127) 2026-03-29 12:11:36 -07:00
Jeffrey Morgan
b7bda92d52 model: add qwen3-next compatibility for legacy ssm_in projections (#15133) 2026-03-29 11:50:47 -07:00
Parth Sareen
8e54823fd3 revert context length warnings change (#15121) 2026-03-28 16:43:59 -07:00
Parth Sareen
7c8da5679e launch: improve multi-select for already added models (#15113) 2026-03-28 13:44:40 -07:00
Parth Sareen
6214103e66 launch: auto-install pi and manage web-search lifecycle (#15118) 2026-03-28 13:06:20 -07:00
Patrick Devine
9e7cb9697e mlx: fix vision capability + min version (#15106) 2026-03-27 17:09:28 -07:00
Bruce MacDonald
3824e380a8 server: preserve raw manifest bytes during pull (#15104)
pullModelManifest unmarshals the registry response into a Go struct
then re-marshals with json.Marshal before writing to disk. When the
registry's JSON formatting or field ordering differs from Go's
output, the local SHA256 won't match the registry's
Ollama-Content-Digest header, causing false "out of date" warnings.

Preserve the raw bytes from the registry response and write them
directly to disk so the local manifest is byte-for-byte identical
to what the registry serves.
2026-03-27 15:42:31 -07:00
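A minimal sketch of the byte-preserving write: persist the registry's response body verbatim and compute the digest over those exact bytes, so it can match the Ollama-Content-Digest header later. The path and helper name are hypothetical:

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"os"
)

// writeManifest stores the registry's bytes verbatim; the digest is then
// computed over exactly what the registry served, so a later comparison
// against the registry's content digest can succeed.
func writeManifest(path string, raw []byte) (string, error) {
	if err := os.WriteFile(path, raw, 0o644); err != nil {
		return "", err
	}
	return fmt.Sprintf("sha256:%x", sha256.Sum256(raw)), nil
}

func main() {
	raw := []byte(`{"schemaVersion":2,"layers":[]}`) // as received, unmodified
	digest, err := writeManifest("manifest.json", raw)
	if err != nil {
		panic(err)
	}
	fmt.Println(digest)
}
```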
Devon Rifkin
c9b2dcfc52 anthropic: fix empty inputs in content blocks (#15105)
* anthropic: fix empty inputs in content blocks

When we switched to `api.ToolCallFunctionArguments`, `omitempty` stopped
doing what we were relying on it for before. This would cause non-tool
content blocks to have an `"input": {}` field, which doesn't match our
old behavior.

* use omitzero instead
2026-03-27 15:41:27 -07:00
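A sketch of why omitzero is the right tag here: omitempty never considers a struct value empty, while omitzero (Go 1.24+) consults an IsZero method. Args below is a stand-in for api.ToolCallFunctionArguments:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Args stands in for api.ToolCallFunctionArguments.
type Args struct{ kv map[string]any }

func (a Args) IsZero() bool                 { return len(a.kv) == 0 }
func (a Args) MarshalJSON() ([]byte, error) { return json.Marshal(a.kv) }

type Block struct {
	Type string `json:"type"`
	// omitempty would always emit "input" for a struct value; omitzero
	// calls IsZero and drops the field for non-tool blocks.
	Input Args `json:"input,omitzero"`
}

func main() {
	text, _ := json.Marshal(Block{Type: "text"})
	fmt.Println(string(text)) // {"type":"text"} with no "input":{}

	tool, _ := json.Marshal(Block{Type: "tool_use", Input: Args{kv: map[string]any{"q": 1}}})
	fmt.Println(string(tool)) // {"type":"tool_use","input":{"q":1}}
}
```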
Parth Sareen
b00bd1dfd4 launch: skip context length warning for MLX models and show model name (#15102) 2026-03-27 15:01:33 -07:00
Jesse Gross
ac83ac20c4 anthropic: fix KV cache reuse degraded by tool call argument reordering
Use typed structs for tool call arguments instead of map[string]any to
preserve JSON key order, which Go maps do not guarantee.
2026-03-27 14:30:16 -07:00
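A sketch of the failure mode: round-tripping arguments through map[string]any re-emits keys in encoding/json's sorted order rather than the order the model produced them, so the re-rendered conversation no longer prefix-matches cached tokens. A typed struct marshals in declared field order (WeatherArgs is illustrative):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// WeatherArgs is illustrative; declared field order is preserved on
// marshal, unlike map keys, which encoding/json emits sorted.
type WeatherArgs struct {
	Unit     string `json:"unit"`
	Location string `json:"location"`
}

func main() {
	// Suppose the model emitted "unit" before "location".
	raw := []byte(`{"unit":"celsius","location":"Paris"}`)

	var m map[string]any
	json.Unmarshal(raw, &m)
	viaMap, _ := json.Marshal(m)
	fmt.Println(string(viaMap)) // {"location":"Paris","unit":"celsius"}, reordered

	var s WeatherArgs
	json.Unmarshal(raw, &s)
	viaStruct, _ := json.Marshal(s)
	fmt.Println(string(viaStruct)) // {"unit":"celsius","location":"Paris"}, stable
}
```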
Bruce MacDonald
e7ccc129ea app: fix false "out of date" model warnings (#15101)
The staleness check compared the local manifest digest (SHA256 of the
file on disk) against the registry's Ollama-Content-Digest header.
These never matched because PullModel re-serializes the manifest JSON
before writing, producing different bytes than the registry's original.

The fallback comparison (local modified_at vs upstream push time) was
also broken: the generated TypeScript Time class discards the actual
timestamp value, so Date parsing always produced NaN.

Fix by moving the staleness comparison server-side where we have
reliable access to both the local manifest file mtime and the upstream
push time. The /api/v1/model/upstream endpoint now returns a simple
`stale` boolean instead of raw digests for the frontend to compare.

Also adds User-Agent to the CORS allowed headers for dev mode.
2026-03-27 14:15:10 -07:00
Jeffrey Morgan
69ed0c2729 parsers: qwen3.5 streaming tool-call parsing and add regression test (#15098) 2026-03-27 14:04:14 -07:00
Alfredo Matas
1cefa749aa model/parsers: close think block if tool block starts in Qwen3.5 (#15022) 2026-03-27 11:28:34 -07:00
Daniel Hiltgen
aec2fef95d ci: harden cuda include path handling (#15093)
On Windows we can get multiple include dirs, so find where the headers are,
then copy from that location.
2026-03-27 07:57:07 -07:00
Eva H
366625a831 launch: warn when server context length is below 64k for local models (#15044)
A stop-gap for now to guide users better. We'll add more in-depth recommendations per integration as well.

---------

Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2026-03-27 00:15:53 -07:00
Daniel Hiltgen
516ebd8548 ci: include mlx jit headers on linux (#15083)
* ci: include mlx jit headers on linux

* handle CUDA JIT headers
2026-03-26 23:10:07 -07:00
Parth Sareen
f567abc63f tui: update chat title (#15082) 2026-03-26 18:06:53 -07:00
Eva H
1adfc27f04 launch/vscode: prefer known vs code paths over code on PATH (#15073) 2026-03-26 18:06:28 -04:00
Parth Sareen
4a2b9f9dbc launch: hide cline integration (#15080) 2026-03-26 14:33:43 -07:00
Parth Sareen
e46b67a6cc launch: hide vs code (#15076) 2026-03-26 13:52:50 -07:00
Eva H
c000afe76c doc: update vscode doc (#15064)
---------

Co-authored-by: ParthSareen <parth.sareen@ollama.com>
2026-03-26 13:45:48 -07:00
Jesse Gross
9d7b18f81e mlxrunner: combine setStateRaw and setStateDetached into setState 2026-03-26 13:32:11 -07:00
Jesse Gross
4f5999fd3f mlxrunner: schedule periodic snapshots during prefill
Add periodic snapshots every 8k tokens and near the end of the prompt
so that long prompts can be partially restored and thinking/generation
can be retried without full reprocessing.
2026-03-26 13:32:11 -07:00
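The schedule itself is simple to sketch: a snapshot every 8k tokens plus one shortly before the end of the prompt. The 8k interval comes from the commit message; the tail offset and function shape are assumptions:

```go
package main

import "fmt"

// snapshotPositions returns prefill positions to snapshot at: every
// `interval` tokens, plus one `tail` tokens before the prompt's end.
func snapshotPositions(promptLen, interval, tail int) []int {
	var positions []int
	for p := interval; p < promptLen-tail; p += interval {
		positions = append(positions, p)
	}
	if promptLen > tail {
		positions = append(positions, promptLen-tail)
	}
	return positions
}

func main() {
	fmt.Println(snapshotPositions(20000, 8192, 64))
	// [8192 16384 19936]
}
```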
Jesse Gross
ac5f0dbb6a mlxrunner: improve eviction and LRU tracking
Update the LRU last-used time only on the nodes that were actually used
during processing, rather than on all snapshots along the path. This
allows eviction to remove nodes more accurately, so we can avoid
additional heuristics such as auto-merging nodes.
2026-03-26 13:32:11 -07:00
Jesse Gross
d1151e18a1 mlx: fix KV cache snapshot memory leak
mlx.Copy shares the backing buffer with its source (via
copy_shared_buffer) rather than allocating independent storage.
When used to snapshot a slice of the KV cache, the snapshot array
holds the entire original cache buffer alive through the shared
data pointer — even after eval detaches the computation graph.

Replace Copy with Contiguous in Snapshot and Split. Contiguous
allocates a compact buffer when the source buffer is significantly
larger than the logical slice (Contiguous::eval checks
buffer_size > nbytes + 16384), which is always the case for KV
cache slices.
2026-03-25 17:26:34 -07:00
rick
ebbce136c7 ggml: force flash attention off for grok 2026-03-25 16:15:49 -07:00
Devon Rifkin
26b9f53f8e api/show: overwrite basename for copilot chat (#15062)
Copilot Chat prefers to use `general.basename` in the built-in Ollama
integration, but this name isn't usually shown directly to users (and
there may be many models that share this name). Instead we pass back
`req.Model`, which for this extension is the value that we return from
`/api/tags`
2026-03-25 14:02:22 -07:00
Eva H
7575438366 cmd: ollama launch vscode (#15060)
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
2026-03-25 16:37:02 -04:00
Eva H
7d7c90d702 tui: add left arrow back navigation in model selector (#14940) 2026-03-25 11:53:48 -07:00
Daniel Hiltgen
4fda69809a ci: fix windows cgo compiler error (#15046) 2026-03-24 16:45:36 -07:00
Daniel Hiltgen
c9b5da6b0c integration: improve ability to test individual models (#14948)
* integration: improve ability to test individual models

Add OLLAMA_TEST_MODEL env var to run integration tests against a
single model.

Enhance vision tests: multi-turn chat with cached image tokens, object
counting, spatial reasoning, detail recognition, scene understanding, OCR, and
multi-image comparison.

Add tool calling stress tests with complex agent-style prompts, large
system messages, and multi-turn tool response handling.

* review comments
2026-03-24 14:28:23 -07:00
Patrick Devine
de5cb7311f mlx: add mxfp4/mxfp8/nvfp4 importing (#15015)
This change allows importing bf16 and converting to mxfp4/mxfp8/nvfp4
and also importing fp8 and converting directly to mxfp8.
2026-03-24 13:45:44 -07:00
Jesse Gross
95ee7fbd29 mlxrunner: panic on double unpin 2026-03-23 17:44:19 -07:00
Jesse Gross
ec55536734 mlxrunner: show time since last used in cache dump tree 2026-03-23 17:44:19 -07:00
Jesse Gross
77491439c2 mlxrunner: support partial match on pure transformer caches
Previously, a partial match within a node's edge would truncate the path
to the parent snapshot, effectively making all cache types behave as
recurrent caches. Caches with only transformer layers can rewind to an
arbitrary boundary, so this restores that capability and improves cache
hits.
2026-03-23 17:44:19 -07:00
Parth Sareen
b166b36cd2 docs: update Claude Code with Telegram guide (#15026) 2026-03-23 16:31:21 -07:00
Daniel Hiltgen
c2b0bb7a52 mlx: update as of 3/23 (#14789)
* mlx: update to HEAD on 3/23

Also fixes a few misc vendoring bugs uncovered with this first update.
This also renames the version files to make them clearer.

* CUDA Fast Gated Delta kernel

* mlx: detect eval errors and panic

On model errors or missing kernels, don't mask the error, bubble it up.
2026-03-23 11:28:44 -07:00
164 changed files with 16378 additions and 1046 deletions

View File

@@ -424,6 +424,7 @@ jobs:
           lib/ollama/cuda_v*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
           lib/ollama/vulkan*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
           lib/ollama/mlx*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+          lib/ollama/include*) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
           lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
           lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
           lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;

View File

@@ -64,6 +64,7 @@ jobs:
           container: nvidia/cuda:13.0.0-devel-ubuntu22.04
           extra-packages: libcudnn9-dev-cuda-13 libopenblas-dev liblapack-dev liblapacke-dev git curl
           flags: '-DCMAKE_CUDA_ARCHITECTURES=87 -DBLAS_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu -DLAPACK_INCLUDE_DIRS=/usr/include/x86_64-linux-gnu'
+          install-go: true
     runs-on: linux
     container: ${{ matrix.container }}
     steps:
@@ -90,6 +91,12 @@ jobs:
           fi
         env:
           DEBIAN_FRONTEND: noninteractive
+      - if: matrix.install-go
+        name: Install Go
+        run: |
+          GO_VERSION=$(awk '/^go / { print $2 }' go.mod)
+          curl -fsSL "https://golang.org/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz" | tar xz -C /usr/local
+          echo "/usr/local/go/bin" >> $GITHUB_PATH
       - uses: actions/cache@v4
         with:
           path: /github/home/.cache/ccache

View File

@@ -246,13 +246,21 @@ if(MLX_ENGINE)
             COMPONENT MLX)
   endif()
-  # Install CCCL headers for NVRTC JIT compilation at runtime.
+  # Install headers for NVRTC JIT compilation at runtime.
   # MLX's own install rules use the default component so they get skipped by
   # --component MLX. Headers are installed alongside libmlx in OLLAMA_INSTALL_DIR.
+  #
+  # Layout:
+  #   ${OLLAMA_INSTALL_DIR}/include/cccl/{cuda,nv}/ — CCCL headers
+  #   ${OLLAMA_INSTALL_DIR}/include/*.h — CUDA toolkit headers
+  #
+  # MLX's jit_module.cpp resolves CCCL via
+  # current_binary_dir()[.parent_path()] / "include" / "cccl"
   # On Linux, MLX's jit_module.cpp resolves CCCL via
   # current_binary_dir().parent_path() / "include" / "cccl", so we create a
   # symlink from lib/ollama/include -> ${OLLAMA_RUNNER_DIR}/include
   # This will need refinement if we add multiple CUDA versions for MLX in the future.
+  # CUDA runtime headers are found via CUDA_PATH env var (set by mlxrunner).
   if(EXISTS ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda)
     install(DIRECTORY ${CMAKE_BINARY_DIR}/_deps/cccl-src/include/cuda
             DESTINATION ${OLLAMA_INSTALL_DIR}/include/cccl
@@ -271,6 +279,61 @@ if(MLX_ENGINE)
     endif()
   endif()
+  # Install minimal CUDA toolkit headers needed by MLX JIT kernels.
+  # These are the transitive closure of includes from mlx/backend/cuda/device/*.cuh.
+  # The Go mlxrunner sets CUDA_PATH to OLLAMA_INSTALL_DIR so MLX finds them at
+  # $CUDA_PATH/include/*.h via NVRTC --include-path.
+  if(CUDAToolkit_FOUND)
+    # CUDAToolkit_INCLUDE_DIRS may be a semicolon-separated list
+    # (e.g. ".../include;.../include/cccl"). Find the entry that
+    # contains the CUDA runtime headers we need.
+    set(_cuda_inc "")
+    foreach(_dir ${CUDAToolkit_INCLUDE_DIRS})
+      if(EXISTS "${_dir}/cuda_runtime_api.h")
+        set(_cuda_inc "${_dir}")
+        break()
+      endif()
+    endforeach()
+    if(NOT _cuda_inc)
+      message(WARNING "Could not find cuda_runtime_api.h in CUDAToolkit_INCLUDE_DIRS: ${CUDAToolkit_INCLUDE_DIRS}")
+    else()
+      set(_dst "${OLLAMA_INSTALL_DIR}/include")
+      set(_MLX_JIT_CUDA_HEADERS
+        builtin_types.h
+        cooperative_groups.h
+        cuda_bf16.h
+        cuda_bf16.hpp
+        cuda_device_runtime_api.h
+        cuda_fp16.h
+        cuda_fp16.hpp
+        cuda_fp8.h
+        cuda_fp8.hpp
+        cuda_runtime_api.h
+        device_types.h
+        driver_types.h
+        math_constants.h
+        surface_types.h
+        texture_types.h
+        vector_functions.h
+        vector_functions.hpp
+        vector_types.h
+      )
+      foreach(_hdr ${_MLX_JIT_CUDA_HEADERS})
+        install(FILES "${_cuda_inc}/${_hdr}"
+                DESTINATION ${_dst}
+                COMPONENT MLX)
+      endforeach()
+      # Subdirectory headers
+      install(DIRECTORY "${_cuda_inc}/cooperative_groups"
+              DESTINATION ${_dst}
+              COMPONENT MLX
+              FILES_MATCHING PATTERN "*.h")
+      install(FILES "${_cuda_inc}/crt/host_defines.h"
+              DESTINATION "${_dst}/crt"
+              COMPONENT MLX)
+    endif()
+  endif()
   # On Windows, explicitly install dl.dll (dlfcn-win32 POSIX dlopen emulation)
   # RUNTIME_DEPENDENCIES auto-excludes it via POST_EXCLUDE_FILES_STRICT because
   # dlfcn-win32 is a known CMake target with its own install rules (which install

View File

@@ -157,7 +157,7 @@ COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 COPY x/imagegen/mlx x/imagegen/mlx
 COPY go.mod go.sum .
-COPY MLX_VERSION MLX_CORE_VERSION .
+COPY MLX_VERSION MLX_C_VERSION .
 RUN curl -fsSL https://golang.org/dl/go$(awk '/^go/ { print $2 }' go.mod).linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
 ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download

View File

@@ -1 +0,0 @@
-v0.30.6

MLX_C_VERSION Normal file
View File

@@ -0,0 +1 @@
+0726ca922fc902c4c61ef9c27d94132be418e945

View File

@@ -1 +1 @@
-v0.5.0
+38ad257088fb2193ad47e527cf6534a689f30943

View File

@@ -68,7 +68,7 @@ type MessagesRequest struct {
     Model       string         `json:"model"`
     MaxTokens   int            `json:"max_tokens"`
     Messages    []MessageParam `json:"messages"`
-    System      any            `json:"system,omitempty"` // string or []ContentBlock
+    System      any            `json:"system,omitempty"` // string or []map[string]any (JSON-decoded ContentBlock)
     Stream      bool           `json:"stream,omitempty"`
     Temperature *float64       `json:"temperature,omitempty"`
     TopP        *float64       `json:"top_p,omitempty"`
@@ -82,8 +82,27 @@ type MessagesRequest struct {
 // MessageParam represents a message in the request
 type MessageParam struct {
     Role    string `json:"role"` // "user" or "assistant"
-    Content any    `json:"content"` // string or []ContentBlock
+    Content []ContentBlock `json:"content"` // always []ContentBlock; plain strings are normalized on unmarshal
+}
+
+func (m *MessageParam) UnmarshalJSON(data []byte) error {
+    var raw struct {
+        Role    string          `json:"role"`
+        Content json.RawMessage `json:"content"`
+    }
+    if err := json.Unmarshal(data, &raw); err != nil {
+        return err
+    }
+    m.Role = raw.Role
+
+    var s string
+    if err := json.Unmarshal(raw.Content, &s); err == nil {
+        m.Content = []ContentBlock{{Type: "text", Text: &s}}
+        return nil
+    }
+
+    return json.Unmarshal(raw.Content, &m.Content)
 }

 // ContentBlock represents a content block in a message.
@@ -102,9 +121,9 @@ type ContentBlock struct {
     Source *ImageSource `json:"source,omitempty"`

     // For tool_use and server_tool_use blocks
     ID    string `json:"id,omitempty"`
     Name  string `json:"name,omitempty"`
-    Input any    `json:"input,omitempty"`
+    Input api.ToolCallFunctionArguments `json:"input,omitzero"`

     // For tool_result and web_search_tool_result blocks
     ToolUseID string `json:"tool_use_id,omitempty"`
@@ -377,178 +396,145 @@ func convertMessage(msg MessageParam) ([]api.Message, error) {
     var messages []api.Message
     role := strings.ToLower(msg.Role)

-    switch content := msg.Content.(type) {
-    case string:
-        messages = append(messages, api.Message{Role: role, Content: content})
-
-    case []any:
-        var textContent strings.Builder
-        var images []api.ImageData
-        var toolCalls []api.ToolCall
-        var thinking string
-        var toolResults []api.Message
-
-        textBlocks := 0
-        imageBlocks := 0
-        toolUseBlocks := 0
-        toolResultBlocks := 0
-        serverToolUseBlocks := 0
-        webSearchToolResultBlocks := 0
-        thinkingBlocks := 0
-        unknownBlocks := 0
-
-        for _, block := range content {
-            blockMap, ok := block.(map[string]any)
-            if !ok {
-                logutil.Trace("anthropic: invalid content block format", "role", role)
-                return nil, errors.New("invalid content block format")
-            }
-
-            blockType, _ := blockMap["type"].(string)
-
-            switch blockType {
-            case "text":
-                textBlocks++
-                if text, ok := blockMap["text"].(string); ok {
-                    textContent.WriteString(text)
-                }
-
-            case "image":
-                imageBlocks++
-                source, ok := blockMap["source"].(map[string]any)
-                if !ok {
-                    logutil.Trace("anthropic: invalid image source", "role", role)
-                    return nil, errors.New("invalid image source")
-                }
-
-                sourceType, _ := source["type"].(string)
-                if sourceType == "base64" {
-                    data, _ := source["data"].(string)
-                    decoded, err := base64.StdEncoding.DecodeString(data)
-                    if err != nil {
-                        logutil.Trace("anthropic: invalid base64 image data", "role", role, "error", err)
-                        return nil, fmt.Errorf("invalid base64 image data: %w", err)
-                    }
-                    images = append(images, decoded)
-                } else {
-                    logutil.Trace("anthropic: unsupported image source type", "role", role, "source_type", sourceType)
-                    return nil, fmt.Errorf("invalid image source type: %s. Only base64 images are supported.", sourceType)
-                }
-                // URL images would need to be fetched - skip for now
-
-            case "tool_use":
-                toolUseBlocks++
-                id, ok := blockMap["id"].(string)
-                if !ok {
-                    logutil.Trace("anthropic: tool_use block missing id", "role", role)
-                    return nil, errors.New("tool_use block missing required 'id' field")
-                }
-                name, ok := blockMap["name"].(string)
-                if !ok {
-                    logutil.Trace("anthropic: tool_use block missing name", "role", role)
-                    return nil, errors.New("tool_use block missing required 'name' field")
-                }
-                tc := api.ToolCall{
-                    ID: id,
-                    Function: api.ToolCallFunction{
-                        Name: name,
-                    },
-                }
-                if input, ok := blockMap["input"].(map[string]any); ok {
-                    tc.Function.Arguments = mapToArgs(input)
-                }
-                toolCalls = append(toolCalls, tc)
-
-            case "tool_result":
-                toolResultBlocks++
-                toolUseID, _ := blockMap["tool_use_id"].(string)
-                var resultContent string
-
-                switch c := blockMap["content"].(type) {
-                case string:
-                    resultContent = c
-                case []any:
-                    for _, cb := range c {
-                        if cbMap, ok := cb.(map[string]any); ok {
-                            if cbMap["type"] == "text" {
-                                if text, ok := cbMap["text"].(string); ok {
-                                    resultContent += text
-                                }
-                            }
-                        }
-                    }
-                }
-
-                toolResults = append(toolResults, api.Message{
-                    Role:       "tool",
-                    Content:    resultContent,
-                    ToolCallID: toolUseID,
-                })
-
-            case "thinking":
-                thinkingBlocks++
-                if t, ok := blockMap["thinking"].(string); ok {
-                    thinking = t
-                }
-
-            case "server_tool_use":
-                serverToolUseBlocks++
-                id, _ := blockMap["id"].(string)
-                name, _ := blockMap["name"].(string)
-                tc := api.ToolCall{
-                    ID: id,
-                    Function: api.ToolCallFunction{
-                        Name: name,
-                    },
-                }
-                if input, ok := blockMap["input"].(map[string]any); ok {
-                    tc.Function.Arguments = mapToArgs(input)
-                }
-                toolCalls = append(toolCalls, tc)
-
-            case "web_search_tool_result":
-                webSearchToolResultBlocks++
-                toolUseID, _ := blockMap["tool_use_id"].(string)
-                toolResults = append(toolResults, api.Message{
-                    Role:       "tool",
-                    Content:    formatWebSearchToolResultContent(blockMap["content"]),
-                    ToolCallID: toolUseID,
-                })
-
-            default:
-                unknownBlocks++
-            }
-        }
-
-        if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" {
-            m := api.Message{
-                Role:      role,
-                Content:   textContent.String(),
-                Images:    images,
-                ToolCalls: toolCalls,
-                Thinking:  thinking,
-            }
-            messages = append(messages, m)
-        }
-
-        // Add tool results as separate messages
-        messages = append(messages, toolResults...)
-
-        logutil.Trace("anthropic: converted block message",
-            "role", role,
-            "blocks", len(content),
-            "text", textBlocks,
-            "image", imageBlocks,
-            "tool_use", toolUseBlocks,
-            "tool_result", toolResultBlocks,
-            "server_tool_use", serverToolUseBlocks,
-            "web_search_result", webSearchToolResultBlocks,
-            "thinking", thinkingBlocks,
-            "unknown", unknownBlocks,
-            "messages", TraceAPIMessages(messages),
-        )
-
-    default:
-        return nil, fmt.Errorf("invalid message content type: %T", content)
-    }
+    var textContent strings.Builder
+    var images []api.ImageData
+    var toolCalls []api.ToolCall
+    var thinking string
+    var toolResults []api.Message
+
+    textBlocks := 0
+    imageBlocks := 0
+    toolUseBlocks := 0
+    toolResultBlocks := 0
+    serverToolUseBlocks := 0
+    webSearchToolResultBlocks := 0
+    thinkingBlocks := 0
+    unknownBlocks := 0
+
+    for _, block := range msg.Content {
+        switch block.Type {
+        case "text":
+            textBlocks++
+            if block.Text != nil {
+                textContent.WriteString(*block.Text)
+            }
+
+        case "image":
+            imageBlocks++
+            if block.Source == nil {
+                logutil.Trace("anthropic: invalid image source", "role", role)
+                return nil, errors.New("invalid image source")
+            }
+
+            if block.Source.Type == "base64" {
+                decoded, err := base64.StdEncoding.DecodeString(block.Source.Data)
+                if err != nil {
+                    logutil.Trace("anthropic: invalid base64 image data", "role", role, "error", err)
+                    return nil, fmt.Errorf("invalid base64 image data: %w", err)
+                }
+                images = append(images, decoded)
+            } else {
+                logutil.Trace("anthropic: unsupported image source type", "role", role, "source_type", block.Source.Type)
+                return nil, fmt.Errorf("invalid image source type: %s. Only base64 images are supported.", block.Source.Type)
+            }
+
+        case "tool_use":
+            toolUseBlocks++
+            if block.ID == "" {
+                logutil.Trace("anthropic: tool_use block missing id", "role", role)
+                return nil, errors.New("tool_use block missing required 'id' field")
+            }
+            if block.Name == "" {
+                logutil.Trace("anthropic: tool_use block missing name", "role", role)
+                return nil, errors.New("tool_use block missing required 'name' field")
+            }
+            toolCalls = append(toolCalls, api.ToolCall{
+                ID: block.ID,
+                Function: api.ToolCallFunction{
+                    Name:      block.Name,
+                    Arguments: block.Input,
+                },
+            })
+
+        case "tool_result":
+            toolResultBlocks++
+            var resultContent string
+
+            switch c := block.Content.(type) {
+            case string:
+                resultContent = c
+            case []any:
+                for _, cb := range c {
+                    if cbMap, ok := cb.(map[string]any); ok {
+                        if cbMap["type"] == "text" {
+                            if text, ok := cbMap["text"].(string); ok {
+                                resultContent += text
+                            }
+                        }
+                    }
+                }
+            }
+
+            toolResults = append(toolResults, api.Message{
+                Role:       "tool",
+                Content:    resultContent,
+                ToolCallID: block.ToolUseID,
+            })
+
+        case "thinking":
+            thinkingBlocks++
+            if block.Thinking != nil {
+                thinking = *block.Thinking
+            }
+
+        case "server_tool_use":
+            serverToolUseBlocks++
+            toolCalls = append(toolCalls, api.ToolCall{
+                ID: block.ID,
+                Function: api.ToolCallFunction{
+                    Name:      block.Name,
+                    Arguments: block.Input,
+                },
+            })
+
+        case "web_search_tool_result":
+            webSearchToolResultBlocks++
+            toolResults = append(toolResults, api.Message{
+                Role:       "tool",
+                Content:    formatWebSearchToolResultContent(block.Content),
+                ToolCallID: block.ToolUseID,
+            })
+
+        default:
+            unknownBlocks++
+        }
+    }
+
+    if textContent.Len() > 0 || len(images) > 0 || len(toolCalls) > 0 || thinking != "" {
+        m := api.Message{
+            Role:      role,
+            Content:   textContent.String(),
+            Images:    images,
+            ToolCalls: toolCalls,
+            Thinking:  thinking,
+        }
+        messages = append(messages, m)
+    }
+
+    // Add tool results as separate messages
+    messages = append(messages, toolResults...)
+
+    logutil.Trace("anthropic: converted block message",
+        "role", role,
+        "blocks", len(msg.Content),
+        "text", textBlocks,
+        "image", imageBlocks,
+        "tool_use", toolUseBlocks,
+        "tool_result", toolResultBlocks,
+        "server_tool_use", serverToolUseBlocks,
+        "web_search_result", webSearchToolResultBlocks,
+        "thinking", thinkingBlocks,
+        "unknown", unknownBlocks,
+        "messages", TraceAPIMessages(messages),
+    )

     return messages, nil
 }
@@ -882,7 +868,6 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
                 slog.Error("failed to marshal tool arguments", "error", err, "tool_id", tc.ID)
                 continue
             }
-
             events = append(events, StreamEvent{
                 Event: "content_block_start",
                 Data: ContentBlockStartEvent{
@@ -892,7 +877,7 @@ func (c *StreamConverter) Process(r api.ChatResponse) []StreamEvent {
                     Type: "tool_use",
                     ID:   tc.ID,
                     Name: tc.Function.Name,
-                    Input: map[string]any{},
+                    Input: api.NewToolCallFunctionArguments(),
                 },
             })
@@ -989,15 +974,6 @@ func ptr(s string) *string {
     return &s
 }

-// mapToArgs converts a map to ToolCallFunctionArguments
-func mapToArgs(m map[string]any) api.ToolCallFunctionArguments {
-    args := api.NewToolCallFunctionArguments()
-    for k, v := range m {
-        args.Set(k, v)
-    }
-    return args
-}
-
 // CountTokensRequest represents an Anthropic count_tokens request
 type CountTokensRequest struct {
     Model string `json:"model"`
@@ -1030,17 +1006,13 @@ func estimateTokens(req CountTokensRequest) int {
     var totalLen int

     // Count system prompt
-    if req.System != nil {
-        totalLen += countAnyContent(req.System)
-    }
+    totalLen += countAnyContent(req.System)

-    // Count messages
     for _, msg := range req.Messages {
         // Count role (always present)
         totalLen += len(msg.Role)

         // Count content
-        contentLen := countAnyContent(msg.Content)
-        totalLen += contentLen
+        totalLen += countAnyContent(msg.Content)
     }

     for _, tool := range req.Tools {
@@ -1063,12 +1035,25 @@ func countAnyContent(content any) int {
     switch c := content.(type) {
     case string:
         return len(c)
-    case []any:
+    case []ContentBlock:
         total := 0
         for _, block := range c {
             total += countContentBlock(block)
         }
         return total
+    case []any:
+        total := 0
+        for _, item := range c {
+            data, err := json.Marshal(item)
+            if err != nil {
+                continue
+            }
+            var block ContentBlock
+            if err := json.Unmarshal(data, &block); err == nil {
+                total += countContentBlock(block)
+            }
+        }
+        return total
     default:
         if data, err := json.Marshal(content); err == nil {
             return len(data)
@@ -1077,38 +1062,19 @@ func countAnyContent(content any) int {
     }
 }

-func countContentBlock(block any) int {
-    blockMap, ok := block.(map[string]any)
-    if !ok {
-        if s, ok := block.(string); ok {
-            return len(s)
-        }
-        return 0
-    }
-
+func countContentBlock(block ContentBlock) int {
     total := 0
-    blockType, _ := blockMap["type"].(string)

-    if text, ok := blockMap["text"].(string); ok {
-        total += len(text)
+    if block.Text != nil {
+        total += len(*block.Text)
     }

-    if thinking, ok := blockMap["thinking"].(string); ok {
-        total += len(thinking)
+    if block.Thinking != nil {
+        total += len(*block.Thinking)
     }

-    if blockType == "tool_use" {
-        if data, err := json.Marshal(blockMap); err == nil {
-            total += len(data)
-        }
-    }
-
-    if blockType == "tool_result" {
-        if data, err := json.Marshal(blockMap); err == nil {
+    if block.Type == "tool_use" || block.Type == "tool_result" {
+        if data, err := json.Marshal(block); err == nil {
             total += len(data)
         }
     }

     return total
 }

View File

@@ -15,11 +15,16 @@ const (
     testImage = `iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=`
 )

-// testArgs creates ToolCallFunctionArguments from a map (convenience function for tests)
-func testArgs(m map[string]any) api.ToolCallFunctionArguments {
+// textContent is a convenience for constructing []ContentBlock with a single text block in tests.
+func textContent(s string) []ContentBlock {
+    return []ContentBlock{{Type: "text", Text: &s}}
+}
+
+// makeArgs creates ToolCallFunctionArguments from key-value pairs (convenience function for tests)
+func makeArgs(kvs ...any) api.ToolCallFunctionArguments {
     args := api.NewToolCallFunctionArguments()
-    for k, v := range m {
-        args.Set(k, v)
+    for i := 0; i < len(kvs)-1; i += 2 {
+        args.Set(kvs[i].(string), kvs[i+1])
     }
     return args
 }
@@ -29,7 +34,7 @@ func TestFromMessagesRequest_Basic(t *testing.T) {
         Model:     "test-model",
         MaxTokens: 1024,
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
         },
     }
@@ -61,7 +66,7 @@ func TestFromMessagesRequest_WithSystemPrompt(t *testing.T) {
         MaxTokens: 1024,
         System:    "You are a helpful assistant.",
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
         },
     }
@@ -88,7 +93,7 @@ func TestFromMessagesRequest_WithSystemPromptArray(t *testing.T) {
             map[string]any{"type": "text", "text": " Be concise."},
         },
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
         },
     }
@@ -113,7 +118,7 @@ func TestFromMessagesRequest_WithOptions(t *testing.T) {
     req := MessagesRequest{
         Model:       "test-model",
         MaxTokens:   2048,
-        Messages:    []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:    []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Temperature: &temp,
         TopP:        &topP,
         TopK:        &topK,
@@ -148,14 +153,14 @@ func TestFromMessagesRequest_WithImage(t *testing.T) {
         Messages: []MessageParam{
             {
                 Role: "user",
-                Content: []any{
-                    map[string]any{"type": "text", "text": "What's in this image?"},
-                    map[string]any{
-                        "type": "image",
-                        "source": map[string]any{
-                            "type":       "base64",
-                            "media_type": "image/png",
-                            "data":       testImage,
+                Content: []ContentBlock{
+                    {Type: "text", Text: ptr("What's in this image?")},
+                    {
+                        Type: "image",
+                        Source: &ImageSource{
+                            Type:      "base64",
+                            MediaType: "image/png",
+                            Data:      testImage,
                         },
                     },
                 },
@@ -190,15 +195,15 @@ func TestFromMessagesRequest_WithToolUse(t *testing.T) {
         Model:     "test-model",
         MaxTokens: 1024,
         Messages: []MessageParam{
-            {Role: "user", Content: "What's the weather in Paris?"},
+            {Role: "user", Content: textContent("What's the weather in Paris?")},
             {
                 Role: "assistant",
-                Content: []any{
-                    map[string]any{
-                        "type":  "tool_use",
-                        "id":    "call_123",
-                        "name":  "get_weather",
-                        "input": map[string]any{"location": "Paris"},
+                Content: []ContentBlock{
+                    {
+                        Type:  "tool_use",
+                        ID:    "call_123",
+                        Name:  "get_weather",
+                        Input: makeArgs("location", "Paris"),
                     },
                 },
             },
@@ -234,11 +239,11 @@ func TestFromMessagesRequest_WithToolResult(t *testing.T) {
         Messages: []MessageParam{
             {
                 Role: "user",
-                Content: []any{
-                    map[string]any{
-                        "type":        "tool_result",
-                        "tool_use_id": "call_123",
-                        "content":     "The weather in Paris is sunny, 22°C",
+                Content: []ContentBlock{
+                    {
+                        Type:      "tool_result",
+                        ToolUseID: "call_123",
+                        Content:   "The weather in Paris is sunny, 22°C",
                     },
                 },
             },
@@ -270,7 +275,7 @@ func TestFromMessagesRequest_WithTools(t *testing.T) {
     req := MessagesRequest{
         Model:     "test-model",
         MaxTokens: 1024,
-        Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:  []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Tools: []Tool{
             {
                 Name: "get_weather",
@@ -305,7 +310,7 @@ func TestFromMessagesRequest_DropsCustomWebSearchWhenBuiltinPresent(t *testing.T
     req := MessagesRequest{
         Model:     "test-model",
         MaxTokens: 1024,
-        Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:  []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Tools: []Tool{
             {
                 Type: "web_search_20250305",
@@ -346,7 +351,7 @@ func TestFromMessagesRequest_KeepsCustomWebSearchWhenBuiltinAbsent(t *testing.T)
     req := MessagesRequest{
         Model:     "test-model",
         MaxTokens: 1024,
-        Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:  []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Tools: []Tool{
             {
                 Type: "custom",
@@ -377,7 +382,7 @@ func TestFromMessagesRequest_WithThinking(t *testing.T) {
     req := MessagesRequest{
         Model:     "test-model",
         MaxTokens: 1024,
-        Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:  []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Thinking:  &ThinkingConfig{Type: "enabled", BudgetTokens: 1000},
     }
@@ -399,13 +404,13 @@ func TestFromMessagesRequest_ThinkingOnlyBlock(t *testing.T) {
         Model:     "test-model",
         MaxTokens: 1024,
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
             {
                 Role: "assistant",
-                Content: []any{
-                    map[string]any{
-                        "type":     "thinking",
-                        "thinking": "Let me think about this...",
+                Content: []ContentBlock{
+                    {
+                        Type:     "thinking",
+                        Thinking: ptr("Let me think about this..."),
                     },
                 },
             },
@@ -434,10 +439,10 @@ func TestFromMessagesRequest_ToolUseMissingID(t *testing.T) {
         Messages: []MessageParam{
             {
                 Role: "assistant",
-                Content: []any{
-                    map[string]any{
-                        "type": "tool_use",
-                        "name": "get_weather",
+                Content: []ContentBlock{
+                    {
+                        Type: "tool_use",
+                        Name: "get_weather",
                     },
                 },
             },
@@ -460,10 +465,10 @@ func TestFromMessagesRequest_ToolUseMissingName(t *testing.T) {
         Messages: []MessageParam{
             {
                 Role: "assistant",
-                Content: []any{
-                    map[string]any{
-                        "type": "tool_use",
-                        "id":   "call_123",
+                Content: []ContentBlock{
+                    {
+                        Type: "tool_use",
+                        ID:   "call_123",
                     },
                 },
             },
@@ -483,7 +488,7 @@ func TestFromMessagesRequest_InvalidToolSchema(t *testing.T) {
     req := MessagesRequest{
         Model:     "test-model",
         MaxTokens: 1024,
-        Messages:  []MessageParam{{Role: "user", Content: "Hello"}},
+        Messages:  []MessageParam{{Role: "user", Content: textContent("Hello")}},
         Tools: []Tool{
             {
                 Name: "bad_tool",
@@ -548,7 +553,7 @@ func TestToMessagesResponse_WithToolCalls(t *testing.T) {
                     ID: "call_123",
                     Function: api.ToolCallFunction{
                         Name:      "get_weather",
-                        Arguments: testArgs(map[string]any{"location": "Paris"}),
+                        Arguments: makeArgs("location", "Paris"),
                     },
                 },
             },
@@ -760,7 +765,7 @@ func TestStreamConverter_WithToolCalls(t *testing.T) {
                     ID: "call_123",
                     Function: api.ToolCallFunction{
                         Name:      "get_weather",
-                        Arguments: testArgs(map[string]any{"location": "Paris"}),
+                        Arguments: makeArgs("location", "Paris"),
                     },
                 },
             },
@@ -843,7 +848,7 @@ func TestStreamConverter_ThinkingDirectlyFollowedByToolCall(t *testing.T) {
                     ID: "call_abc",
                     Function: api.ToolCallFunction{
                         Name:      "ask_user",
-                        Arguments: testArgs(map[string]any{"question": "cats or dogs?"}),
+                        Arguments: makeArgs("question", "cats or dogs?"),
                     },
                 },
             },
@@ -965,7 +970,7 @@ func TestStreamConverter_MultipleToolCallsWithMixedValidity(t *testing.T) {
                     ID: "call_good",
                     Function: api.ToolCallFunction{
                         Name:      "good_function",
-                        Arguments: testArgs(map[string]any{"location": "Paris"}),
+                        Arguments: makeArgs("location", "Paris"),
                     },
                 },
                 {
@@ -1067,6 +1072,57 @@ func TestContentBlockJSON_EmptyFieldsPresent(t *testing.T) {
     }
 }

+func TestContentBlockJSON_NonToolBlocksDoNotIncludeInput(t *testing.T) {
+    tests := []struct {
+        name  string
+        block ContentBlock
+    }{
+        {
+            name: "text block",
+            block: ContentBlock{
+                Type: "text",
+                Text: ptr("hello"),
+            },
+        },
+        {
+            name: "thinking block",
+            block: ContentBlock{
+                Type:     "thinking",
+                Thinking: ptr("let me think"),
+            },
+        },
+        {
+            name: "image block",
+            block: ContentBlock{
+                Type: "image",
+                Source: &ImageSource{
+                    Type:      "base64",
+                    MediaType: "image/png",
+                    Data:      testImage,
+                },
+            },
+        },
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            data, err := json.Marshal(tt.block)
+            if err != nil {
+                t.Fatalf("failed to marshal: %v", err)
+            }
+            var result map[string]any
+            if err := json.Unmarshal(data, &result); err != nil {
+                t.Fatalf("failed to unmarshal: %v", err)
+            }
+            if _, ok := result["input"]; ok {
+                t.Fatalf("unexpected input field in non-tool block JSON: %s", string(data))
+            }
+        })
+    }
+}
+
 func TestStreamConverter_ContentBlockStartIncludesEmptyFields(t *testing.T) {
     t.Run("text block start includes empty text", func(t *testing.T) {
         conv := NewStreamConverter("msg_123", "test-model", 0)
@@ -1087,7 +1143,9 @@ func TestStreamConverter_ContentBlockStartIncludesEmptyFields(t *testing.T) {
         // Marshal and verify the text field is present
         data, _ := json.Marshal(start)
         var result map[string]any
-        json.Unmarshal(data, &result)
+        if err := json.Unmarshal(data, &result); err != nil {
+            t.Fatalf("failed to unmarshal content_block_start JSON: %v", err)
+        }
         cb := result["content_block"].(map[string]any)
         if _, ok := cb["text"]; !ok {
             t.Error("content_block_start for text should include 'text' field")
@@ -1134,13 +1192,71 @@ func TestStreamConverter_ContentBlockStartIncludesEmptyFields(t *testing.T) {
             t.Error("expected thinking content_block_start event")
         }
     })
+
+    t.Run("tool_use block start includes empty input object", func(t *testing.T) {
+        conv := NewStreamConverter("msg_123", "test-model", 0)
+        resp := api.ChatResponse{
+            Model: "test-model",
+            Message: api.Message{
+                Role: "assistant",
+                ToolCalls: []api.ToolCall{
+                    {
+                        ID: "call_123",
+                        Function: api.ToolCallFunction{
+                            Name:      "get_weather",
+                            Arguments: makeArgs("location", "Paris"),
+                        },
+                    },
+                },
+            },
+        }
+
+        events := conv.Process(resp)
+
+        var foundToolStart bool
+        for _, e := range events {
+            if e.Event == "content_block_start" {
+                if start, ok := e.Data.(ContentBlockStartEvent); ok {
+                    if start.ContentBlock.Type == "tool_use" {
+                        foundToolStart = true
+                        if start.ContentBlock.Input.Len() != 0 {
+                            t.Errorf("expected empty input object, got len=%d", start.ContentBlock.Input.Len())
+                        }
+                        data, _ := json.Marshal(start)
+                        var result map[string]any
+                        json.Unmarshal(data, &result)
+                        cb := result["content_block"].(map[string]any)
+                        input, ok := cb["input"]
+                        if !ok {
+                            t.Error("content_block_start for tool_use should include 'input' field")
+                            continue
+                        }
+                        inputMap, ok := input.(map[string]any)
+                        if !ok {
+                            t.Errorf("input field should be an object, got %T", input)
+                            continue
+                        }
+                        if len(inputMap) != 0 {
+                            t.Errorf("expected empty input object in content_block_start, got %v", inputMap)
+                        }
+                    }
+                }
+            }
+        }
+        if !foundToolStart {
+            t.Error("expected tool_use content_block_start event")
+        }
+    })
 }

 func TestEstimateTokens_SimpleMessage(t *testing.T) {
     req := CountTokensRequest{
         Model: "test-model",
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello, world!"},
+            {Role: "user", Content: textContent("Hello, world!")},
         },
     }
@@ -1161,7 +1277,7 @@ func TestEstimateTokens_WithSystemPrompt(t *testing.T) {
         Model:  "test-model",
         System: "You are a helpful assistant.",
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
         },
     }
@@ -1177,7 +1293,7 @@ func TestEstimateTokens_WithTools(t *testing.T) {
     req := CountTokensRequest{
         Model: "test-model",
         Messages: []MessageParam{
-            {Role: "user", Content: "What's the weather?"},
+            {Role: "user", Content: textContent("What's the weather?")},
         },
         Tools: []Tool{
             {
@@ -1200,17 +1316,17 @@ func TestEstimateTokens_WithThinking(t *testing.T) {
     req := CountTokensRequest{
         Model: "test-model",
         Messages: []MessageParam{
-            {Role: "user", Content: "Hello"},
+            {Role: "user", Content: textContent("Hello")},
             {
                 Role: "assistant",
-                Content: []any{
-                    map[string]any{
-                        "type":     "thinking",
-                        "thinking": "Let me think about this carefully...",
+                Content: []ContentBlock{
+                    {
+                        Type:     "thinking",
+                        Thinking: ptr("Let me think about this carefully..."),
                     },
-                    map[string]any{
-                        "type": "text",
-                        "text": "Here is my response.",
+                    {
+                        Type: "text",
+                        Text: ptr("Here is my response."),
                     },
                 },
             },
@@ -1308,12 +1424,12 @@ func TestConvertTool_RegularTool(t *testing.T) {
 func TestConvertMessage_ServerToolUse(t *testing.T) {
     msg := MessageParam{
         Role: "assistant",
-        Content: []any{
-            map[string]any{
-                "type":  "server_tool_use",
-                "id":    "srvtoolu_123",
-                "name":  "web_search",
-                "input": map[string]any{"query": "test query"},
+        Content: []ContentBlock{
+            {
+                Type:  "server_tool_use",
+                ID:    "srvtoolu_123",
+                Name:  "web_search",
+                Input: makeArgs("query", "test query"),
             },
         },
     }
@@ -1344,11 +1460,11 @@ func TestConvertMessage_ServerToolUse(t *testing.T) {
 func TestConvertMessage_WebSearchToolResult(t *testing.T) {
     msg := MessageParam{
         Role: "user",
-        Content: []any{
-            map[string]any{
-                "type":        "web_search_tool_result",
-                "tool_use_id": "srvtoolu_123",
-                "content": []any{
+        Content: []ContentBlock{
+            {
+                Type:      "web_search_tool_result",
+                ToolUseID: "srvtoolu_123",
+                Content: []any{
                     map[string]any{
                         "type":  "web_search_result",
                         "title": "Test Result",
@@ -1385,11 +1501,11 @@ func TestConvertMessage_WebSearchToolResult(t *testing.T) {
 func TestConvertMessage_WebSearchToolResultEmptyStillCreatesToolMessage(t *testing.T) {
     msg := MessageParam{
         Role: "user",
-        Content: []any{
-            map[string]any{
-                "type":        "web_search_tool_result",
-                "tool_use_id": "srvtoolu_empty",
-                "content":     []any{},
+        Content: []ContentBlock{
+            {
+                Type:      "web_search_tool_result",
+                ToolUseID: "srvtoolu_empty",
+                Content:   []any{},
             },
         },
     }
@@ -1416,11 +1532,11 @@ func TestConvertMessage_WebSearchToolResultEmptyStillCreatesToolMessage(t *testi
 func TestConvertMessage_WebSearchToolResultErrorStillCreatesToolMessage(t *testing.T) {
     msg := MessageParam{
         Role: "user",
-        Content: []any{
-            map[string]any{
-                "type":        "web_search_tool_result",
-                "tool_use_id": "srvtoolu_error",
-                "content": map[string]any{
+        Content: []ContentBlock{
+            {
+                Type:      "web_search_tool_result",
+                ToolUseID: "srvtoolu_error",
+                Content: map[string]any{
                     "type":       "web_search_tool_result_error",
                     "error_code": "max_uses_exceeded",
                 },

View File

@@ -436,6 +436,7 @@ type ToolProperty struct {
     Description string             `json:"description,omitempty"`
     Enum        []any              `json:"enum,omitempty"`
     Properties  *ToolPropertiesMap `json:"properties,omitempty"`
+    Required    []string           `json:"required,omitempty"`
 }

 // ToTypeScriptType converts a ToolProperty to a TypeScript type string
View File

@@ -550,14 +550,12 @@ export class Error {
     }
 }
 export class ModelUpstreamResponse {
-    digest?: string;
-    pushTime: number;
+    stale: boolean;
     error?: string;

     constructor(source: any = {}) {
         if ('string' === typeof source) source = JSON.parse(source);
-        this.digest = source["digest"];
-        this.pushTime = source["pushTime"];
+        this.stale = source["stale"];
         this.error = source["error"];
     }
 }

View File

@@ -161,7 +161,7 @@ export async function getModels(query?: string): Promise<Model[]> {
     // Add query if it's in the registry and not already in the list
     if (!exactMatch) {
       const result = await getModelUpstreamInfo(new Model({ model: query }));
-      const existsUpstream = !!result.digest && !result.error;
+      const existsUpstream = result.exists;
       if (existsUpstream) {
         filteredModels.push(new Model({ model: query }));
       }
@@ -339,7 +339,7 @@ export async function deleteChat(chatId: string): Promise<void> {
 // Get upstream information for model staleness checking
 export async function getModelUpstreamInfo(
   model: Model,
-): Promise<{ digest?: string; pushTime: number; error?: string }> {
+): Promise<{ stale: boolean; exists: boolean; error?: string }> {
   try {
     const response = await fetch(`${API_BASE}/api/v1/model/upstream`, {
       method: "POST",
@@ -353,22 +353,22 @@ export async function getModelUpstreamInfo(
     if (!response.ok) {
       console.warn(
-        `Failed to check upstream digest for ${model.model}: ${response.status}`,
+        `Failed to check upstream for ${model.model}: ${response.status}`,
       );
-      return { pushTime: 0 };
+      return { stale: false, exists: false };
     }

     const data = await response.json();
     if (data.error) {
-      console.warn(`Upstream digest check: ${data.error}`);
-      return { error: data.error, pushTime: 0 };
+      console.warn(`Upstream check: ${data.error}`);
+      return { stale: false, exists: false, error: data.error };
     }

-    return { digest: data.digest, pushTime: data.pushTime || 0 };
+    return { stale: !!data.stale, exists: true };
   } catch (error) {
     console.warn(`Error checking model staleness:`, error);
-    return { pushTime: 0 };
+    return { stale: false, exists: false };
   }
 }


@@ -61,24 +61,7 @@ export const ModelPicker = forwardRef<
try {
const upstreamInfo = await getModelUpstreamInfo(model);
- // Compare local digest with upstream digest
- let isStale =
- model.digest &&
- upstreamInfo.digest &&
- model.digest !== upstreamInfo.digest;
- // If the model has a modified time and upstream has a push time,
- // check if the model was modified after the push time - if so, it's not stale
- if (isStale && model.modified_at && upstreamInfo.pushTime > 0) {
- const modifiedAtTime =
- new Date(model.modified_at as string | number | Date).getTime() /
- 1000;
- if (modifiedAtTime > upstreamInfo.pushTime) {
- isStale = false;
- }
- }
- if (isStale) {
+ if (upstreamInfo.stale) {
const currentStaleModels =
queryClient.getQueryData<Map<string, boolean>>(["staleModels"]) ||
new Map();


@@ -133,9 +133,8 @@ type Error struct {
}
type ModelUpstreamResponse struct {
- Digest string `json:"digest,omitempty"`
- PushTime int64 `json:"pushTime"`
- Error string `json:"error,omitempty"`
+ Stale bool `json:"stale"`
+ Error string `json:"error,omitempty"`
}
// Serializable data for the browser state


@@ -32,6 +32,7 @@ import (
"github.com/ollama/ollama/app/version"
ollamaAuth "github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
+ "github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
_ "github.com/tkrajina/typescriptify-golang-structs/typescriptify"
)
@@ -193,7 +194,7 @@ func (s *Server) Handler() http.Handler {
if CORS() {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
- w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, User-Agent, Accept, X-Requested-With")
w.Header().Set("Access-Control-Allow-Credentials", "true")
// Handle preflight requests
@@ -318,7 +319,7 @@ func (s *Server) handleError(w http.ResponseWriter, e error) {
if CORS() {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
- w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, User-Agent, Accept, X-Requested-With")
w.Header().Set("Access-Control-Allow-Credentials", "true")
}
@@ -1572,9 +1573,18 @@ func (s *Server) modelUpstream(w http.ResponseWriter, r *http.Request) error {
return json.NewEncoder(w).Encode(response)
}
+ n := model.ParseName(req.Model)
+ stale := true
+ if m, err := manifest.ParseNamedManifest(n); err == nil {
+ if m.Digest() == digest {
+ stale = false
+ } else if pushTime > 0 && m.FileInfo().ModTime().Unix() >= pushTime {
+ stale = false
+ }
+ }
response := responses.ModelUpstreamResponse{
- Digest: digest,
- PushTime: pushTime,
+ Stale: stale,
}
w.Header().Set("Content-Type", "application/json")

cmd/audio.go (new file, 216 lines)

@@ -0,0 +1,216 @@
package cmd
import (
"encoding/binary"
"sync"
"time"
)
const (
audioSampleRate = 16000
audioChannels = 1
audioFrameSize = 1024 // samples per callback
)
// AudioRecorder captures audio from the default microphone.
// Platform-specific capture is provided by audioStream (audio_darwin.go, etc.).
type AudioRecorder struct {
stream audioStream
mu sync.Mutex
samples []float32
started time.Time
MaxChunkSeconds int // hard split limit in seconds; 0 means use default
}
// audioStream is the platform-specific audio capture interface.
type audioStream interface {
// Start begins capturing. Samples are delivered via the callback.
Start(callback func(samples []float32)) error
// Stop ends capturing and releases resources.
Stop() error
}
// NewAudioRecorder creates a recorder ready to capture from the default mic.
func NewAudioRecorder() (*AudioRecorder, error) {
stream, err := newAudioStream(audioSampleRate, audioChannels, audioFrameSize)
if err != nil {
return nil, err
}
return &AudioRecorder{stream: stream}, nil
}
// Start begins capturing audio from the microphone.
func (r *AudioRecorder) Start() error {
r.mu.Lock()
defer r.mu.Unlock()
r.samples = make([]float32, 0, audioSampleRate*30) // preallocate ~30s
r.started = time.Now()
return r.stream.Start(func(samples []float32) {
r.mu.Lock()
r.samples = append(r.samples, samples...)
r.mu.Unlock()
})
}
// Stop ends the recording and returns the duration.
func (r *AudioRecorder) Stop() (time.Duration, error) {
r.mu.Lock()
dur := time.Since(r.started)
r.mu.Unlock()
if r.stream != nil {
r.stream.Stop()
}
return dur, nil
}
// Duration returns how long the current recording has been running.
func (r *AudioRecorder) Duration() time.Duration {
r.mu.Lock()
defer r.mu.Unlock()
if r.started.IsZero() {
return 0
}
return time.Since(r.started)
}
// Chunking constants for live transcription.
const (
chunkTargetSamples = 8 * audioSampleRate // 8s — start yielding when silence found
chunkMinSamples = 5 * audioSampleRate // start scanning for silence at 5s
defaultMaxAudioSeconds = 28 // default hard split (just under typical 30s model cap)
silenceWindow = 800 // 50ms RMS window
)
func (r *AudioRecorder) maxChunk() int {
if r.MaxChunkSeconds > 0 {
return r.MaxChunkSeconds * audioSampleRate
}
return defaultMaxAudioSeconds * audioSampleRate
}
// TakeChunk checks if there are enough accumulated samples to yield a chunk.
// If so, it splits at the best silence boundary, removes the consumed samples
// from the buffer, and returns the chunk as WAV bytes. Returns nil if not enough
// audio has accumulated yet.
func (r *AudioRecorder) TakeChunk() []byte {
r.mu.Lock()
n := len(r.samples)
if n < chunkMinSamples {
r.mu.Unlock()
return nil
}
maxSamples := r.maxChunk()
if n < chunkTargetSamples && n < maxSamples {
r.mu.Unlock()
return nil
}
limit := n
if limit > maxSamples {
limit = maxSamples
}
splitAt := limit
bestEnergy := float64(1e30)
scanStart := limit - silenceWindow
scanEnd := chunkMinSamples
for pos := scanStart; pos >= scanEnd; pos -= silenceWindow / 2 {
end := pos + silenceWindow
if end > n {
end = n
}
var sumSq float64
for _, s := range r.samples[pos:end] {
sumSq += float64(s) * float64(s)
}
// Mean-square energy; monotonic in RMS, so no sqrt is needed to
// find the quietest window.
energy := sumSq / float64(end-pos)
if energy < bestEnergy {
bestEnergy = energy
splitAt = pos + silenceWindow/2
}
}
chunk := make([]float32, splitAt)
copy(chunk, r.samples[:splitAt])
remaining := make([]float32, n-splitAt)
copy(remaining, r.samples[splitAt:])
r.samples = remaining
r.mu.Unlock()
return encodeWAV(chunk, audioSampleRate, audioChannels)
}
// FlushWAV returns any remaining samples as WAV, clearing the buffer.
func (r *AudioRecorder) FlushWAV() []byte {
r.mu.Lock()
samples := r.samples
r.samples = nil
r.mu.Unlock()
if len(samples) == 0 {
return nil
}
return encodeWAV(samples, audioSampleRate, audioChannels)
}
// WAV encodes the captured samples as a WAV file in memory.
func (r *AudioRecorder) WAV() ([]byte, error) {
r.mu.Lock()
samples := make([]float32, len(r.samples))
copy(samples, r.samples)
r.mu.Unlock()
if len(samples) == 0 {
return nil, errNoAudio
}
return encodeWAV(samples, audioSampleRate, audioChannels), nil
}
// encodeWAV produces a 16-bit PCM WAV file from float32 samples.
func encodeWAV(samples []float32, sampleRate, channels int) []byte {
numSamples := len(samples)
bitsPerSample := 16
byteRate := sampleRate * channels * bitsPerSample / 8
blockAlign := channels * bitsPerSample / 8
dataSize := numSamples * blockAlign
buf := make([]byte, 44+dataSize) // 44-byte canonical WAV header + PCM data
copy(buf[0:4], "RIFF")
binary.LittleEndian.PutUint32(buf[4:8], uint32(36+dataSize)) // RIFF chunk size = file size - 8
copy(buf[8:12], "WAVE")
copy(buf[12:16], "fmt ")
binary.LittleEndian.PutUint32(buf[16:20], 16) // fmt chunk size (16 for PCM)
binary.LittleEndian.PutUint16(buf[20:22], 1) // audio format: 1 = integer PCM
binary.LittleEndian.PutUint16(buf[22:24], uint16(channels))
binary.LittleEndian.PutUint32(buf[24:28], uint32(sampleRate))
binary.LittleEndian.PutUint32(buf[28:32], uint32(byteRate))
binary.LittleEndian.PutUint16(buf[32:34], uint16(blockAlign))
binary.LittleEndian.PutUint16(buf[34:36], uint16(bitsPerSample))
copy(buf[36:40], "data")
binary.LittleEndian.PutUint32(buf[40:44], uint32(dataSize))
offset := 44
for _, s := range samples {
if s > 1.0 {
s = 1.0
} else if s < -1.0 {
s = -1.0
}
val := int16(s * 32767)
binary.LittleEndian.PutUint16(buf[offset:offset+2], uint16(val))
offset += 2
}
return buf
}
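Start, TakeChunk, and FlushWAV together form the chunked-streaming protocol used for long recordings. A minimal usage sketch of that loop (a fragment; recording() and sendWAV() are hypothetical stand-ins for the caller's stop condition and chunk consumer):

rec, err := NewAudioRecorder()
if err != nil {
	return err
}
if err := rec.Start(); err != nil {
	return err
}
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
for recording() { // hypothetical: true until the user stops
	<-ticker.C
	// Nil until roughly 5-28s of audio has accumulated, then a WAV
	// chunk split at the quietest 50ms window.
	if wav := rec.TakeChunk(); wav != nil {
		sendWAV(wav) // hypothetical: e.g. one transcription request per chunk
	}
}
rec.Stop()
if wav := rec.FlushWAV(); wav != nil {
	sendWAV(wav) // remainder shorter than a full chunk
}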

cmd/audio_darwin.go (new file, 180 lines)

@@ -0,0 +1,180 @@
package cmd
/*
#cgo LDFLAGS: -framework CoreAudio -framework AudioToolbox
#include <AudioToolbox/AudioQueue.h>
#include <string.h>
// Callback context passed to AudioQueue.
typedef struct {
int ready; // set to 1 when a buffer is filled
} AQContext;
// C callback — re-enqueues the buffer so recording continues.
// Not static — must be visible to the linker for Go's function pointer.
void aqInputCallback(
void *inUserData,
AudioQueueRef inAQ,
AudioQueueBufferRef inBuffer,
const AudioTimeStamp *inStartTime,
UInt32 inNumberPacketDescriptions,
const AudioStreamPacketDescription *inPacketDescs)
{
// Re-enqueue the buffer immediately so recording continues.
AudioQueueEnqueueBuffer(inAQ, inBuffer, 0, NULL);
}
*/
import "C"
import (
"fmt"
"math"
"sync"
"time"
)
var errNoAudio = fmt.Errorf("no audio recorded")
const numAQBuffers = 3
type coreAudioStream struct {
queue C.AudioQueueRef
buffers [numAQBuffers]C.AudioQueueBufferRef
mu sync.Mutex
callback func(samples []float32)
running bool
pollDone chan struct{}
sampleRate int
channels int
frameSize int
}
func newAudioStream(sampleRate, channels, frameSize int) (audioStream, error) {
return &coreAudioStream{
sampleRate: sampleRate,
channels: channels,
frameSize: frameSize,
}, nil
}
func (s *coreAudioStream) Start(callback func(samples []float32)) error {
s.mu.Lock()
defer s.mu.Unlock()
s.callback = callback
// Set up audio format: 16-bit signed integer PCM, mono, 16kHz.
var format C.AudioStreamBasicDescription
format.mSampleRate = C.Float64(s.sampleRate)
format.mFormatID = C.kAudioFormatLinearPCM
format.mFormatFlags = C.kLinearPCMFormatFlagIsSignedInteger | C.kLinearPCMFormatFlagIsPacked
format.mBitsPerChannel = 16
format.mChannelsPerFrame = C.UInt32(s.channels)
format.mBytesPerFrame = 2 * C.UInt32(s.channels)
format.mFramesPerPacket = 1
format.mBytesPerPacket = format.mBytesPerFrame
// Create the audio queue.
var status C.OSStatus
status = C.AudioQueueNewInput(
&format,
C.AudioQueueInputCallback(C.aqInputCallback),
nil, // user data
C.CFRunLoopRef(0), // NULL run loop — use internal thread
C.CFStringRef(0), // NULL run loop mode
0, // flags
&s.queue,
)
if status != 0 {
return fmt.Errorf("AudioQueueNewInput failed: %d", status)
}
// Allocate and enqueue buffers.
bufferBytes := C.UInt32(s.frameSize * int(format.mBytesPerFrame))
for i := range s.buffers {
status = C.AudioQueueAllocateBuffer(s.queue, bufferBytes, &s.buffers[i])
if status != 0 {
C.AudioQueueDispose(s.queue, C.true)
return fmt.Errorf("AudioQueueAllocateBuffer failed: %d", status)
}
status = C.AudioQueueEnqueueBuffer(s.queue, s.buffers[i], 0, nil)
if status != 0 {
C.AudioQueueDispose(s.queue, C.true)
return fmt.Errorf("AudioQueueEnqueueBuffer failed: %d", status)
}
}
// Start recording.
status = C.AudioQueueStart(s.queue, nil)
if status != 0 {
C.AudioQueueDispose(s.queue, C.true)
return fmt.Errorf("AudioQueueStart failed: %d", status)
}
s.running = true
s.pollDone = make(chan struct{})
// Poll buffers for data. AudioQueue re-enqueues in the C callback,
// so we read the data out periodically.
go s.pollLoop()
return nil
}
func (s *coreAudioStream) pollLoop() {
defer close(s.pollDone)
// Read at roughly frameSize intervals.
interval := time.Duration(float64(s.frameSize) / float64(s.sampleRate) * float64(time.Second))
if interval < 10*time.Millisecond {
interval = 10 * time.Millisecond
}
ticker := time.NewTicker(interval)
defer ticker.Stop()
for range ticker.C {
s.mu.Lock()
if !s.running {
s.mu.Unlock()
return
}
// Read available data from each buffer.
for i := range s.buffers {
buf := s.buffers[i]
if buf.mAudioDataByteSize > 0 {
numSamples := int(buf.mAudioDataByteSize) / 2 // 16-bit samples
if numSamples > 0 {
raw := (*[1 << 28]int16)(buf.mAudioData)[:numSamples:numSamples]
floats := make([]float32, numSamples)
for j, v := range raw {
floats[j] = float32(v) / float32(math.MaxInt16)
}
s.callback(floats)
}
buf.mAudioDataByteSize = 0
}
}
s.mu.Unlock()
}
}
func (s *coreAudioStream) Stop() error {
s.mu.Lock()
s.running = false
queue := s.queue
s.mu.Unlock()
if queue != nil {
C.AudioQueueStop(queue, C.true)
C.AudioQueueDispose(queue, C.true)
}
if s.pollDone != nil {
<-s.pollDone
}
return nil
}

cmd/audio_linux.go (new file, 275 lines)

@@ -0,0 +1,275 @@
package cmd
/*
#cgo LDFLAGS: -ldl
#include <dlfcn.h>
#include <stdint.h>
#include <stdlib.h>
// Function pointer types for ALSA functions loaded at runtime.
typedef int (*pcm_open_fn)(void**, const char*, int, int);
typedef int (*pcm_simple_fn)(void*);
typedef long (*pcm_readi_fn)(void*, void*, unsigned long);
typedef int (*hw_malloc_fn)(void**);
typedef void (*hw_free_fn)(void*);
typedef int (*hw_any_fn)(void*, void*);
typedef int (*hw_set_int_fn)(void*, void*, int);
typedef int (*hw_set_uint_fn)(void*, void*, unsigned int);
typedef int (*hw_set_rate_fn)(void*, void*, unsigned int*, int*);
typedef int (*hw_set_period_fn)(void*, void*, unsigned long*, int*);
typedef int (*hw_apply_fn)(void*, void*);
typedef const char* (*strerror_fn)(int);
// Trampoline functions — call dynamically loaded ALSA symbols.
static int alsa_pcm_open(void* fn, void** h, const char* name, int stream, int mode) {
return ((pcm_open_fn)fn)(h, name, stream, mode);
}
static int alsa_pcm_close(void* fn, void* h) { return ((pcm_simple_fn)fn)(h); }
static int alsa_pcm_prepare(void* fn, void* h) { return ((pcm_simple_fn)fn)(h); }
static int alsa_pcm_drop(void* fn, void* h) { return ((pcm_simple_fn)fn)(h); }
static long alsa_pcm_readi(void* fn, void* h, void* buf, unsigned long frames) {
return ((pcm_readi_fn)fn)(h, buf, frames);
}
static int alsa_hw_malloc(void* fn, void** p) { return ((hw_malloc_fn)fn)(p); }
static void alsa_hw_free(void* fn, void* p) { ((hw_free_fn)fn)(p); }
static int alsa_hw_any(void* fn, void* h, void* p) { return ((hw_any_fn)fn)(h, p); }
static int alsa_hw_set_access(void* fn, void* h, void* p, int v) { return ((hw_set_int_fn)fn)(h, p, v); }
static int alsa_hw_set_format(void* fn, void* h, void* p, int v) { return ((hw_set_int_fn)fn)(h, p, v); }
static int alsa_hw_set_channels(void* fn, void* h, void* p, unsigned int v) { return ((hw_set_uint_fn)fn)(h, p, v); }
static int alsa_hw_set_rate(void* fn, void* h, void* p, unsigned int* v, int* d) { return ((hw_set_rate_fn)fn)(h, p, v, d); }
static int alsa_hw_set_period(void* fn, void* h, void* p, unsigned long* v, int* d) { return ((hw_set_period_fn)fn)(h, p, v, d); }
static int alsa_hw_apply(void* fn, void* h, void* p) { return ((hw_apply_fn)fn)(h, p); }
static const char* alsa_strerror(void* fn, int e) { return ((strerror_fn)fn)(e); }
*/
import "C"
import (
"fmt"
"math"
"sync"
"time"
"unsafe"
)
var errNoAudio = fmt.Errorf("no audio recorded")
const (
sndPCMStreamCapture = 1
sndPCMAccessRWInterleaved = 3
sndPCMFormatS16LE = 2
)
var (
alsaLoadErr error
alsaOnce sync.Once
alsa alsaFuncs
)
type alsaFuncs struct {
pcmOpen, pcmClose, pcmPrepare, pcmDrop, pcmReadi unsafe.Pointer
hwMalloc, hwFree, hwAny unsafe.Pointer
hwSetAccess, hwSetFormat, hwSetChannels unsafe.Pointer
hwSetRate, hwSetPeriod, hwApply unsafe.Pointer
strerror unsafe.Pointer
}
func loadALSA() {
var lib unsafe.Pointer
for _, name := range []string{"libasound.so.2", "libasound.so"} {
cName := C.CString(name)
lib = C.dlopen(cName, C.RTLD_NOW)
C.free(unsafe.Pointer(cName))
if lib != nil {
break
}
}
if lib == nil {
alsaLoadErr = fmt.Errorf("audio capture unavailable: libasound.so not found")
return
}
sym := func(name string) unsafe.Pointer {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
return C.dlsym(lib, cName)
}
syms := []struct {
ptr *unsafe.Pointer
name string
}{
{&alsa.pcmOpen, "snd_pcm_open"},
{&alsa.pcmClose, "snd_pcm_close"},
{&alsa.pcmPrepare, "snd_pcm_prepare"},
{&alsa.pcmDrop, "snd_pcm_drop"},
{&alsa.pcmReadi, "snd_pcm_readi"},
{&alsa.hwMalloc, "snd_pcm_hw_params_malloc"},
{&alsa.hwFree, "snd_pcm_hw_params_free"},
{&alsa.hwAny, "snd_pcm_hw_params_any"},
{&alsa.hwSetAccess, "snd_pcm_hw_params_set_access"},
{&alsa.hwSetFormat, "snd_pcm_hw_params_set_format"},
{&alsa.hwSetChannels, "snd_pcm_hw_params_set_channels"},
{&alsa.hwSetRate, "snd_pcm_hw_params_set_rate_near"},
{&alsa.hwSetPeriod, "snd_pcm_hw_params_set_period_size_near"},
{&alsa.hwApply, "snd_pcm_hw_params"},
{&alsa.strerror, "snd_strerror"},
}
for _, s := range syms {
*s.ptr = sym(s.name)
if *s.ptr == nil {
alsaLoadErr = fmt.Errorf("audio capture unavailable: missing %s in libasound", s.name)
return
}
}
}
func alsaError(code C.int) string {
if alsa.strerror == nil {
return fmt.Sprintf("error %d", code)
}
return C.GoString(C.alsa_strerror(alsa.strerror, code))
}
type alsaStream struct {
handle unsafe.Pointer
mu sync.Mutex
callback func(samples []float32)
running bool
done chan struct{}
sampleRate int
channels int
frameSize int
}
func newAudioStream(sampleRate, channels, frameSize int) (audioStream, error) {
alsaOnce.Do(loadALSA)
if alsaLoadErr != nil {
return nil, alsaLoadErr
}
return &alsaStream{
sampleRate: sampleRate,
channels: channels,
frameSize: frameSize,
}, nil
}
func (s *alsaStream) Start(callback func(samples []float32)) error {
s.mu.Lock()
defer s.mu.Unlock()
s.callback = callback
cName := C.CString("default")
defer C.free(unsafe.Pointer(cName))
rc := C.alsa_pcm_open(alsa.pcmOpen, (*unsafe.Pointer)(unsafe.Pointer(&s.handle)), cName, C.int(sndPCMStreamCapture), 0)
if rc < 0 {
return fmt.Errorf("snd_pcm_open: %s", alsaError(rc))
}
var hwParams unsafe.Pointer
C.alsa_hw_malloc(alsa.hwMalloc, (*unsafe.Pointer)(unsafe.Pointer(&hwParams)))
defer C.alsa_hw_free(alsa.hwFree, hwParams)
C.alsa_hw_any(alsa.hwAny, s.handle, hwParams)
if rc = C.alsa_hw_set_access(alsa.hwSetAccess, s.handle, hwParams, C.int(sndPCMAccessRWInterleaved)); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("set access: %s", alsaError(rc))
}
if rc = C.alsa_hw_set_format(alsa.hwSetFormat, s.handle, hwParams, C.int(sndPCMFormatS16LE)); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("set format: %s", alsaError(rc))
}
if rc = C.alsa_hw_set_channels(alsa.hwSetChannels, s.handle, hwParams, C.uint(s.channels)); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("set channels: %s", alsaError(rc))
}
rate := C.uint(s.sampleRate)
if rc = C.alsa_hw_set_rate(alsa.hwSetRate, s.handle, hwParams, &rate, nil); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("set rate: %s", alsaError(rc))
}
periodSize := C.ulong(s.frameSize)
if rc = C.alsa_hw_set_period(alsa.hwSetPeriod, s.handle, hwParams, &periodSize, nil); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("set period: %s", alsaError(rc))
}
if rc = C.alsa_hw_apply(alsa.hwApply, s.handle, hwParams); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("apply hw params: %s", alsaError(rc))
}
if rc = C.alsa_pcm_prepare(alsa.pcmPrepare, s.handle); rc < 0 {
C.alsa_pcm_close(alsa.pcmClose, s.handle)
return fmt.Errorf("prepare: %s", alsaError(rc))
}
s.running = true
s.done = make(chan struct{})
go s.captureLoop(int(periodSize))
return nil
}
func (s *alsaStream) captureLoop(periodSize int) {
defer close(s.done)
buf := make([]int16, periodSize*s.channels)
for {
s.mu.Lock()
if !s.running {
s.mu.Unlock()
return
}
handle := s.handle
s.mu.Unlock()
frames := C.alsa_pcm_readi(alsa.pcmReadi, handle, unsafe.Pointer(&buf[0]), C.ulong(periodSize))
if frames < 0 {
C.alsa_pcm_prepare(alsa.pcmPrepare, handle)
continue
}
if frames == 0 {
time.Sleep(5 * time.Millisecond)
continue
}
numSamples := int(frames) * s.channels
floats := make([]float32, numSamples)
for i := 0; i < numSamples; i++ {
floats[i] = float32(buf[i]) / float32(math.MaxInt16)
}
s.mu.Lock()
if s.callback != nil {
s.callback(floats)
}
s.mu.Unlock()
}
}
func (s *alsaStream) Stop() error {
s.mu.Lock()
s.running = false
handle := s.handle
s.handle = nil
s.mu.Unlock()
if s.done != nil {
<-s.done
}
if handle != nil {
C.alsa_pcm_drop(alsa.pcmDrop, handle)
C.alsa_pcm_close(alsa.pcmClose, handle)
}
return nil
}

cmd/audio_windows.go (new file, 288 lines)

@@ -0,0 +1,288 @@
package cmd
import (
"fmt"
"math"
"sync"
"syscall"
"time"
"unsafe"
)
var errNoAudio = fmt.Errorf("no audio recorded")
// WASAPI COM GUIDs
var (
iidIMMDeviceEnumerator = guid{0xA95664D2, 0x9614, 0x4F35, [8]byte{0xA7, 0x46, 0xDE, 0x8D, 0xB6, 0x36, 0x17, 0xE6}}
clsidMMDeviceEnumerator = guid{0xBCDE0395, 0xE52F, 0x467C, [8]byte{0x8E, 0x3D, 0xC4, 0x57, 0x92, 0x91, 0x69, 0x2E}}
iidIAudioClient = guid{0x1CB9AD4C, 0xDBFA, 0x4C32, [8]byte{0xB1, 0x78, 0xC2, 0xF5, 0x68, 0xA7, 0x03, 0xB2}}
iidIAudioCaptureClient = guid{0xC8ADBD64, 0xE71E, 0x48A0, [8]byte{0xA4, 0xDE, 0x18, 0x5C, 0x39, 0x5C, 0xD3, 0x17}}
)
type guid struct {
Data1 uint32
Data2 uint16
Data3 uint16
Data4 [8]byte
}
// WAVEFORMATEX structure
type waveFormatEx struct {
FormatTag uint16
Channels uint16
SamplesPerSec uint32
AvgBytesPerSec uint32
BlockAlign uint16
BitsPerSample uint16
CbSize uint16
}
const (
wavePCM = 1
eCapture = 1
eConsole = 0
audclntSharemode = 0 // AUDCLNT_SHAREMODE_SHARED
audclntStreamflagsEventcallback = 0x00040000
coinitMultithreaded = 0x0
clsctxAll = 0x17
reftimesPerSec = 10000000 // 100ns units per second
reftimesPerMillis = 10000
)
var (
ole32 = syscall.NewLazyDLL("ole32.dll")
coInit = ole32.NewProc("CoInitializeEx")
coCreate = ole32.NewProc("CoCreateInstance")
)
type wasapiStream struct {
mu sync.Mutex
callback func(samples []float32)
running bool
done chan struct{}
sampleRate int
channels int
frameSize int
// COM interfaces (stored as uintptr for syscall)
enumerator uintptr
device uintptr
client uintptr
capture uintptr
}
func newAudioStream(sampleRate, channels, frameSize int) (audioStream, error) {
return &wasapiStream{
sampleRate: sampleRate,
channels: channels,
frameSize: frameSize,
}, nil
}
func (s *wasapiStream) Start(callback func(samples []float32)) error {
s.mu.Lock()
defer s.mu.Unlock()
s.callback = callback
// Initialize COM
hr, _, _ := coInit.Call(0, uintptr(coinitMultithreaded))
// S_OK or S_FALSE (already initialized) are both fine
if hr != 0 && hr != 1 {
return fmt.Errorf("CoInitializeEx failed: 0x%08x", hr)
}
// Create device enumerator
hr, _, _ = coCreate.Call(
uintptr(unsafe.Pointer(&clsidMMDeviceEnumerator)),
0,
uintptr(clsctxAll),
uintptr(unsafe.Pointer(&iidIMMDeviceEnumerator)),
uintptr(unsafe.Pointer(&s.enumerator)),
)
if hr != 0 {
return fmt.Errorf("CoCreateInstance(MMDeviceEnumerator) failed: 0x%08x", hr)
}
// Get default capture device
// IMMDeviceEnumerator::GetDefaultAudioEndpoint is vtable index 4
hr = comCall(s.enumerator, 4, uintptr(eCapture), uintptr(eConsole), uintptr(unsafe.Pointer(&s.device)))
if hr != 0 {
return fmt.Errorf("GetDefaultAudioEndpoint failed: 0x%08x", hr)
}
// Activate IAudioClient
// IMMDevice::Activate is vtable index 3
hr = comCall(s.device, 3,
uintptr(unsafe.Pointer(&iidIAudioClient)),
uintptr(clsctxAll),
0,
uintptr(unsafe.Pointer(&s.client)),
)
if hr != 0 {
return fmt.Errorf("IMMDevice::Activate failed: 0x%08x", hr)
}
// Set up format: 16-bit PCM mono 16kHz
format := waveFormatEx{
FormatTag: wavePCM,
Channels: uint16(s.channels),
SamplesPerSec: uint32(s.sampleRate),
BitsPerSample: 16,
BlockAlign: uint16(2 * s.channels),
AvgBytesPerSec: uint32(s.sampleRate * 2 * s.channels),
CbSize: 0,
}
// Initialize audio client
// IAudioClient::Initialize is vtable index 3
bufferDuration := int64(reftimesPerSec) // 1 second buffer
hr = comCall(s.client, 3,
uintptr(audclntSharemode),
0, // stream flags
uintptr(bufferDuration),
0, // periodicity (0 = use default)
uintptr(unsafe.Pointer(&format)),
0, // audio session GUID (NULL = default)
)
if hr != 0 {
return fmt.Errorf("IAudioClient::Initialize failed: 0x%08x", hr)
}
// Get capture client
// IAudioClient::GetService is vtable index 14 (IUnknown's three
// methods plus the eleven IAudioClient methods that precede it)
hr = comCall(s.client, 14,
uintptr(unsafe.Pointer(&iidIAudioCaptureClient)),
uintptr(unsafe.Pointer(&s.capture)),
)
if hr != 0 {
return fmt.Errorf("IAudioClient::GetService failed: 0x%08x", hr)
}
// Start capture
// IAudioClient::Start is vtable index 10
hr = comCall(s.client, 10)
if hr != 0 {
return fmt.Errorf("IAudioClient::Start failed: 0x%08x", hr)
}
s.running = true
s.done = make(chan struct{})
go s.captureLoop()
return nil
}
func (s *wasapiStream) captureLoop() {
defer close(s.done)
ticker := time.NewTicker(20 * time.Millisecond)
defer ticker.Stop()
for range ticker.C {
s.mu.Lock()
if !s.running {
s.mu.Unlock()
return
}
// Read available packets
for {
var data uintptr
var numFrames uint32
var flags uint32
// IAudioCaptureClient::GetBuffer is vtable index 3
hr := comCall(s.capture, 3,
uintptr(unsafe.Pointer(&data)),
uintptr(unsafe.Pointer(&numFrames)),
uintptr(unsafe.Pointer(&flags)),
0, // device position (not needed)
0, // QPC position (not needed)
)
if hr != 0 || numFrames == 0 {
break
}
// Convert int16 samples to float32
samples := make([]float32, numFrames*uint32(s.channels))
raw := (*[1 << 28]int16)(unsafe.Pointer(data))[:len(samples):len(samples)]
for i, v := range raw {
samples[i] = float32(v) / float32(math.MaxInt16)
}
s.callback(samples)
// IAudioCaptureClient::ReleaseBuffer is vtable index 4
comCall(s.capture, 4, uintptr(numFrames))
}
s.mu.Unlock()
}
}
func (s *wasapiStream) Stop() error {
s.mu.Lock()
s.running = false
s.mu.Unlock()
if s.done != nil {
<-s.done
}
// IAudioClient::Stop is vtable index 11
if s.client != 0 {
comCall(s.client, 11)
}
// Release COM interfaces (IUnknown::Release is vtable index 2)
if s.capture != 0 {
comCall(s.capture, 2)
}
if s.client != 0 {
comCall(s.client, 2)
}
if s.device != 0 {
comCall(s.device, 2)
}
if s.enumerator != 0 {
comCall(s.enumerator, 2)
}
return nil
}
// comCall invokes a COM method by vtable index.
func comCall(obj uintptr, method uintptr, args ...uintptr) uintptr {
vtable := *(*uintptr)(unsafe.Pointer(obj))
fn := *(*uintptr)(unsafe.Pointer(vtable + method*unsafe.Sizeof(uintptr(0))))
// Build syscall args: first arg is always 'this' pointer
callArgs := make([]uintptr, 1+len(args))
callArgs[0] = obj
copy(callArgs[1:], args)
// syscall.SyscallN is variadic, so a single call covers any arity.
hr, _, _ := syscall.SyscallN(fn, callArgs...)
return hr
}
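The vtable indices used above follow standard COM layout: every interface begins with IUnknown's QueryInterface (0), AddRef (1), and Release (2), then adds its own methods in declaration order. That is why IMMDevice::Activate sits at index 3, while IAudioClient's Initialize, Start, Stop, and GetService land at 3, 10, 11, and 14 respectively.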


@@ -32,6 +32,7 @@ type flagOptions struct {
verbose *bool
warmup *int
promptTokens *int
+ numCtx *int
}
type Metrics struct {
@@ -48,6 +49,7 @@ type ModelInfo struct {
Family string
SizeBytes int64
VRAMBytes int64
+ NumCtx int64
}
const DefaultPrompt = `Please write a descriptive story about a llama named Alonso who grows up to be President of the Land of Llamas. Include details about Alonso's childhood, adolescent years, and how he grew up to be a political mover and shaker. Write the story with a sense of whimsy.`
@@ -64,9 +66,12 @@ var promptWordList = []string{
"old", "stone", "bridge", "that", "crosses", "winding", "river",
}
+ // tokensPerWord is the calibrated ratio of tokens to words for the current model.
+ // Initialized with a heuristic, then updated during warmup based on actual tokenization.
+ var tokensPerWord = 1.3
func generatePromptForTokenCount(targetTokens int, epoch int) string {
- // ~1.3 tokens per word heuristic
- targetWords := int(float64(targetTokens) / 1.3)
+ targetWords := int(float64(targetTokens) / tokensPerWord)
if targetWords < 1 {
targetWords = 1
}
@@ -81,6 +86,17 @@ func generatePromptForTokenCount(targetTokens int, epoch int) string {
return strings.Join(words, " ")
}
// calibratePromptTokens adjusts tokensPerWord based on actual tokenization from a warmup run.
func calibratePromptTokens(targetTokens, actualTokens, wordCount int) {
if actualTokens <= 0 || wordCount <= 0 {
return
}
tokensPerWord = float64(actualTokens) / float64(wordCount)
newWords := int(float64(targetTokens) / tokensPerWord)
fmt.Fprintf(os.Stderr, "bench: calibrated %.2f tokens/word (target=%d, got=%d, words=%d → %d)\n",
tokensPerWord, targetTokens, actualTokens, wordCount, newWords)
}
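A worked example with illustrative numbers: a -prompt-tokens 1000 run first generates int(1000/1.3) = 769 words; if the warmup then reports 1115 actual prompt tokens, tokensPerWord becomes 1115/769 ≈ 1.45, so subsequent prompts target int(1000/1.45) = 689 words.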
func buildGenerateRequest(model string, fOpt flagOptions, imgData api.ImageData, epoch int) *api.GenerateRequest {
options := make(map[string]interface{})
if *fOpt.maxTokens > 0 {
@@ -90,6 +106,9 @@ func buildGenerateRequest(model string, fOpt flagOptions, imgData api.ImageData,
if fOpt.seed != nil && *fOpt.seed > 0 {
options["seed"] = *fOpt.seed
}
+ if fOpt.numCtx != nil && *fOpt.numCtx > 0 {
+ options["num_ctx"] = *fOpt.numCtx
+ }
var keepAliveDuration *api.Duration
if *fOpt.keepAlive > 0 {
@@ -146,7 +165,6 @@ func fetchMemoryUsage(ctx context.Context, client *api.Client, model string) (si
return m.Size, m.SizeVRAM
}
}
- // Try prefix match (model names may include :latest or tags)
for _, m := range resp.Models {
if strings.HasPrefix(m.Name, model) || strings.HasPrefix(m.Model, model) {
return m.Size, m.SizeVRAM
@@ -155,6 +173,19 @@ func fetchMemoryUsage(ctx context.Context, client *api.Client, model string) (si
return 0, 0
}
func fetchContextLength(ctx context.Context, client *api.Client, model string) int64 {
resp, err := client.ListRunning(ctx)
if err != nil {
return 0
}
for _, m := range resp.Models {
if m.Name == model || m.Model == model || strings.HasPrefix(m.Name, model) || strings.HasPrefix(m.Model, model) {
return int64(m.ContextLength)
}
}
return 0
}
func outputFormatHeader(w io.Writer, format string, verbose bool) {
switch format {
case "benchstat":
@@ -177,8 +208,12 @@ func outputModelInfo(w io.Writer, format string, info ModelInfo) {
if info.SizeBytes > 0 {
memStr = fmt.Sprintf(" | Size: %d | VRAM: %d", info.SizeBytes, info.VRAMBytes)
}
- fmt.Fprintf(w, "# Model: %s | Params: %s | Quant: %s | Family: %s%s\n",
- info.Name, params, quant, family, memStr)
+ ctxStr := ""
+ if info.NumCtx > 0 {
+ ctxStr = fmt.Sprintf(" | NumCtx: %d", info.NumCtx)
+ }
+ fmt.Fprintf(w, "# Model: %s | Params: %s | Quant: %s | Family: %s%s%s\n",
+ info.Name, params, quant, family, memStr, ctxStr)
}
func OutputMetrics(w io.Writer, format string, metrics []Metrics, verbose bool) {
@@ -276,21 +311,38 @@ func BenchmarkModel(fOpt flagOptions) error {
req := buildGenerateRequest(model, fOpt, imgData, -(i + 1))
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(*fOpt.timeout)*time.Second)
+ var warmupMetrics *api.Metrics
err = client.Generate(ctx, req, func(resp api.GenerateResponse) error {
+ if resp.Done {
+ warmupMetrics = &resp.Metrics
+ }
return nil
})
cancel()
if err != nil {
fmt.Fprintf(os.Stderr, "WARNING: Warmup %d/%d for %s failed: %v\n", i+1, *fOpt.warmup, model, err)
- } else if *fOpt.debug {
- fmt.Fprintf(os.Stderr, "Warmup %d/%d for %s complete\n", i+1, *fOpt.warmup, model)
+ } else {
+ if *fOpt.debug {
+ fmt.Fprintf(os.Stderr, "Warmup %d/%d for %s complete\n", i+1, *fOpt.warmup, model)
+ }
+ // Calibrate prompt token count on last warmup run
+ if i == *fOpt.warmup-1 && *fOpt.promptTokens > 0 && warmupMetrics != nil {
+ prompt := generatePromptForTokenCount(*fOpt.promptTokens, -(i + 1))
+ wordCount := len(strings.Fields(prompt))
+ calibratePromptTokens(*fOpt.promptTokens, warmupMetrics.PromptEvalCount, wordCount)
+ }
}
}
- // Fetch memory usage once after warmup (model is loaded and stable)
+ // Fetch memory/context info once after warmup (model is loaded and stable)
memCtx, memCancel := context.WithTimeout(context.Background(), 5*time.Second)
info.SizeBytes, info.VRAMBytes = fetchMemoryUsage(memCtx, client, model)
+ if fOpt.numCtx != nil && *fOpt.numCtx > 0 {
+ info.NumCtx = int64(*fOpt.numCtx)
+ } else {
+ info.NumCtx = fetchContextLength(memCtx, client, model)
+ }
memCancel()
outputModelInfo(out, *fOpt.format, info)
@@ -479,6 +531,7 @@ func main() {
debug: flag.Bool("debug", false, "Show debug information"),
warmup: flag.Int("warmup", 1, "Number of warmup requests before timing"),
promptTokens: flag.Int("prompt-tokens", 0, "Generate prompt targeting ~N tokens (0 = use -p prompt)"),
+ numCtx: flag.Int("num-ctx", 0, "Context size (0 = server default)"),
}
flag.Usage = func() {


@@ -695,7 +695,8 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}
- opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision)
+ audioCapable := slices.Contains(info.Capabilities, model.CapabilityAudio)
+ opts.MultiModal = slices.Contains(info.Capabilities, model.CapabilityVision) || audioCapable
// TODO: remove the projector info and vision info checks below,
// these are left in for backwards compatibility with older servers
@@ -1494,6 +1495,9 @@ type displayResponseState struct {
func displayResponse(content string, wordWrap bool, state *displayResponseState) {
termWidth, _, _ := term.GetSize(int(os.Stdout.Fd()))
+ if termWidth == 0 {
+ termWidth = 80
+ }
if wordWrap && termWidth >= 10 {
for _, ch := range content {
if state.lineLength+1 > termWidth-5 {
@@ -2065,6 +2069,10 @@ func runLauncherAction(cmd *cobra.Command, action tui.TUIAction, deps launcherDe
if err != nil {
return true, fmt.Errorf("launching %s: %w", action.Integration, err)
}
+ // VS Code is a GUI app — exit the TUI loop after launching
+ if action.Integration == "vscode" {
+ return false, nil
+ }
return true, nil
default:
return false, fmt.Errorf("unknown launcher action: %d", action.Kind)


@@ -209,6 +209,43 @@ func TestRunLauncherAction_RunModelContinuesAfterCancellation(t *testing.T) {
}
}
func TestRunLauncherAction_VSCodeExitsTUILoop(t *testing.T) {
setCmdTestHome(t, t.TempDir())
cmd := &cobra.Command{}
cmd.SetContext(context.Background())
// VS Code should exit the TUI loop (return false) after a successful launch.
continueLoop, err := runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "vscode"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if continueLoop {
t.Fatal("expected vscode launch to exit the TUI loop (return false)")
}
// Other integrations should continue the TUI loop (return true).
continueLoop, err = runLauncherAction(cmd, tui.TUIAction{Kind: tui.TUIActionLaunchIntegration, Integration: "claude"}, launcherDeps{
resolveRunModel: unexpectedRunModelResolution(t),
launchIntegration: func(ctx context.Context, req launch.IntegrationLaunchRequest) error {
return nil
},
runModel: unexpectedModelLaunch(t),
})
if err != nil {
t.Fatalf("expected nil error, got %v", err)
}
if !continueLoop {
t.Fatal("expected non-vscode integration to continue the TUI loop (return true)")
}
}
func TestRunLauncherAction_IntegrationContinuesAfterCancellation(t *testing.T) {
setCmdTestHome(t, t.TempDir())


@@ -301,7 +301,7 @@ Weigh anchor!
ParameterSize: "7B",
QuantizationLevel: "FP16",
},
- Requires: "0.14.0",
+ Requires: "0.19.0",
}, false, &b); err != nil {
t.Fatal(err)
}
@@ -310,10 +310,17 @@ Weigh anchor!
architecture test
parameters 7B
quantization FP16
- requires 0.14.0
+ requires 0.19.0
`
- if diff := cmp.Diff(expect, b.String()); diff != "" {
+ trimLinePadding := func(s string) string {
+ lines := strings.Split(s, "\n")
+ for i, line := range lines {
+ lines[i] = strings.TrimRight(line, " \t\r")
+ }
+ return strings.Join(lines, "\n")
+ }
+ if diff := cmp.Diff(trimLinePadding(expect), trimLinePadding(b.String())); diff != "" {
t.Errorf("unexpected output (-want +got):\n%s", diff)
}
})
@@ -1912,7 +1919,7 @@ func TestShowInfoImageGen(t *testing.T) {
QuantizationLevel: "Q8",
},
Capabilities: []model.Capability{model.CapabilityImage},
- Requires: "0.14.0",
+ Requires: "0.19.0",
}, false, &b)
if err != nil {
t.Fatal(err)
}
@@ -1922,7 +1929,7 @@ func TestShowInfoImageGen(t *testing.T) {
" architecture ZImagePipeline \n" +
" parameters 10.3B \n" +
" quantization Q8 \n" +
- " requires 0.14.0 \n" +
+ " requires 0.19.0 \n" +
"\n" +
" Capabilities\n" +
" image \n" +


@@ -47,7 +47,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
if opts.MultiModal {
- fmt.Fprintf(os.Stderr, "Use %s to include .jpg, .png, or .webp images.\n", filepath.FromSlash("/path/to/file"))
+ fmt.Fprintf(os.Stderr, "Use %s to include .jpg, .png, .webp images, or .wav audio files.\n", filepath.FromSlash("/path/to/file"))
}
fmt.Fprintln(os.Stderr, "")
@@ -592,7 +592,7 @@ func extractFileNames(input string) []string {
// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
// and followed by more characters and a file extension
// This will capture non filename strings, but we'll check for file existence to remove mismatches
- regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp)\b`
+ regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp|wav|mp4|webm|mov|avi|mkv|m4v)\b`
re := regexp.MustCompile(regexPattern)
return re.FindAllString(input, -1)
@@ -608,10 +608,16 @@ func extractFileData(input string) (string, []api.ImageData, error) {
if errors.Is(err, os.ErrNotExist) {
continue
} else if err != nil {
- fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
+ fmt.Fprintf(os.Stderr, "Couldn't process file: %q\n", err)
return "", imgs, err
}
- fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
+ ext := strings.ToLower(filepath.Ext(nfp))
+ switch ext {
+ case ".wav":
+ fmt.Fprintf(os.Stderr, "Added audio '%s'\n", nfp)
+ default:
+ fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
+ }
input = strings.ReplaceAll(input, "'"+nfp+"'", "")
input = strings.ReplaceAll(input, "'"+fp+"'", "")
input = strings.ReplaceAll(input, fp, "")
@@ -685,9 +691,9 @@ func getImageData(filePath string) ([]byte, error) {
}
contentType := http.DetectContentType(buf)
- allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp"}
+ allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp", "audio/wave"}
if !slices.Contains(allowedTypes, contentType) {
- return nil, fmt.Errorf("invalid image type: %s", contentType)
+ return nil, fmt.Errorf("invalid file type: %s", contentType)
}
info, err := file.Stat()
@@ -695,8 +701,7 @@ func getImageData(filePath string) ([]byte, error) {
return nil, err
}
- // Check if the file size exceeds 100MB
- var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
+ var maxSize int64 = 100 * 1024 * 1024 // 100MB
if info.Size() > maxSize {
return nil, errors.New("file size exceeds maximum limit (100MB)")
}


@@ -84,3 +84,33 @@ func TestExtractFileDataRemovesQuotedFilepath(t *testing.T) {
assert.Len(t, imgs, 1)
assert.Equal(t, cleaned, "before after")
}
func TestExtractFileDataWAV(t *testing.T) {
dir := t.TempDir()
fp := filepath.Join(dir, "sample.wav")
data := make([]byte, 600)
copy(data[:44], []byte{
'R', 'I', 'F', 'F',
0x58, 0x02, 0x00, 0x00, // file size - 8
'W', 'A', 'V', 'E',
'f', 'm', 't', ' ',
0x10, 0x00, 0x00, 0x00, // fmt chunk size
0x01, 0x00, // PCM
0x01, 0x00, // mono
0x80, 0x3e, 0x00, 0x00, // 16000 Hz
0x00, 0x7d, 0x00, 0x00, // byte rate
0x02, 0x00, // block align
0x10, 0x00, // 16-bit
'd', 'a', 't', 'a',
0x34, 0x02, 0x00, 0x00, // data size
})
if err := os.WriteFile(fp, data, 0o600); err != nil {
t.Fatalf("failed to write test audio: %v", err)
}
input := "before " + fp + " after"
cleaned, imgs, err := extractFileData(input)
assert.NoError(t, err)
assert.Len(t, imgs, 1)
assert.Equal(t, "before after", cleaned)
}


@@ -1551,6 +1551,31 @@ func TestIntegration_Editor(t *testing.T) {
}
}
func TestIntegration_AutoInstallable(t *testing.T) {
tests := []struct {
name string
want bool
}{
{"openclaw", true},
{"pi", true},
{"claude", false},
{"codex", false},
{"opencode", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := false
integration, err := integrationFor(tt.name)
if err == nil {
got = integration.autoInstallable
}
if got != tt.want {
t.Errorf("integrationFor(%q).autoInstallable = %v, want %v", tt.name, got, tt.want)
}
})
}
}
func TestIntegrationModels(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)


@@ -179,6 +179,7 @@ Supported integrations:
opencode OpenCode
openclaw OpenClaw (aliases: clawdbot, moltbot)
pi Pi
+ vscode VS Code (aliases: code)
Examples:
ollama launch
@@ -489,8 +490,10 @@ func (c *launcherClient) launchEditorIntegration(ctx context.Context, name strin
return err
}
models = selected
- } else if err := c.ensureModelsReady(ctx, models); err != nil {
- return err
+ } else if len(models) > 0 {
+ if err := c.ensureModelsReady(ctx, models[:1]); err != nil {
+ return err
+ }
}
if len(models) == 0 {
@@ -551,10 +554,14 @@ func (c *launcherClient) selectMultiModelsForIntegration(ctx context.Context, ru
if err != nil {
return nil, err
}
- if err := c.ensureModelsReady(ctx, selected); err != nil {
+ accepted, skipped, err := c.selectReadyModelsForSave(ctx, selected)
+ if err != nil {
return nil, err
}
- return selected, nil
+ for _, skip := range skipped {
+ fmt.Fprintf(os.Stderr, "Skipped %s: %s\n", skip.model, skip.reason)
+ }
+ return accepted, nil
}
func (c *launcherClient) loadSelectableModels(ctx context.Context, preChecked []string, current, emptyMessage string) ([]ModelItem, []string, error) {
@@ -575,16 +582,7 @@ func (c *launcherClient) loadSelectableModels(ctx context.Context, preChecked []
}
func (c *launcherClient) ensureModelsReady(ctx context.Context, models []string) error {
- var deduped []string
- seen := make(map[string]bool, len(models))
- for _, model := range models {
- if model == "" || seen[model] {
- continue
- }
- seen[model] = true
- deduped = append(deduped, model)
- }
- models = deduped
+ models = dedupeModelList(models)
if len(models) == 0 {
return nil
}
@@ -602,6 +600,56 @@ func (c *launcherClient) ensureModelsReady(ctx context.Context, models []string)
return ensureAuth(ctx, c.apiClient, cloudModels, models)
}
func dedupeModelList(models []string) []string {
deduped := make([]string, 0, len(models))
seen := make(map[string]bool, len(models))
for _, model := range models {
if model == "" || seen[model] {
continue
}
seen[model] = true
deduped = append(deduped, model)
}
return deduped
}
type skippedModel struct {
model string
reason string
}
func (c *launcherClient) selectReadyModelsForSave(ctx context.Context, selected []string) ([]string, []skippedModel, error) {
selected = dedupeModelList(selected)
accepted := make([]string, 0, len(selected))
skipped := make([]skippedModel, 0, len(selected))
for _, model := range selected {
if err := c.ensureModelsReady(ctx, []string{model}); err != nil {
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
return nil, nil, err
}
skipped = append(skipped, skippedModel{
model: model,
reason: skippedModelReason(model, err),
})
continue
}
accepted = append(accepted, model)
}
return accepted, skipped, nil
}
func skippedModelReason(model string, err error) string {
if errors.Is(err, ErrCancelled) {
if isCloudModelName(model) {
return "sign in was cancelled"
}
return "download was cancelled"
}
return err.Error()
}
func (c *launcherClient) resolveEditorLaunchModels(ctx context.Context, saved *config.IntegrationConfig, req IntegrationLaunchRequest) ([]string, bool) {
if req.ForceConfigure {
return editorPreCheckedModels(saved, req.ModelOverride), true
@@ -801,13 +849,6 @@ func cloneAliases(aliases map[string]string) map[string]string {
return cloned
}
- func singleModelPrechecked(current string) []string {
- if current == "" {
- return nil
- }
- return []string{current}
- }
func firstModel(models []string) string {
if len(models) == 0 {
return ""


@@ -832,6 +832,403 @@ func TestLaunchIntegration_EditorCloudDisabledFallsBackToSelector(t *testing.T) {
}
}
func TestLaunchIntegration_EditorConfigureMultiSkipsMissingLocalAndPersistsAccepted(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "droid")
t.Setenv("PATH", binDir)
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "droid", editor)
DefaultMultiSelector = func(title string, items []ModelItem, preChecked []string) ([]string, error) {
return []string{"glm-5:cloud", "missing-local"}, nil
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
if prompt == "Proceed?" {
return true, nil
}
if prompt == "Download missing-local?" {
return false, nil
}
t.Fatalf("unexpected prompt: %q", prompt)
return false, nil
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/tags":
fmt.Fprint(w, `{"models":[{"name":"glm-5:cloud","remote_model":"glm-5"}]}`)
case "/api/status":
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"not found"}`)
case "/api/show":
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
switch req.Model {
case "glm-5:cloud":
fmt.Fprint(w, `{"remote_model":"glm-5"}`)
case "missing-local":
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"model not found"}`)
default:
http.NotFound(w, r)
}
case "/api/me":
fmt.Fprint(w, `{"name":"test-user"}`)
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
var launchErr error
stderr := captureStderr(t, func() {
launchErr = LaunchIntegration(context.Background(), IntegrationLaunchRequest{
Name: "droid",
ForceConfigure: true,
})
})
if launchErr != nil {
t.Fatalf("LaunchIntegration returned error: %v", launchErr)
}
if editor.ranModel != "glm-5:cloud" {
t.Fatalf("expected launch to use cloud primary, got %q", editor.ranModel)
}
saved, err := config.LoadIntegration("droid")
if err != nil {
t.Fatalf("failed to reload saved config: %v", err)
}
if diff := compareStrings(saved.Models, []string{"glm-5:cloud"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
if diff := compareStringSlices(editor.edited, [][]string{{"glm-5:cloud"}}); diff != "" {
t.Fatalf("unexpected edited models (-want +got):\n%s", diff)
}
if !strings.Contains(stderr, "Skipped missing-local:") {
t.Fatalf("expected skip reason in stderr, got %q", stderr)
}
}
func TestLaunchIntegration_EditorConfigureMultiSkipsUnauthedCloudAndPersistsAccepted(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "droid")
t.Setenv("PATH", binDir)
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "droid", editor)
DefaultMultiSelector = func(title string, items []ModelItem, preChecked []string) ([]string, error) {
return []string{"llama3.2", "glm-5:cloud"}, nil
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
if prompt == "Proceed?" {
return true, nil
}
t.Fatalf("unexpected prompt: %q", prompt)
return false, nil
}
DefaultSignIn = func(modelName, signInURL string) (string, error) {
return "", ErrCancelled
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/tags":
fmt.Fprint(w, `{"models":[{"name":"llama3.2"},{"name":"glm-5:cloud","remote_model":"glm-5"}]}`)
case "/api/status":
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"not found"}`)
case "/api/show":
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
switch req.Model {
case "llama3.2":
fmt.Fprint(w, `{"model":"llama3.2"}`)
case "glm-5:cloud":
fmt.Fprint(w, `{"remote_model":"glm-5"}`)
default:
http.NotFound(w, r)
}
case "/api/me":
w.WriteHeader(http.StatusUnauthorized)
fmt.Fprint(w, `{"error":"unauthorized","signin_url":"https://example.com/signin"}`)
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
var launchErr error
stderr := captureStderr(t, func() {
launchErr = LaunchIntegration(context.Background(), IntegrationLaunchRequest{
Name: "droid",
ForceConfigure: true,
})
})
if launchErr != nil {
t.Fatalf("LaunchIntegration returned error: %v", launchErr)
}
if editor.ranModel != "llama3.2" {
t.Fatalf("expected launch to use local primary, got %q", editor.ranModel)
}
saved, err := config.LoadIntegration("droid")
if err != nil {
t.Fatalf("failed to reload saved config: %v", err)
}
if diff := compareStrings(saved.Models, []string{"llama3.2"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
if diff := compareStringSlices(editor.edited, [][]string{{"llama3.2"}}); diff != "" {
t.Fatalf("unexpected edited models (-want +got):\n%s", diff)
}
if !strings.Contains(stderr, "Skipped glm-5:cloud: sign in was cancelled") {
t.Fatalf("expected skip reason in stderr, got %q", stderr)
}
}
func TestLaunchIntegration_EditorConfigureMultiRemovesReselectedFailingModel(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "droid")
t.Setenv("PATH", binDir)
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "droid", editor)
if err := config.SaveIntegration("droid", []string{"glm-5:cloud", "llama3.2"}); err != nil {
t.Fatalf("failed to seed config: %v", err)
}
DefaultMultiSelector = func(title string, items []ModelItem, preChecked []string) ([]string, error) {
return append([]string(nil), preChecked...), nil
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
if prompt == "Proceed?" {
return true, nil
}
t.Fatalf("unexpected prompt: %q", prompt)
return false, nil
}
DefaultSignIn = func(modelName, signInURL string) (string, error) {
return "", ErrCancelled
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/tags":
fmt.Fprint(w, `{"models":[{"name":"glm-5:cloud","remote_model":"glm-5"},{"name":"llama3.2"}]}`)
case "/api/status":
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"not found"}`)
case "/api/show":
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
if req.Model == "glm-5:cloud" {
fmt.Fprint(w, `{"remote_model":"glm-5"}`)
return
}
if req.Model == "llama3.2" {
fmt.Fprint(w, `{"model":"llama3.2"}`)
return
}
http.NotFound(w, r)
case "/api/me":
w.WriteHeader(http.StatusUnauthorized)
fmt.Fprint(w, `{"error":"unauthorized","signin_url":"https://example.com/signin"}`)
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
var launchErr error
stderr := captureStderr(t, func() {
launchErr = LaunchIntegration(context.Background(), IntegrationLaunchRequest{
Name: "droid",
ForceConfigure: true,
})
})
if launchErr != nil {
t.Fatalf("LaunchIntegration returned error: %v", launchErr)
}
if editor.ranModel != "llama3.2" {
t.Fatalf("expected launch to use surviving model, got %q", editor.ranModel)
}
if diff := compareStringSlices(editor.edited, [][]string{{"llama3.2"}}); diff != "" {
t.Fatalf("unexpected edited models (-want +got):\n%s", diff)
}
saved, loadErr := config.LoadIntegration("droid")
if loadErr != nil {
t.Fatalf("failed to reload saved config: %v", loadErr)
}
if diff := compareStrings(saved.Models, []string{"llama3.2"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
if !strings.Contains(stderr, "Skipped glm-5:cloud: sign in was cancelled") {
t.Fatalf("expected skip reason in stderr, got %q", stderr)
}
}
func TestLaunchIntegration_EditorConfigureMultiAllFailuresKeepsExistingAndSkipsLaunch(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "droid")
t.Setenv("PATH", binDir)
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "droid", editor)
if err := config.SaveIntegration("droid", []string{"llama3.2"}); err != nil {
t.Fatalf("failed to seed config: %v", err)
}
DefaultMultiSelector = func(title string, items []ModelItem, preChecked []string) ([]string, error) {
return []string{"missing-local-a", "missing-local-b"}, nil
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
if prompt == "Download missing-local-a?" || prompt == "Download missing-local-b?" {
return false, nil
}
if prompt == "Proceed?" {
t.Fatal("did not expect proceed prompt when no models are accepted")
}
t.Fatalf("unexpected prompt: %q", prompt)
return false, nil
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/tags":
fmt.Fprint(w, `{"models":[]}`)
case "/api/show":
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
switch req.Model {
case "missing-local-a", "missing-local-b":
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"model not found"}`)
default:
http.NotFound(w, r)
}
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
var launchErr error
stderr := captureStderr(t, func() {
launchErr = LaunchIntegration(context.Background(), IntegrationLaunchRequest{
Name: "droid",
ForceConfigure: true,
})
})
if launchErr != nil {
t.Fatalf("LaunchIntegration returned error: %v", launchErr)
}
if editor.ranModel != "" {
t.Fatalf("expected no launch when all selected models are skipped, got %q", editor.ranModel)
}
if len(editor.edited) != 0 {
t.Fatalf("expected no editor writes when all selections fail, got %v", editor.edited)
}
saved, err := config.LoadIntegration("droid")
if err != nil {
t.Fatalf("failed to reload saved config: %v", err)
}
if diff := compareStrings(saved.Models, []string{"llama3.2"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
if !strings.Contains(stderr, "Skipped missing-local-a:") {
t.Fatalf("expected first skip reason in stderr, got %q", stderr)
}
if !strings.Contains(stderr, "Skipped missing-local-b:") {
t.Fatalf("expected second skip reason in stderr, got %q", stderr)
}
}
func TestLaunchIntegration_ConfiguredEditorLaunchValidatesPrimaryOnly(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "droid")
t.Setenv("PATH", binDir)
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "droid", editor)
if err := config.SaveIntegration("droid", []string{"llama3.2", "missing-local"}); err != nil {
t.Fatalf("failed to seed config: %v", err)
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
t.Fatalf("did not expect prompt during normal configured launch: %q", prompt)
return false, nil
}
var missingShowCalled bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/api/show" {
http.NotFound(w, r)
return
}
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
switch req.Model {
case "llama3.2":
fmt.Fprint(w, `{"model":"llama3.2"}`)
case "missing-local":
missingShowCalled = true
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"model not found"}`)
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
if err := LaunchIntegration(context.Background(), IntegrationLaunchRequest{Name: "droid"}); err != nil {
t.Fatalf("LaunchIntegration returned error: %v", err)
}
if missingShowCalled {
t.Fatal("expected configured launch to validate only the primary model")
}
if editor.ranModel != "llama3.2" {
t.Fatalf("expected launch to use saved primary model, got %q", editor.ranModel)
}
if len(editor.edited) != 0 {
t.Fatalf("expected no editor writes during normal launch, got %v", editor.edited)
}
saved, err := config.LoadIntegration("droid")
if err != nil {
t.Fatalf("failed to reload saved config: %v", err)
}
if diff := compareStrings(saved.Models, []string{"llama3.2", "missing-local"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
}
func TestLaunchIntegration_ConfiguredEditorLaunchSkipsReconfigure(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
@@ -965,6 +1362,40 @@ func TestLaunchIntegration_OpenclawInstallsBeforeConfigSideEffects(t *testing.T)
}
}
func TestLaunchIntegration_PiInstallsBeforeConfigSideEffects(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
t.Setenv("PATH", t.TempDir())
editor := &launcherEditorRunner{}
withIntegrationOverride(t, "pi", editor)
selectorCalled := false
DefaultMultiSelector = func(title string, items []ModelItem, preChecked []string) ([]string, error) {
selectorCalled = true
return []string{"llama3.2"}, nil
}
err := LaunchIntegration(context.Background(), IntegrationLaunchRequest{Name: "pi"})
if err == nil {
t.Fatal("expected launch to fail before configuration when Pi is missing")
}
if !strings.Contains(err.Error(), "required dependencies are missing") {
t.Fatalf("expected install prerequisite error, got %v", err)
}
if selectorCalled {
t.Fatal("expected install check to happen before model selection")
}
if len(editor.edited) != 0 {
t.Fatalf("expected no editor writes before install succeeds, got %v", editor.edited)
}
if _, statErr := os.Stat(filepath.Join(tmpDir, ".pi", "agent", "models.json")); !os.IsNotExist(statErr) {
t.Fatalf("expected no Pi config file to be created, stat err = %v", statErr)
}
}
func TestLaunchIntegration_ConfigureOnlyDoesNotRequireInstalledBinary(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
@@ -1122,6 +1553,67 @@ func TestLaunchIntegration_ClaudeForceConfigureReprompts(t *testing.T) {
}
}
func TestLaunchIntegration_ClaudeForceConfigureMissingSelectionDoesNotSave(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)
withLauncherHooks(t)
binDir := t.TempDir()
writeFakeBinary(t, binDir, "claude")
t.Setenv("PATH", binDir)
if err := config.SaveIntegration("claude", []string{"llama3.2"}); err != nil {
t.Fatalf("failed to seed config: %v", err)
}
DefaultSingleSelector = func(title string, items []ModelItem, current string) (string, error) {
return "missing-model", nil
}
DefaultConfirmPrompt = func(prompt string) (bool, error) {
if prompt == "Download missing-model?" {
return false, nil
}
t.Fatalf("unexpected prompt: %q", prompt)
return false, nil
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/api/tags":
fmt.Fprint(w, `{"models":[{"name":"llama3.2"}]}`)
case "/api/show":
var req apiShowRequest
_ = json.NewDecoder(r.Body).Decode(&req)
if req.Model == "missing-model" {
w.WriteHeader(http.StatusNotFound)
fmt.Fprint(w, `{"error":"model not found"}`)
return
}
fmt.Fprintf(w, `{"model":%q}`, req.Model)
default:
http.NotFound(w, r)
}
}))
defer srv.Close()
t.Setenv("OLLAMA_HOST", srv.URL)
err := LaunchIntegration(context.Background(), IntegrationLaunchRequest{
Name: "claude",
ForceConfigure: true,
})
if err == nil {
t.Fatal("expected missing selected model to abort launch")
}
saved, loadErr := config.LoadIntegration("claude")
if loadErr != nil {
t.Fatalf("failed to reload saved config: %v", loadErr)
}
if diff := compareStrings(saved.Models, []string{"llama3.2"}); diff != "" {
t.Fatalf("unexpected saved models (-want +got):\n%s", diff)
}
}
func TestLaunchIntegration_ClaudeModelOverrideSkipsSelector(t *testing.T) {
tmpDir := t.TempDir()
setLaunchTestHome(t, tmpDir)


@@ -147,6 +147,7 @@ func (o *OpenCode) Edit(modelList []string) error {
ollama["models"] = models ollama["models"] = models
provider["ollama"] = ollama provider["ollama"] = ollama
config["provider"] = provider config["provider"] = provider
config["model"] = "ollama/" + modelList[0]
configData, err := json.MarshalIndent(config, "", " ") configData, err := json.MarshalIndent(config, "", " ")
if err != nil { if err != nil {


@@ -49,6 +49,7 @@ func TestOpenCodeEdit(t *testing.T) {
t.Fatal(err)
}
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeDefaultModel(t, configPath, "ollama/llama3.2")
assertOpenCodeRecentModel(t, statePath, 0, "ollama", "llama3.2")
})
@@ -157,11 +158,13 @@ func TestOpenCodeEdit(t *testing.T) {
o.Edit([]string{"llama3.2", "mistral"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeModelExists(t, configPath, "mistral")
assertOpenCodeDefaultModel(t, configPath, "ollama/llama3.2")
// Then remove one by only selecting the other
o.Edit([]string{"llama3.2"})
assertOpenCodeModelExists(t, configPath, "llama3.2")
assertOpenCodeModelNotExists(t, configPath, "mistral")
assertOpenCodeDefaultModel(t, configPath, "ollama/llama3.2")
})
t.Run("preserve user customizations on managed models", func(t *testing.T) {
@@ -338,6 +341,22 @@ func assertOpenCodeModelNotExists(t *testing.T, path, model string) {
}
}
func assertOpenCodeDefaultModel(t *testing.T, path, want string) {
t.Helper()
data, err := os.ReadFile(path)
if err != nil {
t.Fatal(err)
}
var cfg map[string]any
if err := json.Unmarshal(data, &cfg); err != nil {
t.Fatal(err)
}
got, _ := cfg["model"].(string)
if got != want {
t.Fatalf("default model = %q, want %q", got, want)
}
}
func assertOpenCodeRecentModel(t *testing.T, path string, index int, providerID, modelID string) {
t.Helper()
data, err := os.ReadFile(path)


@@ -20,20 +20,151 @@ import (
// Pi implements Runner and Editor for Pi (Pi Coding Agent) integration
type Pi struct{}
const (
piNpmPackage = "@mariozechner/pi-coding-agent"
piWebSearchSource = "npm:@ollama/pi-web-search"
piWebSearchPkg = "@ollama/pi-web-search"
)
func (p *Pi) String() string { return "Pi" } func (p *Pi) String() string { return "Pi" }
func (p *Pi) Run(model string, args []string) error { func (p *Pi) Run(model string, args []string) error {
if _, err := exec.LookPath("pi"); err != nil { fmt.Fprintf(os.Stderr, "\n%sPreparing Pi...%s\n", ansiGray, ansiReset)
return fmt.Errorf("pi is not installed, install with: npm install -g @mariozechner/pi-coding-agent") if err := ensureNpmInstalled(); err != nil {
return err
} }
cmd := exec.Command("pi", args...) fmt.Fprintf(os.Stderr, "%sChecking Pi installation...%s\n", ansiGray, ansiReset)
bin, err := ensurePiInstalled()
if err != nil {
return err
}
ensurePiWebSearchPackage(bin)
fmt.Fprintf(os.Stderr, "\n%sLaunching Pi...%s\n\n", ansiGray, ansiReset)
cmd := exec.Command(bin, args...)
cmd.Stdin = os.Stdin cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr cmd.Stderr = os.Stderr
return cmd.Run() return cmd.Run()
} }
func ensureNpmInstalled() error {
if _, err := exec.LookPath("npm"); err != nil {
return fmt.Errorf("npm (Node.js) is required to launch pi\n\nInstall it first:\n https://nodejs.org/")
}
return nil
}
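// ensurePiInstalled resolves the pi binary, offering a confirmed global npm
// install when it is missing, and returns the command name to launch.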
func ensurePiInstalled() (string, error) {
if _, err := exec.LookPath("pi"); err == nil {
return "pi", nil
}
if _, err := exec.LookPath("npm"); err != nil {
return "", fmt.Errorf("pi is not installed and required dependencies are missing\n\nInstall the following first:\n npm (Node.js): https://nodejs.org/")
}
ok, err := ConfirmPrompt("Pi is not installed. Install with npm?")
if err != nil {
return "", err
}
if !ok {
return "", fmt.Errorf("pi installation cancelled")
}
fmt.Fprintf(os.Stderr, "\nInstalling Pi...\n")
cmd := exec.Command("npm", "install", "-g", piNpmPackage+"@latest")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return "", fmt.Errorf("failed to install pi: %w", err)
}
if _, err := exec.LookPath("pi"); err != nil {
return "", fmt.Errorf("pi was installed but the binary was not found on PATH\n\nYou may need to restart your shell")
}
fmt.Fprintf(os.Stderr, "%sPi installed successfully%s\n\n", ansiGreen, ansiReset)
return "pi", nil
}
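// ensurePiWebSearchPackage keeps the web search package current: install it
// when `pi list` doesn't show it, otherwise update it on every launch.
// Failures only warn so the Pi launch itself still proceeds.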
func ensurePiWebSearchPackage(bin string) {
if !shouldManagePiWebSearch() {
fmt.Fprintf(os.Stderr, "%sCloud is disabled; skipping %s setup.%s\n", ansiGray, piWebSearchPkg, ansiReset)
return
}
fmt.Fprintf(os.Stderr, "%sChecking Pi web search package...%s\n", ansiGray, ansiReset)
installed, err := piPackageInstalled(bin, piWebSearchSource)
if err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not check %s installation: %v%s\n", ansiYellow, piWebSearchPkg, err, ansiReset)
return
}
if !installed {
fmt.Fprintf(os.Stderr, "%sInstalling %s...%s\n", ansiGray, piWebSearchPkg, ansiReset)
cmd := exec.Command(bin, "install", piWebSearchSource)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not install %s: %v%s\n", ansiYellow, piWebSearchPkg, err, ansiReset)
return
}
fmt.Fprintf(os.Stderr, "%s ✓ Installed %s%s\n", ansiGreen, piWebSearchPkg, ansiReset)
return
}
fmt.Fprintf(os.Stderr, "%sUpdating %s...%s\n", ansiGray, piWebSearchPkg, ansiReset)
cmd := exec.Command(bin, "update", piWebSearchSource)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not update %s: %v%s\n", ansiYellow, piWebSearchPkg, err, ansiReset)
return
}
fmt.Fprintf(os.Stderr, "%s ✓ Updated %s%s\n", ansiGreen, piWebSearchPkg, ansiReset)
}
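// shouldManagePiWebSearch defaults to managing the package and backs off only
// when the server explicitly reports cloud as disabled.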
func shouldManagePiWebSearch() bool {
client, err := api.ClientFromEnvironment()
if err != nil {
return true
}
disabled, known := cloudStatusDisabled(context.Background(), client)
if known && disabled {
return false
}
return true
}
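// piPackageInstalled reports whether `pi list` output mentions the source.
// For example (mirroring the test fixture), output like
//   User packages:
//     npm:@ollama/pi-web-search
// matches because a trimmed line starts with the source string.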
func piPackageInstalled(bin, source string) (bool, error) {
cmd := exec.Command(bin, "list")
out, err := cmd.CombinedOutput()
if err != nil {
msg := strings.TrimSpace(string(out))
if msg == "" {
return false, err
}
return false, fmt.Errorf("%w: %s", err, msg)
}
for _, line := range strings.Split(string(out), "\n") {
trimmed := strings.TrimSpace(line)
if strings.HasPrefix(trimmed, source) {
return true, nil
}
}
return false, nil
}
func (p *Pi) Paths() []string {
home, err := os.UserHomeDir()
if err != nil {


@@ -9,6 +9,8 @@ import (
"net/url" "net/url"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"strings"
"testing" "testing"
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
@@ -33,6 +35,339 @@ func TestPiIntegration(t *testing.T) {
})
}
func TestPiRun_InstallAndWebSearchLifecycle(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("uses POSIX shell test binaries")
}
writeScript := func(t *testing.T, path, content string) {
t.Helper()
if err := os.WriteFile(path, []byte(content), 0o755); err != nil {
t.Fatal(err)
}
}
seedPiScript := func(t *testing.T, dir string) {
t.Helper()
piPath := filepath.Join(dir, "pi")
listPath := filepath.Join(dir, "pi-list.txt")
piScript := fmt.Sprintf(`#!/bin/sh
echo "$@" >> %q
if [ "$1" = "list" ]; then
if [ -f %q ]; then
/bin/cat %q
fi
exit 0
fi
if [ "$1" = "update" ] && [ "$PI_FAIL_UPDATE" = "1" ]; then
echo "update failed" >&2
exit 1
fi
if [ "$1" = "install" ] && [ "$PI_FAIL_INSTALL" = "1" ]; then
echo "install failed" >&2
exit 1
fi
exit 0
`, filepath.Join(dir, "pi.log"), listPath, listPath)
writeScript(t, piPath, piScript)
}
seedNpmNoop := func(t *testing.T, dir string) {
t.Helper()
writeScript(t, filepath.Join(dir, "npm"), "#!/bin/sh\nexit 0\n")
}
withConfirm := func(t *testing.T, fn func(prompt string) (bool, error)) {
t.Helper()
oldConfirm := DefaultConfirmPrompt
DefaultConfirmPrompt = fn
t.Cleanup(func() { DefaultConfirmPrompt = oldConfirm })
}
setCloudStatus := func(t *testing.T, disabled bool) {
t.Helper()
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/status" {
fmt.Fprintf(w, `{"cloud":{"disabled":%t,"source":"config"}}`, disabled)
return
}
http.NotFound(w, r)
}))
t.Cleanup(srv.Close)
t.Setenv("OLLAMA_HOST", srv.URL)
}
t.Run("pi missing + user accepts install", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n npm:@ollama/pi-web-search\n"), 0o644); err != nil {
t.Fatal(err)
}
npmScript := fmt.Sprintf(`#!/bin/sh
echo "$@" >> %q
if [ "$1" = "install" ] && [ "$2" = "-g" ] && [ "$3" = %q ]; then
/bin/cat > %q <<'EOS'
#!/bin/sh
echo "$@" >> %q
if [ "$1" = "list" ]; then
if [ -f %q ]; then
/bin/cat %q
fi
exit 0
fi
exit 0
EOS
/bin/chmod +x %q
fi
exit 0
`, filepath.Join(tmpDir, "npm.log"), piNpmPackage+"@latest", filepath.Join(tmpDir, "pi"), filepath.Join(tmpDir, "pi.log"), filepath.Join(tmpDir, "pi-list.txt"), filepath.Join(tmpDir, "pi-list.txt"), filepath.Join(tmpDir, "pi"))
writeScript(t, filepath.Join(tmpDir, "npm"), npmScript)
withConfirm(t, func(prompt string) (bool, error) {
if strings.Contains(prompt, "Pi is not installed.") {
return true, nil
}
return true, nil
})
p := &Pi{}
if err := p.Run("ignored", []string{"--version"}); err != nil {
t.Fatalf("Run() error = %v", err)
}
npmCalls, err := os.ReadFile(filepath.Join(tmpDir, "npm.log"))
if err != nil {
t.Fatal(err)
}
if !strings.Contains(string(npmCalls), "install -g "+piNpmPackage+"@latest") {
t.Fatalf("expected npm install call, got:\n%s", npmCalls)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
got := string(piCalls)
if !strings.Contains(got, "list\n") {
t.Fatalf("expected pi list call, got:\n%s", got)
}
if !strings.Contains(got, "update "+piWebSearchSource+"\n") {
t.Fatalf("expected pi update call, got:\n%s", got)
}
if !strings.Contains(got, "--version\n") {
t.Fatalf("expected final pi launch call, got:\n%s", got)
}
})
t.Run("pi missing + user declines install", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
writeScript(t, filepath.Join(tmpDir, "npm"), "#!/bin/sh\nexit 0\n")
withConfirm(t, func(prompt string) (bool, error) {
if strings.Contains(prompt, "Pi is not installed.") {
return false, nil
}
return true, nil
})
p := &Pi{}
err := p.Run("ignored", nil)
if err == nil || !strings.Contains(err.Error(), "pi installation cancelled") {
t.Fatalf("expected install cancellation error, got %v", err)
}
})
t.Run("pi installed + web search missing auto-installs", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n"), 0o644); err != nil {
t.Fatal(err)
}
seedPiScript(t, tmpDir)
seedNpmNoop(t, tmpDir)
withConfirm(t, func(prompt string) (bool, error) {
t.Fatalf("did not expect confirmation prompt, got %q", prompt)
return false, nil
})
p := &Pi{}
if err := p.Run("ignored", []string{"session"}); err != nil {
t.Fatalf("Run() error = %v", err)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
got := string(piCalls)
if !strings.Contains(got, "list\n") {
t.Fatalf("expected pi list call, got:\n%s", got)
}
if !strings.Contains(got, "install "+piWebSearchSource+"\n") {
t.Fatalf("expected pi install call, got:\n%s", got)
}
if strings.Contains(got, "update "+piWebSearchSource+"\n") {
t.Fatalf("did not expect pi update call when package missing, got:\n%s", got)
}
if !strings.Contains(got, "session\n") {
t.Fatalf("expected final pi launch call, got:\n%s", got)
}
})
t.Run("pi installed + web search present updates every launch", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n "+piWebSearchSource+"\n"), 0o644); err != nil {
t.Fatal(err)
}
seedPiScript(t, tmpDir)
seedNpmNoop(t, tmpDir)
p := &Pi{}
if err := p.Run("ignored", []string{"doctor"}); err != nil {
t.Fatalf("Run() error = %v", err)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
got := string(piCalls)
if !strings.Contains(got, "update "+piWebSearchSource+"\n") {
t.Fatalf("expected pi update call, got:\n%s", got)
}
})
t.Run("web search update failure warns and continues", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
t.Setenv("PI_FAIL_UPDATE", "1")
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n "+piWebSearchSource+"\n"), 0o644); err != nil {
t.Fatal(err)
}
seedPiScript(t, tmpDir)
seedNpmNoop(t, tmpDir)
p := &Pi{}
stderr := captureStderr(t, func() {
if err := p.Run("ignored", []string{"session"}); err != nil {
t.Fatalf("Run() should continue after web search update failure, got %v", err)
}
})
if !strings.Contains(stderr, "Warning: could not update "+piWebSearchPkg) {
t.Fatalf("expected update warning, got:\n%s", stderr)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
if !strings.Contains(string(piCalls), "session\n") {
t.Fatalf("expected final pi launch call, got:\n%s", piCalls)
}
})
t.Run("web search install failure warns and continues", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
t.Setenv("PI_FAIL_INSTALL", "1")
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n"), 0o644); err != nil {
t.Fatal(err)
}
seedPiScript(t, tmpDir)
seedNpmNoop(t, tmpDir)
withConfirm(t, func(prompt string) (bool, error) {
t.Fatalf("did not expect confirmation prompt, got %q", prompt)
return false, nil
})
p := &Pi{}
stderr := captureStderr(t, func() {
if err := p.Run("ignored", []string{"session"}); err != nil {
t.Fatalf("Run() should continue after web search install failure, got %v", err)
}
})
if !strings.Contains(stderr, "Warning: could not install "+piWebSearchPkg) {
t.Fatalf("expected install warning, got:\n%s", stderr)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
if !strings.Contains(string(piCalls), "session\n") {
t.Fatalf("expected final pi launch call, got:\n%s", piCalls)
}
})
t.Run("cloud disabled skips web search package management", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, true)
if err := os.WriteFile(filepath.Join(tmpDir, "pi-list.txt"), []byte("User packages:\n"), 0o644); err != nil {
t.Fatal(err)
}
seedPiScript(t, tmpDir)
seedNpmNoop(t, tmpDir)
p := &Pi{}
stderr := captureStderr(t, func() {
if err := p.Run("ignored", []string{"session"}); err != nil {
t.Fatalf("Run() error = %v", err)
}
})
if !strings.Contains(stderr, "Cloud is disabled; skipping "+piWebSearchPkg+" setup.") {
t.Fatalf("expected cloud-disabled skip message, got:\n%s", stderr)
}
piCalls, err := os.ReadFile(filepath.Join(tmpDir, "pi.log"))
if err != nil {
t.Fatal(err)
}
got := string(piCalls)
if strings.Contains(got, "list\n") || strings.Contains(got, "install "+piWebSearchSource+"\n") || strings.Contains(got, "update "+piWebSearchSource+"\n") {
t.Fatalf("did not expect web search package management calls, got:\n%s", got)
}
if !strings.Contains(got, "session\n") {
t.Fatalf("expected final pi launch call, got:\n%s", got)
}
})
t.Run("missing npm returns error before pi flow", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("PATH", tmpDir)
setCloudStatus(t, false)
seedPiScript(t, tmpDir)
p := &Pi{}
err := p.Run("ignored", []string{"session"})
if err == nil || !strings.Contains(err.Error(), "npm (Node.js) is required to launch pi") {
t.Fatalf("expected missing npm error, got %v", err)
}
if _, statErr := os.Stat(filepath.Join(tmpDir, "pi.log")); !os.IsNotExist(statErr) {
t.Fatalf("expected pi not to run when npm is missing, stat err = %v", statErr)
}
})
}
func TestPiPaths(t *testing.T) {
pi := &Pi{}


@@ -33,7 +33,7 @@ type IntegrationInfo struct {
Description string
}
- var launcherIntegrationOrder = []string{"opencode", "droid", "pi", "cline"}
+ var launcherIntegrationOrder = []string{"opencode", "droid", "pi"}
var integrationSpecs = []*IntegrationSpec{
{
@@ -52,6 +52,7 @@ var integrationSpecs = []*IntegrationSpec{
Name: "cline",
Runner: &Cline{},
Description: "Autonomous coding agent with parallel execution",
Hidden: true,
Install: IntegrationInstallSpec{
CheckInstalled: func() bool {
_, err := exec.LookPath("cline")
@@ -128,7 +129,24 @@ var integrationSpecs = []*IntegrationSpec{
_, err := exec.LookPath("pi")
return err == nil
},
- Command: []string{"npm", "install", "-g", "@mariozechner/pi-coding-agent"},
+ EnsureInstalled: func() error {
+ _, err := ensurePiInstalled()
+ return err
+ },
+ Command: []string{"npm", "install", "-g", "@mariozechner/pi-coding-agent@latest"},
},
},
{
Name: "vscode",
Runner: &VSCode{},
Aliases: []string{"code"},
Description: "Microsoft's open-source AI code editor",
Hidden: true,
Install: IntegrationInstallSpec{
CheckInstalled: func() bool {
return (&VSCode{}).findBinary() != ""
},
URL: "https://code.visualstudio.com",
},
},
}


@@ -54,6 +54,9 @@ func TestEditorRunsDoNotRewriteConfig(t *testing.T) {
binDir := t.TempDir()
writeFakeBinary(t, binDir, tt.binary)
if tt.name == "pi" {
writeFakeBinary(t, binDir, "npm")
}
t.Setenv("PATH", binDir) t.Setenv("PATH", binDir)
configPath := tt.checkPath(home) configPath := tt.checkPath(home)

cmd/launch/vscode.go (new file, 591 lines)

@@ -0,0 +1,591 @@
package launch
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
_ "github.com/mattn/go-sqlite3"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/cmd/internal/fileutil"
"github.com/ollama/ollama/envconfig"
)
// VSCode implements Runner and Editor for Visual Studio Code integration.
type VSCode struct{}
func (v *VSCode) String() string { return "Visual Studio Code" }
// findBinary returns the path/command to launch VS Code, or "" if not found.
// It checks platform-specific locations only.
func (v *VSCode) findBinary() string {
var candidates []string
switch runtime.GOOS {
case "darwin":
candidates = []string{
"/Applications/Visual Studio Code.app",
}
case "windows":
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
candidates = append(candidates, filepath.Join(localAppData, "Programs", "Microsoft VS Code", "bin", "code.cmd"))
}
default: // linux
candidates = []string{
"/usr/bin/code",
"/snap/bin/code",
}
}
for _, c := range candidates {
if _, err := os.Stat(c); err == nil {
return c
}
}
return ""
}
// IsRunning reports whether VS Code is currently running.
// Each platform uses a pattern specific enough to avoid matching Cursor or
// other VS Code forks.
func (v *VSCode) IsRunning() bool {
switch runtime.GOOS {
case "darwin":
out, err := exec.Command("pgrep", "-f", "Visual Studio Code.app/Contents/MacOS/Code").Output()
return err == nil && len(out) > 0
case "windows":
// Match VS Code by executable path to avoid matching Cursor or other forks.
out, err := exec.Command("powershell", "-NoProfile", "-Command",
`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Select-Object -First 1`).Output()
return err == nil && len(strings.TrimSpace(string(out))) > 0
default:
// Match VS Code specifically by its install path to avoid matching
// Cursor (/cursor/) or other forks.
for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
out, err := exec.Command("pgrep", "-f", pattern).Output()
if err == nil && len(out) > 0 {
return true
}
}
return false
}
}
// Quit gracefully quits VS Code and waits for it to exit so that it flushes
// its in-memory state back to the database.
func (v *VSCode) Quit() {
if !v.IsRunning() {
return
}
switch runtime.GOOS {
case "darwin":
_ = exec.Command("osascript", "-e", `quit app "Visual Studio Code"`).Run()
case "windows":
// Kill VS Code by executable path to avoid killing Cursor or other forks.
_ = exec.Command("powershell", "-NoProfile", "-Command",
`Get-Process Code -ErrorAction SilentlyContinue | Where-Object { $_.Path -like '*Microsoft VS Code*' } | Stop-Process -Force`).Run()
default:
for _, pattern := range []string{"/usr/share/code/", "/snap/code/"} {
_ = exec.Command("pkill", "-f", pattern).Run()
}
}
// Wait for the process to fully exit and flush its state to disk
// TODO(hoyyeva): update spinner to use bubble tea
spinnerFrames := []string{"|", "/", "-", "\\"}
frame := 0
fmt.Fprintf(os.Stderr, "\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[0])
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for range 150 { // 150 ticks × 200ms = 30s timeout
<-ticker.C
frame++
fmt.Fprintf(os.Stderr, "\r\033[90mRestarting VS Code... %s\033[0m", spinnerFrames[frame%len(spinnerFrames)])
if frame%5 == 0 { // check every ~1s
if !v.IsRunning() {
fmt.Fprintf(os.Stderr, "\r\033[K")
// Give VS Code a moment to finish writing its state DB
time.Sleep(1 * time.Second)
return
}
}
}
fmt.Fprintf(os.Stderr, "\r\033[K")
}
const (
minCopilotChatVersion = "0.41.0"
minVSCodeVersion = "1.113"
)
func (v *VSCode) Run(model string, args []string) error {
v.checkVSCodeVersion()
v.checkCopilotChatVersion()
// Get all configured models (saved by the launcher framework before Run is called)
models := []string{model}
if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil && len(cfg.Models) > 0 {
models = cfg.Models
}
// VS Code discovers models from ollama ls. Cloud models that pass Show
// (the server knows about them) but aren't in ls need to be pulled to
// register them so VS Code can find them.
if client, err := api.ClientFromEnvironment(); err == nil {
v.ensureModelsRegistered(context.Background(), client, models)
}
// Warn if the default model doesn't support tool calling
if client, err := api.ClientFromEnvironment(); err == nil {
if resp, err := client.Show(context.Background(), &api.ShowRequest{Model: models[0]}); err == nil {
hasTools := false
for _, c := range resp.Capabilities {
if c == "tools" {
hasTools = true
break
}
}
if !hasTools {
fmt.Fprintf(os.Stderr, "Note: %s does not support tool calling and may not appear in the Copilot Chat model picker.\n", models[0])
}
}
}
v.printModelAccessTip()
if v.IsRunning() {
restart, err := ConfirmPrompt("Restart VS Code?")
if err != nil {
restart = false
}
if restart {
v.Quit()
if err := v.ShowInModelPicker(models); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
}
v.FocusVSCode()
} else {
fmt.Fprintf(os.Stderr, "\nTo get the latest model configuration, restart VS Code when you're ready.\n")
}
} else {
if err := v.ShowInModelPicker(models); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not update VS Code model picker: %v%s\n", ansiYellow, err, ansiReset)
}
v.FocusVSCode()
}
return nil
}
// ensureModelsRegistered pulls models that the server knows about (Show succeeds)
// but aren't in ollama ls yet. This is needed for cloud models so that VS Code
// can discover them from the Ollama API.
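// For example, a cloud model like "glm-5:cloud" can pass Show yet be absent
// from List until it has been pulled once.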
func (v *VSCode) ensureModelsRegistered(ctx context.Context, client *api.Client, models []string) {
listed, err := client.List(ctx)
if err != nil {
return
}
registered := make(map[string]bool, len(listed.Models))
for _, m := range listed.Models {
registered[m.Name] = true
}
for _, model := range models {
if registered[model] {
continue
}
// Also check without :latest suffix
if !strings.Contains(model, ":") && registered[model+":latest"] {
continue
}
if err := pullModel(ctx, client, model, false); err != nil {
fmt.Fprintf(os.Stderr, "%s Warning: could not register model %s: %v%s\n", ansiYellow, model, err, ansiReset)
}
}
}
// FocusVSCode brings VS Code to the foreground.
func (v *VSCode) FocusVSCode() {
binary := v.findBinary()
if binary == "" {
return
}
if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
_ = exec.Command("open", "-a", binary).Run()
} else {
_ = exec.Command(binary).Start()
}
}
// printModelAccessTip shows instructions for finding Ollama models in VS Code.
func (v *VSCode) printModelAccessTip() {
fmt.Fprintf(os.Stderr, "\nTip: To use Ollama models, open Copilot Chat and click the model picker.\n")
fmt.Fprintf(os.Stderr, " If you don't see your models, click \"Other models\" to find them.\n\n")
}
func (v *VSCode) Paths() []string {
if p := v.chatLanguageModelsPath(); fileExists(p) {
return []string{p}
}
return nil
}
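// Edit rewrites chatLanguageModels.json so it contains exactly one Ollama
// vendor entry pointing at the configured host, preserving entries from
// other vendors.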
func (v *VSCode) Edit(models []string) error {
if len(models) == 0 {
return nil
}
// Write chatLanguageModels.json with Ollama vendor entry
clmPath := v.chatLanguageModelsPath()
if err := os.MkdirAll(filepath.Dir(clmPath), 0o755); err != nil {
return err
}
var entries []map[string]any
if data, err := os.ReadFile(clmPath); err == nil {
_ = json.Unmarshal(data, &entries)
}
// Remove any existing Ollama entries, preserve others
filtered := make([]map[string]any, 0, len(entries))
for _, entry := range entries {
if vendor, _ := entry["vendor"].(string); vendor != "ollama" {
filtered = append(filtered, entry)
}
}
// Add new Ollama entry
filtered = append(filtered, map[string]any{
"vendor": "ollama",
"name": "Ollama",
"url": envconfig.Host().String(),
})
data, err := json.MarshalIndent(filtered, "", " ")
if err != nil {
return err
}
if err := fileutil.WriteWithBackup(clmPath, data); err != nil {
return err
}
// Clean up legacy settings from older Ollama integrations
v.updateSettings()
return nil
}
func (v *VSCode) Models() []string {
if !v.hasOllamaVendor() {
return nil
}
if cfg, err := loadStoredIntegrationConfig("vscode"); err == nil {
return cfg.Models
}
return nil
}
// hasOllamaVendor checks if chatLanguageModels.json contains an Ollama vendor entry.
func (v *VSCode) hasOllamaVendor() bool {
data, err := os.ReadFile(v.chatLanguageModelsPath())
if err != nil {
return false
}
var entries []map[string]any
if err := json.Unmarshal(data, &entries); err != nil {
return false
}
for _, entry := range entries {
if vendor, _ := entry["vendor"].(string); vendor == "ollama" {
return true
}
}
return false
}
func (v *VSCode) chatLanguageModelsPath() string {
return v.vscodePath("chatLanguageModels.json")
}
func (v *VSCode) settingsPath() string {
return v.vscodePath("settings.json")
}
// updateSettings cleans up legacy settings from older Ollama integrations.
func (v *VSCode) updateSettings() {
settingsPath := v.settingsPath()
data, err := os.ReadFile(settingsPath)
if err != nil {
return
}
var settings map[string]any
if err := json.Unmarshal(data, &settings); err != nil {
return
}
changed := false
for _, key := range []string{"github.copilot.chat.byok.ollamaEndpoint", "ollama.launch.configured"} {
if _, ok := settings[key]; ok {
delete(settings, key)
changed = true
}
}
if !changed {
return
}
updated, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return
}
_ = fileutil.WriteWithBackup(settingsPath, updated)
}
func (v *VSCode) statePath() string {
return v.vscodePath("globalStorage", "state.vscdb")
}
// ShowInModelPicker ensures the given models are visible in VS Code's Copilot
// Chat model picker. It sets the configured models to true in the picker
// preferences so they appear in the dropdown. Models use the VS Code identifier
// format "ollama/Ollama/<name>".
func (v *VSCode) ShowInModelPicker(models []string) error {
if len(models) == 0 {
return nil
}
dbPath := v.statePath()
needsCreate := !fileExists(dbPath)
if needsCreate {
if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil {
return fmt.Errorf("creating state directory: %w", err)
}
}
db, err := sql.Open("sqlite3", dbPath+"?_busy_timeout=5000")
if err != nil {
return fmt.Errorf("opening state database: %w", err)
}
defer db.Close()
// Create the table if this is a fresh DB. Schema must match what VS Code creates.
if needsCreate {
if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
return fmt.Errorf("initializing state database: %w", err)
}
}
// Read existing preferences
prefs := make(map[string]bool)
var prefsJSON string
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&prefsJSON); err == nil {
_ = json.Unmarshal([]byte(prefsJSON), &prefs)
}
// Build name→ID map from VS Code's cached model list.
// VS Code uses numeric IDs like "ollama/Ollama/4", not "ollama/Ollama/kimi-k2.5:cloud".
nameToID := make(map[string]string)
var cacheJSON string
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chat.cachedLanguageModels.v2'").Scan(&cacheJSON); err == nil {
var cached []map[string]any
if json.Unmarshal([]byte(cacheJSON), &cached) == nil {
for _, entry := range cached {
meta, _ := entry["metadata"].(map[string]any)
if meta == nil {
continue
}
if vendor, _ := meta["vendor"].(string); vendor == "ollama" {
name, _ := meta["name"].(string)
id, _ := entry["identifier"].(string)
if name != "" && id != "" {
nameToID[name] = id
}
}
}
}
}
// Ollama config is authoritative: always show configured models,
// hide Ollama models that are no longer in the config.
configuredIDs := make(map[string]bool)
for _, m := range models {
for _, id := range v.modelVSCodeIDs(m, nameToID) {
prefs[id] = true
configuredIDs[id] = true
}
}
for id := range prefs {
if strings.HasPrefix(id, "ollama/") && !configuredIDs[id] {
prefs[id] = false
}
}
data, _ := json.Marshal(prefs)
if _, err = db.Exec("INSERT OR REPLACE INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data)); err != nil {
return err
}
return nil
}
// modelVSCodeIDs returns all possible VS Code picker IDs for a model name.
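// For example, "llama3.2" with a cached numeric ID "ollama/Ollama/4" yields
// ["ollama/Ollama/4", "ollama/Ollama/llama3.2", "ollama/Ollama/llama3.2:latest"].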
func (v *VSCode) modelVSCodeIDs(model string, nameToID map[string]string) []string {
var ids []string
if id, ok := nameToID[model]; ok {
ids = append(ids, id)
} else if !strings.Contains(model, ":") {
if id, ok := nameToID[model+":latest"]; ok {
ids = append(ids, id)
}
}
ids = append(ids, "ollama/Ollama/"+model)
if !strings.Contains(model, ":") {
ids = append(ids, "ollama/Ollama/"+model+":latest")
}
return ids
}
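// vscodePath joins parts under the platform's VS Code user directory:
// Library/Application Support on macOS, %APPDATA% on Windows, ~/.config elsewhere.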
func (v *VSCode) vscodePath(parts ...string) string {
home, _ := os.UserHomeDir()
var base string
switch runtime.GOOS {
case "darwin":
base = filepath.Join(home, "Library", "Application Support", "Code", "User")
case "windows":
base = filepath.Join(os.Getenv("APPDATA"), "Code", "User")
default:
base = filepath.Join(home, ".config", "Code", "User")
}
return filepath.Join(append([]string{base}, parts...)...)
}
// checkVSCodeVersion warns if VS Code is older than minVSCodeVersion.
func (v *VSCode) checkVSCodeVersion() {
codeCLI := v.findCodeCLI()
if codeCLI == "" {
return
}
out, err := exec.Command(codeCLI, "--version").Output()
if err != nil {
return
}
// "code --version" outputs: version\ncommit\narch
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
if len(lines) == 0 || lines[0] == "" {
return
}
version := strings.TrimSpace(lines[0])
if compareVersions(version, minVSCodeVersion) < 0 {
fmt.Fprintf(os.Stderr, "\n%sWarning: VS Code version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minVSCodeVersion, ansiReset)
fmt.Fprintf(os.Stderr, "Please update VS Code to the latest version.\n\n")
}
}
// checkCopilotChatVersion warns if the GitHub Copilot Chat extension is
// missing or older than minCopilotChatVersion.
func (v *VSCode) checkCopilotChatVersion() {
codeCLI := v.findCodeCLI()
if codeCLI == "" {
return
}
out, err := exec.Command(codeCLI, "--list-extensions", "--show-versions").Output()
if err != nil {
return
}
installed, version := parseCopilotChatVersion(string(out))
if !installed {
fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension is not installed%s\n", ansiYellow, ansiReset)
fmt.Fprintf(os.Stderr, "Install it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Install\n\n")
return
}
if compareVersions(version, minCopilotChatVersion) < 0 {
fmt.Fprintf(os.Stderr, "\n%sWarning: GitHub Copilot Chat extension version (%s) is older than the recommended version (%s)%s\n", ansiYellow, version, minCopilotChatVersion, ansiReset)
fmt.Fprintf(os.Stderr, "Please update it in VS Code: Extensions → search \"GitHub Copilot Chat\" → Update\n\n")
}
}
// findCodeCLI returns the path to the VS Code CLI for querying extensions.
// On macOS, findBinary may return an .app bundle which can't run --list-extensions,
// so this resolves to the actual CLI binary inside the bundle.
func (v *VSCode) findCodeCLI() string {
binary := v.findBinary()
if binary == "" {
return ""
}
if runtime.GOOS == "darwin" && strings.HasSuffix(binary, ".app") {
bundleCLI := binary + "/Contents/Resources/app/bin/code"
if _, err := os.Stat(bundleCLI); err == nil {
return bundleCLI
}
return ""
}
return binary
}
// parseCopilotChatVersion extracts the version of the GitHub Copilot Chat
// extension from "code --list-extensions --show-versions" output.
func parseCopilotChatVersion(output string) (installed bool, version string) {
for _, line := range strings.Split(output, "\n") {
// Format: github.copilot-chat@0.40.1
if !strings.HasPrefix(strings.ToLower(line), "github.copilot-chat@") {
continue
}
parts := strings.SplitN(line, "@", 2)
if len(parts) != 2 {
continue
}
return true, strings.TrimSpace(parts[1])
}
return false, ""
}
// compareVersions compares two dot-separated version strings.
// Returns -1 if a < b, 0 if a == b, 1 if a > b.
func compareVersions(a, b string) int {
aParts := strings.Split(a, ".")
bParts := strings.Split(b, ".")
maxLen := len(aParts)
if len(bParts) > maxLen {
maxLen = len(bParts)
}
for i := range maxLen {
var aNum, bNum int
if i < len(aParts) {
aNum, _ = strconv.Atoi(aParts[i])
}
if i < len(bParts) {
bNum, _ = strconv.Atoi(bParts[i])
}
if aNum < bNum {
return -1
}
if aNum > bNum {
return 1
}
}
return 0
}
func fileExists(path string) bool {
_, err := os.Stat(path)
return err == nil
}

cmd/launch/vscode_test.go (new file, 486 lines)

@@ -0,0 +1,486 @@
package launch
import (
"database/sql"
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
_ "github.com/mattn/go-sqlite3"
)
func TestVSCodeIntegration(t *testing.T) {
v := &VSCode{}
t.Run("String", func(t *testing.T) {
if got := v.String(); got != "Visual Studio Code" {
t.Errorf("String() = %q, want %q", got, "Visual Studio Code")
}
})
t.Run("implements Runner", func(t *testing.T) {
var _ Runner = v
})
t.Run("implements Editor", func(t *testing.T) {
var _ Editor = v
})
}
func TestVSCodeEdit(t *testing.T) {
v := &VSCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
tests := []struct {
name string
setup string // initial chatLanguageModels.json content, empty means no file
models []string
validate func(t *testing.T, data []byte)
}{
{
name: "fresh install",
models: []string{"llama3.2"},
validate: func(t *testing.T, data []byte) {
assertOllamaVendorConfigured(t, data)
},
},
{
name: "preserve other vendor entries",
setup: `[{"vendor": "azure", "name": "Azure", "url": "https://example.com"}]`,
models: []string{"llama3.2"},
validate: func(t *testing.T, data []byte) {
var entries []map[string]any
json.Unmarshal(data, &entries)
if len(entries) != 2 {
t.Errorf("expected 2 entries, got %d", len(entries))
}
// Check Azure entry preserved
found := false
for _, e := range entries {
if v, _ := e["vendor"].(string); v == "azure" {
found = true
}
}
if !found {
t.Error("azure vendor entry was not preserved")
}
assertOllamaVendorConfigured(t, data)
},
},
{
name: "update existing ollama entry",
setup: `[{"vendor": "ollama", "name": "Ollama", "url": "http://old:11434"}]`,
models: []string{"llama3.2"},
validate: func(t *testing.T, data []byte) {
assertOllamaVendorConfigured(t, data)
},
},
{
name: "empty models is no-op",
setup: `[{"vendor": "azure", "name": "Azure"}]`,
models: []string{},
validate: func(t *testing.T, data []byte) {
if string(data) != `[{"vendor": "azure", "name": "Azure"}]` {
t.Error("empty models should not modify file")
}
},
},
{
name: "corrupted JSON treated as empty",
setup: `{corrupted json`,
models: []string{"llama3.2"},
validate: func(t *testing.T, data []byte) {
var entries []map[string]any
if err := json.Unmarshal(data, &entries); err != nil {
t.Errorf("result is not valid JSON: %v", err)
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
os.RemoveAll(filepath.Dir(clmPath))
if tt.setup != "" {
os.MkdirAll(filepath.Dir(clmPath), 0o755)
os.WriteFile(clmPath, []byte(tt.setup), 0o644)
}
if err := v.Edit(tt.models); err != nil {
t.Fatal(err)
}
data, _ := os.ReadFile(clmPath)
tt.validate(t, data)
})
}
}
func TestVSCodeEditCleansUpOldSettings(t *testing.T) {
v := &VSCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
settingsPath := testVSCodePath(t, tmpDir, "settings.json")
// Create settings.json with old byok setting
os.MkdirAll(filepath.Dir(settingsPath), 0o755)
os.WriteFile(settingsPath, []byte(`{"github.copilot.chat.byok.ollamaEndpoint": "http://old:11434", "ollama.launch.configured": true, "editor.fontSize": 14}`), 0o644)
if err := v.Edit([]string{"llama3.2"}); err != nil {
t.Fatal(err)
}
// Verify old settings were removed
data, err := os.ReadFile(settingsPath)
if err != nil {
t.Fatal(err)
}
var settings map[string]any
json.Unmarshal(data, &settings)
if _, ok := settings["github.copilot.chat.byok.ollamaEndpoint"]; ok {
t.Error("github.copilot.chat.byok.ollamaEndpoint should have been removed")
}
if _, ok := settings["ollama.launch.configured"]; ok {
t.Error("ollama.launch.configured should have been removed")
}
if settings["editor.fontSize"] != float64(14) {
t.Error("editor.fontSize should have been preserved")
}
}
func TestVSCodePaths(t *testing.T) {
v := &VSCode{}
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
clmPath := testVSCodePath(t, tmpDir, "chatLanguageModels.json")
t.Run("no file returns nil", func(t *testing.T) {
os.Remove(clmPath)
if paths := v.Paths(); paths != nil {
t.Errorf("expected nil, got %v", paths)
}
})
t.Run("existing file returns path", func(t *testing.T) {
os.MkdirAll(filepath.Dir(clmPath), 0o755)
os.WriteFile(clmPath, []byte(`[]`), 0o644)
if paths := v.Paths(); len(paths) != 1 {
t.Errorf("expected 1 path, got %d", len(paths))
}
})
}
// testVSCodePath returns the expected VS Code config path for the given file in tests.
func testVSCodePath(t *testing.T, tmpDir, filename string) string {
t.Helper()
switch runtime.GOOS {
case "darwin":
return filepath.Join(tmpDir, "Library", "Application Support", "Code", "User", filename)
case "windows":
t.Setenv("APPDATA", tmpDir)
return filepath.Join(tmpDir, "Code", "User", filename)
default:
return filepath.Join(tmpDir, ".config", "Code", "User", filename)
}
}
func assertOllamaVendorConfigured(t *testing.T, data []byte) {
t.Helper()
var entries []map[string]any
if err := json.Unmarshal(data, &entries); err != nil {
t.Fatalf("invalid JSON: %v", err)
}
for _, entry := range entries {
if vendor, _ := entry["vendor"].(string); vendor == "ollama" {
if name, _ := entry["name"].(string); name != "Ollama" {
t.Errorf("expected name \"Ollama\", got %q", name)
}
if url, _ := entry["url"].(string); url == "" {
t.Error("url not set")
}
return
}
}
t.Error("no ollama vendor entry found")
}
func TestShowInModelPicker(t *testing.T) {
v := &VSCode{}
// helper to create a state DB with optional seed data
setupDB := func(t *testing.T, tmpDir string, seedPrefs map[string]bool, seedCache []map[string]any) string {
t.Helper()
dbDir := filepath.Join(tmpDir, "globalStorage")
os.MkdirAll(dbDir, 0o755)
dbPath := filepath.Join(dbDir, "state.vscdb")
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
t.Fatal(err)
}
defer db.Close()
if _, err := db.Exec("CREATE TABLE ItemTable (key TEXT UNIQUE ON CONFLICT REPLACE, value BLOB)"); err != nil {
t.Fatal(err)
}
if seedPrefs != nil {
data, _ := json.Marshal(seedPrefs)
db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chatModelPickerPreferences', ?)", string(data))
}
if seedCache != nil {
data, _ := json.Marshal(seedCache)
db.Exec("INSERT INTO ItemTable (key, value) VALUES ('chat.cachedLanguageModels.v2', ?)", string(data))
}
return dbPath
}
// helper to read prefs back from DB
readPrefs := func(t *testing.T, dbPath string) map[string]bool {
t.Helper()
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
t.Fatal(err)
}
defer db.Close()
var raw string
if err := db.QueryRow("SELECT value FROM ItemTable WHERE key = 'chatModelPickerPreferences'").Scan(&raw); err != nil {
t.Fatal(err)
}
prefs := make(map[string]bool)
json.Unmarshal([]byte(raw), &prefs)
return prefs
}
t.Run("fresh DB creates table and shows models", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
if runtime.GOOS == "windows" {
t.Setenv("APPDATA", tmpDir)
}
err := v.ShowInModelPicker([]string{"llama3.2"})
if err != nil {
t.Fatal(err)
}
dbPath := testVSCodePath(t, tmpDir, filepath.Join("globalStorage", "state.vscdb"))
prefs := readPrefs(t, dbPath)
if !prefs["ollama/Ollama/llama3.2"] {
t.Error("expected llama3.2 to be shown")
}
if !prefs["ollama/Ollama/llama3.2:latest"] {
t.Error("expected llama3.2:latest to be shown")
}
})
t.Run("configured models are shown", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, nil)
err := v.ShowInModelPicker([]string{"llama3.2", "qwen3:8b"})
if err != nil {
t.Fatal(err)
}
prefs := readPrefs(t, dbPath)
if !prefs["ollama/Ollama/llama3.2"] {
t.Error("expected llama3.2 to be shown")
}
if !prefs["ollama/Ollama/qwen3:8b"] {
t.Error("expected qwen3:8b to be shown")
}
})
t.Run("removed models are hidden", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
"ollama/Ollama/llama3.2": true,
"ollama/Ollama/llama3.2:latest": true,
"ollama/Ollama/mistral": true,
"ollama/Ollama/mistral:latest": true,
}, nil)
// Only configure llama3.2 — mistral should get hidden
err := v.ShowInModelPicker([]string{"llama3.2"})
if err != nil {
t.Fatal(err)
}
prefs := readPrefs(t, dbPath)
if !prefs["ollama/Ollama/llama3.2"] {
t.Error("expected llama3.2 to stay shown")
}
if prefs["ollama/Ollama/mistral"] {
t.Error("expected mistral to be hidden")
}
if prefs["ollama/Ollama/mistral:latest"] {
t.Error("expected mistral:latest to be hidden")
}
})
t.Run("non-ollama prefs are preserved", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
"copilot/gpt-4o": true,
}, nil)
err := v.ShowInModelPicker([]string{"llama3.2"})
if err != nil {
t.Fatal(err)
}
prefs := readPrefs(t, dbPath)
if !prefs["copilot/gpt-4o"] {
t.Error("expected copilot/gpt-4o to stay shown")
}
})
t.Run("uses cached numeric IDs when available", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
cache := []map[string]any{
{
"identifier": "ollama/Ollama/4",
"metadata": map[string]any{"vendor": "ollama", "name": "llama3.2"},
},
}
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), nil, cache)
err := v.ShowInModelPicker([]string{"llama3.2"})
if err != nil {
t.Fatal(err)
}
prefs := readPrefs(t, dbPath)
if !prefs["ollama/Ollama/4"] {
t.Error("expected numeric ID ollama/Ollama/4 to be shown")
}
// Name-based fallback should also be set
if !prefs["ollama/Ollama/llama3.2"] {
t.Error("expected name-based ID to also be shown")
}
})
t.Run("empty models is no-op", func(t *testing.T) {
err := v.ShowInModelPicker([]string{})
if err != nil {
t.Fatal(err)
}
})
t.Run("previously hidden model is re-shown when configured", func(t *testing.T) {
tmpDir := t.TempDir()
setTestHome(t, tmpDir)
t.Setenv("XDG_CONFIG_HOME", "")
dbPath := setupDB(t, testVSCodePath(t, tmpDir, ""), map[string]bool{
"ollama/Ollama/llama3.2": false,
"ollama/Ollama/llama3.2:latest": false,
}, nil)
// Ollama config is authoritative — should override the hidden state
err := v.ShowInModelPicker([]string{"llama3.2"})
if err != nil {
t.Fatal(err)
}
prefs := readPrefs(t, dbPath)
if !prefs["ollama/Ollama/llama3.2"] {
t.Error("expected llama3.2 to be re-shown")
}
})
}
func TestParseCopilotChatVersion(t *testing.T) {
tests := []struct {
name string
output string
wantInstalled bool
wantVersion string
}{
{
name: "found among other extensions",
output: "ms-python.python@2024.1.1\ngithub.copilot-chat@0.40.1\ngithub.copilot@1.200.0\n",
wantInstalled: true,
wantVersion: "0.40.1",
},
{
name: "only extension",
output: "GitHub.copilot-chat@0.41.0\n",
wantInstalled: true,
wantVersion: "0.41.0",
},
{
name: "not installed",
output: "ms-python.python@2024.1.1\ngithub.copilot@1.200.0\n",
wantInstalled: false,
},
{
name: "empty output",
output: "",
wantInstalled: false,
},
{
name: "case insensitive match",
output: "GitHub.Copilot-Chat@0.39.0\n",
wantInstalled: true,
wantVersion: "0.39.0",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
installed, version := parseCopilotChatVersion(tt.output)
if installed != tt.wantInstalled {
t.Errorf("installed = %v, want %v", installed, tt.wantInstalled)
}
if installed && version != tt.wantVersion {
t.Errorf("version = %q, want %q", version, tt.wantVersion)
}
})
}
}
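One parser consistent with these cases, sketched under the assumption that the extension list arrives as one `publisher.name@version` pair per line (hypothetical code, not necessarily the shipped function):

```go
// Sketch: scan publisher.name@version lines, matching the ID case-insensitively.
func parseCopilotChatVersionSketch(output string) (installed bool, version string) {
	for _, line := range strings.Split(output, "\n") {
		id, v, ok := strings.Cut(strings.TrimSpace(line), "@")
		if ok && strings.EqualFold(id, "github.copilot-chat") {
			return true, v
		}
	}
	return false, ""
}
```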
func TestCompareVersions(t *testing.T) {
tests := []struct {
a, b string
want int
}{
{"0.40.1", "0.40.1", 0},
{"0.40.2", "0.40.1", 1},
{"0.40.0", "0.40.1", -1},
{"0.41.0", "0.40.1", 1},
{"0.39.9", "0.40.1", -1},
{"1.0.0", "0.40.1", 1},
{"0.40", "0.40.1", -1},
{"0.40.1.1", "0.40.1", 1},
}
for _, tt := range tests {
t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) {
got := compareVersions(tt.a, tt.b)
if got != tt.want {
t.Errorf("compareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want)
}
})
}
}
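The table implies plain segment-wise numeric comparison, with missing segments treated as zero (so `0.40 < 0.40.1` and `0.40.1.1 > 0.40.1`). A compatible sketch, assuming purely numeric dot-separated versions:

```go
// Sketch: compare dot-separated numeric versions; absent segments count as 0.
func compareVersionsSketch(a, b string) int {
	as, bs := strings.Split(a, "."), strings.Split(b, ".")
	for i := 0; i < len(as) || i < len(bs); i++ {
		var x, y int
		if i < len(as) {
			x, _ = strconv.Atoi(as[i])
		}
		if i < len(bs) {
			y, _ = strconv.Atoi(bs[i])
		}
		if x != y {
			if x > y {
				return 1
			}
			return -1
		}
	}
	return 0
}
```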

View File

@@ -242,6 +242,10 @@ func (m selectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyEnter:
filtered := m.filteredItems()
if len(filtered) > 0 && m.cursor < len(filtered) {
@@ -354,7 +358,7 @@ func (m selectorModel) renderContent() string {
}
s.WriteString("\n")
help := "↑/↓ navigate • enter select • ← back"
if m.helpText != "" {
help = m.helpText
}
@@ -608,6 +612,10 @@ func (m multiSelectorModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.cancelled = true
return m, tea.Quit
case tea.KeyLeft:
m.cancelled = true
return m, tea.Quit
case tea.KeyTab:
m.multi = !m.multi
@@ -810,7 +818,7 @@ func (m multiSelectorModel) View() string {
s.WriteString("\n")
if !m.multi {
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • enter select • tab add multiple • ← back"))
} else {
count := m.selectedCount()
if count == 0 {
@@ -819,7 +827,7 @@
s.WriteString(selectorDescStyle.Render(fmt.Sprintf(" %d selected - press enter to continue", count)))
}
s.WriteString("\n\n")
s.WriteString(selectorHelpStyle.Render("↑/↓ navigate • space toggle • tab select single • enter confirm • ← back"))
}
result := s.String()
View File

@@ -782,6 +782,9 @@ func TestMulti_MultiModeHelpText(t *testing.T) {
if !strings.Contains(content, "tab select single") {
t.Error("multi mode should show 'tab select single' in help")
}
if !strings.Contains(content, "← back") {
t.Error("multi mode should show '← back' in help")
}
}
// --- preChecked initialization order ---
@@ -868,6 +871,46 @@ func TestMulti_UncheckingTopDefaultFallsBackToNearestCheckedBelow(t *testing.T)
}
}
// --- Left arrow back navigation ---
func TestSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := selectorModelWithCurrent("Pick:", items("a", "b", "c"), "")
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(selectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenNoFilter(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with empty filter should cancel (go back)")
}
}
func TestMultiSelectorLeftArrowCancelsWhenFiltering(t *testing.T) {
m := newMultiSelectorModel("Pick:", items("a", "b", "c"), nil)
m.filter = "a"
updated, _ := m.Update(tea.KeyMsg{Type: tea.KeyLeft})
got := updated.(multiSelectorModel)
if !got.cancelled {
t.Error("left arrow with active filter should still cancel (go back)")
}
}
// Key message helpers for testing
type keyType = int

View File

@@ -47,7 +47,7 @@ type menuItem struct {
var mainMenuItems = []menuItem{
{
title: "Chat with a model",
description: "Start an interactive chat with a model",
isRunModel: true,
},

View File

@@ -56,7 +56,7 @@ func launcherTestState() *launch.LauncherState {
func TestMenuRendersPinnedItemsAndMore(t *testing.T) {
view := newModel(launcherTestState()).View()
for _, want := range []string{"Chat with a model", "Launch Claude Code", "Launch Codex", "Launch OpenClaw", "More..."} {
if !strings.Contains(view, want) {
t.Fatalf("expected menu view to contain %q\n%s", want, view)
}

View File

@@ -290,6 +290,8 @@ func LoadModelMetadata(fsys fs.FS) (ModelKV, *Tokenizer, error) {
conv = &gemma3Model{Architecture: p.Architectures[0]}
case "Gemma3nForConditionalGeneration":
conv = &gemma3nModel{}
case "Gemma4ForCausalLM", "Gemma4ForConditionalGeneration":
conv = &gemma4Model{Architecture: p.Architectures[0]}
case "Phi3ForCausalLM":
conv = &phi3Model{}
case "Qwen2ForCausalLM":

556
convert/convert_gemma4.go Normal file
View File

@@ -0,0 +1,556 @@
package convert
import (
"bytes"
"encoding/binary"
"fmt"
"math"
"slices"
"strings"
"github.com/ollama/ollama/fs/ggml"
)
type gemma4Model struct {
gemmaModel
Architecture string
TextModel struct {
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
IntermediateSize uint32 `json:"intermediate_size"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
NumKeyValueHeads uint32 `json:"num_key_value_heads"`
HeadDim uint32 `json:"head_dim"`
GlobalHeadDim uint32 `json:"global_head_dim"`
VocabSize uint32 `json:"vocab_size"`
RMSNormEps float32 `json:"rms_norm_eps"`
MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
SlidingWindow uint32 `json:"sliding_window"`
SlidingWindowPattern *int32 `json:"_sliding_window_pattern"`
LayerTypes []string `json:"layer_types"`
FinalLogitSoftcapping float32 `json:"final_logit_softcapping"`
EnableMoeBlock bool `json:"enable_moe_block"`
NumExperts *uint32 `json:"num_experts"`
TopKExperts *uint32 `json:"top_k_experts"`
ExpertIntermediateSize *uint32 `json:"moe_intermediate_size"`
HiddenSizePerLayerInput *uint32 `json:"hidden_size_per_layer_input"`
NumKVSharedLayers uint32 `json:"num_kv_shared_layers"`
AttentionKEqV bool `json:"attention_k_eq_v"`
NumGlobalKeyValueHeads *uint32 `json:"num_global_key_value_heads"`
QueryPreAttnScalar *uint32 `json:"query_pre_attn_scalar"`
UseDoubleWideMLP bool `json:"use_double_wide_mlp"`
RopeParameters map[string]*struct {
RopeTheta float32 `json:"rope_theta"`
PartialRotaryFactor *float32 `json:"partial_rotary_factor"`
} `json:"rope_parameters"`
} `json:"text_config"`
VisionModel struct {
HiddenSize uint32 `json:"hidden_size"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
IntermediateSize uint32 `json:"intermediate_size"`
PatchSize uint32 `json:"patch_size"`
NumChannels uint32 `json:"num_channels"`
PoolingKernelSize uint32 `json:"pooling_kernel_size"`
LayerNormEps float32 `json:"layer_norm_eps"`
} `json:"vision_config"`
AudioModel *struct {
HiddenSize uint32 `json:"hidden_size"`
OutputProjDims uint32 `json:"output_proj_dims"`
NumHiddenLayers uint32 `json:"num_hidden_layers"`
NumAttentionHeads uint32 `json:"num_attention_heads"`
ConvKernelSize uint32 `json:"conv_kernel_size"`
RMSNormEps float32 `json:"rms_norm_eps"`
} `json:"audio_config"`
}
func (p *gemma4Model) KV(t *Tokenizer) KV {
kv := p.ModelParameters.KV(t)
kv["general.architecture"] = "gemma4"
kv["tokenizer.ggml.model"] = "llama"
kv["tokenizer.ggml.pre"] = "gemma4"
tc := p.TextModel
kv["gemma4.block_count"] = tc.NumHiddenLayers
kv["gemma4.embedding_length"] = tc.HiddenSize
// Per-layer FFN width: when use_double_wide_mlp is set, KV-shared layers get 2x FFN width.
if tc.UseDoubleWideMLP && tc.NumKVSharedLayers > 0 {
firstShared := int(tc.NumHiddenLayers) - int(tc.NumKVSharedLayers)
ffnWidths := make([]int32, tc.NumHiddenLayers)
for i := range ffnWidths {
if i >= firstShared {
ffnWidths[i] = int32(tc.IntermediateSize * 2)
} else {
ffnWidths[i] = int32(tc.IntermediateSize)
}
}
kv["gemma4.feed_forward_length"] = ffnWidths
} else {
kv["gemma4.feed_forward_length"] = tc.IntermediateSize
}
kv["gemma4.context_length"] = tc.MaxPositionEmbeddings
kv["gemma4.attention.head_count"] = tc.NumAttentionHeads
// Per-layer KV head count array: SWA layers use NumKeyValueHeads, global layers use NumGlobalKeyValueHeads
if tc.NumGlobalKeyValueHeads != nil && *tc.NumGlobalKeyValueHeads != tc.NumKeyValueHeads && len(tc.LayerTypes) > 0 {
kvHeads := make([]int32, len(tc.LayerTypes))
for i, lt := range tc.LayerTypes {
if lt == "sliding_attention" {
kvHeads[i] = int32(tc.NumKeyValueHeads)
} else {
kvHeads[i] = int32(*tc.NumGlobalKeyValueHeads)
}
}
kv["gemma4.attention.head_count_kv"] = kvHeads
} else {
kv["gemma4.attention.head_count_kv"] = tc.NumKeyValueHeads
}
// key_length = global head dim, key_length_swa = local (SWA) head dim
kv["gemma4.attention.key_length"] = tc.GlobalHeadDim
kv["gemma4.attention.value_length"] = tc.GlobalHeadDim
kv["gemma4.attention.key_length_swa"] = tc.HeadDim
kv["gemma4.attention.value_length_swa"] = tc.HeadDim
kv["gemma4.attention.layer_norm_rms_epsilon"] = tc.RMSNormEps
kv["gemma4.attention.sliding_window"] = tc.SlidingWindow
// Sliding window pattern from layer_types
if len(tc.LayerTypes) > 0 {
kv["gemma4.attention.sliding_window_pattern"] = slices.Collect(func(yield func(bool) bool) {
for _, lt := range tc.LayerTypes {
if !yield(lt == "sliding_attention") {
break
}
}
})
}
kv["gemma4.attention.shared_kv_layers"] = tc.NumKVSharedLayers
// RoPE: dimension_count is the full global head dim (freq_factors handle partial rotation)
if rp, ok := tc.RopeParameters["full_attention"]; ok && rp != nil {
kv["gemma4.rope.freq_base"] = rp.RopeTheta
kv["gemma4.rope.dimension_count"] = tc.GlobalHeadDim
}
if rp, ok := tc.RopeParameters["sliding_attention"]; ok && rp != nil {
kv["gemma4.rope.freq_base_swa"] = rp.RopeTheta
kv["gemma4.rope.dimension_count_swa"] = tc.HeadDim
}
if tc.FinalLogitSoftcapping > 0 {
kv["gemma4.final_logit_softcapping"] = tc.FinalLogitSoftcapping
}
// MoE
if tc.EnableMoeBlock && tc.NumExperts != nil {
kv["gemma4.expert_count"] = *tc.NumExperts
if tc.TopKExperts != nil {
kv["gemma4.expert_used_count"] = *tc.TopKExperts
}
if tc.ExpertIntermediateSize != nil {
kv["gemma4.expert_feed_forward_length"] = *tc.ExpertIntermediateSize
}
}
// PLE — always emit, even when 0
pleSize := uint32(0)
if tc.HiddenSizePerLayerInput != nil {
pleSize = *tc.HiddenSizePerLayerInput
}
kv["gemma4.embedding_length_per_layer_input"] = pleSize
// Vision model KV metadata
vc := p.VisionModel
if vc.NumHiddenLayers > 0 {
kv["gemma4.vision.block_count"] = vc.NumHiddenLayers
kv["gemma4.vision.embedding_length"] = vc.HiddenSize
kv["gemma4.vision.attention.head_count"] = vc.NumAttentionHeads
kv["gemma4.vision.feed_forward_length"] = vc.IntermediateSize
kv["gemma4.vision.patch_size"] = vc.PatchSize
numCh := vc.NumChannels
if numCh == 0 {
numCh = 3
}
kv["gemma4.vision.num_channels"] = numCh
nMerge := vc.PoolingKernelSize
if nMerge == 0 {
nMerge = 3
}
kv["gemma4.vision.projector.scale_factor"] = nMerge
eps := vc.LayerNormEps
if eps == 0 {
eps = 1e-6
}
kv["gemma4.vision.attention.layer_norm_epsilon"] = eps
}
// Audio model KV metadata
if p.AudioModel != nil && p.AudioModel.NumHiddenLayers > 0 {
ac := p.AudioModel
kv["gemma4.audio.block_count"] = ac.NumHiddenLayers
kv["gemma4.audio.embedding_length"] = ac.HiddenSize
kv["gemma4.audio.feed_forward_length"] = ac.HiddenSize * 4
kv["gemma4.audio.attention.head_count"] = ac.NumAttentionHeads
eps := ac.RMSNormEps
if eps == 0 {
eps = 1e-6
}
kv["gemma4.audio.attention.layer_norm_epsilon"] = eps
if ac.ConvKernelSize > 0 {
kv["gemma4.audio.conv_kernel_size"] = ac.ConvKernelSize
}
}
return kv
}
func (p *gemma4Model) Tensors(ts []Tensor) []*ggml.Tensor {
// First pass: collect vision clamp scalar values into a packed tensor.
// Layout: per vision layer (0..N-1), 7 linears (q,k,v,out,gate,up,down) × 4 values (inMin,inMax,outMin,outMax).
// Then 4 values for the projector (mm.input_projection).
clampSuffixes := []string{".input_min", ".input_max", ".output_min", ".output_max"}
clampMap := make(map[string]float32)
for _, t := range ts {
name := t.Name()
for _, sfx := range clampSuffixes {
if strings.HasSuffix(name, sfx) && (strings.Contains(name, "vision_tower") || strings.Contains(name, "embed_vision")) {
var buf bytes.Buffer
t.WriteTo(&buf)
data := buf.Bytes()
if len(data) >= 4 {
clampMap[name] = math.Float32frombits(binary.LittleEndian.Uint32(data))
}
}
}
}
var out []*ggml.Tensor
for _, t := range ts {
name := t.Name()
// Skip embedding_post_projection_norm — used as weightless RMS norm in inference
if strings.Contains(name, "embedding_post_projection_norm") {
continue
}
// Vision tensor renaming: match published mmproj GGUF names
if strings.HasPrefix(name, "v.blk.") {
name = strings.Replace(name, ".attn_norm.", ".ln1.", 1)
name = strings.Replace(name, ".ffn_norm.", ".ln2.", 1)
name = strings.Replace(name, ".attn_output.", ".attn_out.", 1)
name = strings.Replace(name, ".post_attention_norm.", ".attn_post_norm.", 1)
name = strings.Replace(name, ".post_ffw_norm.", ".ffn_post_norm.", 1)
name = strings.Replace(name, ".layer_output_scale.", ".out_scale.", 1)
}
// per_dim_scale: apply softplus to weight data and add .weight suffix.
if strings.HasPrefix(name, "a.blk.") && strings.HasSuffix(name, "per_dim_scale") {
name = name + ".weight"
t.SetRepacker(softplusRepacker)
}
// Depthwise conv1d: squeeze middle dimension [C, 1, K] → [C, K].
if strings.HasPrefix(name, "a.blk.") && strings.Contains(name, "conv_dw") && strings.HasSuffix(name, ".weight") {
t.SetRepacker(squeezeMiddleDim)
}
shape := t.Shape()
// Convert scalar tensors (input_min/max, output_min/max) to 1D
if len(shape) == 0 {
shape = []uint64{1}
}
// Depthwise conv1d shape: safetensors [C, 1, K] → GGUF ne[K, C].
// Shape array here maps to GGUF ne[] directly, but safetensors reader
// stores shape in PyTorch order [C, 1, K] which the GGUF writer inverts.
// Published GGUF has ne[0]=K, ne[1]=C → shape array must be [K, C].
if strings.HasPrefix(name, "a.blk.") && strings.Contains(name, "conv_dw") && strings.HasSuffix(name, ".weight") && len(shape) == 3 {
shape = []uint64{shape[0], shape[2]}
}
// MoE expert weights: no transpose needed. Safetensors stores [experts, out, in]
// which the framework reverses to GGUF ne=[in, out, experts], matching ggml_mul_mat_id.
// (transposeExperts was incorrectly swapping dims — removed)
// Audio conv weights are forced to F32 via tensorBase.Kind() in reader.go
// (im2col doesn't support BF16). No kindOverride needed — the Kind() method
// controls both the GGUF header type AND the WriteTo data encoding path.
var kindOverride *uint32
// Vision patch embedding: reshape from [n_embd, ksize_sq_c] to [n_embd, 3, patch_size, patch_size]
// Must be stored as F16 (not BF16) because the Conv2D im2col kernel requires F16/F32.
if strings.Contains(name, "v.patch_embd.weight") && len(shape) == 2 {
nEmbd := shape[0]
patchSize := uint64(p.VisionModel.PatchSize)
if patchSize == 0 {
patchSize = 16
}
numCh := uint64(p.VisionModel.NumChannels)
if numCh == 0 {
numCh = 3
}
t.SetRepacker(p.reshapePatchEmbed)
shape = []uint64{nEmbd, numCh, patchSize, patchSize}
f16Kind := uint32(1) // tensorKindFP16
kindOverride = &f16Kind
}
// Vision position embedding: keep 3D [2, maxPos, nEmbd] — matching published mmproj format.
// The framework reverses shape to GGUF ne=[nEmbd, maxPos, 2]. No data repacking needed.
kind := t.Kind()
if kindOverride != nil {
kind = *kindOverride
}
out = append(out, &ggml.Tensor{
Name: name,
Kind: kind,
Shape: shape,
WriterTo: t,
})
}
// Generate a single global rope_freqs.weight for proportional RoPE on global attention layers.
// This matches the published GGUF format: one global tensor shared by all layers.
// Global layers use partial_rotary_factor (0.25) — only rotate that fraction of dims.
// Dimensions beyond the rotated portion get freq_factor=1e30 (effectively no rotation).
tc := p.TextModel
if tc.GlobalHeadDim > 0 {
globalFreqsSize := tc.GlobalHeadDim / 2 // freq_factors are per dimension pair
// Compute number of rotated pairs for global layers
partialRotaryFactor := float32(0.25) // default
if rp, ok := tc.RopeParameters["full_attention"]; ok && rp != nil && rp.PartialRotaryFactor != nil {
partialRotaryFactor = *rp.PartialRotaryFactor
}
nRotFull := int(float32(tc.GlobalHeadDim) * partialRotaryFactor / 2)
freqs := make(ropeFactor, globalFreqsSize)
for j := range freqs {
if j < nRotFull {
freqs[j] = 1.0
} else {
freqs[j] = 1e30 // effectively disable rotation
}
}
out = append(out, &ggml.Tensor{
Name: "rope_freqs.weight",
Kind: 0, // F32
Shape: []uint64{uint64(len(freqs))},
WriterTo: freqs,
})
}
// Emit packed vision clamp data as a single F32 tensor.
// Layout: numLayers × 7 linears (q,k,v,out,gate,up,down) × 4 floats (inMin,inMax,outMin,outMax)
// then 4 floats for the projector. Total = (numLayers*7 + 1) * 4 floats.
if len(clampMap) > 0 {
numLayers := int(p.VisionModel.NumHiddenLayers)
linearNames := []string{"attn_q", "attn_k", "attn_v", "attn_out", "ffn_gate", "ffn_up", "ffn_down"}
suffixes := []string{".input_min", ".input_max", ".output_min", ".output_max"}
totalFloats := (numLayers*len(linearNames) + 1) * 4 // +1 for projector
clampData := make([]float32, totalFloats)
// Map GGUF linear names back to their HF projection names once, outside the loops.
sfxMap := map[string]string{"attn_q": "q_proj", "attn_k": "k_proj", "attn_v": "v_proj", "attn_out": "o_proj", "ffn_gate": "gate_proj", "ffn_up": "up_proj", "ffn_down": "down_proj"}
for layer := range numLayers {
for li, ln := range linearNames {
for si, sfx := range suffixes {
for origName, val := range clampMap {
if strings.Contains(origName, fmt.Sprintf("layers.%d.", layer)) && strings.HasSuffix(origName, sfx) && strings.Contains(origName, sfxMap[ln]) {
idx := (layer*len(linearNames)+li)*4 + si
clampData[idx] = val
break
}
}
}
}
}
// Projector clamp values
projIdx := numLayers * len(linearNames) * 4
for si, sfx := range suffixes {
for origName, val := range clampMap {
if strings.Contains(origName, "input_projection") && strings.HasSuffix(origName, sfx) {
clampData[projIdx+si] = val
break
}
}
}
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, clampData)
out = append(out, &ggml.Tensor{
Name: "v.clamp_data",
Kind: 0, // F32
Shape: []uint64{uint64(totalFloats)},
WriterTo: &buf,
})
}
return out
}
// reshapePatchEmbed reshapes the vision patch embedding from HF layout [n_embd, ksize*ksize*channels]
// to GGUF layout [n_embd, channels, patch_size, patch_size].
func (*gemma4Model) reshapePatchEmbed(_ string, data []float32, shape []uint64) ([]float32, error) {
if len(shape) != 2 {
return data, nil
}
nEmbd := int(shape[0])
ksqC := int(shape[1])
nChannels := 3
patchSize := int(math.Sqrt(float64(ksqC / nChannels)))
// HF layout: [n_embd, patch_size * patch_size * channels] (row-major)
// Need: [n_embd, channels, patch_size, patch_size]
result := make([]float32, len(data))
for e := range nEmbd {
for c := range nChannels {
for h := range patchSize {
for w := range patchSize {
srcIdx := e*ksqC + h*patchSize*nChannels + w*nChannels + c
dstIdx := e*nChannels*patchSize*patchSize + c*patchSize*patchSize + h*patchSize + w
result[dstIdx] = data[srcIdx]
}
}
}
}
shape[0] = uint64(nEmbd)
shape[1] = uint64(nChannels * patchSize * patchSize)
return result, nil
}
// softplusRepacker applies softplus (ln(1 + exp(x))) to tensor data.
// Used for per_dim_scale tensors which the published GGUF stores pre-activated.
func softplusRepacker(_ string, data []float32, shape []uint64) ([]float32, error) {
result := make([]float32, len(data))
for i, x := range data {
result[i] = float32(math.Log(1 + math.Exp(float64(x))))
}
return result, nil
}
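One caveat: the naive `math.Log(1 + math.Exp(x))` form overflows to +Inf once x exceeds roughly 709, where float64 exp saturates. That is harmless for the small per_dim_scale values seen in practice, but if larger inputs ever show up, the standard numerically stable rewrite is (a sketch, not part of the converter):

```go
// Stable softplus: softplus(x) = max(x, 0) + log1p(exp(-|x|)).
// Mathematically identical to ln(1 + exp(x)), but never overflows.
func stableSoftplus(x float64) float64 {
	return math.Max(x, 0) + math.Log1p(math.Exp(-math.Abs(x)))
}
```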
// squeezeMiddleDim squeezes the middle dimension from [C, 1, K] → [C, K] for depthwise conv1d weights.
// Data layout stays the same since the middle dim is 1 — just a shape change.
func squeezeMiddleDim(_ string, data []float32, _ []uint64) ([]float32, error) {
return data, nil
}
func (p *gemma4Model) Replacements() []string {
return []string{
// ClippableLinear wraps nn.Linear — strip .linear. from weight path
".linear.weight", ".weight",
".linear.bias", ".bias",
// Audio SSCP (Sub-Sample Convolution Projection)
"model.audio_tower.subsample_conv_projection.conv_0.conv", "a.conv1d.0",
"model.audio_tower.subsample_conv_projection.conv_0.norm", "a.conv1d.0.norm",
"model.audio_tower.subsample_conv_projection.conv_1.conv", "a.conv1d.1",
"model.audio_tower.subsample_conv_projection.conv_1.norm", "a.conv1d.1.norm",
"model.audio_tower.subsample_conv_projection.input_proj_linear", "a.pre_encode.out",
// Audio conformer blocks
"model.audio_tower.conformer", "a.blk",
// Audio conformer attention
"attention.attn.relative_position_embedding.pos_proj", "linear_pos",
"attention.attn.per_dim_key_scale", "per_dim_k_scale",
"attention.attn.per_dim_scale", "per_dim_scale",
"attention.attn.q_proj", "attn_q",
"attention.attn.k_proj", "attn_k",
"attention.attn.v_proj", "attn_v",
"attention.pre_attn_norm", "ln1",
"attention.post_norm", "ln2",
"attention.post", "attn_out",
// Audio conformer feedforward
"ffw_layer_start.pre_layer_norm", "ffn_norm",
"ffw_layer_start.post_layer_norm", "ffn_post_norm",
"ffw_layer_start.ffw_layer_1", "ffn_up",
"ffw_layer_start.ffw_layer_2", "ffn_down",
"ffw_layer_end.pre_layer_norm", "ffn_norm_1",
"ffw_layer_end.post_layer_norm", "ffn_post_norm_1",
"ffw_layer_end.ffw_layer_1", "ffn_up_1",
"ffw_layer_end.ffw_layer_2", "ffn_down_1",
// Audio conformer lightweight conv1d
"lconv1d.depthwise_conv1d", "conv_dw",
"lconv1d.pre_layer_norm", "conv_norm",
"lconv1d.conv_norm", "norm_conv",
"lconv1d.linear_start", "conv_pw1",
"lconv1d.linear_end", "conv_pw2",
// Audio block final norm
"norm_out", "layer_pre_norm",
// Audio embedder and output projection
"model.embed_audio.embedding_projection", "mm.a.input_projection",
"model.audio_tower.output_proj", "mm.a.fc",
// Vision encoder
"model.vision_tower.encoder.layers", "v.blk",
"model.vision_tower.patch_embedder.input_proj", "v.patch_embd",
"model.vision_tower.patch_embedder.position_embedding_table", "v.position_embd.weight",
"model.vision_tower.std_bias", "v.std_bias",
"model.vision_tower.std_scale", "v.std_scale",
// Vision multimodal projector
"model.embed_vision.embedding_projection", "mm.input_projection",
// Text model
"model.language_model.embed_tokens_per_layer", "per_layer_token_embd",
"model.language_model.embed_tokens", "token_embd",
"model.language_model.per_layer_model_projection", "per_layer_model_proj",
"model.language_model.per_layer_projection_norm", "per_layer_proj_norm",
"model.language_model.norm", "output_norm",
"model.language_model.layers", "blk",
// Shared attention replacements (work for both text and vision tensors)
"input_layernorm", "attn_norm",
"self_attn.q_proj", "attn_q",
"self_attn.q_norm", "attn_q_norm",
"self_attn.k_proj", "attn_k",
"self_attn.k_norm", "attn_k_norm",
"self_attn.v_proj", "attn_v",
"self_attn.o_proj", "attn_output",
"mlp.gate_proj", "ffn_gate",
"mlp.down_proj", "ffn_down",
"mlp.up_proj", "ffn_up",
// Post norms
"post_attention_layernorm", "post_attention_norm",
"pre_feedforward_layernorm_2", "pre_ffw_norm_2",
"pre_feedforward_layernorm", "ffn_norm",
"post_feedforward_layernorm_1", "post_ffw_norm_1",
"post_feedforward_layernorm_2", "post_ffw_norm_2",
"post_feedforward_layernorm", "post_ffw_norm",
// PLE
"per_layer_input_gate", "inp_gate",
"per_layer_projection", "proj",
"post_per_layer_input_norm", "post_norm",
// MoE
"router.proj", "ffn_gate_inp",
"router.scale", "ffn_gate_inp.scale",
"router.per_expert_scale.weight", "ffn_down_exps.scale",
"router.per_expert_scale", "ffn_down_exps.scale",
"experts.gate_up_proj.weight", "ffn_gate_up_exps.weight",
"experts.gate_up_proj", "ffn_gate_up_exps.weight",
"experts.down_proj.weight", "ffn_down_exps.weight",
"experts.down_proj", "ffn_down_exps.weight",
"moe.gate_proj", "ffn_gate_exps.weight",
"moe.up_proj", "ffn_up_exps.weight",
"moe.gate_up_proj.weight", "ffn_gate_up_exps.weight",
"moe.gate_up_proj", "ffn_gate_up_exps.weight",
"moe.down_proj", "ffn_down_exps.weight",
"moe.per_expert_scale.weight", "ffn_down_exps.scale",
"moe.per_expert_scale", "ffn_down_exps.scale",
// Layer scalar
"layer_scalar", "layer_output_scale.weight",
}
}

View File

@@ -0,0 +1,263 @@
package convert
import (
"strings"
"testing"
)
func TestGemma4AudioReplacements(t *testing.T) {
p := gemma4Model{}
r := strings.NewReplacer(p.Replacements()...)
tests := []struct {
name string
in string
want string
}{
// SSCP convolution blocks
{
"sscp conv0 weight",
"model.audio_tower.subsample_conv_projection.conv_0.conv.weight",
"a.conv1d.0.weight",
},
{
"sscp conv0 norm",
"model.audio_tower.subsample_conv_projection.conv_0.norm.weight",
"a.conv1d.0.norm.weight",
},
{
"sscp conv1 weight",
"model.audio_tower.subsample_conv_projection.conv_1.conv.weight",
"a.conv1d.1.weight",
},
{
"sscp input proj weight",
"model.audio_tower.subsample_conv_projection.input_proj_linear.weight",
"a.pre_encode.out.weight",
},
{
"sscp input proj bias",
"model.audio_tower.subsample_conv_projection.input_proj_linear.bias",
"a.pre_encode.out.bias",
},
// Conformer attention
{
"attn q weight",
"model.audio_tower.conformer.0.attention.attn.q_proj.linear.weight",
"a.blk.0.attn_q.weight",
},
{
"attn k weight",
"model.audio_tower.conformer.5.attention.attn.k_proj.linear.weight",
"a.blk.5.attn_k.weight",
},
{
"attn v clamp input_min",
"model.audio_tower.conformer.0.attention.attn.v_proj.input_min",
"a.blk.0.attn_v.input_min",
},
{
"attn out weight (ClippableLinear)",
"model.audio_tower.conformer.0.attention.post.linear.weight",
"a.blk.0.attn_out.weight",
},
{
"attn out clamp output_max",
"model.audio_tower.conformer.0.attention.post.output_max",
"a.blk.0.attn_out.output_max",
},
{
"attn pre norm",
"model.audio_tower.conformer.0.attention.pre_attn_norm.weight",
"a.blk.0.ln1.weight",
},
{
"attn post norm",
"model.audio_tower.conformer.0.attention.post_norm.weight",
"a.blk.0.ln2.weight",
},
{
"linear pos",
"model.audio_tower.conformer.0.attention.attn.relative_position_embedding.pos_proj.weight",
"a.blk.0.linear_pos.weight",
},
{
"per dim scale",
"model.audio_tower.conformer.0.attention.attn.per_dim_scale",
"a.blk.0.per_dim_scale",
},
{
"per dim key scale",
"model.audio_tower.conformer.0.attention.attn.per_dim_key_scale",
"a.blk.0.per_dim_k_scale",
},
// Conformer feedforward start
{
"ffn up weight",
"model.audio_tower.conformer.0.ffw_layer_start.ffw_layer_1.linear.weight",
"a.blk.0.ffn_up.weight",
},
{
"ffn down weight",
"model.audio_tower.conformer.0.ffw_layer_start.ffw_layer_2.linear.weight",
"a.blk.0.ffn_down.weight",
},
{
"ffn norm",
"model.audio_tower.conformer.0.ffw_layer_start.pre_layer_norm.weight",
"a.blk.0.ffn_norm.weight",
},
{
"ffn post norm",
"model.audio_tower.conformer.0.ffw_layer_start.post_layer_norm.weight",
"a.blk.0.ffn_post_norm.weight",
},
// Conformer feedforward end
{
"ffn up 1 weight",
"model.audio_tower.conformer.0.ffw_layer_end.ffw_layer_1.linear.weight",
"a.blk.0.ffn_up_1.weight",
},
{
"ffn down 1 weight",
"model.audio_tower.conformer.0.ffw_layer_end.ffw_layer_2.linear.weight",
"a.blk.0.ffn_down_1.weight",
},
{
"ffn norm 1",
"model.audio_tower.conformer.0.ffw_layer_end.pre_layer_norm.weight",
"a.blk.0.ffn_norm_1.weight",
},
{
"ffn post norm 1",
"model.audio_tower.conformer.0.ffw_layer_end.post_layer_norm.weight",
"a.blk.0.ffn_post_norm_1.weight",
},
// Conformer lightweight conv1d
{
"conv dw weight",
"model.audio_tower.conformer.0.lconv1d.depthwise_conv1d.weight",
"a.blk.0.conv_dw.weight",
},
{
"conv norm (pre_layer_norm)",
"model.audio_tower.conformer.0.lconv1d.pre_layer_norm.weight",
"a.blk.0.conv_norm.weight",
},
{
"norm conv (conv_norm)",
"model.audio_tower.conformer.0.lconv1d.conv_norm.weight",
"a.blk.0.norm_conv.weight",
},
{
"conv pw1 weight",
"model.audio_tower.conformer.0.lconv1d.linear_start.linear.weight",
"a.blk.0.conv_pw1.weight",
},
{
"conv pw2 weight",
"model.audio_tower.conformer.0.lconv1d.linear_end.linear.weight",
"a.blk.0.conv_pw2.weight",
},
// Audio embedder
{
"audio embedder projection weight",
"model.embed_audio.embedding_projection.linear.weight",
"mm.a.input_projection.weight",
},
{
"audio embedder projection bias",
"model.embed_audio.embedding_projection.linear.bias",
"mm.a.input_projection.bias",
},
// Audio output projection
{
"audio output proj weight",
"model.audio_tower.output_proj.weight",
"mm.a.fc.weight",
},
{
"audio output proj bias",
"model.audio_tower.output_proj.bias",
"mm.a.fc.bias",
},
// Verify vision tensors still work
{
"vision q weight",
"model.vision_tower.encoder.layers.0.self_attn.q_proj.linear.weight",
"v.blk.0.attn_q.weight",
},
{
"vision std bias",
"model.vision_tower.std_bias",
"v.std_bias",
},
{
"vision std scale",
"model.vision_tower.std_scale",
"v.std_scale",
},
{
"vision patch embd",
"model.vision_tower.patch_embedder.input_proj.weight",
"v.patch_embd.weight",
},
{
"vision projector",
"model.embed_vision.embedding_projection.linear.weight",
"mm.input_projection.weight",
},
// Verify text tensors still work
{
"text attn q",
"model.language_model.layers.0.self_attn.q_proj.weight",
"blk.0.attn_q.weight",
},
{
"text token embd",
"model.language_model.embed_tokens.weight",
"token_embd.weight",
},
{
"text moe gate up fused",
"model.language_model.layers.0.experts.gate_up_proj",
"blk.0.ffn_gate_up_exps.weight",
},
{
"text moe down",
"model.language_model.layers.0.experts.down_proj",
"blk.0.ffn_down_exps.weight",
},
{
"text moe down with weight suffix",
"model.language_model.layers.0.experts.down_proj.weight",
"blk.0.ffn_down_exps.weight",
},
{
"text moe per expert scale",
"model.language_model.layers.0.router.per_expert_scale",
"blk.0.ffn_down_exps.scale",
},
{
"text moe per expert scale with weight suffix",
"model.language_model.layers.0.router.per_expert_scale.weight",
"blk.0.ffn_down_exps.scale",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := r.Replace(tt.in); got != tt.want {
t.Errorf("Replace(%q) = %q, want %q", tt.in, got, tt.want)
}
})
}
}

View File

@@ -205,8 +205,8 @@ func TestConvertInvalidDatatype(t *testing.T) {
generateSafetensorTestData(t, tempDir, td)
err = ConvertModel(os.DirFS(tempDir), f)
if err == nil || !strings.Contains(err.Error(), "unknown data type") {
t.Errorf("expected 'unknown data type' error but got: %v", err)
}
}

View File

@@ -42,8 +42,11 @@ func (t tensorBase) Kind() uint32 {
strings.HasSuffix(t.name, ".bias") ||
strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
strings.HasSuffix(t.name, ".ssm_conv1d.weight") || // SSM conv kernel must be F32 for Metal
strings.HasPrefix(t.name, "a.conv1d.") || // audio SSCP conv weights must be F32 for im2col
strings.Contains(t.name, ".conv_dw.") || // audio depthwise conv weights must be F32
t.name == "token_types.weight" ||
t.name == "v.positional_embedding_vlm" ||
t.name == "v.position_embd.weight" ||
t.name == "v.tile_position_embd.weight" ||
t.name == "v.pre_tile_position_embd.weight" ||
t.name == "v.post_tile_position_embd.weight" ||

View File

@@ -5,7 +5,6 @@ import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"io/fs"
@@ -53,9 +52,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
for _, key := range keys {
if value := headers[key]; value.Type != "" {
// Scalar tensors (e.g. clipped linear min/max) are 0-dim in safetensors.
// Promote them to 1-dim so they can be stored in GGUF.
if len(value.Shape) == 0 {
value.Shape = []uint64{1}
}
ggufName := replacer.Replace(key)
if _, ok := names[ggufName]; ok {

View File

@@ -21,6 +21,7 @@ Configure and launch external applications to use Ollama models. This provides a
- **OpenCode** - Open-source coding assistant
- **Claude Code** - Anthropic's agentic coding tool
- **Codex** - OpenAI's coding assistant
- **VS Code** - Microsoft's IDE with built-in AI chat
- **Droid** - Factory's AI coding agent
#### Examples

View File

@@ -127,6 +127,7 @@
},
{
"group": "IDEs & Editors",
"expanded": true,
"pages": [
"/integrations/cline",
"/integrations/jetbrains",

BIN docs/images/local.png (new file, 29 KiB; binary file not shown)
BIN five additional docs/images files (binary, not shown): three added (64 KiB, 52 KiB, 67 KiB), two removed (77 KiB, 56 KiB)
BIN docs/images/vscode.png (new file, 2.7 MiB; binary file not shown)

View File

@@ -96,6 +96,18 @@ The `/loop` command runs a prompt or slash command on a recurring schedule insid
/loop 1h Remind me to review the deploy status
```
## Telegram
Chat with Claude Code from Telegram by connecting a bot to your session. Install the [Telegram plugin](https://github.com/anthropics/claude-plugins-official), create a bot via [@BotFather](https://t.me/BotFather), then launch with the channel flag:
```shell
ollama launch claude -- --channels plugin:telegram@claude-plugins-official
```
Claude Code will prompt for permission on most actions. To allow the bot to work autonomously, configure [permission rules](https://code.claude.com/docs/en/permissions) or pass `--dangerously-skip-permissions` in isolated environments.
See the [plugin README](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) for full setup instructions including pairing and access control.
## Manual setup
Claude Code connects to Ollama using the Anthropic-compatible API.

View File

@@ -2,7 +2,7 @@
title: Pi
---
Pi is a minimal and extensible coding agent.
## Install
@@ -20,13 +20,65 @@ npm install -g @mariozechner/pi-coding-agent
ollama launch pi
```
This installs Pi, configures Ollama as a provider (including web tools), and drops you into an interactive session.
To configure without launching:
```shell
ollama launch pi --config
```
### Run directly with a model
```shell
ollama launch pi --model qwen3.5:cloud
```
Cloud models are also available at [ollama.com](https://ollama.com/search?c=cloud).
## Extensions
Pi ships with four core tools: `read`, `write`, `edit`, and `bash`. All other capabilities are added through its extension system.
Extensions include on-demand capability packages invoked via `/skill:name` commands.
Install from npm or git:
```bash
pi install npm:@foo/some-tools
pi install git:github.com/user/repo@v1
```
See all packages at [pi.dev](https://pi.dev/packages).
### Web search
Pi can use web search and fetch tools via the `@ollama/pi-web-search` package.
When launching Pi through Ollama, package install/update is managed automatically.
To install manually:
```bash
pi install npm:@ollama/pi-web-search
```
### Autoresearch with `pi-autoresearch`
[pi-autoresearch](https://github.com/davebcn87/pi-autoresearch) brings autonomous experiment loops to Pi. Inspired by Karpathy's autoresearch, it turns any measurable metric into an optimization target: test speed, bundle size, build time, model training loss, Lighthouse scores.
```bash
pi install https://github.com/davebcn87/pi-autoresearch
```
Tell Pi what to optimize. It runs experiments, benchmarks each one, keeps improvements, reverts regressions, and repeats — all autonomously. A built-in dashboard tracks every run with confidence scoring to distinguish real gains from benchmark noise.
```bash
/autoresearch optimize unit test runtime
```
Each kept experiment is automatically committed. Each failed one is reverted. When you're done, Pi can group improvements into independent branches for clean review and merge.
## Manual setup
Add a configuration block to `~/.pi/agent/models.json`:

View File

@@ -2,33 +2,84 @@
title: VS Code
---
VS Code includes built-in AI chat through GitHub Copilot Chat. Ollama models can be used directly in the Copilot Chat model picker.
![VS Code with Ollama](/images/vscode.png)
## Prerequisites
- Ollama v0.18.3+
- [VS Code 1.113+](https://code.visualstudio.com/download)
- [GitHub Copilot Chat extension 0.41.0+](https://marketplace.visualstudio.com/items?itemName=GitHub.copilot-chat)
<Note>VS Code requires you to be logged in to use its model selector, even for custom models. This doesn't require a paid GitHub Copilot account; GitHub Copilot Free will enable model selection for custom models.</Note>
## Quick setup
```shell
ollama launch vscode
```
Recommended models will be shown after running the command. See the latest models at [ollama.com](https://ollama.com/search?c=tools).
Make sure **Local** is selected at the bottom of the Copilot Chat panel to use your Ollama models.
<div style={{ display: "flex", justifyContent: "center" }}>
<img
src="/images/local.png"
alt="Ollama Local Models"
width="60%"
style={{ borderRadius: "4px", marginTop: "10px", marginBottom: "10px" }}
/>
</div>
## Run directly with a model
```shell
ollama launch vscode --model qwen3.5:cloud
```
Cloud models are also available at [ollama.com](https://ollama.com/search?c=cloud).
## Manual setup
To configure Ollama manually without `ollama launch`:
1. Open the **Copilot Chat** side bar from the top right corner
<div style={{ display: "flex", justifyContent: "center" }}>
<img
src="/images/vscode-sidebar.png"
alt="VS Code chat Sidebar"
width="75%"
style={{ borderRadius: "4px" }}
/>
</div>
2. Click the **settings gear icon** (<Icon icon="gear" />) to bring up the Language Models window
<div style={{ display: "flex", justifyContent: "center" }}>
<img
src="/images/vscode-other-models.png"
alt="VS Code model picker"
width="75%"
style={{ borderRadius: "4px" }}
/>
</div>
3. Click **Add Models** and select **Ollama** to load all your Ollama models into VS Code
<div style={{ display: "flex", justifyContent: "center" }}>
<img
src="/images/vscode-add-ollama.png"
alt="VS Code model options dropdown to add ollama models"
width="75%"
style={{ borderRadius: "4px" }}
/>
</div>
4. Click the **Unhide** button in the model picker to show your Ollama models
<div style={{ display: "flex", justifyContent: "center" }}>
<img
src="/images/vscode-unhide.png"
alt="VS Code unhide models button"
width="75%"
style={{ borderRadius: "4px" }}
/>
</div>

View File

@@ -281,6 +281,7 @@ func (kv KV) OllamaEngineRequired() bool {
"deepseekocr",
"gemma3",
"gemma3n",
"gemma4",
"gptoss", "gpt-oss",
"llama4",
"mistral3",
@@ -874,7 +875,7 @@ func (f GGML) SupportsFlashAttention() bool {
return true
}
if slices.Contains([]string{"gemma2", "grok"}, arch) {
return false
}

View File

@@ -14,4 +14,15 @@ The integration tests have 2 modes of operating.
> Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
## Testing a New Model
When implementing a new model architecture, use `OLLAMA_TEST_MODEL` to run the
integration suite against your model.
```bash
# Build the binary first
go build .
# Run integration tests against it
OLLAMA_TEST_MODEL=mymodel go test -tags integration -v -count 1 -timeout 15m ./integration/
```

View File

@@ -48,9 +48,7 @@ func TestAPIGenerate(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -151,7 +149,11 @@
})
}
// Validate PS while we're at it — skip for local-only models
// which may lack metadata fields like family, parameter_size, etc.
if testModel != "" {
return
}
resp, err := client.ListRunning(ctx)
if err != nil {
t.Fatalf("list models API error: %s", err)
@@ -208,9 +210,7 @@ func TestAPIChat(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
pullOrSkip(ctx, t, client, req.Model)
tests := []struct {
name string
@@ -311,6 +311,9 @@
}
func TestAPIListModels(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -361,6 +364,9 @@ func verifyModelDetails(t *testing.T, details api.ModelDetails) {
}
func TestAPIShowModel(t *testing.T) {
if testModel != "" {
t.Skip("skipping metadata test with model override")
}
modelName := "llama3.2"
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
@@ -400,6 +406,10 @@
}
func TestAPIGenerateLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
@@ -513,6 +523,10 @@
}
func TestAPIChatLogprobs(t *testing.T) {
if testModel != "" {
// Logprobs requires runner support (e.g. llama.cpp has it, MLX does not).
t.Skip("logprobs not supported by all runners")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()

259
integration/audio_test.go Normal file
View File

@@ -0,0 +1,259 @@
//go:build integration
package integration
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
var defaultAudioModels = []string{
"gemma4-e2b",
"gemma4-e4b",
}
// decodeTestAudio returns the test audio clip ("Why is the sky blue?", 16kHz mono WAV).
func decodeTestAudio(t *testing.T) api.ImageData {
t.Helper()
data, err := base64.StdEncoding.DecodeString(audioEncodingPrompt)
if err != nil {
t.Fatalf("failed to decode test audio: %v", err)
}
return data
}
// setupAudioModel pulls the model, preloads it, and skips if it doesn't support audio.
func setupAudioModel(ctx context.Context, t *testing.T, client *api.Client, model string) {
t.Helper()
requireCapability(ctx, t, client, model, "audio")
pullOrSkip(ctx, t, client, model)
err := client.Generate(ctx, &api.GenerateRequest{Model: model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
t.Fatalf("failed to load model %s: %s", model, err)
}
}
// TestAudioTranscription tests that the model can transcribe audio to text.
func TestAudioTranscription(t *testing.T) {
for _, model := range testModels(defaultAudioModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
setupAudioModel(ctx, t, client, model)
audio := decodeTestAudio(t)
noThink := &api.ThinkValue{Value: false}
req := api.ChatRequest{
Model: model,
Think: noThink,
Messages: []api.Message{
{
Role: "system",
Content: "Transcribe the audio exactly as spoken. Output only the transcription.",
},
{
Role: "user",
Content: "Transcribe this audio.",
Images: []api.ImageData{audio},
},
},
Stream: &stream,
Options: map[string]any{
"temperature": 0,
"seed": 123,
"num_predict": 50,
},
}
// The audio says "Why is the sky blue?" — expect key words in transcription.
DoChat(ctx, t, client, req, []string{"sky", "blue"}, 60*time.Second, 10*time.Second)
})
}
}
// TestAudioResponse tests that the model can respond to a spoken question.
func TestAudioResponse(t *testing.T) {
for _, model := range testModels(defaultAudioModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
setupAudioModel(ctx, t, client, model)
audio := decodeTestAudio(t)
noThink := &api.ThinkValue{Value: false}
req := api.ChatRequest{
Model: model,
Think: noThink,
Messages: []api.Message{
{
Role: "user",
Content: "",
Images: []api.ImageData{audio},
},
},
Stream: &stream,
Options: map[string]any{
"temperature": 0,
"seed": 123,
"num_predict": 200,
},
}
// The audio asks "Why is the sky blue?" — expect an answer about light/scattering.
DoChat(ctx, t, client, req, []string{
"scatter", "light", "blue", "atmosphere", "wavelength", "rayleigh",
}, 60*time.Second, 10*time.Second)
})
}
}
// TestOpenAIAudioTranscription tests the /v1/audio/transcriptions endpoint.
func TestOpenAIAudioTranscription(t *testing.T) {
for _, model := range testModels(defaultAudioModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, endpoint, cleanup := InitServerConnection(ctx, t)
defer cleanup()
setupAudioModel(ctx, t, client, model)
audioBytes := decodeTestAudio(t)
// Build multipart form request.
var body bytes.Buffer
writer := multipart.NewWriter(&body)
writer.WriteField("model", model)
part, err := writer.CreateFormFile("file", "prompt.wav")
if err != nil {
t.Fatal(err)
}
part.Write(audioBytes)
writer.Close()
url := fmt.Sprintf("http://%s/v1/audio/transcriptions", endpoint)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, &body)
if err != nil {
t.Fatal(err)
}
req.Header.Set("Content-Type", writer.FormDataContentType())
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("request failed: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
t.Fatalf("expected 200, got %d: %s", resp.StatusCode, string(respBody))
}
respBody, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatal(err)
}
text := strings.ToLower(string(respBody))
if !strings.Contains(text, "sky") && !strings.Contains(text, "blue") {
t.Errorf("transcription response missing expected words, got: %s", string(respBody))
}
})
}
}
// TestOpenAIChatWithAudio tests /v1/chat/completions with input_audio content.
func TestOpenAIChatWithAudio(t *testing.T) {
for _, model := range testModels(defaultAudioModels) {
t.Run(model, func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, endpoint, cleanup := InitServerConnection(ctx, t)
defer cleanup()
setupAudioModel(ctx, t, client, model)
audioB64 := audioEncodingPrompt
reqBody := fmt.Sprintf(`{
"model": %q,
"messages": [{
"role": "user",
"content": [
{"type": "input_audio", "input_audio": {"data": %q, "format": "wav"}}
]
}],
"temperature": 0,
"seed": 123,
"max_tokens": 200,
"think": false
}`, model, strings.TrimSpace(audioB64))
url := fmt.Sprintf("http://%s/v1/chat/completions", endpoint)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, strings.NewReader(reqBody))
if err != nil {
t.Fatal(err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("request failed: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
respBody, _ := io.ReadAll(resp.Body)
t.Fatalf("expected 200, got %d: %s", resp.StatusCode, string(respBody))
}
respBytes, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatalf("failed to read response: %v", err)
}
var result struct {
Choices []struct {
Message struct {
Content string `json:"content"`
Reasoning string `json:"reasoning"`
} `json:"message"`
} `json:"choices"`
}
if err := json.Unmarshal(respBytes, &result); err != nil {
t.Fatalf("failed to decode response: %v", err)
}
if len(result.Choices) == 0 {
t.Fatal("no choices in response")
}
text := strings.ToLower(result.Choices[0].Message.Content + " " + result.Choices[0].Message.Reasoning)
found := false
for _, word := range []string{"sky", "blue", "scatter", "light", "atmosphere"} {
if strings.Contains(text, word) {
found = true
break
}
}
if !found {
t.Errorf("response missing expected words about sky/blue/light, got: %s", result.Choices[0].Message.Content)
}
})
}
}

File diff suppressed because one or more lines are too long

View File

@@ -35,6 +35,9 @@ func TestBlueSky(t *testing.T) {
}
func TestUnicode(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
skipUnderMinVRAM(t, 6)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancel()
@@ -59,9 +62,7 @@
}
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
pullOrSkip(ctx, t, client, req.Model)
slog.Info("loading", "model", req.Model)
err := client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil {
@@ -81,6 +82,9 @@
}
}
func TestExtendedUnicodeOutput(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
// Set up the test data
@@ -100,9 +104,7 @@
}
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
}
@@ -148,15 +150,16 @@
// TestNumPredict verifies that when num_predict is set, the model generates
// exactly that many tokens. It uses logprobs to count the actual tokens output.
func TestNumPredict(t *testing.T) {
if testModel != "" {
t.Skip("uses hardcoded model, not applicable with model override")
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
pullOrSkip(ctx, t, client, "qwen3:0.6b")
req := api.GenerateRequest{
Model: "qwen3:0.6b",
View File

@@ -67,6 +67,9 @@ func TestConcurrentChat(t *testing.T) {
// Stress the scheduler and attempt to load more models than will fit to cause thrashing
// This test will always load at least 2 models even on CPU based systems
func TestMultiModelStress(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded models, not applicable with model override")
+}
s := os.Getenv("OLLAMA_MAX_VRAM")
if s == "" {
s = "0"
@@ -114,9 +117,7 @@ func TestMultiModelStress(t *testing.T) {
// Make sure all the models are pulled before we get started
for _, model := range chosenModels {
-if err := PullIfMissing(ctx, client, model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(ctx, t, client, model)
}
// Determine how many models we can load in parallel before we exceed VRAM

View File

@@ -38,9 +38,7 @@ func TestLongInputContext(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatalf("PullIfMissing failed: %v", err)
-}
+pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
}
@@ -53,6 +51,7 @@ func TestContextExhaustion(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
// Set up the test data
+thinkOff := api.ThinkValue{Value: false}
req := api.ChatRequest{
Model: smol,
Messages: []api.Message{
@@ -61,6 +60,7 @@ func TestContextExhaustion(t *testing.T) {
Content: "Write me a story in english with a lot of emojis", Content: "Write me a story in english with a lot of emojis",
}, },
}, },
Think: &thinkOff,
Stream: &stream, Stream: &stream,
Options: map[string]any{ Options: map[string]any{
"temperature": 0, "temperature": 0,
@@ -70,14 +70,15 @@ func TestContextExhaustion(t *testing.T) {
}
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatalf("PullIfMissing failed: %v", err)
-}
+pullOrSkip(ctx, t, client, req.Model)
DoChat(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
}
// Send multiple generate requests with prior context and ensure the response is coherent and expected
func TestParallelGenerateWithHistory(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
modelName := "gpt-oss:20b"
req, resp := GenerateRequests()
numParallel := 2
@@ -133,6 +134,12 @@ func TestParallelGenerateWithHistory(t *testing.T) {
// Send generate requests with prior context and ensure the response is coherent and expected
func TestGenerateWithHistory(t *testing.T) {
+if testModel != "" {
+// The Generate API's Context field (token array continuation) is not
+// supported by all runners (e.g. MLX). Chat history works; this is
+// the only generate-specific continuation path.
+t.Skip("generate context continuation not supported by all runners")
+}
req := api.GenerateRequest{
Model: smol,
Prompt: rainbowPrompt,
@@ -173,6 +180,9 @@ func TestGenerateWithHistory(t *testing.T) {
// Send multiple chat requests with prior context and ensure the response is coherent and expected
func TestParallelChatWithHistory(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
modelName := "gpt-oss:20b"
req, resp := ChatRequests()
numParallel := 2

View File

@@ -78,8 +78,11 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-for _, model := range libraryEmbedModels {
+for _, model := range testModels(libraryEmbedModels) {
t.Run(model, func(t *testing.T) {
+if testModel != "" {
+requireCapability(ctx, t, client, model, "embedding")
+}
testCases := []struct {
a string
b string
@@ -145,6 +148,9 @@ func TestEmbedCosineDistanceCorrelation(t *testing.T) {
}
func TestAllMiniLMEmbeddings(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -175,6 +181,9 @@ func TestAllMiniLMEmbeddings(t *testing.T) {
}
func TestAllMiniLMEmbed(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -212,6 +221,9 @@ func TestAllMiniLMEmbed(t *testing.T) {
}
func TestAllMiniLMBatchEmbed(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -259,6 +271,9 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
}
func TestAllMiniLMEmbedTruncate(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
@@ -397,21 +412,13 @@ func TestAllMiniLMEmbedTruncate(t *testing.T) {
func embeddingTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
t.Helper()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(ctx, t, client, req.Model)
return client.Embeddings(ctx, &req)
}
func embedTestHelper(ctx context.Context, client *api.Client, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
t.Helper()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(ctx, t, client, req.Model)
return client.Embed(ctx, &req)
}
@@ -426,9 +433,12 @@ func TestEmbedTruncation(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-for _, model := range libraryEmbedModels {
+for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
+if testModel != "" {
+requireCapability(ctx, t, client, model, "embedding")
+}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -494,9 +504,12 @@ func TestEmbedLargeInput(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-for _, model := range libraryEmbedModels {
+for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
+if testModel != "" {
+requireCapability(ctx, t, client, model, "embedding")
+}
mctx, mcancel := context.WithTimeout(ctx, 2*time.Minute)
defer mcancel()
@@ -559,9 +572,12 @@ func TestEmbedStatusCode(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-for _, model := range libraryEmbedModels {
+for _, model := range testModels(libraryEmbedModels) {
model := model
t.Run(model, func(t *testing.T) {
+if testModel != "" {
+requireCapability(ctx, t, client, model, "embedding")
+}
// Check if we're running out of time (reserve 20s for current model)
if deadline, ok := t.Deadline(); ok && time.Until(deadline) < 20*time.Second {
t.Skip("skipping remaining tests to avoid timeout")
@@ -571,9 +587,7 @@ func TestEmbedStatusCode(t *testing.T) {
defer mcancel()
// Pull the model if needed
-if err := PullIfMissing(mctx, client, model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(mctx, t, client, model)
t.Run("truncation error status code", func(t *testing.T) {
truncFalse := false

View File

@@ -14,6 +14,9 @@ import (
)
func TestImageGeneration(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded models, not applicable with model override")
+}
skipUnderMinVRAM(t, 8)
type testCase struct {
@@ -41,12 +44,8 @@ func TestImageGeneration(t *testing.T) {
defer cleanup()
// Pull both models
-if err := PullIfMissing(ctx, client, tc.imageGenModel); err != nil {
-t.Fatalf("failed to pull image gen model: %v", err)
-}
-if err := PullIfMissing(ctx, client, tc.visionModel); err != nil {
-t.Fatalf("failed to pull vision model: %v", err)
-}
+pullOrSkip(ctx, t, client, tc.imageGenModel)
+pullOrSkip(ctx, t, client, tc.visionModel)
// Generate the image
t.Logf("Generating image with prompt: %s", tc.prompt)

View File

@@ -24,15 +24,12 @@ func TestLibraryModelsChat(t *testing.T) {
defer cleanup()
targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
-chatModels := libraryChatModels
-for _, model := range chatModels {
+for _, model := range testModels(libraryChatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
-if err := PullIfMissing(ctx, client, model); err != nil {
-t.Fatalf("pull failed %s", err)
-}
+pullOrSkip(ctx, t, client, model)
if targetArch != "" {
resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
if err != nil {

View File

@@ -13,39 +13,35 @@ import (
func TestVisionModels(t *testing.T) {
skipUnderMinVRAM(t, 6)
-type testCase struct {
-model string
-}
-testCases := []testCase{
-{
-model: "qwen2.5vl",
-},
-{
-model: "llama3.2-vision",
-},
-{
-model: "gemma3",
-},
-{
-model: "qwen3-vl:8b",
-},
-{
-// Qwen 3 VL mixture of experts
-model: "qwen3-vl:30b",
-},
-{
-model: "ministral-3",
-},
-}
-for _, v := range testCases {
-t.Run(v.model, func(t *testing.T) {
+defaultVisionModels := []string{
+"gemma4",
+"qwen2.5vl",
+"llama3.2-vision",
+"gemma3",
+"qwen3-vl:8b",
+"qwen3-vl:30b",
+"ministral-3",
+}
+skipIfNoVisionOverride(t)
+for _, model := range testModels(defaultVisionModels) {
+t.Run(model, func(t *testing.T) {
+ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+defer cancel()
+client, _, cleanup := InitServerConnection(ctx, t)
+defer cleanup()
+requireCapability(ctx, t, client, model, "vision")
+pullOrSkip(ctx, t, client, model)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
t.Fatal(err)
}
req := api.ChatRequest{
-Model: v.model,
+Model: model,
Messages: []api.Message{
{
Role: "user",
@@ -61,16 +57,7 @@ func TestVisionModels(t *testing.T) {
"temperature": 0.0, "temperature": 0.0,
}, },
} }
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
// Note: sometimes it returns "the ollamas" sometimes "the ollams"
resp := "the ollam"
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatal(err)
}
// Preload to skip if we're less than 80% on GPU to avoid extremely slow tests // Preload to skip if we're less than 80% on GPU to avoid extremely slow tests
err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil }) err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
if err != nil { if err != nil {
@@ -78,13 +65,17 @@ func TestVisionModels(t *testing.T) {
}
skipIfNotGPULoaded(ctx, t, client, req.Model, 80)
+// Note: sometimes it returns "the ollamas" sometimes "the ollams"
// llava models on CPU can be quite slow to start
-DoChat(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
+DoChat(ctx, t, client, req, []string{"the ollam"}, 240*time.Second, 30*time.Second)
})
}
}
func TestIntegrationSplitBatch(t *testing.T) {
+if testModel != "" {
+t.Skip("uses hardcoded model, not applicable with model override")
+}
skipUnderMinVRAM(t, 6)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
if err != nil {
@@ -111,9 +102,7 @@ func TestIntegrationSplitBatch(t *testing.T) {
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(ctx, t, client, req.Model)
// llava models on CPU can be quite slow to start,
DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
}

View File

@@ -45,9 +45,7 @@ func TestMaxQueue(t *testing.T) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
-if err := PullIfMissing(ctx, client, req.Model); err != nil {
-t.Fatal(err)
-}
+pullOrSkip(ctx, t, client, req.Model)
// Context for the worker threads so we can shut them down
// embedCtx, embedCancel := context.WithCancel(ctx)

View File

@@ -46,14 +46,12 @@ func TestModelsChat(t *testing.T) {
chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
}
-for _, model := range chatModels {
+for _, model := range testModels(chatModels) {
t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
-if err := PullIfMissing(ctx, client, model); err != nil {
-t.Fatalf("pull failed %s", err)
-}
+pullOrSkip(ctx, t, client, model)
if maxVram > 0 {
resp, err := client.List(ctx)
if err != nil {
@@ -133,14 +131,15 @@ func TestModelsEmbed(t *testing.T) {
t.Fatalf("failed to load test data: %s", err) t.Fatalf("failed to load test data: %s", err)
} }
for model, expected := range testCase { for model, expected := range testCase {
if testModel != "" && model != testModel {
continue
}
t.Run(model, func(t *testing.T) { t.Run(model, func(t *testing.T) {
if time.Now().Sub(started) > softTimeout { if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime") t.Skip("skipping remaining tests to avoid excessive runtime")
} }
if err := PullIfMissing(ctx, client, model); err != nil { pullOrSkip(ctx, t, client, model)
t.Fatalf("pull failed %s", err)
}
if maxVram > 0 { if maxVram > 0 {
resp, err := client.List(ctx) resp, err := client.List(ctx)
if err != nil { if err != nil {

View File

@@ -87,9 +87,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
if time.Now().Sub(started) > softTimeout {
t.Skip("skipping remaining tests to avoid excessive runtime")
}
-if err := PullIfMissing(ctx, client, model); err != nil {
-t.Fatalf("pull failed %s", err)
-}
+pullOrSkip(ctx, t, client, model)
var maxContext int
resp, err := client.Show(ctx, &api.ShowRequest{Model: model})

View File

@@ -33,9 +33,7 @@ func TestQuantization(t *testing.T) {
defer cleanup()
for _, base := range sourceModels {
-if err := PullIfMissing(ctx, client, base); err != nil {
-t.Fatalf("pull failed %s", err)
-}
+pullOrSkip(ctx, t, client, base)
for _, quant := range quantizations {
newName := fmt.Sprintf("%s__%s", base, quant)
t.Run(newName, func(t *testing.T) {

View File

@@ -0,0 +1,155 @@
//go:build integration
package integration
import (
"context"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TestThinkingEnabled verifies that when thinking is requested, the model
// produces both thinking and content output without leaking raw channel tags.
func TestThinkingEnabled(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
models := testModels([]string{smol})
for _, modelName := range models {
t.Run(modelName, func(t *testing.T) {
requireCapability(ctx, t, client, modelName, "thinking")
pullOrSkip(ctx, t, client, modelName)
think := api.ThinkValue{Value: true}
stream := false
req := api.ChatRequest{
Model: modelName,
Stream: &stream,
Think: &think,
Messages: []api.Message{
{Role: "user", Content: "What is 12 * 15? Think step by step."},
},
Options: map[string]any{
"temperature": 0,
"seed": 42,
"num_predict": 512,
},
}
var response api.ChatResponse
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
response = cr
return nil
})
if err != nil {
if strings.Contains(err.Error(), "model requires more system memory") {
t.Skip("model too large for test system")
}
t.Fatalf("chat failed: %v", err)
}
content := response.Message.Content
thinking := response.Message.Thinking
// Thinking should be non-empty when thinking is enabled
if thinking == "" {
t.Error("expected non-empty thinking output when thinking is enabled")
}
// The answer (180) should appear in thinking, content, or both.
// Some models put everything in thinking and leave content empty
// if they hit the token limit while still thinking.
combined := thinking + " " + content
if !strings.Contains(combined, "180") {
t.Errorf("expected '180' in thinking or content, got thinking=%q content=%q", thinking, content)
}
// Neither thinking nor content should contain raw channel tags
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
t.Errorf("content contains raw channel tags: %s", content)
}
if strings.Contains(thinking, "<|channel>") || strings.Contains(thinking, "<channel|>") {
t.Errorf("thinking contains raw channel tags: %s", thinking)
}
t.Logf("thinking (%d chars): %.100s...", len(thinking), thinking)
t.Logf("content (%d chars): %s", len(content), content)
})
}
}
// TestThinkingSuppressed verifies that when thinking is NOT requested,
// the model does not leak thinking/channel content into the response.
func TestThinkingSuppressed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
models := testModels([]string{smol})
for _, modelName := range models {
t.Run(modelName, func(t *testing.T) {
requireCapability(ctx, t, client, modelName, "thinking")
pullOrSkip(ctx, t, client, modelName)
stream := false
req := api.ChatRequest{
Model: modelName,
Stream: &stream,
// Think is nil — thinking not requested
Messages: []api.Message{
{Role: "user", Content: "What is the capital of Japan? Answer in one word."},
},
Options: map[string]any{
"temperature": 0,
"seed": 42,
"num_predict": 64,
},
}
var response api.ChatResponse
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
response = cr
return nil
})
if err != nil {
if strings.Contains(err.Error(), "model requires more system memory") {
t.Skip("model too large for test system")
}
t.Fatalf("chat failed: %v", err)
}
content := response.Message.Content
thinking := response.Message.Thinking
// The answer should appear in content or thinking
combined := content + " " + thinking
if !strings.Contains(combined, "Tokyo") {
t.Errorf("expected 'Tokyo' in content or thinking, got content=%q thinking=%q", content, thinking)
}
// Content must NOT contain channel/thinking tags
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
t.Errorf("content contains leaked channel tags when thinking not requested: %s", content)
}
if strings.Contains(content, "thought") && strings.Contains(content, "<channel|>") {
t.Errorf("content contains leaked thinking block: %s", content)
}
// Thinking field should ideally be empty when not requested.
// Some small models may still produce thinking output; log but don't fail.
if thinking != "" {
t.Logf("WARNING: model produced thinking output when not requested (%d chars): %.100s...", len(thinking), thinking)
}
t.Logf("content: %s", content)
})
}
}

View File

@@ -0,0 +1,523 @@
//go:build integration
package integration
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TestAPIToolCallingStress tests tool calling with complex, agent-style prompts
// that include large system messages, multiple tools, and multi-turn conversations.
// This catches cache corruption and parser bugs that simple tool tests miss.
func TestAPIToolCallingStress(t *testing.T) {
initialTimeout := 120 * time.Second
streamTimeout := 120 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
minVRAM := map[string]uint64{
"qwen3-vl": 16,
"gpt-oss:20b": 16,
"gpt-oss:120b": 70,
"qwen3": 6,
"llama3.1": 8,
"llama3.2": 4,
"mistral": 6,
"qwen2.5": 6,
"qwen2": 6,
"ministral-3": 20,
"mistral-nemo": 9,
"mistral-small": 16,
"mixtral:8x22b": 80,
"qwq": 20,
"granite3.3": 7,
}
// Models that don't reliably produce tool calls with complex/multi-tool prompts.
// The stress test uses a large system prompt with many tools, simulating coding agents.
// Some models are too small, too slow, or not designed for this use case.
skipModels := map[string]string{
"lfm2.5-thinking": "returns text instead of tool calls with complex system prompts",
"qwen3-vl": "vision model, extremely slow with complex tool prompts",
"llama3.2": "3B model too small for reliable multi-tool agent prompts",
"mistral": "7B v0.3 returns text instead of tool calls with complex prompts",
"mixtral:8x22b": "returns text instead of tool calls with complex prompts",
"qwen2": "returns text instead of tool calls with complex prompts",
"granite3.3": "returns text instead of tool calls with complex prompts",
}
models := testModels(libraryToolsModels)
for _, model := range models {
t.Run(model, func(t *testing.T) {
// Skip known-bad models unless explicitly requested via env var
if reason, ok := skipModels[model]; ok && testModel == "" {
t.Skipf("skipping: %s", reason)
}
if testModel != "" {
requireCapability(ctx, t, client, model, "tools")
}
if v, ok := minVRAM[model]; ok {
skipUnderMinVRAM(t, v)
}
pullOrSkip(ctx, t, client, model)
tools := stressTestTools()
// Large system prompt that mimics real coding agents (opencode, Claude Code, etc.)
// This is intentionally very long (~5000+ tokens) to match the prompt sizes that
// real coding agents send. The combination of a large system prompt, many tools,
// and thinking mode is what triggers failures in some models.
systemPrompt := stressTestSystemPrompt()
// Test 1: First request (fresh prompt processing)
// Use a direct prompt that tells the model exactly what tool to use,
// reducing the chance it asks for clarification instead.
t.Run("first_request", func(t *testing.T) {
testToolCall(t, ctx, client, model, systemPrompt, tools,
"Run git diff main to review the code changes on the current branch.",
initialTimeout, streamTimeout)
})
// Test 2: Repeat with same prompt (tests cache reuse)
t.Run("cached_request", func(t *testing.T) {
testToolCall(t, ctx, client, model, systemPrompt, tools,
"Run git diff main to review the code changes on the current branch.",
initialTimeout, streamTimeout)
})
// Test 3: Different user message (partial cache hit)
t.Run("different_user_message", func(t *testing.T) {
testToolCall(t, ctx, client, model, systemPrompt, tools,
"Read the file at ./go.mod and tell me what dependencies we have.",
initialTimeout, streamTimeout)
})
// Test 4: Multi-turn with tool response
t.Run("multi_turn", func(t *testing.T) {
testToolCallMultiTurn(t, ctx, client, model, systemPrompt, tools,
initialTimeout, streamTimeout)
})
})
}
}
func newTool(name, description string, required []string, props map[string]api.ToolProperty) api.Tool {
return api.Tool{
Type: "function",
Function: api.ToolFunction{
Name: name,
Description: description,
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: required,
Properties: testPropsMap(props),
},
},
}
}
// stressTestTools returns a set of tools matching the scale and verbosity of
// real coding agent tool definitions (opencode, Claude Code, etc.). The tool
// descriptions are intentionally verbose to match real-world prompt sizes.
func stressTestTools() []api.Tool {
return []api.Tool{
newTool("bash", "Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures. All commands run in the working directory by default. Before executing the command, verify that the parent directory exists. Always quote file paths that contain spaces with double quotes. After ensuring proper quoting, execute the command and capture the output. Avoid using bash with find, grep, cat, head, tail, sed, awk, or echo commands unless explicitly instructed. Instead, always prefer using the dedicated tools for these commands. When issuing multiple commands, if they are independent and can run in parallel, make multiple tool calls in a single message.",
[]string{"command"},
map[string]api.ToolProperty{
"command": {Type: api.PropertyType{"string"}, Description: "The bash command to execute"},
"description": {Type: api.PropertyType{"string"}, Description: "Short description of what this command does in 5-10 words"},
"timeout": {Type: api.PropertyType{"number"}, Description: "Optional timeout in milliseconds. If not specified, commands will time out after 120000ms (2 minutes)"},
}),
newTool("read", "Read a file or directory from the local filesystem. If the path does not exist, an error is returned. By default, this tool returns up to 2000 lines from the start of the file. The offset parameter is the line number to start from (1-indexed). To read later sections, call this tool again with a larger offset. Use the grep tool to find specific content in large files or files with long lines. If you are unsure of the correct file path, use the glob tool to look up filenames by glob pattern. Contents are returned with each line prefixed by its line number. Any line longer than 2000 characters is truncated. Call this tool in parallel when you know there are multiple files you want to read. Avoid tiny repeated slices (30 line chunks). If you need more context, read a larger window. This tool can read image files and PDFs and return them as file attachments.",
[]string{"path"},
map[string]api.ToolProperty{
"path": {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to read"},
"offset": {Type: api.PropertyType{"number"}, Description: "Line number to start reading from (1-indexed)"},
"limit": {Type: api.PropertyType{"number"}, Description: "Maximum number of lines to read"},
}),
newTool("glob", "Fast file pattern matching tool that works with any codebase size. Supports glob patterns like '**/*.js' or 'src/**/*.ts'. Returns matching file paths sorted by modification time. Use this tool when you need to find files by name patterns. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead. You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.",
[]string{"pattern"},
map[string]api.ToolProperty{
"pattern": {Type: api.PropertyType{"string"}, Description: "The glob pattern to match files against"},
"path": {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
}),
newTool("grep", "Fast content search tool that works with any codebase size. Searches file contents using regular expressions. Supports full regex syntax (eg. 'log.*Error', 'function\\s+\\w+'). Filter files by pattern with the include parameter (eg. '*.js', '*.{ts,tsx}'). Returns file paths and line numbers with at least one match sorted by modification time. Use this tool when you need to find files containing specific patterns. If you need to identify or count the number of matches within files, use the bash tool with rg (ripgrep) directly. When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the task tool instead.",
[]string{"pattern"},
map[string]api.ToolProperty{
"pattern": {Type: api.PropertyType{"string"}, Description: "The regex pattern to search for in file contents"},
"path": {Type: api.PropertyType{"string"}, Description: "The directory to search in"},
"include": {Type: api.PropertyType{"string"}, Description: "File pattern to include (eg. '*.js', '*.{ts,tsx}')"},
}),
newTool("edit", "Performs exact string replacements in files. You must use your read tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. When editing text from read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears after the line number prefix. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Only use emojis if the user explicitly requests it. The edit will fail if oldString is not found in the file. The edit will fail if oldString is found multiple times in the file. Use replaceAll for replacing and renaming strings across the file.",
[]string{"path", "old_string", "new_string"},
map[string]api.ToolProperty{
"path": {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to modify"},
"old_string": {Type: api.PropertyType{"string"}, Description: "The text to replace (must be unique in the file)"},
"new_string": {Type: api.PropertyType{"string"}, Description: "The replacement text"},
}),
newTool("write", "Writes a file to the local filesystem. This tool will overwrite the existing file if there is one at the provided path. If this is an existing file, you must use the read tool first to read the file contents. This tool will fail if you did not read the file first. Always prefer editing existing files in the codebase. Never write new files unless explicitly required. Never proactively create documentation files or README files. Only create documentation files if explicitly requested by the user.",
[]string{"path", "content"},
map[string]api.ToolProperty{
"path": {Type: api.PropertyType{"string"}, Description: "The absolute path to the file to write"},
"content": {Type: api.PropertyType{"string"}, Description: "The content to write to the file"},
}),
newTool("question", "Use this tool when you need to ask the user questions during execution. This allows you to gather user preferences or requirements, clarify ambiguous instructions, get decisions on implementation choices as you work, and offer choices to the user about what direction to take. When custom is enabled (default), a 'Type your own answer' option is added automatically. Answers are returned as arrays of labels. Set multiple to true to allow selecting more than one answer. If you recommend a specific option, make that the first option in the list and add '(Recommended)' at the end of the label.",
[]string{"questions"},
map[string]api.ToolProperty{
"questions": {Type: api.PropertyType{"string"}, Description: "The question to ask the user"},
}),
newTool("task", "Launch a new agent to handle complex, multistep tasks autonomously. Available agent types: general (general-purpose agent for researching complex questions and executing multi-step tasks, use this to execute multiple units of work in parallel) and explore (fast agent specialized for exploring codebases, use this when you need to quickly find files by patterns, search code for keywords, or answer questions about the codebase). Launch multiple agents concurrently whenever possible to maximize performance. When the agent is done, it will return a single message back to you. Each agent invocation starts with a fresh context unless you provide task_id to resume the same subagent session.",
[]string{"description", "prompt", "subagent_type"},
map[string]api.ToolProperty{
"description": {Type: api.PropertyType{"string"}, Description: "A short (3-5 word) description of the task"},
"prompt": {Type: api.PropertyType{"string"}, Description: "The task for the agent to perform"},
"subagent_type": {Type: api.PropertyType{"string"}, Description: "The type of specialized agent to use (general or explore)"},
}),
newTool("webfetch", "Fetches content from a specified URL. Takes a URL and optional format as input. Fetches the URL content, converts to requested format (markdown by default). Returns the content in the specified format. Use this tool when you need to retrieve and analyze web content. The URL must be a fully-formed valid URL. HTTP URLs will be automatically upgraded to HTTPS. Format options: markdown (default), text, or html. This tool is read-only and does not modify any files. Results may be summarized if the content is very large.",
[]string{"url", "format"},
map[string]api.ToolProperty{
"url": {Type: api.PropertyType{"string"}, Description: "The URL to fetch content from"},
"format": {Type: api.PropertyType{"string"}, Description: "Output format: markdown (default), text, or html"},
}),
newTool("todowrite", "Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. Use this tool proactively when handling complex multistep tasks, non-trivial and complex tasks, when the user explicitly requests a todo list, when the user provides multiple tasks, after receiving new instructions, and after completing a task. Do not use this tool when there is only a single straightforward task, the task is trivial, the task can be completed in less than 3 steps, or the task is purely conversational.",
[]string{"todos"},
map[string]api.ToolProperty{
"todos": {Type: api.PropertyType{"string"}, Description: "JSON array of todo items with id, title, and status fields"},
}),
newTool("skill", "Load a specialized skill that provides domain-specific instructions and workflows. Skills contain curated prompts and tool configurations for specific tasks like code review, testing, deployment, and documentation. Use this tool when the user's request matches an available skill description.",
[]string{"name"},
map[string]api.ToolProperty{
"name": {Type: api.PropertyType{"string"}, Description: "The name of the skill to load"},
}),
}
}
// stressTestSystemPrompt returns a system prompt that matches the scale and
// content of real coding agent system prompts (~5000+ tokens). This is based
// on actual prompts captured from opencode sessions. The prompt size combined
// with many tool declarations is what pushes models past their effective
// context handling and triggers tag leakage / broken tool calls.
func stressTestSystemPrompt() string {
return `You are opencode, an interactive CLI tool that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user.
IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
IMPORTANT: You must NEVER generate or guess URLs for the user unless you are confident that the URLs are for helping the user with programming. You may use URLs provided by the user in their messages or local files.
If the user asks for help or wants to give feedback inform them of the following:
- /help: Get help with using opencode
- To give feedback, users should report the issue at https://github.com/sampleorg/opencode/issues
# Tone and style
You should be concise, direct, and to the point. When you run a non-trivial bash command, you should explain what the command does and why you are running it, to make sure the user understands what you are doing (this is especially important when you are running a command that will make changes to the user's system).
Remember that your output will be displayed on a command line interface. Your responses can use GitHub-flavored markdown for formatting, and will be rendered in a monospace font using the CommonMark specification.
Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like Bash or code comments as means to communicate with the user during the session.
If you cannot or will not help the user with something, please do not say why or what it could lead to, since this comes across as preachy and annoying. Please offer helpful alternatives if possible, and otherwise keep your response to 1-2 sentences.
Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
IMPORTANT: You should minimize output tokens as much as possible while maintaining helpfulness, quality, and accuracy. Only address the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request. If you can answer in 1-3 sentences or a short paragraph, please do.
IMPORTANT: You should NOT answer with unnecessary preamble or postamble (such as explaining your code or summarizing your action), unless the user asks you to.
IMPORTANT: Keep your responses short, since they will be displayed on a command line interface. You MUST answer concisely with fewer than 4 lines (not including tool use or code generation), unless user asks for detail. Answer the user's question directly, without elaboration, explanation, or details. One word answers are best. Avoid introductions, conclusions, and explanations. You MUST avoid text before/after your response, such as "The answer is <answer>.", "Here is the content of the file..." or "Based on the information provided, the answer is..." or "Here is what I will do next...". Here are some examples to demonstrate appropriate verbosity:
user: 2 + 2
assistant: 4
user: what is 2+2?
assistant: 4
user: is 11 a prime number?
assistant: Yes
user: what command should I run to list files in the current directory?
assistant: ls
user: what command should I run to watch files in the current directory?
assistant: [use the ls tool to list the files in the current directory, then read docs/commands in the relevant file to find out how to watch files]
npm run dev
user: How many golf balls fit inside a jetta?
assistant: 150000
user: what files are in the directory src/?
assistant: [runs ls and sees foo.c, bar.c, baz.c]
user: which file contains the implementation of foo?
assistant: src/foo.c
user: write tests for new feature
assistant: [uses grep and glob search tools to find where similar tests are defined, uses concurrent read file tool use blocks in one tool call to read relevant files at the same time, uses edit file tool to write new tests]
# Proactiveness
You are allowed to be proactive, but only when the user asks you to do something. You should strive to strike a balance between:
1. Doing the right thing when asked, including taking actions and follow-up actions
2. Not surprising the user with actions you take without asking
For example, if the user asks you how to approach something, you should do your best to answer their question first, and not immediately jump into taking actions.
3. Do not add additional code explanation summary unless requested by the user. After working on a file, just stop, rather than providing an explanation of what you did.
# Following conventions
When making changes to files, first understand the file's code conventions. Mimic code style, use existing libraries and utilities, and follow existing patterns.
- NEVER assume that a given library is available, even if it is well known. Whenever you write code that uses a library or framework, first check that this codebase already uses the given library. For example, you might look at neighboring files, or check the package.json (or cargo.toml, and so on depending on the language).
- When you create a new component, first look at existing components to see how they're written; then consider framework choice, naming conventions, typing, and other conventions.
- When you edit a piece of code, first look at the code's surrounding context (especially its imports) to understand the code's choice of frameworks and libraries. Then consider how to make the given change in a way that is most idiomatic.
- Always follow security best practices. Never introduce code that exposes or logs secrets and keys. Never commit secrets or keys to the repository.
# Code style
- IMPORTANT: DO NOT ADD ANY COMMENTS unless asked
# Doing tasks
The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended:
- Use the available search tools to understand the codebase and the user's query. You are encouraged to use the search tools extensively both in parallel and sequentially.
- Implement the solution using all tools available to you
- Verify the solution if possible with tests. NEVER assume specific test framework or test script. Check the README or search codebase to determine the testing approach.
- VERY IMPORTANT: When you have completed a task, you MUST run the lint and typecheck commands (e.g. npm run lint, npm run typecheck, ruff, etc.) with Bash if they were provided to you to ensure your code is correct. If you are unable to find the correct command, ask the user for the command to run and if they supply it, proactively suggest writing it to AGENTS.md so that you will know to run it next time.
NEVER commit changes unless the user explicitly asks you to. It is VERY IMPORTANT to only commit when explicitly asked, otherwise the user will feel that you are being too proactive.
# Tool usage policy
- When doing file search, prefer to use the Task tool in order to reduce context usage.
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. When making multiple bash tool calls, you MUST send a single message with multiple tools calls to run the calls in parallel.
You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.
# Code References
When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
# Git workflow
When working with git:
- Create descriptive commit messages that explain WHY not just WHAT
- Use conventional commit format: feat:, fix:, refactor:, docs:, test:, chore:
- Check git status before and after operations
- Never force push to main/master
- Review diffs before committing
- NEVER update the git config
- NEVER run destructive/irreversible git commands unless the user explicitly requests them
- NEVER skip hooks (--no-verify, --no-gpg-sign, etc) unless the user explicitly requests it
- Avoid git commit --amend unless explicitly requested by the user
- NEVER commit changes unless the user explicitly asks you to
# Safety
- Never delete files without confirmation
- Never run destructive commands (rm -rf, DROP TABLE, etc.) without confirmation
- Always validate inputs before using them in shell commands
- Be careful with environment variables and secrets
- Do not expose API keys, passwords, or tokens in code or logs
# Environment
Working directory: /Users/test/code/myproject
Platform: darwin
Shell: zsh
Is directory a git repo: yes
The project uses Go 1.22 with modules. Run tests with 'go test ./...' and build with 'go build ./...'.
The CI pipeline runs golangci-lint, go vet, and go test with race detector enabled.
# User instructions
Never use cd to change into the repo root or any other directory in Bash commands. The working directory is always the repo root — use relative paths directly.
Never use heredoc-style inline bash or python scripts in Bash tool calls. Instead, write the script to an ephemeral file under ./.tmp/ in the repo, then run it as a separate command.`
}
// validStressTools is the set of tool names used in the stress test.
var validStressTools = map[string]bool{
"bash": true, "read": true, "glob": true, "grep": true,
"edit": true, "write": true, "question": true, "task": true,
"webfetch": true, "todowrite": true, "skill": true,
}
func testToolCall(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, userMessage string, initialTimeout, streamTimeout time.Duration) {
t.Helper()
req := api.ChatRequest{
Model: model,
Messages: []api.Message{
{Role: "system", Content: systemPrompt},
{Role: "user", Content: userMessage},
},
Tools: tools,
Options: map[string]any{
"temperature": 0,
"num_ctx": contextLength(16384),
},
}
stallTimer := time.NewTimer(initialTimeout)
var gotToolCall bool
var lastToolCall api.ToolCall
var allContent string
fn := func(response api.ChatResponse) error {
if len(response.Message.ToolCalls) > 0 {
gotToolCall = true
lastToolCall = response.Message.ToolCalls[len(response.Message.ToolCalls)-1]
}
allContent += response.Message.Content
if !stallTimer.Reset(streamTimeout) {
return fmt.Errorf("stall detected while streaming")
}
return nil
}
stream := true
req.Stream = &stream
done := make(chan int)
var genErr error
go func() {
genErr = client.Chat(ctx, &req, fn)
done <- 0
}()
select {
case <-stallTimer.C:
t.Fatalf("chat stalled after %s", initialTimeout)
case <-done:
if genErr != nil {
t.Fatalf("chat failed: %v", genErr)
}
// Check for leaked special tags in content — these should never
// appear in user-visible output regardless of model quality.
checkNoLeakedTags(t, allContent)
// The model must produce either a tool call or a text response.
// A text response (e.g. asking for clarification) is legitimate.
// Empty output with no tool call indicates a parser or model failure
// (e.g. malformed tool call that gets dropped).
if !gotToolCall && allContent == "" {
t.Fatal("model produced neither a tool call nor text content")
}
if gotToolCall {
if !validStressTools[lastToolCall.Function.Name] {
t.Errorf("unexpected tool: %q", lastToolCall.Function.Name)
}
argsJSON, _ := json.Marshal(lastToolCall.Function.Arguments)
t.Logf("tool call: %s(%s)", lastToolCall.Function.Name, string(argsJSON))
} else {
t.Logf("text response (no tool call): %q", truncate(allContent, 200))
}
case <-ctx.Done():
t.Fatal("context cancelled")
}
}
func testToolCallMultiTurn(t *testing.T, ctx context.Context, client *api.Client, model, systemPrompt string, tools []api.Tool, initialTimeout, streamTimeout time.Duration) {
t.Helper()
req := api.ChatRequest{
Model: model,
Messages: []api.Message{
{Role: "system", Content: systemPrompt},
{Role: "user", Content: "What files are in the current directory?"},
{Role: "assistant", Content: "", ToolCalls: []api.ToolCall{{
Function: api.ToolCallFunction{
Name: "bash",
Arguments: api.ToolCallFunctionArguments{},
},
}}},
{Role: "tool", Content: "go.mod\ngo.sum\nmain.go\nREADME.md\n"},
// The model should now respond with content or another tool call
},
Tools: tools,
Options: map[string]any{
"temperature": 0,
"num_ctx": contextLength(16384),
},
}
// For the tool response arguments, set the command
req.Messages[2].ToolCalls[0].Function.Arguments.Set("command", "ls")
stallTimer := time.NewTimer(initialTimeout)
var gotResponse bool
var allContent string
var gotToolCall bool
fn := func(response api.ChatResponse) error {
if response.Message.Content != "" {
gotResponse = true
allContent += response.Message.Content
}
if len(response.Message.ToolCalls) > 0 {
gotToolCall = true
gotResponse = true
}
if !stallTimer.Reset(streamTimeout) {
return fmt.Errorf("stall detected")
}
return nil
}
stream := true
req.Stream = &stream
done := make(chan int)
var genErr error
go func() {
genErr = client.Chat(ctx, &req, fn)
done <- 0
}()
select {
case <-stallTimer.C:
t.Fatalf("chat stalled after %s", initialTimeout)
case <-done:
if genErr != nil {
t.Fatalf("chat failed: %v", genErr)
}
checkNoLeakedTags(t, allContent)
if !gotResponse {
t.Fatal("expected response (content or tool call), got nothing")
}
if gotToolCall {
t.Log("multi-turn: got follow-up tool call")
} else {
t.Logf("multi-turn: got content response: %q", truncate(allContent, 200))
}
case <-ctx.Done():
t.Fatal("context cancelled")
}
}
// checkNoLeakedTags verifies that model-internal special tags do not appear in
// user-visible content. These tags should be consumed by the parser and never
// passed through. If they appear, either the parser has a bug or the model is
// generating malformed output that the parser fails to handle.
func checkNoLeakedTags(t *testing.T, content string) {
t.Helper()
leakedTags := []string{
"<|channel>", "<channel|>",
"<|tool_call>", "<tool_call|>",
"<|tool>", "<tool|>",
"<|turn>", "<turn|>",
}
for _, tag := range leakedTags {
if strings.Contains(content, tag) {
t.Errorf("leaked special tag %q in content: %q", tag, truncate(content, 300))
}
}
}
func contextLength(defaultVal int) int {
if s := os.Getenv("OLLAMA_CONTEXT_LENGTH"); s != "" {
if n, err := strconv.Atoi(s); err == nil {
return n
}
}
return defaultVal
}
func truncate(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "..."
}
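The newTool constructor above routes its properties through a testPropsMap helper whose definition does not appear in this excerpt. A placeholder sketch, assuming api.ToolFunctionParameters.Properties is a plain map[string]api.ToolProperty in this version of the API:

// Sketch only; the real testPropsMap is defined outside this excerpt.
// Under the assumption above it is a pass-through, existing so call sites
// stay stable if the Properties field type changes (e.g. to an ordered map).
func testPropsMap(props map[string]api.ToolProperty) map[string]api.ToolProperty {
	return props
}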

View File

@@ -30,6 +30,7 @@ func TestAPIToolCalling(t *testing.T) {
defer cleanup()
minVRAM := map[string]uint64{
+"gemma4": 8,
"qwen3-vl": 16,
"gpt-oss:20b": 16,
"gpt-oss:120b": 70,
@@ -47,15 +48,18 @@ func TestAPIToolCalling(t *testing.T) {
"granite3.3": 7, "granite3.3": 7,
} }
for _, model := range libraryToolsModels { models := testModels(libraryToolsModels)
for _, model := range models {
t.Run(model, func(t *testing.T) { t.Run(model, func(t *testing.T) {
if testModel != "" {
requireCapability(ctx, t, client, model, "tools")
}
if v, ok := minVRAM[model]; ok { if v, ok := minVRAM[model]; ok {
skipUnderMinVRAM(t, v) skipUnderMinVRAM(t, v)
} }
if err := PullIfMissing(ctx, client, model); err != nil { pullOrSkip(ctx, t, client, model)
t.Fatalf("pull failed %s", err)
}
tools := []api.Tool{ tools := []api.Tool{
{ {

View File

@@ -18,6 +18,7 @@ import (
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"runtime" "runtime"
"slices"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -26,11 +27,17 @@ import (
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/format" "github.com/ollama/ollama/format"
"github.com/ollama/ollama/types/model"
) )
var ( var (
smol = "llama3.2:1b" smol = "llama3.2:1b"
stream = false stream = false
// testModel is set via OLLAMA_TEST_MODEL env var. When set, all tests
// that loop over model lists will test only this model, and smol is
// also overridden to use it.
testModel string
) )
var ( var (
@@ -38,6 +45,7 @@ var (
// Note: add newer models at the top of the list to test them first
ollamaEngineChatModels = []string{
+"gemma4",
"lfm2.5-thinking",
"ministral-3",
"qwen3-coder:30b",
@@ -130,6 +138,7 @@ var (
"gemma2", "gemma2",
"gemma3", "gemma3",
"gemma3n", "gemma3n",
"gemma4",
"glm4", "glm4",
"goliath", "goliath",
"gpt-oss:20b", "gpt-oss:20b",
@@ -265,6 +274,7 @@ var (
"snowflake-arctic-embed2", "snowflake-arctic-embed2",
} }
libraryToolsModels = []string{ libraryToolsModels = []string{
"gemma4",
"lfm2.5-thinking", "lfm2.5-thinking",
"qwen3-vl", "qwen3-vl",
"gpt-oss:20b", "gpt-oss:20b",
@@ -288,23 +298,60 @@ var (
rainbowPrompt = "how do rainbows form? Be brief but factual in your reply" rainbowPrompt = "how do rainbows form? Be brief but factual in your reply"
rainbowFollowups = []string{ rainbowFollowups = []string{
"Explain the physics involved in them. Be breif in your reply", "Explain the physics involved in them. Be brief in your reply",
"Explain the chemistry involved in them. Be breif in your reply", "Explain the chemistry involved in them. Be brief in your reply",
"What are common myths related to them? Be brief in your reply", "What are common myths related to them? Be brief in your reply",
"Can they form if there is no rain? Be breif in your reply", "Can they form if there is no rain? Be brief in your reply",
"Can they form if there are no clouds? Be breif in your reply", "Can they form if there are no clouds? Be brief in your reply",
"Do they happen on other planets? Be brief in your reply", "Do they happen on other planets? Be brief in your reply",
} }
rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter"} rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter", "rain", "sun", "rainbow", "optical", "gold", "cloud", "planet", "prism", "fog", "ice"}
) )
func init() { func init() {
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})) logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
slog.SetDefault(logger) slog.SetDefault(logger)
custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
if custom != "" { testModel = os.Getenv("OLLAMA_TEST_MODEL")
slog.Info("setting default test model to " + custom) if testModel != "" {
smol = custom slog.Info("test model override", "model", testModel)
smol = testModel
}
}
// testModels returns the override model as a single-element slice when
// OLLAMA_TEST_MODEL is set, otherwise returns the provided default list.
func testModels(defaults []string) []string {
if testModel != "" {
return []string{testModel}
}
return defaults
}
// requireCapability skips the test if the model does not advertise the
// given capability. It queries the server via Show and caches nothing —
// call it once per subtest. For local-only models where Show may not
// return capabilities (e.g. models created via ollama create), this is
// a best-effort check.
func requireCapability(ctx context.Context, t *testing.T, client *api.Client, modelName string, cap model.Capability) {
t.Helper()
resp, err := client.Show(ctx, &api.ShowRequest{Name: modelName})
if err != nil {
t.Fatalf("failed to show model %s: %v", modelName, err)
}
if len(resp.Capabilities) > 0 && !slices.Contains(resp.Capabilities, cap) {
t.Skipf("model %s does not have capability %q (has %v)", modelName, cap, resp.Capabilities)
}
}
// pullOrSkip pulls a model if it isn't already present locally. If the
// pull fails (e.g. model not in registry), the test is skipped instead
// of failed. PullIfMissing already checks Show first, so local-only
// models that exist will return immediately without hitting the registry.
func pullOrSkip(ctx context.Context, t *testing.T, client *api.Client, modelName string) {
t.Helper()
if err := PullIfMissing(ctx, client, modelName); err != nil {
t.Skipf("model %s not available: %v", modelName, err)
} }
} }
@@ -540,9 +587,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) { func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) {
client, _, cleanup := InitServerConnection(ctx, t) client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup() defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil { pullOrSkip(ctx, t, client, req.Model)
t.Fatal(err)
}
DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second) DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second)
} }
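
Taken together, the new helpers give every model-loop test the same shape: expand the default list (or collapse it to the OLLAMA_TEST_MODEL override), gate on the capability being exercised, then pull or skip. A sketch of the intended pattern, using only names introduced in the diff above:

	for _, m := range testModels(libraryToolsModels) {
		t.Run(m, func(t *testing.T) {
			if testModel != "" {
				requireCapability(ctx, t, client, m, "tools")
			}
			pullOrSkip(ctx, t, client, m)
			// per-model assertions go here
		})
	}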

integration/vision_test.go (new file, +386 lines)

@@ -0,0 +1,386 @@
//go:build integration

package integration

import (
	"context"
	"encoding/base64"
	"slices"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/types/model"
)

// Default set of vision models to test. When OLLAMA_TEST_MODEL is set,
// only that model is tested (with a capability check for vision).
var defaultVisionModels = []string{
	"gemma4",
	"gemma3",
	"llama3.2-vision",
	"qwen2.5vl",
	"qwen3-vl:8b",
}

// decodeTestImages returns the three test images, decoded from their
// base64 constants.
func decodeTestImages(t *testing.T) (abbeyRoad, docs, ollamaHome api.ImageData) {
	t.Helper()
	var err error
	abbeyRoad, err = base64.StdEncoding.DecodeString(imageEncoding)
	if err != nil {
		t.Fatalf("decode abbey road image: %v", err)
	}
	docs, err = base64.StdEncoding.DecodeString(imageEncodingDocs)
	if err != nil {
		t.Fatalf("decode docs image: %v", err)
	}
	ollamaHome, err = base64.StdEncoding.DecodeString(imageEncodingOllamaHome)
	if err != nil {
		t.Fatalf("decode ollama home image: %v", err)
	}
	return
}
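
// The image constants are regenerated by hand when testdata changes; a
// throwaway sketch like this (hypothetical helper, assumes an added "os"
// import) turns a PNG on disk back into a pasteable base64 literal:
func encodeTestImage(t *testing.T, path string) string {
	t.Helper()
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read %s: %v", path, err)
	}
	return base64.StdEncoding.EncodeToString(data)
}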

// skipIfNoVisionOverride skips the entire test (at parent level) when
// OLLAMA_TEST_MODEL is set to a non-vision model. This prevents the parent
// test from reporting PASS when all subtests are skipped.
func skipIfNoVisionOverride(t *testing.T) {
	t.Helper()
	if testModel == "" {
		return
	}
	// Check actual model capabilities via the API rather than a hardcoded list.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()
	resp, err := client.Show(ctx, &api.ShowRequest{Name: testModel})
	if err != nil {
		return // let the test proceed and fail naturally
	}
	if len(resp.Capabilities) > 0 && !slices.Contains(resp.Capabilities, model.CapabilityVision) {
		t.Skipf("model override %q does not have vision capability (has %v)", testModel, resp.Capabilities)
	}
}

// setupVisionModel pulls the model, preloads it, and skips if not GPU-loaded.
func setupVisionModel(ctx context.Context, t *testing.T, client *api.Client, model string) {
	t.Helper()
	if testModel != "" {
		requireCapability(ctx, t, client, model, "vision")
	}
	pullOrSkip(ctx, t, client, model)
	// An empty GenerateRequest loads the model without producing output.
	err := client.Generate(ctx, &api.GenerateRequest{Model: model}, func(response api.GenerateResponse) error { return nil })
	if err != nil {
		t.Fatalf("failed to load model %s: %s", model, err)
	}
	skipIfNotGPULoaded(ctx, t, client, model, 80)
}

// TestVisionMultiTurn sends an image, gets a response, then asks follow-up
// questions about the same image. This verifies that the KV cache correctly
// handles cached image tokens across turns.
func TestVisionMultiTurn(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	// Models that fail on multi-turn detail questions (e.g. misidentifying objects).
	skipModels := map[string]string{
		"gemma3":          "misidentifies briefcase as smartphone on turn 3",
		"llama3.2-vision": "miscounts animals (says 3 instead of 4) on turn 2",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _, _ := decodeTestImages(t)

			// Turn 1: describe the image
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "Describe this image briefly.",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			resp1 := DoChat(ctx, t, client, req, []string{
				"llama", "cross", "walk", "road", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
			if resp1 == nil {
				t.Fatal("no response from turn 1")
			}

			// Turn 2: follow-up about count
			req.Messages = append(req.Messages,
				*resp1,
				api.Message{Role: "user", Content: "How many animals are in the image?"},
			)
			resp2 := DoChat(ctx, t, client, req, []string{
				"four", "4", "three", "3",
			}, 60*time.Second, 30*time.Second)
			if resp2 == nil {
				t.Fatal("no response from turn 2")
			}

			// Turn 3: follow-up about specific detail
			req.Messages = append(req.Messages,
				*resp2,
				api.Message{Role: "user", Content: "Is any animal carrying something? What is it?"},
			)
			DoChat(ctx, t, client, req, []string{
				"briefcase", "suitcase", "bag", "case", "luggage",
			}, 60*time.Second, 30*time.Second)
		})
	}
}
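
// The append-response-then-ask step above repeats in every multi-turn test;
// a tiny helper like this (hypothetical, not part of the diff) would keep it terse:
func nextTurn(req *api.ChatRequest, resp *api.Message, question string) {
	req.Messages = append(req.Messages, *resp, api.Message{Role: "user", Content: question})
}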

// TestVisionObjectCounting asks the model to count objects in an image.
func TestVisionObjectCounting(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	skipModels := map[string]string{
		"llama3.2-vision": "consistently miscounts (says 3 instead of 4)",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "How many animals are shown in this image? Answer with just the number.",
						Images:  []api.ImageData{docs},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{"4", "four"}, 120*time.Second, 30*time.Second)
		})
	}
}

// TestVisionSceneUnderstanding tests whether the model can identify
// cultural references and scene context from an image.
func TestVisionSceneUnderstanding(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	// Models known to be too small or not capable enough for cultural reference detection.
	skipModels := map[string]string{
		"llama3.2-vision": "3B model lacks cultural reference knowledge",
		"minicpm-v":       "too small for cultural reference detection",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, _, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "What famous image or album cover is this a parody of?",
						Images:  []api.ImageData{abbeyRoad},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"abbey road", "beatles", "abbey", "llama",
			}, 120*time.Second, 30*time.Second)
		})
	}
}

// TestVisionSpatialReasoning tests the model's ability to identify
// objects based on their spatial position in the image.
func TestVisionSpatialReasoning(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs, _ := decodeTestImages(t)
			// The docs image has: leftmost llama on laptop with glasses,
			// rightmost llama sleeping.
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "What is the animal on the far left doing in this image?",
						Images:  []api.ImageData{docs},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"laptop", "computer", "typing", "working",
			}, 120*time.Second, 30*time.Second)
		})
	}
}

// TestVisionDetailRecognition tests whether the model can identify
// small details like accessories in an image.
func TestVisionDetailRecognition(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, docs, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "Are any of the animals wearing glasses? Describe what you see.",
						Images:  []api.ImageData{docs},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"glasses", "spectacles", "eyeglasses",
			}, 120*time.Second, 30*time.Second)
		})
	}
}

// TestVisionMultiImage sends two images in a single message and asks
// the model to compare and contrast them. This exercises multi-image
// encoding and cross-image reasoning.
func TestVisionMultiImage(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	// Multi-image support varies across models.
	skipModels := map[string]string{
		"llama3.2-vision": "does not support multi-image input",
	}
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			if reason, ok := skipModels[model]; ok && testModel == "" {
				t.Skipf("skipping: %s", reason)
			}
			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			abbeyRoad, docs, _ := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "I'm showing you two images. What do they have in common, and how are they different?",
						Images:  []api.ImageData{abbeyRoad, docs},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			// Both images feature cartoon llamas/alpacas; the model should
			// note the common subject and the different settings.
			DoChat(ctx, t, client, req, []string{
				"llama", "alpaca", "animal", "cartoon",
			}, 120*time.Second, 30*time.Second)
		})
	}
}

// TestVisionImageDescription verifies that the model can describe the contents
// of the ollama homepage image (a cartoon llama with "Start building with
// open models" text). Basic sanity check that the vision pipeline works.
func TestVisionImageDescription(t *testing.T) {
	skipUnderMinVRAM(t, 6)
	skipIfNoVisionOverride(t)
	for _, model := range testModels(defaultVisionModels) {
		t.Run(model, func(t *testing.T) {
			ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
			defer cancel()
			client, _, cleanup := InitServerConnection(ctx, t)
			defer cleanup()
			setupVisionModel(ctx, t, client, model)
			_, _, ollamaHome := decodeTestImages(t)
			req := api.ChatRequest{
				Model: model,
				Messages: []api.Message{
					{
						Role:    "user",
						Content: "Describe what you see in this image briefly.",
						Images:  []api.ImageData{ollamaHome},
					},
				},
				Stream:  &stream,
				Options: map[string]any{"temperature": 0.0, "seed": 42},
			}
			DoChat(ctx, t, client, req, []string{
				"llama", "animal", "build", "model", "open", "cartoon", "character",
			}, 120*time.Second, 30*time.Second)
		})
	}
}
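
These tests sit behind the integration build tag, so a plain go test skips them. Assuming the layout above, something like "go test -tags integration -run TestVision ./integration" runs just this file, and setting OLLAMA_TEST_MODEL narrows every model loop to a single entry (with the vision capability check applied).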

@@ -0,0 +1,544 @@
//go:build integration

package integration
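
// The imageEncodingDocs constant below should decode to a 400x250 PNG; a
// quick sanity check after regenerating it (sketch, using the standard
// library bytes, encoding/base64, and image/png packages):
//
//	data, err := base64.StdEncoding.DecodeString(imageEncodingDocs)
//	cfg, err2 := png.DecodeConfig(bytes.NewReader(data))
//	// expect err == nil, err2 == nil, cfg.Width == 400, cfg.Height == 250
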
// imageEncodingDocs is a 400x250 PNG of four cartoon llamas at a desk.
// One is on a laptop wearing glasses, one writing, one reading, one sleeping.
// The header text reads "Ollama's documentation".
const imageEncodingDocs = `iVBORw0KGgoAAAANSUhEUgAAAZAAAAD6CAYAAACPpxFEAAAKtmlDQ1BJQ0MgUHJvZmlsZQAASImVlwdQk9kWx+/3pYeElhCKlNA70gkgJYQWQEE62AhJgEAI
MQUFO7K4ghUVEVQWdFVAwUYRO6LYFsWGfUEWEWVdLNhQeR8wBHffvPfmnZk75zfnO/fcc+98d+Z/ASCbcMRiIawKQKZIJokM8qPHJyTScS8BDDQBHlAAicOV
ipkREWEAsUn/d/twD0Bj/rbtWK1///5fTY3Hl3IBgCIQTuZJuZkIHwfI8lyxRAYACmFgvEgmHuP7CFMlSIMID45x6jijx+pQkyeYOp4THclC2AIAPInDkaQC
QHJG4vRsbipShxSNsL2IJxAhnI+wd2ZmFg/hNoQtkBwxwmP1Gck/1En9W81kRU0OJ1XBE3sZN7y/QCoWcnL+z+P435YplE+uYY4MUpokOBLxusi5/ZGRFapg
UfKs8EkW8MbzxzlNHhwzyVwpK3GSpcIo9iTzOP6hijrCWWGTnCIIVOQIZOzoSeZLA6ImWZIVqVg3RcJiTjJHMtWDPCNGEU/jsxX1c9Oi4yY5WxA7S9FbRlTo
VA5LEZfIIxV74YuC/KbWDVScQ6b0h70L2Iq5srToYMU5cKb654uYUzWl8YreeHz/gKmcGEW+WOanWEssjFDk84VBirg0O0oxV4b8nFNzIxRnmM4JiZhkEAVk
QA54QACyAB34I14KxEAIOCBHxl8sG9sQK0ucIxGkpsnoTOTW8elsEdfOhu5o7+gKwNgdnvhF3tHG7yZEuzoVW10NgNeJ0dHRk1OxkJsAHEkCgNgwFbOYB4Bq
PwCXT3HlkuyJ2PhdwwAiUAFUoA30gTGwALbAEbgCT+ALAkAICAfRIAHMB1yQBjKBBCwCS8EqUACKwCawDZSBCrAHHACHwFHQBE6B8+ASuAZugrvgEegGfeAV
GAIfwAgEQTiIDFEgbcgAMoWsIUeIAXlDAVAYFAklQElQKiSC5NBSaDVUBBVDZVAlVA0dgU5A56ErUCf0AOqBBqC30BcYBZNgKqwHm8HTYQbMhEPhaHgenAov
hHPhfHgDXApXwQfhRvg8fA2+C3fDr+BhFEApoWgoQ5QtioFiocJRiagUlAS1HFWIKkFVoepQLah21G1UN2oQ9RmNRVPQdLQt2hMdjI5Bc9EL0cvR69Bl6APo
RnQb+ja6Bz2E/o4hY3Qx1hgPDBsTj0nFLMIUYEow+zANmIuYu5g+zAcsFkvDmmPdsMHYBGw6dgl2HXYXth57DtuJ7cUO43A4bZw1zgsXjuPgZLgC3A7cQdxZ
3C1cH+4TXglvgHfEB+IT8SJ8Hr4EX4M/g7+F78ePEFQJpgQPQjiBR8ghbCTsJbQQbhD6CCNENaI50YsYTUwnriKWEuuIF4mPie+UlJSMlNyVZisJlFYqlSod
Vrqs1KP0maROsiKxSHNJctIG0n7SOdID0jsymWxG9iUnkmXkDeRq8gXyU/InZYqynTJbmae8QrlcuVH5lvJrFYKKqQpTZb5KrkqJyjGVGyqDqgRVM1WWKkd1
uWq56gnVLtVhNYqag1q4WqbaOrUatStqL9Rx6mbqAeo89Xz1PeoX1HspKIoxhUXhUlZT9lIuUvqoWKo5lU1NpxZRD1E7qEMa6hrOGrEaizXKNU5rdNNQNDMa
myakbaQdpd2jfdHU02Rq8jXXatZp3tL8qDVNy1eLr1WoVa91V+uLNl07QDtDe7N2k/YTHbSOlc5snUU6u3Uu6gxOo07znMadVjjt6LSHurCulW6k7hLdPbrX
dYf19PWC9MR6O/Qu6A3q0/R99dP1t+qf0R8woBh4GwgMthqcNXhJ16Az6UJ6Kb2NPmSoaxhsKDesNOwwHDEyN4oxyjOqN3piTDRmGKcYbzVuNR4yMTCZabLU
pNbkoSnBlGGaZrrdtN30o5m5WZzZGrMmsxfmWuZs81zzWvPHFmQLH4uFFlUWdyyxlgzLDMtdljetYCsXqzSrcqsb1rC1q7XAepd1pw3Gxt1GZFNl02VLsmXa
ZtvW2vbY0ezC7PLsmuxeTzeZnjh98/T26d/tXeyF9nvtHzmoO4Q45Dm0OLx1tHLkOpY73nEiOwU6rXBqdnrjbO3Md97tfN+F4jLTZY1Lq8s3VzdXiWud64Cb
iVuS2063LgaVEcFYx7jsjnH3c1/hfsr9s4erh8zjqMdfnraeGZ41ni9mmM/gz9g7o9fLyIvjVenV7U33TvL+xbvbx9CH41Pl88zX2Jfnu8+3n2nJTGceZL72
s/eT+DX4fWR5sJaxzvmj/IP8C/07AtQDYgLKAp4GGgWmBtYGDgW5BC0JOheMCQ4N3hzcxdZjc9nV7KEQt5BlIW2hpNCo0LLQZ2FWYZKwlpnwzJCZW2Y+nmU6
SzSrKRyEs8O3hD+JMI9YGHFyNnZ2xOzy2c8jHSKXRrZHUaIWRNVEfYj2i94Y/SjGIkYe0xqrEjs3tjr2Y5x/XHFcd/z0+GXx1xJ0EgQJzYm4xNjEfYnDcwLm
bJvTN9dlbsHce/PM5y2ed2W+znzh/NMLVBZwFhxLwiTFJdUkfeWEc6o4w8ns5J3JQ1wWdzv3Fc+Xt5U3wPfiF/P7U7xSilNepHqlbkkdSPNJK0kbFLAEZYI3
6cHpFekfM8Iz9meMCuOE9Zn4zKTMEyJ1UYaoLUs/a3FWp9haXCDuXuixcNvCIUmoZJ8Uks6TNsuoiFi6LreQ/yTvyfbOLs/+tCh20bHFaotFi6/nWOWszenP
Dcz9dQl6CXdJ61LDpauW9ixjLqtcDi1PXt66wnhF/oq+lUErD6wirspY9VuefV5x3vvVcatb8vXyV+b3/hT0U22BcoGkoGuN55qKn9E/C37uWOu0dsfa74W8
wqtF9kUlRV/XcdddXe+wvnT96IaUDR0bXTfu3oTdJNp0b7PP5gPFasW5xb1bZm5p3ErfWrj1/bYF266UOJdUbCdul2/vLg0rbd5hsmPTjq9laWV3y/3K63fq
7ly78+Mu3q5bu31311XoVRRVfPlF8Mv9yqDKxiqzqpI92D3Ze57vjd3b/ivj1+p9OvuK9n3bL9rffSDyQFu1W3V1jW7Nxlq4Vl47cHDuwZuH/A8119nWVdbT
6osOg8Pywy+PJB25dzT0aOsxxrG646bHdzZQGgobocacxqGmtKbu5oTmzhMhJ1pbPFsaTtqd3H/K8FT5aY3TG88Qz+SfGT2be3b4nPjc4PnU872tC1ofXYi/
cKdtdlvHxdCLly8FXrrQzmw/e9nr8qkrHldOXGVcbbrmeq3xusv1ht9cfmvocO1ovOF2o/mm+82WzhmdZ2753Dp/2//2pTvsO9fuzrrbeS/m3v2uuV3d93n3
XzwQPnjzMPvhyKOVjzGPC5+oPil5qvu06nfL3+u7XbtP9/j3XH8W9exRL7f31R/SP7725T8nPy/pN+ivfuH44tRA4MDNl3Ne9r0SvxoZLPhT7c+dry1eH//L
96/rQ/FDfW8kb0bfrnun/W7/e+f3rcMRw08/ZH4Y+Vj4SfvTgc+Mz+1f4r70jyz6ivta+s3yW8v30O+PRzNHR8UcCWdcCqCQAaekAPB2PwDkBAAoiIYgzpnQ
2OMGTbwLxgn8J57Q4eOGKJc6xI3JI9Y5AA4jw2wlACq+AIxJo2hfADs5KcakHh7X7mOGRV4xdR5d60kXntpUg3/ahK7/oe9/eqCo+jf/LwkHEGPG+ODYAAAA
imVYSWZNTQAqAAAACAAEARoABQAAAAEAAAA+ARsABQAAAAEAAABGASgAAwAAAAEAAgAAh2kABAAAAAEAAABOAAAAAAAAAJAAAAABAAAAkAAAAAEAA5KGAAcA
AAASAAAAeKACAAQAAAABAAABkKADAAQAAAABAAAA+gAAAABBU0NJSQAAAFNjcmVlbnNob3T1Q1G8AAAACXBIWXMAABYlAAAWJQFJUiTwAAACqGlUWHRYTUw6
Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNi4wLjAiPgogICA8cmRmOlJE
RiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91
dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyIKICAgICAgICAgICAgeG1sbnM6ZXhpZj0iaHR0cDov
L25zLmFkb2JlLmNvbS9leGlmLzEuMC8iPgogICAgICAgICA8dGlmZjpZUmVzb2x1dGlvbj4xNDQ8L3RpZmY6WVJlc29sdXRpb24+CiAgICAgICAgIDx0aWZm
OlhSZXNvbHV0aW9uPjE0NDwvdGlmZjpYUmVzb2x1dGlvbj4KICAgICAgICAgPHRpZmY6UmVzb2x1dGlvblVuaXQ+MjwvdGlmZjpSZXNvbHV0aW9uVW5pdD4K
ICAgICAgICAgPGV4aWY6UGl4ZWxZRGltZW5zaW9uPjc0NjwvZXhpZjpQaXhlbFlEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlVzZXJDb21tZW50PlNjcmVl
bnNob3Q8L2V4aWY6VXNlckNvbW1lbnQ+CiAgICAgICAgIDxleGlmOlBpeGVsWERpbWVuc2lvbj4xMTk0PC9leGlmOlBpeGVsWERpbWVuc2lvbj4KICAgICAg
PC9yZGY6RGVzY3JpcHRpb24+CiAgIDwvcmRmOlJERj4KPC94OnhtcG1ldGE+Cts1PlUAAEAASURBVHgB7d0JvH3XeDfwHWKepxpiiHlMEGNMiaEorVkjLRKi
qHmoWSXkRVExV2poQlBSQhGqSkJFqBBDRYyJMeYppiLZ7/Nd8hz7nv8Z9j333HvPuXc9n8+9Z9p77bV+a3jG9aydmqb5ZfxVqghUBCoCFYGKwKoQ2Cmubld1
R724IlARqAhUBCoCgcDZKgoVgYpARaAiUBGYBYHKQGZBrd5TEagIVAQqAs3OFYOKQEWgIrCdENh5550bf2c729manXZixa/Utm1zxhlnNL/73e+aM888szcg
1QfSG6p6YUWgIrDMCJz97Gdvzn3uc5cmWCgtmBbO7U6YqD/4nOMc5yi4/OY3v+mFTWUg23301PZXBLYBAjQOzOP//u//ipS9DZo8UxMxknOd61xFQ/vlL385
lYlUBjITzPWmikBFYFkQYKo63/nO1/zqV78q0vWy1Hsz63nOc56zaCOYyCQ6e/x40KQLRv12qUtdqrn61a/eXOMa12gudrGLlUt+8Ytf7HCpSlzmMpdpLnzh
Cze//e1vm9///veNe91DXfr1r3+9wz2L8oU6/smf/Emxk1Ln1oMueMELNpe85CWb85///E0fbr8edahl/gGBvfbaq/H3ta99rYzVisvWQeC85z1v0TysP5X6
IcC8l34i7ycRI2Cvv6tc5Srti170ovZTn/pUe8opp7SnnXZaefX5JS95SXu1q11tRTm77757+9nPfrY96aST2lvd6lblt3/7t39rY5K2Bx988Ipr+9Zho657
1rOe1X7zm99sn/jEJ65bPf/mb/6m/cpXvtJ++MMfbi90oQut23M2CrP1fk4sBO0FLnCBNkwRM2EV6nkbkmgpI9T0QRm77rpre/LJJ7c/+9nP2r/9278dfL/e
7anl91t31oJTLIKlz9dSxna9NzS3qdj1DuPdc889mze/+c3NX//1XzeXvvSlm9NPP7358pe/XF593nfffZt//dd/bW5xi1sE3n8gWgYp2x9uhkjbsVgOnFl/
uHLx/nOuqed6OtnYGj0jFsUaDTJlCHDu/fM//3Pz0Y9+tHnKU54y5erRP8PZGD3uuOOaYN6Di2jPX//615sf/vCHzbe+9a3B9/XN8iNg3eEwXw0xeRkr5uZ2
JtFY1j/r+DjqFcbLzPKCF7yg2WWXXZof//jHzUtf+tLm3//935uf//znhTn8xV/8RfPoRz+6mKue//znN/e85z2b0E7KM1UiK+KL4c/jKrbZ3zMpUd1Gmebm
VTedk3jMq8ytWo5F4OIXv3hzuctdroy5WdrJjGosX/ayly028SwD4zBmLRjf/va38+v6ugUQwAz6mK7CWtLc/OY3L8Ix8zVB1/wM60rzspe9bF0FyUWG2fqE
gYwzY/ViIPe73/2aq171qsWOGKan5o1vfOOgzTSRQw89tAn1v3ne857XXPnKV27uf//7l/eDi3q84Ru52c1u1lzhClcoi+pXv/rV5iMf+Ujz05/+tNytQ2k3
JNGPfexjpUPvfOc7l0n/6U9/ugkzULmOv+U2t7lNE2aJIk3+53/+56CMrMZFL3rRMljUVdTBqaeeWp71gx/8IC8pDje/cbx1yQKmnhYhC9LnP//5ItGK7phE
17rWtcozRYJ87nOfa4499tixnaKc6173us0NbnCD4j9Sr//5n/9pvvjFL458hOuuf/3rFyy+973vFSmdLR9pg/qaFN/4xjcaWCXRgGDKwfi///u/xf5vElmo
3f+FL3yhueMd71j63r2w1N8mZZgkm912262MiQ984AON/homk/JGN7pRqdd3v/vdghNJPwn+1772tYv/54Mf/GDxq/FDqJdnwwjjoOHe+MY3LpOa34wgo+/1
G/wRSfMmN7lJ45na8/3vf7/5+Mc/PsDMeLjpTW/a8MvxaYU5trnTne7UhOmqCVNluddvyunWkR8MJsal/lYvWlD2N3xp55e4xCWaMOs2YbJtbn3rW5f+I3x8
4hOfaD7zmc9kk+vrJiAwzYpgHIapuvnQhz7UHHPMMWUt+8lPflIWTn1vTfF5Ellk7373u5cxZozSdMN0P+mWpfhtGnYaMdHmy1b8H//xH20sAO273vWuNhbw
kdfHxGvf+c53lutioSnXxSJY/B2x+LQxqcp9Rx99dBsLYhuayqCcP/uzP2tjgWxDEmyDYbTRWaWc6Mw2FsdyHf8Kv0sskO3f/d3ftbHgFJu1ssLs0D7+8Y9v
Y5KX75WhLHX2vFhwBs+KAdH+93//dxsLTHmWa5V5/PHHD+oIkwMOOKANDav9y7/8y8G99773vdtYDNof/ehHbWhipZ78QPw6wVgG1w1j+pCHPKT90pe+VJ6p
beoWGl37mMc8ptTxk5/8ZBuDtNzPxv/CF76w4JbP8LxgHu3Tnva0Nha5wXMucpGLtK94xSvaWEhLfbTFtbGotg996EPLdeyY73nPe9pY+NuXv/zlg3vVMRa9
0h6/PfzhDy+/vfe97y3tDnNRG5JXaaMyYRQmzDYW1OLvSgx8HwyxDaY9KPs85zlP++xnP7u0QXvVS1uCSbUPeMADBtc96lGPKvXS9zDiD3KtPv3Od77TvuY1
ryntvetd71qui4W9DUZV+jsW8IKhdoRW0b7+9a9vQ3so9U0cYPbYxz62PE/5MbELVspwrc+PfOQji//Jte7T79l/d7vb3cq4zLGiLe57+9vf3kYASbkuFo72
3e9+d6nfq1/96vY5z3lO6QPXus+z+LqyzPo6eb2ZNz78ZvpoUrm3v/3t27//+78v11hHDjvssPZ1r3td+Xvta1/bvuENb2iNVWvcuHIe8YhHtG95y1vKPPir
v/qrMiaud73rDa43v7u+u5zH5nuuqX43X/MZPoeAMvic1/HjWZfzOq8hYJe/7neu8b3v8nl5red2rx33Xhnde4evm6qBkMBIfCQtkv84eyI1MRbhIimSzmNx
K5pEPHAiXelKV2pe/OIXFwmZFkHKZX/cZ599GlL7P/zDPzR3uctdirRO6sMRY5IXSS8WxOaWt7xlo4xYmJo73OEO5XcqJ0k0OrBIwLF4NM94xjOK+YL5zfXa
EhO/SKT3ute9iub03Oc+t/nzP//zYguPhbRIorQFdOW456CDDirtYr5729veVt4/7GEPa4IBFnMeuzqVr0skdTZ7mhNJ9/3vf3+Rckn2JOGUZN0D4xjIRYMj
vZJiSLykeFpVDOKiEUUgQ5GOaIPB4IpWEIO8+KRI6iRg5cQi3ARTL5KzZw33HSw932+p5pOymSbVO5hDEwyqCeZdJHSSdizqRfvij6CpqBft5slPfnLBlMYW
k7B58IMfXKQ2fUFzcm8wgiaCE0q5wdTKM13PdPS4xz2uaBzqTGPyTKbRY0MLoUlEkEbBmQmK1nD44YcXTYA2pF9If7SmmMSl/25729sWzSGEjWKGOOGEExrj
RV95HtMEbSyYd+mPxCFVdZpZMPKizdAgjAe+PM8hlSrLGGXShRkN3FjUnle96lVFi7rd7W7X0HaDiRXJNhhkd2jU9wuCgHGub2nxIkuNmRwH5oi5ayzQWo29
YbLWGVdPeMITBlqHcUETdi/z/nWuc50yv1lrrJPGrDm+a2jGrnn605/eGLOxWJc55nqWHGtH1sX8V09jytyzXlmf7nvf+5axR3v+r//6r+ZNb3pTWf+e9KQn
lfEZwl4Zl9wLNHfmWmQd87dWmsiJYiEu0jNpfv/995947X777Vcis0iS17zmNdswT0zVQAK0oiXg9CmFR4PaaGSR9mgdyqJdiJQhmZIOkitGOHGJ9CIZhsmr
jcWs1BGXpgml5qRM2gwtiUTR1Ur23nvvlnSrjDChjGxjLGbl2doW5rzBNeofndDGojEyYoEkT0oXjbbHHnsM7qMJkXppZzQQ9YUXiVUbaRvq7I9UEoyvtCUW
vTYGT2kLzUOdaWR5LSkJPjE423e84x1FanpHSMykehF0eZ1X5ZD+/UZC9522eL46XfGKVyzfwZqW5Vlw6mJE06CF0bD0EVxF3bnWeOg+L0ycRfsiwZOsRDzp
n9Qg89owWRWNEG6k+vyeNknzoaHldyQpY4cWQJPtfh/O8tK2jPgj0YX5qWgGMdkH14b5tPSPumSdjREaRDCw1u9ZbjCF0kd+e9CDHlS+P+qoo0p7aX7BwAfX
BrMpeCnXeM4y6uvGaSF9NBCa+D/90z+1YdIcaLXDfWT+3fCGNxzZh9YgYyCl/e69tFjjMxb+NoSoMo+8N75oOyHMFC1Y1GeYhMvcdT+NhnbcLeuQQw4p9XMP
awgNnXYVwk6Zd9a3rId1JwSc8txnPvOZbQjprXnld+upOpuHwcBWPKP7PO/XrIH0sYHFg3Yg95GopxHbNxs64uegvYQJpHBinFcZvs8NLT6HSa1Ife4h3cZi
VezkomtwW+R6EiapIhaOIrGTOmkzSGw4zcqzlE86x8FpP6NIUIBrAtAibZP4aQfqnvUfvk+50VmlDa7x/CR19d097nGP8hUNgJTuHk5d5SfRanwmkdMIaVf2
4WiDenV9Usoh3YS5sUjFbLNGyGqIREQyZ9NHJOwTTzyxaDbBKIpGkOWRpmLRLdjpGxoQiSxMOKVf1NX32iBqj7bDn0Zr0b/qRwIkNSVpE8mKxOc6pE7KMa7c
k0SKo5Ei19BQSH760bOQz8g4UAbS16NIndi8+Xc8K5hwEwxgcKk+M65oaLSUWBzKdSRHmhJfVZI+JokaZ/qt0mIiEKbLMjb0+6T1btxvxpmxZwwk0YyNMVYU
c4lWzx+GjAX3WPtorsYUTcN6QhN2j3WD1t0lY5Ofxj1eWR+sX7SRP/3TPy11UA/zzzpi/Hkunx3txtqqjcF8SjuNa/evhUbPok6JwNUoE08FJpGJ4joLjolj
Mk8jnUL9C7thcaia7DoC+BbDnPBZjoVo2LHtO9cNf68eKDvea0jPRTW0sFEHAe4Pc/B7XpvPy1cL6L/8y7+UxYoZgypo4bMoUhuZm3Rsl3QOc4n6hWbR/am8
x/yyfV45i73CDhPpEtMOBzamAVdBBwZpSMI7OPgs/Ln4W2zzGd3ypr23MHcpmbmx4H1S4uazdmYbtP3wMDN1ST3UB1MwkF2PTKbsq7w++3Ja3fVXaD4NMyXT
kslp/PhTvjE0rk/zWd1X95iQ6u++DEbIa3wn1Fe9tINA4Tuf9U+XYOP5SF9VWkwE9BMTp3VgFiLAEpiYhkJjLmMnpP7CGAhc1hzjg0BkXTO/PUvwC/NoWCbK
d8aKRZ8pnAA8bPJUBoGYOZWA4xrBG8Y+E7731jVMA3O4d5jm33300WV91UZrinoyP7uGiV9wzFpoKgOxQAFI5UjIudgOPxQwfkds/UCymEwjdmT2RQsLv4So
GloEO5/oLzS8AAByFA1/3/1s0cOl2actwqQCCz9QSaY6oHv9cPnuNyh0Hoano0jS7N6kUX/Z8XmveufCO2pwdiUW1+pkBMvhBaf7nev8uaf7fT53Na/j2jz8
fX72mu89Z/h9ttcAxUBEpOQ16muSaFuYzVZoEsN1znuGvx/+bIyFA70IH2HSK9opxo4B2pukj1ZDnttlOqP6DeZIWzGcrGu+5vOGP+f39XXxEAgTVpnD5vUo
6mq9w78bLyJQ+TEs8IQPAh9mYIzwYb7yla8sWjEh1Npo/qaWIVuHtQXxkfKr/uM//uNAwMrnEbJca0tFmJeL79hzMKmnhgbz42AO5p15xnccQTpFWCbwEDoJ
lWEGbiLQozAf9aBNr4WmMhASp4UWMLieSWnCDhN1SuipSUMly4Vi+Lr8rJHIxkRSHGcvxyvAEWZkUVfOWidiPouWw7TBWa8d2oaYg9IMUr4Y8Q9Dw3g40Zgp
1MnihWmELbwMPiGpXROGRQwzpLnh9sNE8kgp3G8WwJSADRAhoUmcyrQZgwiDNkhhpQ40P5pQEic605iBQw1OTKm2XSJp08K6dej+vtr3GIM2eJ5FllbWNf8o
L01StLVJk3LUs3McZH+6hqOayo9pMKVlWK/fBEQMM+JRZbg2yfWEJvWjJek3ARNJmL6+UAeClf7IMvOa+rpYCPTtH1YC8yl8D0Urdp9+NtfMPWN7HBl31hBj
w9hJC4Drn/rUp5bAHRpqzgdCK6ZFgLVOWMzNB9o0E+6xETwyTOqBATCFm2PGPOK8t71A2ead8gi0BHKC8v777z+waGBg1nPP6tZx+Fn5OTHIz8OvvfTqI444
oixQpLFw7jbh4Gkuf/nLF2BVHMcU5eR3atcoBjP84PxsQuokFc3JbmETuTBv0mm5WOagwlBEAGU9xj2T3ZDdkW9h1113LXXG/am+ytKhuVBnGUwzGIr2sZfT
qNTBQKB2+i5NN7CL8OKyeKmTyB1MC2EQEWZb8GVS4R8g3VBHDWz1t9gh/WLAGswWVkyStK9u4QQsGpNnwdgz2ErVbx6kXH4AjEv5olkwXaTNIsMMan4DAzj7
YtqzE19YJYPNe9KE5HP2qfcPfOADS0RNanW+I4wk3uN8EurFfGYPkvEockw/KdsYIeSI1FEuX1ylxUYg15Y+tSQQRFBI0ZpTs/fKVHTggQcO/KvjyjJuSPTD
C7O5R8BL5uF+/hDzlwkJ80CYF4uMSFFzdpgIlMqw7iTzcI0xjfmpJ+ZljjCP80daG3wv+ivJM4frmL8Nvxr3k+bpVA1EgSqLy2kYDomJmEg4qsWOecvEUzEb
crpAmYTJGJSlQt3vbNxhC+TkOfLIIws4tB2gA0YHJuV93YWiW+a47/P51EPPoVKSKoFIq/IMphb+l+Ey8tnujaihYirBIC0wmAFp3yuu3pV+876IKS8b1vgs
hN2Ski20tBL2TxKuZ6qDAQxj6iwzGZspTL3uGkzLAKXaesVIqMWcbxHpUZiFhZsEpI/8LtzXBBIyK3gAkxGGS5JSH8yFhmTgZru9wis/Zzumfe8eY4DPjNZD
pRdeCF+DW/1pXBZxAon+df2oZ43qU4NYOUJ8aafS6tBwMHUTkXaFMZG4tM3ztA1DzbaYSDYJYgA2EdKG9I+JOdw+4ceeA3vl6ivM8Lph4tDfEc3SvO997xvU
v287Es/6ujEIGGfmVleQmPRkc3jUPJ50zyy/DTvIlWH+MzuNI2O1L2FYBLa1UM4JGI6jXhqIm0Xb3Oc+9ymTn4qFcViovGYkkN9JxkkmvUlLpcpKWKh9TvMR
NQ4wrrOwm9jKwPFJ8L53r4Uwy7IIdSnLTOkyf/MMz0o7vGglC4NFjq1TVBOTFOboOuUPl51lWZCZqjA8dkgSLjMYqVS5Ol59h4lGRnsgmcCDucxiR2OLMLoy
sDGE1AIs8OojIoN2h0Fh0BZGmh4NKElbaBsGi75wLa0Ffuqa9k1qLz+ThdbiZwHFPGkgJB3PzwmmDd3+yWfBNrHM77zCK7FLSSXxsLtde5nTxLVbhDHhNAl5
ZpaZ7c+ys++6mBorNBiLNQZrZ7m2i3fXP+y8GCWtTH/AUN/nWPMMe47Ui+biWlqaNsBAXRIH+GD29gppFwc9E+33Ay+4065gYpKNw8zzstzhsZntrK/ri4D+
TEFlfZ+09tKH58DaS1xbCQRdY39SvXaKR6zafkH9Z1Yh0VKZcM6uSpXVTlOJz9Q0E5lpw6JrsXZvkgXQQsn+TOrW6WlmIEliIn43YTEAC0NSlmkB4ERKspDQ
Kgyirkqo7qRu5VoolElqRRbZ7qKVZeWrevFPWKwsPKR+7Z9GFm6LqYXLM9VH3dRR23zOBVhZpF0pFvgoYEf6HrcIYeLq5FWbMK1RjJDWQyKHu4VXedqhTb7T
J4mlhQ/OScr2B0t22xxU2pW+Fd/nAuw+ZiubNv2uXzyz22/MZ9qnru7ttt/YgoExo01JnscxjlHSpGgUCI6epU2w0ofj2gL3q4ZmccG4hwPSmEstOnHI53nd
NbQZ40XbPK87lvye0VjDmGF0Wa72G5+VNh4B6w0tRP9U6oeAsWueWBO683L47pkYyHAh9XNFoCJQEVhkBAgzBEUCTAo/i1zfzawb5pHCW1cgHFWnykBGoVK/
qwhUBLYcArRXmgitltZbGcnKLsY4WI38wWga83B3ZSArMayfKgIVgS2MAHMtk5bFElUm8ofApuxy5nTMoy8ulYEkcvW1IlAR2DYIMGfl37Zp9ISGYhiTfB3j
bq0MZBwy9fuKQEWgIlARmIhA7zDeiaXUHysCFYGKQEVg2yFQGci26/La4IpARaAiMB8EKgOZD461lIpARaAisO0QqAxk23V5bXBFoCJQEZgPApWBzAfHWkpF
oCJQEdh2CFQGsu26vDa4IlARqAjMB4HKQOaDYy2lIlARqAhsOwQqA9l2XV4bXBGoCFQE5oNAZSDzwbGWUhGoCFQEth0ClYFsuy6vDa4IVAQqAvNBoDKQ+eBY
S6kIVAQqAtsOgcpAtl2X1wZXBCoCFYH5IFAZyHxwrKVUBCoCFYFth0BlINuuy2uDKwIVgYrAfBCoDGQ+ONZSKgIVgYrAtkOgMpBt1+W1wRWBikBFYD4IVAYy
HxxrKRWBikBFYNshUBnItuvy2uCKQEWgIjAfBCoDmQ+OtZSKQEWgIrDtENh5PVt89rOffT2Lr2VXBCoCFYGKQA8EzjzzzKZt2x5Xru6SuTGQ85znPM31r3/9
5la3ulWz++67N1e4whWaC13oQk1lIqvrkHp1RaAiUBGYJwIYx69+9avmO9/5TnPyySc3H/3oR5vjjz+++d73vrfmx+wUJayJLV3iEpdo9t9//+a+971vs9tu
uzU77aTIShWBikBFoCKwqAicdtppzbve9a7m0EMPbU488cSZqzkzA6FZHHDAAc2Tn/zk5opXvOLMFag3VgQqAhWBisDmIEAzOeyww5qDDz54Jo1kJgZyyUte
snnJS17S7LPPPpvT6vrUikBFoCJQEZgbAkxbD3nIQ5oPf/jDqypz1QyEtvHWt7612WOPPVb1oHpxRaAiUBGoCCwuAqeffnrzoAc9qDnyyCN7V3JVDORSl7pU
8973vre53vWu1/sB9cKKQEWgIlARWA4EmLTuc5/7FP9Inxr3ZiDnOMc5Cme6293u1qfcek1FoCJQEagILCECorP22muv5otf/OLU2vfeSPjQhz60qcxjKp71
gopARaAisNQI8HG//OUvb855znNObUcvDeSyl71sc8IJJzQKrlQRqAhUBCoCWx+B+93vfs0b3vCGiQ3tpYHwzlfmMRHH+mNFoCJQEdhSCDz+8Y9vzn3uc09s
01QGYje5TYKVKgIVgYpARWD7ICBY6ja3uc3EBk9lIDe/+c2bXXfddWIh9ceKQEWgIlAR2HoI3OMe95jYqKkMZBoHmlh6/bEiUBGoCFQElhaBm93sZhPNWBMZ
iLxW173udZe28bXiFYGKQEWgIjA7Ape//OWby13ucmMLmMhAznWuczW77LLL2JvrDxWBikBFoCKwdRE473nP21zmMpcZ28CJDEQcMCd6pYpARaAiUBHYfgiw
Ql3wghcc2/CJDMTN9TyPsdjVHyoCFYGKwJZHYBIPmMhAtjwytYEVgYpARaAiMDMClYHMDF29sSJQEagIbG8EKgPZ3v1fW18RqAhUBGZGoDKQmaGrN1YEKgIV
ge2NQGUg27v/a+srAhWBisDMCFQGMjN09caKQEWgIrC9EagMZHv3f219RaAiUBGYGYHKQGaGrt5YEagIVAS2NwKVgWzv/q+trwhUBCoCMyNQGcjM0NUbKwIV
gYrA9kagMpDt3f+19RWBikBFYGYEKgOZGbp6Y0WgIlAR2N4IVAayvfu/tr4iUBGoCMyMQGUgM0NXb6wIVAQqAtsbgcpAtnf/19ZXBCoCFYGZEagMZGbo6o0V
gYpARWB7I1AZyPbu/9r6ikBFoCIwMwKVgcwMXb2xIlARqAhsbwQqA9ne/V9bXxGoCFQEZkagMpCZoas3VgQqAhWB7Y1AZSDbu/9r6ysCFYGKwMwIVAYyM3T1
xopARaAisL0RqAxke/d/bX1FoCJQEZgZgcpAZoau3lgRqAhUBLY3ApWBbO/+r62vCFQEKgIzI1AZyMzQ1RsrAhWBisD2RqAykO3d/7X1FYGKQEVgZgQqA5kZ
unpjRaAiUBHY3ghUBrK9+7+2viJQEagIzIzAzjPfWW+sCFQEKgILhsBvfvOb5vvf/35z2mmnNd/+9reb78Zrs9NOzZ/92Z81V7ziFReststfncpAlr8Pawsq
AtsegR//+MfNIx/5yOaTn/xk89Of/rRp27Y597nP3Zz/fOdrTvrCF5rHP/7xzT/+4z9ue5zmDUBlIPNGtJZXEagIbDgCO4WWcc1rXrO5yU1u0uy2227NFa5w
heZCF7pQ86xnPrP5STCUv/zLv9zwOm2HB1YGsh16ubaxIrDFEbjIRS7SPP3pTy+tfPOb39y89KUvbX73u981Rx99dHPwwQc3N7zhDaciQGvBiCr1R6A60ftj
Va+sCFQElgCBK1/5ys0HP/jB5thjjy1ayWGHHdZ86lOfmljzyjwmwjP2x6qBjIWm/lAR2GQEQiLmAK7UH4Ff//rXzSte8YrmUpe6VEMTue51r1sc6jSUSQTl
ykQmITT6t8pARuNSv+0g8NnPfrZ5//vf35x++unNzW9+8+Z2t7vduqj6Jv9//Md/NCeeeGJzmctcprnDHe6wbSJnYHvSSSc1Zzvb2ZprX/vazXnPe94m2Eez
HuzjzDPOaD4fz/r5z3/eXPWqV23+5E/+pPT2si+gxs9DHvKQ5g1veEMxZ33sYx9rXvWqVzXf/OY3m+985zvN8573vOZP//RPOyP7j28L1pVZ/xGQVbyD3ci/
cEK13/3ud2NcVdqOCPz+979vn/nMZ7YXvOAFB+MjFrj2gQ98YPuLX/xirpBEyGV7+9vffvAcY/LSl750e/hhh831OYtYWDDN9nrXu1678847t+c85znbcAS3
wbBLVc8888y29Tcn+vznP9/e+U53aoNBtfoyQlvbWGTnVPrmFhNMor361a/eXvjCF24vf/nLt7vvvnt7l7vcpf3bv/3b9p/+6Z/aL3/5y61rvva1r7UhFLXH
HXdc+9GPfrT95S9/GRDPD+PNRWH+T7/b3e62Yl4O8YvRzMNFy8xA/u///q99zWte0/75n/95e+c737l9+ctfPvdF77e//W37jne8o33CE57QhgOv/dCHPjT/
3tvEEsP5OBg4ERLZXvSiFx18fvSjHz23moXk2N4pFrUcmCERt+c4xznK53PEovqv//qvc3vWohX0kY98pCx42fZ8vcAFLtC+973vnWt1v/CFL7RXutKVBjjn
szCS1772tXN91mYVRuDFKGIvSPu7mJ9d+pu/+Zv2Epe4RHvJS16yDQ13gPtRRx3Vvay+H0Jg2zEQC9J+++23w0SJzUTtD3/4wyF4ZvtIkiHd5CT0Snp84hOf
2Eb0x2yFLtBdH//4x4uUql0RFlmkNRNz7733Lm3W1g9/+MNzqbHFK3HUb9/61rcKYw47dvn+cpe7XPluLg9boEJ+9atftTe72c1KG89znvO0/+///b/2oIMO
GuBOO4DFPMicCPNNeVZEGrVh6ilSOS0P9hbUr3/96/N41MKW8T//8z/tm970pjZ8I+3b3va29rKXvWwbJtmilfStdGxQbMPUOLd+6fvczbxu2zGQ5z/veWVS
mBjXuc512gjhG3x+wAMe0J5xxhlr6o+wVw8mo2dY6KjNuQgecsghayp/EW7+67/+69Ie5qtPfOITgyqZPOGQLL/d+973XrPqz3xw/etfv5R3rWtdq/3JT34y
eBbN4+xnP3v5zeK61Yj2mmPmKU95yqB5L3nJS1qLvN+e9tSnDr5fyxtY5rMwj6QjjjiimLL8RuPc6vTpT3+6mLkwzJve9KbtD37wg6lNZsp9+9vf3lpICTPn
O9/5inn1YQ97WPuzn/1s6v3LfsG2YiCkKCqqCXGjG92o/d73vtda8O91r3uV79iZ0zQwq93TYpaTkT+ANvK///u/xebqe2oyO+uyUjgdSxu0RfuG6aEPfWhp
P+byla98ZfjnVX3+z//8z8ECFrH7K+6lyaWEvscee7SkaDRrv60ofAE+3Pe+9y04Gi+nnnrqoEbMr7e4xS3Kb7EhrtciN7h5xBs4RuBDKW+XXXZp+ZuSIvXH
QMAyX3zeysS3dP7zn7+FK2ZwzDHHlOYaU6PGVQR0tCwXOd+HXx/72MduZbhK27YVA3nhC19YOpsN/X3ve9+gczEWk8cAuOMd79iSKmYhJgXSi3JuectbFgdc
lvNf//VfxYzltwMPPDC/XrrXlIzZxt/97nfvUH++HoxYO1/5ylfu8PtqvnjUox5VyhleRLOM7E8+GNIjGjXR8/pleY10G21EQJW23/Oe9xxUO9v2ute9rvwG
47e+9a2D32d5Q7ghNSsrF7zugpm+Lo51zuWtSvxNxhmf6PHHH98+NbS75z//+W3sGWkjMm2HcXX44Ye3F7/4xQf9wAe4zz77tM9+9rOLWReemFAfLWaZMd02
DIRTm01Tx1JPSXIoJyVHt9/Ym0kWsxBnvDK6mkyWwzSWdmZ+A+aZZaQnPelJpY2c2bSRYaLRiXaBw7777jv8c+/PNAqahXI40bOfugX893//94BZsV2jUdd1
71mG97GxrT3Xuc5V2p6aV3dRJ6jAHzacv2shJjHlMAfG5rpSVPdZBJ80mfERbFWKMN4SnAFXQqBILdaK+93vfu3vQ0tLgo3owxSSYOca5tukZzzjGQVT2ozg
hK1MkxjIltqJHmaA5nOf+1z0d9OEltGEo7e8z/QE8uGEJNbEwlVSHJQfV/EvGEQTzrdyhw1K4VBecXdI7E1Ik+W7k08+uQmJecXvy/IBjig0tiYktvK++y8m
TRMLf/lKG4NRdn8u7yNYofna177WfPGLX2xOOeWUJnwbO1wTpr9yjR9ufOMbj9xbEo7k5mIXu1i5NyTp8pr9WT4s6T+YhIBT2iyHU1K2LRa45gY3uEH5Opy/
ZczmNfkq62yE5Tb2O0giqEzZaIcpQlXLV7Fglo11PnhOPutqV7taE9J1ucaen61KEiqGP6/MYfM4tOvyOaI1m7Pt/IctccE8yh6SsCA0YaVowrfZRJBH8/rX
v77sak9sgsGXtxEt14QpN7/edq9baiOhBcbmKBMjzEs7dKaJalJG9FDzgQ98oHna0562wzWTvgh7/yAlQqjBJdvn8PW3utWtGoPKxrCPHndcEzb84UsW+jMm
KRU2CvW8CSl5ZH13j4R14ZhtvhMps0OFL4w5JLHm3//930sKCZviTo++OCMmZEhyjZ3AEZdfNiH+xV/8RWMxw2DCCVnKx0BGkYR4GEj4sgb1GnXdsn0XEW2l
yphx2ON3qL4xbKyGv64whghPLZsqYf3Od76z/EnPgTGH5l02INp8GGG6RbAhLMFUPqgUqmKvSVkQPSwk5gEDwTxCKm9+9KMflQ13O1RmSb/ADLSJwIgZhOra
RKBGE+btHVoEjwCkecELXtA85znPKb8ToDCO29zmNiuuJxyF76R8Z02x63270pZiIKQxZDKEiWWHPrWQ7bXXXoWBWOw+85nPlAFG6rLgmZwGmjTQEeJXBhtN
w8LnuwhtLQueAXj7MTtaTWASHYnw+JAMl41IxSYdCvV+bPX3OEs6Pv0Xvyi71DHviOgZqWkoxEKHYYR/pQkbchMRLI0JbuLSCnfdddeRzwpz42DRk6Z7q1Ay
adIrgWMUhVO7fE0ooumF76ksbsl8hu/Rd8adv/BNNfe5z31KFlrMB0VE4uCW1D58gfFkqg9zYKsQvCIIpDBKTASOf//3f988+clP3qGJ8IhIqyZMU+U3TOHf
/u3fmj333HOHazEV2h6KSMTCvLsMeYcbtvAXW4qBMJcgksO4xS8HBPVfWgPSRJE+xnQyZiE9dNhAm64p4Drx3SgisZuoJrH6MO9YIJeFLEJpBiH9j6PYP1CY
qmudw+A+BK8Iy23CB1WkYRI2LYP2Bj/M2uJpImPKyGRlshlFYbcfLLCx+53oXCTFUdcu03cWNGTxHqflYaowgvHDH/7wFRrYVa5ylSIMEXCYGWkhX/3qV5vw
GTXhIG5ij0nzL//yL2URTBOjcTyKLJ5phqE5b5XFEDbGJkwIi8xRsXdrFAQNpvyIRzyi4GjMHn744TswD2NcObQUxLwa/oHyvsuQyxfb5N+WYiBs6ogGQtsY
JhPMwECk35S2SF8WMKq8BdCE9ZsTzUxEpgJ/OUgskJMWV6aHiKIp96uTfEPLQkxYtDAEi3Fk0bLowMrE4v+JUOkyYZlO0v/UvR+WpMIXv/jFTYTvDhgV3Cfh
mWV5DpMYprLsZPwhuPkbRRgr851xmBqLsRSZD4qvLf0W3Xst/p8In8khL3pRc+SRRxZTqt9hhiGNo5wvxdQTZeRYH3f9MnwPH766v/u7vysYM2uPmovMfK4x
V435iMwqedh8b3zzMfGZnHDCCYXR0GQQ4ZOWhzEzcyk/oraWAZq51XHHVXZuRW9sQSSwdNSy3Q8T00mEjJbEan4jjXC03+Me92jYhknUzCUms0lEaovQ3+a4
j3ykeUtMRH6T1FRIaQZRSm3Dz2LfRyTvZWMg2Ub1n7SIYABpUjJpItKn+au/+iu3jSXStqNFb3vb2zaRm6g4K+H85S99qZjBxiW6m1SPsQ9b8B+yTfDuYt6t
9s9j/FjEkuDrVD1jdRwp98ZxqJJMtJJechwbqxgWM22axYbv7zK04d+W9bN2v+LlLy/WiAMOOKDh+Oa3s+inUKJtki/yKyFrB1MrHydTrjK+FOOTYDVM1gEC
kb+IzmykkY8NuM2DH/zgYgUZvn6rfg6bwOh8WCEVLk0yxR9FipJQKUtbhNih6Pj2uc99btlElSGTIWmVzXHdkLxy8YR/MYlL6oPIkjrASoK2cSREOJ8XUuC4
yxby+5Cq2pBUSzvDVjyyjifEzvSQjMs1UmHMmtJEOgk5n4w/OZpCQxz5PHH7rgnGs+YsAiMfsAlf5mZM7Yb5MMXitCK55GMe85hWmPpqKRbGktMOfkJWwwQz
sgjYumbvSFUTzGTkNcv2pXGpTSG4lJBd49qem24i0DgKd7Afx7Xj/uyjCf9pG6bENoJvSsqi/fffvyS+DEFyxX32hrz61a/eMmN1UhjvltFAfhGSLGkBkWpF
WB166KGNs5KTmLacVBabgfKrXq/Ue5oK/0kwjhJpxKQQE2qklM7/wo7K5JLhfr0etAAXkczSJk99R1R1oaSxEauo8EIhSWfs86+OdNmjIt76NAWm+icW0yL1
xV6QYnIQ2SKajSkQjkxfiAYzztzT53mLdE1iTHMm3TKvwpfvjNNbCnKmExTpd4rdPc1ME9sRYzIGZbnE+BTx9rIY8wc86EElkk2YOd8A05jgEH0nfbyximjh
qR2VL5b4H1PqscceW9ojKIbVgak0TaC0rkNe+MIyprvNFNRwpfBvXDrM2vqJVsIZD7fh8UdDpLGIlhNEwtSt72LvTgm6ibRGAx9ePoP/6+sRKv+1cMR/Jfrd
e9YKlg/PC0ZX1pqbRwTnOWI+Ljqt4J5R2cHnZdJAaBspzcbCNmhD2DRLOgySQ0zONQtRwZxKKhQbvUYR6S2c5yVRGyxtylsmioVkkNpCenXSVqjmAzy740MW
4rUSvGxG7JbrfUzUkhrGrt9rXOMa5Xf5ubYK5aZW0qvMCN0dz10sbLQkJc9CXU1C/qtuufmeZE37oEn6Tn6z7UA0vAc96EGDDZTaLidb+OdKWiLzfLWkzDjM
quTGS3ylUKLxhMBZkjhKDRSMaLBW5XXDr8a/TcnWks2mSRpI1PuPDGP4/WYxEDs7JX/rToBpIEpVor7dNtz97ndv7WQOzj7t9rn9HjHkJWVKSPKlLvNMez63
Sk4pKE1GXSy9t8g5q0I6h2c961ltaHxTSur3s9xMYatvQyMp52IM92PWQ9qTrUIHRdbdbFf31UKOadz61rcueIREO3OTu/NHTjiLl3LlvGJmsUh1n+19+Adm
ft6y3GjcWhuy7RGsUFLydE1ba2mL1DEwzvJlxbDrPT93X60T0qvIzcWcKd1SaCGDa+WCw5g2k5aOgRx8VrJCGTD7kgUtVNMCvA6JEMZVMaC+z5l2XTg6B51v
oKQ/Ztp9i/I7e3x38PMZYSjhaCzpvmko60mh3hffFUku09LkhDOQ/b7sFCaPwiizXbSQ+9///i1/BUlVG7uL/7zbqw/1czh/W76V1D7UxwI2q09r3vVcj/L4
M8OhPpijYeZqPzen/F/6LPtNCiAHWmUf5yshDBN37IM5FWbKktwyTMIlE7V+kXaerybvCTPcekDRu8ylYyARUlfAM5idoDaNLDYpTcUmvtIB0+5Zr99lpzUo
s/PnYeZZr7oOl2sQZ9ZW9ScJZ+bi7rXdidL9ft7vwwfTxqatIp0lnhHhMpMzed51m7U8ps/w7QzGh8XEgjF3WoUjXPACBpYYyxNlYduKxLmd7TRPv3VWrre5
jOkOA4HdxyJhI82DmfCOd7hDe1icrgnr8HlNhZYAnPV0YuVm0lIxEODqWGYStlmREySlceRIyvR9iJIaF8kz7v71+P4b3/hGmxFbzs5YlwVizhVn5usuIvAX
2bYIxA7c1UaW9WwQUVTMdLkwsMHPy2wyj35ifk1BjPTM7LWViFbgECn407r6CKdrbb+1YDURn54Xof/lHCP1ZF5T782khWcgqfYBCbdlNolcVcV5eNe73rUc
1sQ0lI7rvJ6EGtE6ZUAY+K6RLfeNb3xjybZLXV0vYpfEvJymJyW05x599NGDU90wjbTl3yGkj1lCMNer7qPKJennwmbApBMxsR51z0Z+x09CI1JHEh3sl426
Jy9yrhq/i0aRIWAwDjL1+6LVcdb6dNsWEZqlmEUZ39km4zw2JQ76QFbgzaaFZyAA0pE0DXs5HLSTju8I4SzRERxLwxIDRpGL3vArJrRXxG3bazBPinC71hkV
TjocfqbP/C9OPXT8a56zgLmNOldjnvVaS1mxAbN1GqD6i7hKiUefLNIE6zJlZ93nGFlL2zfqXud/RHhywdgYT2Foo57f9zmYGrOascA3s1rpue9zNvo6YzzP
XxFtFWH/pQqLNL6dV6Juua7wPaYgt9F4dZ+30AwkO5DzMJ15zi/IxeFlL3tZOb/jXe96V7dNxZ4YqQoK2Gy2QkFx63/4h39oI//NCiclu3lGMuTzVhTW84PD
dmIvSHkmpmCiCdN1hC1moQ42axkAnGUHxqFSV4joCp+ZLtby7J5VnOkyZ0DkoGUjXmR63OMeV+oqVHsZTIOJZfdI2Tz/I38bfrXYRbbXwsiZuNaqvf4uTGcE
MSYpDvIUEIafm58dsJRRhN2jdvP3ZXx15kma51gMkPmYczJfN6NthAnrSJrizcXYvzNyg+lm1G8pGAiG8Z73vKfdb7/9iokCgMcdd1yxAdqL0CXmFhFXBoTw
z1ETwoQxaYXG6RA2/bWYDDCPLItJSt1GDTp1EeNvgVNHPhDPt3Nb9M0iUqT+LnXUvln3HGxUu4RIxubCUl+hsMtCuddFYEhsFJxYbZkM2OoJIYJCHE6Wh2lN
vHHEj0x9nPYOACPcGI9HTdHKmX7TNGzPgrm07JTHUDtMLkOjuwxko9snYMVOdgep0UhTgLNvjZC0SL6xhWYgozrO3g0mFQuFCceplBRnG5TFmIQUB8Hk12Nf
v3jyyQNzkxPJ0KiFf2wB8QOJMENbpTDpTqgchPma5TgWNn0gOThmXQSyzPV4ZVrJjYLCG9Fq8VmPeo0rk6ARu6fLhLPRqk9Ey7iyNup72m+e4EiQ6UO0BefF
85vQyGc99Y5mL0oxco8VM2pfISbNr+bgVjBjCVgwD0VFwSQtHKPG+vBc7tNfq70GY891wSth2BrDj7potHQMBIAGOuYhHjpJx9qNDHDnGfclIYkmAv/Eqaee
2ve2wXUpvdgxvBotxuTPvSnqzLS2aBQnJw5UZ4vMMpCsAvDE+Oa1mXE92y0yMDVRudlGkWgdAobf7b5nVjzllFNGXTrzd8Y+v6HymXrf8pa3jJ0PFjIY+1tk
/11fMEj62sIPxccqy0IyU/tiYMLSsVZzYd/60DC6mxmF+S6Cv2NU/ZeSgWgIG3f37HIdzk5oF+1qQwz5QQwgqQpWQ7QPph0mqThnYTW3tkwBmaTOszl+F420
KW3DRx111KJVb2R9kqHzfU0zB40sYIO/JMAwTRgDzK9dsnjxlTFxXujCF26vHiYrqVsiH1VxYjNzkJjXQgI/pDIhRHkObSjOEyn1ifxwZVPbsMkkDk8bnAke
RxOs5fGbfq85nz4dEU60QH1BGxG6LtjGZ7gkA7HWYKKiK/lMRG3ZEzVPpm7sMk96NuH2pB573jYDzKVlIMNgkdCALSXDJBqllr7vfe8r9+L6SSavaCmmArve
4zjWEh4ah03lJa1doJ45q7nE5HO/vxve8IaDATp4wCa/EfmR9dP+ZSCmSHUWdNE1by5q3fnLRAWqMw2gS8xLhBMSsEXLLnQ70tnqs19MYGO1S3FeTXG0RwK/
9tBXvrJkXqApjNoHJZ1OJBJsX/WqV5Vdz7RowSld7ZjdvTtvImHmYNG1qW1ZifNcNl5YYpZMR/x81hDh4ImxvWfazD9iOwBGm791XwXuiI6K0wp36JNZMNLX
2Q+LmqpnyzCQtMu+6EUvmtpXbPvi2DMNgIllUrIz2i1OirWgk8q6A8R7pjNJ5UgcL4oIK9/N6rBlP86BSrIclvSmNmSdL5De2wKmjcsiaUoDob4i3Bbd6a/7
BGDkGBg2E3KuXz/2t3RJehHto4V4tfjbH4AkBI1Mr0V69tvwn2uZa4Svp53fptY4UbP7iJLAsfsMzvpumhjRWqmZMnUtI1kDBAFopzE+LCAx39p3hsHzazI5
d/2WNBSRliIoWRJ87uJNm1lrJCCN5xa3uEUpl1a4iON5EgNZqnTuAXb0XzM4CrV8GPEvbL1NqKbl3OKwd5YrgsuXVNUxaEoK6zALlO/DNl3SNMfAKYf3OOrW
/c5D9heqZblOKvg+FBOtpJGXFjvMD+WUw2BSJcV8n/s3+hqp50PaKqfewWYchSmxiYlWjvGMKLSRJz7mvcGsG2fOO5Ar7LolnXVEmjTStMein5ft8OrakKLL
QVUOlwrT4Q7X+CLPBNc3sF10crKgVOLaN4yx9OoOIQonbzmIKPw65dS8WLzLIVDaFrvwy/0Rpl4OlAohpDQZlnCN/Rrl5LwIBy3YRDRj40/ZoWmUVOSON5CW
3GFKXiNIpYnFsxxJoDCHT0klngTj0EhKP7t+GclxDo6gRcGUy3G25memqw+TVeMPBZMsxwqE2bkcJRDCZxMLe5kbsf9sgIOz6Z02GtaQJsKdy6F0DkcLs1gp
Z7X/nIAYVpGSyj/MY+XQL/29TLSCq0bFB59x40WyMYtGUb9JDnQSAU4epwy2cXbxIB1ydPyKLJe4PombCaQbyUMSYbOWjyv3mXhmH62H9CAMmbSTkqaw3nSg
UpPTxrooEh0/TZw7UHAlbXWxyDpS17uSmYOmuuYO1/ks8oz02722O56YBQRFMBkOE+nX7uy8np+LHX6YhD9m2CO7/jIQjFPKtI8ox0BiyKxlk9t5Q0OWuoJv
Bw78fcwpMk3nAVS+hzHTh8R7XeKEhZkgg9R4bEyjffMDKJc5h+mPzV1ZQoWZTkjrXbIZ1u+08dX6G7vlbNb774TGlmlLOM6HserWS/syRJ9GmHvGXMOsCCMp
lQQ4PDfSvfCLWiNyTxisc5+aPmUipNXojz7EjJxmrGl7hPqUN+9rJmkgMUb+yDCG3y8aA9Ep1HphnCblMFHbqfASAlpcTBZ7QZCO0T5mrDgWtKjrVHyhwDqQ
TZnDrJs8kOkhczBxwk8i9lNMy0AzWUXVIBvC0hRwl7vcZVIRm/ZbpnhgzrPPoksWu27yPxjaT9AVLDDhdEwmxsyDUoOzNTMHMiWkIxke+qerrmP8iVOOw25q
8VxsmSHyOtlMl4VkZU5sjJVhEk32ofC3/fM//3MJ22U2yvBZgpB7ZWMwtrpMPs1O+ilNVspmuk1GG8cIl8cZ40w2hBs+EH4/C+Qw8QGmucbikdgPX7fIn1PY
hNuhgekkyowW2twd1+4xRtPRneMyTXo/DmEGtr4373M/Gue7dYpfqQ8xqVu3lLOIGze3DAPhP7AQcUiaCCsoOL/wPKGy/BXSMLBpIlEoksNZeFIz8L3FXaeR
IFICMVi6Dkvx9xZWA6S74LkfmbScjJgtxoU5kdoyXDN3TnvOojrJLN7pB+Ff6BJpKlNwaIM/2oHFCNlF64wQ38NA9mF7dYYZvHLY7x/2sIcNHMRCKRPTYQ1R
eTaTdsnCiQn7DcbL4EDP+mt7OsanCSN5j1cLfDp0cwd1/g5rKXWMd9IwDa67R4nAlQvTavYgYS4w9rdMTDpx6Wp8Qr1prZPIZmRtpX0ME8b+lNC4MYQbhQWB
9aNbHt9URrSlZUTyTxqe9YavdVi7G35GmMyLlqMOw/Nv+NrN+LxlGAjwMAVAW+iH8wmlNGYxomIm5QCxg7wrpZlsTAOpPkrWlxt5/JYSRWZQHZ7AyiexcI6T
HEgmtB5OTmpwDiT19beaSZx134hXE84eF3UUqeJ0xy6RejOKSDikz4jUm7HsnLDdBIf6IvdoYBJd3DH/NNNgKCRcdRAtlFgx36TTMyVgkXQZjrlsif4wv8SK
QELrHaZsZ/f7XMyZorrMwbXMW4mXVybSrvlFOYlpN/qwW/7we2OesKQ8m3mnLX7D9y/CZ0JfpgV5xCMeMbVKrtHe3EjrBlib01eIQI1cH5ivRlH2EbwSf9pd
mnIxefvX9gvzNqvGMHVPU2UNWDTaUgyEdpCpN0i+3X0iw8BbwPgySA8GSO5zeP/731/KYCI4LiYybcXvGI0JgymYnCRvkjO7smv4MuQJ6pKFIW3E4riPPCti
xUAiYSvXH4nklDlvDOvWY63vaWOphYzyLQiH5AfqHjbE7qttpNxuNIo+kkUZfuzxJqHIoi5hIp7HLJP3YjLKxKBOOOGE7uWFWeWeGjb8UaaXFTcs4AdCTS5s
fcPC7QOBcUq33WaxxeemSmMtzabda+DsfotYlwEF1+5eNniPMbvenDkszq9YRnrLWeH+2pFzflI70rzd9VFKmOp+fkEaI+YQJ80XU/hwWZK8smIYz7TvJAIV
wSyFHuWN0uisR7lGLWLI9JZiIDqHzTDVeq+YhPQnJhCVEpd/ZcTGd2337O/J/cXL6zCLF8na4m7w6GAOsyxb2pKUnNMJyv6fNtAcKMOvNI+U6HNgLGqIXtYd
k0zH6yhVPq/rvgoNhdnwsb2Yatch7ppRqnlK0H0O3fptMCVHgyqLrTrDWrv1WfT3GCXtQxv6JtdM3xI/yCgShq3McSHYzFiex0n/8zDlTqMDY1Oj643bww8/
fNrlC/m7pKrawBqQfqRJFf1sYGjsWyMEJyCMGwZwta4wWzGHMaEPm2f9bs2gqaR/K7XJM0Iosu4ceeSRRfjMAIpufdI/Zj2aJBB379nI91uOgegk3F4Hp3pp
wNAQMIB01vrOQk4Cdi0pO8l5I1RK0oXyaAz8I2zs/mSlpX0g0RR5QJQy/QGV016ECxON3cKY2BMjqyYzkGvEiTukx7PZQ9M8lnVYlFdMkr9BnandOQkm1c89
HOXuec9Z+XvgBWNanEmWpr9kHqKJSGU5ufLUNX20gkI6zmu63785AiKyv/swne69i/A+Ga7FalSE2ag6pg/NjulRBGdMZFzEj82D+oiPJIWhUeXkd92IJALY
uHLz+kV8zSNr+ckmRV9l3Y01/lM4McXySQhCyOhJDJqwkwE6eV++iio0Lq09xjhS5qgxnPfkKytJri2CcBYR7y3HQN761reWzsYoDg81+/mxM/kOocJfI1IR
kBJIqmzrOtakSUmXpjILMd94Frs9e2f3LBCLgdDLHGwG4QViwJn4PwnGYgAbyL5fVLs90wrpRx2HTU3j8LItHrA4AAA14UlEQVRw5SatPDESMyX10eaUc4mY
UMqkfcFDVJD3uZkyMwswE8Rs++Ojuu//+G3py8xppC+WyYkOmwwL70aXdZo38m1G9Fic+kjT3UIsRqm1Cb3uSwJA9BuhpxuV2Pf+zb4u8+XRugTQ9CH9Q9DU
bv4mTFmWClGaIq3Mf2ZslgjXJjEL8q26j+B5xpixm9cPv4q6c68/G6UXkbYcA0mpymIl224Sbp7Sb37nlUMrdg+VRb6v5Jf3U11zEnK4I7ZnOY0wJlIDpsWR
zq4t6qJ78BVzTtruh6OK8hmb/YqxGsAmSLfu0+qVIYzScSBMhZ/E4k5D3CtyDPksAs4EJN1h/imZZfgwTbAvMTFY2NT3sCVKsSFEXJ1JqjtEEE5ovGCEzD5M
o+2m2Zlw24qgBMyHWbUv8S8lsxMQsmyUe1iYOrsRU9PaQRDK4A4mKYw08YYJTRlzyewWML3nWUcUW4vSP5jje9rzCAT5PHtWhoOCpt2/Ub9vOQZiwTIZmVv6
TAxO3QSBBDwqAmZUZ1Blc5HcNaSTU0Y4wU1wTCujL0aVk6YLZgSL7CLRr0OCSk3CBMHw+hJnt34wqbqSnklHSksTIGy+Gn6r7jW0h4z2yb06fZ6rDEnvPJd/
YBnIgpLjDxPt4tCn/nwnufGPdsf8OokIARlogtmu9pAw9ZX4E8awzmi6Sc9cpN/Sp2BRX20afOHWKTBqP+uCfUwc7QIWmGWNe8Jj9gkneQpRsOvDQFyT/i1+
WJrIolKOXXiM+Bv5ZbnQAj28sWYRGmlCaIgBMrzxbVz9OF1zYIiEEXHFudXd8+FeC+jXYvGznyNVWj6NlDrGlT/pe1Kc+rIp56I66fqN/C1Sawwig9JR22cC
qCN7Oe1L29idV0T5TGiEBTQHpdxkq12gMjKpOIZD61x0ImDkPqPhnFR9684ca/8NrGkxoq6chOkQNr43kTwWIeaxDB+17yRDrvs+J6+T7j3nGHPOMpEgF3X3
Ny3gZVS7jE9zwfjKcsa9skBkuPmossZ9x7SeZwwRwBZNsOzWO+fqGAyWj4EUk1QMDtKVKJNplAsiqVeEUQKBAXEEM6GIBXfojJPYcvOV624WaT5Wa/Yaro9J
rSymiD6OzOH71/OziDR14+jn61ktuT+jt2hrk7DSDxhxmmSYVvr033CdMuyyrwY6fP9Gf6YRZPQVwWRWom2TfI3bHMPjXvfee++pmsqkemT2auX3CYWdVNZG
/0b7zUjKtWipgmO0XZQhM/QesdDT1q0RQt1FVq1W+OliIdQdvsocFmS71232+0kMZKmSKQbYhcIp3YTa2ERIXBNmpiYW+fxp5GssjuX72D/QRNqCklAu/Cgl
gVnsN2j8dSns9yWBnYSMMWHXlLAvNJpGkjsUKm8T0mP3UZv+PpNKhhO9gc9qKVJbN+EPaiLkuQmHaxMMogmneBMTriSqg2VoJiW5YkjJ5fdgoiWpYmiSU/tu
VH0kEEQxeZsQCpowC426bGG+C+23yQSIYbabuV7aafxKbCnpZES0NWEhKGXrPwk/Q5pt9MktIxHgOWKOzEqSbCozFrYm8krNWsym3CcJZ/jfmlj8m/CZNREZ
WMbkaisTgTEl0aFkhwgWIQQVXEJ4XW1xO1wv4SqyjpkT1rRlo6VkICahyWJhDvtwWeT7Ao+ZRLRE+QtJpQnpsGTttRjp0DBblUylsnSGbbJvsWOviyisxnNQ
mHvGXrdZP4S6Xh4dUm3JmjtLPcIuXBbxONOiiY2AJROsbLCjKLSGJuzzTaSHaK44JtvuqPu6310oss+awCZz+Fe6Py3k+4iGGtQrAhUG72d9g0n4Q+GDKwtb
BCk0oQnOWuQO9xkPyUB+vgQYdxsQ4fWNrLkIPhGWPhMD6ZbpPTzmSQQrZJ2Zx1ozz7r1LWvtK2TfJ83xujB9NBFKWxhIOMRLKmuS7mrJgr7ei3o45YqUqG4R
ArjaKq779aEel2ekljbrA/VHOMOb2InbhPmjaHWRFqMsbhhzRMQ0kTmgkQo+U2jP+qyd5iD9zfrsRbsPtinJzrNuax0P86xL37IsyAceeGATkX8NzR9htGGC
6VvEhl0XPo+BZSItKhv28Dk+aCkZiMEd9vZynoEFyyK9qDn0qdEWaeah8LfMsevmU1QEFJSCSGq/6kjKs5YeOcoaf8gkppqTruZpuqN10D7QPKXuUuA6/OvW
kaa7DERrioCPUtV5aE3r3eYI6GgiWKVowJ5F0429L034KhqmqEUjptdTw/yOaNPhbylnB5Uvlujf2g15m9TYCDMsgLMf8mcsIhkgbLCITyBivheumlknDOTb
MajnSZgGtX+ezEP9TomDdxDml/UvXyzoPwcyJaOONDwLWsuV1eK3MbdQ7FFY+eOCfVLXSJ0zYB6x76j44zCQUcyDQMcvEpkQiv8ugnKKb84haLFvpAg9691E
/itMAzH3RkRWwyeYTHu9nz/P8sdGdIhyWcQw3oxKyJQFolL67u3Ie+f9KsJomDIL8M6xiW6WCKfh8tbjczdzqbPGl4Gk9YgJ8If8TksQxmvnfe5dEdGyDPT0
pz+9YCx6TKj3opL1Kc/sMSaEN0/bkNdNre+e/LOfw96OMHuVc2xknRBVKAx7niQS054rz5Vbz7zLdCYiFI+NSMVFoklRWNGGPwI4/H7RGYjFLzfzyMy7ml2n
691BwlPtN4HpXSLP1KLGeduXkjmtpGlY1Hpmf3UPO5K/aFkod0cLXQ4Namq19YN9NcJEpcORa03SPrmWpFy3cP4sFjbXCAG1f2leFNroIBGpvQqLtncp26me
mW/NPPO+z2KfmRckXJUVQDiuvWUyI0gCaj7k3FWu/WDCp9/0pjeNzHic9en7au+OzAzKzuwWQoZlsfBc2xOEDvfJ49X3mWu5bssyEBv+IiJrIEHYH7AIZNMi
KcYAkRLiU5/61CJUa2wdInqq1DV8Fa19HYtM9lHA1Z9klstC0tSH767Uu3vmA+leUkm7p+0tcOTv3rGHw9k0NrJJdWEMSTNjvw1t26vPpFcbFJ2NY2+CfQVO
gHzWs57VOoZYLqdZGIt7w+xY6ioZ6KKSg7RyLDjMrQ/zsFBjCDYVj7IaaCvmTYthNYClFDIw9yy46yNjb7UZBZTtILbM/KA8KW66ZM/QPe5+9/Is/Zp55rrX
bPT7LclASGYmTQ4gKUoWwUxkUGbqEnWjIXXPCNjozu/zPBKxMzbUlyqdB2n1uXcjr3FGSGY6pnFmUsaNrMOsz4oAhbIpFcYYgiwINrxJleE7jEHbMATMQ4JJ
CxUzLWlUMkSM5xnxx7wkwzGGI+WI3dCZu0pZ+ceCwLzjeNdpZp1sF00nzSnGBK1nEYmZJ6LPSlslUe1bz9yEDBOEWdDgxjGTbHv4M9sjjjiiaCiYCIylpVEO
TagPSW4pLUr2jz733ubmrplQXSRxtBlSTrlMk9LnGetxzZZkIJnvRgfwNZAs0LSBsB4Ad8sk8eUO05Q4TfJFNw1Ji5EDO1OadNu1CO+7k4/ZYdnI4sU8AWfH
CEgFI+sBe7gFinSKKa52rFgAab1MI8rNXdgWWIuc52FOz4pzMqZJzY40yHGwqNlhYSRDhHpqozQvfUmOMH4dzBvR0t7xjneU00LtvieYTiMCF8aRjBZjmUZM
jgSCxJZWQyDSX77TZxgF/0jSZ+Jk1Ux3spl9seUYiCNrSVeAJ6n1lQCyY9b7lRQXUR7lfHZ1ZA4wuReZpF9XV0zPhFpEktk3mbLDvjZbWFgtRrSIrH+ahggX
tGe+jHmRlCf8Q5hV7Eov6TgyDb58bJNMqt1sxwQhzGnRyEFXxqq/Rz3qUb2rZ+HnYyCIdEkbMe/j46wafg5zlTXj+OOPL8laYyNl9/LB+0ypM82UKvklc1TW
mYZJYEiSxyxN3tIqdTVrmb/zrJ7NCnLZUgzEopG5pdglu2efZ4csyiuzQZ4Fcu9wzi3qgsfhn+q0Mw0WcdHQpyTzzDJLivz0iScuSldPrYdFO+3oghXSXu/o
VEylzyFeUx8ydIEFx6LFUUszdv6KhIz8KuNykJGA5YTLxc49i0Qc+ql9MLHlAU6T6ogZOFnwaU97WmkXf9Q4YmaK1C2DoAX94kwWvokumSPMqFLGZ192f8/3
HOGugad+5qvpMo+8jgVFP7lO8lVmb4KA9hk7GJAosc04n2VLMRAdmrZPh0ZtFFm8DECH1TA99CVJGg0KvpBRA6dvOet1nQUjJRxa3aJnXjWZktmNOid8vXBa
a7mp4Rm7sucmOaHRwuKMm/WgPNGQlMzMJdKHZuJwr+7i2xVuCD6Z+deCt1qT2nq0I8vkVM4Ipj4HxH3wgx8cMG5BIhzY445ewBS6JqR8JuY7HInm7A/9xpSO
6fgbRTRLAoM1gAAxnD28i7tnZ1SZPtJOPhBak7orQ5jyqDqOeva8vttSDIRDEZAcWX1z/c8SiTIMvgHjuf5WE6nEPpt2b4cpLRoxteWE7HsaoTZ0B/5Gtklf
OrhLPzAFDE/sjaxL32dZXNJeru7akPiRUAkXTBfrQULbOeYtRP7gFps7yyuBaBzlPOOj4SdYFCI0ZBumaW0YX44V9/hL0+Fwe2LTZDkQim+Ng16QA4br+1Ek
uIEwILBB32ISzGCjKE/e9PzD4gTVSZQHjzmygMmTiY4Qam6yZojM65q4JpU1r9+2DAMBXE7EyJDZCx9SFocVm/laTDOp1nN2pQOuTwU4z9KMRRpcJLKIiXs3
sLWr70LhvlwAN6M96fAnRPQ5UGwz6th9Zjed+6gABf4JpzYOxuec8SXwME0++UlPKmdX0OIsRBbXccShTMI2Njj/F4G6wgO8pvmN4Jl7nHKMf+lLXxrZlBzT
zFE2Jef+kMh+3L47TF6YSmoZIr4EJdAKUjPATCLLwMiyHSORUY7TzoN5fKwRyhyei9puM6qoukXSQJYqF5astv5QRK6U12n/5KIKNbZk5wxpYaaU5THomjAB
lEdJoSJddF+KhbmRfjykmSZMWH1v25Dr5BGTgh3tExlyw5nb67mxsJTr5KSSmiMcfU1s8CupL6S+DpW7VznTLopJ0wSDaOI40UauI5SJ50KrK+ncjYdFT+eu
30P4KfWPhaqR1j4Ei5IrzPiQxtsYk3yyJPeE71lJLstNa/wnvbu/LsWC1oTEXBL6ea73+jFWwHLsgPfS0ISGN0gd071/M97HAt7E4l0erc9j0Z5YDbg+5tGP
bu6/334ld9o1r3nNMsZPiVQ4xpXknpnqJMd0mHFX5NULhlLGnqzVeY2kocZ7UmhpTfiyxq4LEapdEohGcE15bjCAkdl3gyE27z766JIoVvLRLsn3F0yliUiu
uacG6j5nte/XnYGEGjnIjLnaynWvt2CEfb4MaANDvhu5ekzIcaTDnZ2ABucbxIQgVg0oJsy5pwzECG0skytMAM1DHvzgwa193oQ0McglZNBZFOedG6pPPUZd
ExEnjUkZPoXmgQccMOqSsd/BPZx+TWw6a/RxkvY6Q2UtBCflmqj6PBffLFM/JA3/lt8v0uuPI78SMh5jD8cKvLKefoNbRAiV82rWM/8U5oD5y/10kxvfuPne
WeMy6+JVP+bcSuaNueQi2r12o95bYPP4AYtyH7rPvvs2R8ZYkpNOinfHP8gcHdpJyZ0V5ttSTFg2iiDSHVt+wBz8Ic+PkxqbOCu9fPYvjhhuIqS3iY2Jg++G
38BMPjQE+4gaHSlkhbO/MJgws+1wNkiceliY+7AgMPysjf68rgwkbJRlkTHJLZxrGXzulegMYRwRiVVANqgnUZiwys+4t04evt7ncKw1cRxoI038MDkv5HVx
YBLSeTeZMFCG783PYWopb+FgsV0EBmIByUSPYb9tYjNaVrfXq8UlNj+VNO0R7VPaFQEGTdj0e90/6iLSroRycQxr0TryGn0fUVflI4bnuqSwLzdhomxI8otI
xt8hkV4cGWvJbEn32pUClt8sbv4sSOEgbiJIpMkFbl5to0kfdNBBTThzS5HfCakYqYs6IYsrKTkp9jk0EWixQjLP3zbyFUb+EIES+azu48hcg6UjBsy/N7zh
DU3kuGqucY1rFA0GvuaCQ+VOOumkJnJRFS23W57xFqG9TZgfm4j6LD+5T//AMhlM957h9zQbhHmMYiDGgTT0sSF0BwFMvQlUEYnVhN9vuOhN/byuDIRaHPbW
hupI5cRE1kLKyMXXAM/BNKlMA8UAc213UrjHILTgMXNFrqKGeapL7omoleanITVQlw3EHLjd66a9NyGRQTfL/dPKn+V3KjGzCXIgVOLatyzXR1RJWXSo1UwL
2tdd3PuW5ToM/uEPf3gx7+R9Jou67bnnnmVSWyakwf54LLLhjCzSmr4zweLs614TOcveiFfMTorx1IKZTGBF2qURw1B6d6YN/WGRC+dtMSuF07QchBT5mkZK
q6utP4YfaTkah37lPGCKDT9IwZeEbCE0VwhqxoZTD9UJ5vvvv38xBackvdrnz+N6c5B5lEYEMzSJeeQzjR/Zbp0dFHs7CoPELCOlSV5STEzmaTJRQioMmBuN
rUiqOLjWOkQLMTa7NImZJeZnC3xHzbUIFW6OPfbY5klPetKKeik/AnhKXWg/wxpS9/mb9R5LH/knrJMTeFayoe6i4ZyNhWHWItb9Ps4qTqtR0RmxOA2cZNJG
oJiIveoUg3HgFI1DlAq+QiIXhTLbKkf0OOfftLo6Az3MAcWpJ/2Jsp4Zu51XS5y6NtPlOLSpKibtRCepcZkBAO6TjWDRKDSpQZtkEA5GN7GKwXDKRrYQegb3
2RMQgtfE+/r8mIEHsBL15bMIsElkrD8n8qTFIl3qI/Ko7/ifVO6sv8Eho6o4lE/vsWs8nwVHbd81cueFGSm/XvHqe5torVci/DJcPMeldDFPeMITyh6RFTf2
+JC70EOYHjjj8zb9ztkvrU2mnOnibOOjfU+zztN8zqyvmxaFBQQdbkIs2m7xBFNqBxt9hnen6kiRKgaP3zPXTrdjs4zhVxNT5FecAVJSrOTGRwNQKunNppCG
BnnE4uzs3lEdduiKJMsoNJiF07f0rTZbmFa7NwPOJhWcQzsr4YqZlmYaTmF6GCwohJ0wQUy7ZcN+14aQVEu7rhNhnqvJFG2sSesNk5BWVxU2PqqBItUyV5YF
VIryvmSsZGoei9jwPoa+5czrupD8B2Nl2g7w7jPD/Fzu0/4cv/k7xv785z9/MN+TYeSrdUDusW6+qry3z2uYp9rwlZTn7xX5+4YFAnuAPEsdkDUm1xkRfKF1
lP1nfZ61HtdsGgPRGJK9SbDIG9RoCHZ60hpQmGEGG3pIX5nrptux5cIx/7TZgHCvdotXJ537zk7qHBxjbl/3r22EzLDCvpI7bTIcu6UNYfctdZT5VTlCH+UQ
8vtqUksoJHffYh6yAq+WbMQLs0Opl13di0J2DKfkLr/UaslihSEbMwSQLmGcmMIxsYcnnKttBBu0YdsvGWI/ELuuT4mUHV2yeVA5YWJspSpZLXkOLV0ZNJfN
pC9HGK6U+OpC+xUiO43sRM8sAOa67Mf+vhtjGrPeO7IfKy//zFXZesOcVNKarIb5j6rLVyO8N8yDpXz7a7oUASNFK6JR/WREPj8Cm3G0mVl5JzGQdfWBRIeU
c8D5Po4P55PIqUUkTvTYHVxCK4XPhRmm+EXUle03o4qiI6dWn9M0NgOV6yJbcBPSS/GfxEax4rxj6xQ+u5lYnHrqqSVsUyXDXDS1TS5wglpoC+XaDGMWAsnn
wQHIyS1yh02/LwkfzfDoGKTlCNK+9+Z16s/PJVqL/2BRSB/HQlH8RMbBaikWlGavvfZqYvNpCVY4I3x+p0Xor9M39YOy2ev5BdjU2eyFnPLp8WHwFcTepRJs
cnJEWyHh5GFGXW1VmsifVcKLhb7y1WwmXSXmE78SH4RAgBDImkhCOHY+8eNwdvNHid586lOe0twoIqf4pX4ZUVWRzLD4HrQJ5o7AdVy2kHbjeR4Ui3/pG2UJ
WOnSK2KtMB8FKlx46Ohd/RwpWJpgcE2kb+netjDv54PQhOYImRNzHpLRqkNgJxQ7158sQkL0dFhsIGpClSzl67Q4f2JVjm+RGpyOBh+HWMaqY0SO0DSQ7b3Y
TAainZx6HPpdR+IkUEWpIEz0wAMPLAu/vQyhTTW/DuxEsFnQRLT0JQu+QAtlWuzUx8LYh1HnMywKue9EHy4K2feDRNXEprPe1eq2P3HwXXjeCqOwz+bK4fy+
4FlRPcMFG3sWV87fU2O/Q6GzBJ9Rztvh+0d9hq85jIGk83rUdRvxnfFm/CZxjFtgw8xWGC4mCS8OcJFTBLZ0YId5tblVMGVkbr/6Na9pItFh+cwhLlxfcMNa
yTMxDcyIsHVERH4hGHYZCDxfHBF3BAWMcJgwD8JA7ESfGzMbfsY8Pg9Utyhsxfu1OtGjIws5zYt5Y5zzKq/brFdmJmaQ6MjirIIDpy4zAZpmcsrfj+2cUTBq
x2kevckeu5kksZw2sq32tWlT+dOM0R0nTE8323PPcn4Bmz/Hel+S2kVZTJxSW89CTGvMaMphclgUkuJCnexYFmDQl3IsBWNtg7mXMgQL9CXBBcZt1+fI5Kgu
IczMlLSRzV7iQGXY0b6ZZId4jkO+1TRNqZu/YLrlNT97lWVBJudhyuCWWORX5aMaLqf72dxK5zvcpTLK+g5nopBXz7ozKiGsMcOpLnHksM+k+7yNeL/uJiyb
Y+yXwOlTaoqOK0TqEQNNarBxSUjdvIg0QmpdK9m0RTogNSC7nW2yE66HhttUvuz88zvN4imhHovxFmoXzKKE3wX3KVeePTSSsK2W9+LN3xeaTtnAGNKSEb9R
RDNKU49698WPCY40xVSk/sKzmRC096MRGulPeUJ7E8cQBce2zXONCa9MnMJFmUpWS4cffvhAKoY7idS4iIm12qLmdv3OMebhgpiUaG/GWB/KsWYvSO5hsgej
L5Ggh6XoSJVSNsCpkz07NmgaB33Jfh9zF7mPuVefbTQZK3Z8W2doRRERVeplDMAZDfe7MRtpjMqGvzIujYsYp0x+siggmxKN6dRUlJH9UC4Y/jc0rl3rj7lM
vzHpqqvwcmuCcmnnzNkftsbEtV8K7UN4sI2I6hLC56Du1sw4UKqY1FkumI+zbsNVMdbDV1Y2R06s8/CNc/y8A8eOsst3fTUQnDTvmfQ6Kg/QajlogF1OZCMJ
7R3Or2kJ1fqWn9JITL6SVK3vfXldmHV6YTAJn43+Lcw/M0eF6QeamyM/u2Gns7YhFoT29SG9rYbeETmaRAbN+syNuk+IZt9w+N8FruaJvlE/53f0OeRoEm4k
WOeDZHuFqU47WCrLE1iSUYR5f339w/q4KDgYK0Lh14smaSC8wgbWSKI5sNMNSzTDF7P5cfbhkuO4oGvC1NG8813vGnvNcLnDnzkmOcwiuVsTWTOLT0EuJA6v
mKTF5mgz2yz02Mc+tokDYko72JmjM3rXk08jzAzFj8IRZ8PYOAmti49nbDSRjKQKCdW6tNFu9NVIuKPqGyGrJXDAJjXSFE2AI5I2khrYqPtg8duQ1vidON9J
XmzBnJ76UznDBDOSOX+SHet8LjQ7Ura+3wxMh+vos7bxg8jDhkiaNvKxd49qFwd4nNHRvCzG4Puib5Dd9fpnHg5UAQsyKZhDiLYXEXMlp9woH43xy9EsrYb5
hmQr4NMsGG/A2KW1nyt8XL+LdSUYa6lDANtMXLT+cNVs/9fapqjbOBpV51Hfde+f9rtr9dOuu+5atDH+tvUg62Hsjxlb9EAyiStWvO+rgcSAmkpCPtkiv7+K
szS6hQrFk4mXxOss9EiwV2zeJFASmzz/9iLMStIsx6QfaDShGvYqKibk4MAY4YUx6Xrdt5kXheloEAoZKnapSt/2Tqq3voEhm69T1vqS/SVp7zcGhZtGkEG7
zz77lH0hNj3ybxhDtM4M43StkMtFO/Qo222DmL0yOa/4emySDLNE0aJtunzcYx/bkvByz1FeywfnJLt5UjiWSzbXfIZX537wWz3ykY9sDwqbPU3aplm2dz6T
vNbGutwLNc86TSpL+GwEsQzm5KRr62/rh8AkDSTGx0qm0f08TwYiLbKyV3N+cReSyBFUFg/fWUwiZ1IZ6ExPHLB3DZMWVX1W4shSv0MPPbQU0WdBxcQsdO7j
DFMnm4bC1rzir09Zs9Z7lvt+H3W00VG9xdLbVzAPssAQEpRrQ6H9NBbRaX8w48y3cEVocLlfGZP+QpMqxxkbF11SFvNaaMPdrzftvU1qmHTfdmGO9tKs12LN
HGafUkitE/FN7ENbau2iJ3RsJAkEYPpTD5sgpVivtDkITGIgE7Wkvias6OSpJNeR0NXYuFbyS029YeiCgK7hUJKLh2rN6cUxxVQgVJYZ64hIenjOs74bun3q
R/l1rnWta5W4eWYyz1P2OOJUtD9Ehs8k+xHcl/cKMfU+jhAtSQKHUzTnfZvxGoyyOBc9WyJJ8fBrJSYbIdFCc40dzkkqNgymkX0kwp7lLHpvhF9+KPL/MFUx
UclRxPTGJCZcmDlIOn+BCsYVE1jmkGJOY0qFPdNMLJSNfRhxklsJqZ1Wj/X63XhhjhI2Gsyh7AuAjbHLRMysyIwlmGBSZtd51c94t6ckTtQrQSPMapmynEnQ
mBVyKjyWSXYjidM4fDYlHD52gBeHsr5nDpyUi8s4sU/F2AuNdpCAcyPrvhWfNcmEtWEMxEaoG8dAZKdm62XvXg3ZsGavgIgQi51BbSObQYOZ2K8hIWKYOVZT
7OBak1mZ6ieSZ1L9+A/EeJ8aG4DQ7W5727IJiP0fWewsmga8BdAZD+Lyrxgx6otC4sstwHxcos4sXJL8rYVkDJUlOTSusvjoH++nMRB4yUJr4cKQLRYInr+J
yCF+EtdYbP0hix1fiWfqO4zC4iI6B+MXCSM6UB9ZLPmm+E3m4U8oFVjDP/4e4wI2Fm5JDPkjbKrEaPg+NoIwerhIIsr/p04IM8/EihtRj+4z4rzyIigaM2+J
TMsXjwgj9dPfor8IKKNIf4u2eu1rX1vmrrFAqJTBluAqotL8I4CY45X6IzCJgShlrCo7TxNWDIiSR0mMdEiWPq6KpCyg0qYPRXQIc0WSXDWOg1wLSbgmfQTb
6yjiY3EMaMZ5s2k7E3nYXKJuYr5j0Su5moZz74wqezO+k7BQG4wBPqS+e0JG1RVm4WQtZckdJkXDakgqDvWIBaDcNs3sF+Go5Xo+BmZR5kNms/3iaFj7Jphd
pGlxnVdly+m0qHT38IPwe0xLuDjv+sfi0Ia2UdLRzLvs1Zan75mCQxgoUUX2fEgBwiz6rgkpWEKAKL4yfSz3nv5m2uZX40/zvT9mRCmL7EtjxnPSYgifJV/d
auvq+ti4WtayCMEtJxiqx1akSSaswHXjGIi8PZ43y9ngFunYLdqG1FkWiL3Dmcr5J5su2nfffdecdC7PI+5u7PFc+Xc488LENRiMBvVhI843xhwzJPgRES4Z
Ut1Cj6nMyqtfQmoreb+6jLlP5W16ciyrMjjRZ8lJxTYvHBgT4kOaRmHOLM/TDxacSeM4mWRI2xMz/E575nr9buGRe8lG02mMc951yE2lIfnPu+je5Wlz5uvi
rOdbJFBgJqE1tJGCfWJZOYb59WTTleuNH8zc40sRpBH7M4pwQQjtBmEYGxio9cTaYk4TALtkPhBg+dveGPWSkZcwYjyFtjwYe9qwFWlhGAhHnKgpHTULha2+
SGnHHHNMOfA+zBNlQbDgGDwyV66FSBK0BpEyGJLEh6JRMhFaLlKiwEZl1Q0Vu2SnJfWsVRtaSztWcy8Gycmr3dk+k+nwww8vSfnGMROLHvxfGhOT5Jz3mvSk
ulnIJFdOnzO4pcfGPJx1b1Gwa9dkDhNX+ROdRQLVloMPPrhkRhYvL/swYYCDf1HIngzMe1T2gvWuI+bP0kBj2wyyWAsa0O8EL5kF9I/PmMm0DLhxRka5VnYH
zOOUSCSJGbpPWYjlgICCjFsMxv4xgT1PfvKTW/cawzkHwpxXkk4aNyIBrQcwyjHu1XiTMVngRxxQVQI6fP/617++PGcr/VsYBmKxsviK/Bnm8n0ANyDcL7xR
p4X9u9wmT/6tb33rHfLs9ymze43JlBlQu4PFe0whfAUl1DG1iq60KNWHBYz5ZjUhrN3nb+Z7Ep+UDt12W6BtZLORjFmA+c6EC19TGxkFBhFX7qF50ATgNIq5
9mmbiS0tCWFgHOPKcmwkVV/XGVfhwC+MS1i1PyGrFo7sIxqL8OJrhNSovk996h/Cl7O8zXy12GFuwqA3g5gB9XXfzY7zqqN5lJuQMTCpjtLcyBRF6p9EaTFw
rU2thB5CZPiRijXic5/9bLndusH6MSmyTaSgcdcd/97TVswB9aNhYE6eMWzmtnYwj1kDbDnYSrQwDASoYs2pjbPa218QOfNJA3E6YDl8R5mxSbElka6VMDVS
DynW4hQRQaXcAyM2Xsw+DWWYSLLqYrBhbhauZSWTQP/kORbDk2ncZ9KYvRiYqGvWEnIJa2VE2omJMGIgzBsmfh9i98acMBUTQp37njvSp/y1XMPEos0OMNsM
ovF5PnPWRlJqnIQSzCOZib5yRMAkIulbR5j+MD7zkKkrIiiLYBmBOuV2AgStg1A3vOh3y6c1y4ZA23CsgPFsT5ecZH0pIgHLGDPOCDFbhRaKgViEDdZ0lq4W
ZKqok/1S+3A/57fT3+ZBUj4wf3DKkmT4WDhpR50GxiQXESKlPerAFLEVyGTGlNmGmRyZiKj4zCy0DJIW/5OJT+Paa6+9SrNzIVqLBMYEAf9pCQQNanXp63Sm
RZnYNBbCAEkRw1wEkniRdhT5kTalOk72w4z7aH7zqiDtwx4qfUiqZ0GwLsRRvlOtE8aZ/SnGofHSJUxi2IfG2U0DGf6+e58x4fnDggtGhhlgTvwpmNEkslYQ
QLVtmgY1qZxF+m0SA+mfUS3QnQdFp5f4/GOOOWamkFthmkJ2hesKhxTeK5GjNBrzICF/Yc4pYYzKta8gpN0d0rmEo73UwT4ECd6kQpm0b2QedduoMoRw2gcQ
0nrZtxO24JIYLjS0EiIpzNMeHCTkWdI4FNJeeY1JVl5n+RcLStkDIJmcvR36YxQZB8J8w3w16ucdvjNO1Dns3CWdR2ixzQWjjM0mWIVQVcJTr3pW8s5unWDq
Gn/aGgyw/Hmv/d0//ROLZEns6fVXkeDz19Fubc8/13ivz2IRL68+G+exGBdsxoXKduu11vdSEOnf0CJKKp1g6GVehwlrYtH20USkXQk9l87IeOmSvUj22Rij
xo5QcqHfUrBYc6RjCUGie0vBS2JDCS/hIqQdrsKc4Ww/kT1efjd+JlGY00q6JeuTbQXCzI3VrUobzkDka7Hfwl4QA3gWcB0AJaZfnh4D0GIjI+U8yADDCGTq
tJFKjiUDsEsOf5Gj36APyaZc0/19K7x/5StfWSZLOHYLxhbcURSO6oKBjLMmnuviiNrBpSZgmJnKXhB4hY9k8Nu4N/b56FObHZ3RMIpsvrMY+utDxpoxoo7y
soW5ZOyZGn3Km9c1oQWVzW8Ytn0MFrCy+MfZJs438d5fMopsczISjCWZTEitY6tl4cNA7TuxyXL4z34Qf/YEbQRZ+DECm0VtCiWkdcfNqDqEFlD2GWEO9sxY
1LsEo7AKNHGUdDlHg3BjPtsTIisvwUgG6VNib9AN9thjIAR9ODat2ktmThsjGCqs4OHe4fnffab3cE/m7j6MAyMLzbKJEwjLJmLr1FakDWcgQNTBUh+Hv2Cm
FN7KwERIDfMmA03KatJYmGhK8QYIpmKASohH27FbGBObNujnXb+NKM8kJDlhnsOTdPj5do9Lk20BsPhJhkhiC7W/pKz+YWxYhBuCa/geyi5hkxmmtMg4yrNs
xJQJwMKmb5VHE7QT2WIzTHb1u5eWNCoZYPd6iy4J2yKhXExPvReBMFWLnLkQppmycMHAQu/P4oUp53vX+ztvfH/eeO1+71p/ySi85ntl5mfvMXKLmj7xisFs
pAatTeH07t0FNpiGs78s1ua9cUQIxTwvEmPparHYa8t5AjP9jRHamGpsapvrtRmjthmZECG5JEwIhLG3q1gRzP9xhFlj6sq3sVGfySDgM4ZuHFrbCMkOryIc
2O2PCYWJbFyxS/39pjAQO6DRR2Jnqaygi0S7xABz4puBkWRiRSRHE2mwyyQnOcUBNUU9zmu20ivTgF3cdv5PW1SYJCMcspghSPmkNvdblOz8lX3Agqcck47p
wgSmlSBmJKfrXSImd/dZkdyvZEa2s5gJbZjsLDZpLSwYzyQykZkanccQoaJNnMNedilPumejfiPxyuYMO8zAgpaLOwy7mGxUnRbtObIlEEyMn7cfdVSxYGAA
mWLltDiF8aPBTPbae+/mCiFsSGcSPpVi3YhQ23Iei3Ns4ArvywZz+UEIHhiCseHkQlrQOOZBmyAQEVbcQyAiVGJKTGOYIQaEgWBiCNNi2lY+oZPAc8ABBywa
tHOpT3EeRUk7vAYw6xLaF5JqiXKKNOLRD4tHnEacipxvSMy40OOYzG0cdztwpIX0s3iVX2ONRDXZMMkJGKaTFaWFml4iWYRFclbGAj74HUZHve1tJeiAU7ZL
kjfGpOt+Vd7HZNzhu/zCszhWOfKF9w6TaCph1UI4p/WDeH7je9hBOlxm/bx4CAi9DQ2tbCrMDcgh8beRJqgEt1hLEOd1jjFRgMNZsd0zapy86JBDytiwh8T9
gmU+/vGPt8HUB5GinOdCdwWXRGqdkU5+ZYvYEuTTJXNFZGcIByUwpfvbsrxfKCc6lkcilZNIfiimhfXKY+9ZsxAnIqmQFiJxI4cYyYLdVV4YFJ0/S9ELf4/8
XUw9ciORiJE++lRIdd8OSa/rjCV1kdqYhJgMmAckBPxknFZIWssgB/ZhxHQiFxFNgFrPlt0l0p0/kjdpUX4oeNNo2Ke7FPsWikbIVBCbyYpJitQ3TBIt+t35
JBuRpHD4+fXz7AjQ+iUsNR4PD3NXhOqXwoyjCJkviSlDuChmLOPFH6KZ5DimnTI7mb+0OfM2tTqmpzfGyaOc4xFJWMascWvc+y41kjyZtBR+1j9rg/rRHM0P
497z9w4tqOvXpaUwkdF8+UaC0RUTbbesZX6/U1R+7EpINaM+TjtQahYA+A+odBxY1MxFIo5WC44kbtRbDIUpZVxE0CLVfS11MblC2iiRVZyNGa3CJ2KS+Iz5
h7RVTFwmNnMURmHhR8UOHXZ4QoFFPl/Z5qn+3wqV/lthEuAYdm2aaXJSY0qy7VL5XeMIZBM9NJwdGI7f999//8Jg9BXHuMSVyo0QytJ3Mg3z4wh28Ft3AVkL
VvXe9UeArzHSlJQxJ8rR4sxfZFwhJi2OdWa/G0cW5/OFQJL9a6yJlDR2/BlHeV/WXHLGCF0uJijji8m1S0yknkEwMaaMe4yDOUv55sPFYpx7Lp+eiK8cx91y
vFcX/jdmLuY3Qtey0KRkipvGQCw+BoNIJwuGzlkEMgBIFqQGJLts7HovkvMi1G8964Bp8EnFnpZywmCfZ5lI/CUkMRON45JmkdrLuDL4QDCA1E7cS7LEQLwm
paDBz0IatUCgnKjKOOSQQ0p9Y0PZ4DfXYVqRiqLYoGk8w/eWi+u/hURAX/GXiQZkqbBoIxI97UN6fq8WfYv7OMpxaVzlmMlr+TQzyo+Wy0mPrEVCjAlH3hvj
6uO9sb1b+D2uEZGGozTeLNsrBkRDIswQxJ2Xzj9jTMZGxe6lC/1+IRkIcDlpnZGwKMwje9HAIFGon2idSQM079kKryKeLMZhA16Y4AYSIA2QFiyiBnMxmS0G
+Qp7MfscpadGiKYJzwTBwd+VKl2PhheS8mX9t1AIdPuW2ZTAeWzsl2ECIs0bF+YlgQczoUkQSM3bUdQtz+80ifBNFCHDAk+TdQ4QMpY8z+/MUYQcGrZ1Ic1k
5cKz/imbEEVTF/7/+RDEPhsMD9MwHjExhIkRZAho0/a7nFX0QrwsJAOBjI5JqXEhkDqrEgaEzh5WeRepjvOui0kjfNaZIMx2i7LIMnHxYfB1qBfT4vBiMG8s
anmLjYCoKOfHMDVjKhZqxA/Hl5H+LsKD8TOKDjvssOLbZJo+PPwrTLT2Mk3TnJlrMR/akefabMxPKtqqu5YxVfGhMJ/6IwQxy2IgzMDLRJMYyHjdbwNaSIIg
KS4ylcVKBUPi3cpkMjLdYeqczenT2Ow2k/rsL0H8GcItF4W5bTY22/H55iNGYVHzxxTqEC7aqcAX+8veFI5xfhHaSaQ9auwOp43y6SYxUTOHRcqcsp9Ddgva
Qu57Mg/MB9oJ0y6NBLPANHzPr4KYbK1hfCwEMGHs9oapI//fNDNX1mdZXzfNB7KsgG3VepOiDjrooGIaWESTImGDo9zO3spAtuoonN4uDCRp1DignTgSl99E
6hImJUT63zuc8MYQLSPOFiqnikZG38J4bJq1P4jWwqGOUdhMnMILTYb2EDnhyr4jATWis1xP2+j67bJ+W+V1kgZSGchW6eVt0o5cQEYtHtsEgtrMngjQTmgO
mEkk+Gw+EWavX5y1gdX4EYVlUzOTVCQMLWYoRdtsiDEIAaZNMD9hHjSN4dDznlVZ6ssmMZBNNWEtNaq18puCQGUcmwL7Uj6UryGOZCh/fGiSLAr7tTcIkxD9
SSChQUibwwneNT9tl+CZtXRuZSBrQa/eWxGoCCwNAsxY/oTRJqVGKyR4HLmmCi6j0dlx6+7o6+q3FYGKQEVgyyHQhzH0uWbLAdOzQZWB9ASqXlYRqAhUBCoC
KxGoDGQlHvVTRaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUv
qwhUBCoCFYGVCFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHo
iUBlID2BqpdVBCoCFYGKwEoEKgNZiUf9VBGoCFQEKgI9EagMpCdQ9bKKQEWgIlARWIlAZSAr8aifKgIVgYpARaAnApWB9ASqXlYRqAhUBCoCKxGoDGQlHvVT
RaAiUBGoCPREoDKQnkDVyyoCFYGKQEVgJQKVgazEo36qCFQEKgIVgZ4IVAbSE6h6WUWgIlARqAisRKAykJV41E8VgYpARaAi0BOBykB6AlUvqwhUBCoCFYGV
CFQGshKP+qkiUBGoCFQEeiJQGUhPoOplFYGKQEWgIrASgcpAVuJRP1UEKgIVgYpATwQqA+kJVL2sIlARqAhUBFYiUBnISjzqp4pARaAiUBHoicBEBtK2bfO7
3/2uZ1H1sopARaAiUBHYaghM4gETGchvfvOb5sc//vFWw6O2pyJQEagIVAR6IHDmmWc2P/3pT8deOZGB/Pa3v22+/vWvj725/lARqAhUBCoCWxeB008/vfnG
N74xtoETGYi7TjjhhLE31x8qAhWBikBFYOsi8JWvfKU57bTTxjZwKgP5wAc+0FBjKlUEKgIVgYrA9kLgmGOOaX7/+9+PbfRUBvLJT36y+exnPzu2gPpDRaAi
UBGoCGw9BDCOt771rRMbNpWBcKS/6lWvmlhI/bEiUBGoCFQEthYCH/zgB5tPfOITExu1U/zaTrwifrzABS7QHH/88c21r33taZfW3ysCFYGKQEVgyRGgfdz+
9rdvmLAm0VQNxM088U95ylOaM844Y1JZ9beKQEWgIlAR2AIIHHbYYVOZh2aePf4O8mYafelLX2rOd77zNTe/+c2nXVp/rwhUBCoCFYElReDEE09s7n//+ze/
/vWvp7agNwNR0kc+8pHm6le/ejVlTYW1XlARqAhUBJYPAfv+7nWve03c+9FtVS8TVt7Aof6ABzygeeMb35hf1deKQEWgIlAR2AIInHzyyc3d7na35vOf/3zv
1qxKA1GqvCjvfOc7i3qz5557Nuc85zl7P6xeWBGoCFQEKgKLh8Db3/72Zt99922+/OUvr6pyvaKwxpV405vetHn605/e3OlOd2p22klRlSoCFYGKQEVgWRCg
dTzvec9rjjjiiJmCpNbEQBKk29zmNs1+++3X3Pa2t2122WWX/Lq+VgQqAhWBisCCISCqVooqroi3ve1tE5MlTqv6XBhIPuTiF794s/vuuze77bZbc6UrXam5
yEUu0pztbKtys2RR9bUiUBGoCFQE5oTAL3/5y+ab3/xmc9JJJzWf+cxnmq9+9atzKXmuDGQuNaqFVAQqAhWBisBSIFDVg6XoplrJikBFoCKweAhUBrJ4fVJr
VBGoCFQElgKBykCWoptqJSsCFYGKwOIhUBnI4vVJrVFFoCJQEVgKBCoDWYpuqpWsCFQEKgKLh0BlIIvXJ7VGFYGKQEVgKRDYOWr5q6Woaa1kRaAiUBGoCCwU
Av8fgwPy24mbuF8AAAAASUVORK5CYII=
`
// imageEncodingOllamaHome is a 415x293 JPEG of the ollama.com homepage.
// Shows a cartoon llama character with text "Start building with open models".
const imageEncodingOllamaHome = `/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAA0JCgsKCA0LCgsODg0PEyAVExISEyccHhcgLikxMC4pLSwzOko+MzZGNywtQFdBRkxO
UlNSMj5aYVpQYEpRUk//2wBDAQ4ODhMREyYVFSZPNS01T09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09P
T09PT09PT0//wAARCAElAZ8DASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUF
BAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVW
V1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi
4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAEC
AxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVm
Z2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq
8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD06iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiq
2o39rpllLeXsqxQRDLMf5e5oAs0V5XffEXXL6WeXQdOC2dsNzu8ZchfVuwrufCOvDxFocd80YjlDFJUHQMPT270AbdFFFABRRRQA
UUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUU
AFFFFABRRRQAUUUUAFeUeI7u68b+L00PT5CLG2chnHTj7zn+Q/8Ar12XjTxLZ6LpNzD9pUX8sRWGIcsCRjJ9BXmPg/xPJ4djuDa6
V9rnnI3SliMKO3A9eaAO/wDFyWHhfwDPYWSLGJlECDu5PUn1OM1V+HuoaVovhWFb7UbWGa4kaUo0oyAeBkduBXMXc2t/EfV44obc
W1vbLyGJKR56knHJPpXT2fwr0mOMfa7y6mkxyVwg/Ac0AdpZ6lY3wzZ3kE//AFzkDfyq1XmupfDF7YfafD2oypcJyqSnBP0YdKzU
+IWuabp1xpV/bltUjPlpM45X13DufQ96APQtf8U6T4fTF9PmYjKwxjc5/Dt+Nc7pnxP0291GO1ns5rZJWCrKzhgCemR2qn4V8Atd
v/a/ikvNPMd4t3Y9+7n19qzfHsVrfeLNM0PSoIkaHCMIkAwWI449AM/jQB61RSKNqgegxS0AFFFFABRRRQAUUUUAFFFFABRRRQAU
UUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFBIAyTgCsifxBbCYw2UU17KvUQLkD8e
lAGvRSIxZFYqVJGSD2paACub8b+JB4c0fzItrXk5KQKegPdj7D/Cukry7xWn9s/FLT9LnOYItgK9iMbz+fSgCfwh4I/tEDW/Exe4
luD5iQyE8g/xP/hXosFtb20Qit4I4kHRUUAD8qkAAAAGBXC6d4pvtL8XXGieIZ45Yp5M2064AXJ4U47duehoA7pUVc7VAzycDrS1
FPc29uM3E8cQ9XcL/OiG5t7gZt54pR/sOG/lQBLXn/xH8OXt3dWesaNbNLdQnEojGWOOVOO+OlegUUAeXr8ULuCxuLfUNMMeoouI
yMhd3+0p5HrV34ceHbgzSeJNWDNc3GTCH64PVz9e3tW94z8LW3iDTJGWNVv4lJhlA5J/un1BrK+F2tzXumTaXeMTPYkBN3XYeMfg
ePyoA7qiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKK
KACiimyIJInjJIDKRkdRmgDAmeTXrmWJZTDpduSJXBwZiOoz6ClsLq4uGWLQ7OGGwjbBmkBG/wBdoHX61dk0dP7DOl20rRJtC78Z
JGcnP1qle+IbDSsWNvG0hiXZ8mAF9s+tAHK6/wCKdc1nxDJofhTKCElXmXGWI6nJ+6oPFVk8ReKvCGoxReJQ13Zyn7+Q3Hcqw7j0
NSfCJkbUdXZv9aQpyeuMnP613+u6Pa65pcthdr8rjKt3RuzCgC1Z3UF7aRXVrIJIZVDIw7g15p468zQvH2na8ULQPtLY9V4Yf98k
VF4X1u58FaxLoGvZW0LZSTshP8Q/2T+n51FbxXHxF8XySTM6aTaHgA9FzwB/tNjJNAHpd9Aut6G8VpevCl1GCk8J5APORXC63oWk
eCdIW/hha+1SSQJBLcfMFfruC9OP8K9Gt4Ira3jggRY4o1Coq9FA6CuT+Jem3F74fS6tFLS2Mon2gZyuOfy60AYkfhCxAt7rxnqs
0t/ek7IzLtXdjO3d/wDqFY+k6XpOrXwttEfUtK1VC/RvMiUr0y4wRmug8RahB4s8F295ZR/aHtpo5Lq3UZkUDhgO/wCPpVXQtK/t
PxBJP4b/ALQ0jRgi+a24r5zjoFB/Xr+tAGx4V8SajHqz+HPEqhb9B+5m7TD+vHQ96u6z480PSpXt/Oe6uUO0xQLuwfQnpWN43eKb
xx4dgsyDfRygvt6qm4EZ/JjS6h4avtE8XQa1oNot1b3MmLi3IB8sk8kE9B3z2+lAHdWdwt3Zw3Ko6LKgcK4wwyM4I9a828G4i+KO
sxQcRHzuB0++K7/XdWg0XSJ7+5YARr8o7u3YD8a4j4U2E0smoa7cg7rhiiEj73OWP54FAHo9FFFABRRRQAUUUUAFFFFABRRRQAUU
UUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFADJiwhcp94KcfXFcl4NgguJLuS4RZ
JRj74zgHOf1rsK4u4EnhvxCZ1Um0nJOB3U9R9QeaAOdsivhL4pSwP+7s70kL2AV+V/JuK9WriviBoS+INCj1LTsSXNqpdCv/AC0T
uPqOtWPh94mXXdJFvcv/AKdagLICeXXs3+PvQA34mWFnP4VuLueBWuLfHkydCuWA/L2pPhdaR2/hCKZQN9xI7ufXBwP0Fb/iHTf7
Y0K80/IDTRkKT2bqP1ArB+G9lq2m6LNZarbGBYpj5O48kHr+Gen1oA6+ggEYNFFAHF6t4Bie+OoeH76TS7snJEedhP0HT+VVjonj
9h5B8QWwj6eYBhv/AEHNd7RQBzHhfwdb6HO99c3D3uoyZ3Tyds9cf4109FFAHlfxYiu11ewmupXfTGGFjU42sD834kdDXpWlwWtv
pltFYIEtljXygP7uMiuT+LMaN4TV2A3JcptP1Brd8Hu0nhLS2fkm2T+VAGzRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUU
AFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVWv7GDULVre4XKnoR1U+oqzRQBxSSaj4WuSki+dZ
O3H90/T0PtXK69GNF1mPxP4bfEJfM8GMGJj1BH91v5/hXrsscc0bRyorowwVYZBrl9W8HxTK5sGChgQ0Mn3SPQHtQBpaV4k07UtB
OrrMscEa5mDHmIjqD/nmrOjavY63Yi806XzItxU5UggjsRXiOv6VqXh2WSzJmitrz+DPD4OcH1we9e0eGNKi0bQLSyjXDKgaQ/3n
PJNAGrRRRQAUUUUAFFFFAHmvxW1H7XNY6BaZknaQSOq9ieFH6k13+lWY0/SrWzHSCJY/yFcJ8U9GEKQeIrMmO5idUlZT1/ut9QeP
yrtfD2o/2toNlfnAaeIFsf3uh/UGgDRooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooA
KKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigDzD4rZGuaKW/1fP/oQzXpy4IBHTtXC/FnTHutBgvolJazky+OytwT+YFdH
4T1P+1/DdleEEM0e18/3l4P8qANiiiigAooooArajfW+m2E17dvshhUsx/w968zOveL/ABldSJoKNZ2SHG5Ttx/vP6+wrqfibb3F
x4On+zgt5ciSSAd0B5/ofwqD4d65pEvh610+KaKC6hXbJE5Clmzyw9c0Ac3efD3xPPaO0+sJcSEZ8lpnIb8TxWr8M9ddQ/hq/i8m
4tN3l5GCQD8yn3BP5V6CzKqFmYBQMkk8AV5Xpk0er/F57zTObeMszyL0YBNpP4mgD1WiiigAooooAKKKKACiiigAooooAKKKKACi
iigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKAGyIkiMkiK6MMFWGQR9KSKK
OGJYoY1jjUYVVGAPoKfRQAUUUUAFFFFACMoZSrAEEYIPeuJ1r4aaTfyvPYSyWMrHO1BuTP07fga7eigDy9vhtrxUwHX1NueCpaTG
P93pXaeF/DFj4aszFbZknkwZZmHL+3sPatyigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiii
gAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoo
ooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK
KKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKA
CiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiii
gAooooAKZPNHbwSTTOEjjUszHoAOSafWR4tt5rrwrqcNsC0rW7bQOp4zj8qAPPdQ8a+IvEeqNY+GIpIoudvlqPMYf3mY8KPy+tJN
p/xIsIzdfabuXb8xVbhZSP8AgPf8Ki+F2vaZpVxeW2oSJbtc7DHM/C8Z+Unt1zXrcM0U8YkgkSRD0ZGBH5igDiPAvjmTWbn+zNWV
EvcExyKNolx1BHZu/wCddJ4n16Pw7pQv5YHmXzFj2owB5zzz9Kz/APhB9LXxH/bkU11Hced52xWUJu78Yzg/XvXH/EvUtdke5sJ7
DZpUcyGK48phuO3+9nB5J/KgD0Lw3rcfiDSU1CKF4VZ2XYxBPB9q1a8j8Cax4lt4LKysdM83THuQHn8hmwCw3fMDjiu+8W+J7bwz
p6zSoZriUkQwg43EdST2AoA3qK8qi8W+OtRiN5Y6Xm2PK+XallI9iTk/hWz4P8ftq2oLper26W92xKo6AhWYfwkHoaAO8orkvH/i
W+8N2tnLYJAzTOyt5qk8AA8YIrnLjx9r1/b28OhWHnzrCrXMkUDSAOR90DsB70Aej6neLp+mXV66F1t4mlKg4JwM4rF8JeLYPFBu
hDaSQfZtmd7A53Z9PpWB4w1jxGmgW0cOn+ZDdaduvZDA37pivzd/lxz1ri/Bmq+INMN5/YGn/a/M2eb+5aTbjOOh46mgD3aq2oyP
Fpt1JG210hdlI7EKafZPLLYwSXCbJnjVpFxjaxHIx9ai1b/kE3n/AFwf/wBBNAHC/DLXtV1i/vo9SvZLhY4lZQwHBJ9hXoteH+Bv
ENv4cGpXUqGWZ4kSGEHBds/yrYu/Gfje2T7dPpYgtTyN9owQD3JOaAPWKK5nwZ4vg8TW8iNGIL2EZkiByCP7y+38qxvGXjPVND8S
RafZx2zQvGjEyIS2SSD0I9KAO/oorgPHPjPVPD+vQ2VjHbNE8CyEyoSclmHYj0oA3/G76zH4ekbQBIbneu7yhlwnOdvv0/Wl8Evr
L+HYm14SC63tt8wYcp23D16/pSeNdautC8Ptf2SxNKJEXEikjB+hFL4K1m617w+l9erEsrSOpEYIGAfcmgDkPAviLWNS8YTWd9fy
TW6pKQjAYBBGOgr06vHfht/yPtx/1zm/9CFeg+MPFVt4ZskZk866mz5UOcZx1JPYfzoA6GuA+KGuapoz6aNMvHt/NEm/aB82NuOo
9zWNaeM/G18DeWemCa2B6R2jMh9s5z+tZXjnxJD4jstLlEZguYDKk8JP3T8uCPY4P5UAenm41SfwNHc2DeZqUlijoxAyzlQSfTPX
8azfh5L4jktLv/hIRcbA6+QbhcP33e+OlXY72XTfh5Be24UywaajqHGRkIOtU/h/4mv/ABJb3sl+kCmB0C+UpHUHrkn0oA6+ivNr
r4hXlh4vubG9S3Gn280iMyxkyEAHAHOMk4FVb3xn4ynja+stHe3sfvK32Zn+X1LH+YGKAPU6K4nwN45OvznT9QijivApZGjyFkA6
8Hoe9b3ifxDa+HNMN3cgu7HbFEpwXb+g9TQBsUV5Nb+N/GWqyvNpenK8KHlYrZnA9i3rVu++Jd5HpC7LSK21WKcJPBMjEbcH5gMg
jkDg9KAPTqKyfCupT6v4cs9QuggmnUlggwvDEcflWpLv8pvKID4O0sOM9qAHUVwngjxlqOta5c6bqsVvG8cZZfKUg7lYAg5J9f0r
b8ba9L4e0Bry2EbXDyLHGJBkZPJ4HsDQB0FFcl4A8U3PiS1u/tywrcW7rxECAVI44JPcGmfEDxXdeG47JLBYXmnLFhKpICjHoR3P
6UAXvHL61H4eZtAEhuPMXf5Qy4TnO33zj8Kk8FvrD+HYW14OLrc2PMGH2dtw9f8A61Z+v+IdU0jwTaaqUt/t0vl+YrIdg3AkjGf6
1DZeJNbv/AX9r2dpFPqJlKrFHEzAgNg8Zz096ALEfji3fxWdA+wyiQTmHzd4xkd8V1MzFYHZTghSR+VeBR3+rr4yN8lnnVftBf7P
5Z+/zkbc5r17wxqGsajodzNrtn9kuFdlVPLKZXaOcE+pNAHL/DPxDq+r61dQ6lfSXEaW5dVYDg7gM8D3r0qvC/Auuw+H7y+vJUMs
jW/lwxL1kcuuBW5f+M/G1ov2y50wW1sTwHtWCj0BJOaAPWKK5zwb4rh8TWTsYxDdwECWIHI56MPb+VdHQAUUUUAFFFZHiqbUrbw7
d3Gjvtu4VDr8gbIB+YYPtmgDC8QfDjS9Vne5s5XsZ3JLbFDIx9dvb8DXJ3Hw88TaUxn0q7SUryPIlMT/AJHH863vBPj+O8SS18Q3
kcdzvzFM4CIy+mRwCD6+tdnPrWlW8Jmm1G0SMDO4zL/jQB554O8canDrEejeIC0m+TyVkkXEkb5wA3qM8c81vfFf/kUB/wBfKfyN
cNdTJ4o+JUc2lxnypbiMhsYJVAMv7cKTXc/FZSfB+QOlyhP60ASfC3/kTYf+u0n86t+L9P8ADMqRX3iVgqxgpGTKy574CqeTVD4V
3EL+ElhWRDJHM4dc8jJyOK5D4nSNJ41hhvXdbVI49uOyE/MR79fyoA6pviZ4bto1ighvHRAFUJEAAB0AyRXCXGq22rfEW21KwieG
Oa8gIVwAc5UE8epFer2ln4XstNWa3h0xLVVyJSEII9Sx615Tf6ha6n8R4LqxQLbNeQrHhdoIUqM498ZoA634xf8AIP0z/rq/8hXS
eAbWG18Haf5KBTLH5rkdWYnqf5fhXN/GL/kH6Z/12f8AkK6rwV/yJ+lf9e60AT+Kv+RV1b/r0l/9BNcL8Gvvav8ASH/2eu78UKW8
L6qqjJNpLx/wE15/8HbiGO51OB5FWSRY2RScFgN2cfmKAPVKqat/yCbz/rg//oJq3VTVv+QTef8AXB//AEE0AeS/CrTYL3xHJc3C
BxaRb0BGfnJwD+HNexyIkkbRyKGRgQysMgj0NeH/AA912HQtfL3h22twnlSPjhDnKk+3H617Be6/pNlYteT6hb+SFyCsgYt7ADqa
APK9Aj/sP4qfY7c4hFy8AH+wwOB/L8qk+J3/ACPFt/1wi/8AQjTPBizeIPiK+qFCsaSPcv8A7IOQo/Mj8jUvxYikg8U2t1j5Ht12
ntlWOR+o/OgD2CvHvix/yN1r/wBeqf8AobV6bp/iDStQ0+O9hvrcRsoZg0gBT1DA9CK8f8eazb634qM9m2+CFFhR+z4JJI9sk0Ae
hfFL/kTW/wCu8f8AWl+Fv/InRf8AXaT+dJ8Uv+RNb/rvH/Wl+Fv/ACJ0X/XaT+dAHHfDf/kfbj/rnN/6EKZ41B1f4mJp8jHy/Mht
h7KcE/qxp/w2/wCR9uP+uc3/AKEKPiJBPo/jqHVkQlZTHPGexZMAj9B+dAHr0EMVvAkECLHFGoVFUYCgdBXkvxb02C11m1vYUCNd
xt5mB1ZSOfrgj8q9K03xDpOpWCXdvfQBCuWDyBWT2YHpXk/xJ1+31vWYksW8y1tEKCUdHYnLEe3QUAegXv8AySs/9gpf/RYrD+Dn
/Hnqn/XSP+TVuXv/ACSs/wDYKX/0WKw/g5/x56p/10j/AJNQBzwtIb74tSW1wgeJr9yynocZOD+Ve0dBXj1j/wAlkb/r+l/k1exd
qAPGtPhSy+LoitwERb1wqjgAEHj9am+LNxJceJbSzB+SK3UqP9pmOT+gpsf/ACWM/wDX8f8A0Grfxd02WPULLVUU+W8fksw/hYEk
fmCfyoA9M0uwg0vToLK2QLFCgUYHX1P1PWvPfjBp0CxWOpogWZnMLsP4hjIz9MH866vw74t0vV9Lime9ghuAg86KSQKVbv16j3rg
vif4jtdWmt7DTpBNBasWklTlS54AB74Gfz9qAO7+H3/IkaZ/uN/6G1dHXOfD7/kSNM/3G/8AQ2ro6APJbhf+Ef8Ai/G4+WG5nDex
Eowf/HifyrQ+JrvqfiDRtBhPLsGYD1Zto/IA0nxds2ifTdWh4ZGMLN6H7y/+zVF4YnHif4lz6wATBbQ7kyOh2hQPzLGgA8Mxr4d+
KN7pSjZb3SsIl7YI3r+mRVbxoDrvxKs9KBykflxMPQH52P5H9Kv/ABJjOl+JdF1+MYCuFkI/2Gz+oJH4VW8BL/bfj7VNbOTHGXdC
R0LnC/8AjoNAG/8AFUAeD8AYH2iP+tTfC/8A5Eu3/wCusn/oVRfFb/kUP+3mP+tSfC//AJEu3/66yf8AoVAHFW//ACWI/wDX+/8A
I17Bcf8AHtJ/uH+VeO+bHa/F1pLh1jQX5yzHAGen8xXsMxDWshBBGw9PpQB498KLKG68UvLMgY21u0keR0bIGfyJr2G5t4ru2ltr
hA8UqlHU9CCMGvJvg/8A8jDef9eh/wDQ1r16gDx34WlrfxlcQKx2mCRT74Yf4V7FXjvw2/5Hy4/65Tf+hCvYqACiiigAooooA4zX
Phxo+qXD3Ns8ljM5y3lAFCfXaen4EVjx/CWMPmXWXZPRbcA/nur0uigDF8O+FtL8OxsLCJjM4w80hy7D09h7Cr+qadbatp01jepv
gmXDAHBHcEe4PNW6KAPPrH4YQ2Or217Dq0hSCZZRG0IydpBxkH29K6XxN4W07xJAi3geOaP/AFc0f3l9vce1blFAHndp8KLGO4D3
epTTxA58tYwmfqcmtS68AWE2vQanDcSQLA0RSBEG0BMYHr2rsKKAMDxX4Xh8TQW8U9zJAIGLAooOcjHetPSNPTStKtrCORpFt4wg
ZhgnFXKKAEdFdGR1DKwwQehFee3/AMKrKa5aSx1KW2jY5EbRiTb7A5HFeh0UAQ2cH2Wygtg2/wAmNU3YxnAxmluoRc2ssBYqJUZC
R2yMVLRQBxulfDvTLG3vLe4nlu4rpFUh1ClCDkMCOhrKb4TWpnJXV5hD2Uwgt+ecfpXo9FAGXoGg6f4fsvs2nxEBjl5GOXc+pNJ4
h8P2HiKx+y36N8p3RyIcMh9R/hWrRQB5xH8JrUTgy6vM0WeVWEK355P8q0tU+HGmX0tsbe4ltI7eERKiKDnBJ3EnqSTXa0UAZPiT
Q4vEGknT5p3hUur7kAJ4+tL4b0SPw/pK2EMzzKrs+5wAefpWrRQBynh/wRbaHrb6nFezSu6suxlAHzHPatrW9EsNdsTaajFvTOVZ
ThkPqD2rRooA83/4VNa+fkavN5Ofu+SN355x+la2pfDrSruws7O2mltY7XecqAzSM2Mlie/y12VFAGZNo0cvhr+xDM4j+zC38zA3
YC4zjpVLwn4Wg8MRXMcFzJOLhlJ3qBjGfT610FFAHKQ+CLaHxYdfF7MZTM0vlFRtyQeM9e9dX2oooA5RfBFsviv+3/ts3m+cZfK2
jbnGMZ61Y8Ya1o2m2iWmu2001vdqQAse5TjHfIweQa6OqOr6TY61YtZ6hCJImOR2Kn1B7GgDgrP4feG9ZiW90vVbg20nzbAVYp7H
IyD9ayvH0Gh6NpVpoejlWmWbzp2Dbm4Ugbj689O1a1z8J4/NJstYkjQ/wyQ7jj6gjP5VpaF8NNL025S5vp3v5EOVVlCJn1I5z+Jx
QBueCbaSz8IaZDMpVxDuIPUbiW/rW5RRQBl+I9Eg8QaS+n3EjRhmVw6gEqQff8R+NU/CfhS28MR3K288k7XBUszqAQBnA4+proKK
AMfxP4ft/EemCyuJGiCyCRXQAkEZHf2JqLwp4YtvDNrPDbzPMZnDs7gA8DAHH4/nW7RQBkeJtCi8RaV9gmneFfMV9yAE8Z9frT/D
mix6BpCafDM8yIzNucAHk57VqUUAcj4o8BWHiC9N8tw9pdMAHZVDK+OASOOfxq/4V8NDw7pE1h9rNz5shkL7NuMqBjGT6Vv0UAct
4V8FW3hq/lu4LyadpYvLKuoAHIOePpXU0UUAcp4f8EW2ha0+pxXs0rurLsZQB8xz2rq6KKACiiigAooooAKKKKACiiigAooooAKK
hvLuCxtJbq6kEcMKl3Y9gK80uviHrmrX7W/hrTMoM4JjMkhHqQOFoA9Rory23+IWvaRfLB4l0zCN1xGY3A9Rng/55r0qyvYL+xiv
LSQSQzJvRh3FAFiivLdG+J10Zbp9Yit/KihLRJCpVpJNwAXJJ4wSfwqK78ZeNljN+NK8iz6jNqxUD1JPP48UAer0VyXgrxrD4kD2
1xEtvfRruKKcrIvqv+Fa/iTxBaeHdNN3d5dmO2KJTzI3p7D1NAGtRXlUPjDxtq+660rSx9mB48u3Lj6bieT9K1/CvxAe+1FdK122
W1u2bYjgFQW/usp5U0Ad9RXN+Otdu/D2hpe2KxNI06xkSqSMEE9iPSuWi+Ier32m29vpenLd6q4ZpvLiYpENxAwM8nGD1xQB6bRX
kqfELxLpOoLHrlguw8tE8JifHqp//XXqWn3sGpWEF7atuhnQOh9j6+9AFiiivOdT8f3um+MptOuFtl0+GYK7eWS+3GT36/hQB6NR
XleoeNfF8kbahaaS1tp33lZrdnG31LH+YwK0rD4mwSaBLPdWw/tKNhGlvGTiUnoR3A4569vWgD0KivJr7xr41sdt5eaatvbMeBJa
sF+mSc/rXeeEfEsPiXSzcJH5U8TbJos52nsQfQ0AbtFcn4y8bW/hsrawRC5vnXdsJwqD1Y/0rlB4t8dm3+3jS/8ARsbs/ZG249eu
ce9AHq9Fcj4N8cW/iNzaXEQtr5VLBAcrIB1K+/tWz4j1608PaW17d5Y52xxqeZG9P/r0Aatcl8SdUvtJ8PQ3GnXLQStcqhZQORtY
45+grkoPHPjDVpnk0nTleJDysVuZAPYt6/lVfxZ4sOu+GBY39sbTU7a6QyREEBhtbkA8jqOD60AeheBL661LwlZ3d9M008hfc7Yy
cOQOnsK6CuW+Gv8AyI9h9ZP/AEY1Y3iX4hTQ6k2leHLVbq4VijSFS4Ldwqjr9aAPQqD0ryuTxn4z0YpPrGlg27HB8yAp+G4dD9a9
B8P65aeINLS+syQD8ro33o27g0AcKviLWD8T/wCzDfyfYvtZTycDG3HTpmvTR0rx5P8Aksf/AG/H/wBBr0Lxb4nt/DOnJNJGZriY
lYYgcbiOpJ7AUAb9FeXQ+JvH2owfbrLS0+zNyuy3yGHtk5P4VueDfHX9t3h0zU7dba/AO3bkK5HUYPII9PrQB2tFFFABRRRQAUUU
UAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAcH8XLuSHw9bWqHAuLj5/cKM4/PH5Vo/DbTobLwjbSoo826zLI2OTyQB+AH86rfFLTJ
b7wwLiBSzWcolYAZ+TBBP4cH8Kr/AA08SWU2gxaVcXEcV1a5VVdgvmITkEZ64zjHtQBqfEXT4b7whdvIgMlsPOjbHKkHn8xmsf4R
3ckug3tq7ZW3mynsGHT8wfzqf4k+JLK30CbTILiOW7ugE2IwbYucknHTpj8ad8LNMlsvDMl1MpVryTegIx8gGAfx5oA4f4babDqP
i5PtCB0to2n2sMgkEAfqc/hXtxAKkEZB6g14N4J1qPQvE0d3cZ+zuGimYDO1T3/AgV7TLrukw2RvH1K1Fvt3bxKCCPbHX6UAeUPC
nh/4sRw2g8uIXiBVHQLIBkfTDEVN8VrszeKoLWVm8i3hXhevzHJP1xj8qh0d38VfE5b6KNhELgXByPuxpjbn8lH41ofFaxmtNfs9
XRN0UiKpJGQHQ5wfqP5GgDWtvib4ftLaO2t9PvkhiUKihEwAP+BVxnjbX9O17VLfUNMgngmVNsrSBQWIPyngnn/61esaNfeH9YsI
7q1Sy+ZQXQqgaM9wRWDrnjDQtN1NLCy0uDUZW4byQmAxOAucHJ+lAEPxHuDd/D/T7liCZpYZDj3jY1f+FlpDB4RjuEQCW5kdpG7n
BKgfp+tV/iqMeDrcbBHi5j+QdF+VuK0Phn/yJFl/vSf+hmgDP+LdtFJ4ZhuGUeZFcqFbuAQcj9B+VXvhi7N4JtAxzteQD6bzVf4r
/wDIoD/r5T+Rqb4X/wDIlW3/AF0k/wDQjQB1x6V4xqVtFd/FtredQ8T3qBlPQjA4r2c9K8euP+SyD/r+T+QoA9fZVZCjKCpGCCOC
K8a+H9pA3xAZGjBW381owecEHA/LNez9q8f+Hv8AyUO5/wB2f/0KgD07xHDHceHNSilUMhtZOD6hSQfzFeffBtj9p1Vc8FIjj8Wr
0XXf+QDqP/XrL/6Ca85+Dn/H3qv/AFzi/m1AGZoUS+IPifJJegSR+fJKVIyCEztH04X8q9nrxdpG8HfEt57pW+zGZmzjrFJnkeuM
/pXrQ1jTDZ/bBqFr9n27vM81cYoA8m8TRJoHxLjnsgI1MsU4UDgbvvD6Hn86ufGC6d9asbPPyRW5kA92Yj+SiqU0p8ZfEmN7NWNt
5qYbHSJMZY+mefzFavxg0+QXVjqaqTGUMDn+6QSw/PJ/KgD0XRNOh0rSLayt0CpFGAcfxHHJPuTXC/F/TYfsNnqioBMJfIdh1ZSC
Rn6YP510/hfxRp2saPBIbqGO5RAs0TuFZWA5OD2PXNcR8UvEVpqIt9LsJVnSB/MmkQ5UNjAUHv1NAG54au3sPhI11EcSRQTlD6He
2P1rivAviLSvDlzc3WoW1xNcSKEiaNVOwfxdSOvH5V3XhCyOpfCxbEHDTwzop9CXbH61y3w41Gw03UrzS9bjhiaVgEadR8jrkFST
0z/SgDb1D4k+H9QsJ7O4sL5op0KMCid/+BVl/B+6ddWv7Pd8kkAlx7qwGf8Ax6u+1S78P6VYvd3a2Soq5UBEJc+gHc1n+DPENt4g
e5ktNHFmkICmUFfmJ/h4A9M/lQBxKf8AJY/+34/+g1vfFfRb2+trPULSJ5ktgyyooyVBwQ2PTjn8KwU/5LH/ANvx/wDQa7fxZ4yH
hi6t4ZdOknSdCyyLIFGQcEdPp+dAHP8Ah74mWEGn21nqlpNE8Max+ZCAykAYzjgj9a3tItvCOt6yda0xklv1bzWIkdWBxjJQ/wCF
XTpvhnxHZLetZ2dxHKu7zQArD6kYINeX20EOm/Ey3t/D87SwJdoisrbvlON657gfMPwoA9vooooAKKKKACiiigAooooAKKKKACii
igAooooAKKKKACiiigBGVXUqwBUjBBHBrhtX+GGlXtw01jcS2Jc5MaqHQH2HBH513VFAHB6T8L9Ks7hZr+5lvdpBEZUIhPuOSfzr
ugqpFsRQqqMAAYAFOoIyCKAPFfhpY22p67f2V7EJYJbJwyn/AH059j710k3wntGuC0GqzRw54RogzD8cj+VbPhXwPD4b1SS+jv5L
gyRGLa0YXGSDnr7V1tAGP4c8Nad4ctmisUYySY8yaQ5d/wDAe1XtS0601Wyks7+FZoJByp/mD2PvVqigDzm6+E9m8pa01WaKMnhZ
Ig5H45FbnhvwHpWg3C3eXu7tfuySgAJ7qo6H35rqqKAMfxRoEXiPTEsZp3hVZRJuQAngEY5+tS+HtHj0HR4dOileVIixDsACcknt
9a06KAMjxNoUXiLSxYTTvCvmCTcgBPGfX60/w5osegaRHp8MzzIjMwZwAeTntWpRQAVykngi2fxYNfN7MJfOEvlbRtyB0z1rq6KA
CuV0LwTbaLrsmqxXs0ruHBRlAHzHPauqooAhvbcXdjcWrMVE0bRlh1GRjP61geE/CFv4YluZILuWc3CqCHUDGM+n1rpaKAMjxD4b
03xFbrFqER3p/q5UOHT6H09jXG/8Klt/Nz/bEvl/3fIGfzz/AEr0migDH8O+GdM8OwNHYRkyP/rJpDl3/HsPYVf1GwtdTspLO+hW
WCUYZT/Meh96s0UAec3Hwns2nLW2qzRxE/ceIOQPrkfyrUl+HWlf2ENMglmiJlWWSfAZ5CAQAewHzHgV2VFAGb4f0iPQtGh02KVp
Uh3YdgATlie31rI8SeBdK1+c3TF7W7b70sQGH/3gev1rqaKAPObb4T2iTBrnVZpY8/dSIISPqSf5V3emabZ6TYpZ2EKwwp0A7n1J
7n3q3RQByg8EWw8V/wBv/bZvN87zfK2jbnHTPWtrW9EsNdsTaajFvTOVYHDIfUHtWjRQB5vJ8J4PMPk6zMkZ/haEE4+oI/lXSeGf
Bel+HXM8Iee7Ix50uMqO4UDgfzrpKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiig
AooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooo
oAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKK
KKACiiigAooooAKKKKACiiigAooooAKKKKAP/9k=`

View File

@@ -0,0 +1,121 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Fri, 20 Mar 2026 18:50:38 -0700
Subject: [PATCH] CUDA get_rows q6_k support
---
ggml/src/ggml-cuda/getrows.cu | 80 ++++++++++++++++++++++++++++++++-
ggml/src/ggml-cuda/ggml-cuda.cu | 1 +
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/ggml/src/ggml-cuda/getrows.cu b/ggml/src/ggml-cuda/getrows.cu
index 2fab33243..dc5c4f57a 100644
--- a/ggml/src/ggml-cuda/getrows.cu
+++ b/ggml/src/ggml-cuda/getrows.cu
@@ -155,6 +155,81 @@ static void get_rows_cuda_float(
s10, s11, s12/*, s13*/);
}
+// Specialized GET_ROWS kernel for Q6_K — the k_get_rows template doesn't work for K-quants
+// because they lack the simple dequantize_kernel_t (float2) interface.
+// Based on dequantize_block_q6_K from convert.cu with row-selection logic added.
+template<typename dst_t>
+static __global__ void k_get_rows_q6_K(
+ const void * __restrict__ src0, const int32_t * __restrict__ src1, dst_t * __restrict__ dst,
+ const int64_t ne00,
+ const int64_t ne11, const int64_t ne12,
+ const size_t s1, const size_t s2, const size_t s3,
+ const size_t nb01, const size_t nb02, const size_t nb03,
+ const size_t s10, const size_t s11, const size_t s12) {
+
+ const int64_t i10 = blockIdx.x; // row index into src1
+ const int64_t z = blockIdx.z;
+ const int64_t i11 = z / ne12;
+ const int64_t i12 = z % ne12;
+
+ const int i01 = src1[i10*s10 + i11*s11 + i12*s12];
+
+ dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
+ const char * src0_row = (const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03;
+
+ const int64_t nb = ne00 / QK_K; // number of Q6_K blocks per row
+
+ // blockIdx.y iterates over Q6_K blocks within the row
+ for (int64_t iblk = blockIdx.y; iblk < nb; iblk += gridDim.y) {
+ const block_q6_K * x = (const block_q6_K *)src0_row + iblk;
+
+ // Same dequantization as dequantize_block_q6_K (assumes 64 threads)
+ const int64_t tid = threadIdx.x;
+ const int64_t ip = tid / 32; // 0 or 1
+ const int64_t il = tid - 32*ip; // 0..31
+ const int64_t is = 8*ip + il/16;
+
+ const int64_t y_offset = iblk * QK_K + 128*ip + il;
+
+ const float d = x->d;
+ const uint8_t * ql = x->ql + 64*ip + il;
+ const uint8_t qh = x->qh[32*ip + il];
+ const int8_t * sc = x->scales + is;
+
+ if (y_offset + 0 < ne00) dst_row[y_offset + 0] = ggml_cuda_cast<dst_t>(d * sc[0] * ((int8_t)((ql[ 0] & 0xF) | (((qh >> 0) & 3) << 4)) - 32));
+ if (y_offset + 32 < ne00) dst_row[y_offset + 32] = ggml_cuda_cast<dst_t>(d * sc[2] * ((int8_t)((ql[32] & 0xF) | (((qh >> 2) & 3) << 4)) - 32));
+ if (y_offset + 64 < ne00) dst_row[y_offset + 64] = ggml_cuda_cast<dst_t>(d * sc[4] * ((int8_t)((ql[ 0] >> 4) | (((qh >> 4) & 3) << 4)) - 32));
+ if (y_offset + 96 < ne00) dst_row[y_offset + 96] = ggml_cuda_cast<dst_t>(d * sc[6] * ((int8_t)((ql[32] >> 4) | (((qh >> 6) & 3) << 4)) - 32));
+ }
+}
+
+template<typename dst_t>
+static void get_rows_cuda_q6_K(
+ const void * src0_d, const int32_t * src1_d, dst_t * dst_d,
+ const int64_t ne00, const size_t nb01, const size_t nb02, const size_t nb03,
+ const int64_t ne10, const int64_t ne11, const int64_t ne12, const size_t nb10, const size_t nb11, const size_t nb12,
+ const size_t nb1, const size_t nb2, const size_t nb3,
+ cudaStream_t stream) {
+ const int64_t nb_blocks = ne00 / QK_K;
+ const dim3 block_dims(64, 1, 1);
+ const dim3 block_nums(ne10, MIN(nb_blocks, (int64_t)UINT16_MAX), MIN(ne11*ne12, (int64_t)UINT16_MAX));
+
+ const size_t s1 = nb1 / sizeof(dst_t);
+ const size_t s2 = nb2 / sizeof(dst_t);
+ const size_t s3 = nb3 / sizeof(dst_t);
+
+ const size_t s10 = nb10 / sizeof(int32_t);
+ const size_t s11 = nb11 / sizeof(int32_t);
+ const size_t s12 = nb12 / sizeof(int32_t);
+
+ k_get_rows_q6_K<<<block_nums, block_dims, 0, stream>>>(
+ src0_d, src1_d, dst_d,
+ ne00, ne11, ne12,
+ s1, s2, s3,
+ nb01, nb02, nb03,
+ s10, s11, s12);
+}
+
template <typename dst_t>
static void ggml_cuda_get_rows_switch_src0_type(
const void * src0_d, const ggml_type src0_type, const int32_t * src1_d, dst_t * dst_d,
@@ -199,8 +274,11 @@ static void ggml_cuda_get_rows_switch_src0_type(
get_rows_cuda_q<QK8_0, QR8_0, dequantize_q8_0>(src0_d, src1_d, dst_d,
ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream);
break;
+ case GGML_TYPE_Q6_K:
+ get_rows_cuda_q6_K(src0_d, src1_d, dst_d,
+ ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream);
+ break;
default:
- // TODO: k-quants
GGML_ABORT("%s: unsupported src0 type: %s\n", __func__, ggml_type_name(src0_type));
break;
}
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 5c9dfd032..b8ed3709b 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -4693,6 +4693,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_TYPE_Q5_0:
case GGML_TYPE_Q5_1:
case GGML_TYPE_Q8_0:
+ case GGML_TYPE_Q6_K:
return true;
default:
return false;
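
For reference, a minimal CPU sketch of the same Q6_K bit layout the kernel decodes, assuming ggml's block_q6_K geometry (QK_K = 256 values per super-block: 128 bytes of low nibbles in ql, 64 bytes of packed high 2-bit pairs in qh, 16 signed scales, one super-scale d). The Go struct and function names here are illustrative, not part of ggml or ollama:

package main

import "fmt"

const qkK = 256

type blockQ6K struct {
	ql     [qkK / 2]uint8 // low 4 bits of each 6-bit quant
	qh     [qkK / 4]uint8 // high 2 bits, four quants per byte
	scales [qkK / 16]int8 // one scale per 16 values
	d      float32        // super-block scale (fp16 in ggml)
}

// dequantQ6K mirrors the per-thread math in k_get_rows_q6_K: two 128-value
// halves (ip), 32 lanes each (il), four outputs per lane at strides of 32.
func dequantQ6K(b *blockQ6K, out []float32) {
	for ip := 0; ip < 2; ip++ {
		for il := 0; il < 32; il++ {
			is := 8*ip + il/16
			ql := b.ql[64*ip+il:]
			qh := b.qh[32*ip+il]
			sc := b.scales[is:]
			y := 128*ip + il
			out[y+0] = b.d * float32(sc[0]) * float32(int8((ql[0]&0xF)|((qh>>0&3)<<4))-32)
			out[y+32] = b.d * float32(sc[2]) * float32(int8((ql[32]&0xF)|((qh>>2&3)<<4))-32)
			out[y+64] = b.d * float32(sc[4]) * float32(int8((ql[0]>>4)|((qh>>4&3)<<4))-32)
			out[y+96] = b.d * float32(sc[6]) * float32(int8((ql[32]>>4)|((qh>>6&3)<<4))-32)
		}
	}
}

func main() {
	var b blockQ6K
	b.d = 1
	out := make([]float32, qkK)
	dequantQ6K(&b, out)
	fmt.Println(out[0]) // 0 for an all-zero block (scale is 0)
}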

View File

@@ -283,7 +283,7 @@ func (w *WebSearchAnthropicWriter) runWebSearchLoop(ctx context.Context, initial
Type: "server_tool_use",
ID: toolUseID,
Name: "web_search",
-Input: map[string]any{"query": query},
+Input: queryArgs(query),
},
anthropic.ContentBlock{
Type: "web_search_tool_result",
@@ -348,7 +348,7 @@ func (w *WebSearchAnthropicWriter) runWebSearchLoop(ctx context.Context, initial
Type: "server_tool_use",
ID: maxLoopToolUseID,
Name: "web_search",
-Input: map[string]any{"query": maxLoopQuery},
+Input: queryArgs(maxLoopQuery),
},
anthropic.ContentBlock{
Type: "web_search_tool_result",
@@ -786,7 +786,7 @@ func (w *WebSearchAnthropicWriter) webSearchErrorResponse(errorCode, query strin
Type: "server_tool_use",
ID: toolUseID,
Name: "web_search",
-Input: map[string]any{"query": query},
+Input: queryArgs(query),
},
{
Type: "web_search_tool_result",
@@ -942,6 +942,13 @@ func writeSSE(w http.ResponseWriter, eventType string, data any) error {
return nil
}
+// queryArgs creates a ToolCallFunctionArguments with a single "query" key.
+func queryArgs(query string) api.ToolCallFunctionArguments {
+	args := api.NewToolCallFunctionArguments()
+	args.Set("query", query)
+	return args
+}
+
// serverToolUseID derives a server tool use ID from a message ID
func serverToolUseID(messageID string) string {
return "srvtoolu_" + strings.TrimPrefix(messageID, "msg_")

View File

@@ -1208,7 +1208,7 @@ func TestWebSearchStreamResponse(t *testing.T) {
Type: "server_tool_use",
ID: "srvtoolu_test123",
Name: "web_search",
-Input: map[string]any{"query": "test query"},
+Input: queryArgs("test query"),
},
{
Type: "web_search_tool_result",
@@ -1413,12 +1413,8 @@ func TestWebSearchSendError_NonStreaming(t *testing.T) {
t.Errorf("expected name 'web_search', got %q", result.Content[0].Name)
}
// Verify input contains the query
-inputMap, ok := result.Content[0].Input.(map[string]any)
-if !ok {
-	t.Fatalf("expected Input to be map, got %T", result.Content[0].Input)
-}
-if inputMap["query"] != "test query" {
-	t.Errorf("expected query 'test query', got %v", inputMap["query"])
+if q, ok := result.Content[0].Input.Get("query"); !ok || q != "test query" {
+	t.Errorf("expected query 'test query', got %v", q)
}
// Block 1: web_search_tool_result with error
@@ -1561,12 +1557,8 @@ func TestWebSearchSendError_EmptyQuery(t *testing.T) {
}
// Verify the input has empty query
-inputMap, ok := result.Content[0].Input.(map[string]any)
-if !ok {
-	t.Fatalf("expected Input to be map, got %T", result.Content[0].Input)
-}
-if inputMap["query"] != "" {
-	t.Errorf("expected empty query, got %v", inputMap["query"])
+if q, ok := result.Content[0].Input.Get("query"); !ok || q != "" {
+	t.Errorf("expected empty query, got %v", q)
}
}

View File

@@ -678,3 +678,113 @@ func ImageEditsMiddleware() gin.HandlerFunc {
c.Next()
}
}
// TranscriptionWriter collects streamed chat responses and outputs a transcription response.
type TranscriptionWriter struct {
BaseWriter
responseFormat string
text strings.Builder
}
func (w *TranscriptionWriter) Write(data []byte) (int, error) {
code := w.ResponseWriter.Status()
if code != http.StatusOK {
return w.writeError(data)
}
var chatResponse api.ChatResponse
if err := json.Unmarshal(data, &chatResponse); err != nil {
return 0, err
}
w.text.WriteString(chatResponse.Message.Content)
if chatResponse.Done {
text := strings.TrimSpace(w.text.String())
if w.responseFormat == "text" {
w.ResponseWriter.Header().Set("Content-Type", "text/plain")
_, err := w.ResponseWriter.Write([]byte(text))
if err != nil {
return 0, err
}
return len(data), nil
}
w.ResponseWriter.Header().Set("Content-Type", "application/json")
resp := openai.TranscriptionResponse{Text: text}
if err := json.NewEncoder(w.ResponseWriter).Encode(resp); err != nil {
return 0, err
}
}
return len(data), nil
}
// TranscriptionMiddleware handles /v1/audio/transcriptions requests.
// It accepts multipart/form-data with an audio file and converts it to a chat request.
func TranscriptionMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
// Parse multipart form (limit 25MB).
if err := c.Request.ParseMultipartForm(25 << 20); err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "failed to parse multipart form: "+err.Error()))
return
}
model := c.Request.FormValue("model")
if model == "" {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "model is required"))
return
}
file, _, err := c.Request.FormFile("file")
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "file is required: "+err.Error()))
return
}
defer file.Close()
audioData, err := io.ReadAll(file)
if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, "failed to read audio file"))
return
}
if len(audioData) == 0 {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, "audio file is empty"))
return
}
req := openai.TranscriptionRequest{
Model: model,
AudioData: audioData,
ResponseFormat: c.Request.FormValue("response_format"),
Language: c.Request.FormValue("language"),
Prompt: c.Request.FormValue("prompt"),
}
chatReq, err := openai.FromTranscriptionRequest(req)
if err != nil {
c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error()))
return
}
var b bytes.Buffer
if err := json.NewEncoder(&b).Encode(chatReq); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error()))
return
}
c.Request.Body = io.NopCloser(&b)
c.Request.ContentLength = int64(b.Len())
c.Request.Header.Set("Content-Type", "application/json")
w := &TranscriptionWriter{
BaseWriter: BaseWriter{ResponseWriter: c.Writer},
responseFormat: req.ResponseFormat,
}
c.Writer = w
c.Next()
}
}
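
For context on how a client exercises this endpoint, here is a minimal sketch of a multipart request against a local server. The endpoint path and form field names ("model", "file", "response_format") come from the middleware above; the host, port, model name, and file name are assumptions for illustration:

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

func main() {
	audio, err := os.ReadFile("sample.wav") // any WAV file
	if err != nil {
		panic(err)
	}
	var body bytes.Buffer
	mw := multipart.NewWriter(&body)
	mw.WriteField("model", "gemma4")         // placeholder model name
	mw.WriteField("response_format", "text") // plain-text transcript
	fw, err := mw.CreateFormFile("file", "sample.wav")
	if err != nil {
		panic(err)
	}
	fw.Write(audio)
	mw.Close()

	resp, err := http.Post("http://localhost:11434/v1/audio/transcriptions",
		mw.FormDataContentType(), &body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	text, _ := io.ReadAll(resp.Body)
	fmt.Println(string(text))
}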

View File

@@ -137,6 +137,7 @@ type Tensor interface {
Bytes() []byte
Floats() []float32
+BackendGet() []float32
FromBytes([]byte)
FromFloats([]float32)
@@ -162,6 +163,7 @@ type Tensor interface {
AvgPool2D(ctx Context, k, s int, p float32) Tensor
Conv2D(ctx Context, weight Tensor, s0, s1, p0, p1, d0, d1 int) Tensor
Conv3D(ctx Context, weight Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) Tensor
+Conv1DDW(ctx Context, weight Tensor, s, p, d int) Tensor
SSMConv(ctx Context, kernel Tensor) Tensor
SSMScan(ctx Context, x, dt, A, B, C, ids Tensor) Tensor
@@ -187,6 +189,9 @@ type Tensor interface {
Contiguous(ctx Context, shape ...int) Tensor
Pad(ctx Context, shape ...int) Tensor
+// PadExt pads with independent left/right amounts per dimension.
+// Arguments: lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3 for dims 0-3.
+PadExt(ctx Context, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3 int) Tensor
Stack(ctx Context, dim int, s ...Tensor) Tensor
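
To illustrate the difference from Pad, which (following ggml_pad) only grows the trailing side of each dimension, a short sketch; the context ctx and a [w, t] tensor x are assumed:

y := x.Pad(ctx, 2, 0, 0, 0)                // [w+2, t]: two zero columns appended after dim 0
z := x.PadExt(ctx, 1, 1, 0, 0, 0, 0, 0, 0) // [w+2, t]: one zero column on each side of dim 0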

View File

@@ -1069,6 +1069,21 @@ func (t *Tensor) Floats() (data []float32) {
return
}
+
+func (t *Tensor) BackendGet() []float32 {
+	n := int(C.ggml_nelements(t.t))
+	if n == 0 {
+		return nil
+	}
+	if t.sync != nil {
+		t.sync()
+	}
+	data := make([]float32, n)
+	C.ggml_backend_tensor_get(t.t, unsafe.Pointer(&data[0]), 0, C.ggml_nbytes(t.t))
+	return data
+}
+
func tensorSet[S ~[]E, E byte | float32 | int32](t *Tensor, s S) {
if len(s) == 0 {
return
@@ -1313,6 +1328,13 @@ func (t *Tensor) Pad(ctx ml.Context, shape ...int) ml.Tensor {
}
}
+
+func (t *Tensor) PadExt(ctx ml.Context, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3 int) ml.Tensor {
+	return &Tensor{
+		b: t.b,
+		t: C.ggml_pad_ext(ctx.(*Context).ctx, t.t, C.int(lp0), C.int(rp0), C.int(lp1), C.int(rp1), C.int(lp2), C.int(rp2), C.int(lp3), C.int(rp3)),
+	}
+}
+
// Permute permutes t according to order. Permute panics if the number of dimensions
// in order does not match the number of dimensions in t.
func (t *Tensor) Permute(ctx ml.Context, order ...int) ml.Tensor {
@@ -1660,6 +1682,13 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int
}
}
+
+func (t *Tensor) Conv1DDW(ctx ml.Context, weight ml.Tensor, s, p, d int) ml.Tensor {
+	return &Tensor{
+		b: t.b,
+		t: C.ggml_conv_1d_dw(ctx.(*Context).ctx, weight.(*Tensor).t, t.t, C.int(s), C.int(p), C.int(d)),
+	}
+}
+
func (t *Tensor) Conv3D(ctx ml.Context, t2 ml.Tensor, c, s0, s1, s2, p0, p1, p2, d0, d1, d2 int) ml.Tensor {
var tt ml.Tensor = &Tensor{
b: t.b,

View File

@@ -155,6 +155,81 @@ static void get_rows_cuda_float(
s10, s11, s12/*, s13*/);
}
// Specialized GET_ROWS kernel for Q6_K — the k_get_rows template doesn't work for K-quants
// because they lack the simple dequantize_kernel_t (float2) interface.
// Based on dequantize_block_q6_K from convert.cu with row-selection logic added.
template<typename dst_t>
static __global__ void k_get_rows_q6_K(
const void * __restrict__ src0, const int32_t * __restrict__ src1, dst_t * __restrict__ dst,
const int64_t ne00,
const int64_t ne11, const int64_t ne12,
const size_t s1, const size_t s2, const size_t s3,
const size_t nb01, const size_t nb02, const size_t nb03,
const size_t s10, const size_t s11, const size_t s12) {
const int64_t i10 = blockIdx.x; // row index into src1
const int64_t z = blockIdx.z;
const int64_t i11 = z / ne12;
const int64_t i12 = z % ne12;
const int i01 = src1[i10*s10 + i11*s11 + i12*s12];
dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3;
const char * src0_row = (const char *)src0 + i01*nb01 + i11*nb02 + i12*nb03;
const int64_t nb = ne00 / QK_K; // number of Q6_K blocks per row
// blockIdx.y iterates over Q6_K blocks within the row
for (int64_t iblk = blockIdx.y; iblk < nb; iblk += gridDim.y) {
const block_q6_K * x = (const block_q6_K *)src0_row + iblk;
// Same dequantization as dequantize_block_q6_K (assumes 64 threads)
const int64_t tid = threadIdx.x;
const int64_t ip = tid / 32; // 0 or 1
const int64_t il = tid - 32*ip; // 0..31
const int64_t is = 8*ip + il/16;
const int64_t y_offset = iblk * QK_K + 128*ip + il;
const float d = x->d;
const uint8_t * ql = x->ql + 64*ip + il;
const uint8_t qh = x->qh[32*ip + il];
const int8_t * sc = x->scales + is;
if (y_offset + 0 < ne00) dst_row[y_offset + 0] = ggml_cuda_cast<dst_t>(d * sc[0] * ((int8_t)((ql[ 0] & 0xF) | (((qh >> 0) & 3) << 4)) - 32));
if (y_offset + 32 < ne00) dst_row[y_offset + 32] = ggml_cuda_cast<dst_t>(d * sc[2] * ((int8_t)((ql[32] & 0xF) | (((qh >> 2) & 3) << 4)) - 32));
if (y_offset + 64 < ne00) dst_row[y_offset + 64] = ggml_cuda_cast<dst_t>(d * sc[4] * ((int8_t)((ql[ 0] >> 4) | (((qh >> 4) & 3) << 4)) - 32));
if (y_offset + 96 < ne00) dst_row[y_offset + 96] = ggml_cuda_cast<dst_t>(d * sc[6] * ((int8_t)((ql[32] >> 4) | (((qh >> 6) & 3) << 4)) - 32));
}
}
template<typename dst_t>
static void get_rows_cuda_q6_K(
const void * src0_d, const int32_t * src1_d, dst_t * dst_d,
const int64_t ne00, const size_t nb01, const size_t nb02, const size_t nb03,
const int64_t ne10, const int64_t ne11, const int64_t ne12, const size_t nb10, const size_t nb11, const size_t nb12,
const size_t nb1, const size_t nb2, const size_t nb3,
cudaStream_t stream) {
const int64_t nb_blocks = ne00 / QK_K;
const dim3 block_dims(64, 1, 1);
const dim3 block_nums(ne10, MIN(nb_blocks, (int64_t)UINT16_MAX), MIN(ne11*ne12, (int64_t)UINT16_MAX));
const size_t s1 = nb1 / sizeof(dst_t);
const size_t s2 = nb2 / sizeof(dst_t);
const size_t s3 = nb3 / sizeof(dst_t);
const size_t s10 = nb10 / sizeof(int32_t);
const size_t s11 = nb11 / sizeof(int32_t);
const size_t s12 = nb12 / sizeof(int32_t);
k_get_rows_q6_K<<<block_nums, block_dims, 0, stream>>>(
src0_d, src1_d, dst_d,
ne00, ne11, ne12,
s1, s2, s3,
nb01, nb02, nb03,
s10, s11, s12);
}
template <typename dst_t>
static void ggml_cuda_get_rows_switch_src0_type(
const void * src0_d, const ggml_type src0_type, const int32_t * src1_d, dst_t * dst_d,
@@ -199,8 +274,11 @@ static void ggml_cuda_get_rows_switch_src0_type(
get_rows_cuda_q<QK8_0, QR8_0, dequantize_q8_0>(src0_d, src1_d, dst_d,
ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream);
break;
+case GGML_TYPE_Q6_K:
+    get_rows_cuda_q6_K(src0_d, src1_d, dst_d,
+        ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream);
+    break;
default:
-    // TODO: k-quants
GGML_ABORT("%s: unsupported src0 type: %s\n", __func__, ggml_type_name(src0_type));
break;
}

View File

@@ -4693,6 +4693,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_TYPE_Q5_0:
case GGML_TYPE_Q5_1:
case GGML_TYPE_Q8_0:
+case GGML_TYPE_Q6_K:
return true;
default:
return false;

View File

@@ -47,6 +47,12 @@ type Validator interface {
Validate() error
}
+// PostLoader is an optional interface that models can implement to run
+// initialization steps after backend weights have been loaded.
+type PostLoader interface {
+	PostLoad() error
+}
+
// MultimodalProcessor must be implemented by multimodal models.
type MultimodalProcessor interface {
// EncodeMultimodal processes a single input (such as an image) and
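
The gemma4 model later in this changeset implements this hook to initialize its clamp values after weights load; the general shape of an implementation (the myModel type name is illustrative):

// PostLoad runs once after backend weights are loaded, e.g. to read
// scalar tensors into plain Go values.
func (m *myModel) PostLoad() error {
	return nil
}

var _ model.PostLoader = (*myModel)(nil) // compile-time interface check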

View File

@@ -68,6 +68,8 @@ func (f *fakeTensor) Fill(ctx ml.Context, _ float32) ml.Tensor
func (f *fakeTensor) Repeat4D(ctx ml.Context, _, _, _, _ int) ml.Tensor { return f }
func (f *fakeTensor) SolveTri(ctx ml.Context, _ ml.Tensor, _, _, _ bool) ml.Tensor { return f }
func (f *fakeTensor) SSMScan(ctx ml.Context, _, _, _, _, _, _ ml.Tensor) ml.Tensor { return f }
+func (f *fakeTensor) Conv1DDW(ctx ml.Context, _ ml.Tensor, _, _, _ int) ml.Tensor { return f }
+func (f *fakeTensor) PadExt(ctx ml.Context, _, _, _, _, _, _, _, _ int) ml.Tensor { return f }
func (m *fakeBackend) Get(name string) ml.Tensor {
if slices.Contains(m.names, name) {

View File

@@ -0,0 +1,265 @@
package gemma4
import (
"bytes"
"fmt"
"image"
"log/slog"
"slices"
"time"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
"github.com/ollama/ollama/tokenizer"
)
type Model struct {
model.Base
tokenizer.Tokenizer
*VisionModel `gguf:"v"`
*TextModel
*AudioModel `gguf:"a"`
*MultiModalProjector `gguf:"mm"`
*AudioMultimodalProjector `gguf:"mm.a"`
ImageProcessor
imageTokenID int32
imageEndTokenID int32
audioTokenID int32
audioEndTokenID int32
audioOpts *AudioModelOptions
}
var _ model.MultimodalProcessor = (*Model)(nil)
type MultiModalProjector struct {
Projection *ClippableLinear `gguf:"input_projection"`
}
func (p *MultiModalProjector) Forward(ctx ml.Context, visionOutputs ml.Tensor, eps float32) ml.Tensor {
visionOutputs = p.Projection.Forward(ctx, visionOutputs)
// Post-projection RMSNorm without learned weight
visionOutputs = visionOutputs.RMSNorm(ctx, nil, eps)
return visionOutputs
}
func New(c fs.Config) (model.Model, error) {
vocabulary := tokenizer.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
Merges: c.Strings("tokenizer.ggml.merges"),
AddBOS: c.Bool("tokenizer.ggml.add_bos_token", false),
BOS: []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
EOS: append(
[]int32{
int32(c.Uint("tokenizer.ggml.eos_token_id")),
},
c.Ints("tokenizer.ggml.eos_token_ids")...,
),
}
vocabulary.EOS = append(vocabulary.EOS, int32(c.Uint("tokenizer.ggml.eot_token_id", 106)))
// Gemma 4 uses BPE with SentencePiece-style ▁ space markers (not GPT-2 byte-level encoding).
// The tokenizer.json has merges and a Replace normalizer (space → ▁), with no pre-tokenizer.
t := tokenizer.NewBytePairEncodingWithOptions(&vocabulary, []string{},
tokenizer.WithSentencePieceNormalizer())
// Look up special token IDs for vision and audio
imageTokenID := int32(-1)
imageEndTokenID := int32(-1)
audioTokenID := int32(-1)
audioEndTokenID := int32(-1)
for i, tok := range vocabulary.Values {
switch tok {
case "<|image>":
imageTokenID = int32(i)
case "<image|>":
imageEndTokenID = int32(i)
case "<|audio>":
audioTokenID = int32(i)
case "<audio|>":
audioEndTokenID = int32(i)
}
}
slog.Info("gemma4: token IDs", "image", imageTokenID, "image_end", imageEndTokenID, "audio", audioTokenID, "audio_end", audioEndTokenID)
m := Model{
Tokenizer: t,
TextModel: newTextModel(c),
VisionModel: newVisionModel(c),
AudioModel: newAudioModel(c),
MultiModalProjector: &MultiModalProjector{},
AudioMultimodalProjector: &AudioMultimodalProjector{},
ImageProcessor: newImageProcessor(c),
imageTokenID: imageTokenID,
imageEndTokenID: imageEndTokenID,
audioTokenID: audioTokenID,
audioEndTokenID: audioEndTokenID,
audioOpts: newAudioModelOptions(c),
}
slidingWindowLen := int32(c.Uint("attention.sliding_window"))
m.Cache = kvcache.NewWrapperCache(
kvcache.NewSWAMemCache(slidingWindowLen, 4096, m.Shift),
kvcache.NewCausalCache(m.Shift),
)
return &m, nil
}
func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input.Multimodal, error) {
// Audio input: detect WAV format and route to audio encoder.
if isAudioData(multimodalData) {
return m.encodeAudioMultimodal(ctx, multimodalData)
}
if len(m.VisionModel.Layers) == 0 {
return nil, model.ErrNoVisionModel
}
t0 := time.Now()
img, _, err := image.Decode(bytes.NewReader(multimodalData))
if err != nil {
return nil, err
}
slog.Info("vision: decode", "elapsed", time.Since(t0), "bounds", img.Bounds())
t1 := time.Now()
f32s, imgW, imgH, err := m.ImageProcessor.ProcessImage(img)
if err != nil {
return nil, err
}
slog.Info("vision: preprocess", "elapsed", time.Since(t1), "size", [2]int{imgW, imgH})
pixelValues := ctx.Input().FromFloats(f32s, imgW, imgH, m.ImageProcessor.numChannels)
slog.Info("vision: pixelValues", "shape", pixelValues.Shape(), "dim0", pixelValues.Dim(0), "dim1", pixelValues.Dim(1), "dim2", pixelValues.Dim(2))
numPatchesX := imgW / m.ImageProcessor.patchSize
numPatchesY := imgH / m.ImageProcessor.patchSize
slog.Info("vision: patches", "patchesX", numPatchesX, "patchesY", numPatchesY, "total", numPatchesX*numPatchesY, "patchSize", m.ImageProcessor.patchSize)
visionOutputs := m.VisionModel.Forward(ctx, pixelValues, numPatchesX, numPatchesY)
visionOutputs = visionPoolAndProject(ctx, visionOutputs, numPatchesX, numPatchesY, m.VisionModel.VisionModelOptions, m.MultiModalProjector, m.VisionModel.StdBias, m.VisionModel.StdScale)
slog.Info("vision: encoded", "elapsed", time.Since(t0), "shape", visionOutputs.Shape())
return []input.Multimodal{{Tensor: visionOutputs}}, nil
}
func (m *Model) PostLoad() error {
m.VisionModel.InitClamp(m.MultiModalProjector)
return nil
}
func (m *Model) encodeAudioMultimodal(ctx ml.Context, data []byte) ([]input.Multimodal, error) {
if m.AudioModel == nil || m.audioOpts == nil {
return nil, model.ErrNoVisionModel
}
t0 := time.Now()
samples, err := decodeWAV(data)
if err != nil {
return nil, err
}
slog.Info("audio: decode", "elapsed", time.Since(t0), "samples", len(samples), "duration_s", float64(len(samples))/audioSampleRate)
// Pad waveform to next multiple of 128.
if rem := len(samples) % 128; rem != 0 {
samples = append(samples, make([]float32, 128-rem)...)
}
// Compute mel spectrogram.
melData, numFrames := computeMelSpectrogram(samples)
if numFrames == 0 {
return nil, fmt.Errorf("audio too short to encode")
}
slog.Info("audio: mel", "frames", numFrames, "elapsed", time.Since(t0))
// Create input tensor [melBins, numFrames] (GGML ne order). FromFloats creates F32.
melTensor := ctx.Input().FromFloats(melData, melBins, numFrames)
// Run audio encoder.
audioOutputs := m.AudioModel.ForwardAudio(ctx, melTensor, m.AudioMultimodalProjector, m.audioOpts)
slog.Info("audio: encoded", "elapsed", time.Since(t0), "shape", audioOutputs.Shape())
return []input.Multimodal{{Tensor: audioOutputs, Data: audioTag{}}}, nil
}
// audioTag marks multimodal data as audio (vs vision) for PostTokenize.
type audioTag struct{}
func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
var result []*input.Input
for _, inp := range inputs {
if len(inp.Multimodal) == 0 {
result = append(result, inp)
continue
}
inputMultimodal := inp.Multimodal[0].Tensor
numTokens := inputMultimodal.Dim(1)
// Determine if this is audio or vision based on the tag.
_, isAudio := inp.Multimodal[0].Data.(audioTag)
var beginToken, endToken int32
if isAudio {
beginToken = m.audioTokenID
endToken = m.audioEndTokenID
} else {
beginToken = m.imageTokenID
endToken = m.imageEndTokenID
}
if beginToken >= 0 {
result = append(result, &input.Input{Token: beginToken, SameBatch: numTokens + 2})
}
result = append(result,
&input.Input{Multimodal: []input.Multimodal{{Tensor: inputMultimodal}}, MultimodalHash: inp.MultimodalHash},
)
result = append(result, slices.Repeat([]*input.Input{{Token: 0}}, numTokens-1)...)
if endToken >= 0 {
result = append(result, &input.Input{Token: endToken})
}
}
return result, nil
}
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
hiddenState := m.TextModel.Forward(ctx, batch, m.Cache)
hiddenState = m.TextModel.Output.Forward(ctx, hiddenState)
if m.TextModel.TextOptions.finalLogitSoftcap > 0.0 {
hiddenState = hiddenState.Scale(ctx, 1.0/float64(m.TextModel.TextOptions.finalLogitSoftcap))
hiddenState = hiddenState.Tanh(ctx)
hiddenState = hiddenState.Scale(ctx, float64(m.TextModel.TextOptions.finalLogitSoftcap))
}
return hiddenState, nil
}
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
ropeBase, ropeDims := m.TextModel.ropeForLayer(layer)
return nn.RoPE(ctx, key, shift, ropeDims, ropeBase, 1.0, rope.WithTypeNeoX()), nil
}
func init() {
model.Register("gemma4", New)
}
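
To make the PostTokenize expansion concrete: for an image whose embedding tensor carries numTokens == 16 soft tokens, the single multimodal input becomes 18 inputs, with SameBatch sized so the markers and all soft tokens land in one batch. A sketch of the resulting stream (token IDs illustrative):

// input:  [ {Multimodal: imageTensor} ]
// output: [ {Token: imageTokenID, SameBatch: 18},  // 16 soft tokens + 2 markers
//           {Multimodal: imageTensor},             // stands for the first soft-token position
//           {Token: 0} x 15,                       // placeholders for remaining positions
//           {Token: imageEndTokenID} ]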

View File

@@ -0,0 +1,612 @@
package gemma4
import (
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
)
// AudioModel holds the audio encoder and configuration.
type AudioModel struct {
// SSCP: Sub-Sample Convolution Projection.
SSCPConv0 *AudioConvBlock `gguf:"conv1d.0"`
SSCPConv1 *AudioConvBlock `gguf:"conv1d.1"`
// SSCP output projection (linear).
SSCPInputProj *nn.Linear `gguf:"pre_encode.out"`
// Conformer blocks.
Layers []AudioConformerBlock `gguf:"blk"`
// Output projection to embedder dimension.
OutputProj *AudioOutputProj `gguf:"output_proj"`
AudioModelOptions
}
type AudioOutputProj struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
}
// AudioModelOptions holds audio model hyperparameters.
type AudioModelOptions struct {
hiddenSize int
numHeads int
headDim int
ffnSize int
numLayers int
melBins int
chunkSize int
maxPast int
maxFuture int
contextSize int
logitCap float32
residualWeight float32
gradClip float32
convKernelSize int
eps float32
}
// AudioConvBlock is a single 2D convolution block for the SSCP.
type AudioConvBlock struct {
Weight ml.Tensor `gguf:"weight"`
Norm *nn.LayerNorm `gguf:"norm"`
}
// AudioConformerBlock is a single conformer layer.
// All tensors are flat at the block level (a.blk.N.<name>) using underscore naming.
type AudioConformerBlock struct {
// Block-level norm
Norm *nn.RMSNorm `gguf:"layer_pre_norm"`
// FFW start
FFWNorm *nn.RMSNorm `gguf:"ffn_norm"`
FFWUp *AudioClippableLinear `gguf:"ffn_up"`
FFWDown *AudioClippableLinear `gguf:"ffn_down"`
FFWPostNorm *nn.RMSNorm `gguf:"ffn_post_norm"`
// FFW end
FFWNorm1 *nn.RMSNorm `gguf:"ffn_norm_1"`
FFWUp1 *AudioClippableLinear `gguf:"ffn_up_1"`
FFWDown1 *AudioClippableLinear `gguf:"ffn_down_1"`
FFWPostNorm1 *nn.RMSNorm `gguf:"ffn_post_norm_1"`
// Attention
AttnQ *AudioClippableLinear `gguf:"attn_q"`
AttnK *AudioClippableLinear `gguf:"attn_k"`
AttnV *AudioClippableLinear `gguf:"attn_v"`
AttnOut *AudioClippableLinear `gguf:"attn_out"`
AttnPreNorm *nn.RMSNorm `gguf:"ln1"`
AttnPostNorm *nn.RMSNorm `gguf:"ln2"`
LinearPos ml.Tensor `gguf:"linear_pos.weight"`
PerDimScale ml.Tensor `gguf:"per_dim_scale.weight"`
// LightConv1d
ConvPW1 *AudioClippableLinear `gguf:"conv_pw1"`
ConvPW2 *AudioClippableLinear `gguf:"conv_pw2"`
ConvDW ml.Tensor `gguf:"conv_dw.weight"`
ConvNorm *nn.RMSNorm `gguf:"conv_norm"`
NormConv *nn.RMSNorm `gguf:"norm_conv"`
}
// AudioClippableLinear is a linear layer with optional input/output clamping.
type AudioClippableLinear struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
InputMin ml.Tensor `gguf:"input_min"`
InputMax ml.Tensor `gguf:"input_max"`
OutputMin ml.Tensor `gguf:"output_min"`
OutputMax ml.Tensor `gguf:"output_max"`
// Cached scalar clamp values (populated on first forward).
inMin, inMax, outMin, outMax float32
clampsLoaded bool
}
func (l *AudioClippableLinear) loadClamps() {
if l.clampsLoaded {
return
}
l.clampsLoaded = true
if l.InputMin != nil {
vals := l.InputMin.BackendGet()
if len(vals) > 0 {
l.inMin = vals[0]
}
}
if l.InputMax != nil {
vals := l.InputMax.BackendGet()
if len(vals) > 0 {
l.inMax = vals[0]
}
}
if l.OutputMin != nil {
vals := l.OutputMin.BackendGet()
if len(vals) > 0 {
l.outMin = vals[0]
}
}
if l.OutputMax != nil {
vals := l.OutputMax.BackendGet()
if len(vals) > 0 {
l.outMax = vals[0]
}
}
}
func (l *AudioClippableLinear) Forward(ctx ml.Context, x ml.Tensor) ml.Tensor {
l.loadClamps()
if l.inMax != 0 {
x = x.Clamp(ctx, l.inMin, l.inMax)
}
out := l.Weight.Mulmat(ctx, x)
if l.Bias != nil {
out = out.Add(ctx, l.Bias)
}
if l.outMax != 0 {
out = out.Clamp(ctx, l.outMin, l.outMax)
}
return out
}
// AudioMultimodalProjector is the audio-to-text embedding projector.
type AudioMultimodalProjector struct {
Projection *AudioClippableLinear `gguf:"input_projection"`
FC *AudioFC `gguf:"fc"`
}
type AudioFC struct {
Weight ml.Tensor `gguf:"weight"`
Bias ml.Tensor `gguf:"bias"`
}
func (p *AudioMultimodalProjector) Forward(ctx ml.Context, x ml.Tensor, eps float32) ml.Tensor {
// FC: output projection from conformer to embedder dimension.
x = p.FC.Weight.Mulmat(ctx, x)
if p.FC.Bias != nil {
x = x.Add(ctx, p.FC.Bias)
}
// Pre-projection RMSNorm (without learned weight) — matches Python's embedding_pre_projection_norm.
x = x.RMSNorm(ctx, nil, eps)
// Embedding projection to text hidden size.
x = p.Projection.Forward(ctx, x)
return x
}
// ForwardAudio encodes mel spectrogram features into soft tokens.
// melFeatures: float32 tensor with ne[0]=melBins, ne[1]=numFrames.
// Returns: [hiddenSize, numTokens] tensor.
func (m *AudioModel) ForwardAudio(ctx ml.Context, melFeatures ml.Tensor, proj *AudioMultimodalProjector, opts *AudioModelOptions) ml.Tensor {
// SSCP Conv2D input: ne[0]=F (freq/width), ne[1]=T (time/height), ne[2]=C_in, ne[3]=B
// melFeatures is [melBins, numFrames], add channel and batch dims.
x := melFeatures.Reshape(ctx, melFeatures.Dim(0), melFeatures.Dim(1), 1, 1)
// SSCP Conv block 0: [F, T, 1, 1] → [F', T', C0, 1]
x = forwardConvBlock(ctx, m.SSCPConv0, x, opts)
// SSCP Conv block 1: [F', T', C0, 1] → [F'', T'', C1, 1]
x = forwardConvBlock(ctx, m.SSCPConv1, x, opts)
// After conv blocks, layout is [F'', T'', C_out, B].
// Permute to [C_out*F'', T'', B] for linear projection (channels+freq in ne[0]).
fOut := x.Dim(0)
tOut := x.Dim(1)
cOut := x.Dim(2)
// Permute [F'', T'', C, B] → [C, F'', T'', B]
// (1,2,0,3): old[0]→pos1, old[1]→pos2, old[2]→pos0
x = x.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
x = x.Reshape(ctx, cOut*fOut, tOut)
// Linear projection to hidden size.
x = m.SSCPInputProj.Forward(ctx, x)
// Build causal-valid mask for conformer attention.
causalMask := buildCausalValidMaskF32(int(opts.chunkSize), opts.maxPast, opts.maxFuture)
// Run conformer blocks.
for i := range m.Layers {
x = m.Layers[i].Forward(ctx, x, causalMask, opts, i)
}
// Output projection.
if m.OutputProj != nil {
x = m.OutputProj.Weight.Mulmat(ctx, x)
if m.OutputProj.Bias != nil {
x = x.Add(ctx, m.OutputProj.Bias)
}
}
// Audio embedder: project to text embedding space.
if proj != nil {
x = proj.Forward(ctx, x, opts.eps)
}
return x
}
// forwardConvBlock runs a single SSCP Conv2D block.
// Conv2D receiver is the kernel, argument is the input data.
// Input: [F, T, C_in, B]. Output: [F', T', C_out, B].
func forwardConvBlock(ctx ml.Context, block *AudioConvBlock, x ml.Tensor, opts *AudioModelOptions) ml.Tensor {
// Conv2D: kernel.Conv2D(ctx, input, s0, s1, p0, p1, d0, d1)
// Kernel is 3x3, stride 2x2, padding 1x1 (matching SSCP config).
// Output layout: [F', T', C_out, B]
// Make weight contiguous — the shape reversal in the converter creates
// a tensor where the physical data order doesn't match ne[]/stride[].
weight := block.Weight.Contiguous(ctx)
x = weight.Conv2D(ctx, x, 2, 2, 1, 1, 1, 1)
// LayerNorm needs channels in ne[0]. Permute [F', T', C_out, B] → [C_out, F', T', B],
// norm, then permute back.
// GGML permute: axis i says where old axis i goes.
// (1,2,0,3): old[0]→pos1, old[1]→pos2, old[2]→pos0 → [C_out, F', T', B]
x = x.Permute(ctx, 1, 2, 0, 3).Contiguous(ctx)
x = block.Norm.Forward(ctx, x, opts.eps)
// (2,0,1,3): old[0]→pos2, old[1]→pos0, old[2]→pos1 → [F', T', C_out, B]
x = x.Permute(ctx, 2, 0, 1, 3).Contiguous(ctx)
x = x.RELU(ctx)
return x
}
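// Illustrative sketch (assumption: exposition only, not part of the model):
// GGML's Permute takes, for each old axis i, the position that axis moves to.
// Applying (1,2,0,3) to dims [F, T, C, B] therefore yields [C, F, T, B],
// matching the permute comments above.
func examplePermuteDims(dims, perm [4]int) [4]int {
var out [4]int
for i, p := range perm {
out[p] = dims[i] // old axis i lands at position perm[i]
}
return out // examplePermuteDims([4]int{8, 4, 2, 1}, [4]int{1, 2, 0, 3}) == [4]int{2, 8, 4, 1}
}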
// Forward runs a single conformer block.
func (cb *AudioConformerBlock) Forward(ctx ml.Context, x ml.Tensor, causalMask []float32, opts *AudioModelOptions, blockIdx int) ml.Tensor {
// FFW start (half-residual).
x = cb.forwardFFW(ctx, cb.FFWNorm, cb.FFWUp, cb.FFWDown, cb.FFWPostNorm, x, opts)
// Self-attention.
x = cb.forwardAttention(ctx, x, causalMask, opts, blockIdx)
// Lightweight Conv1d.
x = cb.forwardLightConv(ctx, x, opts, blockIdx)
// FFW end (half-residual).
x = cb.forwardFFW(ctx, cb.FFWNorm1, cb.FFWUp1, cb.FFWDown1, cb.FFWPostNorm1, x, opts)
// Gradient clipping + final norm.
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = cb.Norm.Forward(ctx, x, opts.eps)
return x
}
// forwardFFW runs a feedforward module with half-residual connection.
func (cb *AudioConformerBlock) forwardFFW(ctx ml.Context, preNorm *nn.RMSNorm, up, down *AudioClippableLinear, postNorm *nn.RMSNorm, x ml.Tensor, opts *AudioModelOptions) ml.Tensor {
residual := x
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = preNorm.Forward(ctx, x, opts.eps)
x = up.Forward(ctx, x)
x = x.SILU(ctx)
x = down.Forward(ctx, x)
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = postNorm.Forward(ctx, x, opts.eps)
x = x.Scale(ctx, float64(opts.residualWeight))
return residual.Add(ctx, x)
}
// forwardAttention runs the conformer block-local attention with relative position embeddings.
func (cb *AudioConformerBlock) forwardAttention(ctx ml.Context, x ml.Tensor, causalMask []float32, opts *AudioModelOptions, blockIdx int) ml.Tensor {
residual := x
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = cb.AttnPreNorm.Forward(ctx, x, opts.eps)
hiddenSize := x.Dim(0)
seqLen := x.Dim(1)
// QKV projections: [hiddenSize, seqLen] → [headDim, numHeads, seqLen]
q := cb.AttnQ.Forward(ctx, x).Reshape(ctx, opts.headDim, opts.numHeads, seqLen)
k := cb.AttnK.Forward(ctx, x).Reshape(ctx, opts.headDim, opts.numHeads, seqLen)
v := cb.AttnV.Forward(ctx, x).Reshape(ctx, opts.headDim, opts.numHeads, seqLen)
// Per-dim query scaling: (headDim^-0.5 / log(2)) * softplus(per_dim_scale);
// per_dim_scale has already been passed through softplus by the converter.
qScale := math.Pow(float64(opts.headDim), -0.5) / math.Log(2)
q = q.Scale(ctx, qScale)
if cb.PerDimScale != nil {
q = q.Mul(ctx, cb.PerDimScale)
}
// Key scaling: softplus(1) / log(2) — matches the query base scaling convention.
kScale := math.Log(1+math.E) / math.Log(2)
k = k.Scale(ctx, kScale)
// Build sinusoidal position embeddings for the block-local context.
maxSpan := opts.maxPast + opts.maxFuture + 1 // 13 unique relative positions
posEmb := cb.buildPositionEmbeddings(ctx, maxSpan, opts)
// posEmb: [headDim, numHeads, maxSpan]
// Block-local attention: process chunks of size chunkSize.
chunkSize := opts.chunkSize
numChunks := (seqLen + chunkSize - 1) / chunkSize
contextSize := opts.contextSize
// Pad q/k/v to multiple of chunkSize on the time dimension (dim 2).
padT := numChunks*chunkSize - seqLen
if padT > 0 {
q = q.Pad(ctx, 0, 0, padT, 0)
k = k.Pad(ctx, 0, 0, padT, 0)
v = v.Pad(ctx, 0, 0, padT, 0)
}
paddedLen := numChunks * chunkSize
// Pad k/v for context extraction: maxPast zeros on the left, (maxFuture+chunkSize-1) zeros on the right.
// Left-padding is done by concatenating zero tensors, which is reliable where PadExt+Slice is not.
padLeft := opts.maxPast
padRight := opts.maxFuture + chunkSize - 1
zeroLeft := ctx.Input().FromFloats(make([]float32, opts.headDim*opts.numHeads*padLeft), opts.headDim, opts.numHeads, padLeft)
zeroRight := ctx.Input().FromFloats(make([]float32, opts.headDim*opts.numHeads*padRight), opts.headDim, opts.numHeads, padRight)
kPadded := zeroLeft.Concat(ctx, k, 2).Concat(ctx, zeroRight, 2)
vPadded := zeroLeft.Concat(ctx, v, 2).Concat(ctx, zeroRight, 2)
// Reshape q into chunks: [headDim, numHeads, numChunks, chunkSize]
qChunked := q.Reshape(ctx, opts.headDim, opts.numHeads, numChunks, chunkSize)
// Process each chunk and collect results.
chunkOutputs := make([]ml.Tensor, numChunks)
for u := range numChunks {
// Extract query block: [headDim, numHeads, 1, chunkSize] → [headDim, numHeads, chunkSize]
qBlock := qChunked.Slice(ctx, 2, u, u+1, 1).Reshape(ctx, opts.headDim, opts.numHeads, chunkSize)
// Extract key/value context: [headDim, numHeads, contextSize]
cStart := u * chunkSize // offset in kPadded (padLeft already accounts for left context)
kCtx := kPadded.Slice(ctx, 2, cStart, cStart+contextSize, 1).Contiguous(ctx)
vCtx := vPadded.Slice(ctx, 2, cStart, cStart+contextSize, 1).Contiguous(ctx)
// Content-content logits: qBlock^T @ kCtx → [chunkSize, contextSize] per head.
// Mulmat(a, b) = a^T @ b, but calling kCtx.Mulmat(qBlock) directly would
// batch over the wrong dimension. Instead, permute both operands to
// [headDim, chunkSize, numHeads] and [headDim, contextSize, numHeads] so
// that Mulmat batches over numHeads.
// GGML permute(0,2,1,3): old[0]→0, old[1]→2, old[2]→1
qP := qBlock.Permute(ctx, 0, 2, 1, 3) // [headDim, chunkSize, numHeads]
kP := kCtx.Permute(ctx, 0, 2, 1, 3) // [headDim, contextSize, numHeads]
termAC := kP.MulmatFullPrec(ctx, qP) // [contextSize, chunkSize, numHeads]
// Content-position logits: qBlock^T @ posEmb → [chunkSize, maxSpan] per head.
pP := posEmb.Permute(ctx, 0, 2, 1, 3) // [headDim, maxSpan, numHeads]
termBDRaw := pP.MulmatFullPrec(ctx, qP) // [maxSpan, chunkSize, numHeads]
// Relative shift: [maxSpan, chunkSize, numHeads] → [contextSize, chunkSize, numHeads]
termBD := cb.relativeShiftGGML(ctx, termBDRaw, maxSpan, chunkSize, contextSize, opts.numHeads)
// Combined logits.
logits := termAC.Add(ctx, termBD)
// Logit softcap: tanh(logits / cap) * cap
logits = logits.Scale(ctx, 1.0/float64(opts.logitCap))
logits = logits.Tanh(ctx)
logits = logits.Scale(ctx, float64(opts.logitCap))
// Apply combined causal + validity mask.
// causalMask [chunkSize * contextSize]: 1=causal-allowed, 0=masked.
// Validity: context positions before the actual sequence start are invalid.
// For chunk u, context position c corresponds to actual time: u*chunkSize + c - padLeft.
// Valid if 0 <= actual_time < seqLen.
// Mask tensor layout: [contextSize, chunkSize, 1] with ne[0]=contextSize contiguous.
// Element at (context=j, chunk=i) is at flat index: i*contextSize + j.
maskData := make([]float32, contextSize*chunkSize)
for i := range chunkSize {
for j := range contextSize {
actualTime := u*chunkSize + j - padLeft
causalOK := causalMask[i*contextSize+j] > 0
validOK := actualTime >= 0 && actualTime < seqLen
if causalOK && validOK {
maskData[i*contextSize+j] = 0
} else {
maskData[i*contextSize+j] = -1e9
}
}
}
mask := ctx.Input().FromFloats(maskData, contextSize, chunkSize, 1) // 3D for broadcasting over numHeads
logits = logits.Add(ctx, mask)
// Softmax over context dimension (dim 0 = contextSize).
logits = logits.Softmax(ctx) // softmax over ne[0]=contextSize
// Weighted sum: logits^T @ vCtx.
// logits: [contextSize, chunkSize, numHeads], vCtx: [headDim, numHeads, contextSize]
// vCtx permuted: [headDim, contextSize, numHeads]
vP := vCtx.Permute(ctx, 0, 2, 1, 3) // [headDim, contextSize, numHeads]
// Weighted sum: for each head, value[headDim, contextSize] @ weights[contextSize, chunkSize]
// = [headDim, chunkSize].
// Mulmat(a, b) = a^T @ b. Need a=[contextSize, headDim, numHeads], b=[contextSize, chunkSize, numHeads].
vPT := vP.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx) // [contextSize, headDim, numHeads]
chunkOut := vPT.Mulmat(ctx, logits) // [headDim, chunkSize, numHeads]
// Permute back to [headDim, numHeads, chunkSize]
chunkOut = chunkOut.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
chunkOutputs[u] = chunkOut
}
// Concatenate chunk outputs along time dimension.
var attnOut ml.Tensor
if numChunks == 1 {
attnOut = chunkOutputs[0]
} else {
attnOut = chunkOutputs[0]
for _, co := range chunkOutputs[1:] {
attnOut = attnOut.Concat(ctx, co, 2)
}
}
// Trim to original sequence length if we padded.
if paddedLen > seqLen {
attnOut = attnOut.Slice(ctx, 2, 0, seqLen, 1).Contiguous(ctx)
}
// Reshape to [hiddenSize, seqLen] and project.
attnOut = attnOut.Reshape(ctx, hiddenSize, seqLen)
x = cb.AttnOut.Forward(ctx, attnOut)
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = cb.AttnPostNorm.Forward(ctx, x, opts.eps)
return residual.Add(ctx, x)
}
// buildPositionEmbeddings builds sinusoidal position embeddings and projects through linear_pos.
// Returns [headDim, numHeads, maxSpan] tensor.
func (cb *AudioConformerBlock) buildPositionEmbeddings(ctx ml.Context, maxSpan int, opts *AudioModelOptions) ml.Tensor {
halfDim := opts.hiddenSize / 2
hiddenSize := opts.hiddenSize
// inv_timescales: exp(-i * log(10000) / max(D/2-1, 1))
logInc := math.Log(10000.0) / math.Max(float64(halfDim-1), 1)
// Sinusoidal embeddings for relative positions [maxPast, maxPast-1, ..., -maxFuture].
posData := make([]float32, hiddenSize*maxSpan)
for p := range maxSpan {
relPos := float64(opts.maxPast - p)
for d := range halfDim {
angle := relPos * math.Exp(float64(-d)*logInc)
posData[p*hiddenSize+d] = float32(math.Sin(angle))
posData[p*hiddenSize+halfDim+d] = float32(math.Cos(angle))
}
}
// Create [hiddenSize, maxSpan] input tensor.
posEmb := ctx.Input().FromFloats(posData, hiddenSize, maxSpan)
// Project through linear_pos: [hiddenSize, maxSpan] → Mulmat → [numHeads*headDim, maxSpan]
projPos := cb.LinearPos.Mulmat(ctx, posEmb)
// Reshape to [headDim, numHeads, maxSpan].
return projPos.Reshape(ctx, opts.headDim, opts.numHeads, maxSpan)
}
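// Illustrative sketch (assumption: exposition only): the relative positions
// covered by the embedding table, ordered from farthest past to farthest
// future. With maxPast=12, maxFuture=0 this is [12 11 ... 1 0], matching
// relPos = maxPast - p above.
func exampleRelativePositions(maxPast, maxFuture int) []int {
out := make([]int, maxPast+maxFuture+1)
for p := range out {
out[p] = maxPast - p
}
return out
}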
// relativeShiftGGML performs the relative shift to extract correct position logits.
// Input: [maxSpan, chunkSize, numHeads]. Output: [contextSize, chunkSize, numHeads].
func (cb *AudioConformerBlock) relativeShiftGGML(ctx ml.Context, x ml.Tensor, maxSpan, chunkSize, contextSize, numHeads int) ml.Tensor {
// The shift trick: pad ne[0] to contextSize+1, reshape to flatten first two dims,
// skip first (contextSize+1-maxSpan) elements, take contextSize*chunkSize elements, reshape back.
padAmt := contextSize + 1 - maxSpan
if padAmt > 0 {
x = x.Pad(ctx, padAmt, 0, 0, 0) // [maxSpan+padAmt, chunkSize, numHeads] = [contextSize+1, chunkSize, numHeads]
}
// Reshape to [(contextSize+1)*chunkSize, numHeads]
x = x.Reshape(ctx, (contextSize+1)*chunkSize, numHeads)
// Take the first contextSize*chunkSize elements (the standard relative shift trick).
x = x.Slice(ctx, 0, 0, contextSize*chunkSize, 1).Contiguous(ctx)
// Reshape to [contextSize, chunkSize, numHeads]
return x.Reshape(ctx, contextSize, chunkSize, numHeads)
}
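// Illustrative sketch (assumption: exposition only, plain slices instead of
// tensors): the same shift trick. Each input row holds maxSpan position
// logits for one query; after right-padding each row to contextSize+1,
// flattening, and re-rowing at width contextSize, out[i][c] equals x[i][c-i]
// (the logit for relative position index c-i) and 0 where c-i falls outside
// the table.
func exampleRelativeShift(x [][]float32, contextSize int) [][]float32 {
maxSpan := len(x[0])
flat := make([]float32, 0, (contextSize+1)*len(x))
for _, row := range x {
flat = append(flat, row...)
flat = append(flat, make([]float32, contextSize+1-maxSpan)...)
}
out := make([][]float32, len(x))
for i := range out {
out[i] = flat[i*contextSize : (i+1)*contextSize]
}
return out
}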
// forwardLightConv runs the lightweight depthwise convolution module.
func (cb *AudioConformerBlock) forwardLightConv(ctx ml.Context, x ml.Tensor, opts *AudioModelOptions, blockIdx int) ml.Tensor {
residual := x
x = cb.ConvNorm.Forward(ctx, x, opts.eps)
x = cb.ConvPW1.Forward(ctx, x) // [2*D, T, B]
// GLU: split in half along dim 0, sigmoid gate, multiply.
d := x.Dim(0) / 2
data := x.Slice(ctx, 0, 0, d, 1).Contiguous(ctx)
gate := x.Slice(ctx, 0, d, d*2, 1).Contiguous(ctx).Sigmoid(ctx)
x = data.Mul(ctx, gate) // [D, T, B]
// Depthwise Conv1d: manual implementation using per-tap weight slices.
// Kernel cb.ConvDW has ne[0]=K=5 (contiguous) and ne[1]=D=1024 after the
// converter's shape reversal; each tap contributes a per-channel weight [D]
// applied to a time-shifted copy of the input.
kernelSize := cb.ConvDW.Dim(0) // K=5
seqLen := x.Dim(1)
// Transpose kernel to [D, K] for per-tap slicing.
// GGML permute(1,0,2,3): old[0]→pos1, old[1]→pos0 → swap ne[0] and ne[1]
kernelT := cb.ConvDW.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx) // [D, K]
var convOut ml.Tensor
for k := range kernelSize {
shift := kernelSize - 1 - k
var shifted ml.Tensor
if shift == 0 {
shifted = x
} else {
trimmed := x.Slice(ctx, 1, 0, seqLen-shift, 1).Contiguous(ctx)
shifted = trimmed.PadExt(ctx, 0, 0, shift, 0, 0, 0, 0, 0)
}
wk := kernelT.Slice(ctx, 1, k, k+1, 1).Contiguous(ctx) // [D, 1]
term := shifted.Mul(ctx, wk)
if convOut == nil {
convOut = term
} else {
convOut = convOut.Add(ctx, term)
}
}
x = convOut
x = x.Clamp(ctx, -opts.gradClip, opts.gradClip)
x = cb.NormConv.Forward(ctx, x, opts.eps)
x = x.SILU(ctx)
x = cb.ConvPW2.Forward(ctx, x)
return x.Add(ctx, residual)
}
func newAudioModel(c fs.Config) *AudioModel {
numLayers := int(c.Uint("audio.block_count", 0))
if numLayers == 0 {
return nil
}
return &AudioModel{
Layers: make([]AudioConformerBlock, numLayers),
}
}
func newAudioModelOptions(c fs.Config) *AudioModelOptions {
hiddenSize := int(c.Uint("audio.embedding_length", 0))
if hiddenSize == 0 {
return nil
}
numHeads := int(c.Uint("audio.attention.head_count", 8))
headDim := hiddenSize / numHeads
chunkSize := 12 // default conformer chunk size
maxPast := 12 // conf_attention_context_left - 1
maxFuture := 0 // conf_attention_context_right
convKernel := int(c.Uint("audio.conv_kernel_size", 5))
eps := c.Float("audio.attention.layer_norm_epsilon", 1e-6)
return &AudioModelOptions{
hiddenSize: hiddenSize,
numHeads: numHeads,
headDim: headDim,
ffnSize: int(c.Uint("audio.feed_forward_length", uint32(hiddenSize*4))),
numLayers: int(c.Uint("audio.block_count", 12)),
melBins: int(c.Uint("audio.num_mel_bins", 128)),
chunkSize: chunkSize,
maxPast: maxPast,
maxFuture: maxFuture,
contextSize: chunkSize + maxPast + maxFuture,
logitCap: 50.0,
residualWeight: 0.5,
gradClip: 1e10,
convKernelSize: convKernel,
eps: float32(eps),
}
}
// buildCausalValidMaskF32 creates the causal-valid mask for block-local attention.
// Returns flat [chunkSize * contextSize] float32 data (1.0 = allowed, 0.0 = masked).
func buildCausalValidMaskF32(chunkSize, maxPast, maxFuture int) []float32 {
contextSize := chunkSize + maxPast + maxFuture
upperDiag := maxPast + maxFuture
result := make([]float32, chunkSize*contextSize)
for r := range chunkSize {
for c := range contextSize {
lower := r <= c // tril(contextSize, chunkSize) transposed
upper := c <= r+upperDiag // tril(chunkSize, contextSize, diag=upperDiag)
if lower && upper {
result[r*contextSize+c] = 1.0
}
}
}
return result
}
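// Illustrative worked example (assumption: exposition only): for chunkSize=3,
// maxPast=2, maxFuture=0 (contextSize=5) the mask is, with row r = query
// position in the chunk and column c = context position:
//
//	c: 0 1 2 3 4
//	r0 1 1 1 0 0
//	r1 0 1 1 1 0
//	r2 0 0 1 1 1
//
// Query r attends to context positions r..r+maxPast; since the context is
// left-padded by maxPast, that window is the current frame plus the maxPast
// frames before it.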

View File

@@ -0,0 +1,475 @@
package gemma4
import (
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model/input"
)
const (
cacheTypeSWA = iota
cacheTypeCausal
)
type TextOptions struct {
hiddenSize int
numHeads, numKVHeads int
numGlobalKVHeads int
headDim, globalHeadDim int
hiddenLayers int
hiddenSizePerLayerInput int
eps float32
ropeBase float32
ropeLocalBase float32
partialRotaryDims int // RoPE dims for full-attention (global) layers
slidingWindowPattern []bool
// kvDonorMap maps shared layer index -> donor layer index.
// Donor is the last non-shared layer of the same type (sliding/full).
kvDonorMap map[int]int
finalLogitSoftcap float32
numExperts int
numExpertsUsed int
}
func (o *TextOptions) isLocal(layer int) bool {
if layer < len(o.slidingWindowPattern) {
return o.slidingWindowPattern[layer]
}
return false
}
func (o *TextOptions) ropeForLayer(layer int) (base float32, dims int) {
if o.isLocal(layer) {
return o.ropeLocalBase, o.headDim
}
return o.ropeBase, o.partialRotaryDims
}
func (o *TextOptions) kvHeadsForLayer(layer int) int {
if o.isLocal(layer) {
return o.numKVHeads
}
if o.numGlobalKVHeads > 0 {
return o.numGlobalKVHeads
}
return o.numKVHeads
}
func (o *TextOptions) headDimForLayer(layer int) int {
if o.isLocal(layer) {
return o.headDim
}
return o.globalHeadDim
}
type TextModel struct {
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
*PerLayerProjector
Layers []TextLayer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
TextOptions
}
func newTextModel(c fs.Config) *TextModel {
numLayers := int(c.Uint("block_count"))
// Head dimensions: key_length is global head dim, key_length_swa is local (SWA) head dim.
globalHeadDim := int(c.Uint("attention.key_length", 512))
headDim := int(c.Uint("attention.key_length_swa", 256))
// RoPE dimensions for global (full attention) layers with proportional RoPE.
// The freq_factors tensor handles partial rotation (1.0 for rotated pairs,
// 1e30 for non-rotated), so ropeDims equals the full global head dim.
partialRotaryDims := int(c.Uint("rope.dimension_count", 0))
if partialRotaryDims == 0 {
partialFactor := c.Float("rope.partial_rotary_factor", 1.0)
partialRotaryDims = int(float32(globalHeadDim) * partialFactor)
}
ropeBase := c.Float("rope.freq_base", 1000000.0)
ropeLocalBase := c.Float("rope.freq_base_swa", 0)
if ropeLocalBase == 0 {
ropeLocalBase = c.Float("rope.local.freq_base", 10000.0)
}
numGlobalKVHeads := int(c.Uint("attention.global_head_count_kv", 0))
slidingPattern := c.Bools("attention.sliding_window_pattern")
// KV heads: try per-layer array first (MoE models), then fall back to scalar
numKVHeads := 0
kvHeadsArray := c.Ints("attention.head_count_kv")
if len(kvHeadsArray) > 0 {
numKVHeads = int(kvHeadsArray[0])
if numGlobalKVHeads == 0 && len(slidingPattern) > 0 {
for i, isLocal := range slidingPattern {
if !isLocal && i < len(kvHeadsArray) {
numGlobalKVHeads = int(kvHeadsArray[i])
break
}
}
}
}
if numKVHeads == 0 {
numKVHeads = int(c.Uint("attention.head_count_kv", 0))
}
// Compute KV sharing donor map (same logic as MLX)
sharedLayers := int(c.Uint("attention.shared_kv_layers", 0))
kvDonorMap := make(map[int]int)
if sharedLayers > 0 && len(slidingPattern) > 0 {
firstShared := numLayers - sharedLayers
for i := firstShared; i < numLayers; i++ {
isLocal := slidingPattern[i]
// Find last non-shared layer of same type
for j := firstShared - 1; j >= 0; j-- {
if slidingPattern[j] == isLocal {
kvDonorMap[i] = j
break
}
}
}
}
return &TextModel{
Layers: make([]TextLayer, numLayers),
TextOptions: TextOptions{
hiddenSize: int(c.Uint("embedding_length")),
numHeads: int(c.Uint("attention.head_count")),
numKVHeads: numKVHeads,
numGlobalKVHeads: numGlobalKVHeads,
headDim: headDim,
globalHeadDim: globalHeadDim,
hiddenLayers: numLayers,
hiddenSizePerLayerInput: int(c.Uint("embedding_length_per_layer_input", 0)),
eps: c.Float("attention.layer_norm_rms_epsilon", 1e-06),
ropeBase: ropeBase,
ropeLocalBase: ropeLocalBase,
partialRotaryDims: partialRotaryDims,
slidingWindowPattern: slidingPattern,
kvDonorMap: kvDonorMap,
finalLogitSoftcap: c.Float("final_logit_softcapping", 0.0),
numExperts: int(c.Uint("expert_count", 0)),
numExpertsUsed: int(c.Uint("expert_used_count", 0)),
},
}
}
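// Illustrative sketch (assumption: exposition only): donor selection for KV
// sharing. With the 6-layer pattern [L L F L L F] (L=sliding, F=full) and
// shared_kv_layers=2, firstShared=4: layer 4 (sliding) borrows K/V from
// layer 3, the last non-shared sliding layer, and layer 5 (full) from layer 2.
func exampleKVDonorMap(pattern []bool, sharedLayers int) map[int]int {
donors := make(map[int]int)
firstShared := len(pattern) - sharedLayers
for i := firstShared; i < len(pattern); i++ {
for j := firstShared - 1; j >= 0; j-- {
if pattern[j] == pattern[i] {
donors[i] = j // last earlier layer of the same attention type
break
}
}
}
return donors // exampleKVDonorMap([]bool{true, true, false, true, true, false}, 2) == map[int]int{4: 3, 5: 2}
}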
func (m *TextModel) Forward(ctx ml.Context, batch input.Batch, cache kvcache.Cache) ml.Tensor {
positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))
hiddenState := m.TokenEmbedding.Forward(ctx, batch.Inputs)
hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(m.hiddenSize)))
// Inject vision embeddings into the hidden state
var except []int
for _, image := range batch.Multimodal {
visionOutputs := image.Multimodal[0].Tensor
ctx.Forward(visionOutputs.Copy(ctx, hiddenState.View(ctx, image.Index*hiddenState.Stride(1), visionOutputs.Dim(0)*visionOutputs.Dim(1))))
for i := range visionOutputs.Dim(1) {
except = append(except, image.Index+i)
}
}
// PLE
var perLayerInputs ml.Tensor
if m.PerLayerProjector != nil {
perLayerInputs = m.PerLayerProjector.Forward(ctx, batch, hiddenState, &m.TextOptions)
}
for i := range len(m.Layers) {
layer := m.Layers[i]
if cache != nil {
cache.SetLayer(i)
cacheType := cacheTypeSWA
if !m.isLocal(i) {
cacheType = cacheTypeCausal
}
wc := cache.(*kvcache.WrapperCache)
wc.SetLayerType(cacheType)
if causal, ok := wc.UnderlyingCache().(*kvcache.Causal); ok {
causal.SetCausal(ctx, kvcache.CausalOptions{Except: except})
}
}
var lastLayerOutputs ml.Tensor
if i == len(m.Layers)-1 {
lastLayerOutputs = batch.Outputs
}
var perLayerInput ml.Tensor
if perLayerInputs != nil {
perLayerInput = perLayerInputs.View(ctx, i*perLayerInputs.Stride(1), perLayerInputs.Dim(0), perLayerInputs.Stride(2), perLayerInputs.Dim(2))
}
// KV sharing: layers >= firstShared reuse K/V from donor layers
isShared := false
if donorLayer, ok := m.kvDonorMap[i]; ok {
// Set cache layer to donor so Get() reads donor's K/V
cache.SetLayer(donorLayer)
isShared = true
}
hiddenState = layer.Forward(ctx, i, hiddenState, positions, perLayerInput, lastLayerOutputs, cache, isShared, &m.TextOptions)
}
return m.OutputNorm.Forward(ctx, hiddenState, m.eps)
}
// PerLayerProjector implements PLE.
type PerLayerProjector struct {
TokenEmbedding *nn.Embedding `gguf:"per_layer_token_embd"`
Projector *nn.Linear `gguf:"per_layer_model_proj"`
Norm *nn.RMSNorm `gguf:"per_layer_proj_norm"`
}
func (p *PerLayerProjector) Forward(ctx ml.Context, batch input.Batch, inputs ml.Tensor, opts *TextOptions) ml.Tensor {
inputsPerLayer := p.TokenEmbedding.Forward(ctx, batch.Inputs)
inputsPerLayer = inputsPerLayer.Scale(ctx, math.Sqrt(float64(opts.hiddenSizePerLayerInput)))
// Reshape to [pleDim, numLayers, numTokens] — matching projection shape
inputsPerLayer = inputsPerLayer.Reshape(ctx, opts.hiddenSizePerLayerInput, opts.hiddenLayers, inputs.Dim(1))
perLayerProjection := p.Projector.Forward(ctx, inputs)
perLayerProjection = perLayerProjection.Scale(ctx, 1.0/math.Sqrt(float64(opts.hiddenSize)))
perLayerProjection = perLayerProjection.Reshape(ctx, opts.hiddenSizePerLayerInput, opts.hiddenLayers, inputs.Dim(1))
perLayerProjection = p.Norm.Forward(ctx, perLayerProjection, opts.eps)
if inputsPerLayer != nil {
perLayerProjection = perLayerProjection.Add(ctx, inputsPerLayer)
perLayerProjection = perLayerProjection.Scale(ctx, 1/math.Sqrt(2))
}
return perLayerProjection
}
type TextSelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
Key *nn.Linear `gguf:"attn_k"`
KeyNorm *nn.RMSNorm `gguf:"attn_k_norm"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
RopeFactors ml.Tensor `gguf:"rope_freqs.weight"` // proportional RoPE freq_factors
}
func (sa *TextSelfAttention) Forward(ctx ml.Context, layer int, hiddenState, positions ml.Tensor, cache kvcache.Cache, sharedKV bool, opts *TextOptions) ml.Tensor {
batchSize := hiddenState.Dim(1)
hd := opts.headDimForLayer(layer)
kvHeads := opts.kvHeadsForLayer(layer)
ropeBase, ropeDims := opts.ropeForLayer(layer)
q := sa.Query.Forward(ctx, hiddenState)
q = q.Reshape(ctx, hd, opts.numHeads, batchSize)
q = sa.QueryNorm.Forward(ctx, q, opts.eps)
var k, v ml.Tensor
if !sharedKV {
k = sa.Key.Forward(ctx, hiddenState)
k = k.Reshape(ctx, hd, kvHeads, batchSize)
if sa.Value != nil {
v = sa.Value.Forward(ctx, hiddenState)
v = v.Reshape(ctx, hd, kvHeads, batchSize)
} else {
// K=V: use raw K projection (before K norm) as V
v = k
}
k = sa.KeyNorm.Forward(ctx, k, opts.eps)
v = v.RMSNorm(ctx, nil, opts.eps) // V norm: unweighted RMSNorm
}
// RoPE with proportional freq_factors on global layers
ropeOpts := []func(*rope.Options){rope.WithTypeNeoX()}
if sa.RopeFactors != nil && !opts.isLocal(layer) {
ropeOpts = append(ropeOpts, rope.WithFactors(sa.RopeFactors))
}
q = nn.RoPE(ctx, q, positions, ropeDims, ropeBase, 1.0, ropeOpts...)
if k != nil {
k = nn.RoPE(ctx, k, positions, ropeDims, ropeBase, 1.0, ropeOpts...)
}
attention := nn.Attention(ctx, q, k, v, 1.0, cache)
attention = attention.Reshape(ctx, hd*opts.numHeads, batchSize)
return sa.Output.Forward(ctx, attention)
}
type TextMLP struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
func (mlp *TextMLP) Forward(ctx ml.Context, hiddenState ml.Tensor) ml.Tensor {
hiddenState = mlp.Gate.Forward(ctx, hiddenState).GELU(ctx, mlp.Up.Forward(ctx, hiddenState))
return mlp.Down.Forward(ctx, hiddenState)
}
// TextRouter implements the Gemma 4 MoE router.
type TextRouter struct {
Proj *nn.Linear `gguf:"ffn_gate_inp"`
Scale ml.Tensor `gguf:"ffn_gate_inp.scale"`
}
func (r *TextRouter) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextOptions) (routingWeights, selectedExperts ml.Tensor) {
// RMSNorm without learned weight
x := hiddenState.RMSNorm(ctx, nil, opts.eps)
// Scale by 1/sqrt(hidden_size)
x = x.Scale(ctx, 1.0/math.Sqrt(float64(opts.hiddenSize)))
// Multiply by learned scale parameter
x = x.Mul(ctx, r.Scale)
// Project to expert logits
expertScores := r.Proj.Forward(ctx, x)
// Softmax over experts
routingWeights = expertScores.Softmax(ctx)
// TopK expert selection
selectedExperts = routingWeights.TopK(ctx, opts.numExpertsUsed)
return routingWeights, selectedExperts
}
// TextMoEBlock implements the Gemma 4 sparse MoE.
type TextMoEBlock struct {
GateUp *nn.LinearBatch `gguf:"ffn_gate_up_exps"`
Gate *nn.LinearBatch `gguf:"ffn_gate_exps"`
Up *nn.LinearBatch `gguf:"ffn_up_exps"`
Down *nn.LinearBatch `gguf:"ffn_down_exps"`
DownScale ml.Tensor `gguf:"ffn_down_exps.scale,alt:ffn_gate_inp.per_expert_scale"`
}
func (moe *TextMoEBlock) Forward(ctx ml.Context, hiddenState, routingWeights, selectedExperts ml.Tensor, opts *TextOptions) ml.Tensor {
// Select routing weights for chosen experts and renormalize
routingWeights = routingWeights.Reshape(ctx, 1, opts.numExperts, hiddenState.Dim(1)).Rows(ctx, selectedExperts)
routingWeights = routingWeights.Reshape(ctx, opts.numExpertsUsed, hiddenState.Dim(1))
routingWeights = routingWeights.Div(ctx, routingWeights.SumRows(ctx))
routingWeights = routingWeights.Reshape(ctx, 1, opts.numExpertsUsed, hiddenState.Dim(1))
hiddenState = hiddenState.Reshape(ctx, hiddenState.Dim(0), 1, hiddenState.Dim(1))
// Expert computation using LinearBatch (MulmatID selecting experts by index)
var gateOut, upOut ml.Tensor
if moe.GateUp != nil && moe.GateUp.Weight != nil {
gateUp := moe.GateUp.Forward(ctx, hiddenState, selectedExperts)
nFF := gateUp.Dim(0) / 2
gateOut = gateUp.Slice(ctx, 0, 0, nFF, 1)
upOut = gateUp.Slice(ctx, 0, nFF, gateUp.Dim(0), 1)
} else {
gateOut = moe.Gate.Forward(ctx, hiddenState, selectedExperts)
upOut = moe.Up.Forward(ctx, hiddenState, selectedExperts)
}
hiddenState = gateOut.GELU(ctx, upOut)
experts := moe.Down.Forward(ctx, hiddenState, selectedExperts)
// Apply per-expert down projection scale when present.
if moe.DownScale != nil {
expertScales := moe.DownScale.Reshape(ctx, opts.numExperts, 1)
expertScales = expertScales.Repeat(ctx, 1, hiddenState.Dim(2))
expertScales = expertScales.Reshape(ctx, 1, opts.numExperts, hiddenState.Dim(2)).Rows(ctx, selectedExperts)
expertScales = expertScales.Reshape(ctx, opts.numExpertsUsed, hiddenState.Dim(2))
expertScales = expertScales.Reshape(ctx, 1, opts.numExpertsUsed, hiddenState.Dim(2))
experts = experts.Mul(ctx, expertScales)
}
// Apply routing weights
experts = experts.Mul(ctx, routingWeights)
// Sum across experts
nextStates := experts.View(ctx, 0, experts.Dim(0), experts.Stride(2), experts.Dim(2))
for i := 1; i < opts.numExpertsUsed; i++ {
nextStates = nextStates.Add(ctx, experts.View(ctx, i*experts.Stride(1), experts.Dim(0), experts.Stride(2), experts.Dim(2)))
}
return nextStates
}
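// Illustrative sketch (assumption: exposition only): the fused
// ffn_gate_up_exps output stores gate and up as contiguous halves, [gate|up],
// along dim 0, so the split above is two contiguous slices. A stride-2 split
// would interleave the two projections and produce wrong activations.
func exampleSplitGateUp(fused []float32) (gate, up []float32) {
nFF := len(fused) / 2
return fused[:nFF], fused[nFF:]
}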
type TextLayer struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
SelfAttention *TextSelfAttention
PostAttentionNorm *nn.RMSNorm `gguf:"post_attention_norm,alt:attn_post_norm"`
MLPNorm *nn.RMSNorm `gguf:"ffn_norm,alt:ffn_pre_norm"`
MLP *TextMLP
PostMLPNorm *nn.RMSNorm `gguf:"post_ffw_norm,alt:ffn_post_norm"`
// MoE (present only for models with enable_moe_block=true)
Router *TextRouter
MoE *TextMoEBlock
MoENorm *nn.RMSNorm `gguf:"pre_ffw_norm_2,alt:ffn_pre_norm_2"`
PostMoENorm *nn.RMSNorm `gguf:"post_ffw_norm_2,alt:ffn_post_norm_2"`
PostMLPNorm1 *nn.RMSNorm `gguf:"post_ffw_norm_1,alt:ffn_post_norm_1"` // used instead of PostMLPNorm when MoE is present
PerLayerInputGate *nn.Linear `gguf:"inp_gate"`
PerLayerProjection *nn.Linear `gguf:"proj"`
PostPerLayerNorm *nn.RMSNorm `gguf:"post_norm"`
LayerScalar ml.Tensor `gguf:"layer_scalar,alt:layer_output_scale.weight"`
}
func (l *TextLayer) Forward(ctx ml.Context, layer int, hiddenState, positions, perLayerInput, outputs ml.Tensor, cache kvcache.Cache, sharedKV bool, opts *TextOptions) ml.Tensor {
residual := hiddenState
hiddenState = l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.SelfAttention.Forward(ctx, layer, hiddenState, positions, cache, sharedKV, opts)
hiddenState = l.PostAttentionNorm.Forward(ctx, hiddenState, opts.eps)
if outputs != nil {
hiddenState = hiddenState.Rows(ctx, outputs)
residual = residual.Rows(ctx, outputs)
if perLayerInput != nil {
perLayerInput = perLayerInput.Rows(ctx, outputs)
}
}
hiddenState = hiddenState.Add(ctx, residual)
residual = hiddenState
// MLP (+ optional MoE in parallel)
hasSplitExperts := l.MoE != nil && l.MoE.Gate != nil && l.MoE.Up != nil && l.MoE.Gate.Weight != nil && l.MoE.Up.Weight != nil
hasFusedExperts := l.MoE != nil && l.MoE.GateUp != nil && l.MoE.GateUp.Weight != nil
if l.Router != nil && l.MoE != nil && l.MoE.Down != nil && l.MoE.Down.Weight != nil && (hasSplitExperts || hasFusedExperts) {
// MoE layers: run MLP and MoE in parallel, sum results
mlpState := l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
mlpState = l.MLP.Forward(ctx, mlpState)
mlpState = l.PostMLPNorm1.Forward(ctx, mlpState, opts.eps)
routingWeights, selectedExperts := l.Router.Forward(ctx, hiddenState, opts)
moeState := l.MoENorm.Forward(ctx, hiddenState, opts.eps)
moeState = l.MoE.Forward(ctx, moeState, routingWeights, selectedExperts, opts)
moeState = l.PostMoENorm.Forward(ctx, moeState, opts.eps)
// Combine MLP + MoE, apply outer post-FFN norm, then add residual
combined := mlpState.Add(ctx, moeState)
combined = l.PostMLPNorm.Forward(ctx, combined, opts.eps)
hiddenState = combined.Add(ctx, residual)
} else {
// Dense layers: MLP only
hiddenState = l.MLPNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = l.MLP.Forward(ctx, hiddenState)
hiddenState = l.PostMLPNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = hiddenState.Add(ctx, residual)
}
// PLE injection (after MLP residual)
if perLayerInput != nil && l.PerLayerInputGate != nil {
pleState := l.PerLayerInputGate.Forward(ctx, hiddenState)
pleState = pleState.GELU(ctx, perLayerInput)
pleState = l.PerLayerProjection.Forward(ctx, pleState)
pleState = l.PostPerLayerNorm.Forward(ctx, pleState, opts.eps)
hiddenState = hiddenState.Add(ctx, pleState)
}
// Layer scalar applied at end of layer (full-attention layers only)
if l.LayerScalar != nil {
hiddenState = hiddenState.Mul(ctx, l.LayerScalar)
}
return hiddenState
}

View File

@@ -0,0 +1,392 @@
package gemma4
import (
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/rope"
)
const batchSize = 1
// ClippableLinear is a linear layer with optional input/output clamping.
// Required by Gemma4 vision encoder for numerical stability with F16 weights.
type ClippableLinear struct {
Weight ml.Tensor `gguf:"weight"`
InputMin ml.Tensor `gguf:"input_min"`
InputMax ml.Tensor `gguf:"input_max"`
OutputMin ml.Tensor `gguf:"output_min"`
OutputMax ml.Tensor `gguf:"output_max"`
inMin, inMax, outMin, outMax float32
hasClamp bool
clampsLoaded bool
}
func scalarValue(t ml.Tensor) (float32, bool) {
if t == nil {
return 0, false
}
data := t.BackendGet()
if len(data) == 0 {
return 0, false
}
return data[0], true
}
func (l *ClippableLinear) loadClampFromScalars() {
if l.clampsLoaded {
return
}
l.clampsLoaded = true
const (
defaultMin = -math.MaxFloat32
defaultMax = math.MaxFloat32
)
inMin, hasInMin := scalarValue(l.InputMin)
inMax, hasInMax := scalarValue(l.InputMax)
outMin, hasOutMin := scalarValue(l.OutputMin)
outMax, hasOutMax := scalarValue(l.OutputMax)
if !(hasInMin || hasInMax || hasOutMin || hasOutMax) {
return
}
l.hasClamp = true
l.inMin = defaultMin
l.inMax = defaultMax
l.outMin = defaultMin
l.outMax = defaultMax
if hasInMin {
l.inMin = inMin
}
if hasInMax {
l.inMax = inMax
}
if hasOutMin {
l.outMin = outMin
}
if hasOutMax {
l.outMax = outMax
}
}
func (l *ClippableLinear) Forward(ctx ml.Context, x ml.Tensor) ml.Tensor {
if l.hasClamp {
x = x.Clamp(ctx, l.inMin, l.inMax)
}
out := l.Weight.Mulmat(ctx, x)
if l.hasClamp {
out = out.Clamp(ctx, l.outMin, l.outMax)
}
return out
}
// InitClamp distributes packed clamp values from v.clamp_data to ClippableLinear structs.
// If scalar clamp tensors (input_min/max, output_min/max) are present, they are used too.
// Layout: numLayers × 7 linears (q,k,v,out,gate,up,down) × 4 floats (inMin,inMax,outMin,outMax)
// then 4 floats for the projector.
func (m *VisionModel) InitClamp(proj *MultiModalProjector) {
if m.clampInitDone {
return
}
m.clampInitDone = true
linears := func(l *VisionEncoderLayer) []*ClippableLinear {
return []*ClippableLinear{
l.SelfAttention.Query, l.SelfAttention.Key, l.SelfAttention.Value,
l.SelfAttention.Output, l.MLP.Gate, l.MLP.Up, l.MLP.Down,
}
}
for i := range m.Layers {
for _, cl := range linears(&m.Layers[i]) {
if cl != nil {
cl.loadClampFromScalars()
}
}
}
if proj != nil && proj.Projection != nil {
proj.Projection.loadClampFromScalars()
}
// Load packed clamp data when present (legacy Ollama format).
if m.ClampData == nil {
return
}
// Read all clamp values from packed F32 tensor
data := m.ClampData.BackendGet()
if len(data) == 0 {
return
}
// Distribute to layer linears: 7 per layer × 4 values each
for i := range m.Layers {
for li, cl := range linears(&m.Layers[i]) {
if cl == nil {
continue
}
idx := (i*7 + li) * 4
if idx+3 < len(data) {
cl.inMin = data[idx]
cl.inMax = data[idx+1]
cl.outMin = data[idx+2]
cl.outMax = data[idx+3]
cl.hasClamp = true
}
}
}
// Projector clamp values (last 4 floats)
if proj != nil && proj.Projection != nil {
projIdx := len(m.Layers) * 7 * 4
if projIdx+3 < len(data) {
proj.Projection.inMin = data[projIdx]
proj.Projection.inMax = data[projIdx+1]
proj.Projection.outMin = data[projIdx+2]
proj.Projection.outMax = data[projIdx+3]
proj.Projection.hasClamp = true
}
}
}
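// Illustrative worked example (assumption: exposition only): the packed clamp
// offset for layer i, linear li (0=q, 1=k, 2=v, 3=out, 4=gate, 5=up, 6=down)
// is (i*7+li)*4. For layer 2's ffn_up (li=5) that is (2*7+5)*4 = 76, so
// data[76:80] holds its inMin, inMax, outMin, outMax.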
type VisionSelfAttention struct {
Query *ClippableLinear `gguf:"attn_q"`
Key *ClippableLinear `gguf:"attn_k"`
Value *ClippableLinear `gguf:"attn_v"`
QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
KeyNorm *nn.RMSNorm `gguf:"attn_k_norm"`
Output *ClippableLinear `gguf:"attn_out"`
}
func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenState, posX, posY, attnMask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
numPatches := hiddenState.Dim(1)
headDim := opts.hiddenSize / opts.numHeads
query := sa.Query.Forward(ctx, hiddenState)
key := sa.Key.Forward(ctx, hiddenState)
value := sa.Value.Forward(ctx, hiddenState)
query = query.Reshape(ctx, headDim, opts.numHeads, numPatches, batchSize)
key = key.Reshape(ctx, headDim, opts.numHeads, numPatches, batchSize)
value = value.Reshape(ctx, headDim, opts.numHeads, numPatches, batchSize)
// Q/K norms (Gemma-style: x * (1 + weight) / rms(x))
query = sa.QueryNorm.Forward(ctx, query, opts.eps)
key = sa.KeyNorm.Forward(ctx, key, opts.eps)
// V norm (RMSNorm without learned weights)
value = value.RMSNorm(ctx, nil, opts.eps)
// 2D RoPE: split head dim in half, apply NeoX RoPE with x positions to first half,
// y positions to second half, then concatenate.
halfDim := headDim / 2
ropeOpts := rope.WithTypeNeoX()
qFirst := query.View(ctx, 0, halfDim, query.Stride(1), opts.numHeads, query.Stride(2), numPatches)
qFirst = nn.RoPE(ctx, qFirst, posX, halfDim, opts.ropeTheta, 1.0, ropeOpts)
kFirst := key.View(ctx, 0, halfDim, key.Stride(1), opts.numHeads, key.Stride(2), numPatches)
kFirst = nn.RoPE(ctx, kFirst, posX, halfDim, opts.ropeTheta, 1.0, ropeOpts)
halfOffset := halfDim * query.Stride(0)
qSecond := query.View(ctx, halfOffset, halfDim, query.Stride(1), opts.numHeads, query.Stride(2), numPatches)
qSecond = nn.RoPE(ctx, qSecond, posY, halfDim, opts.ropeTheta, 1.0, ropeOpts)
halfOffsetK := halfDim * key.Stride(0)
kSecond := key.View(ctx, halfOffsetK, halfDim, key.Stride(1), opts.numHeads, key.Stride(2), numPatches)
kSecond = nn.RoPE(ctx, kSecond, posY, halfDim, opts.ropeTheta, 1.0, ropeOpts)
query = qFirst.Concat(ctx, qSecond, 0)
key = kFirst.Concat(ctx, kSecond, 0)
// Use flash attention for numerical stability (handles large attention scores
// from unclamped RMSNorm weights, e.g. 26B has addOne weights up to 19.5)
attention := nn.Attention(ctx, query, key, value, 1.0, nil)
attention = attention.Reshape(ctx, opts.hiddenSize, attention.Dim(2), batchSize)
return sa.Output.Forward(ctx, attention)
}
type VisionMLP struct {
Gate *ClippableLinear `gguf:"ffn_gate"`
Up *ClippableLinear `gguf:"ffn_up"`
Down *ClippableLinear `gguf:"ffn_down"`
}
func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenState ml.Tensor) ml.Tensor {
gate := mlp.Gate.Forward(ctx, hiddenState)
up := mlp.Up.Forward(ctx, hiddenState)
hiddenState = gate.QuickGELU(ctx, up)
return mlp.Down.Forward(ctx, hiddenState)
}
type VisionEncoderLayer struct {
AttentionNorm *nn.RMSNorm `gguf:"ln1"`
SelfAttention *VisionSelfAttention
PostAttentionNorm *nn.RMSNorm `gguf:"attn_post_norm"`
FFNNorm *nn.RMSNorm `gguf:"ln2"`
MLP *VisionMLP
PostFFNNorm *nn.RMSNorm `gguf:"ffn_post_norm"`
LayerOutputScale ml.Tensor `gguf:"out_scale.weight"`
}
func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenState, posX, posY, attnMask ml.Tensor, opts *VisionModelOptions) ml.Tensor {
residual := hiddenState
// Pre-attention norm -> self attention -> post-attention norm
hiddenState = e.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = e.SelfAttention.Forward(ctx, hiddenState, posX, posY, attnMask, opts)
hiddenState = e.PostAttentionNorm.Forward(ctx, hiddenState, opts.eps)
// Residual connection
hiddenState = hiddenState.Add(ctx, residual)
residual = hiddenState
// Pre-FFN norm -> FFN -> post-FFN norm
hiddenState = e.FFNNorm.Forward(ctx, hiddenState, opts.eps)
hiddenState = e.MLP.Forward(ctx, hiddenState)
hiddenState = e.PostFFNNorm.Forward(ctx, hiddenState, opts.eps)
// Residual connection
hiddenState = hiddenState.Add(ctx, residual)
// Per-layer output scale
if e.LayerOutputScale != nil {
hiddenState = hiddenState.Mul(ctx, e.LayerOutputScale)
}
return hiddenState
}
type VisionModelOptions struct {
hiddenSize int
numHeads int
patchSize int
nMerge int
eps float32
ropeTheta float32
}
type VisionModel struct {
PatchEmbedding *nn.Conv2D `gguf:"patch_embd"`
PositionEmbedding ml.Tensor `gguf:"position_embd.weight"`
ClampData ml.Tensor `gguf:"clamp_data"`
StdBias ml.Tensor `gguf:"std_bias"`
StdScale ml.Tensor `gguf:"std_scale"`
Layers []VisionEncoderLayer `gguf:"blk"`
*VisionModelOptions
clampInitDone bool
}
func (m *VisionModel) Forward(ctx ml.Context, pixelValues ml.Tensor, numPatchesX, numPatchesY int) ml.Tensor {
numPatches := numPatchesX * numPatchesY
// Patch embedding via Conv2D
hiddenState := m.PatchEmbedding.Forward(ctx, pixelValues, m.patchSize, m.patchSize, 0, 0, 1, 1)
hiddenState = hiddenState.Reshape(ctx, numPatches, m.hiddenSize)
hiddenState = hiddenState.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
// Conv2D with F16 weights produces F16 output via im2col; cast to F32 for encoder precision
hiddenState = hiddenState.Cast(ctx, ml.DTypeF32)
// 2D positional embeddings from 3D tensor [nEmbd, maxPos, 2]
posSize := m.PositionEmbedding.Dim(1)
nb1 := m.PositionEmbedding.Stride(1)
tblX := m.PositionEmbedding.View(ctx, 0, m.hiddenSize, nb1, posSize)
tblY := m.PositionEmbedding.View(ctx, posSize*nb1, m.hiddenSize, nb1, posSize)
// Position indices for patches
posXData := make([]int32, numPatches)
posYData := make([]int32, numPatches)
for i := range numPatches {
posXData[i] = int32(i % numPatchesX)
posYData[i] = int32(i / numPatchesX)
}
posXEmb := ctx.Input().FromInts(posXData, numPatches)
posYEmb := ctx.Input().FromInts(posYData, numPatches)
hiddenState = hiddenState.Add(ctx, tblX.Rows(ctx, posXEmb))
hiddenState = hiddenState.Add(ctx, tblY.Rows(ctx, posYEmb))
// No attention mask — all positions are real patches
var attnMask ml.Tensor
// RoPE positions
posXRope := ctx.Input().FromInts(posXData, numPatches)
posYRope := ctx.Input().FromInts(posYData, numPatches)
// Vision transformer layers
for i := range m.Layers {
hiddenState = m.Layers[i].Forward(ctx, hiddenState, posXRope, posYRope, attnMask, m.VisionModelOptions)
}
return hiddenState
}
func newVisionModel(c fs.Config) *VisionModel {
return &VisionModel{
Layers: make([]VisionEncoderLayer, c.Uint("vision.block_count")),
VisionModelOptions: &VisionModelOptions{
hiddenSize: int(c.Uint("vision.embedding_length")),
numHeads: int(c.Uint("vision.attention.head_count")),
patchSize: int(c.Uint("vision.patch_size", 16)),
nMerge: int(c.Uint("vision.projector.scale_factor", 3)),
eps: c.Float("vision.attention.layer_norm_epsilon", 1e-6),
ropeTheta: 100.0,
},
}
}
func visionTokenCount(imageWidth, imageHeight, patchSize, nMerge int) int {
patchesX := imageWidth / patchSize
patchesY := imageHeight / patchSize
mergedX := patchesX / nMerge
mergedY := patchesY / nMerge
return mergedX * mergedY
}
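// Illustrative worked example (assumption: exposition only): with
// patchSize=16 and nMerge=3, a 768×768 image has 768/16 = 48 patches per
// side and 48/3 = 16 merged positions per side, so
// visionTokenCount(768, 768, 16, 3) == 256.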
func visionPoolAndProject(ctx ml.Context, hiddenState ml.Tensor, numPatchesX, numPatchesY int, opts *VisionModelOptions, proj *MultiModalProjector, stdBias, stdScale ml.Tensor) ml.Tensor {
hiddenSize := opts.hiddenSize
// Reshape from [hiddenSize, numPatches] to spatial layout for pooling
hiddenState = hiddenState.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
hiddenState = hiddenState.Reshape(ctx, numPatchesX, numPatchesY, hiddenSize)
// AvgPool2D with kernel=stride=nMerge
hiddenState = hiddenState.AvgPool2D(ctx, opts.nMerge, opts.nMerge, 0)
// Reshape back to [hiddenSize, numMergedPatches]
mergedX := numPatchesX / opts.nMerge
mergedY := numPatchesY / opts.nMerge
hiddenState = hiddenState.Reshape(ctx, mergedX*mergedY, hiddenSize)
hiddenState = hiddenState.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
hiddenState = hiddenState.Cast(ctx, ml.DTypeF32)
hiddenState = hiddenState.Scale(ctx, math.Sqrt(float64(hiddenSize)))
// Optional vision standardization before projection.
if stdBias != nil && stdScale != nil {
hiddenState = hiddenState.Sub(ctx, stdBias)
hiddenState = hiddenState.Mul(ctx, stdScale)
}
// Project to text embedding dimension
hiddenState = proj.Forward(ctx, hiddenState, opts.eps)
return hiddenState
}

View File

@@ -0,0 +1,331 @@
package gemma4
import (
"encoding/binary"
"fmt"
"log/slog"
"math"
"math/cmplx"
)
// Audio preprocessing constants.
const (
audioSampleRate = 16000
melBins = 128
frameLengthMs = 20.0
hopLengthMs = 10.0
minFrequency = 0.0
maxFrequency = 8000.0
melFloor = 1e-3
maxAudioSoftTokens = 750
// Chunking parameters for long audio.
maxChunkSamples = 28 * audioSampleRate // 28s target (headroom below 30s cap)
minChunkSamples = 20 * audioSampleRate // don't scan for silence before 20s
silenceWindowSize = 800 // 50ms at 16kHz for RMS window
)
// Computed from the above constants.
var (
frameLength = int(math.Round(audioSampleRate * frameLengthMs / 1000.0)) // 320
hopLength = int(math.Round(audioSampleRate * hopLengthMs / 1000.0)) // 160
)
// decodeWAV extracts mono float32 PCM samples from a WAV file, resampled to 16kHz.
func decodeWAV(data []byte) ([]float32, error) {
if len(data) < 12 {
return nil, fmt.Errorf("WAV file too short")
}
if string(data[0:4]) != "RIFF" || string(data[8:12]) != "WAVE" {
return nil, fmt.Errorf("not a WAV file")
}
var audioFormat uint16
var numChannels, sampleRate, bitsPerSample int
var audioData []byte
foundFmt := false
offset := 12
for offset+8 <= len(data) {
chunkID := string(data[offset : offset+4])
chunkSize := int(binary.LittleEndian.Uint32(data[offset+4 : offset+8]))
chunkData := data[offset+8 : min(offset+8+chunkSize, len(data))]
switch chunkID {
case "fmt ":
if len(chunkData) < 16 {
return nil, fmt.Errorf("fmt chunk too short")
}
audioFormat = binary.LittleEndian.Uint16(chunkData[0:2])
numChannels = int(binary.LittleEndian.Uint16(chunkData[2:4]))
sampleRate = int(binary.LittleEndian.Uint32(chunkData[4:8]))
bitsPerSample = int(binary.LittleEndian.Uint16(chunkData[14:16]))
if audioFormat == 0xFFFE && len(chunkData) >= 26 {
audioFormat = binary.LittleEndian.Uint16(chunkData[24:26])
}
foundFmt = true
case "data":
audioData = chunkData
}
offset += 8 + chunkSize
if chunkSize%2 != 0 {
offset++
}
}
if !foundFmt {
return nil, fmt.Errorf("no fmt chunk found in WAV file")
}
if audioFormat != 1 && audioFormat != 3 {
return nil, fmt.Errorf("unsupported WAV format: %d (need PCM=1 or float=3)", audioFormat)
}
if audioData == nil {
return nil, fmt.Errorf("no data chunk found in WAV file")
}
samples := decodeWAVSamples(audioData, audioFormat, bitsPerSample, numChannels)
if sampleRate != audioSampleRate {
samples = resampleLinear(samples, sampleRate, audioSampleRate)
}
return samples, nil
}
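// Illustrative usage sketch (assumption: the file name and error handling
// are hypothetical): decode a WAV file to 16kHz mono float32 PCM.
//
//	data, _ := os.ReadFile("speech.wav")
//	samples, err := decodeWAV(data) // mono, resampled to audioSampleRate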
func decodeWAVSamples(data []byte, format uint16, bits, channels int) []float32 {
bytesPerSample := bits / 8
totalSamples := len(data) / (bytesPerSample * channels)
mono := make([]float32, totalSamples)
for i := range totalSamples {
var sum float64
for ch := range channels {
off := (i*channels + ch) * bytesPerSample
if off+bytesPerSample > len(data) {
break
}
switch {
case format == 1 && bits == 16:
v := int16(binary.LittleEndian.Uint16(data[off : off+2]))
sum += float64(v) / 32768.0
case format == 1 && bits == 32:
v := int32(binary.LittleEndian.Uint32(data[off : off+4]))
sum += float64(v) / 2147483648.0
case format == 1 && bits == 24:
v := int32(data[off]) | int32(data[off+1])<<8 | int32(data[off+2])<<16
if v&0x800000 != 0 {
v |= ^0xFFFFFF
}
sum += float64(v) / 8388608.0
case format == 3 && bits == 32:
v := math.Float32frombits(binary.LittleEndian.Uint32(data[off : off+4]))
sum += float64(v)
case format == 1 && bits == 8:
sum += (float64(data[off]) - 128.0) / 128.0
}
}
mono[i] = float32(sum / float64(channels))
}
return mono
}
func resampleLinear(samples []float32, fromRate, toRate int) []float32 {
n := int(float64(len(samples)) / float64(fromRate) * float64(toRate))
if n <= 1 || len(samples) < 2 {
return samples // too short to interpolate; avoids division by zero below
}
out := make([]float32, n)
for i := range n {
pos := float64(i) * float64(len(samples)-1) / float64(n-1)
idx := int(pos)
frac := float32(pos - float64(idx))
if idx+1 < len(samples) {
out[i] = samples[idx]*(1-frac) + samples[idx+1]*frac
} else {
out[i] = samples[idx]
}
}
return out
}
// computeMelSpectrogram computes the log mel spectrogram from PCM samples.
// Returns shape [numFrames, melBins] as float32 slice, and numFrames.
func computeMelSpectrogram(samples []float32) ([]float32, int) {
fftLen := 1
for fftLen < frameLength {
fftLen <<= 1
}
fftLen *= 2 // fft_overdrive=True
// Hanning-nonzero window.
window := make([]float64, frameLength)
arg := math.Pi * 2.0 / float64(frameLength)
for i := range frameLength {
window[i] = 0.5 - 0.5*math.Cos(arg*(float64(i)+0.5))
}
numFreqBins := fftLen/2 + 1
melFilters := buildMelFilterBank(numFreqBins, melBins, minFrequency, maxFrequency, audioSampleRate)
frameSizeForUnfold := frameLength + 1
numFrames := (len(samples) - frameSizeForUnfold) / hopLength
if numFrames <= 0 {
return nil, 0
}
result := make([]float32, numFrames*melBins)
fftInput := make([]complex128, fftLen)
for f := range numFrames {
start := f * hopLength
for i := range frameLength {
fftInput[i] = complex(float64(samples[start+i])*window[i], 0)
}
for i := frameLength; i < fftLen; i++ {
fftInput[i] = 0
}
fft(fftInput)
for m := range melBins {
var melVal float64
for k := range numFreqBins {
mag := cmplx.Abs(fftInput[k])
melVal += mag * float64(melFilters[k*melBins+m])
}
if melVal < melFloor {
melVal = melFloor
}
result[f*melBins+m] = float32(math.Log(melVal))
}
}
return result, numFrames
}
func buildMelFilterBank(numFreqBins, numMels int, fMin, fMax float64, sr int) []float32 {
hzToMel := func(f float64) float64 {
return 2595.0 * math.Log10(1.0+f/700.0)
}
melToHz := func(m float64) float64 {
return 700.0 * (math.Pow(10.0, m/2595.0) - 1.0)
}
melMin := hzToMel(fMin)
melMax := hzToMel(fMax)
melPts := make([]float64, numMels+2)
for i := range melPts {
melPts[i] = melMin + float64(i)*(melMax-melMin)/float64(numMels+1)
}
filterFreqs := make([]float64, numMels+2)
for i, m := range melPts {
filterFreqs[i] = melToHz(m)
}
fftFreqs := make([]float64, numFreqBins)
for i := range fftFreqs {
fftFreqs[i] = float64(i) * float64(sr) / float64(2*(numFreqBins-1))
}
filters := make([]float32, numFreqBins*numMels)
for m := range numMels {
fLeft := filterFreqs[m]
fCenter := filterFreqs[m+1]
fRight := filterFreqs[m+2]
for k := range numFreqBins {
f := fftFreqs[k]
var v float64
if f >= fLeft && f <= fCenter && fCenter > fLeft {
v = (f - fLeft) / (fCenter - fLeft)
} else if f > fCenter && f <= fRight && fRight > fCenter {
v = (fRight - f) / (fRight - fCenter)
}
if v > 0 {
filters[k*numMels+m] = float32(v)
}
}
}
return filters
}
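// Illustrative check (assumption: exposition only): this is the HTK mel
// scale, which pins 1 kHz to roughly 1000 mel:
// 2595*log10(1 + 1000/700) = 2595*log10(2.4286) ≈ 1000.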
// fft performs an in-place Cooley-Tukey radix-2 FFT.
func fft(x []complex128) {
n := len(x)
if n <= 1 {
return
}
j := 0
for i := 1; i < n; i++ {
bit := n >> 1
for j&bit != 0 {
j ^= bit
bit >>= 1
}
j ^= bit
if i < j {
x[i], x[j] = x[j], x[i]
}
}
for size := 2; size <= n; size <<= 1 {
halfSize := size / 2
w := complex(math.Cos(2*math.Pi/float64(size)), -math.Sin(2*math.Pi/float64(size)))
for start := 0; start < n; start += size {
wn := complex(1, 0)
for k := range halfSize {
t := wn * x[start+k+halfSize]
x[start+k+halfSize] = x[start+k] - t
x[start+k] = x[start+k] + t
wn *= w
}
}
}
}
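// Illustrative check (assumption: exposition only): the FFT of a unit
// impulse is flat, a quick sanity test for the radix-2 implementation.
//
//	x := make([]complex128, 8)
//	x[0] = 1
//	fft(x) // every bin is (1+0i)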
// splitAudioChunks splits PCM samples into chunks of at most maxChunkSamples,
// preferring to split at low-energy (silence) regions for natural boundaries.
func splitAudioChunks(samples []float32) [][]float32 {
if len(samples) <= maxChunkSamples {
return [][]float32{samples}
}
var chunks [][]float32
offset := 0
for offset < len(samples) {
remaining := len(samples) - offset
if remaining <= maxChunkSamples {
chunks = append(chunks, samples[offset:])
break
}
splitAt := offset + maxChunkSamples
bestEnergy := math.MaxFloat64
scanStart := offset + maxChunkSamples - silenceWindowSize
scanEnd := offset + minChunkSamples
for pos := scanStart; pos >= scanEnd; pos -= silenceWindowSize / 2 {
end := pos + silenceWindowSize
if end > len(samples) {
end = len(samples)
}
var sumSq float64
for _, s := range samples[pos:end] {
sumSq += float64(s) * float64(s)
}
rms := math.Sqrt(sumSq / float64(end-pos))
if rms < bestEnergy {
bestEnergy = rms
splitAt = pos + silenceWindowSize/2
}
}
chunks = append(chunks, samples[offset:splitAt])
offset = splitAt
}
slog.Debug("Audio chunked", "chunks", len(chunks), "total_samples", len(samples))
return chunks
}
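// Illustrative pipeline sketch (assumption: not a verbatim call site): long
// audio is chunked at low-energy boundaries, then each chunk becomes a
// [numFrames, melBins] log-mel spectrogram for the audio encoder.
//
//	for _, chunk := range splitAudioChunks(samples) {
//		mel, numFrames := computeMelSpectrogram(chunk)
//		_ = mel // fed to ForwardAudio as ne[0]=melBins, ne[1]=numFrames
//		_ = numFrames
//	}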
// isAudioData checks if the data starts with WAV magic bytes.
func isAudioData(data []byte) bool {
return len(data) >= 12 && string(data[0:4]) == "RIFF" && string(data[8:12]) == "WAVE"
}

View File

@@ -0,0 +1,103 @@
package gemma4
import (
"image"
"math"
"golang.org/x/image/draw"
"github.com/ollama/ollama/fs"
)
type ImageProcessor struct {
patchSize int
numChannels int
nMerge int
minPixels int
maxPixels int
}
func newImageProcessor(c fs.Config) ImageProcessor {
patchSize := int(c.Uint("vision.patch_size", 16))
nMerge := int(c.Uint("vision.projector.scale_factor", 3))
numChannels := int(c.Uint("vision.num_channels", 3))
// Token limits from reference: min=40, max=280 output tokens after pooling.
// Convert to pixel counts: tokens * nMerge^2 * patchSize^2
minTokens := 40
maxTokens := 280
patchArea := patchSize * patchSize * nMerge * nMerge
minPixels := minTokens * patchArea
maxPixels := maxTokens * patchArea
return ImageProcessor{
patchSize: patchSize,
numChannels: numChannels,
nMerge: nMerge,
minPixels: minPixels,
maxPixels: maxPixels,
}
}
// ProcessImage resizes an image preserving aspect ratio, aligning dimensions
// to (patchSize * nMerge) boundaries, and normalizes pixels to [-1, 1].
// Returns the float32 pixel data and the actual output dimensions.
func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, int, int, error) {
// Compute target size preserving aspect ratio
alignSize := p.patchSize * p.nMerge
targetW, targetH := p.smartResize(img.Bounds().Dx(), img.Bounds().Dy(), alignSize)
// Resize directly without alpha compositing, matching MLX reference.
dst := image.NewRGBA(image.Rect(0, 0, targetW, targetH))
draw.BiLinear.Scale(dst, dst.Bounds(), img, img.Bounds(), draw.Src, nil)
// Normalize to [-1, 1] using mean=0.5, std=0.5: (pixel/255 - 0.5) / 0.5 = 2*pixel/255 - 1
data := p.pack(dst)
return data, targetW, targetH, nil
}
// smartResize computes target dimensions that preserve aspect ratio and
// align to alignSize boundaries. It scales the image to fill the maximum
// patch budget (maxPixels), matching the MLX reference.
func (p *ImageProcessor) smartResize(origW, origH, alignSize int) (int, int) {
totalPx := origW * origH
var targetW, targetH int
if p.maxPixels > 0 && totalPx > 0 {
factor := math.Sqrt(float64(p.maxPixels) / float64(totalPx))
targetH = max(alignSize, int(math.Floor(factor*float64(origH)/float64(alignSize)))*alignSize)
targetW = max(alignSize, int(math.Floor(factor*float64(origW)/float64(alignSize)))*alignSize)
} else {
targetH = max(alignSize, (origH/alignSize)*alignSize)
targetW = max(alignSize, (origW/alignSize)*alignSize)
}
return targetW, targetH
}
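// Illustrative worked example (assumption: exposition only): with
// patchSize=16, nMerge=3 (alignSize=48) and maxPixels = 280*2304 = 645120, a
// 1920×1080 input gives factor = sqrt(645120/2073600) ≈ 0.558, so
// targetW = floor(0.558*1920/48)*48 = 1056 and
// targetH = floor(0.558*1080/48)*48 = 576 (264 tokens after merging).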
// pack extracts RGB values from an image and normalizes to [-1, 1].
// Returns channel-first layout: [R..., G..., B...].
func (p *ImageProcessor) pack(img image.Image) []float32 {
bounds := img.Bounds()
w := bounds.Dx()
h := bounds.Dy()
size := w * h
pixelVals := make([]float32, 3*size)
rOff, gOff, bOff := 0, size, 2*size
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
for x := bounds.Min.X; x < bounds.Max.X; x++ {
c := img.At(x, y)
r, g, b, _ := c.RGBA()
idx := (y-bounds.Min.Y)*w + (x - bounds.Min.X)
// Normalize [0, 255] -> [-1, 1]: 2 * (val/255) - 1
pixelVals[rOff+idx] = float32(r>>8)/255.0*2.0 - 1.0
pixelVals[gOff+idx] = float32(g>>8)/255.0*2.0 - 1.0
pixelVals[bOff+idx] = float32(b>>8)/255.0*2.0 - 1.0
}
}
return pixelVals
}

View File

@@ -0,0 +1,102 @@
package gemma4

import (
	"os"
	"testing"

	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/tokenizer"
)

// TestTokenizerMatchesHF compares our tokenizer output against HuggingFace reference tokens.
func TestTokenizerMatchesHF(t *testing.T) {
	modelPath := os.Getenv("GEMMA4_MODEL_PATH")
	if modelPath == "" {
		t.Skip("set GEMMA4_MODEL_PATH to a gemma4 GGUF file")
	}

	m, err := model.New(modelPath, ml.BackendParams{AllocMemory: true})
	if err != nil {
		t.Fatalf("Failed to load model: %v", err)
	}
	defer m.Backend().Close()

	tok := m.(tokenizer.Tokenizer)

	tests := []struct {
		name     string
		input    string
		expected []int32
	}{
		{
			name:     "simple",
			input:    "Hello, world!",
			expected: []int32{9259, 236764, 1902, 236888},
		},
		{
			name:     "special_tokens",
			input:    "<|turn>user\nWhat is 2+2?<turn|>\n<|turn>model\n",
			expected: []int32{105, 2364, 107, 3689, 563, 236743, 236778, 236862, 236778, 236881, 106, 107, 105, 4368, 107},
		},
		{
			name:     "tool_declaration",
			input:    "<|tool>declaration:bash{description:<|\"|>Run a command<|\"|>}<tool|>",
			expected: []int32{46, 163688, 236787, 42422, 236782, 7777, 236787, 52, 7306, 496, 4991, 52, 236783, 47},
		},
		{
			name:     "tool_call",
			input:    "<|tool_call>call:bash{command:<|\"|>ls -la<|\"|>}<tool_call|>",
			expected: []int32{48, 6639, 236787, 42422, 236782, 7674, 236787, 52, 5629, 753, 2149, 52, 236783, 49},
		},
		{
			name:     "thinking",
			input:    "<|channel>thought\nLet me think about this...<channel|>The answer is 42.",
			expected: []int32{100, 45518, 107, 6481, 786, 1751, 1003, 672, 1390, 101, 818, 3890, 563, 236743, 236812, 236778, 236761},
		},
		{
			name:     "code",
			input:    "func main() { fmt.Println(\"hello\") }",
			expected: []int32{6823, 1689, 825, 642, 22766, 236761, 29006, 885, 23391, 1373, 682},
		},
		{
			name:     "numbers",
			input:    "The answer is 42, not 43.5 or -1",
			expected: []int32{818, 3890, 563, 236743, 236812, 236778, 236764, 711, 236743, 236812, 236800, 236761, 236810, 653, 753, 236770},
		},
		{
			name:     "mixed_chat_with_tools",
			input:    "<|turn>system\nYou are a helpful assistant.\n<|tool>declaration:get_weather{description:<|\"|>Get weather<|\"|>,parameters:{properties:{city:{type:<|\"|>STRING<|\"|>}},type:<|\"|>OBJECT<|\"|>}}<tool|><turn|>\n<|turn>user\nWhat's the weather in Paris?<turn|>\n<|turn>model\n<|channel>thought\n<channel|>",
			expected: []int32{105, 9731, 107, 3048, 659, 496, 11045, 16326, 236761, 107, 46, 163688, 236787, 828, 236779, 19323, 236782, 7777, 236787, 52, 3407, 7606, 52, 236764, 19031, 29616, 15921, 29616, 13319, 29616, 2084, 236787, 52, 35410, 52, 5237, 2084, 236787, 52, 60688, 52, 1807, 47, 106, 107, 105, 2364, 107, 3689, 236789, 236751, 506, 7606, 528, 9079, 236881, 106, 107, 105, 4368, 107, 100, 45518, 107, 101},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tokens, err := tok.Encode(tt.input, false) // no BOS
			if err != nil {
				t.Fatalf("encode error: %v", err)
			}
			if len(tokens) != len(tt.expected) {
				t.Errorf("token count mismatch: got %d, want %d", len(tokens), len(tt.expected))
				t.Logf("got: %v", tokens)
				t.Logf("want: %v", tt.expected)
				return
			}
			mismatches := 0
			for i := range tokens {
				if tokens[i] != tt.expected[i] {
					mismatches++
					if mismatches <= 5 {
						t.Errorf("mismatch at [%d]: got %d, want %d", i, tokens[i], tt.expected[i])
					}
				}
			}
			if mismatches > 5 {
				t.Errorf("... and %d more mismatches", mismatches-5)
			}
		})
	}
}
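An editorial aside: the count-and-report loop above is deliberate, surfacing the first few diverging token positions rather than just a boolean. If only a pass/fail signal were needed, a condensed sketch using the standard library's slices package (Go 1.21+, requires importing "slices"; this is a hypothetical simplification, not part of the diff) would be:

			// Hypothetical condensed form: slices.Equal folds the length
			// check and element loop into one call, at the cost of losing
			// the per-index mismatch report.
			if !slices.Equal(tokens, tt.expected) {
				t.Errorf("token mismatch:\n got: %v\nwant: %v", tokens, tt.expected)
			}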

View File

@@ -7,6 +7,7 @@ import (
_ "github.com/ollama/ollama/model/models/gemma2" _ "github.com/ollama/ollama/model/models/gemma2"
_ "github.com/ollama/ollama/model/models/gemma3" _ "github.com/ollama/ollama/model/models/gemma3"
_ "github.com/ollama/ollama/model/models/gemma3n" _ "github.com/ollama/ollama/model/models/gemma3n"
_ "github.com/ollama/ollama/model/models/gemma4"
_ "github.com/ollama/ollama/model/models/glm4moelite" _ "github.com/ollama/ollama/model/models/glm4moelite"
_ "github.com/ollama/ollama/model/models/glmocr" _ "github.com/ollama/ollama/model/models/glmocr"
_ "github.com/ollama/ollama/model/models/gptoss" _ "github.com/ollama/ollama/model/models/gptoss"

View File

@@ -34,9 +34,9 @@ type Masks struct {
 // GatedDeltaNet implements linear attention with SSM convolution and recurrent state.
 // It implements the Operator interface directly.
 type GatedDeltaNet struct {
-	// Optimized path: pre-split QKV and gate
 	SSMQKV       *nn.Linear `gguf:"attn_qkv"`  // -> Q, K, V (concatenated)
 	SSMQKVGate   *nn.Linear `gguf:"attn_gate"` // -> Z gate
+	SSMIn        *nn.Linear `gguf:"ssm_in"`
 	SSMBetaAlpha *nn.Linear `gguf:"ssm_ba"`    // -> beta, alpha (legacy qwen3next)
 	SSMBeta      *nn.Linear `gguf:"ssm_beta"`  // -> beta (qwen35)
 	SSMAlpha     *nn.Linear `gguf:"ssm_alpha"` // -> alpha (qwen35)
@@ -100,12 +100,27 @@ func (gdn *GatedDeltaNet) Forward(ctx ml.Context, hiddenStates, _ ml.Tensor, cac
 	qkvDim := headKDim*numKHeads*2 + headVDim*numVHeads
 
-	if gdn.SSMQKV == nil || gdn.SSMQKVGate == nil {
-		return nil, errors.New("qwen3next: missing attn_qkv/attn_gate projections (legacy ssm_in is not supported)")
+	// Support both current split projections and older qwen3-next imports that use ssm_in.
+	var qkvMixed, z ml.Tensor
+	switch {
+	case gdn.SSMQKV != nil && gdn.SSMQKVGate != nil:
+		qkvMixed = gdn.SSMQKV.Forward(ctx, hiddenStates).Reshape(ctx, qkvDim, nSeqTokens, nSeqs)
+		z = gdn.SSMQKVGate.Forward(ctx, hiddenStates)
+	case gdn.SSMIn != nil:
+		vPerHead := headVDim * numVHeads / numKHeads
+		qkvzDim := 2*headKDim + 2*vPerHead
+		combined := gdn.SSMIn.Forward(ctx, hiddenStates).Reshape(ctx, qkvzDim, numKHeads, nSeqTokens, nSeqs)
+		qPart := combined.Slice(ctx, 0, 0, headKDim, 1).Contiguous(ctx, headKDim*numKHeads, nSeqTokens, nSeqs)
+		kPart := combined.Slice(ctx, 0, headKDim, 2*headKDim, 1).Contiguous(ctx, headKDim*numKHeads, nSeqTokens, nSeqs)
+		vPart := combined.Slice(ctx, 0, 2*headKDim, 2*headKDim+vPerHead, 1).Contiguous(ctx, headVDim*numVHeads, nSeqTokens, nSeqs)
+		zPart := combined.Slice(ctx, 0, 2*headKDim+vPerHead, qkvzDim, 1).Contiguous(ctx, headVDim*numVHeads, nSeqTokens, nSeqs)
+		qkvMixed = qPart.Concat(ctx, kPart, 0).Concat(ctx, vPart, 0)
+		z = zPart
+	default:
+		return nil, errors.New("qwen3next: missing attn_qkv/attn_gate or ssm_in projections")
 	}
 
-	// Optimized path: pre-split QKV and gate
-	qkvMixed := gdn.SSMQKV.Forward(ctx, hiddenStates).Reshape(ctx, qkvDim, nSeqTokens, nSeqs)
-	z := gdn.SSMQKVGate.Forward(ctx, hiddenStates)
-
 	var beta ml.Tensor
 	var alpha ml.Tensor
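For orientation: the legacy ssm_in tensor packs, per K-head, [q | k | v-share | z-share] along the innermost dimension, so the four Slice calls in the new path are plain offset ranges. A small standalone sketch of the offset arithmetic with illustrative sizes (the dimensions below are made up for the example, not taken from a real config):

package main

import "fmt"

func main() {
	// Illustrative head sizes only; real values come from the model config.
	headKDim, numKHeads := 128, 16
	headVDim, numVHeads := 128, 32

	// Each K-head's fused row carries its own q and k, plus its share of
	// the v and z channels (v/z heads are grouped onto the K-heads).
	vPerHead := headVDim * numVHeads / numKHeads // 256
	qkvzDim := 2*headKDim + 2*vPerHead           // 768

	fmt.Println("q:", 0, "..", headKDim)                     // [0, 128)
	fmt.Println("k:", headKDim, "..", 2*headKDim)            // [128, 256)
	fmt.Println("v:", 2*headKDim, "..", 2*headKDim+vPerHead) // [256, 512)
	fmt.Println("z:", 2*headKDim+vPerHead, "..", qkvzDim)    // [512, 768)
}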

View File

@@ -454,7 +454,7 @@ func (m *Model) Validate() error {
 		if !ok || gdn == nil {
 			return fmt.Errorf("qwen3next: layer %d expected recurrent operator", i)
 		}
-		if gdn.SSMQKV == nil || gdn.SSMQKVGate == nil {
+		if gdn.SSMIn == nil && (gdn.SSMQKV == nil || gdn.SSMQKVGate == nil) {
 			return fmt.Errorf("qwen3next: layer %d missing attn_qkv/attn_gate projections", i)
 		}
 		if gdn.SSMBetaAlpha == nil && (gdn.SSMBeta == nil || gdn.SSMAlpha == nil) {

Some files were not shown because too many files have changed in this diff.