gemma4: add parser, renderer, and integration test plumbing

2026-04-17 21:54:08 +02:00 · 2026-04-01 15:31:08 -07:00
parent 82437d620a
commit 5bad871241
13 changed files with 1371 additions and 8 deletions
--- a/integration/llm_image_test.go
+++ b/integration/llm_image_test.go
@@ -15,6 +15,7 @@ func TestVisionModels(t *testing.T) {
 	skipUnderMinVRAM(t, 6)

 	defaultVisionModels := []string{
+		"gemma4",
 		"qwen2.5vl",
 		"llama3.2-vision",
 		"gemma3",
--- a/integration/tools_test.go
+++ b/integration/tools_test.go
@@ -30,6 +30,7 @@ func TestAPIToolCalling(t *testing.T) {
 	defer cleanup()

 	minVRAM := map[string]uint64{
+		"gemma4":        8,
 		"qwen3-vl":      16,
 		"gpt-oss:20b":   16,
 		"gpt-oss:120b":  70,
--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -45,6 +45,7 @@ var (

 	// Note: add newer models at the top of the list to test them first
 	ollamaEngineChatModels = []string{
+		"gemma4",
 		"lfm2.5-thinking",
 		"ministral-3",
 		"qwen3-coder:30b",
@@ -137,6 +138,7 @@ var (
 		"gemma2",
 		"gemma3",
 		"gemma3n",
+		"gemma4",
 		"glm4",
 		"goliath",
 		"gpt-oss:20b",
@@ -272,6 +274,7 @@ var (
 		"snowflake-arctic-embed2",
 	}
 	libraryToolsModels = []string{
+		"gemma4",
 		"lfm2.5-thinking",
 		"qwen3-vl",
 		"gpt-oss:20b",
--- a/integration/vision_test.go
+++ b/integration/vision_test.go
@@ -16,6 +16,7 @@ import (
 // Default set of vision models to test. When OLLAMA_TEST_MODEL is set,
 // only that model is tested (with a capability check for vision).
 var defaultVisionModels = []string{
+	"gemma4",
 	"gemma3",
 	"llama3.2-vision",
 	"qwen2.5vl",
--- a/model/parsers/gemma4.go
+++ b/model/parsers/gemma4.go
@@ -0,0 +1,399 @@
+package parsers
+
+import (
+	"encoding/json"
+	"errors"
+	"log/slog"
+	"strings"
+	"unicode"
+
+	"github.com/ollama/ollama/api"
+)
+
+// Gemma4ParserState identifies which kind of model output the parser is
+// currently accumulating.
+type Gemma4ParserState int
+
+const (
+	// Gemma4CollectingContent is the zero value and the state set by Init;
+	// buffered text is treated as regular response content.
+	Gemma4CollectingContent Gemma4ParserState = iota
+	// Gemma4CollectingThinking is active between the thinking channel's
+	// opening and closing tags.
+	Gemma4CollectingThinking
+	// Gemma4CollectingToolCall is active between the tool call's opening and
+	// closing tags.
+	Gemma4CollectingToolCall
+)
+
+// Tags that delimit thinking ("channel") sections and tool calls in Gemma 4
+// output. Opening tags are written <|name> and closing tags <name|>.
+const (
+	gemma4ThinkingOpenTag  = "<|channel>"
+	gemma4ThinkingCloseTag = "<channel|>"
+	gemma4ToolCallOpenTag  = "<|tool_call>"
+	gemma4ToolCallCloseTag = "<tool_call|>"
+)
+
+// Gemma4Parser incrementally splits Gemma 4 model output into regular
+// content, thinking content, and tool calls.
+//
+// state records the section currently being collected and buffer holds input
+// passed to Add that has not been emitted yet. hasThinkingSupport is the value
+// reported by HasThinkingSupport; the remaining fields are described inline.
+type Gemma4Parser struct {
+	state                 Gemma4ParserState
+	buffer                strings.Builder
+	hasThinkingSupport    bool
+	thinkingEnabled       bool // true when both model supports and user requested thinking
+	needsChannelNameStrip bool // true when we just entered thinking and need to strip "thought\n"
+}
+
+// HasToolSupport reports whether the parser extracts tool calls. It always
+// returns true.
+func (p *Gemma4Parser) HasToolSupport() bool {
+	return true
+}
+
+// HasThinkingSupport reports whether this parser instance was configured with
+// thinking support (the hasThinkingSupport field).
+func (p *Gemma4Parser) HasThinkingSupport() bool {
+	return p.hasThinkingSupport
+}
+
+// Init configures the parser for a new response and returns tools unchanged.
+//
+// Thinking output is surfaced only when the parser has thinking support and
+// thinkValue is non-nil and true. lastMessage is not consulted: every response,
+// prefilled or not, starts in content mode and the parser switches to thinking
+// only when it actually sees <|channel> in the stream.
+func (p *Gemma4Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+	p.thinkingEnabled = p.HasThinkingSupport() && thinkValue != nil && thinkValue.Bool()
+
+	// The model typically emits <|channel> immediately when thinking is
+	// enabled, so there is no need to start in the thinking state.
+	p.state = Gemma4CollectingContent
+	return tools
+}
+
+// gemma4Event is the closed set of events produced while parsing. The
+// unexported marker method restricts implementations to this package.
+type gemma4Event interface {
+	isGemma4Event()
+}
+
+// gemma4EventThinkingContent carries text found inside a thinking channel.
+type gemma4EventThinkingContent struct {
+	content string
+}
+
+// gemma4EventContent carries regular response text.
+type gemma4EventContent struct {
+	content string
+}
+
+// gemma4EventToolCall carries one successfully parsed tool call.
+type gemma4EventToolCall struct {
+	toolCall api.ToolCall
+}
+
+func (gemma4EventThinkingContent) isGemma4Event() {}
+func (gemma4EventContent) isGemma4Event()         {}
+func (gemma4EventToolCall) isGemma4Event()        {}
+
+// Add appends s to the pending input, parses whatever can be resolved, and
+// returns the newly available content, thinking text, and tool calls. Set done
+// on the final chunk so that nothing is held back waiting for a tag. The
+// returned error is always nil.
+func (p *Gemma4Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s)
+
+	var (
+		text      strings.Builder
+		reasoning strings.Builder
+	)
+	for _, ev := range p.parseEvents(done) {
+		switch ev := ev.(type) {
+		case gemma4EventContent:
+			text.WriteString(ev.content)
+		case gemma4EventThinkingContent:
+			// Channel content is dropped silently when thinking is disabled.
+			if !p.thinkingEnabled {
+				continue
+			}
+			reasoning.WriteString(ev.content)
+		case gemma4EventToolCall:
+			calls = append(calls, ev.toolCall)
+		}
+	}
+
+	return text.String(), reasoning.String(), calls, nil
+}
+
+// parseEvents calls eat repeatedly, gathering its events, until eat reports
+// that no further progress can be made on the current buffer.
+func (p *Gemma4Parser) parseEvents(done bool) []gemma4Event {
+	var collected []gemma4Event
+	for {
+		batch, more := p.eat(done)
+		collected = append(collected, batch...)
+		if !more {
+			return collected
+		}
+	}
+}
+
+// longestOverlap reports the largest value overlap returns for bufStr against
+// any of tags, i.e. the longest suffix of bufStr that is also a prefix of one
+// of the tags. It returns 0 when no tag overlaps or no tags are given.
+func longestOverlap(bufStr string, tags ...string) int {
+	best := 0
+	for i := range tags {
+		n := overlap(bufStr, tags[i])
+		if n <= best {
+			continue
+		}
+		best = n
+	}
+	return best
+}
+
+// eat consumes as much of the buffered input as can be classified in the
+// current state and returns the events produced.
+//
+// The boolean result is true when a tag was consumed and the state changed, so
+// the caller should invoke eat again on the remaining buffer. It is false when
+// the buffer is exhausted or more input is required. When done is true nothing
+// is held back waiting for a possible tag.
+func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) {
+	var events []gemma4Event
+	bufStr := p.buffer.String()
+	if bufStr == "" {
+		return events, false
+	}
+
+	switch p.state {
+	case Gemma4CollectingContent:
+		// Act on whichever open tag occurs first in the buffer. Testing the
+		// tags in a fixed order would let a later <|channel> swallow an
+		// earlier <|tool_call> block and emit its raw text as content.
+		thinkIdx := strings.Index(bufStr, gemma4ThinkingOpenTag)
+		toolIdx := strings.Index(bufStr, gemma4ToolCallOpenTag)
+		if thinkIdx != -1 || toolIdx != -1 {
+			idx, tagLen, next := thinkIdx, len(gemma4ThinkingOpenTag), Gemma4CollectingThinking
+			if thinkIdx == -1 || (toolIdx != -1 && toolIdx < thinkIdx) {
+				idx, tagLen, next = toolIdx, len(gemma4ToolCallOpenTag), Gemma4CollectingToolCall
+			}
+
+			contentBefore := strings.TrimRightFunc(bufStr[:idx], unicode.IsSpace)
+			remaining := bufStr[idx+tagLen:]
+
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = next
+			if next == Gemma4CollectingThinking {
+				p.needsChannelNameStrip = true
+			}
+
+			if len(contentBefore) > 0 {
+				events = append(events, gemma4EventContent{content: contentBefore})
+			}
+			return events, true
+		}
+
+		// The buffer may end with the beginning of a tag. Hold back that
+		// partial tag, plus any whitespace directly before it (which would be
+		// trimmed if the tag completes), and emit the rest.
+		if !done {
+			if overlapLen := longestOverlap(bufStr, gemma4ThinkingOpenTag, gemma4ToolCallOpenTag); overlapLen > 0 {
+				beforePartialTag := bufStr[:len(bufStr)-overlapLen]
+				trailingLen := trailingWhitespaceLen(beforePartialTag)
+				ambiguousStart := len(beforePartialTag) - trailingLen
+
+				unambiguous := bufStr[:ambiguousStart]
+				ambiguous := bufStr[ambiguousStart:]
+				p.buffer.Reset()
+				p.buffer.WriteString(ambiguous)
+				if len(unambiguous) > 0 {
+					events = append(events, gemma4EventContent{content: unambiguous})
+				}
+				return events, false
+			}
+		}
+
+		// No tags found, emit all content.
+		p.buffer.Reset()
+		events = append(events, gemma4EventContent{content: bufStr})
+		return events, false
+
+	case Gemma4CollectingThinking:
+		// Strip channel name (e.g., "thought\n") after <|channel>.
+		// Gemma 4 format: <|channel>thought\n...content...<channel|>
+		// In streaming mode, "thought" and "\n" may arrive in separate chunks.
+		if p.needsChannelNameStrip {
+			const channelName = "thought\n"
+			switch {
+			case strings.HasPrefix(bufStr, channelName):
+				bufStr = bufStr[len(channelName):]
+				p.buffer.Reset()
+				p.buffer.WriteString(bufStr)
+				p.needsChannelNameStrip = false
+			case !done && strings.HasPrefix(channelName, bufStr):
+				// bufStr is a proper prefix of the channel name — wait for
+				// more data before deciding.
+				return events, false
+			default:
+				// No match (different channel name or no newline) — don't strip.
+				p.needsChannelNameStrip = false
+			}
+		}
+
+		if before, after, found := strings.Cut(bufStr, gemma4ThinkingCloseTag); found {
+			thinking := strings.TrimRightFunc(before, unicode.IsSpace)
+			remaining := strings.TrimLeftFunc(after, unicode.IsSpace)
+
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = Gemma4CollectingContent
+
+			if len(thinking) > 0 {
+				events = append(events, gemma4EventThinkingContent{content: thinking})
+			}
+			return events, true
+		}
+
+		if done {
+			// Final chunk without a close tag: flush everything as thinking.
+			p.buffer.Reset()
+			if len(bufStr) > 0 {
+				events = append(events, gemma4EventThinkingContent{content: bufStr})
+			}
+			return events, false
+		}
+
+		// Hold back a possible partial close tag (overlapLen may be zero) and
+		// any whitespace directly before it, since trailing whitespace is
+		// trimmed once the close tag arrives. Emit the rest.
+		overlapLen := overlap(bufStr, gemma4ThinkingCloseTag)
+		beforePartialTag := bufStr[:len(bufStr)-overlapLen]
+		ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen(beforePartialTag)
+
+		unambiguous := bufStr[:ambiguousStart]
+		ambiguous := bufStr[ambiguousStart:]
+		p.buffer.Reset()
+		p.buffer.WriteString(ambiguous)
+		if len(unambiguous) > 0 {
+			events = append(events, gemma4EventThinkingContent{content: unambiguous})
+		}
+		return events, false
+
+	case Gemma4CollectingToolCall:
+		if idx := strings.Index(bufStr, gemma4ToolCallCloseTag); idx != -1 {
+			toolCallContent := bufStr[:idx]
+			remaining := bufStr[idx+len(gemma4ToolCallCloseTag):]
+			remaining = strings.TrimLeftFunc(remaining, unicode.IsSpace)
+
+			p.buffer.Reset()
+			p.buffer.WriteString(remaining)
+			p.state = Gemma4CollectingContent
+
+			// A malformed call is logged and dropped; parsing continues.
+			if toolCall, err := parseGemma4ToolCall(toolCallContent); err == nil {
+				events = append(events, gemma4EventToolCall{toolCall: toolCall})
+			} else {
+				slog.Warn("gemma4 tool call parsing failed", "error", err, "content", toolCallContent)
+			}
+			return events, true
+		}
+
+		// If done, flush any accumulated tool call content even without closing tag.
+		// The model may hit a stop token before emitting <tool_call|>.
+		if done {
+			p.buffer.Reset()
+			p.state = Gemma4CollectingContent
+			if toolCall, err := parseGemma4ToolCall(bufStr); err == nil {
+				events = append(events, gemma4EventToolCall{toolCall: toolCall})
+			} else {
+				slog.Warn("gemma4 tool call flush on done failed", "error", err, "content", bufStr)
+			}
+			return events, false
+		}
+
+		// Wait for closing tag
+		return events, false
+	}
+
+	return events, false
+}
+
+// parseGemma4ToolCall parses the text found between the tool call tags, which
+// is expected to have the Gemma 4 form
+//
+//	call:NAME{key:value,key:value}
+//
+// Surrounding whitespace is ignored. It returns an error when the "call:"
+// prefix or the opening brace is missing, when NAME is empty, or when the
+// arguments do not decode as a JSON object after conversion by
+// gemma4ArgsToJSON.
+func parseGemma4ToolCall(content string) (api.ToolCall, error) {
+	// The model may put whitespace or a newline between the tag and the call.
+	content = strings.TrimSpace(content)
+
+	rest, ok := strings.CutPrefix(content, "call:")
+	if !ok {
+		return api.ToolCall{}, errors.New("expected 'call:' prefix")
+	}
+
+	// Everything before the first brace is the tool name; the brace starts the
+	// argument object.
+	braceIdx := strings.IndexByte(rest, '{')
+	if braceIdx == -1 {
+		return api.ToolCall{}, errors.New("expected '{' in tool call")
+	}
+
+	toolName := strings.TrimSpace(rest[:braceIdx])
+	if toolName == "" {
+		return api.ToolCall{}, errors.New("expected tool name before '{'")
+	}
+
+	// Convert Gemma 4 argument format to JSON
+	jsonStr := gemma4ArgsToJSON(rest[braceIdx:])
+
+	var args api.ToolCallFunctionArguments
+	if err := json.Unmarshal([]byte(jsonStr), &args); err != nil {
+		return api.ToolCall{}, err
+	}
+
+	return api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name:      toolName,
+			Arguments: args,
+		},
+	}, nil
+}
+
+// gemma4ArgsToJSON converts Gemma 4's custom argument format to JSON text.
+// The format uses <|"|> for string delimiters and bare identifier keys.
+// Example: {location:<|"|>Paris<|"|>,count:42} → {"location":"Paris","count":42}
+//
+// The text between a pair of <|"|> delimiters is taken literally and
+// JSON-escaped, so values containing quotes, backslashes, or control
+// characters such as newlines still yield valid JSON. Ordinary double-quoted
+// strings are copied through unchanged, honoring backslash escapes. An
+// unterminated string is left unterminated so that JSON decoding reports it.
+func gemma4ArgsToJSON(s string) string {
+	const strDelim = `<|"|>`
+
+	var buf strings.Builder
+	buf.Grow(len(s) + 32)
+
+	i := 0
+	for i < len(s) {
+		// Gemma string literal: <|"|>raw text<|"|>
+		if strings.HasPrefix(s[i:], strDelim) {
+			body := s[i+len(strDelim):]
+			buf.WriteByte('"')
+			end := strings.Index(body, strDelim)
+			if end == -1 {
+				// Truncated input: emit what we have without a closing quote.
+				writeGemma4JSONStringBody(&buf, body)
+				return buf.String()
+			}
+			writeGemma4JSONStringBody(&buf, body[:end])
+			buf.WriteByte('"')
+			i += len(strDelim) + end + len(strDelim)
+			continue
+		}
+
+		ch := s[i]
+		switch {
+		case ch == '"':
+			// Already-JSON string: copy through the closing quote verbatim,
+			// skipping over escaped characters so \" does not end it.
+			j := i + 1
+			for j < len(s) {
+				if s[j] == '\\' && j+1 < len(s) {
+					j += 2
+					continue
+				}
+				j++
+				if s[j-1] == '"' {
+					break
+				}
+			}
+			buf.WriteString(s[i:j])
+			i = j
+		case isIdentStart(ch):
+			// Read the full identifier
+			j := i + 1
+			for j < len(s) && isIdentPart(s[j]) {
+				j++
+			}
+			word := s[i:j]
+			if j < len(s) && s[j] == ':' {
+				// It's an object key — quote it
+				buf.WriteByte('"')
+				buf.WriteString(word)
+				buf.WriteByte('"')
+			} else {
+				// It's a bare value (true, false, null, etc.)
+				buf.WriteString(word)
+			}
+			i = j
+		default:
+			buf.WriteByte(ch)
+			i++
+		}
+	}
+	return buf.String()
+}
+
+// writeGemma4JSONStringBody appends s to buf escaped for use inside a JSON
+// string literal (without the surrounding quotes). Bytes >= 0x80 are copied
+// unchanged so multi-byte UTF-8 sequences are preserved.
+func writeGemma4JSONStringBody(buf *strings.Builder, s string) {
+	const hexDigits = "0123456789abcdef"
+	for i := 0; i < len(s); i++ {
+		switch c := s[i]; {
+		case c == '"' || c == '\\':
+			buf.WriteByte('\\')
+			buf.WriteByte(c)
+		case c == '\n':
+			buf.WriteString(`\n`)
+		case c == '\r':
+			buf.WriteString(`\r`)
+		case c == '\t':
+			buf.WriteString(`\t`)
+		case c < 0x20:
+			buf.WriteString(`\u00`)
+			buf.WriteByte(hexDigits[c>>4])
+			buf.WriteByte(hexDigits[c&0x0f])
+		default:
+			buf.WriteByte(c)
+		}
+	}
+}
--- a/model/parsers/gemma4_test.go
+++ b/model/parsers/gemma4_test.go
@@ -0,0 +1,442 @@
+package parsers
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/ollama/ollama/api"
+)
+
+// TestGemma4Parser runs table-driven cases through a single Add call with
+// done=true (non-streaming) and compares the returned content, thinking text,
+// and tool calls. Every case uses a parser with thinking support; the
+// thinkingEnabled field controls the ThinkValue passed to Init.
+func TestGemma4Parser(t *testing.T) {
+	tests := []struct {
+		name              string
+		input             string
+		expectedContent   string
+		expectedThinking  string
+		expectedToolCalls []api.ToolCall
+		thinkingEnabled   bool
+		lastMessage       *api.Message
+	}{
+		{
+			name:            "simple_content",
+			input:           "This is a simple response.",
+			expectedContent: "This is a simple response.",
+		},
+		{
+			name:             "thinking_then_content",
+			input:            "<|channel>thought\nLet me think about this...<channel|>The answer is 42.",
+			expectedContent:  "The answer is 42.",
+			expectedThinking: "Let me think about this...",
+			thinkingEnabled:  true,
+		},
+		{
+			name:             "multiple_thinking_blocks",
+			input:            "<|channel>first thought<channel|><|channel>second thought<channel|>Final answer.",
+			expectedContent:  "Final answer.",
+			expectedThinking: "first thoughtsecond thought",
+			thinkingEnabled:  true,
+		},
+		{
+			name:             "thinking_only_no_content",
+			input:            "<|channel>just thinking<channel|>",
+			expectedContent:  "",
+			expectedThinking: "just thinking",
+			thinkingEnabled:  true,
+		},
+		{
+			name:  "tool_call_simple",
+			input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "Paris",
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_multiple_args",
+			input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>,units:<|"|>metric<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "Paris",
+							"units":    "metric",
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_number_arg",
+			input: `<|tool_call>call:set_temp{value:42}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "set_temp",
+						Arguments: testArgs(map[string]any{
+							"value": 42.0,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_boolean_arg",
+			input: `<|tool_call>call:toggle{enabled:true}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "toggle",
+						Arguments: testArgs(map[string]any{
+							"enabled": true,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_nested_object",
+			input: `<|tool_call>call:process{config:{enabled:true,name:<|"|>test<|"|>}}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "process",
+						Arguments: testArgs(map[string]any{
+							"config": map[string]any{
+								"enabled": true,
+								"name":    "test",
+							},
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_array",
+			input: `<|tool_call>call:process{items:[<|"|>a<|"|>,<|"|>b<|"|>]}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "process",
+						Arguments: testArgs(map[string]any{
+							"items": []any{"a", "b"},
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "multiple_tool_calls",
+			input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|><|tool_call>call:get_weather{location:<|"|>London<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "Paris",
+						}),
+					},
+				},
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "London",
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:             "thinking_then_tool_call",
+			input:            "<|channel>thought\nI need to check the weather<channel|><|tool_call>call:get_weather{location:<|\"|>Paris<|\"|>}<tool_call|>",
+			expectedThinking: "I need to check the weather",
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "Paris",
+						}),
+					},
+				},
+			},
+			thinkingEnabled: true,
+		},
+		{
+			name:            "content_then_tool_call",
+			input:           `Let me check that for you.<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|>`,
+			expectedContent: "Let me check that for you.",
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "get_weather",
+						Arguments: testArgs(map[string]any{
+							"location": "Paris",
+						}),
+					},
+				},
+			},
+		},
+		{
+			// With thinking disabled the channel section is still recognized,
+			// but its text is discarded rather than returned.
+			name:            "thinking_disabled_channel_tags_as_content",
+			input:           "<|channel>this is not thinking<channel|>actual content",
+			expectedContent: "actual content",
+			thinkingEnabled: false,
+		},
+		{
+			name:            "prefill_content_only",
+			input:           "Continuing content.",
+			expectedContent: "Continuing content.",
+			lastMessage: &api.Message{
+				Role:    "assistant",
+				Content: "Previous content",
+			},
+			thinkingEnabled: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			parser := &Gemma4Parser{hasThinkingSupport: true}
+			parser.Init(nil, tt.lastMessage, &api.ThinkValue{Value: tt.thinkingEnabled})
+
+			content, thinking, toolCalls, err := parser.Add(tt.input, true)
+			if err != nil {
+				t.Fatalf("Add() error = %v", err)
+			}
+
+			if diff := cmp.Diff(tt.expectedContent, content); diff != "" {
+				t.Errorf("content mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedThinking, thinking); diff != "" {
+				t.Errorf("thinking mismatch (-want +got):\n%s", diff)
+			}
+
+			if diff := cmp.Diff(tt.expectedToolCalls, toolCalls, argsComparer); diff != "" {
+				t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TestGemma4Parser_Streaming feeds a thinking-then-content response in four
+// chunks, with the "thought\n" channel name split across a chunk boundary, and
+// checks the concatenated content and thinking output.
+func TestGemma4Parser_Streaming(t *testing.T) {
+	const (
+		wantContent  = "The answer is 42."
+		wantThinking = "Let me think..."
+	)
+
+	p := &Gemma4Parser{hasThinkingSupport: true}
+	p.Init(nil, nil, &api.ThinkValue{Value: true})
+
+	pieces := []string{
+		"<|channel>thought",
+		"\nLet me think",
+		"...<channel|>The answer",
+		" is 42.",
+	}
+
+	var gotContent, gotThinking strings.Builder
+	last := len(pieces) - 1
+	for idx, piece := range pieces {
+		content, thinking, _, err := p.Add(piece, idx == last)
+		if err != nil {
+			t.Fatalf("Add() error on chunk %d: %v", idx, err)
+		}
+		gotContent.WriteString(content)
+		gotThinking.WriteString(thinking)
+	}
+
+	if got := gotContent.String(); got != wantContent {
+		t.Errorf("expected content %q, got %q", wantContent, got)
+	}
+	if got := gotThinking.String(); got != wantThinking {
+		t.Errorf("expected thinking %q, got %q", wantThinking, got)
+	}
+}
+
+// TestGemma4Parser_StreamingToolCall streams one tool call split across three
+// chunks (inside the tool name and inside a string argument) and checks that
+// exactly that call is produced and no content leaks out.
+func TestGemma4Parser_StreamingToolCall(t *testing.T) {
+	p := &Gemma4Parser{hasThinkingSupport: false}
+	p.Init(nil, nil, nil)
+
+	pieces := []string{
+		`<|tool_call>call:get_`,
+		`weather{location:<|"|>Par`,
+		`is<|"|>}<tool_call|>`,
+	}
+
+	var (
+		gotContent strings.Builder
+		gotCalls   []api.ToolCall
+	)
+	last := len(pieces) - 1
+	for idx, piece := range pieces {
+		content, _, calls, err := p.Add(piece, idx == last)
+		if err != nil {
+			t.Fatalf("Add() error on chunk %d: %v", idx, err)
+		}
+		gotContent.WriteString(content)
+		gotCalls = append(gotCalls, calls...)
+	}
+
+	if got := gotContent.String(); got != "" {
+		t.Errorf("expected no content, got %q", got)
+	}
+
+	wantCalls := []api.ToolCall{{
+		Function: api.ToolCallFunction{
+			Name:      "get_weather",
+			Arguments: testArgs(map[string]any{"location": "Paris"}),
+		},
+	}}
+	if diff := cmp.Diff(wantCalls, gotCalls, argsComparer); diff != "" {
+		t.Errorf("tool calls mismatch (-want +got):\n%s", diff)
+	}
+}
+
+// TestGemma4Parser_StreamingSplitThinkingTag checks that a thinking open or
+// close tag split across two chunks is still recognized as a tag.
+func TestGemma4Parser_StreamingSplitThinkingTag(t *testing.T) {
+	type testCase struct {
+		name             string
+		chunks           []string
+		expectedContent  string
+		expectedThinking string
+	}
+
+	cases := []testCase{
+		{
+			name:             "split_channel_open_tag",
+			chunks:           []string{"<|chan", "nel>thinking here<channel|>content"},
+			expectedContent:  "content",
+			expectedThinking: "thinking here",
+		},
+		{
+			name:             "split_channel_close_tag",
+			chunks:           []string{"<|channel>thinking here<chan", "nel|>content"},
+			expectedContent:  "content",
+			expectedThinking: "thinking here",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			p := &Gemma4Parser{hasThinkingSupport: true}
+			p.Init(nil, nil, &api.ThinkValue{Value: true})
+
+			var gotContent, gotThinking strings.Builder
+			last := len(tc.chunks) - 1
+			for idx, piece := range tc.chunks {
+				content, thinking, _, err := p.Add(piece, idx == last)
+				if err != nil {
+					t.Fatalf("Add() error on chunk %d: %v", idx, err)
+				}
+				gotContent.WriteString(content)
+				gotThinking.WriteString(thinking)
+			}
+
+			if got := gotContent.String(); got != tc.expectedContent {
+				t.Errorf("expected content %q, got %q", tc.expectedContent, got)
+			}
+			if got := gotThinking.String(); got != tc.expectedThinking {
+				t.Errorf("expected thinking %q, got %q", tc.expectedThinking, got)
+			}
+		})
+	}
+}
+
+// TestGemma4ArgsToJSON checks the exact JSON text gemma4ArgsToJSON produces
+// for strings, numbers, booleans, null, nested objects, arrays, and an empty
+// object.
+func TestGemma4ArgsToJSON(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "simple_string",
+			input:    `{location:<|"|>Paris<|"|>}`,
+			expected: `{"location":"Paris"}`,
+		},
+		{
+			name:     "multiple_args",
+			input:    `{location:<|"|>Paris<|"|>,units:<|"|>metric<|"|>}`,
+			expected: `{"location":"Paris","units":"metric"}`,
+		},
+		{
+			name:     "number_value",
+			input:    `{value:42}`,
+			expected: `{"value":42}`,
+		},
+		{
+			name:     "boolean_value",
+			input:    `{enabled:true}`,
+			expected: `{"enabled":true}`,
+		},
+		{
+			name:     "nested_object",
+			input:    `{config:{enabled:true,name:<|"|>test<|"|>}}`,
+			expected: `{"config":{"enabled":true,"name":"test"}}`,
+		},
+		{
+			name:     "array_value",
+			input:    `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
+			expected: `{"items":["a","b"]}`,
+		},
+		{
+			name:     "empty_object",
+			input:    `{}`,
+			expected: `{}`,
+		},
+		{
+			name:     "mixed_types",
+			input:    `{name:<|"|>test<|"|>,count:5,active:true,tags:[<|"|>a<|"|>]}`,
+			expected: `{"name":"test","count":5,"active":true,"tags":["a"]}`,
+		},
+		{
+			name:     "null_value",
+			input:    `{value:null}`,
+			expected: `{"value":null}`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := gemma4ArgsToJSON(tt.input)
+			if result != tt.expected {
+				t.Errorf("expected %q, got %q", tt.expected, result)
+			}
+		})
+	}
+}
+
+// TestGemma4Parser_HasToolSupport checks that a zero-value parser reports
+// tool support.
+func TestGemma4Parser_HasToolSupport(t *testing.T) {
+	var p Gemma4Parser
+	if supported := p.HasToolSupport(); !supported {
+		t.Error("Gemma4Parser should support tools")
+	}
+}
+
+// TestGemma4Parser_HasThinkingSupport checks that HasThinkingSupport mirrors
+// the hasThinkingSupport field for both of its values.
+func TestGemma4Parser_HasThinkingSupport(t *testing.T) {
+	enabled := Gemma4Parser{hasThinkingSupport: true}
+	disabled := Gemma4Parser{hasThinkingSupport: false}
+
+	if got := enabled.HasThinkingSupport(); !got {
+		t.Error("Gemma4Parser with thinking support should report it")
+	}
+	if got := disabled.HasThinkingSupport(); got {
+		t.Error("Gemma4Parser without thinking support should not report it")
+	}
+}
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -77,6 +77,10 @@ func ParserForName(name string) Parser {
 		return &FunctionGemmaParser{}
 	case "glm-4.7":
 		return &GLM47Parser{}
+	case "gemma4":
+		return &Gemma4Parser{hasThinkingSupport: true}
+	case "gemma4-no-thinking":
+		return &Gemma4Parser{hasThinkingSupport: false}
 	case "glm-ocr":
 		return &GlmOcrParser{}
 	case "lfm2":
--- a/model/renderers/gemma4.go
+++ b/model/renderers/gemma4.go
@@ -24,10 +24,6 @@ func (r *Gemma4Renderer) Render(messages []api.Message, tools []api.Tool, thinkV
 	var sb strings.Builder
 	imageOffset := 0

-	// BOS token — Gemma 4 models have add_bos_token=false in their tokenizer
-	// config, so the tokenizer does not auto-prepend BOS. We must emit it
-	// explicitly in the rendered prompt, matching the HF chat template.
-	sb.WriteString("<bos>")
 	// Extract system message if present.
 	var systemMessage string
 	var loopMessages []api.Message
--- a/model/renderers/gemma4_test.go
+++ b/model/renderers/gemma4_test.go
@@ -0,0 +1,493 @@
+package renderers
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/api"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGemma4Renderer(t *testing.T) {
+	q := `<|"|>` // string delimiter shorthand for readability
+
+	tests := []struct {
+		name     string
+		messages []api.Message
+		tools    []api.Tool
+		think    *api.ThinkValue
+		expected string
+	}{
+		{
+			name: "basic_user_message",
+			messages: []api.Message{
+				{Role: "user", Content: "Hello!"},
+			},
+			expected: "<|turn>user\nHello!<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "with_system_message",
+			messages: []api.Message{
+				{Role: "system", Content: "You are helpful"},
+				{Role: "user", Content: "Hello!"},
+			},
+			expected: "<|turn>system\nYou are helpful<turn|>\n<|turn>user\nHello!<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "with_developer_role",
+			messages: []api.Message{
+				{Role: "developer", Content: "You are a coding assistant"},
+				{Role: "user", Content: "Hello!"},
+			},
+			expected: "<|turn>system\nYou are a coding assistant<turn|>\n<|turn>user\nHello!<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "multi_turn",
+			messages: []api.Message{
+				{Role: "user", Content: "Hi"},
+				{Role: "assistant", Content: "Hello!"},
+				{Role: "user", Content: "More"},
+			},
+			expected: "<|turn>user\nHi<turn|>\n<|turn>model\nHello!<turn|>\n<|turn>user\nMore<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "assistant_last_message_no_close",
+			messages: []api.Message{
+				{Role: "user", Content: "Hi"},
+				{Role: "assistant", Content: "Hello!"},
+			},
+			expected: "<|turn>user\nHi<turn|>\n<|turn>model\nHello!",
+		},
+		{
+			name:     "empty_messages",
+			messages: []api.Message{},
+			expected: "<|turn>model\n",
+		},
+		{
+			name: "thinking_enabled",
+			messages: []api.Message{
+				{Role: "user", Content: "Think hard"},
+			},
+			think:    thinkTrue(),
+			expected: "<|turn>system\n<|think|><turn|>\n<|turn>user\nThink hard<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "thinking_with_system",
+			messages: []api.Message{
+				{Role: "system", Content: "Be careful"},
+				{Role: "user", Content: "Think hard"},
+			},
+			think:    thinkTrue(),
+			expected: "<|turn>system\n<|think|>Be careful<turn|>\n<|turn>user\nThink hard<turn|>\n<|turn>model\n",
+		},
+		{
+			// Tools with no system message — tool declarations follow immediately after the "<|turn>system\n" header
+			name: "with_tools",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n<|turn>user\nWeather?<turn|>\n<|turn>model\n",
+		},
+		{
+			// System message with tools — tools follow directly after system content (no newline)
+			name: "system_message_with_tools",
+			messages: []api.Message{
+				{Role: "system", Content: "You are a weather expert."},
+				{Role: "user", Content: "Weather?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\nYou are a weather expert.<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n<|turn>user\nWeather?<turn|>\n<|turn>model\n",
+		},
+		{
+			// Tool call + tool response: response is inline in the model turn, no separate <|turn>tool
+			// Non-JSON tool response falls back to {value:<|"|>...<|"|>}
+			name: "tool_call",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "get_weather",
+								Arguments: testArgs(map[string]any{"city": "Paris"}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "Sunny"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather?<turn|>\n" +
+				"<|turn>model\n<|tool_call>call:get_weather{city:" + q + "Paris" + q + "}<tool_call|>" +
+				"<|tool_response>response:get_weather{value:" + q + "Sunny" + q + "}<tool_response|>",
+		},
+		{
+			// Assistant content + tool call + tool response inline
+			name: "assistant_content_with_tool_call",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+				{
+					Role:    "assistant",
+					Content: "Let me check.",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "get_weather",
+								Arguments: testArgs(map[string]any{"city": "Paris"}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "Sunny"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather?<turn|>\n" +
+				"<|turn>model\nLet me check.<|tool_call>call:get_weather{city:" + q + "Paris" + q + "}<tool_call|>" +
+				"<|tool_response>response:get_weather{value:" + q + "Sunny" + q + "}<tool_response|>",
+		},
+		{
+			// Parallel tool calls — both responses inline
+			name: "parallel_tool_calls",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather and time?"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "get_weather",
+								Arguments: testArgs(map[string]any{"city": "Paris"}),
+							},
+						},
+						{
+							Function: api.ToolCallFunction{
+								Name:      "get_time",
+								Arguments: testArgs(map[string]any{"timezone": "UTC"}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "Sunny"},
+				{Role: "tool", Content: "12:00"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_time",
+						Description: "Get current time",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"timezone": {Type: api.PropertyType{"string"}, Description: "Timezone"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><|tool>declaration:get_time{description:" + q + "Get current time" + q + ",parameters:{properties:{timezone:{description:" + q + "Timezone" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather and time?<turn|>\n" +
+				"<|turn>model\n<|tool_call>call:get_weather{city:" + q + "Paris" + q + "}<tool_call|><|tool_call>call:get_time{timezone:" + q + "UTC" + q + "}<tool_call|>" +
+				"<|tool_response>response:get_weather{value:" + q + "Sunny" + q + "}<tool_response|>" +
+				"<|tool_response>response:get_time{value:" + q + "12:00" + q + "}<tool_response|>",
+		},
+		{
+			// Numeric arguments — JSON tool response with individual key:value pairs
+			name: "numeric_arguments",
+			messages: []api.Message{
+				{Role: "user", Content: "Add"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "add",
+								Arguments: testArgs(map[string]any{"a": float64(1), "b": float64(2)}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: `{"result":3}`},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "add",
+						Description: "Add numbers",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"a": {Type: api.PropertyType{"number"}},
+								"b": {Type: api.PropertyType{"number"}},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:add{description:" + q + "Add numbers" + q + ",parameters:{properties:{a:{type:" + q + "NUMBER" + q + "},b:{type:" + q + "NUMBER" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nAdd<turn|>\n" +
+				"<|turn>model\n<|tool_call>call:add{a:1,b:2}<tool_call|>" +
+				"<|tool_response>response:add{result:3}<tool_response|>",
+		},
+		{
+			// Boolean argument — non-JSON tool response
+			name: "boolean_argument",
+			messages: []api.Message{
+				{Role: "user", Content: "Set flag"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "set_flag",
+								Arguments: testArgs(map[string]any{"enabled": true}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "done"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "set_flag",
+						Description: "Set a flag",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"enabled": {Type: api.PropertyType{"boolean"}, Description: "Flag value"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:set_flag{description:" + q + "Set a flag" + q + ",parameters:{properties:{enabled:{description:" + q + "Flag value" + q + ",type:" + q + "BOOLEAN" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nSet flag<turn|>\n" +
+				"<|turn>model\n<|tool_call>call:set_flag{enabled:true}<tool_call|>" +
+				"<|tool_response>response:set_flag{value:" + q + "done" + q + "}<tool_response|>",
+		},
+		{
+			name: "tool_with_required_params",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Gets the weather for a given city",
+						Parameters: api.ToolFunctionParameters{
+							Type:     "object",
+							Required: []string{"city"},
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city":    {Type: api.PropertyType{"string"}, Description: "City Name"},
+								"country": {Type: api.PropertyType{"string"}, Description: "Country Name"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Gets the weather for a given city" + q + ",parameters:{properties:{city:{description:" + q + "City Name" + q + ",type:" + q + "STRING" + q + "},country:{description:" + q + "Country Name" + q + ",type:" + q + "STRING" + q + "}},required:[" + q + "city" + q + "],type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather?<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "tool_with_enum",
+			messages: []api.Message{
+				{Role: "user", Content: "Test"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "set_mode",
+						Description: "Set mode",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"mode": {Type: api.PropertyType{"string"}, Description: "The mode", Enum: []any{"fast", "slow"}},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|tool>declaration:set_mode{description:" + q + "Set mode" + q + ",parameters:{properties:{mode:{description:" + q + "The mode" + q + ",enum:[" + q + "fast" + q + "," + q + "slow" + q + "],type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nTest<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "unicode_content",
+			messages: []api.Message{
+				{Role: "user", Content: "こんにちは"},
+			},
+			expected: "<|turn>user\nこんにちは<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "newlines_in_content",
+			messages: []api.Message{
+				{Role: "user", Content: "Line 1\nLine 2\nLine 3"},
+			},
+			expected: "<|turn>user\nLine 1\nLine 2\nLine 3<turn|>\n<|turn>model\n",
+		},
+		{
+			// Thinking + tools — <|think|> immediately followed by tool declarations
+			name: "thinking_with_tools",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+			},
+			think: thinkTrue(),
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			expected: "<|turn>system\n<|think|><|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather?<turn|>\n<|turn>model\n",
+		},
+		{
+			name: "image_tags_when_enabled",
+			messages: []api.Message{
+				{Role: "user", Content: "What is this?", Images: []api.ImageData{[]byte("fake")}},
+			},
+			expected: "<|turn>user\n[img-0]What is this?<turn|>\n<|turn>model\n",
+		},
+		{
+			// JSON tool response — parsed into individual key:value pairs
+			name: "json_tool_response",
+			messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+				{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name:      "get_weather",
+								Arguments: testArgs(map[string]any{"city": "Tokyo"}),
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: `{"temperature":15,"weather":"sunny"}`},
+				{Role: "user", Content: "Thanks!"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get_weather",
+						Description: "Get weather",
+						Parameters: api.ToolFunctionParameters{
+							Type: "object",
+							Properties: testPropsMap(map[string]api.ToolProperty{
+								"city": {Type: api.PropertyType{"string"}, Description: "City"},
+							}),
+						},
+					},
+				},
+			},
+			// Matches HF reference: tool response inline, JSON fields as key:value, no <turn|> before next user
+			expected: "<|turn>system\n<|tool>declaration:get_weather{description:" + q + "Get weather" + q + ",parameters:{properties:{city:{description:" + q + "City" + q + ",type:" + q + "STRING" + q + "}},type:" + q + "OBJECT" + q + "}}<tool|><turn|>\n" +
+				"<|turn>user\nWeather?<turn|>\n" +
+				"<|turn>model\n<|tool_call>call:get_weather{city:" + q + "Tokyo" + q + "}<tool_call|>" +
+				"<|tool_response>response:get_weather{temperature:15,weather:" + q + "sunny" + q + "}<tool_response|>" +
+				"<|turn>user\nThanks!<turn|>\n" +
+				"<|turn>model\n",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			renderer := &Gemma4Renderer{useImgTags: true}
+			result, err := renderer.Render(tt.messages, tt.tools, tt.think)
+			assert.NoError(t, err)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+// thinkTrue returns a pointer to a ThinkValue whose Value is true, used by
+// the test cases above that render with thinking enabled.
+func thinkTrue() *api.ThinkValue { return &api.ThinkValue{Value: true} }
--- a/model/renderers/renderer.go
+++ b/model/renderers/renderer.go
@@ -81,6 +81,8 @@ func rendererForName(name string) Renderer {
 		return renderer
 	case "nemotron-3-nano":
 		return &Nemotron3NanoRenderer{}
+	case "gemma4":
+		return &Gemma4Renderer{useImgTags: RenderImgTags}
 	case "functiongemma":
 		return &FunctionGemmaRenderer{}
 	case "glm-4.7":
--- a/server/create.go
+++ b/server/create.go
@@ -141,7 +141,7 @@ func (s *Server) CreateHandler(c *gin.Context) {
 					ch <- gin.H{"error": err.Error()}
 				}

-				if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "") {
+				if err == nil && !remote && (config.Renderer == "" || config.Parser == "" || config.Requires == "" || len(config.Capabilities) == 0) {
 					mf, mErr := manifest.ParseNamedManifest(fromName)
 					if mErr == nil && mf.Config.Digest != "" {
 						configPath, pErr := manifest.BlobsPath(mf.Config.Digest)
@@ -158,6 +158,9 @@ func (s *Server) CreateHandler(c *gin.Context) {
 									if config.Requires == "" {
 										config.Requires = baseConfig.Requires
 									}
+									if len(config.Capabilities) == 0 {
+										config.Capabilities = baseConfig.Capabilities
+									}
 								}
 								cfgFile.Close()
 							}
--- a/server/images.go
+++ b/server/images.go
@@ -100,11 +100,20 @@ func (m *Model) Capabilities() []model.Capability {
 		} else {
 			slog.Error("couldn't open model file", "error", err)
 		}
-	} else if len(m.Config.Capabilities) > 0 {
+	}
+
+	// Also include capabilities from the model config (e.g. vision capability
+	// set during creation for MLX/safetensors models).
+	if len(m.Config.Capabilities) > 0 {
 		for _, c := range m.Config.Capabilities {
-			capabilities = append(capabilities, model.Capability(c))
+			cap := model.Capability(c)
+			if !slices.Contains(capabilities, cap) {
+				capabilities = append(capabilities, cap)
+			}
 		}
-	} else {
+	}
+
+	if len(capabilities) == 0 {
 		slog.Warn("unknown capabilities for model", "model", m.Name)
 	}

@@ -145,6 +154,14 @@ func (m *Model) Capabilities() []model.Capability {
 		capabilities = append(capabilities, model.CapabilityThinking)
 	}

+	// TODO: temporary workaround — suppress vision/audio for gemma4 safetensors (MLX)
+	// models until the multimodal runtime pipeline lands; remove once imageproc.go is wired up.
+	if m.Config.ModelFormat == "safetensors" && m.Config.Renderer == "gemma4" {
+		capabilities = slices.DeleteFunc(capabilities, func(c model.Capability) bool {
+			return c == model.CapabilityVision || c == model.CapabilityAudio
+		})
+	}
+
 	return capabilities
 }

--- a/types/model/capability.go
+++ b/types/model/capability.go
@@ -10,6 +10,7 @@ const (
 	CapabilityEmbedding  = Capability("embedding")
 	CapabilityThinking   = Capability("thinking")
 	CapabilityImage      = Capability("image")
+	CapabilityAudio      = Capability("audio")
 )

 func (c Capability) String() string {