diff --git a/model/parsers/gemma4.go b/model/parsers/gemma4.go index 166d4b63f..55d45b5e6 100644 --- a/model/parsers/gemma4.go +++ b/model/parsers/gemma4.go @@ -17,6 +17,7 @@ const ( Gemma4CollectingContent Gemma4ParserState = iota Gemma4CollectingThinking Gemma4CollectingToolCall + Gemma4IgnoringPostToolCallNoise ) const ( @@ -285,7 +286,7 @@ func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) { p.buffer.Reset() p.buffer.WriteString(remaining) - p.state = Gemma4CollectingContent + p.state = Gemma4IgnoringPostToolCallNoise if toolCall, err := parseGemma4ToolCall(toolCallContent); err == nil { events = append(events, gemma4EventToolCall{toolCall: toolCall}) @@ -310,6 +311,38 @@ func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) { // Wait for closing tag return events, false + + case Gemma4IgnoringPostToolCallNoise: + // We've observed Gemma 4 occasionally emitting extra tool-call close tags + // after a valid tool call. We suppress leading close tags in this immediate + // post-tool-call state so the extra close tags do not leak into assistant + // content. The tradeoff is that if the model intentionally begins its next + // content span with the literal close tag (gemma4ToolCallCloseTag), we will erroneously + // treat it as noise and drop it. 
+ bufStr = strings.TrimLeftFunc(bufStr, unicode.IsSpace) + p.buffer.Reset() + p.buffer.WriteString(bufStr) + + for strings.HasPrefix(bufStr, gemma4ToolCallCloseTag) { + bufStr = strings.TrimLeftFunc(bufStr[len(gemma4ToolCallCloseTag):], unicode.IsSpace) + p.buffer.Reset() + p.buffer.WriteString(bufStr) + } + + if bufStr == "" { + return events, false + } + + if strings.HasPrefix(gemma4ToolCallCloseTag, bufStr) { + if done { + p.buffer.Reset() + p.state = Gemma4CollectingContent + } + return events, false + } + + p.state = Gemma4CollectingContent + return events, true } return events, false diff --git a/model/parsers/gemma4_test.go b/model/parsers/gemma4_test.go index d2a612793..00784c4f7 100644 --- a/model/parsers/gemma4_test.go +++ b/model/parsers/gemma4_test.go @@ -457,6 +457,77 @@ func TestGemma4Parser_StreamingToolCall(t *testing.T) { } } +func TestGemma4Parser_IgnoresExtraToolCallCloseTags(t *testing.T) { + tests := []struct { + name string + chunks []string + expectedContent string + }{ + { + name: "same_chunk_without_trailing_content", + chunks: []string{ + `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}`, + }, + expectedContent: "", + }, + { + name: "same_chunk_before_real_content", + chunks: []string{ + `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}Done.`, + }, + expectedContent: "Done.", + }, + { + name: "split_across_chunks_before_real_content", + chunks: []string{ + `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}Done.`, + }, + expectedContent: "Done.", + }, + } + + expectedToolCalls := []api.ToolCall{ + { + Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: testArgs(map[string]any{ + "location": "Paris", + }), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + parser := &Gemma4Parser{hasThinkingSupport: false} + parser.Init(nil, nil, nil) + + var finalContent strings.Builder + var finalToolCalls []api.ToolCall + + for i, chunk := range tt.chunks { + done := i == len(tt.chunks)-1 
+ content, _, toolCalls, err := parser.Add(chunk, done) + if err != nil { + t.Fatalf("Add() error on chunk %d: %v", i, err) + } + + finalContent.WriteString(content) + finalToolCalls = append(finalToolCalls, toolCalls...) + } + + if diff := cmp.Diff(tt.expectedContent, finalContent.String()); diff != "" { + t.Errorf("content mismatch (-want +got):\n%s", diff) + } + + if diff := cmp.Diff(expectedToolCalls, finalToolCalls, argsComparer); diff != "" { + t.Errorf("tool calls mismatch (-want +got):\n%s", diff) + } + }) + } +} + func TestGemma4Parser_StreamingSplitThinkingTag(t *testing.T) { tests := []struct { name string