model/parsers: fix gemma4 arg parsing when quoted strings contain " (#15254)

* model/parsers: fix gemma4 arg parsing when quoted strings contain "

  Fixes: #15241

* add more tests, be careful about what we escape

  We want Windows-style paths to not get misinterpreted

* fix backslash-quote case, it really should be a literal backslash

  h/t to @chathaway-codes for pointing this out!

  Co-Authored-By: Charles H <2773397+chathaway-codes@users.noreply.github.com>

---------

Co-authored-by: Charles H <2773397+chathaway-codes@users.noreply.github.com>
2026-04-17 15:53:27 +02:00 · 2026-04-02 22:52:51 -07:00
parent 3536ef58f6
commit 036ed1b9b5
2 changed files with 270 additions and 6 deletions
--- a/model/parsers/gemma4.go
+++ b/model/parsers/gemma4.go
@@ -345,27 +345,85 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) {

 // gemma4ArgsToJSON converts Gemma 4's custom argument format to valid JSON.
 func gemma4ArgsToJSON(s string) string {
-	s = strings.ReplaceAll(s, `<|"|>`, `"`)
+	const quoteToken = `<|"|>`

 	var buf strings.Builder
 	buf.Grow(len(s) + 32)
-	inString := false
+	const (
+		stringModeNone = iota
+		stringModeGemmaToken
+		stringModeRawQuote
+	)
+
+	stringMode := stringModeNone
 	hex := "0123456789abcdef"
 	i := 0
 	for i < len(s) {
+		if strings.HasPrefix(s[i:], quoteToken) {
+			if stringMode == stringModeGemmaToken {
+				stringMode = stringModeNone
+			} else if stringMode == stringModeNone {
+				stringMode = stringModeGemmaToken
+			} else {
+				// In a raw-quote string, treat the Gemma quote token literally.
+				buf.WriteString(quoteToken)
+				i += len(quoteToken)
+				continue
+			}
+			buf.WriteByte('"')
+			i += len(quoteToken)
+			continue
+		}
+
 		ch := s[i]

-		if ch == '"' {
-			inString = !inString
+		if stringMode == stringModeNone && ch == '"' {
+			stringMode = stringModeRawQuote
 			buf.WriteByte('"')
 			i++
 			continue
 		}

-		if inString {
+		if stringMode != stringModeNone {
 			switch ch {
 			case '\\':
+				if i+1 < len(s) {
+					next := s[i+1]
+					if stringMode == stringModeGemmaToken {
+						switch next {
+						case '"':
+							// In Gemma-token strings, preserve \" as two literal characters.
+							buf.WriteString(`\\\"`)
+							i += 2
+							continue
+						case '\\', '/':
+							// Keep existing behavior for \\ and \/ in Gemma-token strings.
+							buf.WriteByte('\\')
+							buf.WriteByte(next)
+							i += 2
+							continue
+						}
+					} else {
+						switch next {
+						case '"', '\\', '/':
+							// Preserve valid JSON escapes that are already in raw-quoted strings.
+							buf.WriteByte('\\')
+							buf.WriteByte(next)
+							i += 2
+							continue
+						}
+					}
+				}
+				// Unknown escape sequence: treat backslash as a literal character.
 				buf.WriteString(`\\`)
+			case '"':
+				if stringMode == stringModeRawQuote {
+					stringMode = stringModeNone
+					buf.WriteByte('"')
+				} else {
+					// In Gemma-token strings, raw double quotes are string content.
+					buf.WriteString(`\"`)
+				}
 			case '\n':
 				buf.WriteString(`\n`)
 			case '\r':
@@ -389,7 +447,7 @@ func gemma4ArgsToJSON(s string) string {
 			continue
 		}

-		if !inString && isIdentStart(ch) {
+		if isIdentStart(ch) {
 			j := i + 1
 			for j < len(s) && isIdentPart(s[j]) {
 				j++
--- a/model/parsers/gemma4_test.go
+++ b/model/parsers/gemma4_test.go
@@ -133,6 +133,20 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:  "tool_call_with_array_of_multiple_gemma_quoted_strings",
+			input: `<|tool_call>call:process{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "process",
+						Arguments: testArgs(map[string]any{
+							"items": []any{"a", `b "quoted"`, "c"},
+						}),
+					},
+				},
+			},
+		},
 		{
 			name: "tool_call_with_multiline_string_arg",
 			input: `<|tool_call>call:bash{command:<|"|>date
@@ -148,6 +162,128 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:  "tool_call_with_escaped_double_quotes_in_string_arg",
+			input: `<|tool_call>call:search{query:<|"|>say \"hello\"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `say \"hello\"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_unescaped_double_quotes_in_string_arg",
+			input: `<|tool_call>call:search{query:<|"|>say "hello"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `say "hello"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_multiple_unescaped_double_quote_segments",
+			input: `<|tool_call>call:search{query:<|"|>say "hello", then "goodbye"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `say "hello", then "goodbye"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_mixed_escaped_and_unescaped_double_quotes",
+			input: `<|tool_call>call:search{query:<|"|>first \"quoted\" then "raw"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `first \"quoted\" then "raw"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
+			input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `say "hello" and "bye"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_mixed_raw_and_gemma_quoted_values",
+			input: `<|tool_call>call:search{query:"raw \"quoted\"",note:<|"|>gemma "quoted"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "search",
+						Arguments: testArgs(map[string]any{
+							"query": `raw "quoted"`,
+							"note":  `gemma "quoted"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_array_of_objects_and_mixed_quotes",
+			input: `<|tool_call>call:plan{steps:[{title:<|"|>step "one"<|"|>,done:false},{title:<|"|>step \"two\"<|"|>,done:true}]}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "plan",
+						Arguments: testArgs(map[string]any{
+							"steps": []any{
+								map[string]any{
+									"title": `step "one"`,
+									"done":  false,
+								},
+								map[string]any{
+									"title": `step \"two\"`,
+									"done":  true,
+								},
+							},
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_with_windows_path_single_backslashes",
+			input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "open_file",
+						Arguments: testArgs(map[string]any{
+							"path": `C:\users\bob\file.txt`,
+						}),
+					},
+				},
+			},
+		},
 		{
 			name:  "multiple_tool_calls",
 			input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|><|tool_call>call:get_weather{location:<|"|>London<|"|>}<tool_call|>`,
@@ -410,6 +546,11 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 			input:    `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
 			expected: `{"items":["a","b"]}`,
 		},
+		{
+			name:     "array_value_with_multiple_gemma_quoted_strings",
+			input:    `{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}`,
+			expected: `{"items":["a","b \"quoted\"","c"]}`,
+		},
 		{
 			name:     "empty_object",
 			input:    `{}`,
@@ -431,6 +572,71 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 <|"|>}`,
 			expected: `{"command":"date\n"}`,
 		},
+		{
+			name:     "string_value_with_escaped_double_quotes",
+			input:    `{query:<|"|>say \"hello\"<|"|>}`,
+			expected: `{"query":"say \\\"hello\\\""}`,
+		},
+		{
+			name:     "string_value_with_unescaped_double_quotes",
+			input:    `{query:<|"|>say "hello"<|"|>}`,
+			expected: `{"query":"say \"hello\""}`,
+		},
+		{
+			name:     "string_value_with_multiple_unescaped_double_quote_segments",
+			input:    `{query:<|"|>say "hello", then "goodbye"<|"|>}`,
+			expected: `{"query":"say \"hello\", then \"goodbye\""}`,
+		},
+		{
+			name:     "string_value_with_mixed_escaped_and_unescaped_double_quotes",
+			input:    `{query:<|"|>first \"quoted\" then "raw"<|"|>}`,
+			expected: `{"query":"first \\\"quoted\\\" then \"raw\""}`,
+		},
+		{
+			name:     "string_value_with_punctuation_and_structural_chars",
+			input:    `{query:<|"|>a,b:{c}[d]<|"|>}`,
+			expected: `{"query":"a,b:{c}[d]"}`,
+		},
+		{
+			name:     "string_value_with_windows_path_backslashes",
+			input:    `{path:<|"|>C:\\Temp\\file.txt<|"|>}`,
+			expected: `{"path":"C:\\Temp\\file.txt"}`,
+		},
+		{
+			name:     "string_value_with_windows_path_single_backslashes",
+			input:    `{path:<|"|>C:\users\bob<|"|>}`,
+			expected: `{"path":"C:\\users\\bob"}`,
+		},
+		{
+			name:     "string_value_with_escaped_forward_slashes",
+			input:    `{url:<|"|>https:\/\/example.com\/a<|"|>}`,
+			expected: `{"url":"https:\/\/example.com\/a"}`,
+		},
+		{
+			name:     "string_value_with_unicode_escape_sequence",
+			input:    `{s:<|"|>snowman:\u2603<|"|>}`,
+			expected: `{"s":"snowman:\\u2603"}`,
+		},
+		{
+			name:     "string_value_with_unknown_escape_sequence",
+			input:    `{s:<|"|>bad \x escape<|"|>}`,
+			expected: `{"s":"bad \\x escape"}`,
+		},
+		{
+			name:     "string_value_with_invalid_unicode_escape_sequence",
+			input:    `{s:<|"|>bad \uZZZZ escape<|"|>}`,
+			expected: `{"s":"bad \\uZZZZ escape"}`,
+		},
+		{
+			name:     "raw_quoted_string_with_escaped_quotes",
+			input:    `{q:"say \"hi\" and \"bye\""}`,
+			expected: `{"q":"say \"hi\" and \"bye\""}`,
+		},
+		{
+			name:     "nested_mixed_raw_and_gemma_quoted_values",
+			input:    `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
+			expected: `{"meta":{"title":"t \"1\"","note":"n \"2\""},"items":["x \"3\"","y \"4\""]}`,
+		},
 	}

 	for _, tt := range tests {