gemma4: be less strict about whitespace before bare keys (#15494)

2026-04-17 19:54:03 +02:00 · 2026-04-11 16:30:27 -07:00
parent 40a1317dfd
commit 9330bb9120
2 changed files with 194 additions and 6 deletions
--- a/model/parsers/gemma4.go
+++ b/model/parsers/gemma4.go
@@ -32,7 +32,6 @@ const (

 var (
 	gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
-	gemma4BareKeyRe      = regexp.MustCompile(`([,{])(\w+):`)
 )

 type Gemma4Parser struct {
@@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string {
 		return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
 	})

-	text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
+	text = quoteGemma4BareKeys(text)

 	for i, value := range quotedStrings {
 		escaped, _ := json.Marshal(value)
@@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string {
 	return text
 }

+func quoteGemma4BareKeys(s string) string {
+	var sb strings.Builder
+	sb.Grow(len(s) + 16)
+
+	for i := 0; i < len(s); {
+		if s[i] == '"' {
+			if end := gemma4JSONQuotedStringEnd(s, i); end != -1 {
+				sb.WriteString(s[i:end])
+				i = end
+				continue
+			}
+		}
+
+		if s[i] != '{' && s[i] != ',' {
+			sb.WriteByte(s[i])
+			i++
+			continue
+		}
+
+		sb.WriteByte(s[i])
+		i++
+
+		spaceStart := i
+		i = gemma4SkipSpace(s, i)
+		sb.WriteString(s[spaceStart:i])
+
+		keyEnd := gemma4BareKeyEnd(s, i)
+		if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' {
+			sb.WriteByte('"')
+			sb.WriteString(s[i:keyEnd])
+			sb.WriteByte('"')
+			sb.WriteByte(':')
+			i = keyEnd + 1
+			continue
+		}
+	}
+
+	return sb.String()
+}
+
+func gemma4BareKeyEnd(s string, start int) int {
+	i := start
+	for i < len(s) {
+		r, size := utf8.DecodeRuneInString(s[i:])
+		if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) {
+			break
+		}
+		i += size
+	}
+	return i
+}
+
 // repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
 // For example, if the model emits an unclosed gemma string as the last value,
 // we can repair it by closing it with the gemma string delimiter.
--- a/model/parsers/gemma4_test.go
+++ b/model/parsers/gemma4_test.go
@@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
+		{
+			name:  "tool_call_exec_with_embedded_quoted_path",
+			input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "exec",
+						Arguments: testArgs(map[string]any{
+							"command": `ls -F "vault/"`,
+						}),
+					},
+				},
+			},
+		},
+		{
+			name:  "tool_call_exec_with_embedded_quoted_url",
+			input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "exec",
+						Arguments: testArgs(map[string]any{
+							"command": `fetch "https://ollama.com/library/gemma4" --extract`,
+						}),
+					},
+				},
+			},
+		},
 		{
 			name:  "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
 			input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
@@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes",
+			input: `<|tool_call>call:edit{edits:[
+  {newText:<|"|># Gemma4 + openclaude speed optimization guide
+
+Use "nvfp4" only after validating tool calls.
+<|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide
+
+Use the default quantization.
+<|"|>},
+  {newText:<|"|>## 14. Methods tried but not adopted
+
+### 1. Model quantization
+Do not enable "mxfp8" by default.
+<|"|>,oldText:<|"|>## 14. Methods tried but not adopted
+<|"|>}
+]}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "edit",
+						Arguments: testArgs(map[string]any{
+							"edits": []any{
+								map[string]any{
+									"newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n",
+									"oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n",
+								},
+								map[string]any{
+									"newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n",
+									"oldText": "## 14. Methods tried but not adopted\n",
+								},
+							},
+						}),
+					},
+				},
+			},
+		},
+		{
+			name: "tool_call_edit_with_array_of_objects_issue_comment_spacing",
+			input: `<|tool_call>call:edit{edits:[
+  {newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>,
+   oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>},
+  {newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>,
+   oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>}
+]}<tool_call|>`,
+			expectedToolCalls: []api.ToolCall{
+				{
+					Function: api.ToolCallFunction{
+						Name: "edit",
+						Arguments: testArgs(map[string]any{
+							"edits": []any{
+								map[string]any{
+									"newText": "# Gemma4 + openclaude speed optimization guide...",
+									"oldText": "# Gemma4 + openclaude speed optimization guide...",
+								},
+								map[string]any{
+									"newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...",
+									"oldText": "## 14. Methods tried but not adopted\\n",
+								},
+							},
+						}),
+					},
+				},
+			},
+		},
 		{
 			name:  "tool_call_with_windows_path_single_backslashes",
 			input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
@@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 			input:    `{config:{enabled:true,name:<|"|>test<|"|>}}`,
 			expected: `{"config":{"enabled":true,"name":"test"}}`,
 		},
+		{
+			name:     "nested_object_with_space_after_object_open_and_before_bare_key",
+			input:    `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
+			expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`,
+		},
+		{
+			name:     "nested_object_with_space_before_bare_key",
+			input:    `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
+			expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`,
+		},
+		{
+			name: "nested_object_with_newline_before_bare_key",
+			input: `{edits:[
+  {newText:<|"|>a<|"|>,
+   oldText:<|"|>b<|"|>}
+]}`,
+			expected: `{"edits":[
+  {"newText":"a",
+   "oldText":"b"}
+]}`,
+		},
 		{
 			name:     "array_value",
 			input:    `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
@@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 			input:    `{q:"say \"hi\" and \"bye\""}`,
 			expected: `{"q":"say \"hi\" and \"bye\""}`,
 		},
+		{
+			name:     "raw_quoted_string_containing_key_like_text",
+			input:    `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`,
+			expected: `{"q":"keep , oldText: literal","note":"ok"}`,
+		},
+		{
+			name:     "raw_quoted_string_with_escaped_quotes_and_key_like_text",
+			input:    `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`,
+			expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`,
+		},
 		{
 			name:     "nested_mixed_raw_and_gemma_quoted_values",
 			input:    `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
@@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test
 	}
 }

-func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
-	_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
-	if err == nil {
-		t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
+func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) {
+	got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
+	if err != nil {
+		t.Fatalf("parseGemma4ToolCall returned error: %v", err)
+	}
+
+	want := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name: "foo",
+			Arguments: testArgs(map[string]any{
+				"q": "a,b:c",
+			}),
+		},
+	}
+
+	if diff := cmp.Diff(want, got, argsComparer); diff != "" {
+		t.Fatalf("tool call mismatch (-want +got):\n%s", diff)
 	}
 }