gemma4: be less strict about whitespace before bare keys (#15494)

2026-04-17 22:54:05 +02:00 · 2026-04-11 16:30:27 -07:00
parent 40a1317dfd
commit 9330bb9120
2 changed files with 194 additions and 6 deletions
--- a/model/parsers/gemma4.go
+++ b/model/parsers/gemma4.go
@@ -32,7 +32,6 @@ const (
 var (
 	gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
 	gemma4BareKeyRe      = regexp.MustCompile(`([,{])(\w+):`)
 )
 type Gemma4Parser struct {
@@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string {
 		return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
 	})
-	text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
+	text = quoteGemma4BareKeys(text)
 	for i, value := range quotedStrings {
 		escaped, _ := json.Marshal(value)
@@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string {
 	return text
 }
 func quoteGemma4BareKeys(s string) string {
 	var sb strings.Builder
 	sb.Grow(len(s) + 16)
 	for i := 0; i < len(s); {
 		if s[i] == '"' {
 			if end := gemma4JSONQuotedStringEnd(s, i); end != -1 {
 				sb.WriteString(s[i:end])
 				i = end
 				continue
 			}
 		}
 		if s[i] != '{' && s[i] != ',' {
 			sb.WriteByte(s[i])
 			i++
 			continue
 		}
 		sb.WriteByte(s[i])
 		i++
 		spaceStart := i
 		i = gemma4SkipSpace(s, i)
 		sb.WriteString(s[spaceStart:i])
 		keyEnd := gemma4BareKeyEnd(s, i)
 		if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' {
 			sb.WriteByte('"')
 			sb.WriteString(s[i:keyEnd])
 			sb.WriteByte('"')
 			sb.WriteByte(':')
 			i = keyEnd + 1
 			continue
 		}
 	}
 	return sb.String()
 }
 func gemma4BareKeyEnd(s string, start int) int {
 	i := start
 	for i < len(s) {
 		r, size := utf8.DecodeRuneInString(s[i:])
 		if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) {
 			break
 		}
 		i += size
 	}
 	return i
 }
 // repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
 // For example, if the model emits an unclosed gemma string as the last value,
 // we can repair it by closing it with the gemma string delimiter.
--- a/model/parsers/gemma4_test.go
+++ b/model/parsers/gemma4_test.go
@@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
 		{
 			name:  "tool_call_exec_with_embedded_quoted_path",
 			input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}<tool_call|>`,
 			expectedToolCalls: []api.ToolCall{
 				{
 					Function: api.ToolCallFunction{
 						Name: "exec",
 						Arguments: testArgs(map[string]any{
 							"command": `ls -F "vault/"`,
 						}),
 					},
 				},
 			},
 		},
 		{
 			name:  "tool_call_exec_with_embedded_quoted_url",
 			input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}<tool_call|>`,
 			expectedToolCalls: []api.ToolCall{
 				{
 					Function: api.ToolCallFunction{
 						Name: "exec",
 						Arguments: testArgs(map[string]any{
 							"command": `fetch "https://ollama.com/library/gemma4" --extract`,
 						}),
 					},
 				},
 			},
 		},
 		{
 			name:  "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
 			input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
@@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) {
 				},
 			},
 		},
 		{
 			name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes",
 			input: `<|tool_call>call:edit{edits:[
  {newText:<|"|># Gemma4 + openclaude speed optimization guide
 Use "nvfp4" only after validating tool calls.
 <|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide
 Use the default quantization.
 <|"|>},
  {newText:<|"|>## 14. Methods tried but not adopted
 ### 1. Model quantization
 Do not enable "mxfp8" by default.
 <|"|>,oldText:<|"|>## 14. Methods tried but not adopted
 <|"|>}
 ]}<tool_call|>`,
 			expectedToolCalls: []api.ToolCall{
 				{
 					Function: api.ToolCallFunction{
 						Name: "edit",
 						Arguments: testArgs(map[string]any{
 							"edits": []any{
 								map[string]any{
 									"newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n",
 									"oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n",
 								},
 								map[string]any{
 									"newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n",
 									"oldText": "## 14. Methods tried but not adopted\n",
 								},
 							},
 						}),
 					},
 				},
 			},
 		},
 		{
 			name: "tool_call_edit_with_array_of_objects_issue_comment_spacing",
 			input: `<|tool_call>call:edit{edits:[
  {newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>,
   oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>},
  {newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>,
   oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>}
 ]}<tool_call|>`,
 			expectedToolCalls: []api.ToolCall{
 				{
 					Function: api.ToolCallFunction{
 						Name: "edit",
 						Arguments: testArgs(map[string]any{
 							"edits": []any{
 								map[string]any{
 									"newText": "# Gemma4 + openclaude speed optimization guide...",
 									"oldText": "# Gemma4 + openclaude speed optimization guide...",
 								},
 								map[string]any{
 									"newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...",
 									"oldText": "## 14. Methods tried but not adopted\\n",
 								},
 							},
 						}),
 					},
 				},
 			},
 		},
 		{
 			name:  "tool_call_with_windows_path_single_backslashes",
 			input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
@@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 			input:    `{config:{enabled:true,name:<|"|>test<|"|>}}`,
 			expected: `{"config":{"enabled":true,"name":"test"}}`,
 		},
 		{
 			name:     "nested_object_with_space_after_object_open_and_before_bare_key",
 			input:    `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
 			expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`,
 		},
 		{
 			name:     "nested_object_with_space_before_bare_key",
 			input:    `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
 			expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`,
 		},
 		{
 			name: "nested_object_with_newline_before_bare_key",
 			input: `{edits:[
  {newText:<|"|>a<|"|>,
   oldText:<|"|>b<|"|>}
 ]}`,
 			expected: `{"edits":[
  {"newText":"a",
   "oldText":"b"}
 ]}`,
 		},
 		{
 			name:     "array_value",
 			input:    `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
@@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) {
 			input:    `{q:"say \"hi\" and \"bye\""}`,
 			expected: `{"q":"say \"hi\" and \"bye\""}`,
 		},
 		{
 			name:     "raw_quoted_string_containing_key_like_text",
 			input:    `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`,
 			expected: `{"q":"keep , oldText: literal","note":"ok"}`,
 		},
 		{
 			name:     "raw_quoted_string_with_escaped_quotes_and_key_like_text",
 			input:    `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`,
 			expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`,
 		},
 		{
 			name:     "nested_mixed_raw_and_gemma_quoted_values",
 			input:    `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
@@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test
 	}
 }
-func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
+func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) {
-	_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
+	got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
-	if err == nil {
+	if err != nil {
-		t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
+		t.Fatalf("parseGemma4ToolCall returned error: %v", err)
 	}
 	want := api.ToolCall{
 		Function: api.ToolCallFunction{
 			Name: "foo",
 			Arguments: testArgs(map[string]any{
 				"q": "a,b:c",
 			}),
 		},
 	}
 	if diff := cmp.Diff(want, got, argsComparer); diff != "" {
 		t.Fatalf("tool call mismatch (-want +got):\n%s", diff)
 	}
 }