gemma4: be less strict about whitespace before bare keys (#15494)

This commit is contained in:
Devon Rifkin
2026-04-11 16:30:27 -07:00
committed by GitHub
parent 40a1317dfd
commit 9330bb9120
2 changed files with 194 additions and 6 deletions

View File

@@ -32,7 +32,6 @@ const (
var (
gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`)
)
type Gemma4Parser struct {
@@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string {
return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
})
text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
text = quoteGemma4BareKeys(text)
for i, value := range quotedStrings {
escaped, _ := json.Marshal(value)
@@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string {
return text
}
func quoteGemma4BareKeys(s string) string {
var sb strings.Builder
sb.Grow(len(s) + 16)
for i := 0; i < len(s); {
if s[i] == '"' {
if end := gemma4JSONQuotedStringEnd(s, i); end != -1 {
sb.WriteString(s[i:end])
i = end
continue
}
}
if s[i] != '{' && s[i] != ',' {
sb.WriteByte(s[i])
i++
continue
}
sb.WriteByte(s[i])
i++
spaceStart := i
i = gemma4SkipSpace(s, i)
sb.WriteString(s[spaceStart:i])
keyEnd := gemma4BareKeyEnd(s, i)
if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' {
sb.WriteByte('"')
sb.WriteString(s[i:keyEnd])
sb.WriteByte('"')
sb.WriteByte(':')
i = keyEnd + 1
continue
}
}
return sb.String()
}
func gemma4BareKeyEnd(s string, start int) int {
i := start
for i < len(s) {
r, size := utf8.DecodeRuneInString(s[i:])
if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) {
break
}
i += size
}
return i
}
// repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
// For example, if the model emits an unclosed gemma string as the last value,
// we can repair it by closing it with the gemma string delimiter.

View File

@@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) {
},
},
},
{
name: "tool_call_exec_with_embedded_quoted_path",
input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "exec",
Arguments: testArgs(map[string]any{
"command": `ls -F "vault/"`,
}),
},
},
},
},
{
name: "tool_call_exec_with_embedded_quoted_url",
input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "exec",
Arguments: testArgs(map[string]any{
"command": `fetch "https://ollama.com/library/gemma4" --extract`,
}),
},
},
},
},
{
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
@@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) {
},
},
},
{
name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes",
input: `<|tool_call>call:edit{edits:[
{newText:<|"|># Gemma4 + openclaude speed optimization guide
Use "nvfp4" only after validating tool calls.
<|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide
Use the default quantization.
<|"|>},
{newText:<|"|>## 14. Methods tried but not adopted
### 1. Model quantization
Do not enable "mxfp8" by default.
<|"|>,oldText:<|"|>## 14. Methods tried but not adopted
<|"|>}
]}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "edit",
Arguments: testArgs(map[string]any{
"edits": []any{
map[string]any{
"newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n",
"oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n",
},
map[string]any{
"newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n",
"oldText": "## 14. Methods tried but not adopted\n",
},
},
}),
},
},
},
},
{
name: "tool_call_edit_with_array_of_objects_issue_comment_spacing",
input: `<|tool_call>call:edit{edits:[
{newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>,
oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>},
{newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>,
oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>}
]}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "edit",
Arguments: testArgs(map[string]any{
"edits": []any{
map[string]any{
"newText": "# Gemma4 + openclaude speed optimization guide...",
"oldText": "# Gemma4 + openclaude speed optimization guide...",
},
map[string]any{
"newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...",
"oldText": "## 14. Methods tried but not adopted\\n",
},
},
}),
},
},
},
},
{
name: "tool_call_with_windows_path_single_backslashes",
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
@@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) {
input: `{config:{enabled:true,name:<|"|>test<|"|>}}`,
expected: `{"config":{"enabled":true,"name":"test"}}`,
},
{
name: "nested_object_with_space_after_object_open_and_before_bare_key",
input: `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`,
},
{
name: "nested_object_with_space_before_bare_key",
input: `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`,
},
{
name: "nested_object_with_newline_before_bare_key",
input: `{edits:[
{newText:<|"|>a<|"|>,
oldText:<|"|>b<|"|>}
]}`,
expected: `{"edits":[
{"newText":"a",
"oldText":"b"}
]}`,
},
{
name: "array_value",
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
@@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) {
input: `{q:"say \"hi\" and \"bye\""}`,
expected: `{"q":"say \"hi\" and \"bye\""}`,
},
{
name: "raw_quoted_string_containing_key_like_text",
input: `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`,
expected: `{"q":"keep , oldText: literal","note":"ok"}`,
},
{
name: "raw_quoted_string_with_escaped_quotes_and_key_like_text",
input: `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`,
expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`,
},
{
name: "nested_mixed_raw_and_gemma_quoted_values",
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
@@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test
}
}
func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
if err == nil {
t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) {
got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
if err != nil {
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
}
want := api.ToolCall{
Function: api.ToolCallFunction{
Name: "foo",
Arguments: testArgs(map[string]any{
"q": "a,b:c",
}),
},
}
if diff := cmp.Diff(want, got, argsComparer); diff != "" {
t.Fatalf("tool call mismatch (-want +got):\n%s", diff)
}
}