model/parsers: fix gemma4 arg parsing when quoted strings contain " (#15254)

* model/parsers: fix gemma4 arg parsing when quoted strings contain "

Fixes: #15241

* add more tests, be careful about what we escape

We want backslashes in Windows-style paths to be kept literal rather than misinterpreted as escape sequences

* fix backslash-quote case: inside a Gemma-quoted string, \" should be kept as a literal backslash followed by a literal quote

h/t to @chathaway-codes for pointing this out!

Co-Authored-By: Charles H <2773397+chathaway-codes@users.noreply.github.com>

---------

Co-authored-by: Charles H <2773397+chathaway-codes@users.noreply.github.com>
This commit is contained in:
Devon Rifkin
2026-04-02 22:52:51 -07:00
committed by GitHub
parent 3536ef58f6
commit 036ed1b9b5
2 changed files with 270 additions and 6 deletions

View File

@@ -345,27 +345,85 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) {
// gemma4ArgsToJSON converts Gemma 4's custom argument format to valid JSON.
func gemma4ArgsToJSON(s string) string {
s = strings.ReplaceAll(s, `<|"|>`, `"`)
const quoteToken = `<|"|>`
var buf strings.Builder
buf.Grow(len(s) + 32)
inString := false
const (
stringModeNone = iota
stringModeGemmaToken
stringModeRawQuote
)
stringMode := stringModeNone
hex := "0123456789abcdef"
i := 0
for i < len(s) {
if strings.HasPrefix(s[i:], quoteToken) {
if stringMode == stringModeGemmaToken {
stringMode = stringModeNone
} else if stringMode == stringModeNone {
stringMode = stringModeGemmaToken
} else {
// In a raw-quote string, treat the Gemma quote token literally.
buf.WriteString(quoteToken)
i += len(quoteToken)
continue
}
buf.WriteByte('"')
i += len(quoteToken)
continue
}
ch := s[i]
if ch == '"' {
inString = !inString
if stringMode == stringModeNone && ch == '"' {
stringMode = stringModeRawQuote
buf.WriteByte('"')
i++
continue
}
if inString {
if stringMode != stringModeNone {
switch ch {
case '\\':
if i+1 < len(s) {
next := s[i+1]
if stringMode == stringModeGemmaToken {
switch next {
case '"':
// In Gemma-token strings, preserve \" as two literal characters.
buf.WriteString(`\\\"`)
i += 2
continue
case '\\', '/':
// Keep existing behavior for \\ and \/ in Gemma-token strings.
buf.WriteByte('\\')
buf.WriteByte(next)
i += 2
continue
}
} else {
switch next {
case '"', '\\', '/':
// Preserve valid JSON escapes that are already in raw-quoted strings.
buf.WriteByte('\\')
buf.WriteByte(next)
i += 2
continue
}
}
}
// Unknown escape sequence: treat backslash as a literal character.
buf.WriteString(`\\`)
case '"':
if stringMode == stringModeRawQuote {
stringMode = stringModeNone
buf.WriteByte('"')
} else {
// In Gemma-token strings, raw double quotes are string content.
buf.WriteString(`\"`)
}
case '\n':
buf.WriteString(`\n`)
case '\r':
@@ -389,7 +447,7 @@ func gemma4ArgsToJSON(s string) string {
continue
}
if !inString && isIdentStart(ch) {
if isIdentStart(ch) {
j := i + 1
for j < len(s) && isIdentPart(s[j]) {
j++

View File

@@ -133,6 +133,20 @@ func TestGemma4Parser(t *testing.T) {
},
},
},
{
name: "tool_call_with_array_of_multiple_gemma_quoted_strings",
input: `<|tool_call>call:process{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "process",
Arguments: testArgs(map[string]any{
"items": []any{"a", `b "quoted"`, "c"},
}),
},
},
},
},
{
name: "tool_call_with_multiline_string_arg",
input: `<|tool_call>call:bash{command:<|"|>date
@@ -148,6 +162,128 @@ func TestGemma4Parser(t *testing.T) {
},
},
},
{
name: "tool_call_with_escaped_double_quotes_in_string_arg",
input: `<|tool_call>call:search{query:<|"|>say \"hello\"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `say \"hello\"`,
}),
},
},
},
},
{
name: "tool_call_with_unescaped_double_quotes_in_string_arg",
input: `<|tool_call>call:search{query:<|"|>say "hello"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `say "hello"`,
}),
},
},
},
},
{
name: "tool_call_with_multiple_unescaped_double_quote_segments",
input: `<|tool_call>call:search{query:<|"|>say "hello", then "goodbye"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `say "hello", then "goodbye"`,
}),
},
},
},
},
{
name: "tool_call_with_mixed_escaped_and_unescaped_double_quotes",
input: `<|tool_call>call:search{query:<|"|>first \"quoted\" then "raw"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `first \"quoted\" then "raw"`,
}),
},
},
},
},
{
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `say "hello" and "bye"`,
}),
},
},
},
},
{
name: "tool_call_with_mixed_raw_and_gemma_quoted_values",
input: `<|tool_call>call:search{query:"raw \"quoted\"",note:<|"|>gemma "quoted"<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "search",
Arguments: testArgs(map[string]any{
"query": `raw "quoted"`,
"note": `gemma "quoted"`,
}),
},
},
},
},
{
name: "tool_call_with_array_of_objects_and_mixed_quotes",
input: `<|tool_call>call:plan{steps:[{title:<|"|>step "one"<|"|>,done:false},{title:<|"|>step \"two\"<|"|>,done:true}]}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "plan",
Arguments: testArgs(map[string]any{
"steps": []any{
map[string]any{
"title": `step "one"`,
"done": false,
},
map[string]any{
"title": `step \"two\"`,
"done": true,
},
},
}),
},
},
},
},
{
name: "tool_call_with_windows_path_single_backslashes",
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
expectedToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "open_file",
Arguments: testArgs(map[string]any{
"path": `C:\users\bob\file.txt`,
}),
},
},
},
},
{
name: "multiple_tool_calls",
input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|><|tool_call>call:get_weather{location:<|"|>London<|"|>}<tool_call|>`,
@@ -410,6 +546,11 @@ func TestGemma4ArgsToJSON(t *testing.T) {
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
expected: `{"items":["a","b"]}`,
},
{
name: "array_value_with_multiple_gemma_quoted_strings",
input: `{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}`,
expected: `{"items":["a","b \"quoted\"","c"]}`,
},
{
name: "empty_object",
input: `{}`,
@@ -431,6 +572,71 @@ func TestGemma4ArgsToJSON(t *testing.T) {
<|"|>}`,
expected: `{"command":"date\n"}`,
},
{
name: "string_value_with_escaped_double_quotes",
input: `{query:<|"|>say \"hello\"<|"|>}`,
expected: `{"query":"say \\\"hello\\\""}`,
},
{
name: "string_value_with_unescaped_double_quotes",
input: `{query:<|"|>say "hello"<|"|>}`,
expected: `{"query":"say \"hello\""}`,
},
{
name: "string_value_with_multiple_unescaped_double_quote_segments",
input: `{query:<|"|>say "hello", then "goodbye"<|"|>}`,
expected: `{"query":"say \"hello\", then \"goodbye\""}`,
},
{
name: "string_value_with_mixed_escaped_and_unescaped_double_quotes",
input: `{query:<|"|>first \"quoted\" then "raw"<|"|>}`,
expected: `{"query":"first \\\"quoted\\\" then \"raw\""}`,
},
{
name: "string_value_with_punctuation_and_structural_chars",
input: `{query:<|"|>a,b:{c}[d]<|"|>}`,
expected: `{"query":"a,b:{c}[d]"}`,
},
{
name: "string_value_with_windows_path_backslashes",
input: `{path:<|"|>C:\\Temp\\file.txt<|"|>}`,
expected: `{"path":"C:\\Temp\\file.txt"}`,
},
{
name: "string_value_with_windows_path_single_backslashes",
input: `{path:<|"|>C:\users\bob<|"|>}`,
expected: `{"path":"C:\\users\\bob"}`,
},
{
name: "string_value_with_escaped_forward_slashes",
input: `{url:<|"|>https:\/\/example.com\/a<|"|>}`,
expected: `{"url":"https:\/\/example.com\/a"}`,
},
{
name: "string_value_with_unicode_escape_sequence",
input: `{s:<|"|>snowman:\u2603<|"|>}`,
expected: `{"s":"snowman:\\u2603"}`,
},
{
name: "string_value_with_unknown_escape_sequence",
input: `{s:<|"|>bad \x escape<|"|>}`,
expected: `{"s":"bad \\x escape"}`,
},
{
name: "string_value_with_invalid_unicode_escape_sequence",
input: `{s:<|"|>bad \uZZZZ escape<|"|>}`,
expected: `{"s":"bad \\uZZZZ escape"}`,
},
{
name: "raw_quoted_string_with_escaped_quotes",
input: `{q:"say \"hi\" and \"bye\""}`,
expected: `{"q":"say \"hi\" and \"bye\""}`,
},
{
name: "nested_mixed_raw_and_gemma_quoted_values",
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
expected: `{"meta":{"title":"t \"1\"","note":"n \"2\""},"items":["x \"3\"","y \"4\""]}`,
},
}
for _, tt := range tests {