model/parsers: rework gemma4 tool call handling (#15306)

Replace the custom Gemma4 argument normalizer with a stricter
reference-style conversion: preserve Gemma-quoted strings, quote bare
keys, and then unmarshal the result as JSON.

This keeps quoted scalars as strings, preserves typed unquoted values,
and adds test coverage for malformed raw-quoted inputs that the
reference implementation rejects.
This commit is contained in:
Devon Rifkin
2026-04-03 14:35:00 -07:00
committed by GitHub
parent 3cd2b03a5e
commit 49d5fd5a3e
2 changed files with 100 additions and 121 deletions

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"log/slog"
"regexp"
"strings"
"unicode"
@@ -25,6 +26,11 @@ const (
gemma4ToolCallCloseTag = "<tool_call|>"
)
var (
	// gemma4BareKeyRe matches a run of word characters that directly follows
	// ',' or '{' and is directly followed by ':'. Group 1 is the leading
	// delimiter and group 2 is the key itself.
	gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`)

	// gemma4QuotedStringRe matches a span delimited by the <|"|> token and
	// captures the text between the two tokens. The (?s) flag lets that text
	// contain newlines, and the lazy quantifier stops at the first closing
	// token.
	gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
)
type Gemma4Parser struct {
state Gemma4ParserState
buffer strings.Builder
@@ -345,126 +351,19 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) {
// gemma4ArgsToJSON converts Gemma 4's custom argument format to JSON text.
//
// Two rewrites are applied during a single left-to-right scan of s:
//
//   - A span delimited by the <|"|> token becomes a JSON string literal whose
//     content is the text between the two tokens, escaped by encoding/json.
//   - Outside such spans, a bare key (one or more of [0-9A-Za-z_]) that
//     directly follows ',' or '{' and is directly followed by ':' is wrapped
//     in double quotes.
//
// Every other byte, including raw double-quoted text and an opening token
// that is never closed, is copied through unchanged. The result is therefore
// only valid JSON when the input was well formed; the function itself never
// reports an error.
//
// The scan writes converted strings straight into the output instead of
// parking them behind placeholder byte sequences, so bytes already present in
// s (such as NUL) can never be mistaken for a placeholder.
func gemma4ArgsToJSON(s string) string {
	const quoteToken = `<|"|>`

	var buf strings.Builder
	buf.Grow(len(s) + 32)

	for i := 0; i < len(s); {
		if strings.HasPrefix(s[i:], quoteToken) {
			start := i + len(quoteToken)
			if n := strings.Index(s[start:], quoteToken); n >= 0 {
				// json.Marshal cannot fail for a string value, so the
				// error is deliberately ignored.
				escaped, _ := json.Marshal(s[start : start+n])
				buf.Write(escaped)
				i = start + n + len(quoteToken)
				continue
			}
			// No closing token: fall through and copy the bytes literally.
		}

		if ch := s[i]; ch == ',' || ch == '{' {
			end := i + 1
			for end < len(s) && gemma4IsBareKeyByte(s[end]) {
				end++
			}
			// Only quote when at least one key byte was seen and the key
			// is immediately terminated by ':'.
			if end > i+1 && end < len(s) && s[end] == ':' {
				buf.WriteByte(ch)
				buf.WriteByte('"')
				buf.WriteString(s[i+1 : end])
				buf.WriteString(`":`)
				i = end + 1
				continue
			}
		}

		buf.WriteByte(s[i])
		i++
	}

	return buf.String()
}

// gemma4IsBareKeyByte reports whether c may appear in an unquoted argument
// key: an ASCII letter, an ASCII digit, or an underscore.
func gemma4IsBareKeyByte(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '_':
		return true
	}
	return false
}

View File

@@ -600,7 +600,7 @@ func TestGemma4ArgsToJSON(t *testing.T) {
{
name: "string_value_with_windows_path_backslashes",
input: `{path:<|"|>C:\\Temp\\file.txt<|"|>}`,
expected: `{"path":"C:\\Temp\\file.txt"}`,
expected: `{"path":"C:\\\\Temp\\\\file.txt"}`,
},
{
name: "string_value_with_windows_path_single_backslashes",
@@ -610,7 +610,7 @@ func TestGemma4ArgsToJSON(t *testing.T) {
{
name: "string_value_with_escaped_forward_slashes",
input: `{url:<|"|>https:\/\/example.com\/a<|"|>}`,
expected: `{"url":"https:\/\/example.com\/a"}`,
expected: `{"url":"https:\\/\\/example.com\\/a"}`,
},
{
name: "string_value_with_unicode_escape_sequence",
@@ -667,3 +667,83 @@ func TestGemma4Parser_HasThinkingSupport(t *testing.T) {
t.Error("Gemma4Parser without thinking support should not report it")
}
}
// TestParseGemma4ToolCall_InvalidRawQuotedEscape verifies that a tool call
// whose raw double-quoted argument carries malformed JSON escapes is rejected
// with an error.
func TestParseGemma4ToolCall_InvalidRawQuotedEscape(t *testing.T) {
	const input = `call:open_file{path:"C:\users\bob\file.txt"}`

	if _, err := parseGemma4ToolCall(input); err == nil {
		t.Fatal("expected parseGemma4ToolCall to reject malformed raw-quoted JSON escapes")
	}
}
// TestParseGemma4ToolCall_QuotedScalarsStayStrings verifies that values
// wrapped in the <|"|> token are parsed as strings even when their text looks
// like a number, a boolean, or null.
func TestParseGemma4ToolCall_QuotedScalarsStayStrings(t *testing.T) {
	const input = `call:foo{n:<|"|>1<|"|>,b:<|"|>true<|"|>,z:<|"|>null<|"|>}`

	got, err := parseGemma4ToolCall(input)
	if err != nil {
		t.Fatalf("parseGemma4ToolCall returned error: %v", err)
	}

	expectedArgs := map[string]any{
		"n": "1",
		"b": "true",
		"z": "null",
	}
	expected := api.ToolCall{
		Function: api.ToolCallFunction{
			Name:      "foo",
			Arguments: testArgs(expectedArgs),
		},
	}

	diff := cmp.Diff(expected, got, argsComparer)
	if diff != "" {
		t.Fatalf("quoted scalar handling differed from the reference implementation (-want +got):\n%s", diff)
	}
}
// TestParseGemma4ToolCall_UnquotedScalarsKeepStructuredTypes verifies that
// unquoted values keep their JSON types: a number, a boolean, and null.
func TestParseGemma4ToolCall_UnquotedScalarsKeepStructuredTypes(t *testing.T) {
	const input = `call:foo{n:1,b:true,z:null}`

	got, err := parseGemma4ToolCall(input)
	if err != nil {
		t.Fatalf("parseGemma4ToolCall returned error: %v", err)
	}

	expectedArgs := map[string]any{
		"n": 1.0,
		"b": true,
		"z": nil,
	}
	expected := api.ToolCall{
		Function: api.ToolCallFunction{
			Name:      "foo",
			Arguments: testArgs(expectedArgs),
		},
	}

	diff := cmp.Diff(expected, got, argsComparer)
	if diff != "" {
		t.Fatalf("unquoted scalar handling differed from the reference implementation (-want +got):\n%s", diff)
	}
}
// TestParseGemma4ToolCall_ReferenceImplementationExample parses a tool call
// that mixes an unquoted number with two token-quoted strings (one of which
// contains a comma) and checks the resulting function name and arguments.
func TestParseGemma4ToolCall_ReferenceImplementationExample(t *testing.T) {
	const input = `call:get_current_temperature{detail_level:0,location:<|"|>Paris, France<|"|>,unit:<|"|>celsius<|"|>}`

	got, err := parseGemma4ToolCall(input)
	if err != nil {
		t.Fatalf("parseGemma4ToolCall returned error: %v", err)
	}

	expectedArgs := map[string]any{
		"detail_level": 0.0,
		"location":     "Paris, France",
		"unit":         "celsius",
	}
	expected := api.ToolCall{
		Function: api.ToolCallFunction{
			Name:      "get_current_temperature",
			Arguments: testArgs(expectedArgs),
		},
	}

	diff := cmp.Diff(expected, got, argsComparer)
	if diff != "" {
		t.Fatalf("tool call handling differed from the reference implementation (-want +got):\n%s", diff)
	}
}
// TestParseGemma4ToolCall_InvalidRawQuotedStructuralString verifies that a
// raw double-quoted value containing structural text (a comma followed by a
// key-like "b:") is rejected with an error.
func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
	const input = `call:foo{q:"a,b:c"}`

	if _, err := parseGemma4ToolCall(input); err == nil {
		t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
	}
}