mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 21:54:08 +02:00
model/parsers: rework gemma4 tool call handling (#15306)
Replace the custom Gemma4 argument normalizer with a stricter reference-style conversion: preserve Gemma-quoted strings, quote bare keys, and then unmarshal the result as JSON. This keeps quoted scalars as strings, preserves typed unquoted values, and adds test coverage for malformed raw-quoted inputs that the reference implementation rejects.
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
@@ -25,6 +26,11 @@ const (
|
||||
gemma4ToolCallCloseTag = "<tool_call|>"
|
||||
)
|
||||
|
||||
var (
	// gemma4QuotedStringRe matches one Gemma-quoted string: the shortest run of
	// text between two <|"|> tokens. The (?s) flag lets '.' match newlines, so
	// a quoted value may span lines. Capture group 1 is the string content
	// without the surrounding tokens.
	gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)

	// gemma4BareKeyRe matches an unquoted object key: one or more word
	// characters directly after '{' or ',' and directly before ':'. Group 1 is
	// the leading delimiter and group 2 is the key text.
	gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`)
)
|
||||
|
||||
type Gemma4Parser struct {
|
||||
state Gemma4ParserState
|
||||
buffer strings.Builder
|
||||
@@ -345,126 +351,19 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) {
|
||||
|
||||
// gemma4ArgsToJSON converts Gemma 4's custom argument format to valid JSON.
|
||||
func gemma4ArgsToJSON(s string) string {
|
||||
const quoteToken = `<|"|>`
|
||||
var quotedStrings []string
|
||||
text := gemma4QuotedStringRe.ReplaceAllStringFunc(s, func(match string) string {
|
||||
submatches := gemma4QuotedStringRe.FindStringSubmatch(match)
|
||||
quotedStrings = append(quotedStrings, submatches[1])
|
||||
return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
|
||||
})
|
||||
|
||||
var buf strings.Builder
|
||||
buf.Grow(len(s) + 32)
|
||||
const (
|
||||
stringModeNone = iota
|
||||
stringModeGemmaToken
|
||||
stringModeRawQuote
|
||||
)
|
||||
text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
|
||||
|
||||
stringMode := stringModeNone
|
||||
hex := "0123456789abcdef"
|
||||
i := 0
|
||||
for i < len(s) {
|
||||
if strings.HasPrefix(s[i:], quoteToken) {
|
||||
if stringMode == stringModeGemmaToken {
|
||||
stringMode = stringModeNone
|
||||
} else if stringMode == stringModeNone {
|
||||
stringMode = stringModeGemmaToken
|
||||
} else {
|
||||
// In a raw-quote string, treat the Gemma quote token literally.
|
||||
buf.WriteString(quoteToken)
|
||||
i += len(quoteToken)
|
||||
continue
|
||||
}
|
||||
buf.WriteByte('"')
|
||||
i += len(quoteToken)
|
||||
continue
|
||||
}
|
||||
|
||||
ch := s[i]
|
||||
|
||||
if stringMode == stringModeNone && ch == '"' {
|
||||
stringMode = stringModeRawQuote
|
||||
buf.WriteByte('"')
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if stringMode != stringModeNone {
|
||||
switch ch {
|
||||
case '\\':
|
||||
if i+1 < len(s) {
|
||||
next := s[i+1]
|
||||
if stringMode == stringModeGemmaToken {
|
||||
switch next {
|
||||
case '"':
|
||||
// In Gemma-token strings, preserve \" as two literal characters.
|
||||
buf.WriteString(`\\\"`)
|
||||
i += 2
|
||||
continue
|
||||
case '\\', '/':
|
||||
// Keep existing behavior for \\ and \/ in Gemma-token strings.
|
||||
buf.WriteByte('\\')
|
||||
buf.WriteByte(next)
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
switch next {
|
||||
case '"', '\\', '/':
|
||||
// Preserve valid JSON escapes that are already in raw-quoted strings.
|
||||
buf.WriteByte('\\')
|
||||
buf.WriteByte(next)
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
// Unknown escape sequence: treat backslash as a literal character.
|
||||
buf.WriteString(`\\`)
|
||||
case '"':
|
||||
if stringMode == stringModeRawQuote {
|
||||
stringMode = stringModeNone
|
||||
buf.WriteByte('"')
|
||||
} else {
|
||||
// In Gemma-token strings, raw double quotes are string content.
|
||||
buf.WriteString(`\"`)
|
||||
}
|
||||
case '\n':
|
||||
buf.WriteString(`\n`)
|
||||
case '\r':
|
||||
buf.WriteString(`\r`)
|
||||
case '\t':
|
||||
buf.WriteString(`\t`)
|
||||
case '\b':
|
||||
buf.WriteString(`\b`)
|
||||
case '\f':
|
||||
buf.WriteString(`\f`)
|
||||
default:
|
||||
if ch < 0x20 {
|
||||
buf.WriteString(`\u00`)
|
||||
buf.WriteByte(hex[ch>>4])
|
||||
buf.WriteByte(hex[ch&0x0f])
|
||||
} else {
|
||||
buf.WriteByte(ch)
|
||||
}
|
||||
}
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if isIdentStart(ch) {
|
||||
j := i + 1
|
||||
for j < len(s) && isIdentPart(s[j]) {
|
||||
j++
|
||||
}
|
||||
word := s[i:j]
|
||||
if j < len(s) && s[j] == ':' {
|
||||
buf.WriteByte('"')
|
||||
buf.WriteString(word)
|
||||
buf.WriteByte('"')
|
||||
} else {
|
||||
buf.WriteString(word)
|
||||
}
|
||||
i = j
|
||||
} else {
|
||||
buf.WriteByte(ch)
|
||||
i++
|
||||
}
|
||||
for i, value := range quotedStrings {
|
||||
escaped, _ := json.Marshal(value)
|
||||
text = strings.ReplaceAll(text, "\x00"+string(rune(i))+"\x00", string(escaped))
|
||||
}
|
||||
return buf.String()
|
||||
|
||||
return text
|
||||
}
|
||||
|
||||
@@ -600,7 +600,7 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
{
|
||||
name: "string_value_with_windows_path_backslashes",
|
||||
input: `{path:<|"|>C:\\Temp\\file.txt<|"|>}`,
|
||||
expected: `{"path":"C:\\Temp\\file.txt"}`,
|
||||
expected: `{"path":"C:\\\\Temp\\\\file.txt"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_windows_path_single_backslashes",
|
||||
@@ -610,7 +610,7 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
{
|
||||
name: "string_value_with_escaped_forward_slashes",
|
||||
input: `{url:<|"|>https:\/\/example.com\/a<|"|>}`,
|
||||
expected: `{"url":"https:\/\/example.com\/a"}`,
|
||||
expected: `{"url":"https:\\/\\/example.com\\/a"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_unicode_escape_sequence",
|
||||
@@ -667,3 +667,83 @@ func TestGemma4Parser_HasThinkingSupport(t *testing.T) {
|
||||
t.Error("Gemma4Parser without thinking support should not report it")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_InvalidRawQuotedEscape(t *testing.T) {
|
||||
_, err := parseGemma4ToolCall(`call:open_file{path:"C:\users\bob\file.txt"}`)
|
||||
if err == nil {
|
||||
t.Fatal("expected parseGemma4ToolCall to reject malformed raw-quoted JSON escapes")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_QuotedScalarsStayStrings(t *testing.T) {
|
||||
toolCall, err := parseGemma4ToolCall(`call:foo{n:<|"|>1<|"|>,b:<|"|>true<|"|>,z:<|"|>null<|"|>}`)
|
||||
if err != nil {
|
||||
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "foo",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"n": "1",
|
||||
"b": "true",
|
||||
"z": "null",
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(want, toolCall, argsComparer); diff != "" {
|
||||
t.Fatalf("quoted scalar handling differed from the reference implementation (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_UnquotedScalarsKeepStructuredTypes(t *testing.T) {
|
||||
toolCall, err := parseGemma4ToolCall(`call:foo{n:1,b:true,z:null}`)
|
||||
if err != nil {
|
||||
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "foo",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"n": 1.0,
|
||||
"b": true,
|
||||
"z": nil,
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(want, toolCall, argsComparer); diff != "" {
|
||||
t.Fatalf("unquoted scalar handling differed from the reference implementation (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_ReferenceImplementationExample(t *testing.T) {
|
||||
toolCall, err := parseGemma4ToolCall(`call:get_current_temperature{detail_level:0,location:<|"|>Paris, France<|"|>,unit:<|"|>celsius<|"|>}`)
|
||||
if err != nil {
|
||||
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get_current_temperature",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"detail_level": 0.0,
|
||||
"location": "Paris, France",
|
||||
"unit": "celsius",
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(want, toolCall, argsComparer); diff != "" {
|
||||
t.Fatalf("tool call handling differed from the reference implementation (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
|
||||
_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`)
|
||||
if err == nil {
|
||||
t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user