mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 15:53:27 +02:00
model/parsers: fix gemma4 arg parsing when quoted strings contain " (#15254)
* model/parsers: fix gemma4 arg parsing when quoted strings contain "

  Fixes: #15241

* add more tests, be careful about what we escape

  We want Windows-style paths to not get misinterpreted

* fix backslash-quote case, it really should be a literal backslash

  h/t to @chathaway-codes for pointing this out!

  Co-Authored-By: Charles H <2773397+chathaway-codes@users.noreply.github.com>

---------

Co-authored-by: Charles H <2773397+chathaway-codes@users.noreply.github.com>
This commit is contained in:
@@ -345,27 +345,85 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) {
|
||||
|
||||
// gemma4ArgsToJSON converts Gemma 4's custom argument format to valid JSON.
|
||||
func gemma4ArgsToJSON(s string) string {
|
||||
s = strings.ReplaceAll(s, `<|"|>`, `"`)
|
||||
const quoteToken = `<|"|>`
|
||||
|
||||
var buf strings.Builder
|
||||
buf.Grow(len(s) + 32)
|
||||
inString := false
|
||||
const (
|
||||
stringModeNone = iota
|
||||
stringModeGemmaToken
|
||||
stringModeRawQuote
|
||||
)
|
||||
|
||||
stringMode := stringModeNone
|
||||
hex := "0123456789abcdef"
|
||||
i := 0
|
||||
for i < len(s) {
|
||||
if strings.HasPrefix(s[i:], quoteToken) {
|
||||
if stringMode == stringModeGemmaToken {
|
||||
stringMode = stringModeNone
|
||||
} else if stringMode == stringModeNone {
|
||||
stringMode = stringModeGemmaToken
|
||||
} else {
|
||||
// In a raw-quote string, treat the Gemma quote token literally.
|
||||
buf.WriteString(quoteToken)
|
||||
i += len(quoteToken)
|
||||
continue
|
||||
}
|
||||
buf.WriteByte('"')
|
||||
i += len(quoteToken)
|
||||
continue
|
||||
}
|
||||
|
||||
ch := s[i]
|
||||
|
||||
if ch == '"' {
|
||||
inString = !inString
|
||||
if stringMode == stringModeNone && ch == '"' {
|
||||
stringMode = stringModeRawQuote
|
||||
buf.WriteByte('"')
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
if inString {
|
||||
if stringMode != stringModeNone {
|
||||
switch ch {
|
||||
case '\\':
|
||||
if i+1 < len(s) {
|
||||
next := s[i+1]
|
||||
if stringMode == stringModeGemmaToken {
|
||||
switch next {
|
||||
case '"':
|
||||
// In Gemma-token strings, preserve \" as two literal characters.
|
||||
buf.WriteString(`\\\"`)
|
||||
i += 2
|
||||
continue
|
||||
case '\\', '/':
|
||||
// Keep existing behavior for \\ and \/ in Gemma-token strings.
|
||||
buf.WriteByte('\\')
|
||||
buf.WriteByte(next)
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
switch next {
|
||||
case '"', '\\', '/':
|
||||
// Preserve valid JSON escapes that are already in raw-quoted strings.
|
||||
buf.WriteByte('\\')
|
||||
buf.WriteByte(next)
|
||||
i += 2
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
// Unknown escape sequence: treat backslash as a literal character.
|
||||
buf.WriteString(`\\`)
|
||||
case '"':
|
||||
if stringMode == stringModeRawQuote {
|
||||
stringMode = stringModeNone
|
||||
buf.WriteByte('"')
|
||||
} else {
|
||||
// In Gemma-token strings, raw double quotes are string content.
|
||||
buf.WriteString(`\"`)
|
||||
}
|
||||
case '\n':
|
||||
buf.WriteString(`\n`)
|
||||
case '\r':
|
||||
@@ -389,7 +447,7 @@ func gemma4ArgsToJSON(s string) string {
|
||||
continue
|
||||
}
|
||||
|
||||
if !inString && isIdentStart(ch) {
|
||||
if isIdentStart(ch) {
|
||||
j := i + 1
|
||||
for j < len(s) && isIdentPart(s[j]) {
|
||||
j++
|
||||
|
||||
@@ -133,6 +133,20 @@ func TestGemma4Parser(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_array_of_multiple_gemma_quoted_strings",
|
||||
input: `<|tool_call>call:process{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "process",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"items": []any{"a", `b "quoted"`, "c"},
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_multiline_string_arg",
|
||||
input: `<|tool_call>call:bash{command:<|"|>date
|
||||
@@ -148,6 +162,128 @@ func TestGemma4Parser(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_escaped_double_quotes_in_string_arg",
|
||||
input: `<|tool_call>call:search{query:<|"|>say \"hello\"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `say \"hello\"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_unescaped_double_quotes_in_string_arg",
|
||||
input: `<|tool_call>call:search{query:<|"|>say "hello"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `say "hello"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_multiple_unescaped_double_quote_segments",
|
||||
input: `<|tool_call>call:search{query:<|"|>say "hello", then "goodbye"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `say "hello", then "goodbye"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_mixed_escaped_and_unescaped_double_quotes",
|
||||
input: `<|tool_call>call:search{query:<|"|>first \"quoted\" then "raw"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `first \"quoted\" then "raw"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
|
||||
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `say "hello" and "bye"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_mixed_raw_and_gemma_quoted_values",
|
||||
input: `<|tool_call>call:search{query:"raw \"quoted\"",note:<|"|>gemma "quoted"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "search",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"query": `raw "quoted"`,
|
||||
"note": `gemma "quoted"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_array_of_objects_and_mixed_quotes",
|
||||
input: `<|tool_call>call:plan{steps:[{title:<|"|>step "one"<|"|>,done:false},{title:<|"|>step \"two\"<|"|>,done:true}]}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "plan",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"steps": []any{
|
||||
map[string]any{
|
||||
"title": `step "one"`,
|
||||
"done": false,
|
||||
},
|
||||
map[string]any{
|
||||
"title": `step \"two\"`,
|
||||
"done": true,
|
||||
},
|
||||
},
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_windows_path_single_backslashes",
|
||||
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "open_file",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"path": `C:\users\bob\file.txt`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple_tool_calls",
|
||||
input: `<|tool_call>call:get_weather{location:<|"|>Paris<|"|>}<tool_call|><|tool_call>call:get_weather{location:<|"|>London<|"|>}<tool_call|>`,
|
||||
@@ -410,6 +546,11 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
|
||||
expected: `{"items":["a","b"]}`,
|
||||
},
|
||||
{
|
||||
name: "array_value_with_multiple_gemma_quoted_strings",
|
||||
input: `{items:[<|"|>a<|"|>,<|"|>b "quoted"<|"|>,<|"|>c<|"|>]}`,
|
||||
expected: `{"items":["a","b \"quoted\"","c"]}`,
|
||||
},
|
||||
{
|
||||
name: "empty_object",
|
||||
input: `{}`,
|
||||
@@ -431,6 +572,71 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
<|"|>}`,
|
||||
expected: `{"command":"date\n"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_escaped_double_quotes",
|
||||
input: `{query:<|"|>say \"hello\"<|"|>}`,
|
||||
expected: `{"query":"say \\\"hello\\\""}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_unescaped_double_quotes",
|
||||
input: `{query:<|"|>say "hello"<|"|>}`,
|
||||
expected: `{"query":"say \"hello\""}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_multiple_unescaped_double_quote_segments",
|
||||
input: `{query:<|"|>say "hello", then "goodbye"<|"|>}`,
|
||||
expected: `{"query":"say \"hello\", then \"goodbye\""}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_mixed_escaped_and_unescaped_double_quotes",
|
||||
input: `{query:<|"|>first \"quoted\" then "raw"<|"|>}`,
|
||||
expected: `{"query":"first \\\"quoted\\\" then \"raw\""}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_punctuation_and_structural_chars",
|
||||
input: `{query:<|"|>a,b:{c}[d]<|"|>}`,
|
||||
expected: `{"query":"a,b:{c}[d]"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_windows_path_backslashes",
|
||||
input: `{path:<|"|>C:\\Temp\\file.txt<|"|>}`,
|
||||
expected: `{"path":"C:\\Temp\\file.txt"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_windows_path_single_backslashes",
|
||||
input: `{path:<|"|>C:\users\bob<|"|>}`,
|
||||
expected: `{"path":"C:\\users\\bob"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_escaped_forward_slashes",
|
||||
input: `{url:<|"|>https:\/\/example.com\/a<|"|>}`,
|
||||
expected: `{"url":"https:\/\/example.com\/a"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_unicode_escape_sequence",
|
||||
input: `{s:<|"|>snowman:\u2603<|"|>}`,
|
||||
expected: `{"s":"snowman:\\u2603"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_unknown_escape_sequence",
|
||||
input: `{s:<|"|>bad \x escape<|"|>}`,
|
||||
expected: `{"s":"bad \\x escape"}`,
|
||||
},
|
||||
{
|
||||
name: "string_value_with_invalid_unicode_escape_sequence",
|
||||
input: `{s:<|"|>bad \uZZZZ escape<|"|>}`,
|
||||
expected: `{"s":"bad \\uZZZZ escape"}`,
|
||||
},
|
||||
{
|
||||
name: "raw_quoted_string_with_escaped_quotes",
|
||||
input: `{q:"say \"hi\" and \"bye\""}`,
|
||||
expected: `{"q":"say \"hi\" and \"bye\""}`,
|
||||
},
|
||||
{
|
||||
name: "nested_mixed_raw_and_gemma_quoted_values",
|
||||
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
|
||||
expected: `{"meta":{"title":"t \"1\"","note":"n \"2\""},"items":["x \"3\"","y \"4\""]}`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
Reference in New Issue
Block a user