mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 19:54:03 +02:00
gemma4: be less strict about whitespace before bare keys (#15494)
This commit is contained in:
@@ -32,7 +32,6 @@ const (
|
||||
|
||||
var (
|
||||
gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
|
||||
gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`)
|
||||
)
|
||||
|
||||
type Gemma4Parser struct {
|
||||
@@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string {
|
||||
return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
|
||||
})
|
||||
|
||||
text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
|
||||
text = quoteGemma4BareKeys(text)
|
||||
|
||||
for i, value := range quotedStrings {
|
||||
escaped, _ := json.Marshal(value)
|
||||
@@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string {
|
||||
return text
|
||||
}
|
||||
|
||||
func quoteGemma4BareKeys(s string) string {
|
||||
var sb strings.Builder
|
||||
sb.Grow(len(s) + 16)
|
||||
|
||||
for i := 0; i < len(s); {
|
||||
if s[i] == '"' {
|
||||
if end := gemma4JSONQuotedStringEnd(s, i); end != -1 {
|
||||
sb.WriteString(s[i:end])
|
||||
i = end
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if s[i] != '{' && s[i] != ',' {
|
||||
sb.WriteByte(s[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
sb.WriteByte(s[i])
|
||||
i++
|
||||
|
||||
spaceStart := i
|
||||
i = gemma4SkipSpace(s, i)
|
||||
sb.WriteString(s[spaceStart:i])
|
||||
|
||||
keyEnd := gemma4BareKeyEnd(s, i)
|
||||
if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' {
|
||||
sb.WriteByte('"')
|
||||
sb.WriteString(s[i:keyEnd])
|
||||
sb.WriteByte('"')
|
||||
sb.WriteByte(':')
|
||||
i = keyEnd + 1
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func gemma4BareKeyEnd(s string, start int) int {
|
||||
i := start
|
||||
for i < len(s) {
|
||||
r, size := utf8.DecodeRuneInString(s[i:])
|
||||
if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) {
|
||||
break
|
||||
}
|
||||
i += size
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
|
||||
// For example, if the model emits an unclosed gemma string as the last value,
|
||||
// we can repair it by closing it with the gemma string delimiter.
|
||||
|
||||
@@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_exec_with_embedded_quoted_path",
|
||||
input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"command": `ls -F "vault/"`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_exec_with_embedded_quoted_url",
|
||||
input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"command": `fetch "https://ollama.com/library/gemma4" --extract`,
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
|
||||
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
|
||||
@@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes",
|
||||
input: `<|tool_call>call:edit{edits:[
|
||||
{newText:<|"|># Gemma4 + openclaude speed optimization guide
|
||||
|
||||
Use "nvfp4" only after validating tool calls.
|
||||
<|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide
|
||||
|
||||
Use the default quantization.
|
||||
<|"|>},
|
||||
{newText:<|"|>## 14. Methods tried but not adopted
|
||||
|
||||
### 1. Model quantization
|
||||
Do not enable "mxfp8" by default.
|
||||
<|"|>,oldText:<|"|>## 14. Methods tried but not adopted
|
||||
<|"|>}
|
||||
]}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "edit",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"edits": []any{
|
||||
map[string]any{
|
||||
"newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n",
|
||||
"oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n",
|
||||
},
|
||||
map[string]any{
|
||||
"newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n",
|
||||
"oldText": "## 14. Methods tried but not adopted\n",
|
||||
},
|
||||
},
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_edit_with_array_of_objects_issue_comment_spacing",
|
||||
input: `<|tool_call>call:edit{edits:[
|
||||
{newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>,
|
||||
oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>},
|
||||
{newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>,
|
||||
oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>}
|
||||
]}<tool_call|>`,
|
||||
expectedToolCalls: []api.ToolCall{
|
||||
{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "edit",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"edits": []any{
|
||||
map[string]any{
|
||||
"newText": "# Gemma4 + openclaude speed optimization guide...",
|
||||
"oldText": "# Gemma4 + openclaude speed optimization guide...",
|
||||
},
|
||||
map[string]any{
|
||||
"newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...",
|
||||
"oldText": "## 14. Methods tried but not adopted\\n",
|
||||
},
|
||||
},
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool_call_with_windows_path_single_backslashes",
|
||||
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
|
||||
@@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
input: `{config:{enabled:true,name:<|"|>test<|"|>}}`,
|
||||
expected: `{"config":{"enabled":true,"name":"test"}}`,
|
||||
},
|
||||
{
|
||||
name: "nested_object_with_space_after_object_open_and_before_bare_key",
|
||||
input: `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
|
||||
expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`,
|
||||
},
|
||||
{
|
||||
name: "nested_object_with_space_before_bare_key",
|
||||
input: `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
|
||||
expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`,
|
||||
},
|
||||
{
|
||||
name: "nested_object_with_newline_before_bare_key",
|
||||
input: `{edits:[
|
||||
{newText:<|"|>a<|"|>,
|
||||
oldText:<|"|>b<|"|>}
|
||||
]}`,
|
||||
expected: `{"edits":[
|
||||
{"newText":"a",
|
||||
"oldText":"b"}
|
||||
]}`,
|
||||
},
|
||||
{
|
||||
name: "array_value",
|
||||
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
|
||||
@@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
||||
input: `{q:"say \"hi\" and \"bye\""}`,
|
||||
expected: `{"q":"say \"hi\" and \"bye\""}`,
|
||||
},
|
||||
{
|
||||
name: "raw_quoted_string_containing_key_like_text",
|
||||
input: `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`,
|
||||
expected: `{"q":"keep , oldText: literal","note":"ok"}`,
|
||||
},
|
||||
{
|
||||
name: "raw_quoted_string_with_escaped_quotes_and_key_like_text",
|
||||
input: `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`,
|
||||
expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`,
|
||||
},
|
||||
{
|
||||
name: "nested_mixed_raw_and_gemma_quoted_values",
|
||||
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
|
||||
@@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
|
||||
_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
|
||||
if err == nil {
|
||||
t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
|
||||
func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) {
|
||||
got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
|
||||
}
|
||||
|
||||
want := api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "foo",
|
||||
Arguments: testArgs(map[string]any{
|
||||
"q": "a,b:c",
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(want, got, argsComparer); diff != "" {
|
||||
t.Fatalf("tool call mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user