mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 22:54:05 +02:00
gemma4: be less strict about whitespace before bare keys (#15494)
This commit is contained in:
@@ -32,7 +32,6 @@ const (
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
|
gemma4QuotedStringRe = regexp.MustCompile(`(?s)<\|"\|>(.*?)<\|"\|>`)
|
||||||
gemma4BareKeyRe = regexp.MustCompile(`([,{])(\w+):`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Gemma4Parser struct {
|
type Gemma4Parser struct {
|
||||||
@@ -421,7 +420,7 @@ func gemma4ArgsToJSON(s string) string {
|
|||||||
return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
|
return "\x00" + string(rune(len(quotedStrings)-1)) + "\x00"
|
||||||
})
|
})
|
||||||
|
|
||||||
text = gemma4BareKeyRe.ReplaceAllString(text, `$1"$2":`)
|
text = quoteGemma4BareKeys(text)
|
||||||
|
|
||||||
for i, value := range quotedStrings {
|
for i, value := range quotedStrings {
|
||||||
escaped, _ := json.Marshal(value)
|
escaped, _ := json.Marshal(value)
|
||||||
@@ -431,6 +430,58 @@ func gemma4ArgsToJSON(s string) string {
|
|||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func quoteGemma4BareKeys(s string) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.Grow(len(s) + 16)
|
||||||
|
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
if s[i] == '"' {
|
||||||
|
if end := gemma4JSONQuotedStringEnd(s, i); end != -1 {
|
||||||
|
sb.WriteString(s[i:end])
|
||||||
|
i = end
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if s[i] != '{' && s[i] != ',' {
|
||||||
|
sb.WriteByte(s[i])
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteByte(s[i])
|
||||||
|
i++
|
||||||
|
|
||||||
|
spaceStart := i
|
||||||
|
i = gemma4SkipSpace(s, i)
|
||||||
|
sb.WriteString(s[spaceStart:i])
|
||||||
|
|
||||||
|
keyEnd := gemma4BareKeyEnd(s, i)
|
||||||
|
if keyEnd > i && keyEnd < len(s) && s[keyEnd] == ':' {
|
||||||
|
sb.WriteByte('"')
|
||||||
|
sb.WriteString(s[i:keyEnd])
|
||||||
|
sb.WriteByte('"')
|
||||||
|
sb.WriteByte(':')
|
||||||
|
i = keyEnd + 1
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func gemma4BareKeyEnd(s string, start int) int {
|
||||||
|
i := start
|
||||||
|
for i < len(s) {
|
||||||
|
r, size := utf8.DecodeRuneInString(s[i:])
|
||||||
|
if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
i += size
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
// repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
|
// repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails.
|
||||||
// For example, if the model emits an unclosed gemma string as the last value,
|
// For example, if the model emits an unclosed gemma string as the last value,
|
||||||
// we can repair it by closing it with the gemma string delimiter.
|
// we can repair it by closing it with the gemma string delimiter.
|
||||||
|
|||||||
@@ -218,6 +218,34 @@ func TestGemma4Parser(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "tool_call_exec_with_embedded_quoted_path",
|
||||||
|
input: `<|tool_call>call:exec{command:<|"|>ls -F "vault/"<|"|>}<tool_call|>`,
|
||||||
|
expectedToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "exec",
|
||||||
|
Arguments: testArgs(map[string]any{
|
||||||
|
"command": `ls -F "vault/"`,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool_call_exec_with_embedded_quoted_url",
|
||||||
|
input: `<|tool_call>call:exec{command:<|"|>fetch "https://ollama.com/library/gemma4" --extract<|"|>}<tool_call|>`,
|
||||||
|
expectedToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "exec",
|
||||||
|
Arguments: testArgs(map[string]any{
|
||||||
|
"command": `fetch "https://ollama.com/library/gemma4" --extract`,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
|
name: "tool_call_done_flush_without_close_tag_with_unescaped_double_quotes",
|
||||||
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
|
input: `<|tool_call>call:search{query:<|"|>say "hello" and "bye"<|"|>}`,
|
||||||
@@ -270,6 +298,71 @@ func TestGemma4Parser(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "tool_call_edit_with_array_of_objects_multiline_markdown_and_quotes",
|
||||||
|
input: `<|tool_call>call:edit{edits:[
|
||||||
|
{newText:<|"|># Gemma4 + openclaude speed optimization guide
|
||||||
|
|
||||||
|
Use "nvfp4" only after validating tool calls.
|
||||||
|
<|"|>,oldText:<|"|># Gemma4 + openclaude speed optimization guide
|
||||||
|
|
||||||
|
Use the default quantization.
|
||||||
|
<|"|>},
|
||||||
|
{newText:<|"|>## 14. Methods tried but not adopted
|
||||||
|
|
||||||
|
### 1. Model quantization
|
||||||
|
Do not enable "mxfp8" by default.
|
||||||
|
<|"|>,oldText:<|"|>## 14. Methods tried but not adopted
|
||||||
|
<|"|>}
|
||||||
|
]}<tool_call|>`,
|
||||||
|
expectedToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "edit",
|
||||||
|
Arguments: testArgs(map[string]any{
|
||||||
|
"edits": []any{
|
||||||
|
map[string]any{
|
||||||
|
"newText": "# Gemma4 + openclaude speed optimization guide\n\nUse \"nvfp4\" only after validating tool calls.\n",
|
||||||
|
"oldText": "# Gemma4 + openclaude speed optimization guide\n\nUse the default quantization.\n",
|
||||||
|
},
|
||||||
|
map[string]any{
|
||||||
|
"newText": "## 14. Methods tried but not adopted\n\n### 1. Model quantization\nDo not enable \"mxfp8\" by default.\n",
|
||||||
|
"oldText": "## 14. Methods tried but not adopted\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tool_call_edit_with_array_of_objects_issue_comment_spacing",
|
||||||
|
input: `<|tool_call>call:edit{edits:[
|
||||||
|
{newText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>,
|
||||||
|
oldText:<|"|># Gemma4 + openclaude speed optimization guide...<|"|>},
|
||||||
|
{newText:<|"|>## 14. Methods tried but not adopted\n\n### 1. Model quantization...<|"|>,
|
||||||
|
oldText:<|"|>## 14. Methods tried but not adopted\n<|"|>}
|
||||||
|
]}<tool_call|>`,
|
||||||
|
expectedToolCalls: []api.ToolCall{
|
||||||
|
{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "edit",
|
||||||
|
Arguments: testArgs(map[string]any{
|
||||||
|
"edits": []any{
|
||||||
|
map[string]any{
|
||||||
|
"newText": "# Gemma4 + openclaude speed optimization guide...",
|
||||||
|
"oldText": "# Gemma4 + openclaude speed optimization guide...",
|
||||||
|
},
|
||||||
|
map[string]any{
|
||||||
|
"newText": "## 14. Methods tried but not adopted\\n\\n### 1. Model quantization...",
|
||||||
|
"oldText": "## 14. Methods tried but not adopted\\n",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "tool_call_with_windows_path_single_backslashes",
|
name: "tool_call_with_windows_path_single_backslashes",
|
||||||
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
|
input: `<|tool_call>call:open_file{path:<|"|>C:\users\bob\file.txt<|"|>}<tool_call|>`,
|
||||||
@@ -685,6 +778,27 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
|||||||
input: `{config:{enabled:true,name:<|"|>test<|"|>}}`,
|
input: `{config:{enabled:true,name:<|"|>test<|"|>}}`,
|
||||||
expected: `{"config":{"enabled":true,"name":"test"}}`,
|
expected: `{"config":{"enabled":true,"name":"test"}}`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "nested_object_with_space_after_object_open_and_before_bare_key",
|
||||||
|
input: `{edits:[{ newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
|
||||||
|
expected: `{"edits":[{ "newText":"a", "oldText":"b"} ]}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested_object_with_space_before_bare_key",
|
||||||
|
input: `{edits:[{newText:<|"|>a<|"|>, oldText:<|"|>b<|"|>} ]}`,
|
||||||
|
expected: `{"edits":[{"newText":"a", "oldText":"b"} ]}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested_object_with_newline_before_bare_key",
|
||||||
|
input: `{edits:[
|
||||||
|
{newText:<|"|>a<|"|>,
|
||||||
|
oldText:<|"|>b<|"|>}
|
||||||
|
]}`,
|
||||||
|
expected: `{"edits":[
|
||||||
|
{"newText":"a",
|
||||||
|
"oldText":"b"}
|
||||||
|
]}`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "array_value",
|
name: "array_value",
|
||||||
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
|
input: `{items:[<|"|>a<|"|>,<|"|>b<|"|>]}`,
|
||||||
@@ -776,6 +890,16 @@ func TestGemma4ArgsToJSON(t *testing.T) {
|
|||||||
input: `{q:"say \"hi\" and \"bye\""}`,
|
input: `{q:"say \"hi\" and \"bye\""}`,
|
||||||
expected: `{"q":"say \"hi\" and \"bye\""}`,
|
expected: `{"q":"say \"hi\" and \"bye\""}`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "raw_quoted_string_containing_key_like_text",
|
||||||
|
input: `{q:"keep , oldText: literal",note:<|"|>ok<|"|>}`,
|
||||||
|
expected: `{"q":"keep , oldText: literal","note":"ok"}`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "raw_quoted_string_with_escaped_quotes_and_key_like_text",
|
||||||
|
input: `{q:"keep , oldText: and \"quoted\" text",note:<|"|>ok<|"|>}`,
|
||||||
|
expected: `{"q":"keep , oldText: and \"quoted\" text","note":"ok"}`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "nested_mixed_raw_and_gemma_quoted_values",
|
name: "nested_mixed_raw_and_gemma_quoted_values",
|
||||||
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
|
input: `{meta:{title:<|"|>t "1"<|"|>,note:"n \"2\""},items:[<|"|>x "3"<|"|>,"y \"4\""]}`,
|
||||||
@@ -1321,9 +1445,22 @@ func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *test
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) {
|
func TestParseGemma4ToolCall_RawQuotedStructuralString(t *testing.T) {
|
||||||
_, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
|
got, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil)
|
||||||
if err == nil {
|
if err != nil {
|
||||||
t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support")
|
t.Fatalf("parseGemma4ToolCall returned error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
want := api.ToolCall{
|
||||||
|
Function: api.ToolCallFunction{
|
||||||
|
Name: "foo",
|
||||||
|
Arguments: testArgs(map[string]any{
|
||||||
|
"q": "a,b:c",
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if diff := cmp.Diff(want, got, argsComparer); diff != "" {
|
||||||
|
t.Fatalf("tool call mismatch (-want +got):\n%s", diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user