From 8c8f8f3450d39735355fc6cd7f2e436c8aa42ab1 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Mon, 6 Apr 2026 18:47:17 -0700 Subject: [PATCH] model/parsers: add gemma4 tool call repair (#15374) The existing strict gemma4 tool parser is still the primary path, but if this fails, we try to repair by fixing some of the most commonly seen mistakes these models seem to make in practice. We repair by building up a set of candidates, and use the first candidate that parses. Repairs cover: - missing Gemma string delimiters - single-quoted string values, including a dangling Gemma delimiter - raw terminal string values (if the corresponding tool schema indicates it should be a string) - missing object close only after a concrete repair Add regression coverage for malformed tool calls from issue #15315 and focused unit tests for the individual repair helpers and candidate pipeline. --- model/parsers/gemma4.go | 375 ++++++++++++++++++++++++++++- model/parsers/gemma4_test.go | 446 ++++++++++++++++++++++++++++++++++- 2 files changed, 812 insertions(+), 9 deletions(-) diff --git a/model/parsers/gemma4.go b/model/parsers/gemma4.go index 55d45b5e6..156605824 100644 --- a/model/parsers/gemma4.go +++ b/model/parsers/gemma4.go @@ -7,6 +7,7 @@ import ( "regexp" "strings" "unicode" + "unicode/utf8" "github.com/ollama/ollama/api" ) @@ -25,6 +26,7 @@ const ( gemma4ThinkingCloseTag = "" gemma4ToolCallOpenTag = "<|tool_call>" gemma4ToolCallCloseTag = "" + gemma4StringDelimiter = `<|"|>` ) var ( @@ -35,6 +37,7 @@ var ( type Gemma4Parser struct { state Gemma4ParserState buffer strings.Builder + tools []api.Tool hasThinkingSupport bool thinkingEnabled bool // true when both model supports and user requested thinking needsChannelNameStrip bool // true when we just entered thinking and need to strip "thought\n" @@ -49,6 +52,8 @@ func (p *Gemma4Parser) HasThinkingSupport() bool { } func (p *Gemma4Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool { + p.tools = tools + prefill := lastMessage != nil && lastMessage.Role == "assistant" p.thinkingEnabled = p.HasThinkingSupport() && (thinkValue != nil && thinkValue.Bool()) @@ -288,7 +293,7 @@ func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) { p.buffer.WriteString(remaining) p.state = Gemma4IgnoringPostToolCallNoise - if toolCall, err := parseGemma4ToolCall(toolCallContent); err == nil { + if toolCall, err := parseGemma4ToolCall(toolCallContent, p.tools); err == nil { events = append(events, gemma4EventToolCall{toolCall: toolCall}) } else { slog.Warn("gemma4 tool call parsing failed", "error", err, "content", toolCallContent) @@ -301,7 +306,7 @@ func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) { if done && len(bufStr) > 0 { p.buffer.Reset() p.state = Gemma4CollectingContent - if toolCall, err := parseGemma4ToolCall(bufStr); err == nil { + if toolCall, err := parseGemma4ToolCall(bufStr, p.tools); err == nil { events = append(events, gemma4EventToolCall{toolCall: toolCall}) } else { slog.Warn("gemma4 tool call flush on done failed", "error", err, "content", bufStr) @@ -350,7 +355,7 @@ func (p *Gemma4Parser) eat(done bool) ([]gemma4Event, bool) { // parseGemma4ToolCall parses a tool call in Gemma 4 format: // call:NAME{key:value,key:value} -func parseGemma4ToolCall(content string) (api.ToolCall, error) { +func parseGemma4ToolCall(content string, tools []api.Tool) (api.ToolCall, error) { // Expected format: call:NAME{args} if !strings.HasPrefix(content, "call:") { return api.ToolCall{}, errors.New("expected 'call:' prefix") @@ -371,7 +376,11 @@ func parseGemma4ToolCall(content string) (api.ToolCall, error) { var args api.ToolCallFunctionArguments if err := json.Unmarshal([]byte(jsonStr), &args); err != nil { - return api.ToolCall{}, err + repairedArgs, repairErr := repairGemma4ToolCallArgs(argsStr, toolName, tools) + if repairErr != nil { + return api.ToolCall{}, errors.Join(err, repairErr) + } + args = repairedArgs } return api.ToolCall{ @@ -400,3 +409,361 @@ func gemma4ArgsToJSON(s string) string { return text } + +// repairGemma4ToolCallArgs is a best-effort repair after strict parsing fails. +// For example, if the model emits an unclosed gemma string as the last value, +// we can repair it by closing it with the gemma string delimiter. +func repairGemma4ToolCallArgs(argsStr, toolName string, tools []api.Tool) (api.ToolCallFunctionArguments, error) { + for _, candidate := range gemma4RepairCandidates(argsStr, toolName, tools) { + jsonStr := gemma4ArgsToJSON(candidate) + + var args api.ToolCallFunctionArguments + if err := json.Unmarshal([]byte(jsonStr), &args); err == nil { + return args, nil + } + } + + return api.ToolCallFunctionArguments{}, errors.New("repair failed to produce valid JSON arguments") +} + +func gemma4ToolProperties(toolName string, tools []api.Tool) *api.ToolPropertiesMap { + for i := range tools { + if tools[i].Function.Name == toolName { + return tools[i].Function.Parameters.Properties + } + } + return nil +} + +// gemma4RepairCandidates returns the small set of repaired argument strings we +// are willing to try after strict parsing fails. Each candidate still has to +// pass the normal Gemma4-to-JSON conversion and JSON unmarshal before it is used. +func gemma4RepairCandidates(argsStr, toolName string, tools []api.Tool) []string { + seen := map[string]bool{} + var candidates []string + addCandidate := func(candidate string, allowMissingObjectClose bool) { + original := candidate + candidate = repairGemma4SingleQuotedValues(candidate) + candidate = repairGemma4MissingStringDelimiter(candidate) + if allowMissingObjectClose || candidate != original { + candidate = repairGemma4MissingObjectClose(candidate) + } + if !seen[candidate] { + candidates = append(candidates, candidate) + seen[candidate] = true + } + } + + addCandidate(argsStr, false) + if raw, ok := repairGemma4RawTerminalStringValue(argsStr, toolName, tools); ok { + addCandidate(raw, true) + } + + return candidates +} + +// repairGemma4MissingStringDelimiter closes an unbalanced Gemma string marker. +// When the value is immediately followed by a closing brace/bracket, the marker +// is inserted before that structural close rather than after it. +func repairGemma4MissingStringDelimiter(s string) string { + if strings.Count(s, gemma4StringDelimiter)%2 == 0 { + return s + } + + insertAt := gemma4TrimRightSpaceIndex(s) + if insertAt > 0 && (s[insertAt-1] == '}' || s[insertAt-1] == ']') { + insertAt-- + } + + var sb strings.Builder + sb.Grow(len(s) + len(gemma4StringDelimiter)) + sb.WriteString(s[:insertAt]) + sb.WriteString(gemma4StringDelimiter) + sb.WriteString(s[insertAt:]) + return sb.String() +} + +// repairGemma4MissingObjectClose adds a final object close after another repair +// has made a truncated object plausible. Callers decide when that guardrail is +// satisfied; this helper only performs the mechanical insertion. +func repairGemma4MissingObjectClose(s string) string { + trimmedStart := strings.TrimLeftFunc(s, unicode.IsSpace) + if !strings.HasPrefix(trimmedStart, "{") { + return s + } + + trimmedEnd := gemma4TrimRightSpaceIndex(s) + if trimmedEnd > 0 && s[trimmedEnd-1] == '}' { + return s + } + + return s[:trimmedEnd] + "}" + s[trimmedEnd:] +} + +// repairGemma4SingleQuotedValues converts single-quoted argument values into +// Gemma string-delimited values. It also drops a stray Gemma delimiter that +// sometimes appears immediately after the closing single quote. +func repairGemma4SingleQuotedValues(s string) string { + var sb strings.Builder + sb.Grow(len(s)) + + for i := 0; i < len(s); { + if strings.HasPrefix(s[i:], gemma4StringDelimiter) { + end := strings.Index(s[i+len(gemma4StringDelimiter):], gemma4StringDelimiter) + if end == -1 { + sb.WriteString(s[i:]) + break + } + + end = i + len(gemma4StringDelimiter) + end + len(gemma4StringDelimiter) + sb.WriteString(s[i:end]) + i = end + continue + } + + if s[i] == '"' { + end := gemma4JSONQuotedStringEnd(s, i) + if end != -1 { + sb.WriteString(s[i:end]) + i = end + continue + } + } + + if s[i] != ':' { + sb.WriteByte(s[i]) + i++ + continue + } + + sb.WriteByte(s[i]) + i++ + + spaceEnd := gemma4SkipSpace(s, i) + sb.WriteString(s[i:spaceEnd]) + i = spaceEnd + if i >= len(s) || s[i] != '\'' { + continue + } + + value, end, ok := gemma4SingleQuotedValue(s, i) + if !ok { + continue + } + + sb.WriteString(gemma4StringDelimiter) + sb.WriteString(value) + sb.WriteString(gemma4StringDelimiter) + i = end + if strings.HasPrefix(s[i:], gemma4StringDelimiter) { + i += len(gemma4StringDelimiter) + } + } + + return sb.String() +} + +func gemma4SingleQuotedValue(s string, start int) (string, int, bool) { + var sb strings.Builder + escaped := false + for i := start + 1; i < len(s); i++ { + if s[i] == '\'' && !escaped { + return sb.String(), i + 1, true + } + + sb.WriteByte(s[i]) + escaped = s[i] == '\\' && !escaped + if s[i] != '\\' { + escaped = false + } + } + + return "", start, false +} + +// repairGemma4RawTerminalStringValue wraps a raw value in Gemma string +// delimiters only when the tool schema says that argument is a string. This is +// deliberately schema-gated because raw text is otherwise too ambiguous. +func repairGemma4RawTerminalStringValue(argsStr, toolName string, tools []api.Tool) (string, bool) { + props := gemma4ToolProperties(toolName, tools) + if props == nil { + return "", false + } + + for key, prop := range props.All() { + if !gemma4PropertyAcceptsString(prop) { + continue + } + + if repaired, ok := repairGemma4RawTerminalStringValueForKey(argsStr, key, props); ok { + return repaired, true + } + } + + return "", false +} + +func repairGemma4RawTerminalStringValueForKey(s, key string, props *api.ToolPropertiesMap) (string, bool) { + for searchStart := 0; searchStart < len(s); { + valueStart, ok := gemma4FindValueStartForKey(s, key, searchStart) + if !ok { + return "", false + } + + valueCheck := gemma4SkipSpace(s, valueStart) + if valueCheck < len(s) && gemma4ValueStartsStructured(s, valueCheck) { + searchStart = valueStart + continue + } + + valueEnd := gemma4RawStringValueEnd(s, valueStart, props) + return s[:valueStart] + gemma4StringDelimiter + s[valueStart:valueEnd] + gemma4StringDelimiter + s[valueEnd:], true + } + + return "", false +} + +func gemma4FindValueStartForKey(s, key string, searchStart int) (int, bool) { + for i := searchStart; i < len(s); i++ { + if strings.HasPrefix(s[i:], gemma4StringDelimiter) { + end := strings.Index(s[i+len(gemma4StringDelimiter):], gemma4StringDelimiter) + if end == -1 { + return 0, false + } + i += len(gemma4StringDelimiter) + end + len(gemma4StringDelimiter) - 1 + continue + } + + if s[i] == '"' { + if end := gemma4JSONQuotedStringEnd(s, i); end != -1 { + i = end - 1 + continue + } + } + + if s[i] != '{' && s[i] != ',' { + continue + } + + keyStart := gemma4SkipSpace(s, i+1) + if !strings.HasPrefix(s[keyStart:], key) { + continue + } + + colon := gemma4SkipSpace(s, keyStart+len(key)) + if colon < len(s) && s[colon] == ':' { + return colon + 1, true + } + } + + return 0, false +} + +func gemma4RawStringValueEnd(s string, start int, props *api.ToolPropertiesMap) int { + for i := start; i < len(s); i++ { + if s[i] != ',' { + continue + } + + keyStart := gemma4SkipSpace(s, i+1) + keyEnd := keyStart + for keyEnd < len(s) { + r, size := utf8.DecodeRuneInString(s[keyEnd:]) + if !(r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)) { + break + } + keyEnd += size + } + if keyEnd == keyStart { + continue + } + + colon := gemma4SkipSpace(s, keyEnd) + if colon < len(s) && s[colon] == ':' { + if _, ok := props.Get(s[keyStart:keyEnd]); ok { + return i + } + } + } + + end := gemma4TrimRightSpaceIndex(s) + if end > start && s[end-1] == '}' { + return end - 1 + } + return len(s) +} + +func gemma4ValueStartsStructured(s string, pos int) bool { + if pos >= len(s) { + return false + } + if strings.HasPrefix(s[pos:], gemma4StringDelimiter) { + return true + } + + switch s[pos] { + case '\'', '"', '{', '[': + return true + } + + return gemma4LooksLikeJSONLiteralStart(s[pos]) +} + +func gemma4JSONQuotedStringEnd(s string, start int) int { + escaped := false + for i := start + 1; i < len(s); i++ { + if s[i] == '"' && !escaped { + return i + 1 + } + + escaped = s[i] == '\\' && !escaped + if s[i] != '\\' { + escaped = false + } + } + + return -1 +} + +func gemma4SkipSpace(s string, i int) int { + for i < len(s) { + r, size := utf8.DecodeRuneInString(s[i:]) + if !unicode.IsSpace(r) { + return i + } + i += size + } + return i +} + +func gemma4TrimRightSpaceIndex(s string) int { + i := len(s) + for i > 0 { + r, size := utf8.DecodeLastRuneInString(s[:i]) + if !unicode.IsSpace(r) { + return i + } + i -= size + } + return i +} + +func gemma4PropertyAcceptsString(prop api.ToolProperty) bool { + for _, typ := range prop.Type { + if strings.EqualFold(typ, "string") { + return true + } + } + + for _, anyOf := range prop.AnyOf { + if gemma4PropertyAcceptsString(anyOf) { + return true + } + } + + return false +} + +func gemma4LooksLikeJSONLiteralStart(ch byte) bool { + return ch == '-' || ('0' <= ch && ch <= '9') || ch == 't' || ch == 'f' || ch == 'n' +} diff --git a/model/parsers/gemma4_test.go b/model/parsers/gemma4_test.go index 00784c4f7..07ff67bf1 100644 --- a/model/parsers/gemma4_test.go +++ b/model/parsers/gemma4_test.go @@ -720,6 +720,292 @@ func TestGemma4ArgsToJSON(t *testing.T) { } } +func TestRepairGemma4MissingStringDelimiter(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "closes_before_object_close", + input: `{command:<|"|>ls}`, + expected: `{command:<|"|>ls<|"|>}`, + }, + { + name: "closes_terminal_value_after_previous_property", + input: `{path:<|"|>/tmp<|"|>,command:<|"|>ls}`, + expected: `{path:<|"|>/tmp<|"|>,command:<|"|>ls<|"|>}`, + }, + { + name: "closes_at_end", + input: `{command:<|"|>ls`, + expected: `{command:<|"|>ls<|"|>`, + }, + { + name: "preserves_valid_gemma_quoted_string", + input: `{command:<|"|>ls<|"|>}`, + expected: `{command:<|"|>ls<|"|>}`, + }, + { + name: "preserves_input_without_gemma_delimiter", + input: `{command:ls}`, + expected: `{command:ls}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := repairGemma4MissingStringDelimiter(tt.input) + if got != tt.expected { + t.Fatalf("repairGemma4MissingStringDelimiter(%q) = %q, want %q", tt.input, got, tt.expected) + } + }) + } +} + +func TestRepairGemma4MissingObjectClose(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "adds_object_close", + input: `{content:<|"|>hello<|"|>`, + expected: `{content:<|"|>hello<|"|>}`, + }, + { + name: "adds_object_close_before_trailing_space", + input: "{content:<|\"|>hello<|\"|> ", + expected: "{content:<|\"|>hello<|\"|>} ", + }, + { + name: "preserves_existing_object_close", + input: `{content:<|"|>hello<|"|>}`, + expected: `{content:<|"|>hello<|"|>}`, + }, + { + name: "preserves_non_object_input", + input: `content:<|"|>hello<|"|>`, + expected: `content:<|"|>hello<|"|>`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := repairGemma4MissingObjectClose(tt.input) + if got != tt.expected { + t.Fatalf("repairGemma4MissingObjectClose(%q) = %q, want %q", tt.input, got, tt.expected) + } + }) + } +} + +func TestRepairGemma4SingleQuotedValues(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "converts_single_quoted_value", + input: `{pattern:':\s*\w+'}`, + expected: `{pattern:<|"|>:\s*\w+<|"|>}`, + }, + { + name: "converts_middle_single_quoted_value", + input: `{include:<|"|>*.py<|"|>,pattern:'abc',path:<|"|>/tmp<|"|>}`, + expected: `{include:<|"|>*.py<|"|>,pattern:<|"|>abc<|"|>,path:<|"|>/tmp<|"|>}`, + }, + { + name: "drops_dangling_gemma_delimiter_after_single_quoted_value", + input: `{pattern:'abc'<|"|>}`, + expected: `{pattern:<|"|>abc<|"|>}`, + }, + { + name: "preserves_gemma_quoted_value", + input: `{pattern:<|"|>abc<|"|>}`, + expected: `{pattern:<|"|>abc<|"|>}`, + }, + { + name: "preserves_json_quoted_value", + input: `{pattern:"abc"}`, + expected: `{pattern:"abc"}`, + }, + { + name: "preserves_unterminated_single_quote", + input: `{pattern:'abc}`, + expected: `{pattern:'abc}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := repairGemma4SingleQuotedValues(tt.input) + if got != tt.expected { + t.Fatalf("repairGemma4SingleQuotedValues(%q) = %q, want %q", tt.input, got, tt.expected) + } + }) + } +} + +func TestRepairGemma4RawTerminalStringValue(t *testing.T) { + numberProps := api.NewToolPropertiesMap() + numberProps.Set("content", api.ToolProperty{Type: api.PropertyType{"number"}}) + numberTool := api.Tool{ + Type: "function", + Function: api.ToolFunction{ + Name: "write", + Parameters: api.ToolFunctionParameters{ + Type: "object", + Properties: numberProps, + }, + }, + } + + tests := []struct { + name string + input string + toolName string + tools []api.Tool + expected string + expectedOK bool + }{ + { + name: "wraps_known_string_property_terminal_value", + input: "{content:\n\n# Title", + toolName: "write", + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + expected: "{content:<|\"|>\n\n# Title<|\"|>", + expectedOK: true, + }, + { + name: "stops_before_next_known_property", + input: `{content:hello,mode:<|"|>fast<|"|>}`, + toolName: "write", + tools: []api.Tool{gemma4TestStringTool("write", "content", "mode")}, + expected: `{content:<|"|>hello<|"|>,mode:<|"|>fast<|"|>}`, + expectedOK: true, + }, + { + name: "does_not_repair_without_schema", + input: `{content:hello`, + toolName: "write", + tools: nil, + expectedOK: false, + }, + { + name: "does_not_repair_non_string_property", + input: `{content:hello`, + toolName: "write", + tools: []api.Tool{numberTool}, + expectedOK: false, + }, + { + name: "does_not_repair_already_structured_value", + input: `{content:<|"|>hello<|"|>}`, + toolName: "write", + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + expectedOK: false, + }, + { + name: "does_not_repair_json_literal_start", + input: `{content:123}`, + toolName: "write", + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + expectedOK: false, + }, + { + name: "does_not_repair_unknown_tool", + input: `{content:hello`, + toolName: "missing", + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + expectedOK: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := repairGemma4RawTerminalStringValue(tt.input, tt.toolName, tt.tools) + if ok != tt.expectedOK { + t.Fatalf("repairGemma4RawTerminalStringValue ok = %t, want %t", ok, tt.expectedOK) + } + if got != tt.expected { + t.Fatalf("repairGemma4RawTerminalStringValue got %q, want %q", got, tt.expected) + } + }) + } +} + +func TestGemma4RepairCandidates(t *testing.T) { + tests := []struct { + name string + input string + toolName string + tools []api.Tool + expected []string + }{ + { + name: "missing_string_delimiter_candidate", + input: `{command:<|"|>ls}`, + toolName: "bash", + tools: []api.Tool{gemma4TestStringTool("bash", "command")}, + expected: []string{`{command:<|"|>ls<|"|>}`}, + }, + { + name: "single_quoted_value_candidate", + input: `{pattern:'abc'<|"|>}`, + toolName: "grep", + tools: []api.Tool{gemma4TestStringTool("grep", "pattern")}, + expected: []string{`{pattern:<|"|>abc<|"|>}`}, + }, + { + name: "raw_string_candidate_also_gets_missing_object_close", + input: `{content:hello`, + toolName: "write", + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + expected: []string{ + `{content:hello`, + `{content:<|"|>hello<|"|>}`, + }, + }, + { + name: "does_not_add_missing_object_close_without_another_repair", + input: `{n:1`, + toolName: "count", + tools: []api.Tool{gemma4TestStringTool("count", "name")}, + expected: []string{`{n:1`}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := gemma4RepairCandidates(tt.input, tt.toolName, tt.tools) + if diff := cmp.Diff(tt.expected, got); diff != "" { + t.Fatalf("gemma4RepairCandidates mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func gemma4TestStringTool(name string, argNames ...string) api.Tool { + props := api.NewToolPropertiesMap() + for _, argName := range argNames { + props.Set(argName, api.ToolProperty{Type: api.PropertyType{"string"}}) + } + + return api.Tool{ + Type: "function", + Function: api.ToolFunction{ + Name: name, + Parameters: api.ToolFunctionParameters{ + Type: "object", + Properties: props, + }, + }, + } +} + func TestGemma4Parser_HasToolSupport(t *testing.T) { parser := &Gemma4Parser{} if !parser.HasToolSupport() { @@ -740,14 +1026,14 @@ func TestGemma4Parser_HasThinkingSupport(t *testing.T) { } func TestParseGemma4ToolCall_InvalidRawQuotedEscape(t *testing.T) { - _, err := parseGemma4ToolCall(`call:open_file{path:"C:\users\bob\file.txt"}`) + _, err := parseGemma4ToolCall(`call:open_file{path:"C:\users\bob\file.txt"}`, nil) if err == nil { t.Fatal("expected parseGemma4ToolCall to reject malformed raw-quoted JSON escapes") } } func TestParseGemma4ToolCall_QuotedScalarsStayStrings(t *testing.T) { - toolCall, err := parseGemma4ToolCall(`call:foo{n:<|"|>1<|"|>,b:<|"|>true<|"|>,z:<|"|>null<|"|>}`) + toolCall, err := parseGemma4ToolCall(`call:foo{n:<|"|>1<|"|>,b:<|"|>true<|"|>,z:<|"|>null<|"|>}`, nil) if err != nil { t.Fatalf("parseGemma4ToolCall returned error: %v", err) } @@ -769,7 +1055,7 @@ func TestParseGemma4ToolCall_QuotedScalarsStayStrings(t *testing.T) { } func TestParseGemma4ToolCall_UnquotedScalarsKeepStructuredTypes(t *testing.T) { - toolCall, err := parseGemma4ToolCall(`call:foo{n:1,b:true,z:null}`) + toolCall, err := parseGemma4ToolCall(`call:foo{n:1,b:true,z:null}`, nil) if err != nil { t.Fatalf("parseGemma4ToolCall returned error: %v", err) } @@ -791,7 +1077,7 @@ func TestParseGemma4ToolCall_UnquotedScalarsKeepStructuredTypes(t *testing.T) { } func TestParseGemma4ToolCall_ReferenceImplementationExample(t *testing.T) { - toolCall, err := parseGemma4ToolCall(`call:get_current_temperature{detail_level:0,location:<|"|>Paris, France<|"|>,unit:<|"|>celsius<|"|>}`) + toolCall, err := parseGemma4ToolCall(`call:get_current_temperature{detail_level:0,location:<|"|>Paris, France<|"|>,unit:<|"|>celsius<|"|>}`, nil) if err != nil { t.Fatalf("parseGemma4ToolCall returned error: %v", err) } @@ -812,8 +1098,158 @@ func TestParseGemma4ToolCall_ReferenceImplementationExample(t *testing.T) { } } +func TestParseGemma4ToolCall_RepairsIssue15315Examples(t *testing.T) { + writeContent := "\n\n# Project Style Guide for Autonomous Agents' Code Generation (AGENTS.md)\n\n" + + "This document captures the *de facto* coding standards observed across the `src/` and `components/` source code, designed to ensure consistency for all generated code and modules consumed by the agent system." + + tests := []struct { + name string + content string + tools []api.Tool + want api.ToolCall + }{ + { + name: "raw multiline string", + // Source: https://github.com/ollama/ollama/issues/15315#issue-4203625511 + content: "call:write{content:" + writeContent, + tools: []api.Tool{gemma4TestStringTool("write", "content")}, + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "write", + Arguments: testArgs(map[string]any{ + "content": writeContent, + }), + }, + }, + }, + { + name: "single quoted value with dangling gemma string delimiter", + content: `call:grep{include:<|"|>*.py<|"|>,output_mode:<|"|>content<|"|>,path:<|"|>/data/robotics/experiment1<|"|>,pattern:':\s*\w+'<|"|>}`, + tools: []api.Tool{gemma4TestStringTool("grep", "include", "output_mode", "path", "pattern")}, + // Source: https://github.com/ollama/ollama/issues/15315#issue-4203625511 + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "grep", + Arguments: testArgs(map[string]any{ + "include": "*.py", + "output_mode": "content", + "path": "/data/robotics/experiment1", + "pattern": `:\s*\w+`, + }), + }, + }, + }, + { + name: "unclosed gemma string before object close", + content: `call:bash{command:<|"|>ls}`, + tools: []api.Tool{gemma4TestStringTool("bash", "command")}, + // Source: https://github.com/ollama/ollama/issues/15315#issuecomment-4194547092 + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "bash", + Arguments: testArgs(map[string]any{ + "command": "ls", + }), + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseGemma4ToolCall(tt.content, tt.tools) + if err != nil { + t.Fatalf("parseGemma4ToolCall returned error: %v", err) + } + + if diff := cmp.Diff(tt.want, got, argsComparer); diff != "" { + t.Fatalf("tool call mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestParseGemma4ToolCall_RepairsMultipleProperties(t *testing.T) { + tests := []struct { + name string + content string + tools []api.Tool + want api.ToolCall + }{ + { + name: "single_quoted_middle_property", + content: `call:grep{include:<|"|>*.py<|"|>,pattern:'abc',path:<|"|>/tmp<|"|>}`, + tools: []api.Tool{gemma4TestStringTool("grep", "include", "pattern", "path")}, + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "grep", + Arguments: testArgs(map[string]any{ + "include": "*.py", + "pattern": "abc", + "path": "/tmp", + }), + }, + }, + }, + { + name: "unclosed_gemma_string_terminal_property", + content: `call:bash{path:<|"|>/tmp<|"|>,command:<|"|>ls}`, + tools: []api.Tool{gemma4TestStringTool("bash", "path", "command")}, + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "bash", + Arguments: testArgs(map[string]any{ + "path": "/tmp", + "command": "ls", + }), + }, + }, + }, + { + name: "raw_string_before_next_property", + content: `call:write{content:hello,mode:<|"|>fast<|"|>}`, + tools: []api.Tool{gemma4TestStringTool("write", "content", "mode")}, + want: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "write", + Arguments: testArgs(map[string]any{ + "content": "hello", + "mode": "fast", + }), + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseGemma4ToolCall(tt.content, tt.tools) + if err != nil { + t.Fatalf("parseGemma4ToolCall returned error: %v", err) + } + + if diff := cmp.Diff(tt.want, got, argsComparer); diff != "" { + t.Fatalf("tool call mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestParseGemma4ToolCall_DoesNotRepairNonTerminalUnclosedGemmaString(t *testing.T) { + // TODO(drifkin): our current examples show unclosed gemma strings as the last + // values, but if we find examples where there's an unclosed non-last value, + // we should consider repairing it. This test shows that we don't yet repair + // this type (the heuristics of where to close are much more complicated) + _, err := parseGemma4ToolCall(`call:example{first:<|"|>one,second:<|"|>two<|"|>}`, []api.Tool{ + gemma4TestStringTool("example", "first", "second"), + }) + if err == nil { + t.Fatal("expected non-terminal unclosed Gemma string to remain unsupported") + } +} + func TestParseGemma4ToolCall_InvalidRawQuotedStructuralString(t *testing.T) { - _, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`) + _, err := parseGemma4ToolCall(`call:foo{q:"a,b:c"}`, nil) if err == nil { t.Fatal("expected parseGemma4ToolCall to reject raw-quoted strings with structural text that the reference implementation does not support") }