mirror of
https://github.com/ollama/ollama.git
synced 2026-04-18 13:54:11 +02:00
Compare commits
2 Commits
v0.17.2
...
pdevine/sa
| Author | SHA1 | Date | |
|---|---|---|---|
| | 857cffd22a | | |
| | d98dda4676 | | |
@@ -204,6 +204,24 @@ func (p *Qwen3Parser) eat() ([]qwen3Event, bool) {
|
|||||||
p.maybeThinkingOpenAtBOL = false
|
p.maybeThinkingOpenAtBOL = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thinkingCloseIdx := strings.Index(acc, qwen3ThinkingCloseTag)
|
||||||
|
toolOpenIdx := strings.Index(acc, qwen3ToolOpenTag)
|
||||||
|
|
||||||
|
// If a tool call starts before </think>, treat that as the end of thinking
|
||||||
|
// for parsing purposes and continue in tool-call mode.
|
||||||
|
if toolOpenIdx != -1 && (thinkingCloseIdx == -1 || toolOpenIdx < thinkingCloseIdx) {
|
||||||
|
before, after := p.splitAtTag(qwen3ToolOpenTag, true)
|
||||||
|
if len(before) > 0 {
|
||||||
|
events = append(events, qwen3EventThinkingContent{content: before})
|
||||||
|
}
|
||||||
|
if after == "" {
|
||||||
|
p.state = qwen3ParserStateToolStartedEatingWhitespace
|
||||||
|
} else {
|
||||||
|
p.state = qwen3ParserStateCollectingToolContent
|
||||||
|
}
|
||||||
|
return events, true
|
||||||
|
}
|
||||||
|
|
||||||
if strings.Contains(acc, qwen3ThinkingCloseTag) {
|
if strings.Contains(acc, qwen3ThinkingCloseTag) {
|
||||||
thinking, remaining := p.splitAtTag(qwen3ThinkingCloseTag, true)
|
thinking, remaining := p.splitAtTag(qwen3ThinkingCloseTag, true)
|
||||||
if len(thinking) > 0 {
|
if len(thinking) > 0 {
|
||||||
@@ -215,7 +233,7 @@ func (p *Qwen3Parser) eat() ([]qwen3Event, bool) {
|
|||||||
p.state = qwen3ParserStateCollectingContent
|
p.state = qwen3ParserStateCollectingContent
|
||||||
}
|
}
|
||||||
return events, true
|
return events, true
|
||||||
} else if overlapLen := overlap(acc, qwen3ThinkingCloseTag); overlapLen > 0 {
|
} else if overlapLen := max(overlap(acc, qwen3ThinkingCloseTag), overlap(acc, qwen3ToolOpenTag)); overlapLen > 0 {
|
||||||
beforePartialTag := acc[:len(acc)-overlapLen]
|
beforePartialTag := acc[:len(acc)-overlapLen]
|
||||||
trailingWsLen := trailingWhitespaceLen(beforePartialTag)
|
trailingWsLen := trailingWhitespaceLen(beforePartialTag)
|
||||||
ambiguousStart := len(beforePartialTag) - trailingWsLen
|
ambiguousStart := len(beforePartialTag) - trailingWsLen
|
||||||
|
|||||||
@@ -146,6 +146,68 @@ func TestQwen3ParserToolCall(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestQwen3ParserThinkingWithToolCallBeforeThinkingClose feeds the parser a
// single chunk in which a <tool_call> block follows thinking text without any
// </think> tag ever appearing. With thinking enabled, it expects the text
// before the tag to be returned as thinking, no regular content, and exactly
// one parsed tool call named get_weather.
// NOTE(review): the second argument to Add is presumably the "done" flag —
// confirm against the parser's Add signature.
func TestQwen3ParserThinkingWithToolCallBeforeThinkingClose(t *testing.T) {
|
||||||
|
parser := &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
|
||||||
|
parser.Init(nil, nil, &api.ThinkValue{Value: true})
|
||||||
|
|
||||||
|
input := "Let me think<tool_call>{\"name\":\"get_weather\",\"arguments\":{\"location\":\"San Francisco\",\"unit\":\"celsius\"}}</tool_call>"
|
||||||
|
content, thinking, calls, err := parser.Add(input, true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if content != "" {
|
||||||
|
t.Fatalf("expected empty content, got %q", content)
|
||||||
|
}
|
||||||
|
if thinking != "Let me think" {
|
||||||
|
t.Fatalf("expected thinking %q, got %q", "Let me think", thinking)
|
||||||
|
}
|
||||||
|
if len(calls) != 1 {
|
||||||
|
t.Fatalf("expected 1 tool call, got %d", len(calls))
|
||||||
|
}
|
||||||
|
if calls[0].Function.Name != "get_weather" {
|
||||||
|
t.Fatalf("expected tool name %q, got %q", "get_weather", calls[0].Function.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestQwen3ParserThinkingWithSplitToolOpenTag checks the streaming case where
// the <tool_call> opening tag is split across two Add calls while thinking is
// enabled. After the first chunk ("Let me think<tool_ca") it expects only the
// thinking text to be returned, with no content and no tool calls. After the
// second chunk completes the tag and the call body, it expects no content, no
// further thinking, and exactly one parsed tool call named get_weather.
// NOTE(review): the second argument to Add is presumably the "done" flag —
// confirm against the parser's Add signature.
func TestQwen3ParserThinkingWithSplitToolOpenTag(t *testing.T) {
|
||||||
|
parser := &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
|
||||||
|
parser.Init(nil, nil, &api.ThinkValue{Value: true})
|
||||||
|
|
||||||
|
content, thinking, calls, err := parser.Add("Let me think<tool_ca", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse failed on first chunk: %v", err)
|
||||||
|
}
|
||||||
|
if content != "" || thinking != "Let me think" || len(calls) != 0 {
|
||||||
|
t.Fatalf(
|
||||||
|
"expected content=%q thinking=%q calls=%d, got content=%q thinking=%q calls=%d",
|
||||||
|
"",
|
||||||
|
"Let me think",
|
||||||
|
0,
|
||||||
|
content,
|
||||||
|
thinking,
|
||||||
|
len(calls),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second chunk supplies the rest of the split opening tag plus the full call body.
content, thinking, calls, err = parser.Add("ll>{\"name\":\"get_weather\",\"arguments\":{\"location\":\"SF\"}}</tool_call>", true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse failed on second chunk: %v", err)
|
||||||
|
}
|
||||||
|
if content != "" {
|
||||||
|
t.Fatalf("expected empty content, got %q", content)
|
||||||
|
}
|
||||||
|
if thinking != "" {
|
||||||
|
t.Fatalf("expected no additional thinking on second chunk, got %q", thinking)
|
||||||
|
}
|
||||||
|
if len(calls) != 1 {
|
||||||
|
t.Fatalf("expected 1 tool call, got %d", len(calls))
|
||||||
|
}
|
||||||
|
if calls[0].Function.Name != "get_weather" {
|
||||||
|
t.Fatalf("expected tool name %q, got %q", "get_weather", calls[0].Function.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestQwen35ParserRespectsNoThink(t *testing.T) {
|
func TestQwen35ParserRespectsNoThink(t *testing.T) {
|
||||||
parser := ParserForName("qwen3.5")
|
parser := ParserForName("qwen3.5")
|
||||||
if parser == nil {
|
if parser == nil {
|
||||||
|
|||||||
@@ -180,7 +180,22 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||||||
return events, false
|
return events, false
|
||||||
}
|
}
|
||||||
case CollectingThinkingContent:
|
case CollectingThinkingContent:
|
||||||
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
|
acc := p.buffer.String()
|
||||||
|
thinkingCloseIdx := strings.Index(acc, thinkingCloseTag)
|
||||||
|
toolOpenIdx := strings.Index(acc, toolOpenTag)
|
||||||
|
|
||||||
|
// If a tool call starts before </think>, treat that as the end of thinking
|
||||||
|
// for parsing purposes and continue in tool-call mode.
|
||||||
|
if toolOpenIdx != -1 && (thinkingCloseIdx == -1 || toolOpenIdx < thinkingCloseIdx) {
|
||||||
|
before, _ := splitAtTag(&p.buffer, toolOpenTag, false)
|
||||||
|
if len(before) > 0 {
|
||||||
|
events = append(events, qwenEventThinkingContent{content: before})
|
||||||
|
}
|
||||||
|
p.state = CollectingToolContent
|
||||||
|
return events, true
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.Contains(acc, thinkingCloseTag) {
|
||||||
thinking, remaining := splitAtTag(&p.buffer, thinkingCloseTag, true)
|
thinking, remaining := splitAtTag(&p.buffer, thinkingCloseTag, true)
|
||||||
if len(thinking) > 0 {
|
if len(thinking) > 0 {
|
||||||
events = append(events, qwenEventThinkingContent{content: thinking})
|
events = append(events, qwenEventThinkingContent{content: thinking})
|
||||||
@@ -191,13 +206,13 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||||||
p.state = CollectingContent
|
p.state = CollectingContent
|
||||||
}
|
}
|
||||||
return events, true
|
return events, true
|
||||||
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
|
} else if overlapLen := max(overlap(acc, thinkingCloseTag), overlap(acc, toolOpenTag)); overlapLen > 0 {
|
||||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
beforePartialTag := acc[:len(acc)-overlapLen]
|
||||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||||
|
|
||||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
unambiguous := acc[:ambiguousStart]
|
||||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
ambiguous := acc[ambiguousStart:]
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.buffer.WriteString(ambiguous)
|
p.buffer.WriteString(ambiguous)
|
||||||
if len(unambiguous) > 0 {
|
if len(unambiguous) > 0 {
|
||||||
@@ -205,11 +220,11 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
|||||||
}
|
}
|
||||||
return events, false
|
return events, false
|
||||||
} else {
|
} else {
|
||||||
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
whitespaceLen := trailingWhitespaceLen(acc)
|
||||||
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
ambiguousStart := len(acc) - whitespaceLen
|
||||||
|
|
||||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
unambiguous := acc[:ambiguousStart]
|
||||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
ambiguous := acc[ambiguousStart:]
|
||||||
p.buffer.Reset()
|
p.buffer.Reset()
|
||||||
p.buffer.WriteString(ambiguous)
|
p.buffer.WriteString(ambiguous)
|
||||||
if len(unambiguous) > 0 {
|
if len(unambiguous) > 0 {
|
||||||
|
|||||||
@@ -98,8 +98,12 @@ func TestQwen3VLThinkingParserStreaming(t *testing.T) {
|
|||||||
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
||||||
steps: []step{
|
steps: []step{
|
||||||
{
|
{
|
||||||
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
||||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
|
wantEvents: []qwenEvent{
|
||||||
|
qwenEventThinkingContent{content: "I'm thinking"},
|
||||||
|
qwenEventRawToolCall{raw: "I'm nested tool call"},
|
||||||
|
qwenEventContent{content: "</think>"},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -109,8 +113,7 @@ func TestQwen3VLThinkingParserStreaming(t *testing.T) {
|
|||||||
{
|
{
|
||||||
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
||||||
wantEvents: []qwenEvent{
|
wantEvents: []qwenEvent{
|
||||||
qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
|
qwenEventRawToolCall{raw: "I'm nested tool call<think>I'm thinking</think>"},
|
||||||
qwenEventContent{content: "</tool_call>"},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -121,8 +124,8 @@ func TestQwen3VLThinkingParserStreaming(t *testing.T) {
|
|||||||
{
|
{
|
||||||
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
||||||
wantEvents: []qwenEvent{
|
wantEvents: []qwenEvent{
|
||||||
qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
|
qwenEventThinkingContent{content: "I'm thinking"},
|
||||||
qwenEventContent{content: "</tool_call>"},
|
qwenEventRawToolCall{raw: "I'm NOT a nested tool call</think>"},
|
||||||
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
||||||
qwenEventContent{content: "</think>"},
|
qwenEventContent{content: "</think>"},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -78,6 +78,11 @@ func (c *kvCache) findRemaining(tokens []int32) []int32 {
|
|||||||
prefix++
|
prefix++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if prefix == len(tokens) && prefix > 0 {
|
||||||
|
// Leave one token to run through the model so we can sample a response.
|
||||||
|
prefix--
|
||||||
|
}
|
||||||
|
|
||||||
if prefix < len(c.tokens) {
|
if prefix < len(c.tokens) {
|
||||||
trim := len(c.tokens) - prefix
|
trim := len(c.tokens) - prefix
|
||||||
for _, kv := range c.caches {
|
for _, kv := range c.caches {
|
||||||
|
|||||||
Reference in New Issue
Block a user