Files
ollama/integration/thinking_test.go
Daniel Hiltgen f6b69f3f28 integration: improve vision test robustness and add thinking tests
Add skipIfNoVisionOverride() to skip vision tests when OLLAMA_TEST_MODEL
is set to a non-vision model. Add Think:false to context exhaustion test
to prevent thinking models from using all context before the test can
measure it. Add third test image (ollama homepage) and replace OCR test
with ImageDescription test using it. Relax match strings for broader
model compatibility. Add TestThinkingEnabled and TestThinkingSuppressed
to verify thinking output and channel tag handling.
2026-04-01 15:20:43 -07:00

156 lines
4.8 KiB
Go

//go:build integration
package integration
import (
"context"
"strings"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TestThinkingEnabled verifies that when thinking is requested, the model
// produces both thinking and content output without leaking raw channel tags.
func TestThinkingEnabled(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
models := testModels([]string{smol})
for _, modelName := range models {
t.Run(modelName, func(t *testing.T) {
requireCapability(ctx, t, client, modelName, "thinking")
pullOrSkip(ctx, t, client, modelName)
think := api.ThinkValue{Value: true}
stream := false
req := api.ChatRequest{
Model: modelName,
Stream: &stream,
Think: &think,
Messages: []api.Message{
{Role: "user", Content: "What is 12 * 15? Think step by step."},
},
Options: map[string]any{
"temperature": 0,
"seed": 42,
"num_predict": 512,
},
}
var response api.ChatResponse
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
response = cr
return nil
})
if err != nil {
if strings.Contains(err.Error(), "model requires more system memory") {
t.Skip("model too large for test system")
}
t.Fatalf("chat failed: %v", err)
}
content := response.Message.Content
thinking := response.Message.Thinking
// Thinking should be non-empty when thinking is enabled
if thinking == "" {
t.Error("expected non-empty thinking output when thinking is enabled")
}
// The answer (180) should appear in thinking, content, or both.
// Some models put everything in thinking and leave content empty
// if they hit the token limit while still thinking.
combined := thinking + " " + content
if !strings.Contains(combined, "180") {
t.Errorf("expected '180' in thinking or content, got thinking=%q content=%q", thinking, content)
}
// Neither thinking nor content should contain raw channel tags
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
t.Errorf("content contains raw channel tags: %s", content)
}
if strings.Contains(thinking, "<|channel>") || strings.Contains(thinking, "<channel|>") {
t.Errorf("thinking contains raw channel tags: %s", thinking)
}
t.Logf("thinking (%d chars): %.100s...", len(thinking), thinking)
t.Logf("content (%d chars): %s", len(content), content)
})
}
}
// TestThinkingSuppressed verifies that when thinking is NOT requested,
// the model does not leak thinking/channel content into the response.
func TestThinkingSuppressed(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
models := testModels([]string{smol})
for _, modelName := range models {
t.Run(modelName, func(t *testing.T) {
requireCapability(ctx, t, client, modelName, "thinking")
pullOrSkip(ctx, t, client, modelName)
stream := false
req := api.ChatRequest{
Model: modelName,
Stream: &stream,
// Think is nil — thinking not requested
Messages: []api.Message{
{Role: "user", Content: "What is the capital of Japan? Answer in one word."},
},
Options: map[string]any{
"temperature": 0,
"seed": 42,
"num_predict": 64,
},
}
var response api.ChatResponse
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
response = cr
return nil
})
if err != nil {
if strings.Contains(err.Error(), "model requires more system memory") {
t.Skip("model too large for test system")
}
t.Fatalf("chat failed: %v", err)
}
content := response.Message.Content
thinking := response.Message.Thinking
// The answer should appear in content or thinking
combined := content + " " + thinking
if !strings.Contains(combined, "Tokyo") {
t.Errorf("expected 'Tokyo' in content or thinking, got content=%q thinking=%q", content, thinking)
}
// Content must NOT contain channel/thinking tags
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
t.Errorf("content contains leaked channel tags when thinking not requested: %s", content)
}
if strings.Contains(content, "thought") && strings.Contains(content, "<channel|>") {
t.Errorf("content contains leaked thinking block: %s", content)
}
// Thinking field should ideally be empty when not requested.
// Some small models may still produce thinking output; log but don't fail.
if thinking != "" {
t.Logf("WARNING: model produced thinking output when not requested (%d chars): %.100s...", len(thinking), thinking)
}
t.Logf("content: %s", content)
})
}
}