mirror of
https://github.com/ollama/ollama.git
synced 2026-04-26 02:36:09 +02:00
Add skipIfNoVisionOverride() to skip vision tests when OLLAMA_TEST_MODEL is set to a non-vision model. Add Think:false to context exhaustion test to prevent thinking models from using all context before the test can measure it. Add third test image (ollama homepage) and replace OCR test with ImageDescription test using it. Relax match strings for broader model compatibility. Add TestThinkingEnabled and TestThinkingSuppressed to verify thinking output and channel tag handling.
156 lines
4.8 KiB
Go
156 lines
4.8 KiB
Go
//go:build integration
|
|
|
|
package integration
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
)
|
|
|
|
// TestThinkingEnabled verifies that when thinking is requested, the model
|
|
// produces both thinking and content output without leaking raw channel tags.
|
|
func TestThinkingEnabled(t *testing.T) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
|
defer cancel()
|
|
|
|
client, _, cleanup := InitServerConnection(ctx, t)
|
|
defer cleanup()
|
|
|
|
models := testModels([]string{smol})
|
|
for _, modelName := range models {
|
|
t.Run(modelName, func(t *testing.T) {
|
|
requireCapability(ctx, t, client, modelName, "thinking")
|
|
pullOrSkip(ctx, t, client, modelName)
|
|
|
|
think := api.ThinkValue{Value: true}
|
|
stream := false
|
|
req := api.ChatRequest{
|
|
Model: modelName,
|
|
Stream: &stream,
|
|
Think: &think,
|
|
Messages: []api.Message{
|
|
{Role: "user", Content: "What is 12 * 15? Think step by step."},
|
|
},
|
|
Options: map[string]any{
|
|
"temperature": 0,
|
|
"seed": 42,
|
|
"num_predict": 512,
|
|
},
|
|
}
|
|
|
|
var response api.ChatResponse
|
|
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
|
|
response = cr
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "model requires more system memory") {
|
|
t.Skip("model too large for test system")
|
|
}
|
|
t.Fatalf("chat failed: %v", err)
|
|
}
|
|
|
|
content := response.Message.Content
|
|
thinking := response.Message.Thinking
|
|
|
|
// Thinking should be non-empty when thinking is enabled
|
|
if thinking == "" {
|
|
t.Error("expected non-empty thinking output when thinking is enabled")
|
|
}
|
|
|
|
// The answer (180) should appear in thinking, content, or both.
|
|
// Some models put everything in thinking and leave content empty
|
|
// if they hit the token limit while still thinking.
|
|
combined := thinking + " " + content
|
|
if !strings.Contains(combined, "180") {
|
|
t.Errorf("expected '180' in thinking or content, got thinking=%q content=%q", thinking, content)
|
|
}
|
|
|
|
// Neither thinking nor content should contain raw channel tags
|
|
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
|
|
t.Errorf("content contains raw channel tags: %s", content)
|
|
}
|
|
if strings.Contains(thinking, "<|channel>") || strings.Contains(thinking, "<channel|>") {
|
|
t.Errorf("thinking contains raw channel tags: %s", thinking)
|
|
}
|
|
|
|
t.Logf("thinking (%d chars): %.100s...", len(thinking), thinking)
|
|
t.Logf("content (%d chars): %s", len(content), content)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestThinkingSuppressed verifies that when thinking is NOT requested,
|
|
// the model does not leak thinking/channel content into the response.
|
|
func TestThinkingSuppressed(t *testing.T) {
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
|
defer cancel()
|
|
|
|
client, _, cleanup := InitServerConnection(ctx, t)
|
|
defer cleanup()
|
|
|
|
models := testModels([]string{smol})
|
|
for _, modelName := range models {
|
|
t.Run(modelName, func(t *testing.T) {
|
|
requireCapability(ctx, t, client, modelName, "thinking")
|
|
pullOrSkip(ctx, t, client, modelName)
|
|
|
|
stream := false
|
|
req := api.ChatRequest{
|
|
Model: modelName,
|
|
Stream: &stream,
|
|
// Think is nil — thinking not requested
|
|
Messages: []api.Message{
|
|
{Role: "user", Content: "What is the capital of Japan? Answer in one word."},
|
|
},
|
|
Options: map[string]any{
|
|
"temperature": 0,
|
|
"seed": 42,
|
|
"num_predict": 64,
|
|
},
|
|
}
|
|
|
|
var response api.ChatResponse
|
|
err := client.Chat(ctx, &req, func(cr api.ChatResponse) error {
|
|
response = cr
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "model requires more system memory") {
|
|
t.Skip("model too large for test system")
|
|
}
|
|
t.Fatalf("chat failed: %v", err)
|
|
}
|
|
|
|
content := response.Message.Content
|
|
thinking := response.Message.Thinking
|
|
|
|
// The answer should appear in content or thinking
|
|
combined := content + " " + thinking
|
|
if !strings.Contains(combined, "Tokyo") {
|
|
t.Errorf("expected 'Tokyo' in content or thinking, got content=%q thinking=%q", content, thinking)
|
|
}
|
|
|
|
// Content must NOT contain channel/thinking tags
|
|
if strings.Contains(content, "<|channel>") || strings.Contains(content, "<channel|>") {
|
|
t.Errorf("content contains leaked channel tags when thinking not requested: %s", content)
|
|
}
|
|
if strings.Contains(content, "thought") && strings.Contains(content, "<channel|>") {
|
|
t.Errorf("content contains leaked thinking block: %s", content)
|
|
}
|
|
|
|
// Thinking field should ideally be empty when not requested.
|
|
// Some small models may still produce thinking output; log but don't fail.
|
|
if thinking != "" {
|
|
t.Logf("WARNING: model produced thinking output when not requested (%d chars): %.100s...", len(thinking), thinking)
|
|
}
|
|
|
|
t.Logf("content: %s", content)
|
|
})
|
|
}
|
|
}
|