//go:build integration package integration import ( "context" "strings" "testing" "time" "github.com/ollama/ollama/api" ) // TestThinkingEnabled verifies that when thinking is requested, the model // produces both thinking and content output without leaking raw channel tags. func TestThinkingEnabled(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() client, _, cleanup := InitServerConnection(ctx, t) defer cleanup() models := testModels([]string{smol}) for _, modelName := range models { t.Run(modelName, func(t *testing.T) { requireCapability(ctx, t, client, modelName, "thinking") pullOrSkip(ctx, t, client, modelName) think := api.ThinkValue{Value: true} stream := false req := api.ChatRequest{ Model: modelName, Stream: &stream, Think: &think, Messages: []api.Message{ {Role: "user", Content: "What is 12 * 15? Think step by step."}, }, Options: map[string]any{ "temperature": 0, "seed": 42, "num_predict": 512, }, } var response api.ChatResponse err := client.Chat(ctx, &req, func(cr api.ChatResponse) error { response = cr return nil }) if err != nil { if strings.Contains(err.Error(), "model requires more system memory") { t.Skip("model too large for test system") } t.Fatalf("chat failed: %v", err) } content := response.Message.Content thinking := response.Message.Thinking // Thinking should be non-empty when thinking is enabled if thinking == "" { t.Error("expected non-empty thinking output when thinking is enabled") } // The answer (180) should appear in thinking, content, or both. // Some models put everything in thinking and leave content empty // if they hit the token limit while still thinking. combined := thinking + " " + content if !strings.Contains(combined, "180") { t.Errorf("expected '180' in thinking or content, got thinking=%q content=%q", thinking, content) } // Neither thinking nor content should contain raw channel tags if strings.Contains(content, "<|channel>") || strings.Contains(content, "") { t.Errorf("content contains raw channel tags: %s", content) } if strings.Contains(thinking, "<|channel>") || strings.Contains(thinking, "") { t.Errorf("thinking contains raw channel tags: %s", thinking) } t.Logf("thinking (%d chars): %.100s...", len(thinking), thinking) t.Logf("content (%d chars): %s", len(content), content) }) } } // TestThinkingSuppressed verifies that when thinking is NOT requested, // the model does not leak thinking/channel content into the response. func TestThinkingSuppressed(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() client, _, cleanup := InitServerConnection(ctx, t) defer cleanup() models := testModels([]string{smol}) for _, modelName := range models { t.Run(modelName, func(t *testing.T) { requireCapability(ctx, t, client, modelName, "thinking") pullOrSkip(ctx, t, client, modelName) stream := false req := api.ChatRequest{ Model: modelName, Stream: &stream, // Think is nil — thinking not requested Messages: []api.Message{ {Role: "user", Content: "What is the capital of Japan? Answer in one word."}, }, Options: map[string]any{ "temperature": 0, "seed": 42, "num_predict": 64, }, } var response api.ChatResponse err := client.Chat(ctx, &req, func(cr api.ChatResponse) error { response = cr return nil }) if err != nil { if strings.Contains(err.Error(), "model requires more system memory") { t.Skip("model too large for test system") } t.Fatalf("chat failed: %v", err) } content := response.Message.Content thinking := response.Message.Thinking // The answer should appear in content or thinking combined := content + " " + thinking if !strings.Contains(combined, "Tokyo") { t.Errorf("expected 'Tokyo' in content or thinking, got content=%q thinking=%q", content, thinking) } // Content must NOT contain channel/thinking tags if strings.Contains(content, "<|channel>") || strings.Contains(content, "") { t.Errorf("content contains leaked channel tags when thinking not requested: %s", content) } if strings.Contains(content, "thought") && strings.Contains(content, "") { t.Errorf("content contains leaked thinking block: %s", content) } // Thinking field should ideally be empty when not requested. // Some small models may still produce thinking output; log but don't fail. if thinking != "" { t.Logf("WARNING: model produced thinking output when not requested (%d chars): %.100s...", len(thinking), thinking) } t.Logf("content: %s", content) }) } }