From 79865e6c5a2f8aa7bc8135eacffdcbe2fea953d9 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Thu, 2 Apr 2026 11:07:50 -0700 Subject: [PATCH] app: use the same client for inference and other requests (#15204) Previously we were accidentally using different clients/UAs depending on whether it was an inference call or a different call. This change makes them consistent, other than the timeout being different. --- app/ui/ui.go | 19 ++++++++++++------- app/ui/ui_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/app/ui/ui.go b/app/ui/ui.go index c37cd9b25..02cd422ab 100644 --- a/app/ui/ui.go +++ b/app/ui/ui.go @@ -342,8 +342,18 @@ func (t *userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error // httpClient returns an HTTP client that automatically adds the User-Agent header func (s *Server) httpClient() *http.Client { + return userAgentHTTPClient(10 * time.Second) +} + +// inferenceClient uses almost the same HTTP client, but without a timeout so +// long requests aren't truncated +func (s *Server) inferenceClient() *api.Client { + return api.NewClient(envconfig.Host(), userAgentHTTPClient(0)) +} + +func userAgentHTTPClient(timeout time.Duration) *http.Client { return &http.Client{ - Timeout: 10 * time.Second, + Timeout: timeout, Transport: &userAgentTransport{ base: http.DefaultTransport, }, @@ -721,11 +731,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error { _, cancelLoading := context.WithCancel(ctx) loading := false - c, err := api.ClientFromEnvironment() - if err != nil { - cancelLoading() - return err - } + c := s.inferenceClient() // Check if the model exists locally by trying to show it // TODO (jmorganca): skip this round trip and instead just act @@ -1682,7 +1688,6 @@ func supportsBrowserTools(model string) bool { return strings.HasPrefix(strings.ToLower(model), "gpt-oss") } - // buildChatRequest converts store.Chat to api.ChatRequest func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) { var msgs []api.Message diff --git a/app/ui/ui_test.go b/app/ui/ui_test.go index 270f3145f..5658b61eb 100644 --- a/app/ui/ui_test.go +++ b/app/ui/ui_test.go @@ -15,6 +15,7 @@ import ( "sync/atomic" "testing" + "github.com/ollama/ollama/api" "github.com/ollama/ollama/app/store" "github.com/ollama/ollama/app/updater" ) @@ -526,6 +527,33 @@ func TestUserAgentTransport(t *testing.T) { t.Logf("User-Agent transport successfully set: %s", receivedUA) } +func TestInferenceClientUsesUserAgent(t *testing.T) { + var gotUserAgent atomic.Value + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUserAgent.Store(r.Header.Get("User-Agent")) + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{}`)) + })) + defer ts.Close() + + t.Setenv("OLLAMA_HOST", ts.URL) + + server := &Server{} + client := server.inferenceClient() + + _, err := client.Show(context.Background(), &api.ShowRequest{Model: "test"}) + if err != nil { + t.Fatalf("show request failed: %v", err) + } + + receivedUA, _ := gotUserAgent.Load().(string) + expectedUA := userAgent() + + if receivedUA != expectedUA { + t.Errorf("User-Agent mismatch\nExpected: %s\nReceived: %s", expectedUA, receivedUA) + } +} + func TestSupportsBrowserTools(t *testing.T) { tests := []struct { model string