From 79865e6c5a2f8aa7bc8135eacffdcbe2fea953d9 Mon Sep 17 00:00:00 2001
From: Devon Rifkin <drifkin@drifkin.net>
Date: Thu, 2 Apr 2026 11:07:50 -0700
Subject: [PATCH] app: use the same client for inference and other requests
 (#15204)

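Previously, inference requests and all other requests were accidentally
sent through different HTTP clients, and therefore with different
User-Agent headers. This change builds both clients from the same helper
so they are consistent. The only remaining difference is the timeout:
regular requests keep the 10-second timeout, while inference requests
have none so that long-running requests aren't cut off.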
---
 app/ui/ui.go      | 19 ++++++++++++-------
 app/ui/ui_test.go | 28 ++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/app/ui/ui.go b/app/ui/ui.go
index c37cd9b25..02cd422ab 100644
--- a/app/ui/ui.go
+++ b/app/ui/ui.go
@@ -342,8 +342,18 @@ func (t *userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error
 
 // httpClient returns an HTTP client that automatically adds the User-Agent header
 func (s *Server) httpClient() *http.Client {
+	return userAgentHTTPClient(10 * time.Second)
+}
+
+// inferenceClient returns an API client backed by the same User-Agent HTTP
+// client as httpClient, but with no timeout so long requests aren't cut off
+func (s *Server) inferenceClient() *api.Client {
+	return api.NewClient(envconfig.Host(), userAgentHTTPClient(0))
+}
+
+func userAgentHTTPClient(timeout time.Duration) *http.Client {
 	return &http.Client{
-		Timeout: 10 * time.Second,
+		Timeout: timeout,
 		Transport: &userAgentTransport{
 			base: http.DefaultTransport,
 		},
@@ -721,11 +731,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
 	_, cancelLoading := context.WithCancel(ctx)
 	loading := false
 
-	c, err := api.ClientFromEnvironment()
-	if err != nil {
-		cancelLoading()
-		return err
-	}
+	c := s.inferenceClient()
 
 	// Check if the model exists locally by trying to show it
 	// TODO (jmorganca): skip this round trip and instead just act
@@ -1682,7 +1688,6 @@ func supportsBrowserTools(model string) bool {
 	return strings.HasPrefix(strings.ToLower(model), "gpt-oss")
 }
 
-
 // buildChatRequest converts store.Chat to api.ChatRequest
 func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
 	var msgs []api.Message
diff --git a/app/ui/ui_test.go b/app/ui/ui_test.go
index 270f3145f..5658b61eb 100644
--- a/app/ui/ui_test.go
+++ b/app/ui/ui_test.go
@@ -15,6 +15,7 @@ import (
 	"sync/atomic"
 	"testing"
 
+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/app/store"
 	"github.com/ollama/ollama/app/updater"
 )
@@ -526,6 +527,33 @@ func TestUserAgentTransport(t *testing.T) {
 	t.Logf("User-Agent transport successfully set: %s", receivedUA)
 }
 
+func TestInferenceClientUsesUserAgent(t *testing.T) {
+	var gotUserAgent atomic.Value
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotUserAgent.Store(r.Header.Get("User-Agent"))
+		w.Header().Set("Content-Type", "application/json")
+		w.Write([]byte(`{}`))
+	}))
+	defer ts.Close()
+
+	t.Setenv("OLLAMA_HOST", ts.URL)
+
+	server := &Server{}
+	client := server.inferenceClient()
+
+	_, err := client.Show(context.Background(), &api.ShowRequest{Model: "test"})
+	if err != nil {
+		t.Fatalf("show request failed: %v", err)
+	}
+
+	receivedUA, _ := gotUserAgent.Load().(string)
+	expectedUA := userAgent()
+
+	if receivedUA != expectedUA {
+		t.Errorf("User-Agent mismatch\nExpected: %s\nReceived: %s", expectedUA, receivedUA)
+	}
+}
+
 func TestSupportsBrowserTools(t *testing.T) {
 	tests := []struct {
 		model string