Compare commits


4 Commits

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| ParthSareen | 5d3eeb43c0 | convert: check file size for safetensors to warn for improper conversion | 2025-12-10 17:58:16 -08:00 |
| EasonLin | 1c4e85b4df | routes: add logprobs in tool calls (#13238) | 2025-12-10 17:28:41 -08:00 |
| Eloi Torrents | dac4f17fea | cmd/bench: fix binary name in README (#13276) | 2025-12-10 14:16:58 -08:00 |
| Julia Scheaffer | 56b8fb024c | cmd/bench: fix options table in cmd/bench/README.md (#13216) | 2025-12-10 14:07:48 -08:00 |
4 changed files with 109 additions and 7 deletions

cmd/bench/README.md

@@ -15,7 +15,7 @@ A Go-based command-line tool for benchmarking Ollama models with configurable parameters
```
go build -o ollama-bench bench.go
-./bench -model gpt-oss:20b -epochs 6 -format csv
+./ollama-bench -model gpt-oss:20b -epochs 6 -format csv
```
### Using Go Run (without building)
@@ -29,31 +29,32 @@ go run bench.go -model gpt-oss:20b -epochs 3
### Basic Example
```
-./bench -model gemma3 -epochs 6
+./ollama-bench -model gemma3 -epochs 6
```
### Benchmark Multiple Models
```
-./bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
+./ollama-bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
benchstat -col /name gemma.bench
```
### With Image Prompt
```
-./bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
+./ollama-bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
```
### Advanced Example
```
-./bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
+./ollama-bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
```
## Command Line Options
| Option | Description | Default |
|----------|-------------|---------|
| -model | Comma-separated list of models to benchmark | (required) |
| -epochs | Number of iterations per model | 1 |
| -max-tokens | Maximum tokens for model response | 0 (unlimited) |

convert/reader_safetensors.go

@@ -37,6 +37,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
			return nil, err
		}
+		if n <= 0 || n > 100<<20 {
+			return nil, fmt.Errorf("invalid safetensors file %q (header size: %d): file may be corrupted or a Git LFS pointer", p, n)
+		}
		b := bytes.NewBuffer(make([]byte, 0, n))
		if _, err = io.CopyN(b, f, n); err != nil {
			return nil, err
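
For context on why this bound catches LFS pointers: a safetensors file starts with an 8-byte little-endian integer giving the size of the JSON header that follows, while a Git LFS pointer is a small ASCII file, so its first bytes decode to an absurd length. Below is a minimal standalone sketch of that decoding; the `readHeaderSize` helper and the file name are illustrative, not part of this change:

```
package main

import (
	"encoding/binary"
	"fmt"
	"io"
	"os"
)

// readHeaderSize reads the 8-byte little-endian integer that starts every
// safetensors file and gives the size of the JSON header that follows.
// A Git LFS pointer begins with the ASCII text "version https://...", and
// "version " decodes to roughly 2.3e18, far past the 100<<20 (100 MiB) bound.
func readHeaderSize(path string) (int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	var buf [8]byte
	if _, err := io.ReadFull(f, buf[:]); err != nil {
		return 0, err
	}

	n := int64(binary.LittleEndian.Uint64(buf[:]))
	if n <= 0 || n > 100<<20 {
		return 0, fmt.Errorf("invalid safetensors file %q (header size: %d)", path, n)
	}
	return n, nil
}

func main() {
	// "model.safetensors" is a placeholder path.
	n, err := readHeaderSize("model.safetensors")
	if err != nil {
		fmt.Println(err) // an LFS pointer or truncated file fails here
		return
	}
	fmt.Printf("JSON header is %d bytes\n", n)
}
```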

server/routes.go

@@ -2195,7 +2195,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
				return
			}
-			if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done {
+			if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done || len(res.Logprobs) > 0 {
				slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
				ch <- res
			} else {
@@ -2235,8 +2235,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
				res.Message.ToolCalls = toolCalls
				res.Message.Content = ""
			} else if res.Message.Thinking != "" {
-				// don't return
+				// don't return, fall through to send
			} else {
+				// Send logprobs while content is being buffered by the parser for tool calls
+				if len(res.Logprobs) > 0 && !r.Done {
+					logprobRes := res
+					logprobRes.Message.Content = ""
+					logprobRes.Message.ToolCalls = nil
+					ch <- logprobRes
+				}
				if r.Done {
					res.Message.Content = toolParser.Content()
					ch <- res
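
On the wire, this means a streamed chunk can now arrive carrying only logprobs while the tool call is still being buffered, with empty `content` and no `tool_calls`. A rough client-side sketch using the Go `api` package follows; the model name and prompt are placeholders, and how logprobs are enabled on the request is not shown by this diff, so it is left out:

```
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	req := &api.ChatRequest{
		Model:    "llama3.1", // placeholder model
		Messages: []api.Message{{Role: "user", Content: "Weather in Seattle?"}},
		// Tools and the request-side logprobs option are elided: enabling
		// logprobs is assumed to be configured elsewhere.
	}

	var total int
	err = client.Chat(context.Background(), req, func(resp api.ChatResponse) error {
		// With this change, a chunk can carry logprobs even while the tool
		// call is still buffered (Content empty, ToolCalls nil).
		total += len(resp.Logprobs)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("logprobs received:", total)
}
```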

server/routes_generate_test.go

@@ -708,6 +708,95 @@ func TestGenerateChat(t *testing.T) {
		}
	})
+	t.Run("messages with tools and logprobs (streaming)", func(t *testing.T) {
+		tools := []api.Tool{
+			{
+				Type: "function",
+				Function: api.ToolFunction{
+					Name: "get_weather",
+					Parameters: api.ToolFunctionParameters{
+						Type: "object",
+						Properties: map[string]api.ToolProperty{
+							"location": {Type: api.PropertyType{"string"}},
+						},
+					},
+				},
+			},
+		}
+		var wg sync.WaitGroup
+		wg.Add(1)
+		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
+			defer wg.Done()
+			// Simulate a response where logprobs are sent while the tool call is being buffered
+			responses := []llm.CompletionResponse{
+				{
+					Content:  `{ "name": "get_weather"`,
+					Done:     false,
+					Logprobs: []llm.Logprob{{}},
+				},
+				{
+					Content:  `,"arguments":{"location":"Seattle, WA","unit":"celsius"}}`,
+					Done:     false,
+					Logprobs: []llm.Logprob{{}},
+				},
+				{
+					Content:    ``,
+					Done:       true,
+					DoneReason: llm.DoneReasonStop,
+					Logprobs:   nil,
+				},
+			}
+			for _, resp := range responses {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				default:
+					fn(resp)
+					time.Sleep(10 * time.Millisecond)
+				}
+			}
+			return nil
+		}
+		w := createRequest(t, s.ChatHandler, api.ChatRequest{
+			Model: "test-system",
+			Messages: []api.Message{
+				{Role: "user", Content: "Weather?"},
+			},
+			Tools:  tools,
+			Stream: &stream,
+		})
+		wg.Wait()
+		if w.Code != http.StatusOK {
+			t.Errorf("expected status 200, got %d", w.Code)
+		}
+		decoder := json.NewDecoder(w.Body)
+		var totalLogprobs int
+		for {
+			var resp api.ChatResponse
+			if err := decoder.Decode(&resp); err == io.EOF {
+				break
+			} else if err != nil {
+				t.Fatal(err)
+			}
+			totalLogprobs += len(resp.Logprobs)
+		}
+		expectedLogprobs := 2
+		if totalLogprobs != expectedLogprobs {
+			t.Errorf("expected %d logprobs, got %d", expectedLogprobs, totalLogprobs)
+		}
+	})
	t.Run("status error non-streaming", func(t *testing.T) {
		mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
			return api.StatusError{