mirror of
https://github.com/ollama/ollama.git
synced 2026-04-23 17:29:54 +02:00
Compare commits
4 Commits
v0.13.3-rc
...
parth/add-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5d3eeb43c0 | ||
|
|
1c4e85b4df | ||
|
|
dac4f17fea | ||
|
|
56b8fb024c |
@@ -15,7 +15,7 @@ A Go-based command-line tool for benchmarking Ollama models with configurable pa
|
|||||||
|
|
||||||
```
|
```
|
||||||
go build -o ollama-bench bench.go
|
go build -o ollama-bench bench.go
|
||||||
./bench -model gpt-oss:20b -epochs 6 -format csv
|
./ollama-bench -model gpt-oss:20b -epochs 6 -format csv
|
||||||
```
|
```
|
||||||
|
|
||||||
Using Go Run (without building)
|
Using Go Run (without building)
|
||||||
@@ -29,31 +29,32 @@ go run bench.go -model gpt-oss:20b -epochs 3
|
|||||||
### Basic Example
|
### Basic Example
|
||||||
|
|
||||||
```
|
```
|
||||||
./bench -model gemma3 -epochs 6
|
./ollama-bench -model gemma3 -epochs 6
|
||||||
```
|
```
|
||||||
|
|
||||||
### Benchmark Multiple Models
|
### Benchmark Multiple Models
|
||||||
|
|
||||||
```
|
```
|
||||||
./bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
|
./ollama-bench -model gemma3,gemma3n -epochs 6 -max-tokens 100 -p "Write me a short story" | tee gemma.bench
|
||||||
benchstat -col /name gemma.bench
|
benchstat -col /name gemma.bench
|
||||||
```
|
```
|
||||||
|
|
||||||
### With Image Prompt
|
### With Image Prompt
|
||||||
|
|
||||||
```
|
```
|
||||||
./bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
|
./ollama-bench -model qwen3-vl -image photo.jpg -epochs 6 -max-tokens 100 -p "Describe this image"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Advanced Example
|
### Advanced Example
|
||||||
|
|
||||||
```
|
```
|
||||||
./bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
|
./ollama-bench -model llama3 -epochs 10 -temperature 0.7 -max-tokens 500 -seed 42 -format csv -output results.csv
|
||||||
```
|
```
|
||||||
|
|
||||||
## Command Line Options
|
## Command Line Options
|
||||||
|
|
||||||
| Option | Description | Default |
|
| Option | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
| -model | Comma-separated list of models to benchmark | (required) |
|
| -model | Comma-separated list of models to benchmark | (required) |
|
||||||
| -epochs | Number of iterations per model | 1 |
|
| -epochs | Number of iterations per model | 1 |
|
||||||
| -max-tokens | Maximum tokens for model response | 0 (unlimited) |
|
| -max-tokens | Maximum tokens for model response | 0 (unlimited) |
|
||||||
|
|||||||
@@ -37,6 +37,10 @@ func parseSafetensors(fsys fs.FS, replacer *strings.Replacer, ps ...string) ([]T
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n <= 0 || n > 100<<20 {
|
||||||
|
return nil, fmt.Errorf("invalid safetensors file %q (header size: %d): file may be corrupted or a Git LFS pointer", p, n)
|
||||||
|
}
|
||||||
|
|
||||||
b := bytes.NewBuffer(make([]byte, 0, n))
|
b := bytes.NewBuffer(make([]byte, 0, n))
|
||||||
if _, err = io.CopyN(b, f, n); err != nil {
|
if _, err = io.CopyN(b, f, n); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
@@ -2195,7 +2195,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done {
|
if res.Message.Content != "" || res.Message.Thinking != "" || len(res.Message.ToolCalls) > 0 || r.Done || len(res.Logprobs) > 0 {
|
||||||
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
|
slog.Log(context.TODO(), logutil.LevelTrace, "builtin parser output", "parser", m.Config.Parser, "content", content, "thinking", thinking, "toolCalls", toolCalls, "done", r.Done)
|
||||||
ch <- res
|
ch <- res
|
||||||
} else {
|
} else {
|
||||||
@@ -2235,8 +2235,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
res.Message.ToolCalls = toolCalls
|
res.Message.ToolCalls = toolCalls
|
||||||
res.Message.Content = ""
|
res.Message.Content = ""
|
||||||
} else if res.Message.Thinking != "" {
|
} else if res.Message.Thinking != "" {
|
||||||
// don't return
|
// don't return, fall through to send
|
||||||
} else {
|
} else {
|
||||||
|
// Send logprobs while content is being buffered by the parser for tool calls
|
||||||
|
if len(res.Logprobs) > 0 && !r.Done {
|
||||||
|
logprobRes := res
|
||||||
|
logprobRes.Message.Content = ""
|
||||||
|
logprobRes.Message.ToolCalls = nil
|
||||||
|
ch <- logprobRes
|
||||||
|
}
|
||||||
|
|
||||||
if r.Done {
|
if r.Done {
|
||||||
res.Message.Content = toolParser.Content()
|
res.Message.Content = toolParser.Content()
|
||||||
ch <- res
|
ch <- res
|
||||||
|
|||||||
@@ -708,6 +708,95 @@ func TestGenerateChat(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("messages with tools and logprobs (streaming)", func(t *testing.T) {
|
||||||
|
tools := []api.Tool{
|
||||||
|
{
|
||||||
|
Type: "function",
|
||||||
|
Function: api.ToolFunction{
|
||||||
|
Name: "get_weather",
|
||||||
|
Parameters: api.ToolFunctionParameters{
|
||||||
|
Type: "object",
|
||||||
|
Properties: map[string]api.ToolProperty{
|
||||||
|
"location": {Type: api.PropertyType{"string"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(1)
|
||||||
|
|
||||||
|
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
// Simulate a response where logprobs are sent while the tool call is being buffered
|
||||||
|
responses := []llm.CompletionResponse{
|
||||||
|
{
|
||||||
|
Content: `{ "name": "get_weather"`,
|
||||||
|
Done: false,
|
||||||
|
Logprobs: []llm.Logprob{{}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Content: `,"arguments":{"location":"Seattle, WA","unit":"celsius"}}`,
|
||||||
|
Done: false,
|
||||||
|
Logprobs: []llm.Logprob{{}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Content: ``,
|
||||||
|
Done: true,
|
||||||
|
DoneReason: llm.DoneReasonStop,
|
||||||
|
Logprobs: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, resp := range responses {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
fn(resp)
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
w := createRequest(t, s.ChatHandler, api.ChatRequest{
|
||||||
|
Model: "test-system",
|
||||||
|
Messages: []api.Message{
|
||||||
|
{Role: "user", Content: "Weather?"},
|
||||||
|
},
|
||||||
|
Tools: tools,
|
||||||
|
Stream: &stream,
|
||||||
|
})
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if w.Code != http.StatusOK {
|
||||||
|
t.Errorf("expected status 200, got %d", w.Code)
|
||||||
|
}
|
||||||
|
|
||||||
|
decoder := json.NewDecoder(w.Body)
|
||||||
|
var totalLogprobs int
|
||||||
|
|
||||||
|
for {
|
||||||
|
var resp api.ChatResponse
|
||||||
|
if err := decoder.Decode(&resp); err == io.EOF {
|
||||||
|
break
|
||||||
|
} else if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
totalLogprobs += len(resp.Logprobs)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedLogprobs := 2
|
||||||
|
if totalLogprobs != expectedLogprobs {
|
||||||
|
t.Errorf("expected %d logprobs, got %d", expectedLogprobs, totalLogprobs)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("status error non-streaming", func(t *testing.T) {
|
t.Run("status error non-streaming", func(t *testing.T) {
|
||||||
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
|
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
|
||||||
return api.StatusError{
|
return api.StatusError{
|
||||||
|
|||||||
Reference in New Issue
Block a user