mirror of
https://github.com/ollama/ollama.git
synced 2026-04-25 10:16:00 +02:00
Compare commits
8 Commits
v0.5.3-rc0
...
jmorganca/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
04314765f2 | ||
|
|
2cde4b8817 | ||
|
|
87f0a49fe6 | ||
|
|
0f06a6daa7 | ||
|
|
8f805dd74b | ||
|
|
89d5e2f2fd | ||
|
|
297ada6c87 | ||
|
|
8c9fb8eb73 |
@@ -8,8 +8,6 @@ linters:
|
|||||||
- containedctx
|
- containedctx
|
||||||
- contextcheck
|
- contextcheck
|
||||||
- errcheck
|
- errcheck
|
||||||
- exportloopref
|
|
||||||
- gci
|
|
||||||
- gocheckcompilerdirectives
|
- gocheckcompilerdirectives
|
||||||
- gofmt
|
- gofmt
|
||||||
- gofumpt
|
- gofumpt
|
||||||
@@ -30,8 +28,6 @@ linters:
|
|||||||
- wastedassign
|
- wastedassign
|
||||||
- whitespace
|
- whitespace
|
||||||
linters-settings:
|
linters-settings:
|
||||||
gci:
|
|
||||||
sections: [standard, default, localmodule]
|
|
||||||
staticcheck:
|
staticcheck:
|
||||||
checks:
|
checks:
|
||||||
- all
|
- all
|
||||||
|
|||||||
2
Makefile
2
Makefile
@@ -8,11 +8,9 @@ include make/cuda-v12-defs.make
|
|||||||
include make/rocm-defs.make
|
include make/rocm-defs.make
|
||||||
|
|
||||||
ifeq ($(CUSTOM_CPU_FLAGS),)
|
ifeq ($(CUSTOM_CPU_FLAGS),)
|
||||||
ifneq ($(OS),darwin)
|
|
||||||
ifeq ($(ARCH),amd64)
|
ifeq ($(ARCH),amd64)
|
||||||
RUNNER_TARGETS=cpu
|
RUNNER_TARGETS=cpu
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
|
||||||
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
|
||||||
ifneq ($(CUDA_11_COMPILER),)
|
ifneq ($(CUDA_11_COMPILER),)
|
||||||
|
|||||||
@@ -407,6 +407,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
|||||||
|
|
||||||
### Database
|
### Database
|
||||||
|
|
||||||
|
- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
|
||||||
|
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
|
||||||
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
|
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
|
||||||
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
|
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
|
||||||
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
|
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)
|
||||||
|
|||||||
@@ -674,21 +674,6 @@ type CompletionResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
|
||||||
if err := s.sem.Acquire(ctx, 1); err != nil {
|
|
||||||
if errors.Is(err, context.Canceled) {
|
|
||||||
slog.Info("aborting completion request due to client closing the connection")
|
|
||||||
} else {
|
|
||||||
slog.Error("Failed to acquire semaphore", "error", err)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer s.sem.Release(1)
|
|
||||||
|
|
||||||
// put an upper limit on num_predict to avoid the model running on forever
|
|
||||||
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
|
||||||
req.Options.NumPredict = 10 * s.options.NumCtx
|
|
||||||
}
|
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": req.Prompt,
|
"prompt": req.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
@@ -714,16 +699,10 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
|||||||
"cache_prompt": true,
|
"cache_prompt": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure the server is ready
|
|
||||||
status, err := s.getServerStatusRetry(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
} else if status != ServerStatusReady {
|
|
||||||
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(req.Format) > 0 {
|
if len(req.Format) > 0 {
|
||||||
switch {
|
switch {
|
||||||
|
case bytes.Equal(req.Format, []byte(`""`)) || bytes.Equal(req.Format, []byte(`null`)):
|
||||||
|
// fallthrough
|
||||||
case bytes.Equal(req.Format, []byte(`"json"`)):
|
case bytes.Equal(req.Format, []byte(`"json"`)):
|
||||||
request["grammar"] = grammarJSON
|
request["grammar"] = grammarJSON
|
||||||
case bytes.HasPrefix(req.Format, []byte("{")):
|
case bytes.HasPrefix(req.Format, []byte("{")):
|
||||||
@@ -734,10 +713,33 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
|
|||||||
}
|
}
|
||||||
request["grammar"] = string(g)
|
request["grammar"] = string(g)
|
||||||
default:
|
default:
|
||||||
return errors.New(`invalid format: expected "json" or a JSON schema`)
|
return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema", req.Format)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := s.sem.Acquire(ctx, 1); err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
slog.Info("aborting completion request due to client closing the connection")
|
||||||
|
} else {
|
||||||
|
slog.Error("Failed to acquire semaphore", "error", err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer s.sem.Release(1)
|
||||||
|
|
||||||
|
// put an upper limit on num_predict to avoid the model running on forever
|
||||||
|
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
|
||||||
|
req.Options.NumPredict = 10 * s.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the server is ready
|
||||||
|
status, err := s.getServerStatusRetry(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
} else if status != ServerStatusReady {
|
||||||
|
return fmt.Errorf("unexpected server status: %s", status.ToString())
|
||||||
|
}
|
||||||
|
|
||||||
// Handling JSON marshaling with special characters unescaped.
|
// Handling JSON marshaling with special characters unescaped.
|
||||||
buffer := &bytes.Buffer{}
|
buffer := &bytes.Buffer{}
|
||||||
enc := json.NewEncoder(buffer)
|
enc := json.NewEncoder(buffer)
|
||||||
|
|||||||
63
llm/server_test.go
Normal file
63
llm/server_test.go
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
package llm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
|
"golang.org/x/sync/semaphore"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLLMServerCompletionFormat(t *testing.T) {
|
||||||
|
// This test was written to fix an already deployed issue. It is a bit
|
||||||
|
// of a mess, and but it's good enough, until we can refactoring the
|
||||||
|
// Completion method to be more testable.
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
s := &llmServer{
|
||||||
|
sem: semaphore.NewWeighted(1), // required to prevent nil panic
|
||||||
|
}
|
||||||
|
|
||||||
|
checkInvalid := func(format string) {
|
||||||
|
t.Helper()
|
||||||
|
err := s.Completion(ctx, CompletionRequest{
|
||||||
|
Options: new(api.Options),
|
||||||
|
Format: []byte(format),
|
||||||
|
}, nil)
|
||||||
|
|
||||||
|
want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
|
||||||
|
if err == nil || !strings.Contains(err.Error(), want) {
|
||||||
|
t.Fatalf("err = %v; want %q", err, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
checkInvalid("X") // invalid format
|
||||||
|
checkInvalid(`"X"`) // invalid JSON Schema
|
||||||
|
|
||||||
|
cancel() // prevent further processing if request makes it past the format check
|
||||||
|
|
||||||
|
checkCanceled := func(err error) {
|
||||||
|
t.Helper()
|
||||||
|
if !errors.Is(err, context.Canceled) {
|
||||||
|
t.Fatalf("Completion: err = %v; expected context.Canceled", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
valids := []string{`"json"`, `{"type":"object"}`, ``, `""`, `null`}
|
||||||
|
for _, valid := range valids {
|
||||||
|
err := s.Completion(ctx, CompletionRequest{
|
||||||
|
Options: new(api.Options),
|
||||||
|
Format: []byte(valid),
|
||||||
|
}, nil)
|
||||||
|
checkCanceled(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err := s.Completion(ctx, CompletionRequest{
|
||||||
|
Options: new(api.Options),
|
||||||
|
Format: nil, // missing format
|
||||||
|
}, nil)
|
||||||
|
checkCanceled(err)
|
||||||
|
}
|
||||||
@@ -19,6 +19,7 @@ const config: ForgeConfig = {
|
|||||||
icon: './assets/icon.icns',
|
icon: './assets/icon.icns',
|
||||||
extraResource: [
|
extraResource: [
|
||||||
'../dist/ollama',
|
'../dist/ollama',
|
||||||
|
'../dist/darwin-amd64/lib',
|
||||||
path.join(__dirname, './assets/iconTemplate.png'),
|
path.join(__dirname, './assets/iconTemplate.png'),
|
||||||
path.join(__dirname, './assets/iconTemplate@2x.png'),
|
path.join(__dirname, './assets/iconTemplate@2x.png'),
|
||||||
path.join(__dirname, './assets/iconUpdateTemplate.png'),
|
path.join(__dirname, './assets/iconUpdateTemplate.png'),
|
||||||
@@ -42,7 +43,7 @@ const config: ForgeConfig = {
|
|||||||
}
|
}
|
||||||
: {}),
|
: {}),
|
||||||
osxUniversal: {
|
osxUniversal: {
|
||||||
x64ArchFiles: '**/ollama',
|
x64ArchFiles: '**/ollama*',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
rebuildConfig: {},
|
rebuildConfig: {},
|
||||||
|
|||||||
111
model/imageproc/images.go
Normal file
111
model/imageproc/images.go
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
package imageproc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"image"
|
||||||
|
"image/color"
|
||||||
|
|
||||||
|
"golang.org/x/image/draw"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Per-channel (R, G, B) mean and standard-deviation presets, intended to be
// passed as the mean and std arguments of Normalize.
//
// NOTE(review): the provenance of the numeric values is not shown in this
// file; the names suggest they mirror the usual ImageNet and CLIP
// preprocessing statistics — confirm before relying on that.
var (
	ImageNetDefaultMean = [3]float32{0.485, 0.456, 0.406}
	ImageNetDefaultSTD  = [3]float32{0.229, 0.224, 0.225}

	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}
	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}

	ClipDefaultMean = [3]float32{0.48145466, 0.4578275, 0.40821073}
	ClipDefaultSTD  = [3]float32{0.26862954, 0.26130258, 0.27577711}
)
|
||||||
|
|
||||||
|
// Interpolation methods accepted by Resize through its method argument.
const (
	// ResizeBilinear selects draw.BiLinear.
	ResizeBilinear = iota
	// ResizeNearestNeighbor selects draw.NearestNeighbor.
	ResizeNearestNeighbor
	// ResizeApproxBilinear selects draw.ApproxBiLinear.
	ResizeApproxBilinear
	// ResizeCatmullrom selects draw.CatmullRom.
	ResizeCatmullrom
)
|
||||||
|
|
||||||
|
// Composite returns an image with the alpha channel removed by drawing over a white background.
|
||||||
|
func Composite(img image.Image) image.Image {
|
||||||
|
dst := image.NewRGBA(img.Bounds())
|
||||||
|
|
||||||
|
white := color.RGBA{255, 255, 255, 255}
|
||||||
|
draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
|
||||||
|
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
|
||||||
|
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resize returns an image which has been scaled to a new size.
|
||||||
|
func Resize(img image.Image, newSize image.Point, method int) image.Image {
|
||||||
|
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
|
||||||
|
|
||||||
|
kernels := map[int]draw.Interpolator{
|
||||||
|
ResizeBilinear: draw.BiLinear,
|
||||||
|
ResizeNearestNeighbor: draw.NearestNeighbor,
|
||||||
|
ResizeApproxBilinear: draw.ApproxBiLinear,
|
||||||
|
ResizeCatmullrom: draw.CatmullRom,
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel, ok := kernels[method]
|
||||||
|
if !ok {
|
||||||
|
panic("no resizing method found")
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil)
|
||||||
|
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize returns the r, g, b values of every pixel of img as float32,
// each normalized as (value - mean[c]) / std[c] for its channel c.
//
// Pixels are visited row by row. With channelFirst set, the result holds all
// r values, then all g values, then all b values; otherwise the values are
// interleaved as r, g, b per pixel. An image with empty bounds yields nil.
//
// When rescale is true each channel is first reduced to 8 bits and divided by
// 255.
//
// NOTE(review): when rescale is false the pixel values are not read at all,
// so every output is (0 - mean[c]) / std[c] regardless of the image. That
// matches the existing behavior and is preserved here; confirm whether it is
// intended.
func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
	bounds := img.Bounds()
	if bounds.Empty() {
		return nil
	}

	// The output size is known up front, so allocate once instead of growing
	// several slices by repeated append.
	numPixels := bounds.Dx() * bounds.Dy()
	pixelVals := make([]float32, 3*numPixels)

	idx := 0 // index of the current pixel in row-major order
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			r, g, b, _ := img.At(x, y).RGBA()

			var px [3]float32
			if rescale {
				// RGBA returns 16-bit channels; keep the top 8 bits.
				px[0] = float32(r>>8) / 255.0
				px[1] = float32(g>>8) / 255.0
				px[2] = float32(b>>8) / 255.0
			}

			for ch := range px {
				v := (px[ch] - mean[ch]) / std[ch]
				if channelFirst {
					pixelVals[ch*numPixels+idx] = v
				} else {
					pixelVals[3*idx+ch] = v
				}
			}
			idx++
		}
	}

	return pixelVals
}
|
||||||
177
model/imageproc/images_test.go
Normal file
177
model/imageproc/images_test.go
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
package imageproc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"image"
|
||||||
|
"image/color"
|
||||||
|
"image/draw"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// createImage returns a width x height RGBA image, anchored at (0,0), in
// which every pixel is set to fillCol.
func createImage(width, height int, fillCol color.RGBA) image.Image {
	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	fill := image.NewUniform(fillCol)
	draw.Draw(canvas, canvas.Bounds(), fill, image.Point{}, draw.Src)
	return canvas
}
|
||||||
|
|
||||||
|
func TestComposite(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
img image.Image
|
||||||
|
expectedRGBA color.RGBA
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Transparent image",
|
||||||
|
img: createImage(5, 5, color.RGBA{0, 0, 0, 0}),
|
||||||
|
expectedRGBA: color.RGBA{255, 255, 255, 255},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Solid red image",
|
||||||
|
img: createImage(5, 5, color.RGBA{255, 0, 0, 255}),
|
||||||
|
expectedRGBA: color.RGBA{255, 0, 0, 255},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
resultImg := Composite(tt.img)
|
||||||
|
|
||||||
|
// Check the pixel values in the resulting image
|
||||||
|
for x := range resultImg.Bounds().Dx() {
|
||||||
|
for y := range resultImg.Bounds().Dy() {
|
||||||
|
r, g, b, a := resultImg.At(x, y).RGBA()
|
||||||
|
expectedR, expectedG, expectedB, expectedA := tt.expectedRGBA.RGBA()
|
||||||
|
|
||||||
|
if r != expectedR || g != expectedG || b != expectedB || a != expectedA {
|
||||||
|
t.Errorf("Pixel mismatch at (%d, %d): got (%d, %d, %d, %d), want (%d, %d, %d, %d)",
|
||||||
|
x, y, r, g, b, a, expectedR, expectedG, expectedB, expectedA)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResize(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
img image.Image
|
||||||
|
newSize image.Point
|
||||||
|
method int
|
||||||
|
expected image.Point
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Resize with bilinear interpolation",
|
||||||
|
img: createImage(5, 5, color.RGBA{255, 0, 0, 255}),
|
||||||
|
newSize: image.Point{10, 10},
|
||||||
|
method: ResizeBilinear,
|
||||||
|
expected: image.Point{10, 10},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Resize with nearest neighbor",
|
||||||
|
img: createImage(10, 10, color.RGBA{0, 255, 0, 255}),
|
||||||
|
newSize: image.Point{5, 5},
|
||||||
|
method: ResizeNearestNeighbor,
|
||||||
|
expected: image.Point{5, 5},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Resize with catmullrom",
|
||||||
|
img: createImage(1024, 1024, color.RGBA{0, 0, 255, 255}),
|
||||||
|
newSize: image.Point{10, 10},
|
||||||
|
method: ResizeCatmullrom,
|
||||||
|
expected: image.Point{10, 10},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Resize with approx bilinear",
|
||||||
|
img: createImage(1024, 768, color.RGBA{100, 100, 100, 255}),
|
||||||
|
newSize: image.Point{4, 3},
|
||||||
|
method: ResizeApproxBilinear,
|
||||||
|
expected: image.Point{4, 3},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
resizedImg := Resize(tt.img, tt.newSize, tt.method)
|
||||||
|
|
||||||
|
if resizedImg.Bounds().Dx() != tt.expected.X || resizedImg.Bounds().Dy() != tt.expected.Y {
|
||||||
|
t.Errorf("Unexpected size for resized image: got (%d, %d), want (%d, %d)",
|
||||||
|
resizedImg.Bounds().Dx(), resizedImg.Bounds().Dy(), tt.expected.X, tt.expected.Y)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResizeInvalidMethod(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
if r := recover(); r == nil {
|
||||||
|
t.Errorf("Expected panic for invalid resizing method, but did not panic")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
img := createImage(10, 10, color.RGBA{0, 0, 0, 255})
|
||||||
|
Resize(img, image.Point{5, 5}, -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalize(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
img image.Image
|
||||||
|
mean [3]float32
|
||||||
|
std [3]float32
|
||||||
|
rescale bool
|
||||||
|
channelFirst bool
|
||||||
|
expected []float32
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Rescale with channel first",
|
||||||
|
img: createImage(2, 2, color.RGBA{128, 128, 128, 255}),
|
||||||
|
mean: ImageNetStandardMean,
|
||||||
|
std: ImageNetStandardSTD,
|
||||||
|
rescale: true,
|
||||||
|
channelFirst: true,
|
||||||
|
expected: []float32{
|
||||||
|
0.003921628, 0.003921628, 0.003921628, 0.003921628, // R values
|
||||||
|
0.003921628, 0.003921628, 0.003921628, 0.003921628, // G values
|
||||||
|
0.003921628, 0.003921628, 0.003921628, 0.003921628, // B values
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Rescale without channel first",
|
||||||
|
img: createImage(2, 2, color.RGBA{255, 0, 0, 255}),
|
||||||
|
mean: [3]float32{0.0, 0.0, 0.0},
|
||||||
|
std: [3]float32{1.0, 1.0, 1.0},
|
||||||
|
rescale: true,
|
||||||
|
channelFirst: false,
|
||||||
|
expected: []float32{
|
||||||
|
1.0, 0.0, 0.0,
|
||||||
|
1.0, 0.0, 0.0,
|
||||||
|
1.0, 0.0, 0.0,
|
||||||
|
1.0, 0.0, 0.0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No rescale with mean/std adjustment",
|
||||||
|
img: createImage(2, 2, color.RGBA{100, 150, 200, 255}),
|
||||||
|
mean: ClipDefaultMean,
|
||||||
|
std: ClipDefaultSTD,
|
||||||
|
rescale: false,
|
||||||
|
channelFirst: false,
|
||||||
|
expected: []float32{
|
||||||
|
-1.7922626, -1.7520971, -1.4802198,
|
||||||
|
-1.7922626, -1.7520971, -1.4802198,
|
||||||
|
-1.7922626, -1.7520971, -1.4802198,
|
||||||
|
-1.7922626, -1.7520971, -1.4802198,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := Normalize(tt.img, tt.mean, tt.std, tt.rescale, tt.channelFirst)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(result, tt.expected) {
|
||||||
|
t.Errorf("Test %s failed: got %v, want %v", tt.name, result, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,19 +1,20 @@
|
|||||||
package imageproc
|
package mllama
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"image"
|
"image"
|
||||||
"image/color"
|
|
||||||
_ "image/jpeg"
|
_ "image/jpeg"
|
||||||
_ "image/png"
|
_ "image/png"
|
||||||
|
"io"
|
||||||
"math"
|
"math"
|
||||||
"slices"
|
"slices"
|
||||||
|
|
||||||
"golang.org/x/image/draw"
|
"golang.org/x/image/draw"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/model/imageproc"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetSupportedAspectRatios(maxTiles int) []image.Point {
|
func getSupportedAspectRatios(maxTiles int) []image.Point {
|
||||||
ratios := []image.Point{}
|
ratios := []image.Point{}
|
||||||
|
|
||||||
for w := range maxTiles {
|
for w := range maxTiles {
|
||||||
@@ -37,28 +38,8 @@ func clip(a, a_min, a_max int) int {
|
|||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
|
|
||||||
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
|
|
||||||
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
|
|
||||||
|
|
||||||
scaleWidth := float64(targetWidth) / float64(imageSize.X)
|
|
||||||
scaleHeight := float64(targetHeight) / float64(imageSize.Y)
|
|
||||||
|
|
||||||
var w, h int
|
|
||||||
|
|
||||||
if scaleWidth < scaleHeight {
|
|
||||||
w = targetWidth
|
|
||||||
h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
|
|
||||||
} else {
|
|
||||||
w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
|
|
||||||
h = targetHeight
|
|
||||||
}
|
|
||||||
|
|
||||||
return image.Point{w, h}
|
|
||||||
}
|
|
||||||
|
|
||||||
func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
|
func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
|
||||||
possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
|
possibleTileArrangements := getSupportedAspectRatios(maxImageTiles)
|
||||||
possibleCanvasSizes := []image.Point{}
|
possibleCanvasSizes := []image.Point{}
|
||||||
for _, pta := range possibleTileArrangements {
|
for _, pta := range possibleTileArrangements {
|
||||||
possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
|
possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
|
||||||
@@ -113,6 +94,53 @@ func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
|
|||||||
return selectedCanvas
|
return selectedCanvas
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
|
||||||
|
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
|
||||||
|
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
|
||||||
|
|
||||||
|
scaleWidth := float64(targetWidth) / float64(imageSize.X)
|
||||||
|
scaleHeight := float64(targetHeight) / float64(imageSize.Y)
|
||||||
|
|
||||||
|
var w, h int
|
||||||
|
|
||||||
|
if scaleWidth < scaleHeight {
|
||||||
|
w = targetWidth
|
||||||
|
h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
|
||||||
|
} else {
|
||||||
|
w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
|
||||||
|
h = targetHeight
|
||||||
|
}
|
||||||
|
|
||||||
|
return image.Point{w, h}
|
||||||
|
}
|
||||||
|
|
||||||
|
func resizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
|
||||||
|
if format == "png" {
|
||||||
|
img = imageproc.Composite(img)
|
||||||
|
}
|
||||||
|
|
||||||
|
b := img.Bounds()
|
||||||
|
tileSize := outputSize.Y
|
||||||
|
|
||||||
|
canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
|
||||||
|
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
|
||||||
|
newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
|
||||||
|
|
||||||
|
return imageproc.Resize(img, newSize, imageproc.ResizeBilinear), aspectRatio
|
||||||
|
}
|
||||||
|
|
||||||
|
func padImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
|
||||||
|
paddedSize := image.Point{
|
||||||
|
X: outputSize.X * aspectRatio.X,
|
||||||
|
Y: outputSize.Y * aspectRatio.Y,
|
||||||
|
}
|
||||||
|
|
||||||
|
dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
|
||||||
|
draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
|
||||||
|
|
||||||
|
return dst
|
||||||
|
}
|
||||||
|
|
||||||
func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
|
func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
|
||||||
b := img.Bounds()
|
b := img.Bounds()
|
||||||
width := b.Max.X - b.Min.X
|
width := b.Max.X - b.Min.X
|
||||||
@@ -134,107 +162,40 @@ func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
|
|||||||
return images
|
return images
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove the "alpha" channel by drawing over a prefilled image
|
func packImages(img image.Image, aspectRatio image.Point) []float32 {
|
||||||
func compositeImage(img image.Image) image.Image {
|
|
||||||
dst := image.NewRGBA(img.Bounds())
|
|
||||||
|
|
||||||
white := color.RGBA{255, 255, 255, 255}
|
|
||||||
draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
|
|
||||||
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
|
|
||||||
|
|
||||||
return dst
|
|
||||||
}
|
|
||||||
|
|
||||||
func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
|
|
||||||
if format == "png" {
|
|
||||||
img = compositeImage(img)
|
|
||||||
}
|
|
||||||
|
|
||||||
b := img.Bounds()
|
|
||||||
tileSize := outputSize.Y
|
|
||||||
|
|
||||||
canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
|
|
||||||
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
|
|
||||||
newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
|
|
||||||
|
|
||||||
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
|
|
||||||
|
|
||||||
// scaling choices:
|
|
||||||
// NearestNeighbor fast, blocky output
|
|
||||||
// ApproxBiLinear fast, medium quality
|
|
||||||
// BiLinear slow, high quality
|
|
||||||
// CatmullRom very slow, very high quality
|
|
||||||
draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
|
|
||||||
|
|
||||||
return dst, aspectRatio
|
|
||||||
}
|
|
||||||
|
|
||||||
func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
|
|
||||||
paddedSize := image.Point{
|
|
||||||
X: outputSize.X * aspectRatio.X,
|
|
||||||
Y: outputSize.Y * aspectRatio.Y,
|
|
||||||
}
|
|
||||||
|
|
||||||
dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
|
|
||||||
draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
|
|
||||||
|
|
||||||
return dst
|
|
||||||
}
|
|
||||||
|
|
||||||
func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
|
|
||||||
subImages := splitToTiles(img, aspectRatio)
|
subImages := splitToTiles(img, aspectRatio)
|
||||||
|
|
||||||
var pixelVals []float32
|
var pixelVals []float32
|
||||||
|
|
||||||
|
rescale := true
|
||||||
|
channelFirst := true
|
||||||
|
|
||||||
for _, subImg := range subImages {
|
for _, subImg := range subImages {
|
||||||
bounds := subImg.Bounds()
|
vals := imageproc.Normalize(subImg, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, rescale, channelFirst)
|
||||||
var rVals, gVals, bVals []float32
|
pixelVals = append(pixelVals, vals...)
|
||||||
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
|
|
||||||
for x := bounds.Min.X; x < bounds.Max.X; x++ {
|
|
||||||
c := subImg.At(x, y)
|
|
||||||
r, g, b, _ := c.RGBA()
|
|
||||||
rVal := float32(r>>8) / 255.0
|
|
||||||
gVal := float32(g>>8) / 255.0
|
|
||||||
bVal := float32(b>>8) / 255.0
|
|
||||||
|
|
||||||
rVal = (rVal - mean[0]) / std[0]
|
|
||||||
gVal = (gVal - mean[1]) / std[1]
|
|
||||||
bVal = (bVal - mean[2]) / std[2]
|
|
||||||
|
|
||||||
rVals = append(rVals, rVal)
|
|
||||||
gVals = append(gVals, gVal)
|
|
||||||
bVals = append(bVals, bVal)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pixelVals = append(pixelVals, rVals...)
|
|
||||||
pixelVals = append(pixelVals, gVals...)
|
|
||||||
pixelVals = append(pixelVals, bVals...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pixelVals
|
return pixelVals
|
||||||
}
|
}
|
||||||
|
|
||||||
func Preprocess(imageData []byte) ([]float32, int, error) {
|
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
|
||||||
// todo: need guard in here for bad image data
|
|
||||||
|
|
||||||
// mllama values
|
|
||||||
outputSize := image.Point{560, 560}
|
outputSize := image.Point{560, 560}
|
||||||
maxTiles := 4
|
maxTiles := 4
|
||||||
|
|
||||||
// clip values
|
img, format, err := image.Decode(imageData)
|
||||||
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
|
|
||||||
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
|
|
||||||
|
|
||||||
img, format, err := image.Decode(bytes.NewReader(imageData))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, 0, fmt.Errorf("failed to decode image: %w", err)
|
return nil, nil, fmt.Errorf("failed to decode image: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles)
|
newImage, aspectRatio := resizeImage(img, format, outputSize, maxTiles)
|
||||||
newImage = PadImage(newImage, outputSize, aspectRatio)
|
newImage = padImage(newImage, outputSize, aspectRatio)
|
||||||
|
|
||||||
data := PackImages(newImage, aspectRatio, mean, std)
|
data := packImages(newImage, aspectRatio)
|
||||||
aspectRatioIndex := slices.Index(GetSupportedAspectRatios(maxTiles), aspectRatio) + 1
|
aspectRatioIndex := slices.Index(getSupportedAspectRatios(maxTiles), aspectRatio) + 1
|
||||||
|
|
||||||
return data, aspectRatioIndex, nil
|
opts := map[string]any{
|
||||||
|
"aspectRatioIndex": aspectRatioIndex,
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, opts, nil
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package imageproc
|
package mllama
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
@@ -35,7 +35,7 @@ func TestAspectRatios(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actual := GetSupportedAspectRatios(c.MaxTiles)
|
actual := getSupportedAspectRatios(c.MaxTiles)
|
||||||
|
|
||||||
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
||||||
t.Errorf("mismatch (-got +want):\n%s", diff)
|
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||||
@@ -299,7 +299,7 @@ func TestResize(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actualImage, actualAspectRatio := ResizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
|
actualImage, actualAspectRatio := resizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
|
||||||
|
|
||||||
if actualImage.Bounds() != c.ExpectedImage.Bounds() {
|
if actualImage.Bounds() != c.ExpectedImage.Bounds() {
|
||||||
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds())
|
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds())
|
||||||
@@ -329,7 +329,7 @@ func TestPad(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actual := PadImage(c.TestImage, c.OutputSize, c.AspectRatio)
|
actual := padImage(c.TestImage, c.OutputSize, c.AspectRatio)
|
||||||
|
|
||||||
if actual.Bounds() != c.Expected.Bounds() {
|
if actual.Bounds() != c.Expected.Bounds() {
|
||||||
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
|
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
|
||||||
@@ -344,9 +344,6 @@ func TestPackImages(t *testing.T) {
|
|||||||
ExpectedVals int
|
ExpectedVals int
|
||||||
}
|
}
|
||||||
|
|
||||||
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
|
|
||||||
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
|
|
||||||
|
|
||||||
cases := []packCase{
|
cases := []packCase{
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
|
||||||
@@ -366,7 +363,7 @@ func TestPackImages(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std)
|
actualVals := packImages(c.TestImage, c.AspectRatio)
|
||||||
if len(actualVals) != c.ExpectedVals {
|
if len(actualVals) != c.ExpectedVals {
|
||||||
t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
|
t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
|
||||||
}
|
}
|
||||||
@@ -400,7 +397,7 @@ func TestPreprocess(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
imgData, aspectRatioID, err := Preprocess(buf.Bytes())
|
imgData, opts, err := Preprocess(&buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("error processing: %q", err)
|
t.Fatalf("error processing: %q", err)
|
||||||
}
|
}
|
||||||
@@ -409,6 +406,13 @@ func TestPreprocess(t *testing.T) {
|
|||||||
t.Errorf("no image data returned")
|
t.Errorf("no image data returned")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ar, ok := opts["aspectRatioIndex"]
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("no aspect ratio found")
|
||||||
|
}
|
||||||
|
|
||||||
|
aspectRatioID := ar.(int)
|
||||||
|
|
||||||
if aspectRatioID != c.ExpectedAspectRatioID {
|
if aspectRatioID != c.ExpectedAspectRatioID {
|
||||||
t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
|
t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
|
||||||
}
|
}
|
||||||
68
model/pixtral/imageproc.go
Normal file
68
model/pixtral/imageproc.go
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
package pixtral
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"image"
|
||||||
|
_ "image/jpeg"
|
||||||
|
_ "image/png"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/model/imageproc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// getNumImageTokens returns the number of patches needed to cover an image of
// imageSize along each axis, i.e. ceil(imageSize / patchSize) per dimension.
//
// A zero-length dimension needs zero patches. The form (n-1)/p + 1 reports one
// patch in that case because Go's integer division truncates toward zero, so
// the usual (n + p - 1) / p ceiling division is used instead.
//
// Both components of patchSize must be positive; a zero component panics with
// an integer divide-by-zero.
func getNumImageTokens(imageSize, patchSize image.Point) image.Point {
	return image.Point{
		X: (imageSize.X + patchSize.X - 1) / patchSize.X,
		Y: (imageSize.Y + patchSize.Y - 1) / patchSize.Y,
	}
}
|
||||||
|
|
||||||
|
func getResizeOutputImageSize(img image.Image, longestEdge int, patchSize image.Point) image.Point {
|
||||||
|
b := img.Bounds()
|
||||||
|
le := float64(longestEdge)
|
||||||
|
ratio := math.Max(float64(b.Max.Y)/le, float64(b.Max.X)/le)
|
||||||
|
|
||||||
|
newSize := img.Bounds().Max
|
||||||
|
|
||||||
|
if ratio > 1.0 {
|
||||||
|
newSize = image.Point{
|
||||||
|
int(math.Ceil(float64(b.Max.X) / ratio)),
|
||||||
|
int(math.Ceil(float64(b.Max.Y) / ratio)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens := getNumImageTokens(newSize, patchSize)
|
||||||
|
return image.Point{
|
||||||
|
tokens.X * patchSize.X,
|
||||||
|
tokens.Y * patchSize.Y,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func resizeImage(img image.Image, format string, longestEdge int, patchSize image.Point) image.Image {
|
||||||
|
if format == "png" {
|
||||||
|
img = imageproc.Composite(img)
|
||||||
|
}
|
||||||
|
|
||||||
|
newSize := getResizeOutputImageSize(img, longestEdge, patchSize)
|
||||||
|
|
||||||
|
// todo should be ResizeBicubic, but it doesn't exist
|
||||||
|
return imageproc.Resize(img, newSize, imageproc.ResizeBilinear)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
|
||||||
|
img, format, err := image.Decode(imageData)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to decode image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
longestEdge := 1024
|
||||||
|
patchSize := image.Point{16, 16}
|
||||||
|
|
||||||
|
img = resizeImage(img, format, longestEdge, patchSize)
|
||||||
|
|
||||||
|
data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
|
||||||
|
|
||||||
|
opts := map[string]any{}
|
||||||
|
return data, opts, nil
|
||||||
|
}
|
||||||
219
model/pixtral/imageproc_test.go
Normal file
219
model/pixtral/imageproc_test.go
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
package pixtral
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"image"
|
||||||
|
"image/png"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetNumImageTokens(t *testing.T) {
|
||||||
|
type numImageTokensCase struct {
|
||||||
|
ImageSize image.Point
|
||||||
|
PatchSize image.Point
|
||||||
|
Expected image.Point
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []numImageTokensCase{
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{1024, 764},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{64, 48},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{800, 600},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{50, 38},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{640, 480},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{40, 30},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{320, 200},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{20, 13},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{1320, 200},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{83, 13},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{2000, 200},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{125, 13},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{10000, 200},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{625, 13},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{1131, 577},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{71, 37},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ImageSize: image.Point{16, 16},
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{1, 1},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
actual := getNumImageTokens(c.ImageSize, c.PatchSize)
|
||||||
|
|
||||||
|
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
||||||
|
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetResizeOutputImageSize(t *testing.T) {
|
||||||
|
type resizeCase struct {
|
||||||
|
Image image.Image
|
||||||
|
LongestEdge int
|
||||||
|
PatchSize image.Point
|
||||||
|
Expected image.Point
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []resizeCase{
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{1024, 768},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 1162, 690)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{1024, 624},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 300, 200)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{304, 208},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 1862, 522)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.Point{1024, 288},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
actual := getResizeOutputImageSize(c.Image, c.LongestEdge, c.PatchSize)
|
||||||
|
|
||||||
|
if diff := cmp.Diff(actual, c.Expected); diff != "" {
|
||||||
|
t.Errorf("mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResize(t *testing.T) {
|
||||||
|
type resizeCase struct {
|
||||||
|
Image image.Image
|
||||||
|
LongestEdge int
|
||||||
|
PatchSize image.Point
|
||||||
|
Expected image.Image
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []resizeCase{
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 1862, 522)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.NewRGBA(image.Rect(0, 0, 1024, 288)),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Image: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||||
|
LongestEdge: 1024,
|
||||||
|
PatchSize: image.Point{16, 16},
|
||||||
|
Expected: image.NewRGBA(image.Rect(0, 0, 16, 16)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
actual := resizeImage(c.Image, "png", c.LongestEdge, c.PatchSize)
|
||||||
|
|
||||||
|
if actual.Bounds() != c.Expected.Bounds() {
|
||||||
|
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPreprocess(t *testing.T) {
|
||||||
|
type preprocessCase struct {
|
||||||
|
TestImage image.Image
|
||||||
|
ExpectedLen int
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []preprocessCase{
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||||
|
ExpectedLen: 16 * 16 * 3 * 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
|
||||||
|
ExpectedLen: 1024 * 1024 * 3 * 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := png.Encode(&buf, c.TestImage)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
imgData, _, err := Preprocess(&buf)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error processing: %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch len(imgData) {
|
||||||
|
case 0:
|
||||||
|
t.Errorf("no image data returned")
|
||||||
|
case c.ExpectedLen:
|
||||||
|
// ok
|
||||||
|
default:
|
||||||
|
t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPreprocessImages(t *testing.T) {
|
||||||
|
for _, testFile := range []string{"flight.png", "sportsball.png"} {
|
||||||
|
f, err := os.Open(testFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Skipf("skipping test, no test image found at %s", testFile)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
imgData, _, err := Preprocess(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error processing: %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
byteData := make([]byte, len(imgData)*4) // float32 is 4 bytes
|
||||||
|
for i, f := range imgData {
|
||||||
|
binary.LittleEndian.PutUint32(byteData[i*4:], math.Float32bits(f))
|
||||||
|
}
|
||||||
|
|
||||||
|
outputPath := "processed_" + testFile + ".bin"
|
||||||
|
err = os.WriteFile(outputPath, byteData, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error writing processed image: %q", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
74
model/qwen2vl/imageproc.go
Normal file
74
model/qwen2vl/imageproc.go
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
package qwen2vl
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"image"
|
||||||
|
_ "image/jpeg"
|
||||||
|
_ "image/png"
|
||||||
|
"io"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/model/imageproc"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Default arguments Preprocess passes to smartResize.
const (
	// DefaultFactor is the value both output dimensions are made a multiple of.
	DefaultFactor = 28

	// DefaultMinPixels is the lower bound on the output area, in pixels.
	DefaultMinPixels = 56 * 56

	// DefaultMaxPixels is the upper bound on the output area, in pixels.
	DefaultMaxPixels = 14 * 14 * 4 * 1280
)
|
||||||
|
|
||||||
|
// smartResize calculates the size an image of the given size should be resized
// to so that:
//
//  1. both dimensions are multiples of factor,
//  2. the area lies between minPixels and maxPixels, and
//  3. both sides are scaled by the same ratio, so the aspect ratio is kept as
//     close to the original as rounding to factor allows.
//
// It panics if either dimension is smaller than factor, or if the longer side
// is more than 200 times the shorter one. Callers handling untrusted images
// should validate the dimensions before calling.
func smartResize(size image.Point, factor, minPixels, maxPixels int) image.Point {
	long, short := size.X, size.Y
	if long < short {
		long, short = short, long
	}

	if short < factor {
		panic("image is too small to resize")
	}
	// Cross-multiply instead of dividing: integer division long/short truncates,
	// which would let every ratio below 201:1 slip past a "> 200" check.
	if long > 200*short {
		panic("aspect ratio must be less than 200:1")
	}

	f := float64(factor)
	width := float64(size.X)
	height := float64(size.Y)

	// Start from the nearest multiple of factor in each dimension.
	xBar := math.Round(width/f) * f
	yBar := math.Round(height/f) * f

	switch area := xBar * yBar; {
	case area > float64(maxPixels):
		// Too large: shrink both sides by the same ratio and round down so the
		// area cannot exceed maxPixels.
		beta := math.Sqrt(height * width / float64(maxPixels))
		xBar = math.Floor(width/beta/f) * f
		yBar = math.Floor(height/beta/f) * f
	case area < float64(minPixels):
		// Too small: grow both sides by the same ratio and round up so the area
		// reaches at least minPixels.
		beta := math.Sqrt(float64(minPixels) / (height * width))
		xBar = math.Ceil(width*beta/f) * f
		yBar = math.Ceil(height*beta/f) * f
	}

	return image.Point{X: int(xBar), Y: int(yBar)}
}
|
||||||
|
|
||||||
|
func resizeImage(img image.Image, format string, size image.Point) image.Image {
|
||||||
|
if format == "png" {
|
||||||
|
img = imageproc.Composite(img)
|
||||||
|
}
|
||||||
|
|
||||||
|
return imageproc.Resize(img, size, imageproc.ResizeBilinear)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
|
||||||
|
img, format, err := image.Decode(imageData)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to decode image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
size := smartResize(img.Bounds().Max, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
|
||||||
|
img = resizeImage(img, format, size)
|
||||||
|
|
||||||
|
data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
|
||||||
|
|
||||||
|
opts := map[string]any{}
|
||||||
|
return data, opts, nil
|
||||||
|
}
|
||||||
78
model/qwen2vl/imageproc_test.go
Normal file
78
model/qwen2vl/imageproc_test.go
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
package qwen2vl
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"image"
|
||||||
|
"image/png"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSmartResize(t *testing.T) {
|
||||||
|
type smartResizeCase struct {
|
||||||
|
TestImage image.Image
|
||||||
|
Expected image.Point
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []smartResizeCase{
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 1024)),
|
||||||
|
Expected: image.Point{980, 980},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
||||||
|
Expected: image.Point{1036, 756},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
|
||||||
|
Expected: image.Point{980, 980},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
b := c.TestImage.Bounds().Max
|
||||||
|
actual := smartResize(b, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
|
||||||
|
if actual != c.Expected {
|
||||||
|
t.Errorf("expected: %v, actual: %v", c.Expected, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPreprocess(t *testing.T) {
|
||||||
|
type preprocessCase struct {
|
||||||
|
TestImage image.Image
|
||||||
|
ExpectedLen int
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []preprocessCase{
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 256, 256)),
|
||||||
|
ExpectedLen: 252 * 252 * 3 * 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 2000, 2000)),
|
||||||
|
ExpectedLen: 980 * 980 * 3 * 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := png.Encode(&buf, c.TestImage)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
imgData, _, err := Preprocess(&buf)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error processing: %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch len(imgData) {
|
||||||
|
case 0:
|
||||||
|
t.Errorf("no image data returned")
|
||||||
|
case c.ExpectedLen:
|
||||||
|
// ok
|
||||||
|
default:
|
||||||
|
t.Errorf("unexpected image data length: %d, expected: %d", len(imgData), c.ExpectedLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -72,6 +72,7 @@ func locateRunnersOnce() {
|
|||||||
paths := []string{
|
paths := []string{
|
||||||
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
|
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
|
||||||
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
|
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
|
||||||
|
filepath.Join(filepath.Dir(exe), "lib", "ollama", "runners"),
|
||||||
}
|
}
|
||||||
for _, path := range paths {
|
for _, path := range paths {
|
||||||
if _, err := os.Stat(path); err == nil {
|
if _, err := os.Stat(path); err == nil {
|
||||||
|
|||||||
@@ -18,10 +18,18 @@ rm -rf llama/build dist/darwin-*
|
|||||||
echo "Building darwin arm64"
|
echo "Building darwin arm64"
|
||||||
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
|
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
|
||||||
echo "Building darwin amd64 with AVX enabled"
|
echo "Building darwin amd64 with AVX enabled"
|
||||||
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist
|
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
|
||||||
|
|
||||||
|
# Generate the universal ollama binary for stand-alone usage: metal + avx
|
||||||
|
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
||||||
|
|
||||||
|
echo "Building darwin amd64 with runners"
|
||||||
|
rm dist/darwin-amd64/bin/ollama
|
||||||
|
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
|
||||||
|
# Generate the universal ollama binary for the app bundle: metal + no-avx
|
||||||
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
|
||||||
|
|
||||||
|
|
||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
|
||||||
else
|
else
|
||||||
@@ -48,5 +56,4 @@ ditto -c -k --keepParent dist/ollama dist/temp.zip
|
|||||||
if [ -n "$APPLE_IDENTITY" ]; then
|
if [ -n "$APPLE_IDENTITY" ]; then
|
||||||
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
|
||||||
fi
|
fi
|
||||||
mv dist/ollama dist/ollama-darwin
|
|
||||||
rm -f dist/temp.zip
|
rm -f dist/temp.zip
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
|
|
||||||
"github.com/ollama/ollama/api"
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
"github.com/ollama/ollama/server/imageproc"
|
"github.com/ollama/ollama/model/mllama"
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
|
|||||||
var imgData llm.ImageData
|
var imgData llm.ImageData
|
||||||
|
|
||||||
if isMllama {
|
if isMllama {
|
||||||
data, aspectRatioID, err := imageproc.Preprocess(i)
|
data, opts, err := mllama.Preprocess(bytes.NewReader(i))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
|
|||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ar, ok := opts["aspectRatioIndex"].(int)
|
||||||
|
if !ok {
|
||||||
|
return "", nil, fmt.Errorf("missing aspect ratio for image")
|
||||||
|
}
|
||||||
|
|
||||||
imgData = llm.ImageData{
|
imgData = llm.ImageData{
|
||||||
ID: len(images),
|
ID: len(images),
|
||||||
Data: buf.Bytes(),
|
Data: buf.Bytes(),
|
||||||
AspectRatioID: aspectRatioID,
|
AspectRatioID: ar,
|
||||||
}
|
}
|
||||||
imgPrompt = "<|image|>"
|
imgPrompt = "<|image|>"
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ import (
|
|||||||
"github.com/ollama/ollama/discover"
|
"github.com/ollama/ollama/discover"
|
||||||
"github.com/ollama/ollama/envconfig"
|
"github.com/ollama/ollama/envconfig"
|
||||||
"github.com/ollama/ollama/llm"
|
"github.com/ollama/ollama/llm"
|
||||||
|
"github.com/ollama/ollama/model/mllama"
|
||||||
"github.com/ollama/ollama/openai"
|
"github.com/ollama/ollama/openai"
|
||||||
"github.com/ollama/ollama/parser"
|
"github.com/ollama/ollama/parser"
|
||||||
"github.com/ollama/ollama/runners"
|
"github.com/ollama/ollama/runners"
|
||||||
"github.com/ollama/ollama/server/imageproc"
|
|
||||||
"github.com/ollama/ollama/template"
|
"github.com/ollama/ollama/template"
|
||||||
"github.com/ollama/ollama/types/errtypes"
|
"github.com/ollama/ollama/types/errtypes"
|
||||||
"github.com/ollama/ollama/types/model"
|
"github.com/ollama/ollama/types/model"
|
||||||
@@ -205,12 +205,18 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
images := make([]llm.ImageData, len(req.Images))
|
images := make([]llm.ImageData, len(req.Images))
|
||||||
for i := range req.Images {
|
for i := range req.Images {
|
||||||
if isMllama {
|
if isMllama {
|
||||||
data, aspectRatioID, err := imageproc.Preprocess(req.Images[i])
|
data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
|
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ar, ok := opts["aspectRatioIndex"].(int)
|
||||||
|
if !ok {
|
||||||
|
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
err = binary.Write(buf, binary.LittleEndian, data)
|
err = binary.Write(buf, binary.LittleEndian, data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -218,7 +224,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: aspectRatioID}
|
images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
|
||||||
} else {
|
} else {
|
||||||
images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
|
images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user