Compare commits

...

8 Commits

Author SHA1 Message Date
jmorganca
04314765f2 llm: consider null format same as empty value 2024-12-17 09:16:01 -08:00
Jascha Beste
2cde4b8817 readme: change getting started guide link for pgai (#8119) 2024-12-16 22:13:23 -08:00
Blake Mizerany
87f0a49fe6 llm: do not silently fail for supplied, but invalid formats (#8130)
Changes in #8002 introduced fixes for bugs with mangling JSON Schemas.
It also fixed a bug where the server would silently fail when clients
requested invalid formats. It also, unfortunately, introduced a bug
where the server would reject requests with an empty format, which
should be allowed.

The change in #8127 updated the code to allow the empty format, but also
reintroduced the regression where the server would silently fail when
the format was set, but invalid.

This commit fixes both regressions. The server does not reject the empty
format, but it does reject invalid formats. It also adds tests to help
us catch regressions in the future.

Also, the updated code provides a more detailed error message when a
client sends a non-empty, but invalid format, echoing the invalid format
in the response.

This commit also takes the opportunity to remove superfluous linter
checks.
2024-12-16 21:57:49 -08:00
Jeffrey Morgan
0f06a6daa7 llm: loosen format check to default to no format (#8127) 2024-12-16 18:45:46 -08:00
Daniel Hiltgen
8f805dd74b darwin: restore multiple runners for x86 (#8125)
In 0.5.2 we simplified packaging to have avx only for macos x86.  It looks like
there may still be some non-AVX systems out there, so this puts back the prior
logic of building no-AVX for the primary binary, and now 2 runners for avx and avx2.
These will be packaged in the App bundle only, so the stand-alone binary will now be
without AVX support on macos.  On arm, we'll also see these runners reported
as available in the log, but they're dormant and will never be used at runtime.
2024-12-16 18:45:02 -08:00
Michael
89d5e2f2fd readme: example/get started guide for pgai with Ollama (#8115)
readme: example/get started guide for pgai with Ollama
2024-12-16 17:14:37 +08:00
Jascha Beste
297ada6c87 readme: add pgai to readme for semantic search (#8028)
* docs: switch around database integrations order and link to quickstart

* docs: link to blog post in example readme

* chore: link to main readme

* readme: removing example to link externally

readme: removing example to link externally so we don't have to keep this example up-to-date

---------
2024-12-16 17:02:28 +08:00
Patrick Devine
8c9fb8eb73 imageproc mllama refactor (#7537)
Refactor mllama image processing code, and add pixtral and qwen2vl
2024-12-14 19:50:15 -08:00
18 changed files with 931 additions and 158 deletions

View File

@@ -8,8 +8,6 @@ linters:
- containedctx - containedctx
- contextcheck - contextcheck
- errcheck - errcheck
- exportloopref
- gci
- gocheckcompilerdirectives - gocheckcompilerdirectives
- gofmt - gofmt
- gofumpt - gofumpt
@@ -30,8 +28,6 @@ linters:
- wastedassign - wastedassign
- whitespace - whitespace
linters-settings: linters-settings:
gci:
sections: [standard, default, localmodule]
staticcheck: staticcheck:
checks: checks:
- all - all

View File

@@ -8,11 +8,9 @@ include make/cuda-v12-defs.make
include make/rocm-defs.make include make/rocm-defs.make
ifeq ($(CUSTOM_CPU_FLAGS),) ifeq ($(CUSTOM_CPU_FLAGS),)
ifneq ($(OS),darwin)
ifeq ($(ARCH),amd64) ifeq ($(ARCH),amd64)
RUNNER_TARGETS=cpu RUNNER_TARGETS=cpu
endif endif
endif
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present # Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),) ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifneq ($(CUDA_11_COMPILER),) ifneq ($(CUDA_11_COMPILER),)

View File

@@ -407,6 +407,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
### Database ### Database
- [pgai](https://github.com/timescale/pgai) - PostgreSQL as a vector database (Create and search embeddings from Ollama models using pgvector)
- [Get started guide](https://github.com/timescale/pgai/blob/main/docs/vectorizer-quick-start.md)
- [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps) - [MindsDB](https://github.com/mindsdb/mindsdb/blob/staging/mindsdb/integrations/handlers/ollama_handler/README.md) (Connects Ollama models with nearly 200 data platforms and apps)
- [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama) - [chromem-go](https://github.com/philippgille/chromem-go/blob/v0.5.0/embed_ollama.go) with [example](https://github.com/philippgille/chromem-go/tree/v0.5.0/examples/rag-wikipedia-ollama)
- [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases) - [Kangaroo](https://github.com/dbkangaroo/kangaroo) (AI-powered SQL client and admin tool for popular databases)

View File

@@ -674,21 +674,6 @@ type CompletionResponse struct {
} }
func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error { func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
if err := s.sem.Acquire(ctx, 1); err != nil {
if errors.Is(err, context.Canceled) {
slog.Info("aborting completion request due to client closing the connection")
} else {
slog.Error("Failed to acquire semaphore", "error", err)
}
return err
}
defer s.sem.Release(1)
// put an upper limit on num_predict to avoid the model running on forever
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
}
request := map[string]any{ request := map[string]any{
"prompt": req.Prompt, "prompt": req.Prompt,
"stream": true, "stream": true,
@@ -714,16 +699,10 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
"cache_prompt": true, "cache_prompt": true,
} }
// Make sure the server is ready
status, err := s.getServerStatusRetry(ctx)
if err != nil {
return err
} else if status != ServerStatusReady {
return fmt.Errorf("unexpected server status: %s", status.ToString())
}
if len(req.Format) > 0 { if len(req.Format) > 0 {
switch { switch {
case bytes.Equal(req.Format, []byte(`""`)) || bytes.Equal(req.Format, []byte(`null`)):
// fallthrough
case bytes.Equal(req.Format, []byte(`"json"`)): case bytes.Equal(req.Format, []byte(`"json"`)):
request["grammar"] = grammarJSON request["grammar"] = grammarJSON
case bytes.HasPrefix(req.Format, []byte("{")): case bytes.HasPrefix(req.Format, []byte("{")):
@@ -734,10 +713,33 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
} }
request["grammar"] = string(g) request["grammar"] = string(g)
default: default:
return errors.New(`invalid format: expected "json" or a JSON schema`) return fmt.Errorf("invalid format: %q; expected \"json\" or a valid JSON Schema", req.Format)
} }
} }
if err := s.sem.Acquire(ctx, 1); err != nil {
if errors.Is(err, context.Canceled) {
slog.Info("aborting completion request due to client closing the connection")
} else {
slog.Error("Failed to acquire semaphore", "error", err)
}
return err
}
defer s.sem.Release(1)
// put an upper limit on num_predict to avoid the model running on forever
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
}
// Make sure the server is ready
status, err := s.getServerStatusRetry(ctx)
if err != nil {
return err
} else if status != ServerStatusReady {
return fmt.Errorf("unexpected server status: %s", status.ToString())
}
// Handling JSON marshaling with special characters unescaped. // Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{} buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer) enc := json.NewEncoder(buffer)

63
llm/server_test.go Normal file
View File

@@ -0,0 +1,63 @@
package llm
import (
"context"
"errors"
"fmt"
"strings"
"testing"
"github.com/ollama/ollama/api"
"golang.org/x/sync/semaphore"
)
// TestLLMServerCompletionFormat checks how Completion treats the request
// format. Invalid formats must be rejected with an error that echoes the
// offending format. Supported, empty and missing formats must get past the
// format check, which is observed here as a context.Canceled error because the
// context is canceled before those requests are made.
func TestLLMServerCompletionFormat(t *testing.T) {
	// This test was written to fix an already deployed issue. It is a bit
	// of a mess, but it's good enough until we can refactor the
	// Completion method to be more testable.

	ctx, cancel := context.WithCancel(context.Background())
	s := &llmServer{
		sem: semaphore.NewWeighted(1), // required to prevent nil panic
	}

	// checkInvalid asserts that Completion fails with the "invalid format"
	// error and that the message quotes the format that was sent.
	checkInvalid := func(format string) {
		t.Helper()
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(format),
		}, nil)

		want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
		if err == nil || !strings.Contains(err.Error(), want) {
			t.Fatalf("err = %v; want %q", err, want)
		}
	}

	checkInvalid("X")   // invalid format
	checkInvalid(`"X"`) // invalid JSON Schema

	cancel() // prevent further processing if request makes it past the format check

	// checkCanceled asserts that the request got past the format check and was
	// then stopped by the canceled context.
	checkCanceled := func(err error) {
		t.Helper()
		if !errors.Is(err, context.Canceled) {
			t.Fatalf("Completion: err = %v; expected context.Canceled", err)
		}
	}

	// Formats that must be accepted, including the empty string and the JSON
	// values "" and null.
	valids := []string{`"json"`, `{"type":"object"}`, ``, `""`, `null`}
	for _, valid := range valids {
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(valid),
		}, nil)
		checkCanceled(err)
	}

	err := s.Completion(ctx, CompletionRequest{
		Options: new(api.Options),
		Format:  nil, // missing format
	}, nil)
	checkCanceled(err)
}

View File

@@ -19,6 +19,7 @@ const config: ForgeConfig = {
icon: './assets/icon.icns', icon: './assets/icon.icns',
extraResource: [ extraResource: [
'../dist/ollama', '../dist/ollama',
'../dist/darwin-amd64/lib',
path.join(__dirname, './assets/iconTemplate.png'), path.join(__dirname, './assets/iconTemplate.png'),
path.join(__dirname, './assets/iconTemplate@2x.png'), path.join(__dirname, './assets/iconTemplate@2x.png'),
path.join(__dirname, './assets/iconUpdateTemplate.png'), path.join(__dirname, './assets/iconUpdateTemplate.png'),
@@ -42,7 +43,7 @@ const config: ForgeConfig = {
} }
: {}), : {}),
osxUniversal: { osxUniversal: {
x64ArchFiles: '**/ollama', x64ArchFiles: '**/ollama*',
}, },
}, },
rebuildConfig: {}, rebuildConfig: {},

111
model/imageproc/images.go Normal file
View File

@@ -0,0 +1,111 @@
package imageproc
import (
"image"
"image/color"
"golang.org/x/image/draw"
)
// Per-channel mean and standard deviation presets. Each one is a [3]float32 in
// the form Normalize takes for its mean and std arguments, where indexes 0, 1
// and 2 are applied to the red, green and blue channels respectively.
var (
	ImageNetDefaultMean  = [3]float32{0.485, 0.456, 0.406}
	ImageNetDefaultSTD   = [3]float32{0.229, 0.224, 0.225}
	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}
	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}
	ClipDefaultMean      = [3]float32{0.48145466, 0.4578275, 0.40821073}
	ClipDefaultSTD       = [3]float32{0.26862954, 0.26130258, 0.27577711}
)
// Interpolation methods accepted as the method argument of Resize. Any other
// value makes Resize panic.
const (
	ResizeBilinear        = iota // scales with draw.BiLinear
	ResizeNearestNeighbor        // scales with draw.NearestNeighbor
	ResizeApproxBilinear         // scales with draw.ApproxBiLinear
	ResizeCatmullrom             // scales with draw.CatmullRom
)
// Composite flattens img onto an opaque white canvas with the same bounds and
// returns that canvas, so transparent areas of img come out white.
func Composite(img image.Image) image.Image {
	bounds := img.Bounds()
	canvas := image.NewRGBA(bounds)

	// Fill the canvas with opaque white first, then paint the source over it.
	background := image.NewUniform(color.RGBA{R: 255, G: 255, B: 255, A: 255})
	draw.Draw(canvas, bounds, background, image.Point{}, draw.Src)
	draw.Draw(canvas, bounds, img, bounds.Min, draw.Over)

	return canvas
}
// Resize scales img to newSize using the interpolation selected by method
// (one of the Resize* constants) and returns the scaled copy. It panics when
// method is not one of those constants.
func Resize(img image.Image, newSize image.Point, method int) image.Image {
	scaled := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))

	var interp draw.Interpolator
	switch method {
	case ResizeBilinear:
		interp = draw.BiLinear
	case ResizeNearestNeighbor:
		interp = draw.NearestNeighbor
	case ResizeApproxBilinear:
		interp = draw.ApproxBiLinear
	case ResizeCatmullrom:
		interp = draw.CatmullRom
	default:
		panic("no resizing method found")
	}

	interp.Scale(scaled, scaled.Bounds(), img, img.Bounds(), draw.Over, nil)
	return scaled
}
// Normalize flattens img into a slice of float32 values, three per pixel (red,
// green and blue; alpha is discarded). Each value is computed as
// (value - mean[c]) / std[c] for its channel c.
//
// When rescale is true, the channel value is the high 8 bits of what
// color.Color.RGBA reports, divided by 255.
//
// NOTE(review): when rescale is false the pixel data does not reach the result
// at all. Every channel starts from 0, so the output is just (0 - mean) / std
// for each pixel. That is the existing behaviour and is kept unchanged here,
// but it looks unintended; confirm before relying on rescale=false.
//
// Pixels are visited row by row. With channelFirst the result holds all red
// values, then all green values, then all blue values. Otherwise the three
// channel values of each pixel are stored next to each other. An image with
// empty bounds yields nil.
func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
	bounds := img.Bounds()
	if bounds.Empty() {
		return nil
	}

	// The output size is known up front, so allocate it once instead of
	// growing it (and per-channel scratch slices) with append.
	numPixels := bounds.Dx() * bounds.Dy()
	pixelVals := make([]float32, 3*numPixels)

	// Interleaved layout: pixel i starts at 3*i and its channels are adjacent.
	// Channel-first layout: pixel i starts at i and channels are numPixels apart.
	pixelStride, channelStride := 3, 1
	if channelFirst {
		pixelStride, channelStride = 1, numPixels
	}

	i := 0
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			r, g, b, _ := img.At(x, y).RGBA()

			var rgb [3]float32
			if rescale {
				rgb[0] = float32(r>>8) / 255.0
				rgb[1] = float32(g>>8) / 255.0
				rgb[2] = float32(b>>8) / 255.0
			}

			base := i * pixelStride
			for c, v := range rgb {
				pixelVals[base+c*channelStride] = (v - mean[c]) / std[c]
			}
			i++
		}
	}

	return pixelVals
}

View File

@@ -0,0 +1,177 @@
package imageproc
import (
"image"
"image/color"
"image/draw"
"reflect"
"testing"
)
// createImage returns a width x height RGBA image anchored at the origin in
// which every pixel is set to fillCol.
func createImage(width, height int, fillCol color.RGBA) image.Image {
	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	fill := image.NewUniform(fillCol)
	draw.Draw(canvas, canvas.Rect, fill, image.Point{}, draw.Src)
	return canvas
}
// TestComposite checks that Composite turns a fully transparent image white
// and leaves an opaque image's colour untouched.
func TestComposite(t *testing.T) {
	cases := []struct {
		name         string
		img          image.Image
		expectedRGBA color.RGBA
	}{
		{"Transparent image", createImage(5, 5, color.RGBA{0, 0, 0, 0}), color.RGBA{255, 255, 255, 255}},
		{"Solid red image", createImage(5, 5, color.RGBA{255, 0, 0, 255}), color.RGBA{255, 0, 0, 255}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The expected colour is the same for every pixel of a case.
			wantR, wantG, wantB, wantA := tc.expectedRGBA.RGBA()

			out := Composite(tc.img)
			width, height := out.Bounds().Dx(), out.Bounds().Dy()
			for x := 0; x < width; x++ {
				for y := 0; y < height; y++ {
					r, g, b, a := out.At(x, y).RGBA()
					if r == wantR && g == wantG && b == wantB && a == wantA {
						continue
					}
					t.Errorf("Pixel mismatch at (%d, %d): got (%d, %d, %d, %d), want (%d, %d, %d, %d)",
						x, y, r, g, b, a, wantR, wantG, wantB, wantA)
				}
			}
		})
	}
}
// TestResize checks that Resize produces an image of the requested size for
// each supported interpolation method.
func TestResize(t *testing.T) {
	type resizeTest struct {
		name     string
		img      image.Image
		newSize  image.Point
		method   int
		expected image.Point
	}

	cases := []resizeTest{
		{
			name:     "Resize with bilinear interpolation",
			img:      createImage(5, 5, color.RGBA{255, 0, 0, 255}),
			newSize:  image.Pt(10, 10),
			method:   ResizeBilinear,
			expected: image.Pt(10, 10),
		},
		{
			name:     "Resize with nearest neighbor",
			img:      createImage(10, 10, color.RGBA{0, 255, 0, 255}),
			newSize:  image.Pt(5, 5),
			method:   ResizeNearestNeighbor,
			expected: image.Pt(5, 5),
		},
		{
			name:     "Resize with catmullrom",
			img:      createImage(1024, 1024, color.RGBA{0, 0, 255, 255}),
			newSize:  image.Pt(10, 10),
			method:   ResizeCatmullrom,
			expected: image.Pt(10, 10),
		},
		{
			name:     "Resize with approx bilinear",
			img:      createImage(1024, 768, color.RGBA{100, 100, 100, 255}),
			newSize:  image.Pt(4, 3),
			method:   ResizeApproxBilinear,
			expected: image.Pt(4, 3),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := Resize(tc.img, tc.newSize, tc.method).Bounds()
			if got.Dx() == tc.expected.X && got.Dy() == tc.expected.Y {
				return
			}
			t.Errorf("Unexpected size for resized image: got (%d, %d), want (%d, %d)",
				got.Dx(), got.Dy(), tc.expected.X, tc.expected.Y)
		})
	}
}
// TestResizeInvalidMethod checks that Resize panics when given a method that
// is not one of the Resize* constants.
func TestResizeInvalidMethod(t *testing.T) {
	defer func() {
		if recovered := recover(); recovered != nil {
			return
		}
		t.Errorf("Expected panic for invalid resizing method, but did not panic")
	}()

	const invalidMethod = -1
	src := createImage(10, 10, color.RGBA{0, 0, 0, 255})
	Resize(src, image.Pt(5, 5), invalidMethod)
}
// TestNormalize checks Normalize's output values and layout for rescaled
// channel-first output, rescaled interleaved output, and the rescale=false
// case.
func TestNormalize(t *testing.T) {
	// Expected per-channel values, named so the tables below stay readable.
	const (
		gray = 0.003921628 // grey level 128, rescaled, with mean 0.5 and std 0.5

		clipR = -1.7922626 // rescale=false results for the CLIP mean/std presets
		clipG = -1.7520971
		clipB = -1.4802198
	)

	type normalizeTest struct {
		name         string
		img          image.Image
		mean         [3]float32
		std          [3]float32
		rescale      bool
		channelFirst bool
		expected     []float32
	}

	cases := []normalizeTest{
		{
			name:         "Rescale with channel first",
			img:          createImage(2, 2, color.RGBA{128, 128, 128, 255}),
			mean:         ImageNetStandardMean,
			std:          ImageNetStandardSTD,
			rescale:      true,
			channelFirst: true,
			expected: []float32{
				gray, gray, gray, gray, // R values
				gray, gray, gray, gray, // G values
				gray, gray, gray, gray, // B values
			},
		},
		{
			name:         "Rescale without channel first",
			img:          createImage(2, 2, color.RGBA{255, 0, 0, 255}),
			mean:         [3]float32{0.0, 0.0, 0.0},
			std:          [3]float32{1.0, 1.0, 1.0},
			rescale:      true,
			channelFirst: false,
			expected: []float32{
				1.0, 0.0, 0.0,
				1.0, 0.0, 0.0,
				1.0, 0.0, 0.0,
				1.0, 0.0, 0.0,
			},
		},
		{
			name:         "No rescale with mean/std adjustment",
			img:          createImage(2, 2, color.RGBA{100, 150, 200, 255}),
			mean:         ClipDefaultMean,
			std:          ClipDefaultSTD,
			rescale:      false,
			channelFirst: false,
			expected: []float32{
				clipR, clipG, clipB,
				clipR, clipG, clipB,
				clipR, clipG, clipB,
				clipR, clipG, clipB,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := Normalize(tc.img, tc.mean, tc.std, tc.rescale, tc.channelFirst)
			if reflect.DeepEqual(got, tc.expected) {
				return
			}
			t.Errorf("Test %s failed: got %v, want %v", tc.name, got, tc.expected)
		})
	}
}

View File

@@ -1,19 +1,20 @@
package imageproc package mllama
import ( import (
"bytes"
"fmt" "fmt"
"image" "image"
"image/color"
_ "image/jpeg" _ "image/jpeg"
_ "image/png" _ "image/png"
"io"
"math" "math"
"slices" "slices"
"golang.org/x/image/draw" "golang.org/x/image/draw"
"github.com/ollama/ollama/model/imageproc"
) )
func GetSupportedAspectRatios(maxTiles int) []image.Point { func getSupportedAspectRatios(maxTiles int) []image.Point {
ratios := []image.Point{} ratios := []image.Point{}
for w := range maxTiles { for w := range maxTiles {
@@ -37,28 +38,8 @@ func clip(a, a_min, a_max int) int {
return a return a
} }
func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
scaleWidth := float64(targetWidth) / float64(imageSize.X)
scaleHeight := float64(targetHeight) / float64(imageSize.Y)
var w, h int
if scaleWidth < scaleHeight {
w = targetWidth
h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
} else {
w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
h = targetHeight
}
return image.Point{w, h}
}
func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point { func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles) possibleTileArrangements := getSupportedAspectRatios(maxImageTiles)
possibleCanvasSizes := []image.Point{} possibleCanvasSizes := []image.Point{}
for _, pta := range possibleTileArrangements { for _, pta := range possibleTileArrangements {
possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize}) possibleCanvasSizes = append(possibleCanvasSizes, image.Point{pta.X * tileSize, pta.Y * tileSize})
@@ -113,6 +94,53 @@ func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
return selectedCanvas return selectedCanvas
} }
func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
scaleWidth := float64(targetWidth) / float64(imageSize.X)
scaleHeight := float64(targetHeight) / float64(imageSize.Y)
var w, h int
if scaleWidth < scaleHeight {
w = targetWidth
h = min(int(math.Floor(float64(imageSize.Y)*scaleWidth)), targetHeight)
} else {
w = min(int(math.Floor(float64(imageSize.X)*scaleHeight)), targetWidth)
h = targetHeight
}
return image.Point{w, h}
}
func resizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
if format == "png" {
img = imageproc.Composite(img)
}
b := img.Bounds()
tileSize := outputSize.Y
canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
return imageproc.Resize(img, newSize, imageproc.ResizeBilinear), aspectRatio
}
func padImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
paddedSize := image.Point{
X: outputSize.X * aspectRatio.X,
Y: outputSize.Y * aspectRatio.Y,
}
dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
return dst
}
func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image { func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
b := img.Bounds() b := img.Bounds()
width := b.Max.X - b.Min.X width := b.Max.X - b.Min.X
@@ -134,107 +162,40 @@ func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
return images return images
} }
// remove the "alpha" channel by drawing over a prefilled image func packImages(img image.Image, aspectRatio image.Point) []float32 {
func compositeImage(img image.Image) image.Image {
dst := image.NewRGBA(img.Bounds())
white := color.RGBA{255, 255, 255, 255}
draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
return dst
}
func ResizeImage(img image.Image, format string, outputSize image.Point, maxImageTiles int) (image.Image, image.Point) {
if format == "png" {
img = compositeImage(img)
}
b := img.Bounds()
tileSize := outputSize.Y
canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
// scaling choices:
// NearestNeighbor fast, blocky output
// ApproxBiLinear fast, medium quality
// BiLinear slow, high quality
// CatmullRom very slow, very high quality
draw.BiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
return dst, aspectRatio
}
func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image {
paddedSize := image.Point{
X: outputSize.X * aspectRatio.X,
Y: outputSize.Y * aspectRatio.Y,
}
dst := image.NewRGBA(image.Rect(0, 0, paddedSize.X, paddedSize.Y))
draw.Draw(dst, img.Bounds(), img, image.Point{0, 0}, draw.Over)
return dst
}
func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
subImages := splitToTiles(img, aspectRatio) subImages := splitToTiles(img, aspectRatio)
var pixelVals []float32 var pixelVals []float32
rescale := true
channelFirst := true
for _, subImg := range subImages { for _, subImg := range subImages {
bounds := subImg.Bounds() vals := imageproc.Normalize(subImg, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, rescale, channelFirst)
var rVals, gVals, bVals []float32 pixelVals = append(pixelVals, vals...)
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
for x := bounds.Min.X; x < bounds.Max.X; x++ {
c := subImg.At(x, y)
r, g, b, _ := c.RGBA()
rVal := float32(r>>8) / 255.0
gVal := float32(g>>8) / 255.0
bVal := float32(b>>8) / 255.0
rVal = (rVal - mean[0]) / std[0]
gVal = (gVal - mean[1]) / std[1]
bVal = (bVal - mean[2]) / std[2]
rVals = append(rVals, rVal)
gVals = append(gVals, gVal)
bVals = append(bVals, bVal)
}
}
pixelVals = append(pixelVals, rVals...)
pixelVals = append(pixelVals, gVals...)
pixelVals = append(pixelVals, bVals...)
} }
return pixelVals return pixelVals
} }
func Preprocess(imageData []byte) ([]float32, int, error) { func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
// todo: need guard in here for bad image data
// mllama values
outputSize := image.Point{560, 560} outputSize := image.Point{560, 560}
maxTiles := 4 maxTiles := 4
// clip values img, format, err := image.Decode(imageData)
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
img, format, err := image.Decode(bytes.NewReader(imageData))
if err != nil { if err != nil {
return nil, 0, fmt.Errorf("failed to decode image: %w", err) return nil, nil, fmt.Errorf("failed to decode image: %w", err)
} }
newImage, aspectRatio := ResizeImage(img, format, outputSize, maxTiles) newImage, aspectRatio := resizeImage(img, format, outputSize, maxTiles)
newImage = PadImage(newImage, outputSize, aspectRatio) newImage = padImage(newImage, outputSize, aspectRatio)
data := PackImages(newImage, aspectRatio, mean, std) data := packImages(newImage, aspectRatio)
aspectRatioIndex := slices.Index(GetSupportedAspectRatios(maxTiles), aspectRatio) + 1 aspectRatioIndex := slices.Index(getSupportedAspectRatios(maxTiles), aspectRatio) + 1
return data, aspectRatioIndex, nil opts := map[string]any{
"aspectRatioIndex": aspectRatioIndex,
}
return data, opts, nil
} }

View File

@@ -1,4 +1,4 @@
package imageproc package mllama
import ( import (
"bytes" "bytes"
@@ -35,7 +35,7 @@ func TestAspectRatios(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
actual := GetSupportedAspectRatios(c.MaxTiles) actual := getSupportedAspectRatios(c.MaxTiles)
if diff := cmp.Diff(actual, c.Expected); diff != "" { if diff := cmp.Diff(actual, c.Expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff) t.Errorf("mismatch (-got +want):\n%s", diff)
@@ -299,7 +299,7 @@ func TestResize(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
actualImage, actualAspectRatio := ResizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles) actualImage, actualAspectRatio := resizeImage(c.TestImage, "png", c.OutputSize, c.MaxImageTiles)
if actualImage.Bounds() != c.ExpectedImage.Bounds() { if actualImage.Bounds() != c.ExpectedImage.Bounds() {
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds()) t.Errorf("image size incorrect: '%#v': expected: '%#v'", actualImage.Bounds(), c.ExpectedImage.Bounds())
@@ -329,7 +329,7 @@ func TestPad(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
actual := PadImage(c.TestImage, c.OutputSize, c.AspectRatio) actual := padImage(c.TestImage, c.OutputSize, c.AspectRatio)
if actual.Bounds() != c.Expected.Bounds() { if actual.Bounds() != c.Expected.Bounds() {
t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds()) t.Errorf("image size incorrect: '%#v': expected: '%#v'", actual.Bounds(), c.Expected.Bounds())
@@ -344,9 +344,6 @@ func TestPackImages(t *testing.T) {
ExpectedVals int ExpectedVals int
} }
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
cases := []packCase{ cases := []packCase{
{ {
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)), TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
@@ -366,7 +363,7 @@ func TestPackImages(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std) actualVals := packImages(c.TestImage, c.AspectRatio)
if len(actualVals) != c.ExpectedVals { if len(actualVals) != c.ExpectedVals {
t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals) t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
} }
@@ -400,7 +397,7 @@ func TestPreprocess(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
imgData, aspectRatioID, err := Preprocess(buf.Bytes()) imgData, opts, err := Preprocess(&buf)
if err != nil { if err != nil {
t.Fatalf("error processing: %q", err) t.Fatalf("error processing: %q", err)
} }
@@ -409,6 +406,13 @@ func TestPreprocess(t *testing.T) {
t.Errorf("no image data returned") t.Errorf("no image data returned")
} }
ar, ok := opts["aspectRatioIndex"]
if !ok {
t.Fatalf("no aspect ratio found")
}
aspectRatioID := ar.(int)
if aspectRatioID != c.ExpectedAspectRatioID { if aspectRatioID != c.ExpectedAspectRatioID {
t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID) t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
} }

View File

@@ -0,0 +1,68 @@
package pixtral
import (
"fmt"
"image"
_ "image/jpeg"
_ "image/png"
"io"
"math"
"github.com/ollama/ollama/model/imageproc"
)
// getNumImageTokens returns, for each axis, ((imageSize - 1) / patchSize) + 1
// using integer division: the number of patchSize patches needed to cover
// imageSize along that axis.
func getNumImageTokens(imageSize, patchSize image.Point) image.Point {
	cols := (imageSize.X-1)/patchSize.X + 1
	rows := (imageSize.Y-1)/patchSize.Y + 1
	return image.Pt(cols, rows)
}
// getResizeOutputImageSize returns the size img should be resized to: the
// image is shrunk, keeping its aspect ratio, so that neither side exceeds
// longestEdge (images that already fit are not enlarged), and the result is
// then rounded up on each axis to a whole number of patchSize patches.
func getResizeOutputImageSize(img image.Image, longestEdge int, patchSize image.Point) image.Point {
	// Measure the image by the extent of its bounds rather than by the Max
	// corner, so images whose bounds do not start at (0, 0) are sized
	// correctly. For origin-anchored images the two are identical.
	size := img.Bounds().Size()

	le := float64(longestEdge)
	ratio := math.Max(float64(size.Y)/le, float64(size.X)/le)

	newSize := size
	if ratio > 1.0 {
		newSize = image.Point{
			X: int(math.Ceil(float64(size.X) / ratio)),
			Y: int(math.Ceil(float64(size.Y) / ratio)),
		}
	}

	// Round up to whole patches.
	tokens := getNumImageTokens(newSize, patchSize)
	return image.Point{
		X: tokens.X * patchSize.X,
		Y: tokens.Y * patchSize.Y,
	}
}
// resizeImage scales img to the size chosen by getResizeOutputImageSize for
// the given longestEdge and patchSize. Images decoded as "png" are first
// composited with imageproc.Composite.
func resizeImage(img image.Image, format string, longestEdge int, patchSize image.Point) image.Image {
	src := img
	if format == "png" {
		src = imageproc.Composite(img)
	}

	target := getResizeOutputImageSize(src, longestEdge, patchSize)

	// todo should be ResizeBicubic, but it doesn't exist
	return imageproc.Resize(src, target, imageproc.ResizeBilinear)
}
// Preprocess decodes an image from imageData, resizes it with resizeImage
// (longest edge 1024, 16x16 patches) and returns its pixels normalized with
// the CLIP mean/std presets in channel-first order, together with an empty
// options map. It returns an error if the image cannot be decoded.
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
	const longestEdge = 1024
	patchSize := image.Point{X: 16, Y: 16}

	decoded, format, err := image.Decode(imageData)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to decode image: %w", err)
	}

	resized := resizeImage(decoded, format, longestEdge, patchSize)
	pixels := imageproc.Normalize(resized, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)

	return pixels, map[string]any{}, nil
}

View File

@@ -0,0 +1,219 @@
package pixtral
import (
"bytes"
"encoding/binary"
"image"
"image/png"
"math"
"os"
"testing"
"github.com/google/go-cmp/cmp"
)
// TestGetNumImageTokens checks the per-axis patch counts returned by
// getNumImageTokens for a range of image sizes with 16x16 patches.
func TestGetNumImageTokens(t *testing.T) {
	// Every case uses the same patch size.
	patch := image.Pt(16, 16)

	cases := []struct {
		size image.Point
		want image.Point
	}{
		{size: image.Pt(1024, 764), want: image.Pt(64, 48)},
		{size: image.Pt(800, 600), want: image.Pt(50, 38)},
		{size: image.Pt(640, 480), want: image.Pt(40, 30)},
		{size: image.Pt(320, 200), want: image.Pt(20, 13)},
		{size: image.Pt(1320, 200), want: image.Pt(83, 13)},
		{size: image.Pt(2000, 200), want: image.Pt(125, 13)},
		{size: image.Pt(10000, 200), want: image.Pt(625, 13)},
		{size: image.Pt(1131, 577), want: image.Pt(71, 37)},
		{size: image.Pt(16, 16), want: image.Pt(1, 1)},
	}

	for _, tc := range cases {
		got := getNumImageTokens(tc.size, patch)
		if diff := cmp.Diff(got, tc.want); diff != "" {
			t.Errorf("mismatch (-got +want):\n%s", diff)
		}
	}
}
// TestGetResizeOutputImageSize checks the target sizes computed by
// getResizeOutputImageSize for a longest edge of 1024 and 16x16 patches.
func TestGetResizeOutputImageSize(t *testing.T) {
	// Shared by every case.
	const longestEdge = 1024
	patch := image.Pt(16, 16)

	cases := []struct {
		width, height int
		want          image.Point
	}{
		{width: 1024, height: 768, want: image.Pt(1024, 768)},
		{width: 1162, height: 690, want: image.Pt(1024, 624)},
		{width: 300, height: 200, want: image.Pt(304, 208)},
		{width: 1862, height: 522, want: image.Pt(1024, 288)},
	}

	for _, tc := range cases {
		src := image.NewRGBA(image.Rect(0, 0, tc.width, tc.height))
		got := getResizeOutputImageSize(src, longestEdge, patch)
		if diff := cmp.Diff(got, tc.want); diff != "" {
			t.Errorf("mismatch (-got +want):\n%s", diff)
		}
	}
}
// TestResize verifies that resizeImage produces an image with the expected
// bounds, both when shrinking a wide image and when growing an image smaller
// than a single patch.
func TestResize(t *testing.T) {
	tests := []struct {
		src         image.Image
		longestEdge int
		patchSize   image.Point
		wantBounds  image.Rectangle
	}{
		{
			src:         image.NewRGBA(image.Rect(0, 0, 1862, 522)),
			longestEdge: 1024,
			patchSize:   image.Point{X: 16, Y: 16},
			wantBounds:  image.Rect(0, 0, 1024, 288),
		},
		{
			src:         image.NewRGBA(image.Rect(0, 0, 10, 10)),
			longestEdge: 1024,
			patchSize:   image.Point{X: 16, Y: 16},
			wantBounds:  image.Rect(0, 0, 16, 16),
		},
	}

	for _, tc := range tests {
		resized := resizeImage(tc.src, "png", tc.longestEdge, tc.patchSize)
		if gotBounds := resized.Bounds(); gotBounds != tc.wantBounds {
			t.Errorf("image size incorrect: '%#v': expected: '%#v'", gotBounds, tc.wantBounds)
		}
	}
}
// TestPreprocess PNG-encodes blank RGBA images, feeds them through Preprocess,
// and checks that the returned slice has the expected number of values
// (width * height * 3 for the resized image).
func TestPreprocess(t *testing.T) {
	tests := []struct {
		bounds  image.Rectangle
		wantLen int
	}{
		{bounds: image.Rect(0, 0, 10, 10), wantLen: 16 * 16 * 3 * 1},
		{bounds: image.Rect(0, 0, 2000, 2000), wantLen: 1024 * 1024 * 3 * 1},
	}

	for _, tc := range tests {
		encoded := new(bytes.Buffer)
		if err := png.Encode(encoded, image.NewRGBA(tc.bounds)); err != nil {
			t.Fatal(err)
		}

		pixels, _, err := Preprocess(encoded)
		if err != nil {
			t.Fatalf("error processing: %q", err)
		}

		if n := len(pixels); n == 0 {
			t.Errorf("no image data returned")
		} else if n != tc.wantLen {
			t.Errorf("unexpected image data length: %d, expected: %d", n, tc.wantLen)
		}
	}
}
// TestPreprocessImages runs Preprocess over optional sample images located in
// the working directory and writes the resulting float32 values, encoded as
// little-endian bytes, to "processed_<file>.bin" next to them.
//
// Each image is handled in its own subtest so that a missing sample skips only
// that image, and so that the deferred Close runs as soon as the image has
// been processed instead of piling up until the whole test returns.
func TestPreprocessImages(t *testing.T) {
	for _, testFile := range []string{"flight.png", "sportsball.png"} {
		// t.Run executes the closure synchronously, so capturing testFile is
		// safe regardless of the Go version's loop-variable semantics.
		t.Run(testFile, func(t *testing.T) {
			f, err := os.Open(testFile)
			if err != nil {
				t.Skipf("skipping test, no test image found at %s", testFile)
			}
			defer f.Close()

			imgData, _, err := Preprocess(f)
			if err != nil {
				t.Fatalf("error processing: %q", err)
			}

			byteData := make([]byte, len(imgData)*4) // float32 is 4 bytes
			for i, v := range imgData {
				binary.LittleEndian.PutUint32(byteData[i*4:], math.Float32bits(v))
			}

			outputPath := "processed_" + testFile + ".bin"
			if err := os.WriteFile(outputPath, byteData, 0o644); err != nil {
				t.Fatalf("error writing processed image: %q", err)
			}
		})
	}
}

View File

@@ -0,0 +1,74 @@
package qwen2vl
import (
"fmt"
"image"
_ "image/jpeg"
_ "image/png"
"io"
"math"
"github.com/ollama/ollama/model/imageproc"
)
// Default arguments that Preprocess passes to smartResize.
const (
// DefaultFactor is the factor smartResize rounds both output dimensions to
// a multiple of; it is also the smallest side length smartResize accepts.
DefaultFactor = 28
// DefaultMinPixels is the lower bound on the output area (width * height).
DefaultMinPixels = 56 * 56
// DefaultMaxPixels is the upper bound on the output area (width * height).
DefaultMaxPixels = 14 * 14 * 4 * 1280
)
// smartResize calculates the size an image of the given size should be resized
// to, such that:
//
//  1. both output dimensions are multiples of factor,
//  2. the output area is brought within minPixels..maxPixels when the rounded
//     size falls outside that range, and
//  3. the original aspect ratio is preserved as closely as the rounding allows.
//
// It panics if either side of size is smaller than factor, or if the ratio of
// the longer side to the shorter side is greater than 200:1. Callers that
// handle untrusted images should validate the dimensions first.
func smartResize(size image.Point, factor, minPixels, maxPixels int) image.Point {
	if size.Y < factor || size.X < factor {
		panic("image is too small to resize")
	}

	long, short := size.X, size.Y
	if long < short {
		long, short = short, long
	}
	// Compare by multiplication rather than long/short: integer division
	// truncates, which would let ratios up to just under 201:1 slip through.
	if long > 200*short {
		panic("aspect ratio must be less than 200:1")
	}

	f := float64(factor)
	width := float64(size.X)
	height := float64(size.Y)

	// Start from the nearest multiples of factor.
	xBar := math.Round(width/f) * f
	yBar := math.Round(height/f) * f

	switch area := xBar * yBar; {
	case area > float64(maxPixels):
		// Too large: scale both sides down by the same ratio and round down so
		// the result stays under maxPixels.
		beta := math.Sqrt(height * width / float64(maxPixels))
		xBar = math.Floor(width/beta/f) * f
		yBar = math.Floor(height/beta/f) * f
	case area < float64(minPixels):
		// Too small: scale both sides up by the same ratio and round up so the
		// result reaches minPixels.
		beta := math.Sqrt(float64(minPixels) / (height * width))
		xBar = math.Ceil(width*beta/f) * f
		yBar = math.Ceil(height*beta/f) * f
	}

	return image.Point{X: int(xBar), Y: int(yBar)}
}
// resizeImage scales img to exactly size using bilinear interpolation. Images
// whose decoded format is "png" are first passed through imageproc.Composite
// (presumably to flatten transparency; confirm against imageproc).
func resizeImage(img image.Image, format string, size image.Point) image.Image {
	src := img
	if format == "png" {
		src = imageproc.Composite(src)
	}

	resized := imageproc.Resize(src, size, imageproc.ResizeBilinear)
	return resized
}
func Preprocess(imageData io.Reader) ([]float32, map[string]any, error) {
img, format, err := image.Decode(imageData)
if err != nil {
return nil, nil, fmt.Errorf("failed to decode image: %w", err)
}
size := smartResize(img.Bounds().Max, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
img = resizeImage(img, format, size)
data := imageproc.Normalize(img, imageproc.ClipDefaultMean, imageproc.ClipDefaultSTD, true, true)
opts := map[string]any{}
return data, opts, nil
}

View File

@@ -0,0 +1,78 @@
package qwen2vl
import (
"bytes"
"image"
"image/png"
"testing"
)
// TestSmartResize checks the output size smartResize picks for a few input
// sizes when called with the package default factor and pixel limits.
func TestSmartResize(t *testing.T) {
	tests := []struct {
		in   image.Point
		want image.Point
	}{
		{in: image.Point{X: 1024, Y: 1024}, want: image.Point{X: 980, Y: 980}},
		{in: image.Point{X: 1024, Y: 768}, want: image.Point{X: 1036, Y: 756}},
		{in: image.Point{X: 2000, Y: 2000}, want: image.Point{X: 980, Y: 980}},
	}

	for _, tc := range tests {
		got := smartResize(tc.in, DefaultFactor, DefaultMinPixels, DefaultMaxPixels)
		if got == tc.want {
			continue
		}
		t.Errorf("expected: %v, actual: %v", tc.want, got)
	}
}
// TestPreprocess PNG-encodes blank RGBA images, feeds them through Preprocess,
// and checks that the returned slice has the expected number of values
// (width * height * 3 for the resized image).
func TestPreprocess(t *testing.T) {
	tests := []struct {
		bounds  image.Rectangle
		wantLen int
	}{
		{bounds: image.Rect(0, 0, 256, 256), wantLen: 252 * 252 * 3 * 1},
		{bounds: image.Rect(0, 0, 2000, 2000), wantLen: 980 * 980 * 3 * 1},
	}

	for _, tc := range tests {
		encoded := new(bytes.Buffer)
		if err := png.Encode(encoded, image.NewRGBA(tc.bounds)); err != nil {
			t.Fatal(err)
		}

		pixels, _, err := Preprocess(encoded)
		if err != nil {
			t.Fatalf("error processing: %q", err)
		}

		if n := len(pixels); n == 0 {
			t.Errorf("no image data returned")
		} else if n != tc.wantLen {
			t.Errorf("unexpected image data length: %d, expected: %d", n, tc.wantLen)
		}
	}
}

View File

@@ -72,6 +72,7 @@ func locateRunnersOnce() {
paths := []string{ paths := []string{
filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"), filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
filepath.Join(filepath.Dir(exe), "lib", "ollama", "runners"),
} }
for _, path := range paths { for _, path := range paths {
if _, err := os.Stat(path); err == nil { if _, err := os.Stat(path); err == nil {

View File

@@ -18,10 +18,18 @@ rm -rf llama/build dist/darwin-*
echo "Building darwin arm64" echo "Building darwin arm64"
GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist GOOS=darwin ARCH=arm64 GOARCH=arm64 make -j 8 dist
echo "Building darwin amd64 with AVX enabled" echo "Building darwin amd64 with AVX enabled"
GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist GOOS=darwin ARCH=amd64 GOARCH=amd64 CUSTOM_CPU_FLAGS="avx" make -j 8 dist_exe
# Generate the universal ollama binary for stand-alone usage: metal + avx
lipo -create -output dist/ollama-darwin dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
echo "Building darwin amd64 with runners"
rm dist/darwin-amd64/bin/ollama
GOOS=darwin ARCH=amd64 GOARCH=amd64 make -j 8 dist
# Generate the universal ollama binary for the app bundle: metal + no-avx
lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama lipo -create -output dist/ollama dist/darwin-arm64/bin/ollama dist/darwin-amd64/bin/ollama
if [ -n "$APPLE_IDENTITY" ]; then if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
else else
@@ -48,5 +56,4 @@ ditto -c -k --keepParent dist/ollama dist/temp.zip
if [ -n "$APPLE_IDENTITY" ]; then if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi fi
mv dist/ollama dist/ollama-darwin
rm -f dist/temp.zip rm -f dist/temp.zip

View File

@@ -11,7 +11,7 @@ import (
"github.com/ollama/ollama/api" "github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
"github.com/ollama/ollama/server/imageproc" "github.com/ollama/ollama/model/mllama"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
) )
@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
var imgData llm.ImageData var imgData llm.ImageData
if isMllama { if isMllama {
data, aspectRatioID, err := imageproc.Preprocess(i) data, opts, err := mllama.Preprocess(bytes.NewReader(i))
if err != nil { if err != nil {
return "", nil, err return "", nil, err
} }
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
return "", nil, err return "", nil, err
} }
ar, ok := opts["aspectRatioIndex"].(int)
if !ok {
return "", nil, fmt.Errorf("missing aspect ratio for image")
}
imgData = llm.ImageData{ imgData = llm.ImageData{
ID: len(images), ID: len(images),
Data: buf.Bytes(), Data: buf.Bytes(),
AspectRatioID: aspectRatioID, AspectRatioID: ar,
} }
imgPrompt = "<|image|>" imgPrompt = "<|image|>"
} else { } else {

View File

@@ -31,10 +31,10 @@ import (
"github.com/ollama/ollama/discover" "github.com/ollama/ollama/discover"
"github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/llm" "github.com/ollama/ollama/llm"
"github.com/ollama/ollama/model/mllama"
"github.com/ollama/ollama/openai" "github.com/ollama/ollama/openai"
"github.com/ollama/ollama/parser" "github.com/ollama/ollama/parser"
"github.com/ollama/ollama/runners" "github.com/ollama/ollama/runners"
"github.com/ollama/ollama/server/imageproc"
"github.com/ollama/ollama/template" "github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/errtypes" "github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model" "github.com/ollama/ollama/types/model"
@@ -205,12 +205,18 @@ func (s *Server) GenerateHandler(c *gin.Context) {
images := make([]llm.ImageData, len(req.Images)) images := make([]llm.ImageData, len(req.Images))
for i := range req.Images { for i := range req.Images {
if isMllama { if isMllama {
data, aspectRatioID, err := imageproc.Preprocess(req.Images[i]) data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
if err != nil { if err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"}) c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
return return
} }
ar, ok := opts["aspectRatioIndex"].(int)
if !ok {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
return
}
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
err = binary.Write(buf, binary.LittleEndian, data) err = binary.Write(buf, binary.LittleEndian, data)
if err != nil { if err != nil {
@@ -218,7 +224,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return return
} }
images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: aspectRatioID} images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
} else { } else {
images[i] = llm.ImageData{ID: i, Data: req.Images[i]} images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
} }