safetensors quantization for mlx (#14184)

This change includes:
  - changes to the safetensors metadata format
  - changes to the create command to properly create the blobs with the new format
  - changes to load the new format
  - fixes `ollama show` so that it correctly displays each tensor
This commit is contained in:
Patrick Devine
2026-02-10 11:29:17 -08:00
committed by GitHub
parent 9ec733e527
commit a0407d07fa
14 changed files with 1640 additions and 461 deletions

View File

@@ -5,11 +5,18 @@ package client
import (
"fmt"
"io"
"github.com/ollama/ollama/x/create"
)
// quantizeTensor is not available without MLX
func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
return nil, nil, nil, nil, nil, nil, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
// quantizeTensor is the stub used when the binary is built without the
// mlx build tag: it never reads the tensor data and always reports that
// quantization is unsupported in this build.
func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quantize string) (blobData []byte, err error) {
	err = fmt.Errorf("quantization requires MLX support (build with mlx tag)")
	return blobData, err
}
// quantizePackedGroup is the stub used when the binary is built without
// the mlx build tag: the inputs are ignored and the call always fails,
// since packed-group quantization needs an MLX-enabled build.
func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
	var blob []byte
	return blob, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
}
// QuantizeSupported returns false when MLX is not available