safetensors quantization for mlx (#14184)

This change includes:
  - changes to the safetensors metadata format
  - changes to the create command to properly create the blobs with the new format
  - changes to load the new format
  - fixes `ollama show` so that it correctly displays each tensor
This commit is contained in:
Patrick Devine
2026-02-10 11:29:17 -08:00
committed by GitHub
parent 9ec733e527
commit a0407d07fa
14 changed files with 1640 additions and 461 deletions

View File

@@ -5,11 +5,18 @@ package client
import (
"fmt"
"io"
"github.com/ollama/ollama/x/create"
)
// quantizeTensor is not available without MLX
func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
return nil, nil, nil, nil, nil, nil, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
// quantizeTensor is the stub used when the binary is built without the
// mlx build tag: it never reads the tensor data and always reports that
// quantization is unsupported in this build.
func quantizeTensor(r io.Reader, tensorName, dtype string, shape []int32, quantize string) (blobData []byte, err error) {
	err = fmt.Errorf("quantization requires MLX support (build with mlx tag)")
	return blobData, err
}
// quantizePackedGroup is the stub used when the binary is built without
// the mlx build tag: the inputs are ignored and the call always fails,
// since packed-group quantization needs an MLX-enabled build.
func quantizePackedGroup(inputs []create.PackedTensorInput) ([]byte, error) {
	var blob []byte
	return blob, fmt.Errorf("quantization requires MLX support (build with mlx tag)")
}
// QuantizeSupported returns false when MLX is not available