mirror of
https://github.com/ollama/ollama.git
synced 2026-04-17 15:53:27 +02:00
* create: Clean up experimental paths
This cleans up the experimental features, and adds both unit and integration test coverage to verify no regressions.
* create: preserve config and layer names when creating from safetensors models
When creating a model FROM an existing safetensors model, ModelFormat,
Capabilities, and layer Name fields were lost. ModelFormat stayed empty
because it's only set from GGML layers (which safetensors models lack),
and layer names weren't copied in parseFromModel. This caused derived
models to fail loading ("config.json not found in manifest").
* review comments
271 lines
7.8 KiB
Go
271 lines
7.8 KiB
Go
package safetensors
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"sort"
|
|
)
|
|
|
|
// tensorInfo holds tensor metadata from safetensors headers.
|
|
type tensorInfo struct {
|
|
Dtype string `json:"dtype"`
|
|
Shape []int32 `json:"shape"`
|
|
DataOffsets [2]int `json:"data_offsets"`
|
|
}
|
|
|
|
// TensorExtractor extracts individual tensors from a safetensors file.
|
|
// It provides io.Reader interfaces for each tensor's raw data, enabling
|
|
// streaming writes to blobs without loading entire tensors into memory.
|
|
type TensorExtractor struct {
|
|
file *os.File
|
|
dataOffset int64 // Start of tensor data region
|
|
header map[string]tensorInfo
|
|
}
|
|
|
|
// TensorData pairs a tensor's metadata (name, dtype, shape, byte size)
// with a section reader positioned over its raw bytes.
type TensorData struct {
	Name  string
	Dtype string
	Shape []int32
	Size  int64

	reader *io.SectionReader
}
|
|
|
|
// WithName returns a shallow copy of TensorData with a different logical tensor
|
|
// name but the same underlying raw data reader.
|
|
func (td *TensorData) WithName(name string) *TensorData {
|
|
if td == nil {
|
|
return nil
|
|
}
|
|
shape := make([]int32, len(td.Shape))
|
|
copy(shape, td.Shape)
|
|
return &TensorData{
|
|
Name: name,
|
|
Dtype: td.Dtype,
|
|
Shape: shape,
|
|
Size: td.Size,
|
|
reader: td.reader,
|
|
}
|
|
}
|
|
|
|
// Reader returns an io.Reader for the tensor's raw bytes.
|
|
func (td *TensorData) Reader() io.Reader {
|
|
return td.reader
|
|
}
|
|
|
|
// safetensorsHeader builds the JSON header for a minimal safetensors blob
|
|
// containing a single tensor keyed by its name.
|
|
func (td *TensorData) safetensorsHeader() []byte {
|
|
header := map[string]any{
|
|
td.Name: tensorInfo{
|
|
Dtype: td.Dtype,
|
|
Shape: td.Shape,
|
|
DataOffsets: [2]int{0, int(td.Size)},
|
|
},
|
|
}
|
|
headerJSON, _ := json.Marshal(header)
|
|
|
|
// Pad header to 8-byte alignment
|
|
padding := (8 - len(headerJSON)%8) % 8
|
|
headerJSON = append(headerJSON, bytes.Repeat([]byte(" "), padding)...)
|
|
return headerJSON
|
|
}
|
|
|
|
// SafetensorsReader returns a reader that outputs the tensor wrapped in
|
|
// minimal safetensors format. This allows using mlx_load_safetensors on
|
|
// individual tensor blobs for native zero-copy loading.
|
|
// The tensor is keyed by its name in the safetensors header.
|
|
func (td *TensorData) SafetensorsReader() io.Reader {
|
|
headerJSON := td.safetensorsHeader()
|
|
|
|
// Build header with size prefix
|
|
headerBuf := new(bytes.Buffer)
|
|
binary.Write(headerBuf, binary.LittleEndian, uint64(len(headerJSON)))
|
|
headerBuf.Write(headerJSON)
|
|
|
|
// Return multi-reader: header + tensor data
|
|
td.reader.Seek(0, io.SeekStart)
|
|
return io.MultiReader(headerBuf, td.reader)
|
|
}
|
|
|
|
// SafetensorsSize returns the total size of the safetensors-wrapped tensor.
|
|
func (td *TensorData) SafetensorsSize() int64 {
|
|
headerJSON := td.safetensorsHeader()
|
|
return 8 + int64(len(headerJSON)) + td.Size
|
|
}
|
|
|
|
// NewTensorDataFromBytes creates a TensorData from raw tensor bytes.
|
|
// This is useful for constructing packed blobs from already-extracted data.
|
|
func NewTensorDataFromBytes(name, dtype string, shape []int32, rawData []byte) *TensorData {
|
|
return &TensorData{
|
|
Name: name,
|
|
Dtype: dtype,
|
|
Shape: shape,
|
|
Size: int64(len(rawData)),
|
|
reader: io.NewSectionReader(bytes.NewReader(rawData), 0, int64(len(rawData))),
|
|
}
|
|
}
|
|
|
|
// ExtractRawFromSafetensors reads a safetensors-wrapped reader and extracts
|
|
// the raw tensor data bytes (stripping the header).
|
|
func ExtractRawFromSafetensors(r io.Reader) ([]byte, error) {
|
|
// Read header size (8 bytes, little endian)
|
|
var headerSize uint64
|
|
if err := binary.Read(r, binary.LittleEndian, &headerSize); err != nil {
|
|
return nil, fmt.Errorf("failed to read header size: %w", err)
|
|
}
|
|
|
|
// Skip header
|
|
if _, err := io.CopyN(io.Discard, r, int64(headerSize)); err != nil {
|
|
return nil, fmt.Errorf("failed to skip header: %w", err)
|
|
}
|
|
|
|
// Read remaining bytes (the raw tensor data)
|
|
return io.ReadAll(r)
|
|
}
|
|
|
|
// BuildPackedSafetensorsReader builds a streaming io.Reader that outputs a valid
|
|
// safetensors file containing multiple tensors. Used for packing expert tensors
|
|
// into a single blob without loading all data into memory.
|
|
// Each TensorData must have been obtained from GetTensor.
|
|
func BuildPackedSafetensorsReader(tensors []*TensorData) io.Reader {
|
|
return BuildPackedSafetensorsReaderWithMetadata(tensors, nil)
|
|
}
|
|
|
|
// BuildPackedSafetensorsReaderWithMetadata builds a streaming io.Reader that
|
|
// outputs a valid safetensors file containing multiple tensors and optional
|
|
// metadata.
|
|
func BuildPackedSafetensorsReaderWithMetadata(tensors []*TensorData, metadata map[string]string) io.Reader {
|
|
// Build the header with sequential data offsets
|
|
header := make(map[string]any, len(tensors)+1)
|
|
var offset int
|
|
for _, td := range tensors {
|
|
header[td.Name] = tensorInfo{
|
|
Dtype: td.Dtype,
|
|
Shape: td.Shape,
|
|
DataOffsets: [2]int{offset, offset + int(td.Size)},
|
|
}
|
|
offset += int(td.Size)
|
|
}
|
|
if len(metadata) > 0 {
|
|
header["__metadata__"] = metadata
|
|
}
|
|
|
|
headerJSON, _ := json.Marshal(header)
|
|
|
|
// Pad header to 8-byte alignment
|
|
padding := (8 - len(headerJSON)%8) % 8
|
|
headerJSON = append(headerJSON, bytes.Repeat([]byte(" "), padding)...)
|
|
|
|
// Build header with size prefix
|
|
headerBuf := new(bytes.Buffer)
|
|
binary.Write(headerBuf, binary.LittleEndian, uint64(len(headerJSON)))
|
|
headerBuf.Write(headerJSON)
|
|
|
|
// Build multi-reader: header + all tensor data readers
|
|
readers := make([]io.Reader, 0, 1+len(tensors))
|
|
readers = append(readers, headerBuf)
|
|
for _, td := range tensors {
|
|
td.reader.Seek(0, io.SeekStart)
|
|
readers = append(readers, td.reader)
|
|
}
|
|
|
|
return io.MultiReader(readers...)
|
|
}
|
|
|
|
// OpenForExtraction opens a safetensors file for tensor extraction.
|
|
// The caller must call Close() when done.
|
|
func OpenForExtraction(path string) (*TensorExtractor, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open file: %w", err)
|
|
}
|
|
|
|
var headerSize uint64
|
|
if err := binary.Read(f, binary.LittleEndian, &headerSize); err != nil {
|
|
f.Close()
|
|
return nil, fmt.Errorf("failed to read header size: %w", err)
|
|
}
|
|
|
|
headerBytes := make([]byte, headerSize)
|
|
if _, err := f.Read(headerBytes); err != nil {
|
|
f.Close()
|
|
return nil, fmt.Errorf("failed to read header: %w", err)
|
|
}
|
|
|
|
var header map[string]tensorInfo
|
|
if err := json.Unmarshal(headerBytes, &header); err != nil {
|
|
f.Close()
|
|
return nil, fmt.Errorf("failed to parse header: %w", err)
|
|
}
|
|
|
|
delete(header, "__metadata__")
|
|
|
|
return &TensorExtractor{
|
|
file: f,
|
|
dataOffset: 8 + int64(headerSize), // 8 bytes for header size + header content
|
|
header: header,
|
|
}, nil
|
|
}
|
|
|
|
// GetTensor returns tensor metadata and a reader for extracting a single tensor.
|
|
func (te *TensorExtractor) GetTensor(name string) (*TensorData, error) {
|
|
info, ok := te.header[name]
|
|
if !ok {
|
|
return nil, fmt.Errorf("tensor %q not found", name)
|
|
}
|
|
|
|
start := te.dataOffset + int64(info.DataOffsets[0])
|
|
size := int64(info.DataOffsets[1] - info.DataOffsets[0])
|
|
|
|
return &TensorData{
|
|
Name: name,
|
|
Dtype: info.Dtype,
|
|
Shape: info.Shape,
|
|
Size: size,
|
|
reader: io.NewSectionReader(te.file, start, size),
|
|
}, nil
|
|
}
|
|
|
|
// ListTensors returns all tensor names in sorted order.
|
|
func (te *TensorExtractor) ListTensors() []string {
|
|
names := make([]string, 0, len(te.header))
|
|
for name := range te.header {
|
|
names = append(names, name)
|
|
}
|
|
sort.Strings(names)
|
|
return names
|
|
}
|
|
|
|
// TensorCount returns the number of tensors in the file.
|
|
func (te *TensorExtractor) TensorCount() int {
|
|
return len(te.header)
|
|
}
|
|
|
|
// Close closes the underlying file.
|
|
func (te *TensorExtractor) Close() error {
|
|
return te.file.Close()
|
|
}
|
|
|
|
// ExtractAll returns TensorData for all tensors in the file.
|
|
// Each TensorData has a reader that reads from the original file.
|
|
// The caller must call Close() on the TensorExtractor when done.
|
|
func (te *TensorExtractor) ExtractAll() ([]*TensorData, error) {
|
|
names := te.ListTensors()
|
|
tensors := make([]*TensorData, 0, len(names))
|
|
|
|
for _, name := range names {
|
|
td, err := te.GetTensor(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tensors = append(tensors, td)
|
|
}
|
|
|
|
return tensors, nil
|
|
}
|