mirror of
https://github.com/ollama/ollama.git
synced 2026-04-18 21:54:18 +02:00
Compare commits
1 Commits
pdevine/qw
...
jmorganca/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
330b19b73f |
@@ -21,33 +21,76 @@ type quantizer struct {
|
|||||||
progressFn func(n uint64)
|
progressFn func(n uint64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const quantizationChunkElements uint64 = 4 * 1024 * 1024
|
||||||
|
|
||||||
func (q quantizer) WriteTo(w io.Writer) (int64, error) {
|
func (q quantizer) WriteTo(w io.Writer) (int64, error) {
|
||||||
quantize := q.from.Kind != q.to.Kind
|
quantize := q.from.Kind != q.to.Kind
|
||||||
sr := io.NewSectionReader(q, int64(q.offset), int64(q.from.Size()))
|
sr := io.NewSectionReader(q, int64(q.offset), int64(q.from.Size()))
|
||||||
if !quantize {
|
if !quantize {
|
||||||
n, err := io.Copy(w, sr)
|
n, err := io.Copy(w, sr)
|
||||||
q.progressFn(q.from.Size())
|
if q.progressFn != nil {
|
||||||
|
q.progressFn(q.from.Size())
|
||||||
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
data, err := io.ReadAll(sr)
|
|
||||||
if err != nil {
|
if len(q.from.Shape) == 0 || q.from.Shape[0] == 0 {
|
||||||
slog.Warn("file read error", "tensor", q.from.Name, "file", q.Name(), "error", err)
|
return 0, fmt.Errorf("tensor %s has invalid shape %v", q.from.Name, q.from.Shape)
|
||||||
return 0, fmt.Errorf("unable to read tensor %s from %s: %s", q.from.Name, q.Name(), err)
|
|
||||||
}
|
}
|
||||||
if uint64(len(data)) < q.from.Size() {
|
|
||||||
return 0, fmt.Errorf("tensor %s data size %d is less than expected %d from shape %v", q.from.Name, len(data), q.from.Size(), q.from.Shape)
|
fromType := fsggml.TensorType(q.from.Kind)
|
||||||
|
toType := fsggml.TensorType(q.to.Kind)
|
||||||
|
nPerRow := q.from.Shape[0]
|
||||||
|
totalElements := q.from.Elements()
|
||||||
|
if totalElements%nPerRow != 0 {
|
||||||
|
return 0, fmt.Errorf("tensor %s has non-row-aligned shape %v", q.from.Name, q.from.Shape)
|
||||||
}
|
}
|
||||||
var f32s []float32
|
|
||||||
newType := fsggml.TensorType(q.to.Kind)
|
inRowSize := fromType.RowSize(nPerRow)
|
||||||
if fsggml.TensorType(q.from.Kind) == fsggml.TensorTypeF32 {
|
if inRowSize == 0 {
|
||||||
f32s = unsafe.Slice((*float32)(unsafe.Pointer(&data[0])), q.from.Elements())
|
return 0, fmt.Errorf("tensor %s has unsupported source type %v", q.from.Name, fromType)
|
||||||
} else {
|
|
||||||
f32s = ggml.ConvertToF32(data, q.from.Kind, q.from.Elements())
|
|
||||||
}
|
}
|
||||||
data = ggml.Quantize(newType, f32s, q.from.Shape)
|
|
||||||
n, err := w.Write(data)
|
totalRows := totalElements / nPerRow
|
||||||
q.progressFn(q.from.Size())
|
rowsPerChunk := max(quantizationChunkElements/nPerRow, uint64(1))
|
||||||
return int64(n), err
|
chunkBuf := make([]byte, inRowSize*rowsPerChunk)
|
||||||
|
var written int64
|
||||||
|
|
||||||
|
for row := uint64(0); row < totalRows; {
|
||||||
|
chunkRows := min(rowsPerChunk, totalRows-row)
|
||||||
|
chunkBytes := inRowSize * chunkRows
|
||||||
|
data := chunkBuf[:chunkBytes]
|
||||||
|
|
||||||
|
if _, err := io.ReadFull(sr, data); err != nil {
|
||||||
|
slog.Warn("file read error", "tensor", q.from.Name, "file", q.Name(), "error", err)
|
||||||
|
return written, fmt.Errorf("unable to read tensor %s from %s: %w", q.from.Name, q.Name(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var f32s []float32
|
||||||
|
chunkElements := chunkRows * nPerRow
|
||||||
|
if fromType == fsggml.TensorTypeF32 {
|
||||||
|
f32s = unsafe.Slice((*float32)(unsafe.Pointer(&data[0])), chunkElements)
|
||||||
|
} else {
|
||||||
|
f32s = ggml.ConvertToF32(data, q.from.Kind, chunkElements)
|
||||||
|
}
|
||||||
|
|
||||||
|
quantized := ggml.Quantize(toType, f32s, []uint64{nPerRow, chunkRows})
|
||||||
|
n, err := w.Write(quantized)
|
||||||
|
written += int64(n)
|
||||||
|
if err != nil {
|
||||||
|
return written, err
|
||||||
|
}
|
||||||
|
if n != len(quantized) {
|
||||||
|
return written, io.ErrShortWrite
|
||||||
|
}
|
||||||
|
|
||||||
|
if q.progressFn != nil {
|
||||||
|
q.progressFn(chunkBytes)
|
||||||
|
}
|
||||||
|
row += chunkRows
|
||||||
|
}
|
||||||
|
|
||||||
|
return written, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type quantizeState struct {
|
type quantizeState struct {
|
||||||
|
|||||||
Reference in New Issue
Block a user