mirror of
https://github.com/ollama/ollama.git
synced 2026-04-25 18:25:42 +02:00
mlxrunner: Cache.Update takes ForwardBatch and returns KVHistory
Signature changes from Update(k, v) to Update(batch, k, v) returning (k, v, KVHistory). KVCache returns a real page table mapping positions to buffer slots. RecurrentCache returns empty KVHistory from Update. Replace Cache.Offset() with Offsets() returning per-sequence offsets. Add KVHistory type to mlx package.
This commit is contained in:
13
x/mlxrunner/mlx/sdpa.go
Normal file
13
x/mlxrunner/mlx/sdpa.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package mlx
|
||||
|
||||
// KVHistory carries per-sequence metadata that accompanies the K/V buffers
// handed to SDPA (scaled dot-product attention).
|
||||
// The page table and the sequence lengths are bundled in one value because
// SDPA always needs both.
|
||||
type KVHistory struct {
|
||||
// PageTable maps (seqIdx, position) → slot index in the K/V buffer.
|
||||
// Shape: [numSeqs, maxSeqLen], dtype int32. Unused entries are 0, so
// consult SeqLens to know how many entries of a row are valid.
|
||||
PageTable *Array
|
||||
|
||||
// SeqLens holds the history length of each sequence, i.e. the number of valid
|
||||
// entries in the corresponding row of PageTable.
// NOTE(review): presumably len(SeqLens) == numSeqs — confirm against callers.
|
||||
SeqLens []int
|
||||
}
|
||||
Reference in New Issue
Block a user