The updated interface supports variadic attention options, which removes the need for individual `AttentionWith...` functions. This means more models can use the attention interface, e.g. models with custom masks, logit softcapping, etc. Additionally, this interface should be less error-prone, since there are now reasonable defaults for all optional parameters.
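For intuition, a variadic option interface in Go is usually built on the functional-options pattern: each optional parameter becomes a small function that mutates a config struct whose zero values are the defaults. The sketch below is a minimal illustration under that assumption; the names (`attnConfig`, `AttentionOption`, `WithMask`, `WithSoftcap`) are hypothetical and are not taken from the `nn` package.

// Illustrative only: the option names and config struct below are hypothetical
// stand-ins, not the actual nn package API.
package main

import "fmt"

type attnConfig struct {
	mask    []float32 // optional custom attention mask
	softcap float32   // optional logit softcapping; 0 means disabled
}

// AttentionOption mutates the config; anything not set keeps its default.
type AttentionOption func(*attnConfig)

func WithMask(mask []float32) AttentionOption {
	return func(c *attnConfig) { c.mask = mask }
}

func WithSoftcap(limit float32) AttentionOption {
	return func(c *attnConfig) { c.softcap = limit }
}

// attention starts from zero-value defaults, then applies caller options.
func attention(opts ...AttentionOption) attnConfig {
	var cfg attnConfig
	for _, opt := range opts {
		opt(&cfg)
	}
	return cfg
}

func main() {
	mask := []float32{0, -1e9} // toy custom mask
	fmt.Println(attention())                // plain call, all defaults
	fmt.Println(attention(WithSoftcap(50))) // logit softcapping enabled
	fmt.Println(attention(WithMask(mask)))  // custom mask supplied
}

Because omitted options simply leave the defaults in place, callers that need nothing special stay as simple one-liners, while models with custom masks or softcapping can opt in.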
package bert

import (
	"cmp"

	"github.com/ollama/ollama/fs"
	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/ml/nn"
	"github.com/ollama/ollama/ml/nn/pooling"
	"github.com/ollama/ollama/model"
	"github.com/ollama/ollama/model/input"
)

type Model struct {
	model.Base
	model.TextProcessor

	TokenEmbedding     *nn.Embedding `gguf:"token_embd"`
	TypeEmbedding      *nn.Embedding `gguf:"token_types"`
	PositionEmbedding  *nn.Embedding `gguf:"position_embd"`
	TokenEmbeddingNorm *nn.LayerNorm `gguf:"token_embd_norm"`

	Layers []EncoderLayer `gguf:"blk"`

	Options
}

// Forward implements model.Model.
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
	// Sum the token, token-type, and position embeddings, then apply layer norm.
	hiddenStates := m.TokenEmbedding.Forward(ctx, batch.Inputs)
	hiddenStates = hiddenStates.Add(ctx, m.TypeEmbedding.Weight.Slice(ctx, 1, 0, 1, 1))
	hiddenStates = hiddenStates.Add(ctx, m.PositionEmbedding.Forward(ctx, ctx.Input().FromInts(batch.Positions, len(batch.Positions))))
	hiddenStates = m.TokenEmbeddingNorm.Forward(ctx, hiddenStates, m.eps)

	for _, layer := range m.Layers {
		hiddenStates = layer.Forward(ctx, hiddenStates, &m.Options)
	}

	// Pool the per-token states into a single embedding and optionally L2-normalize it.
	hiddenStates = m.poolingType.Forward(ctx, hiddenStates)
	if m.normalize {
		hiddenStates = hiddenStates.L2Norm(ctx, 1e-12)
	}

	return hiddenStates, nil
}

type EncoderLayer struct {
	*Attention
	AttentionNorm *nn.LayerNorm `gguf:"attn_output_norm"`

	*MLP
	MLPNorm *nn.LayerNorm `gguf:"layer_output_norm"`
}

func (e *EncoderLayer) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
	// Attention block with residual connection and post-norm.
	residual := hiddenStates
	hiddenStates = e.Attention.Forward(ctx, hiddenStates, opts)
	hiddenStates = hiddenStates.Add(ctx, residual)
	hiddenStates = e.AttentionNorm.Forward(ctx, hiddenStates, opts.eps)

	// MLP block with residual connection and post-norm.
	residual = hiddenStates
	hiddenStates = e.MLP.Forward(ctx, hiddenStates, opts)
	hiddenStates = hiddenStates.Add(ctx, residual)
	hiddenStates = e.MLPNorm.Forward(ctx, hiddenStates, opts.eps)

	return hiddenStates
}

type Attention struct {
	Query     *nn.Linear    `gguf:"attn_q"`
	QueryNorm *nn.LayerNorm `gguf:"attn_q_norm"`

	Key     *nn.Linear    `gguf:"attn_k"`
	KeyNorm *nn.LayerNorm `gguf:"attn_k_norm"`

	Value *nn.Linear `gguf:"attn_v"`

	Output *nn.Linear `gguf:"attn_output"`
}

func (a *Attention) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
	batchSize := hiddenStates.Dim(1)

	query := a.Query.Forward(ctx, hiddenStates)
	if a.QueryNorm != nil {
		query = a.QueryNorm.Forward(ctx, query, opts.eps)
	}
	query = query.Reshape(ctx, opts.headDim(), opts.numHeads, batchSize)

	key := a.Key.Forward(ctx, hiddenStates)
	if a.KeyNorm != nil {
		key = a.KeyNorm.Forward(ctx, key, opts.eps)
	}
	// Fall back to the query head count when no separate KV head count is configured.
	key = key.Reshape(ctx, opts.headDim(), cmp.Or(opts.numKVHeads, opts.numHeads), batchSize)

	value := a.Value.Forward(ctx, hiddenStates)
	value = value.Reshape(ctx, opts.headDim(), cmp.Or(opts.numKVHeads, opts.numHeads), batchSize)

	attention := nn.Attention(ctx, query, key, value, nil)
	attention = attention.Reshape(ctx, opts.hiddenSize, batchSize)
	return a.Output.Forward(ctx, attention)
}

type MLP struct {
	Up   *nn.Linear `gguf:"ffn_up"`
	Down *nn.Linear `gguf:"ffn_down"`
}

func (m *MLP) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options) ml.Tensor {
	return m.Down.Forward(ctx, m.Up.Forward(ctx, hiddenStates).GELU(ctx))
}

type Options struct {
	hiddenSize,
	numHeads,
	numKVHeads,
	keyLength,
	valueLength int
	poolingType pooling.Type
	eps         float32
	normalize   bool
}
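
// Note: cmp.Or (Go 1.22+) returns the first of its arguments that is not the
// zero value, so headDim prefers an explicit keyLength, then valueLength, and
// otherwise derives the head dimension from hiddenSize/numHeads. Illustrative
// values only:
//
//	cmp.Or(0, 0, 768/12) // 64: neither length is set, derive from hidden size
//	cmp.Or(96, 0, 64)    // 96: an explicit keyLength wins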
func (o Options) headDim() int {
	return cmp.Or(o.keyLength, o.valueLength, o.hiddenSize/o.numHeads)
}

func New(c fs.Config) (model.Model, error) {
	var processor model.TextProcessor
	switch c.String("tokenizer.ggml.model", "bert") {
	case "bert":
		processor = model.NewWordPiece(
			&model.Vocabulary{
				Values: c.Strings("tokenizer.ggml.tokens"),
				Scores: c.Floats("tokenizer.ggml.scores"),
				Types:  c.Ints("tokenizer.ggml.token_type"),
				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
				BOS: []int32{
					int32(cmp.Or(
						c.Uint("tokenizer.ggml.cls_token_id"),
						c.Uint("tokenizer.ggml.bos_token_id"),
					)),
				},
				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
				EOS: []int32{
					int32(cmp.Or(
						c.Uint("tokenizer.ggml.separator_token_id"),
						//nolint:misspell
						// NOTE: "seperator_token_id" is a typo in model metadata but we need to
						// support it for compatibility.
						c.Uint("tokenizer.ggml.seperator_token_id"),
						c.Uint("tokenizer.ggml.eos_token_id"),
					)),
				},
			},
			true,
		)
	default:
		return nil, model.ErrUnsupportedTokenizer
	}

	return &Model{
		TextProcessor: processor,
		Layers:        make([]EncoderLayer, c.Uint("block_count")),
		Options: Options{
			hiddenSize:  int(c.Uint("embedding_length")),
			numHeads:    int(c.Uint("attention.head_count")),
			numKVHeads:  int(c.Uint("attention.head_count_kv")),
			eps:         c.Float("attention.layer_norm_epsilon"),
			poolingType: pooling.Type(c.Uint("pooling_type")),
			normalize:   c.Bool("normalize_embeddings", true),
		},
	}, nil
}

func init() {
	model.Register("bert", New)
	model.Register("bert_embed", New)
}