gemma4: initialize clamps after backend load

This commit is contained in:
jmorganca
2026-04-02 01:12:05 -07:00
parent 5e622289c5
commit 6b013002fc
4 changed files with 17 additions and 5 deletions

View File

@@ -47,6 +47,12 @@ type Validator interface {
Validate() error
}
// PostLoader is an optional hook interface. A model that needs to finish
// its own initialization once the backend weights have been loaded can
// implement it.
type PostLoader interface {
	// PostLoad performs the model's post-load initialization and reports
	// any failure through the returned error.
	PostLoad() error
}
// MultimodalProcessor must be implemented by multimodal models.
type MultimodalProcessor interface {
// EncodeMultimodal processes a single input (such as an image) and

View File

@@ -131,9 +131,6 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input
return nil, model.ErrNoVisionModel
}
// Initialize clamp values from model tensors (lazy, once, after model is fully loaded)
m.VisionModel.InitClamp(m.MultiModalProjector)
t0 := time.Now()
img, _, err := image.Decode(bytes.NewReader(multimodalData))
if err != nil {
@@ -162,6 +159,11 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input
return []input.Multimodal{{Tensor: visionOutputs}}, nil
}
// PostLoad is the model's post-load initialization step: it calls InitClamp
// on the vision model, passing it the multimodal projector. It always
// returns nil.
func (m *Model) PostLoad() error {
	projector := m.MultiModalProjector
	m.VisionModel.InitClamp(projector)
	return nil
}
func (m *Model) encodeAudioMultimodal(ctx ml.Context, data []byte) ([]input.Multimodal, error) {
if m.AudioModel == nil || m.audioOpts == nil {
return nil, model.ErrNoVisionModel

View File

@@ -80,8 +80,6 @@ func (l *ClippableLinear) loadClampFromScalars() {
}
func (l *ClippableLinear) Forward(ctx ml.Context, x ml.Tensor) ml.Tensor {
l.loadClampFromScalars()
if l.hasClamp {
x = x.Clamp(ctx, l.inMin, l.inMax)
}

View File

@@ -1258,6 +1258,12 @@ func (s *Server) loadModel() {
panic(fmt.Errorf("failed to load model: %v", err))
}
if postLoader, ok := s.model.(model.PostLoader); ok {
if err := postLoader.PostLoad(); err != nil {
panic(fmt.Errorf("failed to finalize model initialization: %v", err))
}
}
s.status = llm.ServerStatusReady
s.ready.Done()
}