server: fix ollama ps showing configured instead of actual context length

When the context length is clamped to the model's trained context length,
ollama ps now shows the actual clamped value instead of the originally
configured value.
This commit is contained in:
Jesse Gross
2026-01-27 16:27:55 -08:00
parent 6a7c3f188e
commit d11fbd2c60
4 changed files with 13 additions and 2 deletions

View File

@@ -80,6 +80,7 @@ type LlamaServer interface {
GetPort() int
GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
HasExited() bool
ContextLength() int
}
// llmServer is an instance of a runner hosting a single model
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
return 0
}
// ContextLength returns the context window size this server is running
// with, taken from s.options.NumCtx. Per this commit, NumCtx holds the
// actual (possibly clamped) value rather than the originally configured
// one, so "ollama ps" reports what the runner really uses.
func (s *llmServer) ContextLength() int {
	numCtx := s.options.NumCtx
	return numCtx
}
func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
devices, err := ml.GetDevicesFromRunner(ctx, s)
if err != nil {