server: fix ollama ps showing configured instead of actual context length

When the context length is clamped to the model's trained context length,
ollama ps now shows the actual clamped value instead of the originally
configured value.
This commit is contained in:
Jesse Gross
2026-01-27 16:27:55 -08:00
parent 6a7c3f188e
commit d11fbd2c60
4 changed files with 13 additions and 2 deletions

View File

@@ -80,6 +80,7 @@ type LlamaServer interface {
GetPort() int
GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
HasExited() bool
ContextLength() int
}
// llmServer is an instance of a runner hosting a single model
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
return 0
}
// ContextLength returns the context window size this server is running
// with, taken from s.options.NumCtx. Per this commit, NumCtx holds the
// actual (possibly clamped) value rather than the originally configured
// one, so "ollama ps" reports what the runner really uses.
func (s *llmServer) ContextLength() int {
	numCtx := s.options.NumCtx
	return numCtx
}
func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
devices, err := ml.GetDevicesFromRunner(ctx, s)
if err != nil {