diff --git a/llm/server.go b/llm/server.go index 8fedc8468..c09f52c6d 100644 --- a/llm/server.go +++ b/llm/server.go @@ -80,6 +80,7 @@ type LlamaServer interface { GetPort() int GetDeviceInfos(ctx context.Context) []ml.DeviceInfo HasExited() bool + ContextLength() int } // llmServer is an instance of a runner hosting a single model @@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 { return 0 } +func (s *llmServer) ContextLength() int { + return s.options.NumCtx +} + func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { devices, err := ml.GetDevicesFromRunner(ctx, s) if err != nil { diff --git a/server/routes.go b/server/routes.go index d6c1cbe16..e28eb7798 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1897,8 +1897,8 @@ func (s *Server) PsHandler(c *gin.Context) { Details: modelDetails, ExpiresAt: v.expiresAt, } - if v.Options != nil { - mr.ContextLength = v.Options.NumCtx + if v.llama != nil { + mr.ContextLength = v.llama.ContextLength() } // The scheduler waits to set expiresAt, so if a model is loading it's // possible that it will be set to the unix epoch. For those cases, just diff --git a/server/sched_test.go b/server/sched_test.go index ebf9d7695..7eaf4a9f9 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -804,6 +804,7 @@ func (s *mockLlm) GetPort() int { return - func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil } func (s *mockLlm) HasExited() bool { return false } func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil } +func (s *mockLlm) ContextLength() int { return 0 } // TestImageGenRunnerCanBeEvicted verifies that an image generation model // loaded in the scheduler can be evicted when idle. 
diff --git a/x/imagegen/server.go b/x/imagegen/server.go index ae13f5ad7..ca9367694 100644 --- a/x/imagegen/server.go +++ b/x/imagegen/server.go @@ -347,6 +347,11 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 { return s.vramSize } +// ContextLength always returns 0: context length is not applicable for image generation. +func (s *Server) ContextLength() int { + return 0 +} + func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) { return nil, 0, errors.New("not supported") }