From d11fbd2c603aad64535c5cecd6ee68a02d01aa0c Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Tue, 27 Jan 2026 16:27:55 -0800
Subject: [PATCH] server: fix ollama ps showing configured instead of actual
 context length

When context length is clamped to the model's trained context length,
ollama ps now shows the actual clamped value instead of the originally
configured value.
---
 llm/server.go        | 5 +++++
 server/routes.go     | 4 ++--
 server/sched_test.go | 1 +
 x/imagegen/server.go | 5 +++++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llm/server.go b/llm/server.go
index 8fedc8468..c09f52c6d 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -80,6 +80,7 @@ type LlamaServer interface {
 	GetPort() int
 	GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
 	HasExited() bool
+	ContextLength() int
 }
 
 // llmServer is an instance of a runner hosting a single model
@@ -1901,6 +1902,10 @@ func (s *llmServer) VRAMByGPU(id ml.DeviceID) uint64 {
 	return 0
 }
 
+func (s *llmServer) ContextLength() int {
+	return s.options.NumCtx
+}
+
 func (s *ollamaServer) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo {
 	devices, err := ml.GetDevicesFromRunner(ctx, s)
 	if err != nil {
diff --git a/server/routes.go b/server/routes.go
index d6c1cbe16..e28eb7798 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1897,8 +1897,8 @@ func (s *Server) PsHandler(c *gin.Context) {
 			Details:   modelDetails,
 			ExpiresAt: v.expiresAt,
 		}
-		if v.Options != nil {
-			mr.ContextLength = v.Options.NumCtx
+		if v.llama != nil {
+			mr.ContextLength = v.llama.ContextLength()
 		}
 		// The scheduler waits to set expiresAt, so if a model is loading it's
 		// possible that it will be set to the unix epoch. For those cases, just
diff --git a/server/sched_test.go b/server/sched_test.go
index ebf9d7695..7eaf4a9f9 100644
--- a/server/sched_test.go
+++ b/server/sched_test.go
@@ -804,6 +804,7 @@ func (s *mockLlm) GetPort() int { return -1 }
 func (s *mockLlm) GetDeviceInfos(ctx context.Context) []ml.DeviceInfo { return nil }
 func (s *mockLlm) HasExited() bool { return false }
 func (s *mockLlm) GetActiveDeviceIDs() []ml.DeviceID { return nil }
+func (s *mockLlm) ContextLength() int { return 0 }
 
 // TestImageGenRunnerCanBeEvicted verifies that an image generation model
 // loaded in the scheduler can be evicted when idle.
diff --git a/x/imagegen/server.go b/x/imagegen/server.go
index ae13f5ad7..ca9367694 100644
--- a/x/imagegen/server.go
+++ b/x/imagegen/server.go
@@ -347,6 +347,11 @@ func (s *Server) VRAMByGPU(id ml.DeviceID) uint64 {
 	return s.vramSize
 }
 
+// Context length is not applicable for image generation.
+func (s *Server) ContextLength() int {
+	return 0
+}
+
 func (s *Server) Embedding(ctx context.Context, input string) ([]float32, int, error) {
 	return nil, 0, errors.New("not supported")
 }