server: emit load and total durations for a load

This commit is contained in:
Richard Lyons 2025-02-21 18:41:34 +01:00
parent 5c5535c064
commit 6b6746b5b4

View file

@@ -191,6 +191,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
CreatedAt: time.Now().UTC(),
Done: true,
DoneReason: "load",
Metrics: api.Metrics{
LoadDuration: checkpointLoaded.Sub(checkpointStart),
TotalDuration: checkpointLoaded.Sub(checkpointStart),
},
})
return
}
@@ -1443,6 +1447,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
Message: api.Message{Role: "assistant"},
Done: true,
DoneReason: "load",
Metrics: api.Metrics{
LoadDuration: checkpointLoaded.Sub(checkpointStart),
TotalDuration: checkpointLoaded.Sub(checkpointStart),
},
})
return
}