mirror of
https://github.com/ollama/ollama
synced 2026-04-23 08:45:14 +00:00
bugfix: fix crash bug in token cache logic
This change fixes a problem in the token cache logic to avoid panics caused by empty token arrays by ensuring at least one token remains on full cache hits in the relevant function. This happens if there is an exact match in the cache on subsequent generations.
This commit is contained in:
parent
d98dda4676
commit
857cffd22a
|
|
@ -78,6 +78,11 @@ func (c *kvCache) findRemaining(tokens []int32) []int32 {
|
|||
prefix++
|
||||
}
|
||||
|
||||
if prefix == len(tokens) && prefix > 0 {
|
||||
// Leave one token to run through the model so we can sample a response.
|
||||
prefix--
|
||||
}
|
||||
|
||||
if prefix < len(c.tokens) {
|
||||
trim := len(c.tokens) - prefix
|
||||
for _, kv := range c.caches {
|
||||
|
|
|
|||
Loading…
Reference in a new issue