runner.go: Implement RepeatLastN to penalize repeated tokens

RepeatLastN is a user-facing parameter that is exposed through the APIs but is not currently plumbed through.
2026-04-23 09:15:44 +02:00 · 2024-08-20 11:21:19 -07:00
parent eccd4dd8d2
commit 477f529d26
4 changed files with 5 additions and 0 deletions
--- a/llama/sampling_ext.cpp
+++ b/llama/sampling_ext.cpp
@@ -10,6 +10,7 @@ struct llama_sampling_context *llama_sampling_cinit(struct llama_sampling_cparam
    sparams.tfs_z = params->tfs_z;
    sparams.typical_p = params->typical_p;
    sparams.temp = params->temp;
+    sparams.penalty_last_n = params->penalty_last_n;
    sparams.penalty_repeat = params->penalty_repeat;
    sparams.penalty_freq = params->penalty_freq;
    sparams.penalty_present = params->penalty_present;