From 3cf2d7fe7c6ec6c8d390f25fd881d0f341ff59f9 Mon Sep 17 00:00:00 2001 From: ju6ge Date: Sat, 13 Dec 2025 15:01:45 +0100 Subject: [PATCH] fix temperature sampling Using temperature requires randomized picking of the final token, since the probability of the most probable token will also be the most probable after applying temperature, so greedy sampling must not be used! --- Cargo.lock | 1 + Cargo.toml | 1 + src/extract.rs | 7 ++++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a0ee9a..5b3894f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1765,6 +1765,7 @@ dependencies = [ "log", "once_cell", "paperless-api-client", + "rand 0.9.2", "regex", "regex_static", "schemars 1.1.0", diff --git a/Cargo.toml b/Cargo.toml index d37b54f..5830445 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ once_cell = "1.21.3" itertools = "0.14.0" utoipa = { version = "5.4.0", features = ["actix_extras"] } utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web"] } +rand = "0.9.2" [features] vulkan = [ "llama-cpp-2/vulkan" ] diff --git a/src/extract.rs b/src/extract.rs index 0be97c9..ae54319 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -109,9 +109,10 @@ impl LLModelExtractor { let grammar = gen_gbnf(response_schema, self.eos_string.to_string()); let mut sampler = LlamaSampler::chain_simple([ LlamaSampler::grammar(&self.model, &grammar, "root").unwrap(), - LlamaSampler::dry(&self.model, 5., 1.75, 2, 1024, ["\n", ":", "\"", "*"]), - LlamaSampler::temp(0.5), - LlamaSampler::greedy(), + LlamaSampler::dry(&self.model, 5., 1.75, 2, 256, ["\"", ":", "*"], ), + LlamaSampler::min_p(0.01, 64), + LlamaSampler::temp(0.1), + LlamaSampler::dist(rand::random()), ]); let prompt = format!("{}\n", serde_json::to_string(base_data).unwrap()); let mut ctx = self