update llama-cpp bindings

There are multiple new versions of llama-cpp. Even the Rust bindings are
not up to date, which can cause issues when using bleeding-edge models or quantizations.
Update my fork of llama-cpp-rs.
This commit is contained in:
judge 2025-12-08 00:32:59 +01:00
parent 44bb6244f3
commit 642ffc60c6
No known key found for this signature in database
GPG key ID: 6512C30DD8E017B5
5 changed files with 235 additions and 233 deletions

View file

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- increase default num gpu layers to 1024 for better performance with gpu
- updated llama-cpp bindings to version b7314 2025-12-07
## [0.3.1] - 2025-11-26

453
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -36,6 +36,5 @@ utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web"] }
[features]
vulkan = [ "llama-cpp-2/vulkan" ]
native = [ "llama-cpp-2/native" ]
openmp = [ "llama-cpp-2/openmp" ]
cuda = [ "llama-cpp-2/cuda" ]

View file

@ -1,4 +1,4 @@
use llama_cpp_2::LLamaCppError;
use llama_cpp_2::LlamaCppError;
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
@ -57,7 +57,7 @@ pub(crate) enum ModelError {
#[error("Model has not been loaded!")]
ModelNotLoaded,
#[error(transparent)]
LlamaCppError(#[from] LLamaCppError),
LlamaCppError(#[from] LlamaCppError),
}
pub(crate) struct LLModelExtractor {
@ -110,6 +110,7 @@ impl LLModelExtractor {
let mut sampler = LlamaSampler::chain_simple([
LlamaSampler::grammar(&self.model, &grammar, "root").unwrap(),
LlamaSampler::dry(&self.model, 5., 1.75, 2, 1024, ["\n", ":", "\"", "*"]),
LlamaSampler::temp(0.5),
LlamaSampler::greedy(),
]);
let prompt = format!("{}\n", serde_json::to_string(base_data).unwrap());

View file

@ -13,25 +13,21 @@ mod server;
mod types;
#[cfg(any(
all(feature = "vulkan", feature = "native"),
all(feature = "vulkan", feature = "openmp"),
all(feature = "vulkan", feature = "cuda"),
all(feature = "openmp", feature = "cuda"),
all(feature = "openmp", feature = "native"),
all(feature = "cuda", feature = "native")
))]
compile_error!(
"Only one compute backend can be used, choose feature `vulkan`, `openmp`, `cuda` or `native`!"
"Only one compute backend can be used, choose feature `vulkan`, `openmp`, or `cuda`!"
);
#[cfg(not(any(
feature = "vulkan",
feature = "native",
feature = "openmp",
feature = "cuda"
)))]
compile_error!(
"Choose feature `vulkan`, `openmp`, `cuda` or `native` to select what compute backend should be used for inference!"
"Choose feature `vulkan`, `openmp`, or `cuda` to select what compute backend should be used for inference!"
);
#[derive(Parser, Debug)]