update llama-cpp bindings

There are multiple new versions of llama-cpp. Even the Rust bindings are
not up to date, which can cause issues when using bleeding-edge models or quantizations.
Update my fork of llama-cpp-rs.
This commit is contained in:
judge 2025-12-08 00:32:59 +01:00
parent 44bb6244f3
commit 642ffc60c6
No known key found for this signature in database
GPG key ID: 6512C30DD8E017B5
5 changed files with 235 additions and 233 deletions

View file

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- increase default num gpu layers to 1024 for better performance with gpu
- updated llama-cpp bindings to version b7314 2025-12-07
## [0.3.1] - 2025-11-26

453
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -36,6 +36,5 @@ utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web"] }
[features]
vulkan = [ "llama-cpp-2/vulkan" ]
native = [ "llama-cpp-2/native" ]
openmp = [ "llama-cpp-2/openmp" ]
cuda = [ "llama-cpp-2/cuda" ]

View file

@ -1,4 +1,4 @@
use llama_cpp_2::LLamaCppError;
use llama_cpp_2::LlamaCppError;
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::llama_batch::LlamaBatch;
@ -57,7 +57,7 @@ pub(crate) enum ModelError {
#[error("Model has not been loaded!")]
ModelNotLoaded,
#[error(transparent)]
LlamaCppError(#[from] LLamaCppError),
LlamaCppError(#[from] LlamaCppError),
}
pub(crate) struct LLModelExtractor {
@ -110,6 +110,7 @@ impl LLModelExtractor {
let mut sampler = LlamaSampler::chain_simple([
LlamaSampler::grammar(&self.model, &grammar, "root").unwrap(),
LlamaSampler::dry(&self.model, 5., 1.75, 2, 1024, ["\n", ":", "\"", "*"]),
LlamaSampler::temp(0.5),
LlamaSampler::greedy(),
]);
let prompt = format!("{}\n", serde_json::to_string(base_data).unwrap());

View file

@ -13,25 +13,21 @@ mod server;
mod types;
#[cfg(any(
all(feature = "vulkan", feature = "native"),
all(feature = "vulkan", feature = "openmp"),
all(feature = "vulkan", feature = "cuda"),
all(feature = "openmp", feature = "cuda"),
all(feature = "openmp", feature = "native"),
all(feature = "cuda", feature = "native")
))]
compile_error!(
"Only one compute backend can be used, choose feature `vulkan`, `openmp`, `cuda` or `native`!"
"Only one compute backend can be used, choose feature `vulkan`, `openmp`, or `cuda`!"
);
#[cfg(not(any(
feature = "vulkan",
feature = "native",
feature = "openmp",
feature = "cuda"
)))]
compile_error!(
"Choose feature `vulkan`, `openmp`, `cuda` or `native` to select what compute backend should be used for inference!"
"Choose feature `vulkan`, `openmp`, or `cuda` to select what compute backend should be used for inference!"
);
#[derive(Parser, Debug)]