update llama-cpp bindings
There are multiple new versions of llama-cpp, and even the Rust bindings are not up to date, which can cause issues when using bleeding-edge models or quantizations. Update my fork of llama-cpp-rs.
parent 44bb6244f3
commit 642ffc60c6
5 changed files with 235 additions and 233 deletions
CHANGELOG.md
```diff
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 - increase default num gpu layers to 1024 for better performance with gpu
+- updated llama-cpp bindings to version b7314 2025-12-07
 
 ## [0.3.1] - 2025-11-26
```
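For context on the gpu-layers entry: in the llama-cpp-2 bindings the offload count is set through LlamaModelParams. A minimal sketch of the idea, assuming the upstream llama-cpp-2 API (the path and error handling are illustrative, not taken from this commit):

```rust
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::model::LlamaModel;

// A value like 1024 exceeds the layer count of current models, so with a
// GPU backend (vulkan/cuda) the entire model ends up offloaded.
fn load_model(path: &str) -> Result<LlamaModel, Box<dyn std::error::Error>> {
    let backend = LlamaBackend::init()?;
    let params = LlamaModelParams::default().with_n_gpu_layers(1024);
    Ok(LlamaModel::load_from_file(&backend, path, &params)?)
}
```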
Cargo.lock (generated): 453 changes. File diff suppressed because it is too large.
Cargo.toml
```diff
@@ -36,6 +36,5 @@ utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web"] }
 
 [features]
 vulkan = [ "llama-cpp-2/vulkan" ]
-native = [ "llama-cpp-2/native" ]
 openmp = [ "llama-cpp-2/openmp" ]
 cuda = [ "llama-cpp-2/cuda" ]
```
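Each of these crate features simply forwards to the matching llama-cpp-2 backend feature, so the compute backend is picked at build time, e.g. `cargo build --release --features vulkan`. In code the same flags appear as cfg predicates; a small hypothetical sketch (the function is illustrative, not part of the commit):

```rust
// Exactly one of these branches survives compilation; the guard in the
// main module below rejects builds with zero or multiple backends.
fn backend_name() -> &'static str {
    #[cfg(feature = "vulkan")]
    return "vulkan";
    #[cfg(feature = "cuda")]
    return "cuda";
    #[cfg(feature = "openmp")]
    return "openmp (cpu)";
}
```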
Model extractor module (file path not shown):
```diff
@@ -1,4 +1,4 @@
-use llama_cpp_2::LLamaCppError;
+use llama_cpp_2::LlamaCppError;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
```
```diff
@@ -57,7 +57,7 @@ pub(crate) enum ModelError {
     #[error("Model has not been loaded!")]
     ModelNotLoaded,
     #[error(transparent)]
-    LlamaCppError(#[from] LLamaCppError),
+    LlamaCppError(#[from] LlamaCppError),
 }
 
 pub(crate) struct LLModelExtractor {
```
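The change here is only the casing of the wrapped type; the error plumbing itself is the standard thiserror pattern: `#[error(transparent)]` forwards Display and source() to the inner error, and `#[from]` derives the From impl that lets `?` convert binding errors. A sketch, assuming the fork's LlamaCppError is also what LlamaBackend::init returns (the helper function is hypothetical):

```rust
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::LlamaCppError;
use thiserror::Error;

#[derive(Debug, Error)]
pub(crate) enum ModelError {
    #[error("Model has not been loaded!")]
    ModelNotLoaded,
    // Display/source are forwarded to the wrapped error; #[from] derives
    // From<LlamaCppError> for ModelError.
    #[error(transparent)]
    LlamaCppError(#[from] LlamaCppError),
}

// Hypothetical helper: `?` converts via the derived From impl.
fn init_backend() -> Result<LlamaBackend, ModelError> {
    Ok(LlamaBackend::init()?)
}
```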
```diff
@@ -110,6 +110,7 @@ impl LLModelExtractor {
         let mut sampler = LlamaSampler::chain_simple([
             LlamaSampler::grammar(&self.model, &grammar, "root").unwrap(),
+            LlamaSampler::dry(&self.model, 5., 1.75, 2, 1024, ["\n", ":", "\"", "*"]),
             LlamaSampler::temp(0.5),
             LlamaSampler::greedy(),
         ]);
         let prompt = format!("{}\n", serde_json::to_string(base_data).unwrap());
```
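The sampler stages run in the order listed: grammar masks every token that would violate the GBNF grammar rooted at "root", dry applies the DRY repetition penalty (multiplier 5.0, base 1.75, allowed length 2, over the last 1024 tokens, with the given sequence breakers), temp rescales the logits, and greedy picks the argmax. Note that a temperature stage ahead of a purely greedy pick does not change the argmax; it only matters if greedy is swapped for a probabilistic stage such as dist. A sketch of driving the chain, assuming the llama-cpp-2 sampler API (exact signatures may differ between binding versions):

```rust
use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::sampling::LlamaSampler;
use llama_cpp_2::token::LlamaToken;

// Pull the next token from the logits of the last decoded position (idx -1).
// sample() runs every stage of the chain and, in upstream llama.cpp, also
// accepts the chosen token into stateful stages such as grammar and DRY.
fn next_token(sampler: &mut LlamaSampler, ctx: &LlamaContext) -> LlamaToken {
    sampler.sample(ctx, -1)
}
```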
Main module (file path not shown):
```diff
@@ -13,25 +13,21 @@ mod server;
 mod types;
 
 #[cfg(any(
-    all(feature = "vulkan", feature = "native"),
     all(feature = "vulkan", feature = "openmp"),
     all(feature = "vulkan", feature = "cuda"),
     all(feature = "openmp", feature = "cuda"),
-    all(feature = "openmp", feature = "native"),
-    all(feature = "cuda", feature = "native")
 ))]
 compile_error!(
-    "Only one compute backend can be used, choose feature `vulkan`, `openmp`, `cuda` or `native`!"
+    "Only one compute backend can be used, choose feature `vulkan`, `openmp`, or `cuda`!"
 );
 
 #[cfg(not(any(
     feature = "vulkan",
-    feature = "native",
     feature = "openmp",
     feature = "cuda"
 )))]
 compile_error!(
-    "Choose feature `vulkan`, `openmp`, `cuda` or `native` to select what compute backend should be used for inference!"
+    "Choose feature `vulkan`, `openmp`, or `cuda` to select what compute backend should be used for inference!"
 );
 
 #[derive(Parser, Debug)]
```
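This guard exists because cfg cannot express "exactly one of" directly: mutually exclusive features are rejected pairwise with all(...) (three pairs now that native is gone, down from six), and the not(any(...)) arm rejects builds with no backend selected at all. The same shape with two hypothetical features a and b:

```rust
// Pairwise check rejects both-on; not(any(...)) rejects both-off,
// leaving "exactly one enabled" as the only build that compiles.
#[cfg(all(feature = "a", feature = "b"))]
compile_error!("features `a` and `b` are mutually exclusive");

#[cfg(not(any(feature = "a", feature = "b")))]
compile_error!("enable exactly one of feature `a` or `b`");
```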