diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e0e10f..580c059 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Configuration option to set LLM maximum context window
+
 ### Changed
 - Changed default model shipped with paperless-llm-workflow to ministral 8b base (smaller model with better results)
 
diff --git a/src/config.rs b/src/config.rs
index b92a47d..d14ccd2 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -16,6 +16,7 @@ pub(crate) struct Config {
     pub(crate) tag_user_name: String,
     pub(crate) model: String,
     pub(crate) num_gpu_layers: usize,
+    pub(crate) max_ctx: usize,
 }
 
 #[derive(Deserialize, Default)]
@@ -31,6 +32,7 @@ pub(crate) struct OverlayConfig {
     pub(crate) tag_user_name: Option<String>,
     pub(crate) model: Option<String>,
     pub(crate) num_gpu_layers: Option<usize>,
+    pub(crate) max_ctx: Option<usize>,
 }
 
 #[derive(Debug, Error)]
@@ -55,6 +57,7 @@ impl Config {
             tag_user_name: tag_user.to_string(),
             model: model.to_string(),
             num_gpu_layers: 1024,
+            max_ctx: 0, // 0 means that by default the max ctx train of the model will be used; this is potentially way too large
         }
     }
 
@@ -77,6 +80,7 @@ impl Config {
             tag_user_name: overlay_config.tag_user_name.unwrap_or(self.tag_user_name),
             model: overlay_config.model.unwrap_or(self.model),
             num_gpu_layers: overlay_config.num_gpu_layers.unwrap_or(self.num_gpu_layers),
+            max_ctx: overlay_config.max_ctx.unwrap_or(self.max_ctx),
         }
     }
 }
@@ -119,6 +123,9 @@ impl OverlayConfig {
             num_gpu_layers: std::env::var("NUM_GPU_LAYERS")
                 .ok()
                 .and_then(|num| num.parse().ok()),
+            max_ctx: std::env::var("PAPERLESS_LLM_MAX_CTX")
+                .ok()
+                .and_then(|num| num.parse().ok()),
         }
     }
 }
diff --git a/src/server.rs b/src/server.rs
index db8ffd4..eefc0f8 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -723,8 +723,9 @@ async fn document_processor(
     {
         let mut model_singleton = MODEL_SINGLETON.lock().await;
         if model_singleton.is_none() {
+            let max_ctx = if config.max_ctx == 0 { None } else { Some(config.max_ctx as u32) };
             *model_singleton = spawn_blocking(move || {
-                LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, None)
+                LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, max_ctx)
             })
             .await
             .map_err(|err| {