added max context size configuration option
This commit is contained in:
parent
b3ad72a7a2
commit
ec8dd6e8a0
3 changed files with 12 additions and 1 deletions
|
|
@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Configuration option to set the LLM's maximum context window size
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- Changed default model shipped with paperless-llm-workflow to ministral 8b base (smaller model with better results)
|
- Changed default model shipped with paperless-llm-workflow to ministral 8b base (smaller model with better results)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ pub(crate) struct Config {
|
||||||
pub(crate) tag_user_name: String,
|
pub(crate) tag_user_name: String,
|
||||||
pub(crate) model: String,
|
pub(crate) model: String,
|
||||||
pub(crate) num_gpu_layers: usize,
|
pub(crate) num_gpu_layers: usize,
|
||||||
|
pub(crate) max_ctx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Default)]
|
#[derive(Deserialize, Default)]
|
||||||
|
|
@ -31,6 +32,7 @@ pub(crate) struct OverlayConfig {
|
||||||
pub(crate) tag_user_name: Option<String>,
|
pub(crate) tag_user_name: Option<String>,
|
||||||
pub(crate) model: Option<String>,
|
pub(crate) model: Option<String>,
|
||||||
pub(crate) num_gpu_layers: Option<usize>,
|
pub(crate) num_gpu_layers: Option<usize>,
|
||||||
|
pub(crate) max_ctx: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
|
|
@ -55,6 +57,7 @@ impl Config {
|
||||||
tag_user_name: tag_user.to_string(),
|
tag_user_name: tag_user.to_string(),
|
||||||
model: model.to_string(),
|
model: model.to_string(),
|
||||||
num_gpu_layers: 1024,
|
num_gpu_layers: 1024,
|
||||||
|
max_ctx: 0, // 0 means the model's training context size (max ctx train) is used by default, which is potentially way too large
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -77,6 +80,7 @@ impl Config {
|
||||||
tag_user_name: overlay_config.tag_user_name.unwrap_or(self.tag_user_name),
|
tag_user_name: overlay_config.tag_user_name.unwrap_or(self.tag_user_name),
|
||||||
model: overlay_config.model.unwrap_or(self.model),
|
model: overlay_config.model.unwrap_or(self.model),
|
||||||
num_gpu_layers: overlay_config.num_gpu_layers.unwrap_or(self.num_gpu_layers),
|
num_gpu_layers: overlay_config.num_gpu_layers.unwrap_or(self.num_gpu_layers),
|
||||||
|
max_ctx: overlay_config.max_ctx.unwrap_or(self.max_ctx),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -119,6 +123,9 @@ impl OverlayConfig {
|
||||||
num_gpu_layers: std::env::var("NUM_GPU_LAYERS")
|
num_gpu_layers: std::env::var("NUM_GPU_LAYERS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|num| num.parse().ok()),
|
.and_then(|num| num.parse().ok()),
|
||||||
|
max_ctx: std::env::var("PAPERLESS_LLM_MAX_CTX")
|
||||||
|
.ok()
|
||||||
|
.and_then(|num| num.parse().ok()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -723,8 +723,9 @@ async fn document_processor(
|
||||||
{
|
{
|
||||||
let mut model_singleton = MODEL_SINGLETON.lock().await;
|
let mut model_singleton = MODEL_SINGLETON.lock().await;
|
||||||
if model_singleton.is_none() {
|
if model_singleton.is_none() {
|
||||||
|
let max_ctx = if config.max_ctx == 0 { None } else { Some(config.max_ctx as u32) };
|
||||||
*model_singleton = spawn_blocking(move || {
|
*model_singleton = spawn_blocking(move || {
|
||||||
LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, None)
|
LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, max_ctx)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(|err| {
|
.map_err(|err| {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue