diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e0e10f..580c059 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Configuration option to set LLM maximum context window
+
 ### Changed
 - Changed default model shipped with paperless-llm-workflow to ministral 8b base (smaller model with better results)
 
diff --git a/src/config.rs b/src/config.rs
index b92a47d..d14ccd2 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -16,6 +16,7 @@ pub(crate) struct Config {
     pub(crate) tag_user_name: String,
     pub(crate) model: String,
     pub(crate) num_gpu_layers: usize,
+    pub(crate) max_ctx: usize,
 }
 
 #[derive(Deserialize, Default)]
@@ -31,6 +32,7 @@ pub(crate) struct OverlayConfig {
     pub(crate) tag_user_name: Option<String>,
     pub(crate) model: Option<String>,
     pub(crate) num_gpu_layers: Option<usize>,
+    pub(crate) max_ctx: Option<usize>,
 }
 
 #[derive(Debug, Error)]
@@ -55,6 +57,7 @@ impl Config {
             tag_user_name: tag_user.to_string(),
             model: model.to_string(),
             num_gpu_layers: 1024,
+            max_ctx: 0, // 0 means that by default the max ctx train of the model will be used; this is potentially way too large
         }
     }
 
@@ -77,6 +80,7 @@ impl Config {
             tag_user_name: overlay_config.tag_user_name.unwrap_or(self.tag_user_name),
             model: overlay_config.model.unwrap_or(self.model),
             num_gpu_layers: overlay_config.num_gpu_layers.unwrap_or(self.num_gpu_layers),
+            max_ctx: overlay_config.max_ctx.unwrap_or(self.max_ctx),
         }
     }
 }
@@ -119,6 +123,9 @@ impl OverlayConfig {
             num_gpu_layers: std::env::var("NUM_GPU_LAYERS")
                 .ok()
                 .and_then(|num| num.parse().ok()),
+            max_ctx: std::env::var("PAPERLESS_LLM_MAX_CTX")
+                .ok()
+                .and_then(|num| num.parse().ok()),
         }
     }
 }
diff --git a/src/server.rs b/src/server.rs
index db8ffd4..eefc0f8 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -723,8 +723,9 @@ async fn document_processor(
     {
         let mut model_singleton = MODEL_SINGLETON.lock().await;
         if model_singleton.is_none() {
+            let max_ctx = if config.max_ctx == 0 { None } else { Some(config.max_ctx as u32) };
             *model_singleton = spawn_blocking(move || {
-                LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, None)
+                LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, max_ctx)
             })
             .await
             .map_err(|err| {