chore: code readability implement clippy suggestions
This commit is contained in:
parent
96bd2c9ce0
commit
5176b29587
5 changed files with 104 additions and 138 deletions
|
|
@ -17,7 +17,7 @@ use gbnf::{self, GrammarItem, NonTerminalSymbol, ProductionItem, RepetitionType,
|
|||
|
||||
fn gen_gbnf(schema: &schemars::Schema, eos_token: String) -> String {
|
||||
let js = &serde_json::to_string(schema.as_value()).unwrap();
|
||||
let mut gram = gbnf::Grammar::from_json_schema(&js)
|
||||
let mut gram = gbnf::Grammar::from_json_schema(js)
|
||||
.map_err(|err| {
|
||||
println!("{err}");
|
||||
err
|
||||
|
|
@ -27,27 +27,25 @@ fn gen_gbnf(schema: &schemars::Schema, eos_token: String) -> String {
|
|||
match &mut r {
|
||||
GrammarItem::LineBreak | GrammarItem::Comment(_) => {}
|
||||
GrammarItem::Rule(rule) => {
|
||||
if rule.lhs.name == "root".to_string() {
|
||||
if let Some(last_rule) = rule.rhs.items.last_mut() {
|
||||
*last_rule = gbnf::ProductionItem::Terminal(
|
||||
TerminalSymbol {
|
||||
value: eos_token.clone(),
|
||||
},
|
||||
gbnf::RepetitionType::One,
|
||||
);
|
||||
}
|
||||
if rule.lhs.name == "root"
|
||||
&& let Some(last_rule) = rule.rhs.items.last_mut()
|
||||
{
|
||||
*last_rule = gbnf::ProductionItem::Terminal(
|
||||
TerminalSymbol {
|
||||
value: eos_token.clone(),
|
||||
},
|
||||
gbnf::RepetitionType::One,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(p) = gram.recurring_items.get_mut(&NonTerminalSymbol {
|
||||
name: "ws".to_string(),
|
||||
}) {
|
||||
if let Some(last_item) = p.items.last_mut() {
|
||||
if let ProductionItem::CharacterSet(_, rep_type) = last_item {
|
||||
*rep_type = RepetitionType::One
|
||||
}
|
||||
}
|
||||
}) && let Some(last_item) = p.items.last_mut()
|
||||
&& let ProductionItem::CharacterSet(_, rep_type) = last_item
|
||||
{
|
||||
*rep_type = RepetitionType::One
|
||||
}
|
||||
gram.to_string()
|
||||
}
|
||||
|
|
|
|||
42
src/main.rs
42
src/main.rs
|
|
@ -62,11 +62,10 @@ async fn main() {
|
|||
|
||||
let _model_path = Path::new(&config.model)
|
||||
.canonicalize()
|
||||
.map_err(|err| {
|
||||
.inspect_err(|_| {
|
||||
log::error!(
|
||||
"Could not find model file! Can not run without a language model! … Stop execution!"
|
||||
);
|
||||
err
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
|
@ -78,8 +77,7 @@ async fn main() {
|
|||
|
||||
let user = users
|
||||
.iter()
|
||||
.filter(|user| user.username == config.tag_user_name)
|
||||
.next()
|
||||
.find(|user| user.username == config.tag_user_name)
|
||||
.or_else(|| {
|
||||
log::warn!(
|
||||
"configured user `{}` could not be found, running without user!",
|
||||
|
|
@ -89,12 +87,7 @@ async fn main() {
|
|||
});
|
||||
|
||||
// make sure tags for processing and finished exist
|
||||
let processing_tag = if tags
|
||||
.iter()
|
||||
.filter(|t| t.name == config.processing_tag)
|
||||
.next()
|
||||
.is_none()
|
||||
{
|
||||
let processing_tag = if !tags.iter().any(|t| t.name == config.processing_tag) {
|
||||
requests::create_tag(
|
||||
&mut api_client,
|
||||
user,
|
||||
|
|
@ -102,31 +95,23 @@ async fn main() {
|
|||
&config.processing_color,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
.inspect_err(|err| {
|
||||
log::error!("could not create processing tag: {err}");
|
||||
err
|
||||
})
|
||||
.map(|tag| {
|
||||
.inspect(|_| {
|
||||
log::info!(
|
||||
"created processing tag `{}` to paperless ",
|
||||
config.processing_tag
|
||||
);
|
||||
tag
|
||||
})
|
||||
.ok()
|
||||
} else {
|
||||
tags.iter()
|
||||
.filter(|t| t.name == config.processing_tag)
|
||||
.next()
|
||||
.map(|t| t.clone())
|
||||
.find(|t| t.name == config.processing_tag)
|
||||
.cloned()
|
||||
};
|
||||
|
||||
let finished_tag = if tags
|
||||
.iter()
|
||||
.filter(|t| t.name == config.finished_tag)
|
||||
.next()
|
||||
.is_none()
|
||||
{
|
||||
let finished_tag = if !tags.iter().any(|t| t.name == config.finished_tag) {
|
||||
requests::create_tag(
|
||||
&mut api_client,
|
||||
user,
|
||||
|
|
@ -134,23 +119,18 @@ async fn main() {
|
|||
&config.finished_color,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
.inspect_err(|err| {
|
||||
log::error!("could not create finished tag: {err}");
|
||||
err
|
||||
})
|
||||
.map(|tag| {
|
||||
.inspect(|_| {
|
||||
log::info!(
|
||||
"created processing tag `{}` to paperless ",
|
||||
config.finished_tag
|
||||
);
|
||||
tag
|
||||
})
|
||||
.ok()
|
||||
} else {
|
||||
tags.iter()
|
||||
.filter(|t| t.name == config.finished_tag)
|
||||
.next()
|
||||
.map(|t| t.clone())
|
||||
tags.iter().find(|t| t.name == config.finished_tag).cloned()
|
||||
};
|
||||
|
||||
if processing_tag.is_none() || finished_tag.is_none() {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ pub async fn processed_doc_update(
|
|||
.partial_update(
|
||||
doc_id,
|
||||
&PatchedDocumentRequest {
|
||||
correspondent: correspondent,
|
||||
correspondent,
|
||||
document_type: None,
|
||||
storage_path: None,
|
||||
title: None,
|
||||
|
|
@ -296,14 +296,14 @@ pub(crate) async fn get_all_users(api_client: &mut Client) -> Vec<User> {
|
|||
pub(crate) async fn create_tag(
|
||||
api_client: &mut Client,
|
||||
tag_user: Option<&User>,
|
||||
tag_name: &String,
|
||||
tag_color: &String,
|
||||
tag_name: &str,
|
||||
tag_color: &str,
|
||||
) -> Result<Tag, paperless_api_client::types::error::Error> {
|
||||
api_client
|
||||
.tags()
|
||||
.create(&TagRequest {
|
||||
name: tag_name.clone(),
|
||||
color: Some(tag_color.clone()),
|
||||
name: tag_name.to_owned(),
|
||||
color: Some(tag_color.to_owned()),
|
||||
match_: Some("".to_string()),
|
||||
matching_algorithm: Some(0),
|
||||
is_insensitive: Some(true),
|
||||
|
|
|
|||
|
|
@ -182,13 +182,13 @@ async fn handle_custom_field_prediction(
|
|||
doc.id,
|
||||
cf_value
|
||||
);
|
||||
doc.custom_fields.as_mut().map(|doc_custom_fields| {
|
||||
if let Some(doc_custom_fields) = doc.custom_fields.as_mut() {
|
||||
for doc_cf_i in doc_custom_fields.iter_mut() {
|
||||
if doc_cf_i.field == cf_value.field {
|
||||
*doc_cf_i = cf_value.clone()
|
||||
*doc_cf_i = cf_value.clone();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -228,18 +228,16 @@ async fn handle_decision(
|
|||
|
||||
if let Some(true_tag) = true_tag
|
||||
&& extracted_decision.answer_bool
|
||||
&& !doc.tags.contains(&true_tag.id)
|
||||
{
|
||||
if !doc.tags.contains(&true_tag.id) {
|
||||
doc.tags.push(true_tag.id);
|
||||
}
|
||||
doc.tags.push(true_tag.id);
|
||||
}
|
||||
|
||||
if let Some(false_tag) = false_tag
|
||||
&& !extracted_decision.answer_bool
|
||||
&& !doc.tags.contains(&false_tag.id)
|
||||
{
|
||||
if !doc.tags.contains(&false_tag.id) {
|
||||
doc.tags.push(false_tag.id);
|
||||
}
|
||||
doc.tags.push(false_tag.id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
@ -442,8 +440,8 @@ async fn decision(
|
|||
|
||||
let process_type = ProcessingType::DecsionTagFlow {
|
||||
question: params.question.clone(),
|
||||
true_tag: true_tag,
|
||||
false_tag: false_tag,
|
||||
true_tag,
|
||||
false_tag,
|
||||
};
|
||||
|
||||
let generic_webhook_params = WebhookParams {
|
||||
|
|
@ -479,7 +477,7 @@ async fn suggest_correspondent(
|
|||
config: Data<Config>,
|
||||
document_pipeline: web::Data<tokio::sync::mpsc::UnboundedSender<DocumentProcessingRequest>>,
|
||||
) -> Result<HttpResponse, WebhookError> {
|
||||
let _ = params
|
||||
params
|
||||
.handle_request(
|
||||
status_tags,
|
||||
api_client,
|
||||
|
|
@ -517,7 +515,7 @@ async fn custom_field_prediction(
|
|||
config: Data<Config>,
|
||||
document_pipeline: web::Data<tokio::sync::mpsc::UnboundedSender<DocumentProcessingRequest>>,
|
||||
) -> Result<HttpResponse, WebhookError> {
|
||||
let _ = params
|
||||
params
|
||||
.handle_request(
|
||||
status_tags,
|
||||
api_client,
|
||||
|
|
@ -565,18 +563,18 @@ fn merge_document_status(
|
|||
ProcessingType::CustomFieldPrediction => {
|
||||
if let Some(updated_custom_fields) = &updated_doc.custom_fields {
|
||||
for updated_cf in updated_custom_fields {
|
||||
doc.custom_fields.as_mut().map(|doc_custom_fields| {
|
||||
if let Some(doc_custom_fields) = doc.custom_fields.as_mut() {
|
||||
let mut cf_found = false;
|
||||
for doc_cf_i in &mut *doc_custom_fields {
|
||||
if doc_cf_i.field == updated_cf.field {
|
||||
cf_found = true;
|
||||
doc_cf_i.value = updated_cf.value.clone()
|
||||
doc_cf_i.value = updated_cf.value.clone();
|
||||
}
|
||||
}
|
||||
if !cf_found {
|
||||
doc_custom_fields.push(updated_cf.clone());
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -634,7 +632,7 @@ async fn document_updater(
|
|||
.document
|
||||
.tags
|
||||
.iter()
|
||||
.map(|t| *t)
|
||||
.copied()
|
||||
.filter(|t| *t != status_tags.processing.id)
|
||||
.chain(if doc_req.overwrite_finshed_tag.is_none() {
|
||||
[status_tags.finished.id].into_iter()
|
||||
|
|
@ -647,7 +645,7 @@ async fn document_updater(
|
|||
let mut updated_cf: Option<Vec<CustomFieldInstance>> = None;
|
||||
let mut updated_crrspdnt: Option<i64> = None;
|
||||
|
||||
for doc_processing_steps in vec![doc_req.processing_type]
|
||||
for doc_processing_steps in [doc_req.processing_type]
|
||||
.iter()
|
||||
.chain(
|
||||
defered_doc_updates
|
||||
|
|
@ -690,19 +688,17 @@ async fn document_updater(
|
|||
});
|
||||
} else {
|
||||
// remember how the document has been processed so far, for a deferred update later
|
||||
if defered_doc_updates.contains_key(&doc_req.document.id) {
|
||||
if defered_doc_updates
|
||||
.get(&doc_req.document.id)
|
||||
.is_some_and(|v| v.contains(&doc_req.processing_type))
|
||||
{
|
||||
continue;
|
||||
} else {
|
||||
if let Some(v) = defered_doc_updates.get_mut(&doc_req.document.id).as_mut() {
|
||||
v.push(doc_req.processing_type);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
defered_doc_updates.insert(doc_req.document.id, vec![doc_req.processing_type]);
|
||||
if let std::collections::btree_map::Entry::Vacant(e) =
|
||||
defered_doc_updates.entry(doc_req.document.id)
|
||||
{
|
||||
e.insert(vec![doc_req.processing_type]);
|
||||
} else if defered_doc_updates
|
||||
.get(&doc_req.document.id)
|
||||
.is_some_and(|v| v.contains(&doc_req.processing_type))
|
||||
{
|
||||
continue;
|
||||
} else if let Some(v) = defered_doc_updates.get_mut(&doc_req.document.id).as_mut() {
|
||||
v.push(doc_req.processing_type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -722,13 +718,13 @@ async fn document_processor(
|
|||
>,
|
||||
) {
|
||||
while !*STOP_FLAG.read().await {
|
||||
while PROCESSING_QUEUE.read().await.len() > 0 {
|
||||
while !PROCESSING_QUEUE.read().await.is_empty() {
|
||||
let model_path = config.model.clone();
|
||||
{
|
||||
let mut model_singleton = MODEL_SINGLETON.lock().await;
|
||||
if model_singleton.is_none() {
|
||||
*model_singleton = spawn_blocking(move || {
|
||||
LLModelExtractor::new(&Path::new(&model_path), config.num_gpu_layers, None)
|
||||
LLModelExtractor::new(Path::new(&model_path), config.num_gpu_layers, None)
|
||||
})
|
||||
.await
|
||||
.map_err(|err| {
|
||||
|
|
@ -805,7 +801,7 @@ async fn document_processor(
|
|||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
if PROCESSING_QUEUE.read().await.len() == 0 && MODEL_SINGLETON.lock().await.is_some() {
|
||||
if PROCESSING_QUEUE.read().await.is_empty() && MODEL_SINGLETON.lock().await.is_some() {
|
||||
// No documents need processing, so drop the model
|
||||
log::info!("Unloading Model due to processing queue being empty!");
|
||||
let mut model_singleton = MODEL_SINGLETON.lock().await;
|
||||
|
|
@ -862,7 +858,7 @@ pub async fn run_server(
|
|||
rx_update,
|
||||
));
|
||||
let webhook_server = HttpServer::new(move || {
|
||||
let app = App::new()
|
||||
App::new()
|
||||
.app_data(Data::new(tx.clone()))
|
||||
.app_data(Data::new(config.clone()))
|
||||
.app_data(Data::new(paperless_api_client.clone()))
|
||||
|
|
@ -872,8 +868,7 @@ pub async fn run_server(
|
|||
.config(utoipa_swagger_ui::Config::default().use_base_layout())
|
||||
.url("/docs/openapi.json", DocumentProcessingApiSpec::openapi()),
|
||||
)
|
||||
.service(DocumentProcessingApi);
|
||||
app
|
||||
.service(DocumentProcessingApi)
|
||||
})
|
||||
.bind(("0.0.0.0", 8123))?
|
||||
.run();
|
||||
|
|
|
|||
91
src/types.rs
91
src/types.rs
|
|
@ -62,8 +62,7 @@ impl FieldExtract {
|
|||
|
||||
all_correspondents
|
||||
.iter()
|
||||
.filter(|c| c.name == parsed_value)
|
||||
.next()
|
||||
.find(|c| c.name == parsed_value)
|
||||
.ok_or(FieldError::CorrespondentNotFound(parsed_value))
|
||||
.cloned()
|
||||
}
|
||||
|
|
@ -172,10 +171,7 @@ pub(crate) struct FieldSelect {
|
|||
}
|
||||
|
||||
pub(crate) fn custom_field_learning_supported(cf: &CustomField) -> bool {
|
||||
match cf.data_type {
|
||||
DataTypeEnum::Documentlink | DataTypeEnum::Url => false,
|
||||
_ => true,
|
||||
}
|
||||
!matches!(cf.data_type, DataTypeEnum::Documentlink | DataTypeEnum::Url)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -216,7 +212,7 @@ fn guide_value_from_custom_field(cf: &CustomField) -> Option<GuideDef> {
|
|||
return None;
|
||||
};
|
||||
let enum_values = serde_json::to_value(
|
||||
&select_options
|
||||
select_options
|
||||
.select_options
|
||||
.into_iter()
|
||||
.map(|o| o.label)
|
||||
|
|
@ -249,13 +245,13 @@ pub(crate) fn schema_from_correspondents(crrspd_list: &[Correspondent]) -> schem
|
|||
});
|
||||
|
||||
let mut base_schema = schema_for!(FieldExtract);
|
||||
base_schema.get_mut("properties").map(|properties| {
|
||||
properties.get_mut("description").map(|description_schema| {
|
||||
if let Some(properties) = base_schema.get_mut("properties") {
|
||||
if let Some(description_schema) = properties.get_mut("description") {
|
||||
*description_schema = json_schema!({ "const": "Correspondent" })
|
||||
.as_value()
|
||||
.clone()
|
||||
});
|
||||
properties.get_mut("format").map(|legend_schema| {
|
||||
.clone();
|
||||
}
|
||||
if let Some(legend_schema) = properties.get_mut("format") {
|
||||
*legend_schema = json_schema!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -267,8 +263,8 @@ pub(crate) fn schema_from_correspondents(crrspd_list: &[Correspondent]) -> schem
|
|||
})
|
||||
.as_value()
|
||||
.clone();
|
||||
});
|
||||
properties.as_object_mut().map(|prop| {
|
||||
}
|
||||
if let Some(prop) = properties.as_object_mut() {
|
||||
let key_name = "most_likely_value_reasoning_summarized";
|
||||
prop.shift_insert(
|
||||
2,
|
||||
|
|
@ -277,17 +273,16 @@ pub(crate) fn schema_from_correspondents(crrspd_list: &[Correspondent]) -> schem
|
|||
);
|
||||
prop.get_mut("required")
|
||||
.map(|required| required.as_array_mut().map(|rv| rv.push(json!(key_name))));
|
||||
});
|
||||
properties
|
||||
.get_mut("value")
|
||||
.map(|value_schema| *value_schema = type_allowed_correspondens.as_value().clone());
|
||||
properties.get_mut("alternative_values").map(|array| {
|
||||
array
|
||||
.get_mut("items")
|
||||
.map(|value_schema| *value_schema = type_allowed_correspondens.as_value().clone());
|
||||
});
|
||||
properties
|
||||
});
|
||||
}
|
||||
if let Some(value_schema) = properties.get_mut("value") {
|
||||
*value_schema = type_allowed_correspondens.as_value().clone();
|
||||
}
|
||||
if let Some(array) = properties.get_mut("alternative_values")
|
||||
&& let Some(value_schema) = array.get_mut("items")
|
||||
{
|
||||
*value_schema = type_allowed_correspondens.as_value().clone();
|
||||
}
|
||||
}
|
||||
base_schema
|
||||
}
|
||||
|
||||
|
|
@ -296,12 +291,11 @@ pub(crate) fn schema_from_custom_field(cf: &CustomField) -> Option<schemars::Sch
|
|||
// set field of description schema as a constant string value matching the name
|
||||
// of the custom field. This should guide the llm token generation to extract the
|
||||
// desired information from the document
|
||||
base_schema.get_mut("properties").map(|properties| {
|
||||
properties.get_mut("description").map(|description_schema| {
|
||||
*description_schema = json_schema!({ "const": cf.name }).as_value().clone()
|
||||
});
|
||||
properties
|
||||
});
|
||||
if let Some(properties) = base_schema.get_mut("properties")
|
||||
&& let Some(description_schema) = properties.get_mut("description")
|
||||
{
|
||||
*description_schema = json_schema!({ "const": cf.name }).as_value().clone();
|
||||
}
|
||||
let field_schema = match cf.data_type {
|
||||
paperless_api_client::types::DataTypeEnum::String => schema_for!(String),
|
||||
paperless_api_client::types::DataTypeEnum::Date => schema_for!(chrono::NaiveDate),
|
||||
|
|
@ -316,7 +310,7 @@ pub(crate) fn schema_from_custom_field(cf: &CustomField) -> Option<schemars::Sch
|
|||
return None;
|
||||
};
|
||||
let enum_values = serde_json::to_value(
|
||||
&select_options
|
||||
select_options
|
||||
.select_options
|
||||
.into_iter()
|
||||
.map(|o| o.label)
|
||||
|
|
@ -333,7 +327,7 @@ pub(crate) fn schema_from_custom_field(cf: &CustomField) -> Option<schemars::Sch
|
|||
return None;
|
||||
}
|
||||
};
|
||||
if let Some(guide_value) = guide_value_from_custom_field(&cf) {
|
||||
if let Some(guide_value) = guide_value_from_custom_field(cf) {
|
||||
base_schema.get_mut("properties").map(|properties| {
|
||||
properties.get_mut("format").map(|legend_schema| {
|
||||
*legend_schema = json_schema!({
|
||||
|
|
@ -351,24 +345,23 @@ pub(crate) fn schema_from_custom_field(cf: &CustomField) -> Option<schemars::Sch
|
|||
});
|
||||
} else {
|
||||
// remove field_legend from schema
|
||||
base_schema.get_mut("properties").map(|properties| {
|
||||
if let Some(prop) = properties.as_object_mut() {
|
||||
prop.remove("format");
|
||||
}
|
||||
});
|
||||
if let Some(properties) = base_schema.get_mut("properties")
|
||||
&& let Some(prop) = properties.as_object_mut()
|
||||
{
|
||||
prop.remove("format");
|
||||
}
|
||||
}
|
||||
// set the schema of the field value according to the type of custom field
|
||||
base_schema.get_mut("properties").map(|properties| {
|
||||
properties
|
||||
.get_mut("value")
|
||||
.map(|value_schema| *value_schema = field_schema.as_value().clone());
|
||||
properties.get_mut("alternative_values").map(|array| {
|
||||
array
|
||||
.get_mut("items")
|
||||
.map(|value_schema| *value_schema = field_schema.as_value().clone());
|
||||
});
|
||||
properties
|
||||
});
|
||||
if let Some(properties) = base_schema.get_mut("properties") {
|
||||
if let Some(value_schema) = properties.get_mut("value") {
|
||||
*value_schema = field_schema.as_value().clone();
|
||||
}
|
||||
if let Some(array) = properties.get_mut("alternative_values")
|
||||
&& let Some(value_schema) = array.get_mut("items")
|
||||
{
|
||||
*value_schema = field_schema.as_value().clone();
|
||||
}
|
||||
}
|
||||
Some(base_schema)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue