From 2024884f49334e7eaf64adc425da77b773204b42 Mon Sep 17 00:00:00 2001 From: Mark Wotton <mwotton@gmail.com> Date: Thu, 9 Sep 2021 17:46:46 +0700 Subject: [PATCH] Reordered fuzzy search (#179) * add test demonstrating problem * add a reordered fuzzy-search mode that presents shorter matches first, rather than using strict chronological ordering. * fix warnings, refactor interface to minspan slightly --- Cargo.lock | 7 +++++++ atuin-client/Cargo.toml | 1 + atuin-client/src/database.rs | 24 +++++++++++++++++++++++- atuin-client/src/import/resh.rs | 1 - atuin-client/src/lib.rs | 1 + atuin-client/src/ordering.rs | 31 +++++++++++++++++++++++++++++++ atuin-client/src/settings.rs | 1 - 7 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 atuin-client/src/ordering.rs diff --git a/Cargo.lock b/Cargo.lock index abb5959..8a25e3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,6 +138,7 @@ dependencies = [ "indicatif", "itertools", "log", + "minspan", "parse_duration", "rand 0.8.3", "reqwest", @@ -1168,6 +1169,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minspan" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1269a17ac308ae0b906ec1b0ff8062fd0c82f18cc2956faa367302ec3380f4e8" + [[package]] name = "mio" version = "0.7.11" diff --git a/atuin-client/Cargo.toml b/atuin-client/Cargo.toml index b4c9c13..300ce26 100644 --- a/atuin-client/Cargo.toml +++ b/atuin-client/Cargo.toml @@ -40,6 +40,7 @@ humantime = "2.1.0" itertools = "0.10.0" shellexpand = "2" sqlx = { version = "0.5", features = [ "runtime-tokio-rustls", "uuid", "chrono", "sqlite" ] } +minspan = "0.1.1" [dev-dependencies] tokio-test = "*" diff --git a/atuin-client/src/database.rs b/atuin-client/src/database.rs index 6a70ae3..8dff15f 100644 --- a/atuin-client/src/database.rs +++ b/atuin-client/src/database.rs @@ -14,6 +14,7 @@ use sqlx::sqlite::{ use sqlx::Row; use super::history::History; +use super::ordering; use super::settings::SearchMode; #[async_trait] @@ -281,6 +282,7 @@ impl Database for Sqlite { search_mode: SearchMode, query: &str, ) -> Result<Vec<History>> { + let orig_query = query; let query = query.to_string().replace("*", "%"); // allow wildcard char let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l)); @@ -308,7 +310,7 @@ impl Database for Sqlite { .fetch_all(&self.pool) .await?; - Ok(res) + Ok(ordering::reorder_fuzzy(search_mode, orig_query, res)) } async fn query_history(&self, query: &str) -> Result<Vec<History>> { @@ -405,4 +407,24 @@ mod test { results = db.search(None, SearchMode::Fuzzy, " ").await.unwrap(); assert_eq!(results.len(), 3); } + + #[tokio::test(flavor = "multi_thread")] + async fn test_search_reordered_fuzzy() { + let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); + // test ordering of results: we should choose the first, even though it happened longer ago. + + new_history_item(&mut db, "curl").await.unwrap(); + new_history_item(&mut db, "corburl").await.unwrap(); + // if fuzzy reordering is on, it should come back in a more sensible order + let mut results = db.search(None, SearchMode::Fuzzy, "curl").await.unwrap(); + assert_eq!(results.len(), 2); + let commands: Vec<&String> = results.iter().map(|a| &a.command).collect(); + assert_eq!(commands, vec!["curl", "corburl"]); + + results = db.search(None, SearchMode::Fuzzy, "xxxx").await.unwrap(); + assert_eq!(results.len(), 0); + + results = db.search(None, SearchMode::Fuzzy, "").await.unwrap(); + assert_eq!(results.len(), 2); + } } diff --git a/atuin-client/src/import/resh.rs b/atuin-client/src/import/resh.rs index efe5bb5..fa55300 100644 --- a/atuin-client/src/import/resh.rs +++ b/atuin-client/src/import/resh.rs @@ -8,7 +8,6 @@ use atuin_common::utils::uuid_v4; use chrono::{TimeZone, Utc}; use directories::UserDirs; use eyre::{eyre, Result}; -use serde::Deserialize; use super::{count_lines, Importer}; use crate::history::History; diff --git a/atuin-client/src/lib.rs b/atuin-client/src/lib.rs index 82f19b5..fa01c17 100644 --- a/atuin-client/src/lib.rs +++ b/atuin-client/src/lib.rs @@ -11,5 +11,6 @@ pub mod database; pub mod encryption; pub mod history; pub mod import; +pub mod ordering; pub mod settings; pub mod sync; diff --git a/atuin-client/src/ordering.rs b/atuin-client/src/ordering.rs new file mode 100644 index 0000000..b6051d1 --- /dev/null +++ b/atuin-client/src/ordering.rs @@ -0,0 +1,31 @@ +use super::history::History; +use super::settings::SearchMode; +use minspan::minspan; + +pub fn reorder_fuzzy(mode: SearchMode, query: &str, res: Vec<History>) -> Vec<History> { + match mode { + SearchMode::Fuzzy => reorder(query, |x| &x.command, res), + _ => res, + } +} + +fn reorder<F, A>(query: &str, f: F, res: Vec<A>) -> Vec<A> +where + F: Fn(&A) -> &String, + A: Clone, +{ + let mut r = res.clone(); + let qvec = &query.chars().collect(); + r.sort_by_cached_key(|h| { + let (from, to) = match minspan::span(qvec, &(f(h).chars().collect())) { + Some(x) => x, + // this is a little unfortunate: when we are asked to match a query that is found nowhere, + // we don't want to return a None, as the comparison behaviour would put the worst matches + // at the front. therefore, we'll return a set of indices that are one larger than the longest + // possible legitimate match. This is meaningless except as a comparison. + None => (0, res.len()), + }; + 1 + to - from + }); + r +} diff --git a/atuin-client/src/settings.rs b/atuin-client/src/settings.rs index 0cb845c..407013f 100644 --- a/atuin-client/src/settings.rs +++ b/atuin-client/src/settings.rs @@ -51,7 +51,6 @@ pub struct Settings { pub key_path: String, pub session_path: String, pub search_mode: SearchMode, - // This is automatically loaded when settings is created. Do not set in // config! Keep secrets and settings apart. pub session_token: String,