Reordered fuzzy search (#179)

* add test demonstrating problem

* add a reordered fuzzy-search mode that presents shorter matches first,
rather than using strict chronological ordering.

* fix warnings, refactor interface to minspan slightly
This commit is contained in:
Mark Wotton 2021-09-09 17:46:46 +07:00 committed by GitHub
parent 1babb41ea9
commit 2024884f49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 63 additions and 3 deletions

7
Cargo.lock generated
View file

@ -138,6 +138,7 @@ dependencies = [
"indicatif",
"itertools",
"log",
"minspan",
"parse_duration",
"rand 0.8.3",
"reqwest",
@ -1168,6 +1169,12 @@ dependencies = [
"unicase",
]
[[package]]
name = "minspan"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1269a17ac308ae0b906ec1b0ff8062fd0c82f18cc2956faa367302ec3380f4e8"
[[package]]
name = "mio"
version = "0.7.11"

View file

@ -40,6 +40,7 @@ humantime = "2.1.0"
itertools = "0.10.0"
shellexpand = "2"
sqlx = { version = "0.5", features = [ "runtime-tokio-rustls", "uuid", "chrono", "sqlite" ] }
minspan = "0.1.1"
[dev-dependencies]
tokio-test = "*"

View file

@ -14,6 +14,7 @@ use sqlx::sqlite::{
use sqlx::Row;
use super::history::History;
use super::ordering;
use super::settings::SearchMode;
#[async_trait]
@ -281,6 +282,7 @@ impl Database for Sqlite {
search_mode: SearchMode,
query: &str,
) -> Result<Vec<History>> {
let orig_query = query;
let query = query.to_string().replace("*", "%"); // allow wildcard char
let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l));
@ -308,7 +310,7 @@ impl Database for Sqlite {
.fetch_all(&self.pool)
.await?;
Ok(res)
Ok(ordering::reorder_fuzzy(search_mode, orig_query, res))
}
async fn query_history(&self, query: &str) -> Result<Vec<History>> {
@ -405,4 +407,24 @@ mod test {
results = db.search(None, SearchMode::Fuzzy, " ").await.unwrap();
assert_eq!(results.len(), 3);
}
#[tokio::test(flavor = "multi_thread")]
async fn test_search_reordered_fuzzy() {
    let mut db = Sqlite::new("sqlite::memory:").await.unwrap();

    // "curl" is inserted first, so it is the older entry. A fuzzy search for
    // "curl" should still list it ahead of the more recent "corburl".
    new_history_item(&mut db, "curl").await.unwrap();
    new_history_item(&mut db, "corburl").await.unwrap();

    // Both entries match the query; the expected order is "curl" then "corburl".
    let hits = db.search(None, SearchMode::Fuzzy, "curl").await.unwrap();
    assert_eq!(hits.len(), 2);
    let commands: Vec<&String> = hits.iter().map(|entry| &entry.command).collect();
    assert_eq!(commands, vec!["curl", "corburl"]);

    // A query that matches nothing yields no results.
    let misses = db.search(None, SearchMode::Fuzzy, "xxxx").await.unwrap();
    assert_eq!(misses.len(), 0);

    // An empty query returns every entry.
    let everything = db.search(None, SearchMode::Fuzzy, "").await.unwrap();
    assert_eq!(everything.len(), 2);
}
}

View file

@ -8,7 +8,6 @@ use atuin_common::utils::uuid_v4;
use chrono::{TimeZone, Utc};
use directories::UserDirs;
use eyre::{eyre, Result};
use serde::Deserialize;
use super::{count_lines, Importer};
use crate::history::History;

View file

@ -11,5 +11,6 @@ pub mod database;
pub mod encryption;
pub mod history;
pub mod import;
pub mod ordering;
pub mod settings;
pub mod sync;

View file

@ -0,0 +1,31 @@
use super::history::History;
use super::settings::SearchMode;
use minspan::minspan;
/// Re-sorts search results by match quality when the search mode is fuzzy.
///
/// For `SearchMode::Fuzzy`, `res` is handed to `reorder`, which is keyed on each
/// entry's `command`. For every other mode, `res` is returned exactly as given.
pub fn reorder_fuzzy(mode: SearchMode, query: &str, res: Vec<History>) -> Vec<History> {
    if let SearchMode::Fuzzy = mode {
        return reorder(query, |entry| &entry.command, res);
    }
    res
}
/// Sorts `res` so that elements with the tightest match for `query` come first.
///
/// `f` projects each element to the string that is matched against `query`.
/// The sort key is `1 + to - from`, where `(from, to)` is the pair returned by
/// `minspan::span` (presumably inclusive indices of the shortest span of the
/// string that contains the query -- confirm against the `minspan` docs).
/// Elements for which `minspan::span` returns `None` get the key `usize::MAX`
/// and therefore sort after every element that did match.
///
/// Elements with equal keys keep their incoming order, because
/// `sort_by_cached_key` is a stable sort.
fn reorder<F, A>(query: &str, f: F, res: Vec<A>) -> Vec<A>
where
    F: Fn(&A) -> &String,
    // The sort is done in place, so nothing is cloned any more; the bound is
    // kept only so the signature is unchanged.
    A: Clone,
{
    // `res` is owned, so it can be sorted directly without copying the vector.
    let mut r = res;
    let qvec = &query.chars().collect();
    r.sort_by_cached_key(|h| {
        match minspan::span(qvec, &(f(h).chars().collect())) {
            Some((from, to)) => 1 + to - from,
            // A non-matching element must never outrank a real match. The
            // previous sentinel was derived from the number of results, which
            // has no relation to span lengths and could be smaller than a
            // genuine match's span. `usize::MAX` is meaningless except as a
            // comparison value that always sorts last.
            None => usize::MAX,
        }
    });
    r
}

View file

@ -51,7 +51,6 @@ pub struct Settings {
pub key_path: String,
pub session_path: String,
pub search_mode: SearchMode,
// This is automatically loaded when settings is created. Do not set in
// config! Keep secrets and settings apart.
pub session_token: String,