From 2024884f49334e7eaf64adc425da77b773204b42 Mon Sep 17 00:00:00 2001
From: Mark Wotton <mwotton@gmail.com>
Date: Thu, 9 Sep 2021 17:46:46 +0700
Subject: [PATCH] Reordered fuzzy search (#179)

* add test demonstrating problem

* add a reordered fuzzy-search mode that presents shorter matches first,
rather than using strict chronological ordering.

* fix warnings, refactor interface to minspan slightly
---
 Cargo.lock                      |  7 +++++++
 atuin-client/Cargo.toml         |  1 +
 atuin-client/src/database.rs    | 24 +++++++++++++++++++++++-
 atuin-client/src/import/resh.rs |  1 -
 atuin-client/src/lib.rs         |  1 +
 atuin-client/src/ordering.rs    | 31 +++++++++++++++++++++++++++++++
 atuin-client/src/settings.rs    |  1 -
 7 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 atuin-client/src/ordering.rs

diff --git a/Cargo.lock b/Cargo.lock
index abb5959..8a25e3d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -138,6 +138,7 @@ dependencies = [
  "indicatif",
  "itertools",
  "log",
+ "minspan",
  "parse_duration",
  "rand 0.8.3",
  "reqwest",
@@ -1168,6 +1169,12 @@ dependencies = [
  "unicase",
 ]
 
+[[package]]
+name = "minspan"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1269a17ac308ae0b906ec1b0ff8062fd0c82f18cc2956faa367302ec3380f4e8"
+
 [[package]]
 name = "mio"
 version = "0.7.11"
diff --git a/atuin-client/Cargo.toml b/atuin-client/Cargo.toml
index b4c9c13..300ce26 100644
--- a/atuin-client/Cargo.toml
+++ b/atuin-client/Cargo.toml
@@ -40,6 +40,7 @@ humantime = "2.1.0"
 itertools = "0.10.0"
 shellexpand = "2"
 sqlx = { version = "0.5", features = [ "runtime-tokio-rustls", "uuid", "chrono", "sqlite" ] }
+minspan = "0.1.1"
 
 [dev-dependencies]
 tokio-test = "*"
diff --git a/atuin-client/src/database.rs b/atuin-client/src/database.rs
index 6a70ae3..8dff15f 100644
--- a/atuin-client/src/database.rs
+++ b/atuin-client/src/database.rs
@@ -14,6 +14,7 @@ use sqlx::sqlite::{
 use sqlx::Row;
 
 use super::history::History;
+use super::ordering;
 use super::settings::SearchMode;
 
 #[async_trait]
@@ -281,6 +282,7 @@ impl Database for Sqlite {
         search_mode: SearchMode,
         query: &str,
     ) -> Result<Vec<History>> {
+        let orig_query = query;
         let query = query.to_string().replace("*", "%"); // allow wildcard char
         let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l));
 
@@ -308,7 +310,7 @@ impl Database for Sqlite {
         .fetch_all(&self.pool)
         .await?;
 
-        Ok(res)
+        Ok(ordering::reorder_fuzzy(search_mode, orig_query, res))
     }
 
     async fn query_history(&self, query: &str) -> Result<Vec<History>> {
@@ -405,4 +407,24 @@ mod test {
         results = db.search(None, SearchMode::Fuzzy, " ").await.unwrap();
         assert_eq!(results.len(), 3);
     }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_search_reordered_fuzzy() {
+        let mut db = Sqlite::new("sqlite::memory:").await.unwrap();
+        // `curl` is inserted before `corburl`, yet as the tighter match for the
+        // query it must still be ranked first once fuzzy reordering is applied.
+        new_history_item(&mut db, "curl").await.unwrap();
+        new_history_item(&mut db, "corburl").await.unwrap();
+        let hits = db.search(None, SearchMode::Fuzzy, "curl").await.unwrap();
+        let commands: Vec<&str> = hits.iter().map(|h| h.command.as_str()).collect();
+        assert_eq!(commands, ["curl", "corburl"]);
+
+        // A query that matches no stored command returns nothing.
+        let hits = db.search(None, SearchMode::Fuzzy, "xxxx").await.unwrap();
+        assert!(hits.is_empty());
+
+        // The empty query is expected to return both stored rows.
+        let hits = db.search(None, SearchMode::Fuzzy, "").await.unwrap();
+        assert_eq!(hits.len(), 2);
+    }
 }
diff --git a/atuin-client/src/import/resh.rs b/atuin-client/src/import/resh.rs
index efe5bb5..fa55300 100644
--- a/atuin-client/src/import/resh.rs
+++ b/atuin-client/src/import/resh.rs
@@ -8,7 +8,6 @@ use atuin_common::utils::uuid_v4;
 use chrono::{TimeZone, Utc};
 use directories::UserDirs;
 use eyre::{eyre, Result};
-use serde::Deserialize;
 
 use super::{count_lines, Importer};
 use crate::history::History;
diff --git a/atuin-client/src/lib.rs b/atuin-client/src/lib.rs
index 82f19b5..fa01c17 100644
--- a/atuin-client/src/lib.rs
+++ b/atuin-client/src/lib.rs
@@ -11,5 +11,6 @@ pub mod database;
 pub mod encryption;
 pub mod history;
 pub mod import;
+pub mod ordering;
 pub mod settings;
 pub mod sync;
diff --git a/atuin-client/src/ordering.rs b/atuin-client/src/ordering.rs
new file mode 100644
index 0000000..b6051d1
--- /dev/null
+++ b/atuin-client/src/ordering.rs
@@ -0,0 +1,31 @@
+use super::history::History;
+use super::settings::SearchMode;
+use minspan::minspan;
+
+/// Re-ranks `res` by match tightness in fuzzy mode; other modes return it unchanged.
+pub fn reorder_fuzzy(mode: SearchMode, query: &str, res: Vec<History>) -> Vec<History> {
+    match mode {
+        SearchMode::Fuzzy => reorder(query, |entry| &entry.command, res),
+        _ => res,
+    }
+}
+
+/// Stable-sorts `res` by the width of the span `minspan` reports for `query` inside
+/// `f(item)`, narrowest first; items with equal keys keep their incoming order.
+fn reorder<F, A>(query: &str, f: F, mut res: Vec<A>) -> Vec<A>
+where
+    F: Fn(&A) -> &String,
+    A: Clone,
+{
+    let qvec: Vec<char> = query.chars().collect();
+    res.sort_by_cached_key(|item| {
+        let text: Vec<char> = f(item).chars().collect();
+        match minspan::span(&qvec, &text) {
+            Some((from, to)) => 1 + to - from,
+            // `None < Some(_)`, so a bare `Option` key would float non-matches to the
+            // front; rank them after every real span instead of by the result count.
+            None => usize::MAX,
+        }
+    });
+    res
+}
diff --git a/atuin-client/src/settings.rs b/atuin-client/src/settings.rs
index 0cb845c..407013f 100644
--- a/atuin-client/src/settings.rs
+++ b/atuin-client/src/settings.rs
@@ -51,7 +51,6 @@ pub struct Settings {
     pub key_path: String,
     pub session_path: String,
     pub search_mode: SearchMode,
-
     // This is automatically loaded when settings is created. Do not set in
     // config! Keep secrets and settings apart.
     pub session_token: String,