Improve fuzzy search (#279)

* Add SearchMode fzf.

Add a new search mode "fzf" that tries to mimic the search syntax of
https://github.com/junegunn/fzf#search-syntax
This search mode splits the query into terms, and each term is matched
individually. A term can carry an operator (prefix match, suffix match, or
exact match) and can be inverted. Additionally, smart-case matching is
performed: if a term contains an uppercase letter, the match will be
case-sensitive.

* PR feedback.

 - Use SearchMode::Fuzzy instead of SearchMode::Fzf
 - update docs
 - re-order tests so previous fuzzy tests come first, add more tests for each operator

* PR comments: remove named arguments, match expression

* PR comments: macro -> async func
This commit is contained in:
Patrick 2022-03-18 12:37:27 +01:00 committed by GitHub
parent 7cde55a751
commit fae118a46b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 226 additions and 65 deletions

1
Cargo.lock generated
View file

@ -116,6 +116,7 @@ dependencies = [
"minspan", "minspan",
"parse_duration", "parse_duration",
"rand 0.8.5", "rand 0.8.5",
"regex",
"reqwest", "reqwest",
"rmp-serde", "rmp-serde",
"rust-crypto", "rust-crypto",

View file

@ -47,3 +47,4 @@ sqlx = { version = "0.5", features = [
"sqlite", "sqlite",
] } ] }
minspan = "0.1.1" minspan = "0.1.1"
regex = "1.5.4"

View file

@ -7,6 +7,7 @@ use chrono::Utc;
use eyre::Result; use eyre::Result;
use itertools::Itertools; use itertools::Itertools;
use regex::Regex;
use sqlx::sqlite::{ use sqlx::sqlite::{
SqliteConnectOptions, SqliteJournalMode, SqlitePool, SqlitePoolOptions, SqliteRow, SqliteConnectOptions, SqliteJournalMode, SqlitePool, SqlitePoolOptions, SqliteRow,
@ -286,24 +287,86 @@ impl Database for Sqlite {
let query = query.to_string().replace('*', "%"); // allow wildcard char let query = query.to_string().replace('*', "%"); // allow wildcard char
let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l)); let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l));
let query = match search_mode { let (query_sql, query_params) = match search_mode {
SearchMode::Prefix => query, SearchMode::Prefix => ("command like ?1".to_string(), vec![format!("{}%", query)]),
SearchMode::FullText => format!("%{}", query), SearchMode::FullText => ("command like ?1".to_string(), vec![format!("%{}%", query)]),
SearchMode::Fuzzy => query.split("").join("%"), SearchMode::Fuzzy => {
let split_regex = Regex::new(r" +").unwrap();
let terms: Vec<&str> = split_regex.split(query.as_str()).collect();
let mut query_sql = std::string::String::new();
let mut query_params = Vec::with_capacity(terms.len());
let mut was_or = false;
for (i, query_part) in terms.into_iter().enumerate() {
// TODO smart case mode could be made configurable like in fzf
let (operator, glob) = if query_part.contains(char::is_uppercase) {
("glob", '*')
} else {
("like", '%')
};
let (is_inverse, query_part) = match query_part.strip_prefix('!') {
Some(stripped) => (true, stripped),
None => (false, query_part),
};
match query_part {
"|" => {
if !was_or {
query_sql.push_str(" OR ");
was_or = true;
continue;
} else {
query_params.push(format!("{glob}|{glob}"));
}
}
exact_prefix if query_part.starts_with('^') => query_params.push(format!(
"{term}{glob}",
term = exact_prefix.strip_prefix('^').unwrap()
)),
exact_suffix if query_part.ends_with('$') => query_params.push(format!(
"{glob}{term}",
term = exact_suffix.strip_suffix('$').unwrap()
)),
exact if query_part.starts_with('\'') => query_params.push(format!(
"{glob}{term}{glob}",
term = exact.strip_prefix('\'').unwrap()
)),
exact if is_inverse => {
query_params.push(format!("{glob}{term}{glob}", term = exact))
}
_ => {
query_params.push(query_part.split("").join(glob.to_string().as_str()))
}
}
if i > 0 && !was_or {
query_sql.push_str(" AND ");
}
if is_inverse {
query_sql.push_str("NOT ");
}
query_sql
.push_str(format!("command {} ?{}", operator, query_params.len()).as_str());
was_or = false;
}
(query_sql, query_params)
}
}; };
let res = sqlx::query( let res = query_params
.iter()
.fold(
sqlx::query(
format!( format!(
"select * from history h "select * from history h
where command like ?1 || '%' where {}
group by command group by command
having max(timestamp) having max(timestamp)
order by timestamp desc {}", order by timestamp desc {}",
query_sql.as_str(),
limit.clone() limit.clone()
) )
.as_str(), .as_str(),
),
|query, query_param| query.bind(query_param),
) )
.bind(query)
.map(Self::query_history) .map(Self::query_history)
.fetch_all(&self.pool) .fetch_all(&self.pool)
.await?; .await?;
@ -326,6 +389,36 @@ mod test {
use super::*; use super::*;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
/// Test helper: runs `query` against `db` using `mode` and asserts that the
/// number of returned history entries equals `expected`.
///
/// `None` is passed as the first argument of `Database::search`
/// (presumably the result limit — confirm against the trait definition).
///
/// On a count mismatch the assertion message includes the query and the
/// commands that were actually returned, so a failing test shows what matched.
///
/// # Errors
///
/// Propagates any error returned by `db.search`.
///
/// # Panics
///
/// Panics if the number of results differs from `expected`.
async fn assert_search_eq(
    db: &impl Database,
    mode: SearchMode,
    query: &str,
    expected: usize,
) -> Result<Vec<History>> {
    let results = db.search(None, mode, query).await?;
    assert_eq!(
        results.len(),
        expected,
        "query \"{}\", commands: {:?}",
        query,
        results.iter().map(|a| &a.command).collect::<Vec<&String>>()
    );
    Ok(results)
}
/// Test helper: runs `query` against `db` using `mode` and asserts that the
/// returned commands equal `expected_commands`, in the same order.
///
/// The result count is checked first via `assert_search_eq`, so a length
/// mismatch is reported with that helper's message.
///
/// # Panics
///
/// Panics if the search fails, if the result count differs from
/// `expected_commands.len()`, or if the commands differ from
/// `expected_commands`.
async fn assert_search_commands(
    db: &impl Database,
    mode: SearchMode,
    query: &str,
    expected_commands: Vec<&str>,
) {
    let expected_len = expected_commands.len();
    let found = assert_search_eq(db, mode, query, expected_len)
        .await
        .unwrap();

    // Borrow each command as `&str` so it compares directly with the expectation.
    let mut actual: Vec<&str> = Vec::with_capacity(found.len());
    for entry in found.iter() {
        actual.push(entry.command.as_str());
    }
    assert_eq!(actual, expected_commands);
}
async fn new_history_item(db: &mut impl Database, cmd: &str) -> Result<()> { async fn new_history_item(db: &mut impl Database, cmd: &str) -> Result<()> {
let history = History::new( let history = History::new(
chrono::Utc::now(), chrono::Utc::now(),
@ -344,14 +437,15 @@ mod test {
let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); let mut db = Sqlite::new("sqlite::memory:").await.unwrap();
new_history_item(&mut db, "ls /home/ellie").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap();
let mut results = db.search(None, SearchMode::Prefix, "ls").await.unwrap(); assert_search_eq(&db, SearchMode::Prefix, "ls", 1)
assert_eq!(results.len(), 1); .await
.unwrap();
results = db.search(None, SearchMode::Prefix, "/home").await.unwrap(); assert_search_eq(&db, SearchMode::Prefix, "/home", 0)
assert_eq!(results.len(), 0); .await
.unwrap();
results = db.search(None, SearchMode::Prefix, "ls ").await.unwrap(); assert_search_eq(&db, SearchMode::Prefix, "ls ", 0)
assert_eq!(results.len(), 0); .await
.unwrap();
} }
#[tokio::test(flavor = "multi_thread")] #[tokio::test(flavor = "multi_thread")]
@ -359,17 +453,15 @@ mod test {
let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); let mut db = Sqlite::new("sqlite::memory:").await.unwrap();
new_history_item(&mut db, "ls /home/ellie").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap();
let mut results = db.search(None, SearchMode::FullText, "ls").await.unwrap(); assert_search_eq(&db, SearchMode::FullText, "ls", 1)
assert_eq!(results.len(), 1); .await
.unwrap();
results = db assert_search_eq(&db, SearchMode::FullText, "/home", 1)
.search(None, SearchMode::FullText, "/home") .await
.unwrap();
assert_search_eq(&db, SearchMode::FullText, "ls ", 0)
.await .await
.unwrap(); .unwrap();
assert_eq!(results.len(), 1);
results = db.search(None, SearchMode::FullText, "ls ").await.unwrap();
assert_eq!(results.len(), 0);
} }
#[tokio::test(flavor = "multi_thread")] #[tokio::test(flavor = "multi_thread")]
@ -377,34 +469,77 @@ mod test {
let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); let mut db = Sqlite::new("sqlite::memory:").await.unwrap();
new_history_item(&mut db, "ls /home/ellie").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap();
new_history_item(&mut db, "ls /home/frank").await.unwrap(); new_history_item(&mut db, "ls /home/frank").await.unwrap();
new_history_item(&mut db, "cd /home/ellie").await.unwrap(); new_history_item(&mut db, "cd /home/Ellie").await.unwrap();
new_history_item(&mut db, "/home/ellie/.bin/rustup") new_history_item(&mut db, "/home/ellie/.bin/rustup")
.await .await
.unwrap(); .unwrap();
let mut results = db.search(None, SearchMode::Fuzzy, "ls /").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "ls /", 3)
assert_eq!(results.len(), 2); .await
.unwrap();
results = db.search(None, SearchMode::Fuzzy, "l/h/").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "ls/", 2)
assert_eq!(results.len(), 2); .await
.unwrap();
results = db.search(None, SearchMode::Fuzzy, "/h/e").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "l/h/", 2)
assert_eq!(results.len(), 3); .await
.unwrap();
results = db.search(None, SearchMode::Fuzzy, "/hmoe/").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "/h/e", 3)
assert_eq!(results.len(), 0); .await
.unwrap();
results = db assert_search_eq(&db, SearchMode::Fuzzy, "/hmoe/", 0)
.search(None, SearchMode::Fuzzy, "ellie/home") .await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "ellie/home", 0)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "lsellie", 1)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, " ", 4)
.await .await
.unwrap(); .unwrap();
assert_eq!(results.len(), 0);
results = db.search(None, SearchMode::Fuzzy, "lsellie").await.unwrap(); // single term operators
assert_eq!(results.len(), 1); assert_search_eq(&db, SearchMode::Fuzzy, "^ls", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "'ls", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "ellie$", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "!^ls", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "!ellie", 1)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "!ellie$", 2)
.await
.unwrap();
results = db.search(None, SearchMode::Fuzzy, " ").await.unwrap(); // multiple terms
assert_eq!(results.len(), 3); assert_search_eq(&db, SearchMode::Fuzzy, "ls !ellie", 1)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "^ls !e$", 1)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "home !^ls", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "'frank | 'rustup", 2)
.await
.unwrap();
assert_search_eq(&db, SearchMode::Fuzzy, "'frank | 'rustup 'ls", 1)
.await
.unwrap();
// case matching
assert_search_eq(&db, SearchMode::Fuzzy, "Ellie", 1)
.await
.unwrap();
} }
#[tokio::test(flavor = "multi_thread")] #[tokio::test(flavor = "multi_thread")]
@ -414,17 +549,16 @@ mod test {
new_history_item(&mut db, "curl").await.unwrap(); new_history_item(&mut db, "curl").await.unwrap();
new_history_item(&mut db, "corburl").await.unwrap(); new_history_item(&mut db, "corburl").await.unwrap();
// if fuzzy reordering is on, it should come back in a more sensible order // if fuzzy reordering is on, it should come back in a more sensible order
let mut results = db.search(None, SearchMode::Fuzzy, "curl").await.unwrap(); assert_search_commands(&db, SearchMode::Fuzzy, "curl", vec!["curl", "corburl"]).await;
assert_eq!(results.len(), 2);
let commands: Vec<&String> = results.iter().map(|a| &a.command).collect();
assert_eq!(commands, vec!["curl", "corburl"]);
results = db.search(None, SearchMode::Fuzzy, "xxxx").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "xxxx", 0)
assert_eq!(results.len(), 0); .await
.unwrap();
results = db.search(None, SearchMode::Fuzzy, "").await.unwrap(); assert_search_eq(&db, SearchMode::Fuzzy, "", 2)
assert_eq!(results.len(), 2); .await
.unwrap();
} }
#[tokio::test(flavor = "multi_thread")] #[tokio::test(flavor = "multi_thread")]

View file

@ -17,6 +17,7 @@ where
let mut r = res.clone(); let mut r = res.clone();
let qvec = &query.chars().collect(); let qvec = &query.chars().collect();
r.sort_by_cached_key(|h| { r.sort_by_cached_key(|h| {
// TODO for fzf search we should sum up scores for each matched term
let (from, to) = match minspan::span(qvec, &(f(h).chars().collect())) { let (from, to) = match minspan::span(qvec, &(f(h).chars().collect())) {
Some(x) => x, Some(x) => x,
// this is a little unfortunate: when we are asked to match a query that is found nowhere, // this is a little unfortunate: when we are asked to match a query that is found nowhere,

View file

@ -97,7 +97,8 @@ key = "~/.atuin-session"
### `search_mode` ### `search_mode`
Which search mode to use. Atuin supports "prefix", full text and "fuzzy" search Which search mode to use. Atuin supports "prefix", full text and "fuzzy" search
modes. The prefix search for "query\*", fulltext "\*query\*", and fuzzy "\*q\*u\*e\*r\*y\*" modes. The prefix searches for "query\*", fulltext "\*query\*", and fuzzy applies
the search syntax [described below](#fuzzy-search-syntax).
Defaults to "prefix" Defaults to "prefix"
@ -105,6 +106,29 @@ Defaults to "prefix"
search_mode = "fulltext" search_mode = "fulltext"
``` ```
#### `fuzzy` search syntax
The "fuzzy" search syntax is based on the
[fzf search syntax](https://github.com/junegunn/fzf#search-syntax).
| Token | Match type | Description |
| --------- | -------------------------- | ------------------------------------ |
| `sbtrkt` | fuzzy-match | Items that match `sbtrkt` |
| `'wild` | exact-match (quoted) | Items that include `wild` |
| `^music` | prefix-exact-match | Items that start with `music` |
| `.mp3$` | suffix-exact-match | Items that end with `.mp3` |
| `!fire` | inverse-exact-match | Items that do not include `fire` |
| `!^music` | inverse-prefix-exact-match | Items that do not start with `music` |
| `!.mp3$` | inverse-suffix-exact-match | Items that do not end with `.mp3` |
A single bar character term acts as an OR operator. For example, the following
query matches entries that start with `core` and end with either `go`, `rb`,
or `py`.
```
^core go$ | rb$ | py$
```
## Server config ## Server config
`// TODO` `// TODO`