diff --git a/Cargo.lock b/Cargo.lock index 01e5338..5ef5093 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,6 +116,7 @@ dependencies = [ "minspan", "parse_duration", "rand 0.8.5", + "regex", "reqwest", "rmp-serde", "rust-crypto", diff --git a/atuin-client/Cargo.toml b/atuin-client/Cargo.toml index d6ad4ce..54c5127 100644 --- a/atuin-client/Cargo.toml +++ b/atuin-client/Cargo.toml @@ -47,3 +47,4 @@ sqlx = { version = "0.5", features = [ "sqlite", ] } minspan = "0.1.1" +regex = "1.5.4" diff --git a/atuin-client/src/database.rs b/atuin-client/src/database.rs index 3cb8947..3ca6171 100644 --- a/atuin-client/src/database.rs +++ b/atuin-client/src/database.rs @@ -7,6 +7,7 @@ use chrono::Utc; use eyre::Result; use itertools::Itertools; +use regex::Regex; use sqlx::sqlite::{ SqliteConnectOptions, SqliteJournalMode, SqlitePool, SqlitePoolOptions, SqliteRow, @@ -286,27 +287,89 @@ impl Database for Sqlite { let query = query.to_string().replace('*', "%"); // allow wildcard char let limit = limit.map_or("".to_owned(), |l| format!("limit {}", l)); - let query = match search_mode { - SearchMode::Prefix => query, - SearchMode::FullText => format!("%{}", query), - SearchMode::Fuzzy => query.split("").join("%"), + let (query_sql, query_params) = match search_mode { + SearchMode::Prefix => ("command like ?1".to_string(), vec![format!("{}%", query)]), + SearchMode::FullText => ("command like ?1".to_string(), vec![format!("%{}%", query)]), + SearchMode::Fuzzy => { + let split_regex = Regex::new(r" +").unwrap(); + let terms: Vec<&str> = split_regex.split(query.as_str()).collect(); + let mut query_sql = std::string::String::new(); + let mut query_params = Vec::with_capacity(terms.len()); + let mut was_or = false; + for (i, query_part) in terms.into_iter().enumerate() { + // TODO smart case mode could be made configurable like in fzf + let (operator, glob) = if query_part.contains(char::is_uppercase) { + ("glob", '*') + } else { + ("like", '%') + }; + let (is_inverse, query_part) = match query_part.strip_prefix('!') { + Some(stripped) => (true, stripped), + None => (false, query_part), + }; + match query_part { + "|" => { + if !was_or { + query_sql.push_str(" OR "); + was_or = true; + continue; + } else { + query_params.push(format!("{glob}|{glob}")); + } + } + exact_prefix if query_part.starts_with('^') => query_params.push(format!( + "{term}{glob}", + term = exact_prefix.strip_prefix('^').unwrap() + )), + exact_suffix if query_part.ends_with('$') => query_params.push(format!( + "{glob}{term}", + term = exact_suffix.strip_suffix('$').unwrap() + )), + exact if query_part.starts_with('\'') => query_params.push(format!( + "{glob}{term}{glob}", + term = exact.strip_prefix('\'').unwrap() + )), + exact if is_inverse => { + query_params.push(format!("{glob}{term}{glob}", term = exact)) + } + _ => { + query_params.push(query_part.split("").join(glob.to_string().as_str())) + } + } + if i > 0 && !was_or { + query_sql.push_str(" AND "); + } + if is_inverse { + query_sql.push_str("NOT "); + } + query_sql + .push_str(format!("command {} ?{}", operator, query_params.len()).as_str()); + was_or = false; + } + (query_sql, query_params) + } }; - let res = sqlx::query( - format!( - "select * from history h - where command like ?1 || '%' - group by command - having max(timestamp) - order by timestamp desc {}", - limit.clone() + let res = query_params + .iter() + .fold( + sqlx::query( + format!( + "select * from history h + where {} + group by command + having max(timestamp) + order by timestamp desc {}", + query_sql.as_str(), + limit.clone() + ) + .as_str(), + ), + |query, query_param| query.bind(query_param), ) - .as_str(), - ) - .bind(query) - .map(Self::query_history) - .fetch_all(&self.pool) - .await?; + .map(Self::query_history) + .fetch_all(&self.pool) + .await?; Ok(ordering::reorder_fuzzy(search_mode, orig_query, res)) } @@ -326,6 +389,36 @@ mod test { use super::*; use std::time::{Duration, Instant}; + async fn assert_search_eq<'a>( + db: &impl Database, + mode: SearchMode, + query: &str, + expected: usize, + ) -> Result> { + let results = db.search(None, mode, query).await?; + assert_eq!( + results.len(), + expected, + "query \"{}\", commands: {:?}", + query, + results.iter().map(|a| &a.command).collect::>() + ); + Ok(results) + } + + async fn assert_search_commands( + db: &impl Database, + mode: SearchMode, + query: &str, + expected_commands: Vec<&str>, + ) { + let results = assert_search_eq(db, mode, query, expected_commands.len()) + .await + .unwrap(); + let commands: Vec<&str> = results.iter().map(|a| a.command.as_str()).collect(); + assert_eq!(commands, expected_commands); + } + async fn new_history_item(db: &mut impl Database, cmd: &str) -> Result<()> { let history = History::new( chrono::Utc::now(), @@ -344,14 +437,15 @@ mod test { let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap(); - let mut results = db.search(None, SearchMode::Prefix, "ls").await.unwrap(); - assert_eq!(results.len(), 1); - - results = db.search(None, SearchMode::Prefix, "/home").await.unwrap(); - assert_eq!(results.len(), 0); - - results = db.search(None, SearchMode::Prefix, "ls ").await.unwrap(); - assert_eq!(results.len(), 0); + assert_search_eq(&db, SearchMode::Prefix, "ls", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Prefix, "/home", 0) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Prefix, "ls ", 0) + .await + .unwrap(); } #[tokio::test(flavor = "multi_thread")] @@ -359,17 +453,15 @@ mod test { let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap(); - let mut results = db.search(None, SearchMode::FullText, "ls").await.unwrap(); - assert_eq!(results.len(), 1); - - results = db - .search(None, SearchMode::FullText, "/home") + assert_search_eq(&db, SearchMode::FullText, "ls", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::FullText, "/home", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::FullText, "ls ", 0) .await .unwrap(); - assert_eq!(results.len(), 1); - - results = db.search(None, SearchMode::FullText, "ls ").await.unwrap(); - assert_eq!(results.len(), 0); } #[tokio::test(flavor = "multi_thread")] @@ -377,34 +469,77 @@ mod test { let mut db = Sqlite::new("sqlite::memory:").await.unwrap(); new_history_item(&mut db, "ls /home/ellie").await.unwrap(); new_history_item(&mut db, "ls /home/frank").await.unwrap(); - new_history_item(&mut db, "cd /home/ellie").await.unwrap(); + new_history_item(&mut db, "cd /home/Ellie").await.unwrap(); new_history_item(&mut db, "/home/ellie/.bin/rustup") .await .unwrap(); - let mut results = db.search(None, SearchMode::Fuzzy, "ls /").await.unwrap(); - assert_eq!(results.len(), 2); - - results = db.search(None, SearchMode::Fuzzy, "l/h/").await.unwrap(); - assert_eq!(results.len(), 2); - - results = db.search(None, SearchMode::Fuzzy, "/h/e").await.unwrap(); - assert_eq!(results.len(), 3); - - results = db.search(None, SearchMode::Fuzzy, "/hmoe/").await.unwrap(); - assert_eq!(results.len(), 0); - - results = db - .search(None, SearchMode::Fuzzy, "ellie/home") + assert_search_eq(&db, SearchMode::Fuzzy, "ls /", 3) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "ls/", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "l/h/", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "/h/e", 3) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "/hmoe/", 0) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "ellie/home", 0) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "lsellie", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, " ", 4) .await .unwrap(); - assert_eq!(results.len(), 0); - results = db.search(None, SearchMode::Fuzzy, "lsellie").await.unwrap(); - assert_eq!(results.len(), 1); + // single term operators + assert_search_eq(&db, SearchMode::Fuzzy, "^ls", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "'ls", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "ellie$", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "!^ls", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "!ellie", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "!ellie$", 2) + .await + .unwrap(); - results = db.search(None, SearchMode::Fuzzy, " ").await.unwrap(); - assert_eq!(results.len(), 3); + // multiple terms + assert_search_eq(&db, SearchMode::Fuzzy, "ls !ellie", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "^ls !e$", 1) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "home !^ls", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "'frank | 'rustup", 2) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "'frank | 'rustup 'ls", 1) + .await + .unwrap(); + + // case matching + assert_search_eq(&db, SearchMode::Fuzzy, "Ellie", 1) + .await + .unwrap(); } #[tokio::test(flavor = "multi_thread")] @@ -414,17 +549,16 @@ mod test { new_history_item(&mut db, "curl").await.unwrap(); new_history_item(&mut db, "corburl").await.unwrap(); + // if fuzzy reordering is on, it should come back in a more sensible order - let mut results = db.search(None, SearchMode::Fuzzy, "curl").await.unwrap(); - assert_eq!(results.len(), 2); - let commands: Vec<&String> = results.iter().map(|a| &a.command).collect(); - assert_eq!(commands, vec!["curl", "corburl"]); + assert_search_commands(&db, SearchMode::Fuzzy, "curl", vec!["curl", "corburl"]).await; - results = db.search(None, SearchMode::Fuzzy, "xxxx").await.unwrap(); - assert_eq!(results.len(), 0); - - results = db.search(None, SearchMode::Fuzzy, "").await.unwrap(); - assert_eq!(results.len(), 2); + assert_search_eq(&db, SearchMode::Fuzzy, "xxxx", 0) + .await + .unwrap(); + assert_search_eq(&db, SearchMode::Fuzzy, "", 2) + .await + .unwrap(); } #[tokio::test(flavor = "multi_thread")] diff --git a/atuin-client/src/ordering.rs b/atuin-client/src/ordering.rs index b6051d1..0bb12c6 100644 --- a/atuin-client/src/ordering.rs +++ b/atuin-client/src/ordering.rs @@ -17,6 +17,7 @@ where let mut r = res.clone(); let qvec = &query.chars().collect(); r.sort_by_cached_key(|h| { + // TODO for fzf search we should sum up scores for each matched term let (from, to) = match minspan::span(qvec, &(f(h).chars().collect())) { Some(x) => x, // this is a little unfortunate: when we are asked to match a query that is found nowhere, diff --git a/docs/config.md b/docs/config.md index 414146f..405e3b5 100644 --- a/docs/config.md +++ b/docs/config.md @@ -97,7 +97,8 @@ key = "~/.atuin-session" ### `search_mode` Which search mode to use. Atuin supports "prefix", full text and "fuzzy" search -modes. The prefix search for "query\*", fulltext "\*query\*", and fuzzy "\*q\*u\*e\*r\*y\*" +modes. The prefix searches for "query\*", fulltext "\*query\*", and fuzzy applies +the search syntax [described below](#fuzzy-search-syntax). Defaults to "prefix" @@ -105,6 +106,29 @@ Defaults to "prefix" search_mode = "fulltext" ``` +#### `fuzzy` search syntax + +The "fuzzy" search syntax is based on the +[fzf search syntax](https://github.com/junegunn/fzf#search-syntax). + +| Token | Match type | Description | +| --------- | -------------------------- | ------------------------------------ | +| `sbtrkt` | fuzzy-match | Items that match `sbtrkt` | +| `'wild` | exact-match (quoted) | Items that include `wild` | +| `^music` | prefix-exact-match | Items that start with `music` | +| `.mp3$` | suffix-exact-match | Items that end with `.mp3` | +| `!fire` | inverse-exact-match | Items that do not include `fire` | +| `!^music` | inverse-prefix-exact-match | Items that do not start with `music` | +| `!.mp3$` | inverse-suffix-exact-match | Items that do not end with `.mp3` | + +A single bar character term acts as an OR operator. For example, the following +query matches entries that start with `core` and end with either `go`, `rb`, +or `py`. + +``` +^core go$ | rb$ | py$ +``` + ## Server config `// TODO`