From e5df809dd29b0fc73cb39b9debd3180b174e6bf5 Mon Sep 17 00:00:00 2001
From: noyez
Date: Fri, 20 May 2022 02:36:53 -0400
Subject: [PATCH] Noyez zsh histdb import (#393)

* Attempting to implement zsh-histdb import

Import compiles and passes tests, but doesn't run b/c of async runtime.
zsh-histdb uses sqlite, and sqlx-rs is async, but import code is sync.

* More working on importing histdb

* Rewriting tests and using `Vec<u8>` instead of `String`

- Rewriting tests to eliminate dependency on local file system
- Using `Vec<u8>` for command strings instead of `String` to eliminate
  the utf8 errors I was seeing previously. Seems to be working.

* Running fmt

Co-authored-by: Bradley Noyes
---
 NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
 extraction. Line breaks, indentation and generic type arguments (`Vec<u8>`,
 `Result<Vec<HistDbEntry>>`, `Pool<sqlx::Sqlite>`, `import::<ZshHistDb, _>`, ...)
 were restored by inference -- confirm against the upstream commit. The author
 e-mail addresses were already missing from the From:/Co-authored-by: lines and
 probably need to be re-added before `git am` accepts this file.

 atuin-client/Cargo.toml               |   1 +
 atuin-client/src/import/mod.rs        |   1 +
 atuin-client/src/import/zsh_histdb.rs | 219 ++++++++++++++++++++++++++
 src/command/client/import.rs          |  20 ++-
 4 files changed, 237 insertions(+), 4 deletions(-)
 create mode 100644 atuin-client/src/import/zsh_histdb.rs

diff --git a/atuin-client/Cargo.toml b/atuin-client/Cargo.toml
index b1dd8be..62e7984 100644
--- a/atuin-client/Cargo.toml
+++ b/atuin-client/Cargo.toml
@@ -62,6 +62,7 @@ hex = { version = "0.4", optional = true }
 sha2 = { version = "0.10", optional = true }
 rmp-serde = { version = "1.0.0", optional = true }
 base64 = { version = "0.13.0", optional = true }
+tokio = { version = "1", features = ["full"] }
 
 [dev-dependencies]
 tokio = { version = "1", features = ["full"] }
diff --git a/atuin-client/src/import/mod.rs b/atuin-client/src/import/mod.rs
index 07178d1..65c4f41 100644
--- a/atuin-client/src/import/mod.rs
+++ b/atuin-client/src/import/mod.rs
@@ -10,6 +10,7 @@ pub mod bash;
 pub mod fish;
 pub mod resh;
 pub mod zsh;
+pub mod zsh_histdb;
 
 #[async_trait]
 pub trait Importer: Sized {
diff --git a/atuin-client/src/import/zsh_histdb.rs b/atuin-client/src/import/zsh_histdb.rs
new file mode 100644
index 0000000..34718f8
--- /dev/null
+++ b/atuin-client/src/import/zsh_histdb.rs
@@ -0,0 +1,219 @@
+// import old shell history from zsh-histdb!
+// automatically hoover up all that we can find
+
+// As far as I can tell there are no version numbers in the histdb sqlite DB, so we're going based
+// on the schema from 2022-05-01
+//
+// I have run into some histories that will not import b/c of non UTF-8 characters.
+//
+
+//
+// An Example sqlite query for histdb data:
+//
+//id|session|command_id|place_id|exit_status|start_time|duration|id|argv|id|host|dir
+//
+//
+//  select
+//    history.id,
+//    history.start_time,
+//    places.host,
+//    places.dir,
+//    commands.argv
+//  from history
+//    left join commands on history.command_id = commands.rowid
+//    left join places on history.place_id = places.rowid ;
+//
+// CREATE TABLE history (id integer primary key autoincrement,
+//                       session int,
+//                       command_id int references commands (id),
+//                       place_id int references places (id),
+//                       exit_status int,
+//                       start_time int,
+//                       duration int);
+//
+
+use std::path::{Path, PathBuf};
+
+use async_trait::async_trait;
+use chrono::{prelude::*, Utc};
+use directories::UserDirs;
+use eyre::{eyre, Result};
+use sqlx::{sqlite::SqlitePool, Pool};
+
+use super::Importer;
+use crate::history::History;
+use crate::import::Loader;
+
+#[derive(sqlx::FromRow, Debug)]
+pub struct HistDbEntryCount {
+    pub count: usize,
+}
+
+#[derive(sqlx::FromRow, Debug)]
+pub struct HistDbEntry {
+    pub id: i64,
+    pub start_time: NaiveDateTime,
+    pub host: String,
+    pub dir: String,
+    pub argv: Vec<u8>,
+    pub duration: i64,
+}
+
+impl From<HistDbEntry> for History {
+    fn from(histdb_item: HistDbEntry) -> Self {
+        History::new(
+            DateTime::from_utc(histdb_item.start_time, Utc), // must assume UTC?
+            String::from_utf8(histdb_item.argv)
+                .unwrap_or_else(|_e| String::from(""))
+                .trim_end()
+                .to_string(),
+            histdb_item.dir,
+            0, // assume 0, we have no way of knowing :(
+            histdb_item.duration,
+            None,
+            Some(histdb_item.host),
+        )
+    }
+}
+
+#[derive(Debug)]
+pub struct ZshHistDb {
+    histdb: Vec<HistDbEntry>,
+}
+
+/// Read db at given file, return vector of entries.
+async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> {
+    let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?;
+    hist_from_db_conn(pool).await
+}
+
+async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> {
+    let query = "select history.id,history.start_time,history.duration,places.host,places.dir,commands.argv from history left join commands on history.command_id = commands.rowid left join places on history.place_id = places.rowid order by history.start_time";
+    let histdb_vec: Vec<HistDbEntry> = sqlx::query_as::<_, HistDbEntry>(query)
+        .fetch_all(&pool)
+        .await?;
+    Ok(histdb_vec)
+}
+
+impl ZshHistDb {
+    pub fn histpath_candidate() -> PathBuf {
+        // By default histdb database is `${HOME}/.histdb/zsh-history.db`
+        // This can be modified by ${HISTDB_FILE}
+        //
+        //  if [[ -z ${HISTDB_FILE} ]]; then
+        //      typeset -g HISTDB_FILE="${HOME}/.histdb/zsh-history.db"
+        let user_dirs = UserDirs::new().unwrap(); // should catch error here?
+        let home_dir = user_dirs.home_dir();
+        std::env::var("HISTDB_FILE")
+            .as_ref()
+            .map(|x| Path::new(x).to_path_buf())
+            .unwrap_or_else(|_err| home_dir.join(".histdb/zsh-history.db"))
+    }
+    pub fn histpath() -> Result<PathBuf> {
+        let histdb_path = ZshHistDb::histpath_candidate();
+        if histdb_path.exists() {
+            Ok(histdb_path)
+        } else {
+            Err(eyre!(
+                "Could not find history file. Try setting $HISTDB_FILE"
+            ))
+        }
+    }
+}
+
+#[async_trait]
+impl Importer for ZshHistDb {
+    // Not sure how this is used
+    const NAME: &'static str = "zsh_histdb";
+
+    /// Creates a new ZshHistDb and populates the history based on the pre-populated data
+    /// structure.
+    async fn new() -> Result<Self> {
+        let dbpath = ZshHistDb::histpath()?;
+        let histdb_entry_vec = hist_from_db(dbpath).await?;
+        Ok(Self {
+            histdb: histdb_entry_vec,
+        })
+    }
+    async fn entries(&mut self) -> Result<usize> {
+        Ok(self.histdb.len())
+    }
+    async fn load(self, h: &mut impl Loader) -> Result<()> {
+        for i in self.histdb {
+            h.push(i.into()).await?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use super::*;
+    use sqlx::sqlite::SqlitePoolOptions;
+    use std::env;
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_env_vars() {
+        let test_env_db = "nonstd-zsh-history.db";
+        let key = "HISTDB_FILE";
+        env::set_var(key, test_env_db);
+
+        // test the env got set
+        assert_eq!(env::var(key).unwrap(), test_env_db.to_string());
+
+        // test histdb returns the proper db from previous step
+        let histdb_path = ZshHistDb::histpath_candidate();
+        assert_eq!(histdb_path.to_str().unwrap(), test_env_db);
+    }
+
+    #[tokio::test(flavor = "multi_thread")]
+    async fn test_import() {
+        let pool: SqlitePool = SqlitePoolOptions::new()
+            .min_connections(2)
+            .connect(":memory:")
+            .await
+            .unwrap();
+
+        // sql dump directly from a test database.
+        let db_sql = r#"
+        PRAGMA foreign_keys=OFF;
+        BEGIN TRANSACTION;
+        CREATE TABLE commands (id integer primary key autoincrement, argv text, unique(argv) on conflict ignore);
+        INSERT INTO commands VALUES(1,'pwd');
+        INSERT INTO commands VALUES(2,'curl google.com');
+        INSERT INTO commands VALUES(3,'bash');
+        CREATE TABLE places (id integer primary key autoincrement, host text, dir text, unique(host, dir) on conflict ignore);
+        INSERT INTO places VALUES(1,'mbp16.local','/home/noyez');
+        CREATE TABLE history (id integer primary key autoincrement,
+            session int,
+            command_id int references commands (id),
+            place_id int references places (id),
+            exit_status int,
+            start_time int,
+            duration int);
+        INSERT INTO history VALUES(1,0,1,1,0,1651497918,1);
+        INSERT INTO history VALUES(2,0,2,1,0,1651497923,1);
+        INSERT INTO history VALUES(3,0,3,1,NULL,1651497930,NULL);
+        DELETE FROM sqlite_sequence;
+        INSERT INTO sqlite_sequence VALUES('commands',3);
+        INSERT INTO sqlite_sequence VALUES('places',3);
+        INSERT INTO sqlite_sequence VALUES('history',3);
+        CREATE INDEX hist_time on history(start_time);
+        CREATE INDEX place_dir on places(dir);
+        CREATE INDEX place_host on places(host);
+        CREATE INDEX history_command_place on history(command_id, place_id);
+        COMMIT; "#;
+
+        sqlx::query(db_sql).execute(&pool).await.unwrap();
+
+        // test histdb iterator
+        let histdb_vec = hist_from_db_conn(pool).await.unwrap();
+        let histdb = ZshHistDb { histdb: histdb_vec };
+
+        println!("h: {:#?}", histdb.histdb);
+        println!("counter: {:?}", histdb.histdb.len());
+        for i in histdb.histdb {
+            println!("{:?}", i);
+        }
+    }
+}
diff --git a/src/command/client/import.rs b/src/command/client/import.rs
index 580e4b0..60fd536 100644
--- a/src/command/client/import.rs
+++ b/src/command/client/import.rs
@@ -8,7 +8,9 @@ use indicatif::ProgressBar;
 use atuin_client::{
     database::Database,
     history::History,
-    import::{bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, Importer, Loader},
+    import::{
+        bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, zsh_histdb::ZshHistDb, Importer, Loader,
+    },
 };
 
 #[derive(Parser)]
@@ -19,6 +21,8 @@ pub enum Cmd {
 
     /// Import history from the zsh history file
     Zsh,
+    /// Import history from the zsh-histdb sqlite database
+    ZshHistDb,
     /// Import history from the bash history file
     Bash,
     /// Import history from the resh history file
@@ -42,10 +46,17 @@ impl Cmd {
         match self {
             Self::Auto => {
                 let shell = env::var("SHELL").unwrap_or_else(|_| String::from("NO_SHELL"));
-
                 if shell.ends_with("/zsh") {
-                    println!("Detected ZSH");
-                    import::<Zsh, _>(db).await
+                    if ZshHistDb::histpath().is_ok() {
+                        println!(
+                            "Detected Zsh-HistDb, using :{}",
+                            ZshHistDb::histpath().unwrap().to_str().unwrap()
+                        );
+                        import::<ZshHistDb, _>(db).await
+                    } else {
+                        println!("Detected ZSH");
+                        import::<Zsh, _>(db).await
+                    }
                 } else if shell.ends_with("/fish") {
                     println!("Detected Fish");
                     import::<Fish, _>(db).await
@@ -59,6 +70,7 @@ impl Cmd {
             }
 
             Self::Zsh => import::<Zsh, _>(db).await,
+            Self::ZshHistDb => import::<ZshHistDb, _>(db).await,
             Self::Bash => import::<Bash, _>(db).await,
             Self::Resh => import::<Resh, _>(db).await,
             Self::Fish => import::<Fish, _>(db).await,