Noyez zsh histdb import (#393)

* Attempting to implement zsh-histdb import

Import compiles and passes tests, but doesn't run b/c of the async runtime.
zsh-histdb uses sqlite, and sqlx-rs is async, but import code is sync.

* More working on importing histdb

* Rewriting tests and using `Vec<u8>` instead of `String`

 - Rewriting tests to eliminate dependency on local file system
 - Using `Vec<u8>` for command strings instead of `String` to eliminate
   the utf8 errors i was seeing previously. Seems to be working.

* Running fmt

Co-authored-by: Bradley Noyes <b@noyes.dev>
This commit is contained in:
noyez 2022-05-20 02:36:53 -04:00 committed by GitHub
parent b08e254343
commit e5df809dd2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 237 additions and 4 deletions

View file

@ -62,6 +62,7 @@ hex = { version = "0.4", optional = true }
sha2 = { version = "0.10", optional = true } sha2 = { version = "0.10", optional = true }
rmp-serde = { version = "1.0.0", optional = true } rmp-serde = { version = "1.0.0", optional = true }
base64 = { version = "0.13.0", optional = true } base64 = { version = "0.13.0", optional = true }
tokio = { version = "1", features = ["full"] }
[dev-dependencies] [dev-dependencies]
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }

View file

@ -10,6 +10,7 @@ pub mod bash;
pub mod fish; pub mod fish;
pub mod resh; pub mod resh;
pub mod zsh; pub mod zsh;
pub mod zsh_histdb;
#[async_trait] #[async_trait]
pub trait Importer: Sized { pub trait Importer: Sized {

View file

@ -0,0 +1,219 @@
// import old shell history from zsh-histdb!
// automatically hoover up all that we can find
// As far as i can tell there are no version numbers in the histdb sqlite DB, so we're going based
// on the schema from 2022-05-01
//
// I have run into some histories that will not import b/c of non UTF-8 characters.
//
//
// An example sqlite query for histdb data:
//
//id|session|command_id|place_id|exit_status|start_time|duration|id|argv|id|host|dir
//
//
// select
// history.id,
// history.start_time,
// places.host,
// places.dir,
// commands.argv
// from history
// left join commands on history.command_id = commands.rowid
// left join places on history.place_id = places.rowid ;
//
// CREATE TABLE history (id integer primary key autoincrement,
// session int,
// command_id int references commands (id),
// place_id int references places (id),
// exit_status int,
// start_time int,
// duration int);
//
use std::path::{Path, PathBuf};
use async_trait::async_trait;
use chrono::{prelude::*, Utc};
use directories::UserDirs;
use eyre::{eyre, Result};
use sqlx::{sqlite::SqlitePool, Pool};
use super::Importer;
use crate::history::History;
use crate::import::Loader;
/// Row type holding a single `count` column, decodable from a sqlx query result.
///
/// NOTE(review): nothing in the visible part of this file constructs or queries this
/// type — confirm it is still needed.
#[derive(sqlx::FromRow, Debug)]
pub struct HistDbEntryCount {
/// Value of the `count` column (presumably a `count(*)` result — TODO confirm).
pub count: usize,
}
/// One row of the joined histdb query issued by `hist_from_db_conn`.
///
/// Field names match the column names selected by that query, which is what
/// `sqlx::FromRow` relies on to populate the struct.
#[derive(sqlx::FromRow, Debug)]
pub struct HistDbEntry {
/// `history.id` column.
pub id: i64,
/// `history.start_time` column (declared `int` in the histdb schema quoted at the top of
/// this file; presumably a unix timestamp in seconds — TODO confirm).
pub start_time: NaiveDateTime,
/// `places.host` column.
pub host: String,
/// `places.dir` column.
pub dir: String,
/// `commands.argv` column, kept as raw bytes so that rows containing invalid UTF-8 can
/// still be read; conversion to text happens in `From<HistDbEntry> for History`.
pub argv: Vec<u8>,
/// `history.duration` column (units are not established by this file — TODO confirm).
pub duration: i64,
}
/// Converts a row read from the zsh-histdb database into a [`History`] entry.
///
/// The command bytes are decoded leniently: invalid UTF-8 sequences are replaced with
/// U+FFFD instead of discarding the whole command, so a single bad byte no longer turns
/// the entry into an empty command. Trailing whitespace is trimmed from the command.
impl From<HistDbEntry> for History {
    fn from(histdb_item: HistDbEntry) -> Self {
        // Lossy decoding keeps every readable part of the command line.
        let command = String::from_utf8_lossy(&histdb_item.argv)
            .trim_end()
            .to_string();

        History::new(
            DateTime::from_utc(histdb_item.start_time, Utc), // must assume UTC?
            command,
            histdb_item.dir,
            // Assume 0: the import query does not select anything for this argument.
            // NOTE(review): the histdb `history` table has an `exit_status` column; if this
            // argument is the exit code it could be imported instead — confirm.
            0,
            histdb_item.duration,
            None,
            Some(histdb_item.host),
        )
    }
}
/// Importer for zsh-histdb: holds every row read from the histdb sqlite database so they
/// can be counted and then pushed into a `Loader`.
#[derive(Debug)]
pub struct ZshHistDb {
// All entries returned by the import query, in the order the query produced them.
histdb: Vec<HistDbEntry>,
}
/// Read db at given file, return vector of entries.
async fn hist_from_db(dbpath: PathBuf) -> Result<Vec<HistDbEntry>> {
let pool = SqlitePool::connect(dbpath.to_str().unwrap()).await?;
hist_from_db_conn(pool).await
}
/// Runs the histdb import query on `pool` and collects all rows.
///
/// The query joins `history` with `commands` and `places` and orders the result by
/// `history.start_time`.
///
/// # Errors
///
/// Propagates any error returned by sqlx while executing the query or decoding rows.
async fn hist_from_db_conn(pool: Pool<sqlx::Sqlite>) -> Result<Vec<HistDbEntry>> {
    const QUERY: &str = "select history.id,history.start_time,history.duration,places.host,places.dir,commands.argv from history left join commands on history.command_id = commands.rowid left join places on history.place_id = places.rowid order by history.start_time";

    let rows = sqlx::query_as::<_, HistDbEntry>(QUERY)
        .fetch_all(&pool)
        .await?;
    Ok(rows)
}
impl ZshHistDb {
    /// Resolves the histdb database location without touching the file system.
    ///
    /// Mirrors the histdb shell logic:
    ///
    /// ```text
    /// if [[ -z ${HISTDB_FILE} ]]; then
    ///     typeset -g HISTDB_FILE="${HOME}/.histdb/zsh-history.db"
    /// ```
    ///
    /// A non-empty `$HISTDB_FILE` wins (it may contain non-Unicode bytes); otherwise the
    /// default `${HOME}/.histdb/zsh-history.db` is used. Returns `None` only when
    /// `$HISTDB_FILE` is unset/empty and the home directory cannot be determined.
    fn resolve_histpath() -> Option<PathBuf> {
        std::env::var_os("HISTDB_FILE")
            .filter(|value| !value.is_empty())
            .map(|value| Path::new(&value).to_path_buf())
            // The home directory is only looked up when the override is absent.
            .or_else(|| {
                UserDirs::new().map(|dirs| dirs.home_dir().join(".histdb/zsh-history.db"))
            })
    }

    /// Returns the path where the histdb database is expected to live.
    ///
    /// The path is not checked for existence; use [`ZshHistDb::histpath`] for that.
    ///
    /// # Panics
    ///
    /// Panics if `$HISTDB_FILE` is unset or empty and the home directory cannot be
    /// determined.
    pub fn histpath_candidate() -> PathBuf {
        Self::resolve_histpath()
            .expect("could not determine the home directory; try setting $HISTDB_FILE")
    }

    /// Returns the path of the histdb database if it exists on disk.
    ///
    /// # Errors
    ///
    /// Returns an error if no candidate path can be determined or if the candidate path
    /// does not exist.
    pub fn histpath() -> Result<PathBuf> {
        let histdb_path = Self::resolve_histpath().ok_or_else(|| {
            eyre!("Could not determine the home directory. Try setting $HISTDB_FILE")
        })?;

        if histdb_path.exists() {
            Ok(histdb_path)
        } else {
            Err(eyre!(
                "Could not find history file. Try setting $HISTDB_FILE"
            ))
        }
    }
}
#[async_trait]
impl Importer for ZshHistDb {
    /// Identifier of this importer, as required by the `Importer` trait.
    const NAME: &'static str = "zsh_histdb";

    /// Locates the histdb database and eagerly reads all of its entries into memory.
    ///
    /// Fails if the database cannot be found, opened or queried.
    async fn new() -> Result<Self> {
        let histdb = hist_from_db(Self::histpath()?).await?;
        Ok(Self { histdb })
    }

    /// Reports how many entries were read from the database.
    async fn entries(&mut self) -> Result<usize> {
        let total = self.histdb.len();
        Ok(total)
    }

    /// Converts every stored entry and pushes it into `h`, stopping at the first error.
    async fn load(self, h: &mut impl Loader) -> Result<()> {
        let Self { histdb } = self;
        for entry in histdb {
            h.push(entry.into()).await?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use sqlx::sqlite::SqlitePoolOptions;
    use std::env;

    /// `$HISTDB_FILE` must take precedence over the default database location.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_env_vars() {
        let test_env_db = "nonstd-zsh-history.db";
        let key = "HISTDB_FILE";
        // Remember the previous value so the process environment can be restored.
        let previous = env::var_os(key);
        env::set_var(key, test_env_db);

        // test the env got set
        assert_eq!(env::var(key).unwrap(), test_env_db.to_string());

        // test histdb returns the proper db from previous step
        let histdb_path = ZshHistDb::histpath_candidate();
        assert_eq!(histdb_path.to_str().unwrap(), test_env_db);

        // Do not leak the override into other tests running in this process.
        match previous {
            Some(value) => env::set_var(key, value),
            None => env::remove_var(key),
        }
    }

    /// Loads a small histdb dump into an in-memory database and checks the imported rows.
    #[tokio::test(flavor = "multi_thread")]
    async fn test_import() {
        let pool: SqlitePool = SqlitePoolOptions::new()
            .min_connections(2)
            .connect(":memory:")
            .await
            .unwrap();

        // sql dump directly from a test database.
        let db_sql = r#"
        PRAGMA foreign_keys=OFF;
        BEGIN TRANSACTION;
        CREATE TABLE commands (id integer primary key autoincrement, argv text, unique(argv) on conflict ignore);
        INSERT INTO commands VALUES(1,'pwd');
        INSERT INTO commands VALUES(2,'curl google.com');
        INSERT INTO commands VALUES(3,'bash');
        CREATE TABLE places (id integer primary key autoincrement, host text, dir text, unique(host, dir) on conflict ignore);
        INSERT INTO places VALUES(1,'mbp16.local','/home/noyez');
        CREATE TABLE history (id integer primary key autoincrement,
        session int,
        command_id int references commands (id),
        place_id int references places (id),
        exit_status int,
        start_time int,
        duration int);
        INSERT INTO history VALUES(1,0,1,1,0,1651497918,1);
        INSERT INTO history VALUES(2,0,2,1,0,1651497923,1);
        INSERT INTO history VALUES(3,0,3,1,NULL,1651497930,NULL);
        DELETE FROM sqlite_sequence;
        INSERT INTO sqlite_sequence VALUES('commands',3);
        INSERT INTO sqlite_sequence VALUES('places',3);
        INSERT INTO sqlite_sequence VALUES('history',3);
        CREATE INDEX hist_time on history(start_time);
        CREATE INDEX place_dir on places(dir);
        CREATE INDEX place_host on places(host);
        CREATE INDEX history_command_place on history(command_id, place_id);
        COMMIT; "#;

        sqlx::query(db_sql).execute(&pool).await.unwrap();

        // read everything back through the importer's query
        let histdb_vec = hist_from_db_conn(pool).await.unwrap();
        let histdb = ZshHistDb { histdb: histdb_vec };

        // every history row is returned, ordered by start_time
        assert_eq!(histdb.histdb.len(), 3);
        let ids: Vec<i64> = histdb.histdb.iter().map(|entry| entry.id).collect();
        assert_eq!(ids, vec![1, 2, 3]);

        // commands come from the joined `commands` table
        let commands: Vec<&[u8]> = histdb
            .histdb
            .iter()
            .map(|entry| entry.argv.as_slice())
            .collect();
        assert_eq!(
            commands,
            vec![&b"pwd"[..], &b"curl google.com"[..], &b"bash"[..]]
        );

        // host and dir come from the joined `places` table
        for entry in &histdb.histdb {
            assert_eq!(entry.host, "mbp16.local");
            assert_eq!(entry.dir, "/home/noyez");
        }
    }
}

View file

@ -8,7 +8,9 @@ use indicatif::ProgressBar;
use atuin_client::{ use atuin_client::{
database::Database, database::Database,
history::History, history::History,
import::{bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, Importer, Loader}, import::{
bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, zsh_histdb::ZshHistDb, Importer, Loader,
},
}; };
#[derive(Parser)] #[derive(Parser)]
@ -19,6 +21,8 @@ pub enum Cmd {
/// Import history from the zsh history file /// Import history from the zsh history file
Zsh, Zsh,
/// Import history from the zsh-histdb sqlite database
ZshHistDb,
/// Import history from the bash history file /// Import history from the bash history file
Bash, Bash,
/// Import history from the resh history file /// Import history from the resh history file
@ -42,10 +46,17 @@ impl Cmd {
match self { match self {
Self::Auto => { Self::Auto => {
let shell = env::var("SHELL").unwrap_or_else(|_| String::from("NO_SHELL")); let shell = env::var("SHELL").unwrap_or_else(|_| String::from("NO_SHELL"));
if shell.ends_with("/zsh") { if shell.ends_with("/zsh") {
println!("Detected ZSH"); if ZshHistDb::histpath().is_ok() {
import::<Zsh, DB>(db).await println!(
"Detected Zsh-HistDb, using :{}",
ZshHistDb::histpath().unwrap().to_str().unwrap()
);
import::<ZshHistDb, DB>(db).await
} else {
println!("Detected ZSH");
import::<Zsh, DB>(db).await
}
} else if shell.ends_with("/fish") { } else if shell.ends_with("/fish") {
println!("Detected Fish"); println!("Detected Fish");
import::<Fish, DB>(db).await import::<Fish, DB>(db).await
@ -59,6 +70,7 @@ impl Cmd {
} }
Self::Zsh => import::<Zsh, DB>(db).await, Self::Zsh => import::<Zsh, DB>(db).await,
Self::ZshHistDb => import::<ZshHistDb, DB>(db).await,
Self::Bash => import::<Bash, DB>(db).await, Self::Bash => import::<Bash, DB>(db).await,
Self::Resh => import::<Resh, DB>(db).await, Self::Resh => import::<Resh, DB>(db).await,
Self::Fish => import::<Fish, DB>(db).await, Self::Fish => import::<Fish, DB>(db).await,