Importer V3 (#395)

* start of importer refactor

* fish

* resh

* zsh
This commit is contained in:
Conrad Ludgate 2022-05-09 07:46:52 +01:00 committed by GitHub
parent d3a4ff959b
commit 1d030b9d32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 476 additions and 566 deletions

5
Cargo.lock generated
View file

@ -118,6 +118,7 @@ dependencies = [
"itertools", "itertools",
"lazy_static", "lazy_static",
"log", "log",
"memchr",
"minspan", "minspan",
"parse_duration", "parse_duration",
"regex", "regex",
@ -1189,9 +1190,9 @@ dependencies = [
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.4.1" version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "mime" name = "mime"

View file

@ -49,6 +49,7 @@ regex = "1.5.4"
fs-err = "2.7" fs-err = "2.7"
sql-builder = "3" sql-builder = "3"
lazy_static = "1" lazy_static = "1"
memchr = "2.5"
# sync # sync
urlencoding = { version = "2.1.0", optional = true } urlencoding = { version = "2.1.0", optional = true }

View file

@ -41,7 +41,7 @@ pub fn current_context() -> Context {
} }
#[async_trait] #[async_trait]
pub trait Database { pub trait Database: Send + Sync {
async fn save(&mut self, h: &History) -> Result<()>; async fn save(&mut self, h: &History) -> Result<()>;
async fn save_bulk(&mut self, h: &[History]) -> Result<()>; async fn save_bulk(&mut self, h: &[History]) -> Result<()>;

View file

@ -1,134 +1,106 @@
use std::{ use std::{fs::File, io::Read, path::PathBuf};
fs::File,
io::{BufRead, BufReader, Read, Seek},
path::{Path, PathBuf},
};
use async_trait::async_trait;
use directories::UserDirs; use directories::UserDirs;
use eyre::{eyre, Result}; use eyre::{eyre, Result};
use super::{count_lines, Importer}; use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History; use crate::history::History;
#[derive(Debug)] #[derive(Debug)]
pub struct Bash<R> { pub struct Bash {
file: BufReader<R>, bytes: Vec<u8>,
strbuf: String,
loc: usize,
counter: i64,
} }
impl<R: Read + Seek> Bash<R> { fn default_histpath() -> Result<PathBuf> {
fn new(r: R) -> Result<Self> { let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?;
let mut buf = BufReader::new(r); let home_dir = user_dirs.home_dir();
let loc = count_lines(&mut buf)?;
Ok(Self { Ok(home_dir.join(".bash_history"))
file: buf,
strbuf: String::new(),
loc,
counter: 0,
})
}
} }
impl Importer for Bash<File> { #[async_trait]
impl Importer for Bash {
const NAME: &'static str = "bash"; const NAME: &'static str = "bash";
fn histpath() -> Result<PathBuf> { async fn new() -> Result<Self> {
let user_dirs = UserDirs::new().unwrap(); let mut bytes = Vec::new();
let home_dir = user_dirs.home_dir(); let path = get_histpath(default_histpath)?;
let mut f = File::open(path)?;
Ok(home_dir.join(".bash_history")) f.read_to_end(&mut bytes)?;
Ok(Self { bytes })
} }
fn parse(path: impl AsRef<Path>) -> Result<Self> { async fn entries(&mut self) -> Result<usize> {
Self::new(File::open(path)?) Ok(super::count_lines(&self.bytes))
} }
}
impl<R: Read> Iterator for Bash<R> { async fn load(self, h: &mut impl Loader) -> Result<()> {
type Item = Result<History>; let now = chrono::Utc::now();
let mut line = String::new();
fn next(&mut self) -> Option<Self::Item> { for (i, b) in unix_byte_lines(&self.bytes).enumerate() {
self.strbuf.clear(); let s = match std::str::from_utf8(b) {
match self.file.read_line(&mut self.strbuf) { Ok(s) => s,
Ok(0) => return None, Err(_) => continue, // we can skip past things like invalid utf8
Ok(_) => (),
Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8
}
self.loc -= 1;
while self.strbuf.ends_with("\\\n") {
if self.file.read_line(&mut self.strbuf).is_err() {
// There's a chance that the last line of a command has invalid
// characters, the only safe thing to do is break :/
// usually just invalid utf8 or smth
// however, we really need to avoid missing history, so it's
// better to have some items that should have been part of
// something else, than to miss things. So break.
break;
}; };
self.loc -= 1; if let Some(s) = s.strip_suffix('\\') {
line.push_str(s);
line.push_str("\\\n");
} else {
line.push_str(s);
let command = std::mem::take(&mut line);
let offset = chrono::Duration::seconds(i as i64);
h.push(History::new(
now - offset, // preserve ordering
command,
String::from("unknown"),
-1,
-1,
None,
None,
))
.await?;
}
} }
let time = chrono::Utc::now(); Ok(())
let offset = chrono::Duration::seconds(self.counter);
let time = time - offset;
self.counter += 1;
Some(Ok(History::new(
time,
self.strbuf.trim_end().to_string(),
String::from("unknown"),
-1,
-1,
None,
None,
)))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.loc))
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::io::Cursor; use itertools::assert_equal;
use crate::import::{tests::TestLoader, Importer};
use super::Bash; use super::Bash;
#[test] #[tokio::test]
fn test_parse_file() { async fn test_parse_file() {
let input = r"cargo install atuin let bytes = r"cargo install atuin
cargo install atuin; \ cargo install atuin; \
cargo update cargo update
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
"; "
.as_bytes()
.to_owned();
let cursor = Cursor::new(input); let mut bash = Bash { bytes };
let mut bash = Bash::new(cursor).unwrap(); assert_eq!(bash.entries().await.unwrap(), 4);
assert_eq!(bash.loc, 4);
assert_eq!(bash.size_hint(), (0, Some(4)));
assert_eq!( let mut loader = TestLoader::default();
&bash.next().unwrap().unwrap().command, bash.load(&mut loader).await.unwrap();
"cargo install atuin"
);
assert_eq!(
&bash.next().unwrap().unwrap().command,
"cargo install atuin; \\\ncargo update"
);
assert_eq!(
&bash.next().unwrap().unwrap().command,
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"
);
assert!(bash.next().is_none());
assert_eq!(bash.size_hint(), (0, Some(0))); assert_equal(
loader.buf.iter().map(|h| h.command.as_str()),
[
"cargo install atuin",
"cargo install atuin; \\\ncargo update",
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
],
);
} }
} }

View file

@ -1,99 +1,90 @@
// import old shell history! // import old shell history!
// automatically hoover up all that we can find // automatically hoover up all that we can find
use std::{ use std::{fs::File, io::Read, path::PathBuf};
fs::File,
io::{self, BufRead, BufReader, Read, Seek},
path::{Path, PathBuf},
};
use async_trait::async_trait;
use chrono::{prelude::*, Utc}; use chrono::{prelude::*, Utc};
use directories::BaseDirs; use directories::BaseDirs;
use eyre::{eyre, Result}; use eyre::{eyre, Result};
use super::{count_lines, Importer}; use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History; use crate::history::History;
#[derive(Debug)] #[derive(Debug)]
pub struct Fish<R> { pub struct Fish {
file: BufReader<R>, bytes: Vec<u8>,
strbuf: String,
loc: usize,
} }
impl<R: Read + Seek> Fish<R> { /// see https://fishshell.com/docs/current/interactive.html#searchable-command-history
fn new(r: R) -> Result<Self> { fn default_histpath() -> Result<PathBuf> {
let mut buf = BufReader::new(r); let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?;
let loc = count_lines(&mut buf)?; let data = base.data_local_dir();
Ok(Self { // fish supports multiple history sessions
file: buf, // If `fish_history` var is missing, or set to `default`, use `fish` as the session
strbuf: String::new(), let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish"));
loc, let session = if session == "default" {
}) String::from("fish")
} else {
session
};
let mut histpath = data.join("fish");
histpath.push(format!("{}_history", session));
if histpath.exists() {
Ok(histpath)
} else {
Err(eyre!("Could not find history file. Try setting $HISTFILE"))
} }
} }
impl<R: Read> Fish<R> { #[async_trait]
fn new_entry(&mut self) -> io::Result<bool> { impl Importer for Fish {
let inner = self.file.fill_buf()?;
Ok(inner.starts_with(b"- "))
}
}
impl Importer for Fish<File> {
const NAME: &'static str = "fish"; const NAME: &'static str = "fish";
/// see https://fishshell.com/docs/current/interactive.html#searchable-command-history async fn new() -> Result<Self> {
fn histpath() -> Result<PathBuf> { let mut bytes = Vec::new();
let base = BaseDirs::new().ok_or_else(|| eyre!("could not determine data directory"))?; let path = get_histpath(default_histpath)?;
let data = base.data_local_dir(); let mut f = File::open(path)?;
f.read_to_end(&mut bytes)?;
// fish supports multiple history sessions Ok(Self { bytes })
// If `fish_history` var is missing, or set to `default`, use `fish` as the session
let session = std::env::var("fish_history").unwrap_or_else(|_| String::from("fish"));
let session = if session == "default" {
String::from("fish")
} else {
session
};
let mut histpath = data.join("fish");
histpath.push(format!("{}_history", session));
if histpath.exists() {
Ok(histpath)
} else {
Err(eyre!("Could not find history file. Try setting $HISTFILE"))
}
} }
fn parse(path: impl AsRef<Path>) -> Result<Self> { async fn entries(&mut self) -> Result<usize> {
Self::new(File::open(path)?) Ok(super::count_lines(&self.bytes))
} }
}
impl<R: Read> Iterator for Fish<R> { async fn load(self, loader: &mut impl Loader) -> Result<()> {
type Item = Result<History>; let now = Utc::now();
fn next(&mut self) -> Option<Self::Item> {
let mut time: Option<DateTime<Utc>> = None; let mut time: Option<DateTime<Utc>> = None;
let mut cmd: Option<String> = None; let mut cmd: Option<String> = None;
loop { for b in unix_byte_lines(&self.bytes) {
self.strbuf.clear(); let s = match std::str::from_utf8(b) {
match self.file.read_line(&mut self.strbuf) { Ok(s) => s,
// no more content to read Err(_) => continue, // we can skip past things like invalid utf8
Ok(0) => break, };
// bail on IO error
Err(e) => return Some(Err(e.into())),
_ => (),
}
// `read_line` adds the line delimeter to the string. No thanks if let Some(c) = s.strip_prefix("- cmd: ") {
self.strbuf.pop(); // first, we must deal with the prev cmd
if let Some(cmd) = cmd.take() {
let time = time.unwrap_or(now);
loader
.push(History::new(
time,
cmd,
"unknown".into(),
-1,
-1,
None,
None,
))
.await?;
}
if let Some(c) = self.strbuf.strip_prefix("- cmd: ") {
// using raw strings to avoid needing escaping. // using raw strings to avoid needing escaping.
// replaces double backslashes with single backslashes // replaces double backslashes with single backslashes
let c = c.replace(r"\\", r"\"); let c = c.replace(r"\\", r"\");
@ -102,7 +93,7 @@ impl<R: Read> Iterator for Fish<R> {
// TODO: any other escape characters? // TODO: any other escape characters?
cmd = Some(c); cmd = Some(c);
} else if let Some(t) = self.strbuf.strip_prefix(" when: ") { } else if let Some(t) = s.strip_prefix(" when: ") {
// if t is not an int, just ignore this line // if t is not an int, just ignore this line
if let Ok(t) = t.parse::<i64>() { if let Ok(t) = t.parse::<i64>() {
time = Some(Utc.timestamp(t, 0)); time = Some(Utc.timestamp(t, 0));
@ -110,47 +101,40 @@ impl<R: Read> Iterator for Fish<R> {
} else { } else {
// ... ignore paths lines // ... ignore paths lines
} }
match self.new_entry() {
// next line is a new entry, so let's stop here
// only if we have found a command though
Ok(true) if cmd.is_some() => break,
// bail on IO error
Err(e) => return Some(Err(e.into())),
_ => (),
}
} }
let cmd = cmd?; // we might have a trailing cmd
let time = time.unwrap_or_else(Utc::now); if let Some(cmd) = cmd.take() {
let time = time.unwrap_or(now);
Some(Ok(History::new( loader
time, .push(History::new(
cmd, time,
"unknown".into(), cmd,
-1, "unknown".into(),
-1, -1,
None, -1,
None, None,
))) None,
} ))
.await?;
}
fn size_hint(&self) -> (usize, Option<usize>) { Ok(())
// worst case, entry per line
(0, Some(self.loc))
} }
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor;
use crate::import::{tests::TestLoader, Importer};
use super::Fish; use super::Fish;
#[test] #[tokio::test]
fn parse_complex() { async fn parse_complex() {
// complicated input with varying contents and escaped strings. // complicated input with varying contents and escaped strings.
let input = r#"- cmd: history --help let bytes = r#"- cmd: history --help
when: 1639162832 when: 1639162832
- cmd: cat ~/.bash_history - cmd: cat ~/.bash_history
when: 1639162851 when: 1639162851
@ -181,14 +165,20 @@ ERROR
when: 1639163066 when: 1639163066
paths: paths:
- ~/.local/share/fish/fish_history - ~/.local/share/fish/fish_history
"#; "#
let cursor = Cursor::new(input); .as_bytes()
let mut fish = Fish::new(cursor).unwrap(); .to_owned();
let fish = Fish { bytes };
let mut loader = TestLoader::default();
fish.load(&mut loader).await.unwrap();
let mut history = loader.buf.into_iter();
// simple wrapper for fish history entry // simple wrapper for fish history entry
macro_rules! fishtory { macro_rules! fishtory {
($timestamp:expr, $command:expr) => { ($timestamp:expr, $command:expr) => {
let h = fish.next().expect("missing entry in history").unwrap(); let h = history.next().expect("missing entry in history");
assert_eq!(h.command.as_str(), $command); assert_eq!(h.command.as_str(), $command);
assert_eq!(h.timestamp.timestamp(), $timestamp); assert_eq!(h.timestamp.timestamp(), $timestamp);
}; };

View file

@ -1,9 +1,8 @@
use std::{ use std::path::PathBuf;
io::{BufRead, BufReader, Read, Seek, SeekFrom},
path::{Path, PathBuf},
};
use eyre::Result; use async_trait::async_trait;
use eyre::{bail, Result};
use memchr::Memchr;
use crate::history::History; use crate::history::History;
@ -12,16 +11,88 @@ pub mod fish;
pub mod resh; pub mod resh;
pub mod zsh; pub mod zsh;
// this could probably be sped up #[async_trait]
fn count_lines(buf: &mut BufReader<impl Read + Seek>) -> Result<usize> { pub trait Importer: Sized {
let lines = buf.lines().count();
buf.seek(SeekFrom::Start(0))?;
Ok(lines)
}
pub trait Importer: IntoIterator<Item = Result<History>> + Sized {
const NAME: &'static str; const NAME: &'static str;
fn histpath() -> Result<PathBuf>; async fn new() -> Result<Self>;
fn parse(path: impl AsRef<Path>) -> Result<Self>; async fn entries(&mut self) -> Result<usize>;
async fn load(self, loader: &mut impl Loader) -> Result<()>;
}
#[async_trait]
pub trait Loader: Sync + Send {
async fn push(&mut self, hist: History) -> eyre::Result<()>;
}
fn unix_byte_lines(input: &[u8]) -> impl Iterator<Item = &[u8]> {
UnixByteLines {
iter: memchr::memchr_iter(b'\n', input),
bytes: input,
i: 0,
}
}
struct UnixByteLines<'a> {
iter: Memchr<'a>,
bytes: &'a [u8],
i: usize,
}
impl<'a> Iterator for UnixByteLines<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
let j = self.iter.next()?;
let out = &self.bytes[self.i..j];
self.i = j + 1;
Some(out)
}
fn count(self) -> usize
where
Self: Sized,
{
self.iter.count()
}
}
fn count_lines(input: &[u8]) -> usize {
unix_byte_lines(input).count()
}
fn get_histpath<D>(def: D) -> Result<PathBuf>
where
D: FnOnce() -> Result<PathBuf>,
{
if let Ok(p) = std::env::var("HISTFILE") {
is_file(PathBuf::from(p))
} else {
is_file(def()?)
}
}
fn is_file(p: PathBuf) -> Result<PathBuf> {
if p.is_file() {
Ok(p)
} else {
bail!("Could not find history file {:?}. Try setting $HISTFILE", p)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Default)]
pub struct TestLoader {
pub buf: Vec<History>,
}
#[async_trait]
impl Loader for TestLoader {
async fn push(&mut self, hist: History) -> Result<()> {
self.buf.push(hist);
Ok(())
}
}
} }

View file

@ -1,9 +1,6 @@
use std::{ use std::{fs::File, io::Read, path::PathBuf};
fs::File,
io::{BufRead, BufReader},
path::{Path, PathBuf},
};
use async_trait::async_trait;
use chrono::{TimeZone, Utc}; use chrono::{TimeZone, Utc};
use directories::UserDirs; use directories::UserDirs;
use eyre::{eyre, Result}; use eyre::{eyre, Result};
@ -11,7 +8,7 @@ use serde::Deserialize;
use atuin_common::utils::uuid_v4; use atuin_common::utils::uuid_v4;
use super::{count_lines, Importer}; use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History; use crate::history::History;
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
@ -72,88 +69,72 @@ pub struct ReshEntry {
#[derive(Debug)] #[derive(Debug)]
pub struct Resh { pub struct Resh {
file: BufReader<File>, bytes: Vec<u8>,
strbuf: String,
loc: usize,
} }
fn default_histpath() -> Result<PathBuf> {
let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?;
let home_dir = user_dirs.home_dir();
Ok(home_dir.join(".resh_history.json"))
}
#[async_trait]
impl Importer for Resh { impl Importer for Resh {
const NAME: &'static str = "resh"; const NAME: &'static str = "resh";
fn histpath() -> Result<PathBuf> { async fn new() -> Result<Self> {
let user_dirs = UserDirs::new().unwrap(); let mut bytes = Vec::new();
let home_dir = user_dirs.home_dir(); let path = get_histpath(default_histpath)?;
let mut f = File::open(path)?;
Ok(home_dir.join(".resh_history.json")) f.read_to_end(&mut bytes)?;
Ok(Self { bytes })
} }
fn parse(path: impl AsRef<Path>) -> Result<Self> { async fn entries(&mut self) -> Result<usize> {
let file = File::open(path)?; Ok(super::count_lines(&self.bytes))
let mut buf = BufReader::new(file);
let loc = count_lines(&mut buf)?;
Ok(Self {
file: buf,
strbuf: String::new(),
loc,
})
} }
}
impl Iterator for Resh { async fn load(self, h: &mut impl Loader) -> Result<()> {
type Item = Result<History>; for b in unix_byte_lines(&self.bytes) {
let s = match std::str::from_utf8(b) {
Ok(s) => s,
Err(_) => continue, // we can skip past things like invalid utf8
};
let entry = match serde_json::from_str::<ReshEntry>(s) {
Ok(e) => e,
Err(_) => continue, // skip invalid json :shrug:
};
fn next(&mut self) -> Option<Self::Item> { #[allow(clippy::cast_possible_truncation)]
self.strbuf.clear(); #[allow(clippy::cast_sign_loss)]
match self.file.read_line(&mut self.strbuf) { let timestamp = {
Ok(0) => return None, let secs = entry.realtime_before.floor() as i64;
Ok(_) => (), let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as u32;
Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8 Utc.timestamp(secs, nanosecs)
};
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
let duration = {
let secs = entry.realtime_after.floor() as i64;
let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as u32;
let difference = Utc.timestamp(secs, nanosecs) - timestamp;
difference.num_nanoseconds().unwrap_or(0)
};
h.push(History {
id: uuid_v4(),
timestamp,
duration,
exit: entry.exit_code,
command: entry.cmd_line,
cwd: entry.pwd,
session: uuid_v4(),
hostname: entry.host,
})
.await?;
} }
// .resh_history.json lies about being a json. It is actually a file containing valid json Ok(())
// on every line. This means that the last line will throw an error, as it is just an EOF.
// Without the special case here, that will crash the importer.
let entry = match serde_json::from_str::<ReshEntry>(&self.strbuf) {
Ok(e) => e,
Err(e) if e.is_eof() => return None,
Err(e) => {
return Some(Err(eyre!(
"Invalid entry found in resh_history file: {}",
e
)))
}
};
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
let timestamp = {
let secs = entry.realtime_before.floor() as i64;
let nanosecs = (entry.realtime_before.fract() * 1_000_000_000_f64).round() as u32;
Utc.timestamp(secs, nanosecs)
};
#[allow(clippy::cast_possible_truncation)]
#[allow(clippy::cast_sign_loss)]
let duration = {
let secs = entry.realtime_after.floor() as i64;
let nanosecs = (entry.realtime_after.fract() * 1_000_000_000_f64).round() as u32;
let difference = Utc.timestamp(secs, nanosecs) - timestamp;
difference.num_nanoseconds().unwrap_or(0)
};
Some(Ok(History {
id: uuid_v4(),
timestamp,
duration,
exit: entry.exit_code,
command: entry.cmd_line,
cwd: entry.pwd,
session: uuid_v4(),
hostname: entry.host,
}))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.loc, Some(self.loc))
} }
} }

View file

@ -1,138 +1,104 @@
// import old shell history! // import old shell history!
// automatically hoover up all that we can find // automatically hoover up all that we can find
use std::{ use std::{fs::File, io::Read, path::PathBuf};
fs::File,
io::{BufRead, BufReader, Read, Seek},
path::{Path, PathBuf},
};
use async_trait::async_trait;
use chrono::{prelude::*, Utc}; use chrono::{prelude::*, Utc};
use directories::UserDirs; use directories::UserDirs;
use eyre::{eyre, Result}; use eyre::{eyre, Result};
use itertools::Itertools;
use super::{count_lines, Importer}; use super::{get_histpath, unix_byte_lines, Importer, Loader};
use crate::history::History; use crate::history::History;
#[derive(Debug)] #[derive(Debug)]
pub struct Zsh<R> { pub struct Zsh {
file: BufReader<R>, bytes: Vec<u8>,
strbuf: String,
loc: usize,
counter: i64,
} }
impl<R: Read + Seek> Zsh<R> { fn default_histpath() -> Result<PathBuf> {
fn new(r: R) -> Result<Self> { // oh-my-zsh sets HISTFILE=~/.zhistory
let mut buf = BufReader::new(r); // zsh has no default value for this var, but uses ~/.zhistory.
let loc = count_lines(&mut buf)?; // we could maybe be smarter about this in the future :)
let user_dirs = UserDirs::new().ok_or_else(|| eyre!("could not find user directories"))?;
let home_dir = user_dirs.home_dir();
Ok(Self { let mut candidates = [".zhistory", ".zsh_history"].iter();
file: buf, loop {
strbuf: String::new(), match candidates.next() {
loc, Some(candidate) => {
counter: 0, let histpath = home_dir.join(candidate);
}) if histpath.exists() {
} break Ok(histpath);
}
impl Importer for Zsh<File> {
const NAME: &'static str = "zsh";
fn histpath() -> Result<PathBuf> {
// oh-my-zsh sets HISTFILE=~/.zhistory
// zsh has no default value for this var, but uses ~/.zhistory.
// we could maybe be smarter about this in the future :)
let user_dirs = UserDirs::new().unwrap();
let home_dir = user_dirs.home_dir();
let mut candidates = [".zhistory", ".zsh_history"].iter();
loop {
match candidates.next() {
Some(candidate) => {
let histpath = home_dir.join(candidate);
if histpath.exists() {
break Ok(histpath);
}
} }
None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")),
} }
None => break Err(eyre!("Could not find history file. Try setting $HISTFILE")),
} }
} }
fn parse(path: impl AsRef<Path>) -> Result<Self> {
Self::new(File::open(path)?)
}
} }
impl<R: Read> Iterator for Zsh<R> { #[async_trait]
type Item = Result<History>; impl Importer for Zsh {
const NAME: &'static str = "bash";
fn next(&mut self) -> Option<Self::Item> { async fn new() -> Result<Self> {
// ZSH extended history records the timestamp + command duration let mut bytes = Vec::new();
// These lines begin with : let path = get_histpath(default_histpath)?;
// So, if the line begins with :, parse it. Otherwise it's just let mut f = File::open(path)?;
// the command f.read_to_end(&mut bytes)?;
self.strbuf.clear(); Ok(Self { bytes })
match self.file.read_line(&mut self.strbuf) { }
Ok(0) => return None,
Ok(_) => (),
Err(e) => return Some(Err(eyre!("failed to read line: {}", e))), // we can skip past things like invalid utf8
}
self.loc -= 1; async fn entries(&mut self) -> Result<usize> {
Ok(super::count_lines(&self.bytes))
}
while self.strbuf.ends_with("\\\n") { async fn load(self, h: &mut impl Loader) -> Result<()> {
if self.file.read_line(&mut self.strbuf).is_err() { let now = chrono::Utc::now();
// There's a chance that the last line of a command has invalid let mut line = String::new();
// characters, the only safe thing to do is break :/
// usually just invalid utf8 or smth let mut counter = 0;
// however, we really need to avoid missing history, so it's for b in unix_byte_lines(&self.bytes) {
// better to have some items that should have been part of let s = match std::str::from_utf8(b) {
// something else, than to miss things. So break. Ok(s) => s,
break; Err(_) => continue, // we can skip past things like invalid utf8
}; };
self.loc -= 1; if let Some(s) = s.strip_suffix('\\') {
line.push_str(s);
line.push_str("\\\n");
} else {
line.push_str(s);
let command = std::mem::take(&mut line);
if let Some(command) = command.strip_prefix(": ") {
counter += 1;
h.push(parse_extended(command, counter)).await?;
} else {
let offset = chrono::Duration::seconds(counter);
counter += 1;
h.push(History::new(
now - offset, // preserve ordering
command.trim_end().to_string(),
String::from("unknown"),
-1,
-1,
None,
None,
))
.await?;
}
}
} }
// We have to handle the case where a line has escaped newlines. Ok(())
// Keep reading until we have a non-escaped newline
let extended = self.strbuf.starts_with(':');
if extended {
self.counter += 1;
Some(Ok(parse_extended(&self.strbuf, self.counter)))
} else {
let time = chrono::Utc::now();
let offset = chrono::Duration::seconds(self.counter);
let time = time - offset;
self.counter += 1;
Some(Ok(History::new(
time,
self.strbuf.trim_end().to_string(),
String::from("unknown"),
-1,
-1,
None,
None,
)))
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.loc))
} }
} }
fn parse_extended(line: &str, counter: i64) -> History { fn parse_extended(line: &str, counter: i64) -> History {
let line = line.replacen(": ", "", 2); let (time, duration) = line.split_once(':').unwrap();
let (time, duration) = line.splitn(2, ':').collect_tuple().unwrap(); let (duration, command) = duration.split_once(';').unwrap();
let (duration, command) = duration.splitn(2, ';').collect_tuple().unwrap();
let time = time let time = time
.parse::<i64>() .parse::<i64>()
@ -158,64 +124,64 @@ fn parse_extended(line: &str, counter: i64) -> History {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::io::Cursor;
use chrono::prelude::*; use chrono::prelude::*;
use chrono::Utc; use chrono::Utc;
use itertools::assert_equal;
use crate::import::tests::TestLoader;
use super::*; use super::*;
#[test] #[test]
fn test_parse_extended_simple() { fn test_parse_extended_simple() {
let parsed = parse_extended(": 1613322469:0;cargo install atuin", 0); let parsed = parse_extended("1613322469:0;cargo install atuin", 0);
assert_eq!(parsed.command, "cargo install atuin"); assert_eq!(parsed.command, "cargo install atuin");
assert_eq!(parsed.duration, 0); assert_eq!(parsed.duration, 0);
assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0));
let parsed = parse_extended(": 1613322469:10;cargo install atuin;cargo update", 0); let parsed = parse_extended("1613322469:10;cargo install atuin;cargo update", 0);
assert_eq!(parsed.command, "cargo install atuin;cargo update"); assert_eq!(parsed.command, "cargo install atuin;cargo update");
assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.duration, 10_000_000_000);
assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0));
let parsed = parse_extended(": 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0); let parsed = parse_extended("1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷", 0);
assert_eq!(parsed.command, "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"); assert_eq!(parsed.command, "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷");
assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.duration, 10_000_000_000);
assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0));
let parsed = parse_extended(": 1613322469:10;cargo install \\n atuin\n", 0); let parsed = parse_extended("1613322469:10;cargo install \\n atuin\n", 0);
assert_eq!(parsed.command, "cargo install \\n atuin"); assert_eq!(parsed.command, "cargo install \\n atuin");
assert_eq!(parsed.duration, 10_000_000_000); assert_eq!(parsed.duration, 10_000_000_000);
assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0)); assert_eq!(parsed.timestamp, Utc.timestamp(1_613_322_469, 0));
} }
#[test] #[tokio::test]
fn test_parse_file() { async fn test_parse_file() {
let input = r": 1613322469:0;cargo install atuin let bytes = r": 1613322469:0;cargo install atuin
: 1613322469:10;cargo install atuin; \ : 1613322469:10;cargo install atuin; \
cargo update cargo update
: 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷ : 1613322469:10;cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
"; "
.as_bytes()
.to_owned();
let cursor = Cursor::new(input); let mut zsh = Zsh { bytes };
let mut zsh = Zsh::new(cursor).unwrap(); assert_eq!(zsh.entries().await.unwrap(), 4);
assert_eq!(zsh.loc, 4);
assert_eq!(zsh.size_hint(), (0, Some(4)));
assert_eq!(&zsh.next().unwrap().unwrap().command, "cargo install atuin"); let mut loader = TestLoader::default();
assert_eq!( zsh.load(&mut loader).await.unwrap();
&zsh.next().unwrap().unwrap().command,
"cargo install atuin; \\\ncargo update" assert_equal(
loader.buf.iter().map(|h| h.command.as_str()),
[
"cargo install atuin",
"cargo install atuin; \\\ncargo update",
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
],
); );
assert_eq!(
&zsh.next().unwrap().unwrap().command,
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷"
);
assert!(zsh.next().is_none());
assert_eq!(zsh.size_hint(), (0, Some(0)));
} }
} }

View file

@ -58,6 +58,7 @@ pub enum Cmd {
} }
impl Cmd { impl Cmd {
#[tokio::main(flavor = "current_thread")]
pub async fn run(self) -> Result<()> { pub async fn run(self) -> Result<()> {
pretty_env_logger::init(); pretty_env_logger::init();

View file

@ -128,11 +128,7 @@ pub fn print_cmd_only(w: &mut StdoutLock, h: &[History]) {
} }
impl Cmd { impl Cmd {
pub async fn run( pub async fn run(&self, settings: &Settings, db: &mut impl Database) -> Result<()> {
&self,
settings: &Settings,
db: &mut (impl Database + Send + Sync),
) -> Result<()> {
let context = current_context(); let context = current_context();
match self { match self {

View file

@ -1,13 +1,14 @@
use std::{env, path::PathBuf}; use std::env;
use async_trait::async_trait;
use clap::Parser; use clap::Parser;
use eyre::{eyre, Result}; use eyre::Result;
use indicatif::ProgressBar; use indicatif::ProgressBar;
use atuin_client::{ use atuin_client::{
database::Database, database::Database,
history::History, history::History,
import::{bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, Importer}, import::{bash::Bash, fish::Fish, resh::Resh, zsh::Zsh, Importer, Loader},
}; };
#[derive(Parser)] #[derive(Parser)]
@ -18,13 +19,10 @@ pub enum Cmd {
/// Import history from the zsh history file /// Import history from the zsh history file
Zsh, Zsh,
/// Import history from the bash history file /// Import history from the bash history file
Bash, Bash,
/// Import history from the resh history file /// Import history from the resh history file
Resh, Resh,
/// Import history from the fish history file /// Import history from the fish history file
Fish, Fish,
} }
@ -32,7 +30,7 @@ pub enum Cmd {
const BATCH_SIZE: usize = 100; const BATCH_SIZE: usize = 100;
impl Cmd { impl Cmd {
pub async fn run(&self, db: &mut (impl Database + Send + Sync)) -> Result<()> { pub async fn run<DB: Database>(&self, db: &mut DB) -> Result<()> {
println!(" Atuin "); println!(" Atuin ");
println!("======================"); println!("======================");
println!(" \u{1f30d} "); println!(" \u{1f30d} ");
@ -47,124 +45,73 @@ impl Cmd {
if shell.ends_with("/zsh") { if shell.ends_with("/zsh") {
println!("Detected ZSH"); println!("Detected ZSH");
import::<Zsh<_>, _>(db, BATCH_SIZE).await import::<Zsh, DB>(db).await
} else if shell.ends_with("/fish") { } else if shell.ends_with("/fish") {
println!("Detected Fish"); println!("Detected Fish");
import::<Fish<_>, _>(db, BATCH_SIZE).await import::<Fish, DB>(db).await
} else if shell.ends_with("/bash") { } else if shell.ends_with("/bash") {
println!("Detected Bash"); println!("Detected Bash");
import::<Bash<_>, _>(db, BATCH_SIZE).await import::<Bash, DB>(db).await
} else { } else {
println!("cannot import {} history", shell); println!("cannot import {} history", shell);
Ok(()) Ok(())
} }
} }
Self::Zsh => import::<Zsh<_>, _>(db, BATCH_SIZE).await, Self::Zsh => import::<Zsh, DB>(db).await,
Self::Bash => import::<Bash<_>, _>(db, BATCH_SIZE).await, Self::Bash => import::<Bash, DB>(db).await,
Self::Resh => import::<Resh, _>(db, BATCH_SIZE).await, Self::Resh => import::<Resh, DB>(db).await,
Self::Fish => import::<Fish<_>, _>(db, BATCH_SIZE).await, Self::Fish => import::<Fish, DB>(db).await,
} }
} }
} }
async fn import<I: Importer + Send, DB: Database + Send + Sync>( pub struct HistoryImporter<'db, DB: Database> {
db: &mut DB, pb: ProgressBar,
buf_size: usize, buf: Vec<History>,
) -> Result<()> db: &'db mut DB,
where }
I::IntoIter: Send,
{ impl<'db, DB: Database> HistoryImporter<'db, DB> {
fn new(db: &'db mut DB, len: usize) -> Self {
Self {
pb: ProgressBar::new(len as u64),
buf: Vec::with_capacity(BATCH_SIZE),
db,
}
}
async fn flush(self) -> Result<()> {
if !self.buf.is_empty() {
self.db.save_bulk(&self.buf).await?;
}
self.pb.finish();
Ok(())
}
}
#[async_trait]
impl<'db, DB: Database> Loader for HistoryImporter<'db, DB> {
async fn push(&mut self, hist: History) -> Result<()> {
self.pb.inc(1);
self.buf.push(hist);
if self.buf.len() == self.buf.capacity() {
self.db.save_bulk(&self.buf).await?;
self.buf.clear();
}
Ok(())
}
}
async fn import<I: Importer + Send, DB: Database>(db: &mut DB) -> Result<()> {
println!("Importing history from {}", I::NAME); println!("Importing history from {}", I::NAME);
let histpath = get_histpath::<I>()?; let mut importer = I::new().await?;
let contents = I::parse(histpath)?; let len = importer.entries().await.unwrap();
let mut loader = HistoryImporter::new(db, len);
let iter = contents.into_iter(); importer.load(&mut loader).await?;
let progress = if let (_, Some(upper_bound)) = iter.size_hint() { loader.flush().await?;
ProgressBar::new(upper_bound as u64)
} else {
ProgressBar::new_spinner()
};
let mut buf = Vec::<History>::with_capacity(buf_size);
let mut iter = progress.wrap_iter(iter);
loop {
// fill until either no more entries
// or until the buffer is full
let done = fill_buf(&mut buf, &mut iter);
// flush
db.save_bulk(&buf).await?;
if done {
break;
}
}
println!("Import complete!"); println!("Import complete!");
Ok(()) Ok(())
} }
fn get_histpath<I: Importer>() -> Result<PathBuf> {
if let Ok(p) = env::var("HISTFILE") {
is_file(PathBuf::from(p))
} else {
is_file(I::histpath()?)
}
}
fn is_file(p: PathBuf) -> Result<PathBuf> {
if p.is_file() {
Ok(p)
} else {
Err(eyre!(
"Could not find history file {:?}. Try setting $HISTFILE",
p
))
}
}
fn fill_buf<T, E>(buf: &mut Vec<T>, iter: &mut impl Iterator<Item = Result<T, E>>) -> bool {
buf.clear();
loop {
match iter.next() {
Some(Ok(t)) => buf.push(t),
Some(Err(_)) => (),
None => break true,
}
if buf.len() == buf.capacity() {
break false;
}
}
}
#[cfg(test)]
mod tests {
use super::fill_buf;
#[test]
fn test_fill_buf() {
let mut buf = Vec::with_capacity(4);
let mut iter = vec![
Ok(1),
Err(2),
Ok(3),
Ok(4),
Err(5),
Ok(6),
Ok(7),
Err(8),
Ok(9),
]
.into_iter();
assert!(!fill_buf(&mut buf, &mut iter));
assert_eq!(buf, vec![1, 3, 4, 6]);
assert!(fill_buf(&mut buf, &mut iter));
assert_eq!(buf, vec![7, 9]);
}
}

View file

@ -75,11 +75,7 @@ pub struct Cmd {
} }
impl Cmd { impl Cmd {
pub async fn run( pub async fn run(self, db: &mut impl Database, settings: &Settings) -> Result<()> {
self,
db: &mut (impl Database + Send + Sync),
settings: &Settings,
) -> Result<()> {
if self.interactive { if self.interactive {
let item = select_history( let item = select_history(
&self.query, &self.query,
@ -257,7 +253,7 @@ impl State {
async fn query_results( async fn query_results(
app: &mut State, app: &mut State,
search_mode: SearchMode, search_mode: SearchMode,
db: &mut (impl Database + Send + Sync), db: &mut impl Database,
) -> Result<()> { ) -> Result<()> {
let results = match app.input.as_str() { let results = match app.input.as_str() {
"" => { "" => {
@ -284,7 +280,7 @@ async fn query_results(
async fn key_handler( async fn key_handler(
input: Key, input: Key,
search_mode: SearchMode, search_mode: SearchMode,
db: &mut (impl Database + Send + Sync), db: &mut impl Database,
app: &mut State, app: &mut State,
) -> Option<String> { ) -> Option<String> {
match input { match input {
@ -537,7 +533,7 @@ async fn select_history(
search_mode: SearchMode, search_mode: SearchMode,
filter_mode: FilterMode, filter_mode: FilterMode,
style: atuin_client::settings::Style, style: atuin_client::settings::Style,
db: &mut (impl Database + Send + Sync), db: &mut impl Database,
) -> Result<String> { ) -> Result<String> {
let stdout = stdout().into_raw_mode()?; let stdout = stdout().into_raw_mode()?;
let stdout = MouseTerminal::from(stdout); let stdout = MouseTerminal::from(stdout);
@ -596,7 +592,7 @@ async fn run_non_interactive(
after: Option<String>, after: Option<String>,
limit: Option<i64>, limit: Option<i64>,
query: &[String], query: &[String],
db: &mut (impl Database + Send + Sync), db: &mut impl Database,
) -> Result<()> { ) -> Result<()> {
let dir = if cwd.as_deref() == Some(".") { let dir = if cwd.as_deref() == Some(".") {
let current = std::env::current_dir()?; let current = std::env::current_dir()?;

View file

@ -62,11 +62,7 @@ fn compute_stats(history: &[History]) -> Result<()> {
} }
impl Cmd { impl Cmd {
pub async fn run( pub async fn run(&self, db: &mut impl Database, settings: &Settings) -> Result<()> {
&self,
db: &mut (impl Database + Send + Sync),
settings: &Settings,
) -> Result<()> {
let context = current_context(); let context = current_context();
let words = if self.period.is_empty() { let words = if self.period.is_empty() {
String::from("all") String::from("all")

View file

@ -31,11 +31,7 @@ pub enum Cmd {
} }
impl Cmd { impl Cmd {
pub async fn run( pub async fn run(self, settings: Settings, db: &mut impl Database) -> Result<()> {
self,
settings: Settings,
db: &mut (impl Database + Send + Sync),
) -> Result<()> {
match self { match self {
Self::Sync { force } => run(&settings, force, db).await, Self::Sync { force } => run(&settings, force, db).await,
Self::Login(l) => l.run(&settings).await, Self::Login(l) => l.run(&settings).await,
@ -52,11 +48,7 @@ impl Cmd {
} }
} }
async fn run( async fn run(settings: &Settings, force: bool, db: &mut impl Database) -> Result<()> {
settings: &Settings,
force: bool,
db: &mut (impl Database + Send + Sync),
) -> Result<()> {
atuin_client::sync::sync(settings, force, db).await?; atuin_client::sync::sync(settings, force, db).await?;
println!( println!(
"Sync complete! {} items in database, force: {}", "Sync complete! {} items in database, force: {}",

View file

@ -19,11 +19,11 @@ pub enum AtuinCmd {
} }
impl AtuinCmd { impl AtuinCmd {
pub async fn run(self) -> Result<()> { pub fn run(self) -> Result<()> {
match self { match self {
Self::Client(client) => client.run().await, Self::Client(client) => client.run(),
#[cfg(feature = "server")] #[cfg(feature = "server")]
Self::Server(server) => server.run().await, Self::Server(server) => server.run(),
} }
} }
} }

View file

@ -21,6 +21,7 @@ pub enum Cmd {
} }
impl Cmd { impl Cmd {
#[tokio::main]
pub async fn run(self) -> Result<()> { pub async fn run(self) -> Result<()> {
tracing_subscriber::registry() tracing_subscriber::registry()
.with(fmt::layer()) .with(fmt::layer())

View file

@ -25,12 +25,11 @@ struct Atuin {
} }
impl Atuin { impl Atuin {
async fn run(self) -> Result<()> { fn run(self) -> Result<()> {
self.atuin.run().await self.atuin.run()
} }
} }
#[tokio::main] fn main() -> Result<()> {
async fn main() -> Result<()> { Atuin::parse().run()
Atuin::parse().run().await
} }