diff --git a/atuin-client/src/import/bash.rs b/atuin-client/src/import/bash.rs
index 10e8de1..520b49c 100644
--- a/atuin-client/src/import/bash.rs
+++ b/atuin-client/src/import/bash.rs
@@ -1,8 +1,10 @@
-use std::{fs::File, io::Read, path::PathBuf};
+use std::{fs::File, io::Read, path::PathBuf, str};
 
 use async_trait::async_trait;
+use chrono::{DateTime, Duration, NaiveDateTime, Utc};
 use directories::UserDirs;
 use eyre::{eyre, Result};
+use itertools::Itertools;
 
 use super::{get_histpath, unix_byte_lines, Importer, Loader};
 use crate::history::History;
@@ -32,37 +34,61 @@ impl Importer for Bash {
     }
 
+    /// Counts the history entries, i.e. the lines classified as commands;
+    /// timestamp lines and invalid UTF-8 lines are not counted.
     async fn entries(&mut self) -> Result<usize> {
-        Ok(super::count_lines(&self.bytes))
+        let count = unix_byte_lines(&self.bytes)
+            .map(LineType::from)
+            .filter(|line| matches!(line, LineType::Command(_)))
+            .count();
+        Ok(count)
     }
 
+    /// Pushes every command line into `h`. A `#<epoch seconds>` line sets the timestamp
+    /// of the command after it; each further command is placed one second later. Commands
+    /// before the first timestamp line (or all of them, if there is none, with "now" as
+    /// the base) are back-dated one second apart so that ordering is preserved.
     async fn load(self, h: &mut impl Loader) -> Result<()> {
-        let now = chrono::Utc::now();
-        let mut line = String::new();
+        let lines = unix_byte_lines(&self.bytes)
+            .map(LineType::from)
+            .filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored
+            .collect_vec();
 
-        for (i, b) in unix_byte_lines(&self.bytes).enumerate() {
-            let s = match std::str::from_utf8(b) {
-                Ok(s) => s,
-                Err(_) => continue, // we can skip past things like invalid utf8
-            };
+        let (commands_before_first_timestamp, first_timestamp) = lines
+            .iter()
+            .enumerate()
+            .find_map(|(i, line)| match line {
+                LineType::Timestamp(t) => Some((i, *t)),
+                _ => None,
+            })
+            // if no known timestamps, use now as base
+            .unwrap_or((lines.len(), Utc::now()));
 
-            if let Some(s) = s.strip_suffix('\\') {
-                line.push_str(s);
-                line.push_str("\\\n");
-            } else {
-                line.push_str(s);
-                let command = std::mem::take(&mut line);
+        // if no timestamp is recorded, then use this increment to set an arbitrary timestamp
+        // to preserve ordering
+        let timestamp_increment = Duration::seconds(1);
+        // make sure there is a minimum amount of time before the first known timestamp
+        // to fit all commands, given the default increment
+        // (the count is clamped so the cast to `i32` cannot wrap)
+        let leading_commands = commands_before_first_timestamp.min(i32::MAX as usize) as i32;
+        let mut next_timestamp = first_timestamp - timestamp_increment * leading_commands;
 
-                let offset = chrono::Duration::seconds(i as i64);
-                h.push(History::new(
-                    now - offset, // preserve ordering
-                    command,
-                    String::from("unknown"),
-                    -1,
-                    -1,
-                    None,
-                    None,
-                ))
-                .await?;
+        for line in lines {
+            match line {
+                LineType::NotUtf8 => unreachable!(), // already filtered
+                LineType::Timestamp(t) => next_timestamp = t,
+                LineType::Command(c) => {
+                    let entry = History::new(
+                        next_timestamp,
+                        c.into(),
+                        "unknown".into(),
+                        -1,
+                        -1,
+                        None,
+                        None,
+                    );
+                    h.push(entry).await?;
+                    next_timestamp += timestamp_increment;
+                }
             }
         }
 
@@ -70,18 +96,56 @@ impl Importer for Bash {
     }
 }
 
+/// Classification of one raw line of a bash history file.
+#[derive(Debug, Clone)]
+enum LineType<'a> {
+    /// The line is not valid UTF-8.
+    NotUtf8,
+    /// A timestamp line starts with a '#', followed immediately by an integer
+    /// that represents seconds since UNIX epoch.
+    Timestamp(DateTime<Utc>),
+    /// Anything that doesn't look like a timestamp.
+    Command(&'a str),
+}
+
+impl<'a> From<&'a [u8]> for LineType<'a> {
+    /// Classifies `bytes`: invalid UTF-8 first, then timestamp, otherwise command.
+    fn from(bytes: &'a [u8]) -> Self {
+        let Ok(line) = str::from_utf8(bytes) else {
+            return LineType::NotUtf8;
+        };
+        match try_parse_line_as_timestamp(line) {
+            Some(time) => LineType::Timestamp(time),
+            None => LineType::Command(line),
+        }
+    }
+}
+
+/// Parses a line of the form `#<seconds since UNIX epoch>`.
+///
+/// Returns `None` when the line lacks the `#` prefix, the remainder is not an
+/// integer, or the value is outside the range chrono can represent.
+fn try_parse_line_as_timestamp(line: &str) -> Option<DateTime<Utc>> {
+    let seconds = line.strip_prefix('#')?.parse().ok()?;
+    // `from_timestamp` panics on out-of-range values; a stray line such as
+    // `#99999999999999999` must not abort the whole import
+    let time = NaiveDateTime::from_timestamp_opt(seconds, 0)?;
+    Some(DateTime::from_utc(time, Utc))
+}
+
 #[cfg(test)]
-mod tests {
-    use itertools::assert_equal;
+mod test {
+    use std::cmp::Ordering;
+
+    use itertools::{assert_equal, Itertools};
 
     use crate::import::{tests::TestLoader, Importer};
 
     use super::Bash;
 
     #[tokio::test]
-    async fn test_parse_file() {
+    async fn parse_no_timestamps() {
         let bytes = r"cargo install atuin
-cargo install atuin; \
 cargo update
 cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
 "
@@ -89,7 +153,7 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
         .to_owned();
 
         let mut bash = Bash { bytes };
-        assert_eq!(bash.entries().await.unwrap(), 4);
+        assert_eq!(bash.entries().await.unwrap(), 3);
 
         let mut loader = TestLoader::default();
         bash.load(&mut loader).await.unwrap();
@@ -98,9 +162,90 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
             loader.buf.iter().map(|h| h.command.as_str()),
             [
                 "cargo install atuin",
-                "cargo install atuin; \\\ncargo update",
+                "cargo update",
                 "cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
             ],
         );
+        assert!(is_strictly_sorted(
+            loader.buf.iter().map(|h| h.timestamp.timestamp())
+        ))
+    }
+
+    #[tokio::test]
+    async fn parse_with_timestamps() {
+        let bytes = b"#1672918999
+git reset
+#1672919006
+git clean -dxf
+#1672919020
+cd ../
+"
+        .to_vec();
+
+        let mut bash = Bash { bytes };
+        assert_eq!(bash.entries().await.unwrap(), 3);
+
+        let mut loader = TestLoader::default();
+        bash.load(&mut loader).await.unwrap();
+
+        assert_equal(
+            loader.buf.iter().map(|h| h.command.as_str()),
+            ["git reset", "git clean -dxf", "cd ../"],
+        );
+        assert_equal(
+            loader.buf.iter().map(|h| h.timestamp.timestamp()),
+            [1672918999, 1672919006, 1672919020],
+        )
+    }
+
+    #[tokio::test]
+    async fn parse_with_partial_timestamps() {
+        let bytes = b"git reset
+#1672919006
+git clean -dxf
+cd ../
+"
+        .to_vec();
+
+        let mut bash = Bash { bytes };
+        assert_eq!(bash.entries().await.unwrap(), 3);
+
+        let mut loader = TestLoader::default();
+        bash.load(&mut loader).await.unwrap();
+
+        assert_equal(
+            loader.buf.iter().map(|h| h.command.as_str()),
+            ["git reset", "git clean -dxf", "cd ../"],
+        );
+        assert!(is_strictly_sorted(
+            loader.buf.iter().map(|h| h.timestamp.timestamp())
+        ))
+    }
+
+    #[tokio::test]
+    async fn parse_out_of_range_timestamp() {
+        // too large for chrono to represent: must be kept as a command, not panic
+        let bytes = b"#99999999999999999\ngit reset\n".to_vec();
+
+        let mut bash = Bash { bytes };
+        assert_eq!(bash.entries().await.unwrap(), 2);
+
+        let mut loader = TestLoader::default();
+        bash.load(&mut loader).await.unwrap();
+
+        assert_equal(
+            loader.buf.iter().map(|h| h.command.as_str()),
+            ["#99999999999999999", "git reset"],
+        );
+    }
+
+    /// Returns `true` when every element is strictly less than its successor.
+    fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool
+    where
+        T: Clone + PartialOrd,
+    {
+        iter.into_iter()
+            .tuple_windows()
+            .all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less)))
     }
 }