Rework Bash import (#747)
* Rework Bash import Closes #745 - Imported history is now ordered correctly - Timestamps (when `HISTTIMEFORMAT` is set) are handled correctly * Timestamp tests test for strict sorting
This commit is contained in:
parent
a033890506
commit
63c572104b
1 changed files with 143 additions and 32 deletions
|
@ -1,8 +1,10 @@
|
||||||
use std::{fs::File, io::Read, path::PathBuf};
|
use std::{fs::File, io::Read, path::PathBuf, str};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use chrono::{DateTime, Duration, NaiveDateTime, Utc};
|
||||||
use directories::UserDirs;
|
use directories::UserDirs;
|
||||||
use eyre::{eyre, Result};
|
use eyre::{eyre, Result};
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
use super::{get_histpath, unix_byte_lines, Importer, Loader};
|
use super::{get_histpath, unix_byte_lines, Importer, Loader};
|
||||||
use crate::history::History;
|
use crate::history::History;
|
||||||
|
@ -32,37 +34,54 @@ impl Importer for Bash {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn entries(&mut self) -> Result<usize> {
|
async fn entries(&mut self) -> Result<usize> {
|
||||||
Ok(super::count_lines(&self.bytes))
|
let count = unix_byte_lines(&self.bytes)
|
||||||
|
.map(LineType::from)
|
||||||
|
.filter(|line| matches!(line, LineType::Command(_)))
|
||||||
|
.count();
|
||||||
|
Ok(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn load(self, h: &mut impl Loader) -> Result<()> {
|
async fn load(self, h: &mut impl Loader) -> Result<()> {
|
||||||
let now = chrono::Utc::now();
|
let lines = unix_byte_lines(&self.bytes)
|
||||||
let mut line = String::new();
|
.map(LineType::from)
|
||||||
|
.filter(|line| !matches!(line, LineType::NotUtf8)) // invalid utf8 are ignored
|
||||||
|
.collect_vec();
|
||||||
|
|
||||||
for (i, b) in unix_byte_lines(&self.bytes).enumerate() {
|
let (commands_before_first_timestamp, first_timestamp) = lines
|
||||||
let s = match std::str::from_utf8(b) {
|
.iter()
|
||||||
Ok(s) => s,
|
.enumerate()
|
||||||
Err(_) => continue, // we can skip past things like invalid utf8
|
.find_map(|(i, line)| match line {
|
||||||
};
|
LineType::Timestamp(t) => Some((i, *t)),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
// if no known timestamps, use now as base
|
||||||
|
.unwrap_or((lines.len(), Utc::now()));
|
||||||
|
|
||||||
if let Some(s) = s.strip_suffix('\\') {
|
// if no timestamp is recorded, then use this increment to set an arbitrary timestamp
|
||||||
line.push_str(s);
|
// to preserve ordering
|
||||||
line.push_str("\\\n");
|
let timestamp_increment = Duration::seconds(1);
|
||||||
} else {
|
// make sure there is a minimum amount of time before the first known timestamp
|
||||||
line.push_str(s);
|
// to fit all commands, given the default increment
|
||||||
let command = std::mem::take(&mut line);
|
let mut next_timestamp =
|
||||||
|
first_timestamp - timestamp_increment * commands_before_first_timestamp as i32;
|
||||||
|
|
||||||
let offset = chrono::Duration::seconds(i as i64);
|
for line in lines.into_iter() {
|
||||||
h.push(History::new(
|
match line {
|
||||||
now - offset, // preserve ordering
|
LineType::NotUtf8 => unreachable!(), // already filtered
|
||||||
command,
|
LineType::Timestamp(t) => next_timestamp = t,
|
||||||
String::from("unknown"),
|
LineType::Command(c) => {
|
||||||
|
let entry = History::new(
|
||||||
|
next_timestamp,
|
||||||
|
c.into(),
|
||||||
|
"unknown".into(),
|
||||||
-1,
|
-1,
|
||||||
-1,
|
-1,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
))
|
);
|
||||||
.await?;
|
h.push(entry).await?;
|
||||||
|
next_timestamp += timestamp_increment;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,18 +89,47 @@ impl Importer for Bash {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
enum LineType<'a> {
|
||||||
|
NotUtf8,
|
||||||
|
/// A timestamp line starts with a '#', followed immediately by an integer
|
||||||
|
/// that represents seconds since UNIX epoch.
|
||||||
|
Timestamp(DateTime<Utc>),
|
||||||
|
/// Anything that doesn't look like a timestamp.
|
||||||
|
Command(&'a str),
|
||||||
|
}
|
||||||
|
impl<'a> From<&'a [u8]> for LineType<'a> {
|
||||||
|
fn from(bytes: &'a [u8]) -> Self {
|
||||||
|
let Ok(line) = str::from_utf8(bytes) else {
|
||||||
|
return LineType::NotUtf8;
|
||||||
|
};
|
||||||
|
let parsed = match try_parse_line_as_timestamp(line) {
|
||||||
|
Some(time) => LineType::Timestamp(time),
|
||||||
|
None => LineType::Command(line),
|
||||||
|
};
|
||||||
|
parsed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_parse_line_as_timestamp(line: &str) -> Option<DateTime<Utc>> {
|
||||||
|
let seconds = line.strip_prefix('#')?.parse().ok()?;
|
||||||
|
let time = NaiveDateTime::from_timestamp(seconds, 0);
|
||||||
|
Some(DateTime::from_utc(time, Utc))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod test {
|
||||||
use itertools::assert_equal;
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
|
use itertools::{assert_equal, Itertools};
|
||||||
|
|
||||||
use crate::import::{tests::TestLoader, Importer};
|
use crate::import::{tests::TestLoader, Importer};
|
||||||
|
|
||||||
use super::Bash;
|
use super::Bash;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_parse_file() {
|
async fn parse_no_timestamps() {
|
||||||
let bytes = r"cargo install atuin
|
let bytes = r"cargo install atuin
|
||||||
cargo install atuin; \
|
|
||||||
cargo update
|
cargo update
|
||||||
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
|
cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
|
||||||
"
|
"
|
||||||
|
@ -89,7 +137,7 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
|
||||||
.to_owned();
|
.to_owned();
|
||||||
|
|
||||||
let mut bash = Bash { bytes };
|
let mut bash = Bash { bytes };
|
||||||
assert_eq!(bash.entries().await.unwrap(), 4);
|
assert_eq!(bash.entries().await.unwrap(), 3);
|
||||||
|
|
||||||
let mut loader = TestLoader::default();
|
let mut loader = TestLoader::default();
|
||||||
bash.load(&mut loader).await.unwrap();
|
bash.load(&mut loader).await.unwrap();
|
||||||
|
@ -98,9 +146,72 @@ cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷
|
||||||
loader.buf.iter().map(|h| h.command.as_str()),
|
loader.buf.iter().map(|h| h.command.as_str()),
|
||||||
[
|
[
|
||||||
"cargo install atuin",
|
"cargo install atuin",
|
||||||
"cargo install atuin; \\\ncargo update",
|
"cargo update",
|
||||||
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
|
"cargo :b̷i̶t̴r̵o̴t̴ ̵i̷s̴ ̷r̶e̵a̸l̷",
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
assert!(is_strictly_sorted(
|
||||||
|
loader.buf.iter().map(|h| h.timestamp.timestamp())
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn parse_with_timestamps() {
|
||||||
|
let bytes = b"#1672918999
|
||||||
|
git reset
|
||||||
|
#1672919006
|
||||||
|
git clean -dxf
|
||||||
|
#1672919020
|
||||||
|
cd ../
|
||||||
|
"
|
||||||
|
.to_vec();
|
||||||
|
|
||||||
|
let mut bash = Bash { bytes };
|
||||||
|
assert_eq!(bash.entries().await.unwrap(), 3);
|
||||||
|
|
||||||
|
let mut loader = TestLoader::default();
|
||||||
|
bash.load(&mut loader).await.unwrap();
|
||||||
|
|
||||||
|
assert_equal(
|
||||||
|
loader.buf.iter().map(|h| h.command.as_str()),
|
||||||
|
["git reset", "git clean -dxf", "cd ../"],
|
||||||
|
);
|
||||||
|
assert_equal(
|
||||||
|
loader.buf.iter().map(|h| h.timestamp.timestamp()),
|
||||||
|
[1672918999, 1672919006, 1672919020],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn parse_with_partial_timestamps() {
|
||||||
|
let bytes = b"git reset
|
||||||
|
#1672919006
|
||||||
|
git clean -dxf
|
||||||
|
cd ../
|
||||||
|
"
|
||||||
|
.to_vec();
|
||||||
|
|
||||||
|
let mut bash = Bash { bytes };
|
||||||
|
assert_eq!(bash.entries().await.unwrap(), 3);
|
||||||
|
|
||||||
|
let mut loader = TestLoader::default();
|
||||||
|
bash.load(&mut loader).await.unwrap();
|
||||||
|
|
||||||
|
assert_equal(
|
||||||
|
loader.buf.iter().map(|h| h.command.as_str()),
|
||||||
|
["git reset", "git clean -dxf", "cd ../"],
|
||||||
|
);
|
||||||
|
assert!(is_strictly_sorted(
|
||||||
|
loader.buf.iter().map(|h| h.timestamp.timestamp())
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_strictly_sorted<T>(iter: impl IntoIterator<Item = T>) -> bool
|
||||||
|
where
|
||||||
|
T: Clone + PartialOrd,
|
||||||
|
{
|
||||||
|
iter.into_iter()
|
||||||
|
.tuple_windows()
|
||||||
|
.all(|(a, b)| matches!(a.partial_cmp(&b), Some(Ordering::Less)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue