Relates to discussion #353 (filtering) and #505 (structured CLI output).
The path filtering is used as `pijul log -- <paths>` and will log only the
changes that touched the listed paths.
The JSON output feature is invoked as `pijul log --output-format=json`.
Since internally it uses serde, this can be extended to any serde target
format.
The implementation creates two new structs, `LogIterator` and `LogEntry`;
the former is used to hold the state that was previously loose in `Log::run`,
and provides a `for_each` method that can be used to map over the log entries
efficiently and in a way that maximizes code reuse (where efficiently means
we only keep one log entry in memory at a time).
OU6JOR3CDZTH2H3NTGMV3WDIAWPD3VEJI7JRY3VJ7LPDR3QOA52QC
RUBBHYZ7MCLKJIHZ3EWEC3JR3FSKOU4T2NH7KRBG7ECAU4JF3LUAC
UW3KU7DHQDCI6GRYI6FI5S6PCLKNCN4QPSAU7WJROY2D5MPQYRAQC
SMMBFECLGSUKRZW5YPOQPOQCOY2CH2OTZXBSZ3KG2N3J3HQZ5PSAC
SXEYMYF7P4RZMZ46WPL4IZUTSQ2ATBWYZX7QNVMS3SGOYXYOHAGQC
L4JXJHWXYNCL4QGJXNKKTOKKTAXKKXBJUUY7HFZGEUZ5A2V5H34QC
CCLLB7OIFNFYJZTG3UCI7536TOCWSCSXR67VELSB466R24WLJSDAC
I52XSRUH5RVHQBFWVMAQPTUSPAJ4KNVID2RMI3UGCVKFLYUO6WZAC
A3RM526Y7LUXNYW4TL56YKQ5GVOK2R5D7JJVTSQ6TT5MEXIR6YAAC
JL4WKA5PBKXRNAMETYO4I52QKASQ3COYHH2JKGA7W5YLIRZZH53AC
5OGOE4VWS5AIG4U2UYLLIGA3HY6UB7SNQOSESHNXBLET3VQXFBZAC
I7VL7VPZV2NKOZRKBWWEHFOGNGGTYLPONHABVJ767D6HPJJNY5RAC
Y6EVFMTA6FOH3OQH6QCSWMI3F6SYZT2FSHO6GF4M3ICENDCWFM4QC
2K7JLB4Z7BS5VFNWD4DO3MKYU7VNPA5MTVHVSDI3FQZ5ICM6XM6QC
YN63NUZO4LVJ7XPMURDULTXBVJKW5MVCTZ24R7Z52QMHO3HPDUVQC
EUZFFJSOWV4PXDFFPDAFBHFUUMOFEU6ST7JH57YYRRR2SEOXLN6QC
I24UEJQLCH2SOXA4UHIYWTRDCHSOPU7AFTRUOTX7HZIAV4AZKYEQC
PH7B6I3U5XCACAX6VX3ZDJD2DQOQS7725R6CTOATNC26NP4VPUFQC
PSKXR4QEPPVJZR777HW67IEHUPGZB44MFCNQ2KUS422Q3W22IQWAC
GURIBVW66JDQK3SJZRGVJ2MQLMT7JD4KLI5QPQZGPAL7WH3T6T4AC
// Target serialization format for the log. Matched case-insensitively
// against "json" in `Log::run`; any other (or absent) value falls back to
// the pretty-printed terminal output. Deliberately a `//` comment rather
// than `///` so clap's generated `--help` text is unchanged.
#[clap(long = "output-format")]
output_format: Option<String>,
/// Filter log output, showing only log entries that touched the specified
/// files. Accepted as a list of paths relative to your current directory.
/// Currently, filters can only be applied when logging the channel that's
/// in use.
#[clap(last = true)]
filters: Vec<String>,
impl Log {
pub fn run(self) -> Result<(), anyhow::Error> {
let repo = Repository::find_root(self.repo_path)?;
// A lot of error-handling noise here, but since we're dealing with
// a user-command and a bunch of file-IO/path manipulation it's
// probably necessary for the feedback to be good.
/// Resolve each user-supplied path pattern to the `Inode` tracking it.
///
/// Every pattern is canonicalized relative to the current working
/// directory, re-expressed relative to `repo_path`, and looked up in the
/// tree transaction. Any step that fails produces a user-facing error
/// naming the offending pattern.
fn get_inodes(
txn: &impl libpijul::pristine::TreeTxnT,
repo_path: &Path,
pats: &[String],
) -> Result<Vec<libpijul::Inode>, anyhow::Error> {
    let mut found = Vec::with_capacity(pats.len());
    for pat in pats {
        // Canonicalization distinguishes "no such file" (a user error we
        // can phrase nicely) from other I/O failures (propagated as-is).
        let canonical = match Path::new(pat).canonicalize() {
            Ok(p) => p,
            Err(e) if matches!(e.kind(), std::io::ErrorKind::NotFound) => bail!(
                "pijul log couldn't find a file or directory corresponding to `{}`",
                pat
            ),
            Err(e) => return Err(e.into()),
        };
        // strip_prefix fails iff repo_path is not a prefix of the canonical
        // path, i.e. the pattern points outside the repository.
        let rel = match canonical.strip_prefix(repo_path) {
            Ok(p) => p,
            Err(_) => bail!(
                "pijul log couldn't assemble file prefix for pattern `{}`; \
                 {} was not a file in the repository at {}",
                pat,
                canonical.display(),
                repo_path.display()
            ),
        };
        // Path::to_str returns None iff the path contains invalid UTF-8.
        let rel_str = match rel.to_str() {
            Some(s) => s,
            None => bail!(
                "pijul log couldn't assemble file prefix for pattern `{}`; \
                 the path contained invalid UTF-8",
                pat
            ),
        };
        match libpijul::fs::find_inode(txn, rel_str) {
            Ok(inode) => found.push(inode),
            Err(e) => bail!(
                "pijul log couldn't assemble file prefix for pattern `{}`; \
                 no Inode found for the corresponding path. Internal error: {:?}",
                pat,
                e
            ),
        }
    }
    log::debug!("log filters: {:#?}\n", pats);
    Ok(found)
}
/// Given a list of path filters which represent the files/directories for which
/// the user wants to see the logs, find the subset of relevant change hashes.
fn filtered_hashes<T: TreeTxnT + GraphTxnT + DepsTxnT>(
txn: &T,
path: &Path,
filters: &[String],
) -> Result<HashSet<libpijul::Hash>, anyhow::Error> {
let inodes = get_inodes(txn, path, filters)?;
let mut hashes = HashSet::<libpijul::Hash>::new();
for inode in inodes {
// The Position<ChangeId> for the file Inode.
let inode_position = match txn.get_inodes(&inode, None)? {
None => bail!("Failed to get matching inode: {:?}", inode),
Some(p) => p,
};
for pair in txn.iter_touched(inode_position)? {
let (position, touched_change_id) = pair?;
// Push iff the file ChangeId for this element matches that of the file Inode
if &position.change == &inode_position.change {
match txn.get_external(touched_change_id)? {
Some(ser_h) => {
hashes.insert(libpijul::Hash::from(*ser_h));
}
_ => {
log::error!(
"`get_external` failed to retrieve full hash for ChangeId {:?}",
touched_change_id
);
bail!("Failed to retrieve full hash for {:?}", touched_change_id)
}
}
} else {
// We've gone past the relevant subset of changes in the iterator.
break;
}
}
}
Ok(hashes)
}
/// A single log entry created by [`LogIterator`]. The fields are
/// all `Option<T>` so that users can more precisely choose what
/// data they want.
///
/// The implementation of [`std::fmt::Display`] is the standard method
/// of pretty-printing a `LogEntry` to the terminal.
///
/// `None` fields are skipped entirely during serde serialization, so the
/// JSON output only carries what was requested.
#[derive(Serialize)]
struct LogEntry {
// Base32 rendering of the change hash.
#[serde(skip_serializing_if = "Option::is_none")]
hash: Option<String>,
// Base32 rendering of the channel state (Merkle); populated only when
// the user asked for states.
#[serde(skip_serializing_if = "Option::is_none")]
state: Option<String>,
// Author display names (login if resolvable, otherwise the raw key/name).
#[serde(skip_serializing_if = "Option::is_none")]
authors: Option<Vec<String>>,
// Change timestamp from the change header, in UTC.
#[serde(skip_serializing_if = "Option::is_none")]
timestamp: Option<chrono::DateTime<chrono::offset::Utc>>,
// The change's one-line message.
#[serde(skip_serializing_if = "Option::is_none")]
message: Option<String>,
// The change's longer description, when present in the header.
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
}
/// The standard pretty-print
/// The standard pretty-print: each field is printed only when present,
/// matching the classic `pijul log` terminal layout.
impl std::fmt::Display for LogEntry {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        if let Some(ref h) = self.hash {
            writeln!(f, "Change {}", h)?;
        }
        if let Some(ref authors) = self.authors {
            // Comma-separate the author list. `join` replaces the manual
            // `is_first` bookkeeping loop and produces identical bytes
            // (an empty list still yields "Author: \n").
            writeln!(f, "Author: {}", authors.join(", "))?;
        }
        if let Some(ref timestamp) = self.timestamp {
            writeln!(f, "Date: {}", timestamp)?;
        }
        if let Some(ref mrk) = self.state {
            writeln!(f, "State: {}", mrk)?;
        }
        if let Some(ref message) = self.message {
            writeln!(f, "\n {}\n", message)?;
        }
        if let Some(ref description) = self.description {
            writeln!(f, "\n {}\n", description)?;
        }
        Ok(())
    }
}
/// Contains state needed to produce the sequence of [`LogEntry`] items
/// that are to be logged. The implementation of `TryFrom<Log>` provides
/// a fallible way of creating one of these from the CLI's [`Log`] structure.
///
/// The two main things this provides are an efficient/streaming implementation
/// of [`serde::Serialize`], and an implementation of [`std::fmt::Display`] that
/// does the standard pretty-printing to stdout.
///
/// The [`LogIterator::for_each`] method lets us reuse the most code while providing both
/// pretty-printing and efficient serialization; we can't easily do this with
/// a full implementation of Iterator because serde's serialize method requires
/// self to be an immutable reference.
struct LogIterator {
// Read transaction over the pristine.
txn: Txn,
// Change store used to load change headers (author, message, etc.).
changes: libpijul::changestore::filesystem::FileSystem,
// The original CLI command; consulted for filters and output flags.
cmd: Log,
// Repository root, used to resolve the path filters.
repo_path: PathBuf,
// Directory holding serialized identities, for author-key resolution.
id_path: PathBuf,
// The channel whose log is being walked.
channel_ref: ChannelRef<Txn>,
// Maximum number of entries to emit (usize::MAX when unlimited).
limit: usize,
// Number of leading entries to skip.
offset: usize,
}
/// This implementation of Serialize is hand-rolled in order
/// to allow for greater re-use and efficiency.
impl Serialize for LogIterator {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut seq = serializer.serialize_seq(None)?;
match self.for_each(|entry| seq.serialize_element(&entry)) {
Ok(_) => seq.end(),
Err(anyhow_err) => Err(serde::ser::Error::custom(format!("{}", anyhow_err))),
}
}
}
/// Pretty-prints all of the requested log entries in the standard
/// user-facing format.
/// Pretty-prints all of the requested log entries in the standard
/// user-facing format, streaming each entry through `for_each`.
impl std::fmt::Display for LogIterator {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // `fmt::Error` carries no payload, so the underlying cause is
        // logged before being collapsed into the opaque error.
        if let Err(e) = self.for_each(|entry| write!(f, "{}", entry)) {
            log::error!("LogIterator::Display: {}", e);
            return Err(std::fmt::Error);
        }
        Ok(())
    }
}
// NOTE(review): this impl appears to contain unresolved merge/patch residue:
// it references `txn`, `channel_name`, `changes` and `self`, none of which
// are bound in this scope; it opens two `if let` blocks on `load_channel`
// that are never closed; and it interleaves what looks like the pre-refactor
// `Log::run` printing loop. It cannot compile as-is — flagged for
// reconstruction against upstream history rather than rewritten here.
impl TryFrom<Log> for LogIterator {
type Error = anyhow::Error;
fn try_from(cmd: Log) -> Result<LogIterator, Self::Error> {
let repo = Repository::find_root(cmd.repo_path.clone())?;
let repo_path = repo.path.clone();
// NOTE(review): `txn` and `channel_name` are undefined here — presumably a
// transaction begin and channel-name resolution were lost from this hunk.
// TODO confirm against the original source.
let channel = if let Some(channel) = txn.load_channel(channel_name)? {
// The only situation that's disallowed is if the user's trying to apply
// path filters AND get the logs for a channel other than the one they're
// currently using (where using means the one that comprises the working copy)
if !cmd.filters.is_empty()
&& !(channel_name == txn.current_channel().unwrap_or(crate::DEFAULT_CHANNEL))
{
bail!("Currently, log filters can only be applied to the channel currently in use.")
}
// NOTE(review): from here down to the `limit`/`offset` rebinding looks like
// a stale fragment of the old `Log::run` (direct stdout printing, `self.*`
// field access) spliced into the new constructor body.
let channel_ref = if let Some(channel) = txn.load_channel(channel_name)? {
let mut stdout = std::io::stdout();
let limit = self.limit.unwrap_or(std::usize::MAX);
let offset = self.offset.unwrap_or(0);
if self.hash_only {
for h in txn
.reverse_log(&*channel.read(), None)?
.skip(offset)
.take(limit)
{
let h: libpijul::Hash = (h?.1).0.into();
writeln!(stdout, "{}", h.to_base32())?
let limit = cmd.limit.unwrap_or(std::usize::MAX);
let offset = cmd.offset.unwrap_or(0);
let mut id_path = repo.path.join(libpijul::DOT_DIR);
id_path.push("identities");
// Intended result: the fully-initialized iterator state.
Ok(Self {
txn,
cmd,
changes,
repo_path,
id_path,
channel_ref,
limit,
offset,
})
}
}
impl LogIterator {
/// Call `f` on each [`LogEntry`] in a [`LogIterator`].
///
/// The purpose of this is to let us execute a function over the log entries
/// without having to duplicate the iteration/filtering logic or
/// having to collect all of the elements first.
///
/// Entries are visited newest-first (via `reverse_log`), honoring the
/// configured `offset`/`limit`, and — when path filters were given —
/// restricted to the hashes returned by `filtered_hashes`.
fn for_each<A, E>(
&self,
mut f: impl FnMut(LogEntry) -> Result<A, E>,
) -> Result<(), anyhow::Error>
where
E: std::fmt::Display,
{
// A cache of authors to keys. Prevents us from having to do
// a lot of file-io for looking up the same author multiple times.
let mut authors = HashMap::new();
let mut id_path = self.id_path.clone();
// If the user applied path filters, figure out what change hashes
// are to be logged.
// (With no filters this is an empty set; the empty-filters check in the
// loop below short-circuits before consulting it.)
let mut requested_hashes = filtered_hashes(
&self.txn,
self.repo_path.as_ref(),
self.cmd.filters.as_slice(),
)?;
// Get the (Hash, Merkle) pairs for the portion of reverse_log
// that are between offset and limit.
let hs = self
.txn
.reverse_log(&*self.channel_ref.read(), None)?
.skip(self.offset)
.take(self.limit)
.map(|res| {
res.map(|(_, (ser_h, ser_m))| {
(libpijul::Hash::from(ser_h), libpijul::Merkle::from(ser_m))
})
});
for pr in hs {
let (h, mrk) = pr?;
if (self.cmd.filters.is_empty()) || requested_hashes.remove(&h) {
// If there were no path filters applied, OR is this was one of the hashes
// marked by the file filters that were applied
let entry = self.mk_log_entry(&mut authors, &mut id_path, h, Some(mrk))?;
if let Err(e) = f(entry) {
return Err(anyhow::Error::msg(format!("{}", e)));
}
} else if requested_hashes.is_empty() {
// If the user applied path filters, but the relevant change hashes
// have been exhausted, we can break early.
break;
} else {
// The user applied path filters; this wasn't a hit, but
// there are still hits to be logged.
continue;
// NOTE(review): everything from the `for h in txn` loop below down to the
// `Ok(())` appears to be a stale copy of the pre-refactor `Log::run`
// printing code (it references `txn`, `channel`, `offset`, `limit`,
// `changes` and `stdout`, none of which are in scope here) spliced into
// this method. It cannot compile as-is — flagged for reconstruction.
for h in txn
.reverse_log(&*channel.read(), None)?
.skip(offset)
.take(limit)
{
let (h, mrk) = h?.1;
let h: libpijul::Hash = h.into();
let mrk: libpijul::Merkle = mrk.into();
let header = changes.get_header(&h.into())?;
writeln!(stdout, "Change {}", h.to_base32())?;
write!(stdout, "Author: ")?;
let mut is_first = true;
for mut auth in header.authors.into_iter() {
let auth = if let Some(k) = auth.0.remove("key") {
match authors.entry(k) {
Entry::Occupied(e) => e.into_mut(),
Entry::Vacant(e) => {
let mut id = None;
id_path.push(e.key());
if let Ok(f) = std::fs::File::open(&id_path) {
if let Ok(id_) =
serde_json::from_reader::<_, super::Identity>(f)
{
id = Some(id_)
}
}
id_path.pop();
if let Some(id) = id {
e.insert(id.login)
} else {
let k = e.key().to_string();
e.insert(k)
// NOTE(review): the stale fragment is truncated here; the `Ok(())` below
// presumably belonged to the end of `for_each`.
Ok(())
}
/// Create a [`LogEntry`] for a given hash.
///
/// Most of this is just getting the right key information from either the cache
/// or from the relevant file.
fn mk_log_entry<'x>(
&self,
author_kvs: &'x mut HashMap<String, String>,
id_path: &mut PathBuf,
h: libpijul::Hash,
m: Option<libpijul::Merkle>,
) -> Result<LogEntry, anyhow::Error> {
let header = self.changes.get_header(&h.into())?;
let authors = header
.authors
.into_iter()
.map(|mut auth| {
let auth = if let Some(k) = auth.0.remove("key") {
match author_kvs.entry(k) {
Entry::Occupied(e) => e.into_mut(),
Entry::Vacant(e) => {
let mut id = None;
// NOTE(review): the key is pushed onto the `id_path` parameter but the
// file is opened from `self.id_path` on the next line — one of the two
// is likely wrong (the pushed key would never be part of the opened
// path). TODO confirm which path was intended.
id_path.push(e.key());
if let Ok(f) = std::fs::File::open(&self.id_path) {
if let Ok(id_) = serde_json::from_reader::<_, super::Identity>(f) {
id = Some(id_)
}
// NOTE(review): another stale fragment of the old printing code begins
// here (`stdout`, `states`, `mrk`, `self.descriptions` are not in scope
// in this method) — the `id_path.pop()` / cache-insert logic that should
// close the `Vacant` arm appears to have been overwritten by it.
writeln!(stdout)?;
writeln!(stdout, "Date: {}", header.timestamp)?;
if states {
writeln!(stdout, "State: {}", mrk.to_base32())?;
}
writeln!(stdout, "\n {}\n", header.message)?;
if self.descriptions {
if let Some(ref descr) = header.description {
writeln!(stdout, "\n {}\n", descr)?;
}
}
} else {
auth.0.get("name").unwrap()
};
auth.to_owned()
})
.collect();
// Assemble the entry; `state` is kept only when the user asked for states.
Ok(LogEntry {
hash: Some(h.to_base32()),
state: m.map(|mm| mm.to_base32()).filter(|_| self.cmd.states),
authors: Some(authors),
timestamp: Some(header.timestamp),
message: Some(header.message.clone()),
description: header.description,
})
}
}
impl Log {
// In order to accommodate both pretty-printing and efficient serialization to a serde
// target format, this now delegates mostly to [`LogIterator`].
pub fn run(self) -> Result<(), anyhow::Error> {
let mut stdout = std::io::stdout();
match self.output_format.as_ref().map(|s| s.as_str()) {
Some(s) if s.eq_ignore_ascii_case("json") => {
serde_json::to_writer_pretty(&mut stdout, &LogIterator::try_from(self)?)?
}
_ => {
super::pager();
LogIterator::try_from(self)?.for_each(|entry| write!(&mut stdout, "{}", entry))?