Add path filtering for log, add json output for log

ammkrn
Aug 14, 2021, 12:06 PM
OU6JOR3CDZTH2H3NTGMV3WDIAWPD3VEJI7JRY3VJ7LPDR3QOA52QC

Dependencies

  • [2] RUBBHYZ7 Removing unnecessary async/await
  • [3] UW3KU7DH Limiting the output size of log (with a CLI flag)
  • [4] 2K7JLB4Z No pager on Windows
  • [5] CCLLB7OI Upgrading to Sanakirja 0.15 + version bump
  • [6] I24UEJQL Various post-fire fixes
  • [7] SXEYMYF7 Fixing the bad changes in history (unfortunately, by rebooting).
  • [8] YN63NUZO Sanakirja 1.0
  • [9] L4JXJHWX pijul/*: reorganize imports and remove extern crate
  • [10] PH7B6I3U Fixing log --hash-only
  • [11] 5OGOE4VW Store the current channel in the pristine
  • [12] JL4WKA5P Implement the Sanakirja concurrency model in a cross-process way
  • [13] SMMBFECL Converting to the new patch format "online"
  • [14] I52XSRUH Massive cleanup, and simplification
  • [15] VIHXB7SG commands: set up pager for diff, change, and credit
  • [16] A3RM526Y Integrating identity malleability
  • [17] GURIBVW6 Fixing the pager
  • [18] I7VL7VPZ Minor cleanup
  • [19] EUZFFJSO Updating Pijul with the latest changes in Libpijul
  • [20] Y6EVFMTA Don't output files if they aren't in the current channel
  • [21] 23LVKATN Use pager crate for log output
  • [22] PSKXR4QE Do not load the entire change in memory in log

Change contents

  • replacement in pijul/src/commands/log.rs at line 2
    [4.694][4.694:725]()
    use std::collections::HashMap;
    [4.694]
    [4.2043]
    use std::collections::{HashMap, HashSet};
    use std::convert::TryFrom;
  • replacement in pijul/src/commands/log.rs at line 5
    [4.2063][4.2063:2087]()
    use std::path::PathBuf;
    [4.2063]
    [4.2087]
    use std::path::{Path, PathBuf};
  • edit in pijul/src/commands/log.rs at line 7
    [4.2088]
    [4.16925]
    use crate::repository::Repository;
  • edit in pijul/src/commands/log.rs at line 11
    [4.134460]
    [4.3357]
    use libpijul::pristine::{sanakirja::Txn, ChannelRef, DepsTxnT, GraphTxnT, TreeTxnT};
  • edit in pijul/src/commands/log.rs at line 13
    [4.3396]
    [4.134559]
    use serde::ser::{SerializeSeq, Serializer};
    use serde::Serialize;
  • edit in pijul/src/commands/log.rs at line 16
    [4.134560]
    [4.134560]
    /// A struct containing user-input assembled by Clap.
  • edit in pijul/src/commands/log.rs at line 40
    [2.1718]
    [4.134881]
    #[clap(long = "output-format")]
    output_format: Option<String>,
    /// Filter log output, showing only log entries that touched the specified
    /// files. Accepted as a list of paths relative to your current directory.
    /// Currently, filters can only be applied when logging the channel that's
    /// in use.
    #[clap(last = true)]
    filters: Vec<String>,
  • replacement in pijul/src/commands/log.rs at line 50
    [4.134884][4.134884:134895](),[4.134895][2.1719:1771](),[2.1771][4.21049:21108](),[4.7083][4.21049:21108]()
    impl Log {
    pub fn run(self) -> Result<(), anyhow::Error> {
    let repo = Repository::find_root(self.repo_path)?;
    [4.134884]
    [4.135006]
    // Resolving user-supplied paths involves several fallible steps (file-IO,
    // prefix stripping, UTF-8 conversion, inode lookup); each one gets its own
    // error message so that the feedback for a user-command stays precise.
    /// Translate the path filters given on the command line into the inodes
    /// tracked for them.
    ///
    /// Every entry of `pats` is canonicalized, made relative to `repo_path`
    /// and then looked up through `libpijul::fs::find_inode`. The first entry
    /// that cannot be resolved aborts the whole call with an error.
    fn get_inodes(
        txn: &impl libpijul::pristine::TreeTxnT,
        repo_path: &Path,
        pats: &[String],
    ) -> Result<Vec<libpijul::Inode>, anyhow::Error> {
        let mut found = Vec::new();
        for pat in pats {
            // Step 1: resolve the filter to an absolute, existing path.
            let canon_path = match Path::new(pat).canonicalize() {
                Ok(resolved) => resolved,
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => bail!(
                    "pijul log couldn't find a file or directory corresponding to `{}`",
                    pat
                ),
                Err(e) => return Err(e.into()),
            };
            // Step 2: make it relative to the repository root. This only fails
            // when `repo_path` is not a prefix of the canonical path, i.e. the
            // user passed in a filter path that's not in the repository.
            let relative = match canon_path.strip_prefix(repo_path) {
                Ok(rel) => rel,
                Err(_) => bail!(
                    "pijul log couldn't assemble file prefix for pattern `{}`; \
                    {} was not a file in the repository at {}",
                    pat,
                    canon_path.display(),
                    repo_path.display()
                ),
            };
            // Step 3: `to_str()` returns `None` iff the path contains invalid UTF-8.
            let relative = match relative.to_str() {
                Some(s) => s,
                None => bail!(
                    "pijul log couldn't assemble file prefix for pattern `{}`; \
                    the path contained invalid UTF-8",
                    pat
                ),
            };
            // Step 4: look the relative path up in the tree.
            match libpijul::fs::find_inode(txn, relative) {
                Ok(inode) => found.push(inode),
                Err(e) => bail!(
                    "pijul log couldn't assemble file prefix for pattern `{}`; \
                    no Inode found for the corresponding path. Internal error: {:?}",
                    pat,
                    e
                ),
            }
        }
        log::debug!("log filters: {:#?}\n", pats);
        Ok(found)
    }
    /// Compute the set of change hashes that touched any of the files or
    /// directories named by `filters`.
    ///
    /// `path` is the repository root used to resolve the filters (see
    /// `get_inodes`). An empty `filters` slice yields an empty set.
    fn filtered_hashes<T: TreeTxnT + GraphTxnT + DepsTxnT>(
        txn: &T,
        path: &Path,
        filters: &[String],
    ) -> Result<HashSet<libpijul::Hash>, anyhow::Error> {
        let mut touched = HashSet::<libpijul::Hash>::new();
        for inode in get_inodes(txn, path, filters)? {
            // The Position<ChangeId> recorded for this file's Inode.
            let file_position = match txn.get_inodes(&inode, None)? {
                Some(p) => p,
                None => bail!("Failed to get matching inode: {:?}", inode),
            };
            for item in txn.iter_touched(file_position)? {
                let (position, touched_change_id) = item?;
                // Entries are only relevant while their file ChangeId matches
                // that of the file Inode; the first mismatch means we've gone
                // past the relevant subset of changes in the iterator.
                if position.change != file_position.change {
                    break;
                }
                let ser_h = match txn.get_external(touched_change_id)? {
                    Some(ser_h) => ser_h,
                    None => {
                        log::error!(
                            "`get_external` failed to retrieve full hash for ChangeId {:?}",
                            touched_change_id
                        );
                        bail!("Failed to retrieve full hash for {:?}", touched_change_id)
                    }
                };
                touched.insert(libpijul::Hash::from(*ser_h));
            }
        }
        Ok(touched)
    }
    /// A single log entry created by [`LogIterator`]. The fields are
    /// all `Option<T>` so that users can more precisely choose what
    /// data they want.
    ///
    /// Fields that are `None` are left out of the serialized output
    /// entirely (see the `skip_serializing_if` attributes).
    ///
    /// The implementation of [`std::fmt::Display`] is the standard method
    /// of pretty-printing a `LogEntry` to the terminal.
    #[derive(Serialize)]
    struct LogEntry {
    /// The change hash, rendered in base32.
    #[serde(skip_serializing_if = "Option::is_none")]
    hash: Option<String>,
    /// The state (Merkle) after the change, rendered in base32.
    #[serde(skip_serializing_if = "Option::is_none")]
    state: Option<String>,
    /// Display names of the change's authors.
    #[serde(skip_serializing_if = "Option::is_none")]
    authors: Option<Vec<String>>,
    /// Timestamp taken from the change header.
    #[serde(skip_serializing_if = "Option::is_none")]
    timestamp: Option<chrono::DateTime<chrono::offset::Utc>>,
    /// Message taken from the change header.
    #[serde(skip_serializing_if = "Option::is_none")]
    message: Option<String>,
    /// Optional longer description taken from the change header.
    #[serde(skip_serializing_if = "Option::is_none")]
    description: Option<String>,
    }
    /// The standard pretty-print
    impl std::fmt::Display for LogEntry {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
    if let Some(ref h) = self.hash {
    writeln!(f, "Change {}", h)?;
    }
    if let Some(ref authors) = self.authors {
    write!(f, "Author: ")?;
    let mut is_first = true;
    for a in authors.iter() {
    if is_first {
    is_first = false;
    write!(f, "{}", a)?;
    } else {
    write!(f, ", {}", a)?;
    }
    }
    // Write a linebreak after finishing the list of authors.
    writeln!(f)?;
    }
    if let Some(ref timestamp) = self.timestamp {
    writeln!(f, "Date: {}", timestamp)?;
    }
    if let Some(ref mrk) = self.state {
    writeln!(f, "State: {}", mrk)?;
    }
    if let Some(ref message) = self.message {
    writeln!(f, "\n {}\n", message)?;
    }
    if let Some(ref description) = self.description {
    writeln!(f, "\n {}\n", description)?;
    }
    Ok(())
    }
    }
    /// Contains state needed to produce the sequence of [`LogEntry`] items
    /// that are to be logged. The implementation of `TryFrom<Log>` provides
    /// a fallible way of creating one of these from the CLI's [`Log`] structure.
    ///
    /// The two main things this provides are an efficient/streaming implementation
    /// of [`serde::Serialize`], and an implementation of [`std::fmt::Display`] that
    /// does the standard pretty-printing to stdout.
    ///
    /// The [`LogIterator::for_each`] method lets us reuse the most code while providing both
    /// pretty-printing and efficient serialization; we can't easily do this with
    /// a full implementation of Iterator because serde's serialize method requires
    /// self to be an immutable reference.
    struct LogIterator {
    /// Transaction the log and the path filters are read through.
    txn: Txn,
    /// Change store the change headers are read from.
    changes: libpijul::changestore::filesystem::FileSystem,
    /// The parsed command-line arguments this iterator was built from.
    cmd: Log,
    /// Path of the repository; path filters are resolved relative to it.
    repo_path: PathBuf,
    /// The `identities` directory inside the repository's dot-directory.
    id_path: PathBuf,
    /// The channel whose log is being read.
    channel_ref: ChannelRef<Txn>,
    /// Maximum number of log items to take (`usize::MAX` when not given).
    limit: usize,
    /// Number of log items to skip from the start (0 when not given).
    offset: usize,
    }
    /// This implementation of Serialize is hand-rolled in order
    /// to allow for greater re-use and efficiency.
    impl Serialize for LogIterator {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
    S: Serializer,
    {
    let mut seq = serializer.serialize_seq(None)?;
    match self.for_each(|entry| seq.serialize_element(&entry)) {
    Ok(_) => seq.end(),
    Err(anyhow_err) => Err(serde::ser::Error::custom(format!("{}", anyhow_err))),
    }
    }
    }
    /// Writes every requested log entry, in the standard user-facing
    /// format, to the formatter.
    ///
    /// `std::fmt::Error` carries no payload, so the underlying failure is
    /// reported through the `log` facade before being converted.
    impl std::fmt::Display for LogIterator {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            self.for_each(|entry| write!(f, "{}", entry))
                .map_err(|e| {
                    log::error!("LogIterator::Display: {}", e);
                    std::fmt::Error
                })
        }
    }
    impl TryFrom<Log> for LogIterator {
    type Error = anyhow::Error;
    fn try_from(cmd: Log) -> Result<LogIterator, Self::Error> {
    let repo = Repository::find_root(cmd.repo_path.clone())?;
    let repo_path = repo.path.clone();
  • replacement in pijul/src/commands/log.rs at line 262
    [4.135052][4.2460:2523]()
    let channel_name = if let Some(ref c) = self.channel {
    [4.135052]
    [4.2523]
    let channel_name = if let Some(ref c) = cmd.channel {
  • replacement in pijul/src/commands/log.rs at line 267
    [4.701][4.16944:17023](),[4.2252][4.16944:17023](),[4.2633][4.16944:17023](),[4.135173][4.16944:17023]()
    let channel = if let Some(channel) = txn.load_channel(channel_name)? {
    [4.2633]
    [4.135251]
    // The only situation that's disallowed is if the user's trying to apply
    // path filters AND get the logs for a channel other than the one they're
    // currently using (where using means the one that comprises the working copy)
    if !cmd.filters.is_empty()
    && !(channel_name == txn.current_channel().unwrap_or(crate::DEFAULT_CHANNEL))
    {
    bail!("Currently, log filters can only be applied to the channel currently in use.")
    }
    let channel_ref = if let Some(channel) = txn.load_channel(channel_name)? {
  • edit in pijul/src/commands/log.rs at line 281
    [4.135434][4.163:187]()
    super::pager();
  • replacement in pijul/src/commands/log.rs at line 282
    [4.135470][4.135470:135514](),[4.135514][3.0:106](),[3.106][4.135514:135542](),[4.135514][4.135514:135542](),[4.135542][3.107:132](),[3.132][3.132:216](),[3.216][3.216:259](),[3.259][4.12912:12969](),[4.13027][4.12912:12969](),[4.19445][4.12912:12969](),[4.17147][4.12912:12969](),[4.74][4.135619:135674](),[4.12969][4.135619:135674](),[4.17181][4.135619:135674](),[4.135619][4.135619:135674]()
    let mut stdout = std::io::stdout();
    let limit = self.limit.unwrap_or(std::usize::MAX);
    let offset = self.offset.unwrap_or(0);
    if self.hash_only {
    for h in txn
    .reverse_log(&*channel.read(), None)?
    .skip(offset)
    .take(limit)
    {
    let h: libpijul::Hash = (h?.1).0.into();
    writeln!(stdout, "{}", h.to_base32())?
    [4.135470]
    [4.135674]
    let limit = cmd.limit.unwrap_or(std::usize::MAX);
    let offset = cmd.offset.unwrap_or(0);
    let mut id_path = repo.path.join(libpijul::DOT_DIR);
    id_path.push("identities");
    Ok(Self {
    txn,
    cmd,
    changes,
    repo_path,
    id_path,
    channel_ref,
    limit,
    offset,
    })
    }
    }
    impl LogIterator {
    /// Call `f` on each [`LogEntry`] in a [`LogIterator`].
    ///
    /// The purpose of this is to let us execute a function over the log entries
    /// without having to duplicate the iteration/filtering logic or
    /// having to collect all of the elements first.
    fn for_each<A, E>(
    &self,
    mut f: impl FnMut(LogEntry) -> Result<A, E>,
    ) -> Result<(), anyhow::Error>
    where
    E: std::fmt::Display,
    {
    // A cache of authors to keys. Prevents us from having to do
    // a lot of file-io for looking up the same author multiple times.
    let mut authors = HashMap::new();
    let mut id_path = self.id_path.clone();
    // If the user applied path filters, figure out what change hashes
    // are to be logged.
    let mut requested_hashes = filtered_hashes(
    &self.txn,
    self.repo_path.as_ref(),
    self.cmd.filters.as_slice(),
    )?;
    // Get the (Hash, Merkle) pairs for the portion of reverse_log
    // that are between offset and limit.
    let hs = self
    .txn
    .reverse_log(&*self.channel_ref.read(), None)?
    .skip(self.offset)
    .take(self.limit)
    .map(|res| {
    res.map(|(_, (ser_h, ser_m))| {
    (libpijul::Hash::from(ser_h), libpijul::Merkle::from(ser_m))
    })
    });
    for pr in hs {
    let (h, mrk) = pr?;
    if (self.cmd.filters.is_empty()) || requested_hashes.remove(&h) {
    // If there were no path filters applied, OR if this was one of the hashes
    // marked by the file filters that were applied
    let entry = self.mk_log_entry(&mut authors, &mut id_path, h, Some(mrk))?;
    if let Err(e) = f(entry) {
    return Err(anyhow::Error::msg(format!("{}", e)));
    }
    } else if requested_hashes.is_empty() {
    // If the user applied path filters, but the relevant change hashes
    // have been exhausted, we can break early.
    break;
    } else {
    // The user applied path filters; this wasn't a hit, but
    // there are still hits to be logged.
    continue;
  • replacement in pijul/src/commands/log.rs at line 355
    [4.135688][4.135688:135743](),[4.135743][4.726:772](),[4.772][4.21109:21214]()
    } else {
    let states = self.states;
    let mut authors = HashMap::new();
    let mut id_path = repo.path.join(libpijul::DOT_DIR);
    id_path.push("identities");
    [4.135688]
    [4.985]
    }
  • replacement in pijul/src/commands/log.rs at line 357
    [4.986][3.260:412](),[3.412][4.17247:17284](),[4.13092][4.17247:17284](),[4.19511][4.17247:17284](),[4.17247][4.17247:17284](),[4.17284][4.12970:13137](),[4.54][4.135873:135936](),[4.13137][4.135873:135936](),[4.135873][4.135873:135936](),[4.135936][4.987:1073](),[4.1073][4.21215:21695](),[4.21695][4.1532:1839](),[4.1532][4.1532:1839](),[4.1839][4.21696:22007]()
    for h in txn
    .reverse_log(&*channel.read(), None)?
    .skip(offset)
    .take(limit)
    {
    let (h, mrk) = h?.1;
    let h: libpijul::Hash = h.into();
    let mrk: libpijul::Merkle = mrk.into();
    let header = changes.get_header(&h.into())?;
    writeln!(stdout, "Change {}", h.to_base32())?;
    write!(stdout, "Author: ")?;
    let mut is_first = true;
    for mut auth in header.authors.into_iter() {
    let auth = if let Some(k) = auth.0.remove("key") {
    match authors.entry(k) {
    Entry::Occupied(e) => e.into_mut(),
    Entry::Vacant(e) => {
    let mut id = None;
    id_path.push(e.key());
    if let Ok(f) = std::fs::File::open(&id_path) {
    if let Ok(id_) =
    serde_json::from_reader::<_, super::Identity>(f)
    {
    id = Some(id_)
    }
    }
    id_path.pop();
    if let Some(id) = id {
    e.insert(id.login)
    } else {
    let k = e.key().to_string();
    e.insert(k)
    [4.986]
    [4.22007]
    Ok(())
    }
    /// Create a [`LogEntry`] for a given hash.
    ///
    /// Most of this is just getting the right key information from either the cache
    /// or from the relevant file.
    ///
    /// * `author_kvs` caches the display name already resolved for each author key,
    ///   so the same identity file is not read more than once.
    /// * `id_path` is a scratch path pointing at the identities directory; the
    ///   author key is pushed onto it for the lookup and popped again afterwards.
    /// * `h` is the hash of the change, `m` the optional state to report with it.
    fn mk_log_entry<'x>(
    &self,
    author_kvs: &'x mut HashMap<String, String>,
    id_path: &mut PathBuf,
    h: libpijul::Hash,
    m: Option<libpijul::Merkle>,
    ) -> Result<LogEntry, anyhow::Error> {
    let header = self.changes.get_header(&h.into())?;
    let authors = header
    .authors
    .into_iter()
    .map(|mut auth| {
    let auth = if let Some(k) = auth.0.remove("key") {
    match author_kvs.entry(k) {
    Entry::Occupied(e) => e.into_mut(),
    Entry::Vacant(e) => {
    let mut id = None;
    id_path.push(e.key());
    // Open the per-key identity file that was just assembled in the
    // scratch `id_path`; `self.id_path` is only the identities
    // directory and never contains the key.
    if let Ok(f) = std::fs::File::open(&*id_path) {
    if let Ok(id_) = serde_json::from_reader::<_, super::Identity>(f) {
    id = Some(id_)
  • edit in pijul/src/commands/log.rs at line 386
    [4.22041]
    [4.2153]
    }
    id_path.pop();
    if let Some(id) = id {
    e.insert(id.login)
    } else {
    let k = e.key().to_string();
    e.insert(k)
  • edit in pijul/src/commands/log.rs at line 395
    [4.2209][4.22042:22123](),[4.22123][4.2209:2445](),[4.2209][4.2209:2445]()
    } else {
    auth.0.get("name").unwrap()
    };
    if is_first {
    is_first = false;
    write!(stdout, "{}", auth)?;
    } else {
    write!(stdout, ", {}", auth)?;
  • replacement in pijul/src/commands/log.rs at line 396
    [4.2467][4.2467:2485](),[4.2485][4.2485:2520](),[4.2520][4.122:187](),[4.122][4.122:187](),[4.187][4.136082:136179](),[4.136082][4.136082:136179](),[4.136179][4.136179:136197](),[4.136197][4.188:253](),[4.253][4.136269:136308](),[4.136269][4.136269:136308](),[4.136308][4.254:320](),[4.320][4.136381:136445](),[4.136381][4.136381:136445](),[4.136445][4.136445:136467](),[4.136467][4.136467:136485]()
    }
    writeln!(stdout)?;
    writeln!(stdout, "Date: {}", header.timestamp)?;
    if states {
    writeln!(stdout, "State: {}", mrk.to_base32())?;
    }
    writeln!(stdout, "\n {}\n", header.message)?;
    if self.descriptions {
    if let Some(ref descr) = header.description {
    writeln!(stdout, "\n {}\n", descr)?;
    }
    }
    [4.2467]
    [4.136485]
    } else {
    auth.0.get("name").unwrap()
    };
    auth.to_owned()
    })
    .collect();
    Ok(LogEntry {
    hash: Some(h.to_base32()),
    state: m.map(|mm| mm.to_base32()).filter(|_| self.cmd.states),
    authors: Some(authors),
    timestamp: Some(header.timestamp),
    message: Some(header.message.clone()),
    description: header.description,
    })
    }
    }
    impl Log {
    // In order to accommodate both pretty-printing and efficient serialization to a serde
    // target format, this now delegates mostly to [`LogIterator`].
    pub fn run(self) -> Result<(), anyhow::Error> {
    let mut stdout = std::io::stdout();
    match self.output_format.as_ref().map(|s| s.as_str()) {
    Some(s) if s.eq_ignore_ascii_case("json") => {
    serde_json::to_writer_pretty(&mut stdout, &LogIterator::try_from(self)?)?
    }
    _ => {
    super::pager();
    LogIterator::try_from(self)?.for_each(|entry| write!(&mut stdout, "{}", entry))?
  • edit in pijul/src/commands/log.rs at line 430
    [4.46][4.2105:2141](),[4.134515][4.2105:2141]()
    use crate::repository::Repository;
  • resolve order conflict in pijul/src/commands/log.rs at line 430
    [4.136532]