track file encoding in the record, including change text for file adds

[?]
Dec 15, 2020, 9:16 AM
NYOF5766GLBTWQV2KTVRAJMGVJNJ37Z5BLJMFPZA3HG7X2Q2RXPAC

Dependencies

  • [2] KJDQ2WOM Fixing the parsing of section headers in the text change format
  • [3] 246V5TYI decode existing files
  • [4] 6HNRL5RT detect non-utf8 text files
  • [5] VMOYG7MK text file decoding for new files
  • [6] 7FFFKQZU add 'Default' implementations
  • [7] VO5OQW4W Removing anyhow in libpijul
  • [8] O4DNWMPD Cleanup and proofreading of libpijul::record
  • [9] SXEYMYF7 Fixing the bad changes in history (unfortunately, by rebooting).

Change contents

  • replacement in libpijul/src/working_copy/mod.rs at line 1
    [4.198172][4.198173:198228]()
    // org id jgSEtEI/xIjz/bF+vtGtYbEA9bNIeFWLqnZT+M51S64=
    [4.198172]
    [3.0]
    use std::fmt;
  • replacement in libpijul/src/working_copy/mod.rs at line 4
    [3.33][3.33:63]()
    // use encoding_rs::Encoding;
    [3.33]
    [3.63]
    use serde::{de::Visitor, Deserialize, Serialize};
  • replacement in libpijul/src/working_copy/mod.rs at line 39
    [3.66][3.66:159]()
    fn decode_file(&mut self, file: &str, buffer: &mut Vec<u8>) -> Result<(), Self::Error> {
    [3.66]
    [3.159]
    /// Read the file into the buffer, decoding to UTF-8 for text files
    ///
    /// Returns the encoding used or None if it was a binary file
    fn decode_file(
    &mut self,
    file: &str,
    buffer: &mut Vec<u8>,
    ) -> Result<Option<Encoding>, Self::Error> {
  • replacement in libpijul/src/working_copy/mod.rs at line 49
    [3.243][3.243:342]()
    let (mut decoded, encoding) = if tree_magic_mini::from_u8(&uncoded).starts_with("text/") {
    [3.243]
    [3.342]
    let mime = tree_magic_mini::from_u8(&uncoded);
    debug!("mime = {:?}", mime);
    let (mut decoded, encoding) = if mime.starts_with("text/") {
  • replacement in libpijul/src/working_copy/mod.rs at line 59
    [3.726][3.726:788]()
    (decoded.as_bytes().to_vec(), Some(encoding))
    [3.726]
    [3.788]
    (decoded.as_bytes().to_vec(), Some(Encoding(encoding)))
  • edit in libpijul/src/working_copy/mod.rs at line 68
    [3.998]
    [3.998]
    Ok(encoding)
    }
    }
  • replacement in libpijul/src/working_copy/mod.rs at line 73
    [3.999][3.999:1014]()
    Ok(())
    [3.999]
    [3.1014]
    #[derive(Debug, PartialEq, Eq)]
    pub struct Encoding(&'static encoding_rs::Encoding);
    impl Encoding {
    pub(crate) fn for_label(label: &str) -> Encoding {
    Encoding(encoding_rs::Encoding::for_label_no_replacement(label.as_bytes()).unwrap())
    }
    pub(crate) fn label(&self) -> &str {
    self.0.name()
    }
    }
    impl Clone for Encoding {
    fn clone(&self) -> Self {
    Encoding(self.0)
    }
    }
    impl Serialize for Encoding {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
    S: serde::Serializer,
    {
    serializer.serialize_str(self.label())
    }
    }
    struct EncodingVisitor;
    impl<'de> Deserialize<'de> for Encoding {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
    D: serde::Deserializer<'de>,
    {
    impl<'de> Visitor<'de> for EncodingVisitor {
    type Value = Encoding;
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
    formatter.write_str("a string label meeting the encoding standard https://encoding.spec.whatwg.org/#concept-encoding-get")
    }
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
    E: serde::de::Error,
    {
    Ok(Encoding::for_label(v))
    }
    }
    deserializer.deserialize_str(EncodingVisitor)
  • file move: text.rs (-xw-x--x--)text.rs (-xwrx-rx-r)
    [4.248792]
    [4.10]
  • edit in libpijul/src/record.rs at line 136
    [4.492161][4.492161:492194]()
    const CHECK_UTF8: usize = 1000;
  • replacement in libpijul/src/record.rs at line 236
    [4.496437][4.496437:496480]()
    let contents = if meta.is_file() {
    [4.496437]
    [4.496480]
    let (contents, encoding) = if meta.is_file() {
  • replacement in libpijul/src/record.rs at line 238
    [4.496552][3.1021:1101]()
    working_copy.decode_file(&item.full_path, &mut self.rec.contents)?;
    [4.496552]
    [4.496630]
    let encoding = working_copy.decode_file(&item.full_path, &mut self.rec.contents)?;
  • replacement in libpijul/src/record.rs at line 241
    [4.496780][4.496780:496931](),[4.496931][4.4780:4947](),[4.4947][4.497193:497208](),[4.497193][4.497193:497208]()
    self.rec.has_binary_files |= {
    let s = start.0 as usize;
    let e = (end.0 as usize).min(s + CHECK_UTF8 + 4);
    let mime = tree_magic_mini::from_u8(&self.rec.contents[s..e]);
    debug!("mime = {:?}", mime);
    !mime.starts_with("text/")
    };
    [4.496780]
    [4.497208]
    self.rec.has_binary_files |= encoding.is_none();
  • replacement in libpijul/src/record.rs at line 244
    [4.497313][4.497313:497811]()
    Some(Atom::NewVertex(NewVertex {
    up_context: vec![Position {
    change: None,
    pos: inode_pos,
    }],
    down_context: vec![],
    start,
    end,
    flag: EdgeFlags::empty(),
    inode: Position {
    change: None,
    pos: inode_pos,
    },
    }))
    [4.497313]
    [4.497811]
    (
    Some(Atom::NewVertex(NewVertex {
    up_context: vec![Position {
    change: None,
    pos: inode_pos,
    }],
    down_context: vec![],
    start,
    end,
    flag: EdgeFlags::empty(),
    inode: Position {
    change: None,
    pos: inode_pos,
    },
    })),
    encoding,
    )
  • replacement in libpijul/src/record.rs at line 262
    [4.497832][4.497832:497853]()
    None
    [4.497832]
    [4.497853]
    (None, encoding)
  • replacement in libpijul/src/record.rs at line 265
    [4.497884][4.497884:497901]()
    None
    [4.497884]
    [4.497901]
    (None, None)
  • edit in libpijul/src/record.rs at line 289
    [4.498800]
    [4.498800]
    encoding,
  • file move: mod.rs (-xw-x--x--)mod.rs (-xwrx-rx-r)
    [4.768883]
    [4.793247]
  • edit in libpijul/src/change.rs at line 1
    [4.831385][4.831385:831438]()
    use crate::pristine::*;
    use chrono::{DateTime, Utc};
  • edit in libpijul/src/change.rs at line 2
    [4.831481]
    [4.831519]
    use chrono::{DateTime, Utc};
    use crate::{pristine::*, working_copy::Encoding};
  • edit in libpijul/src/change.rs at line 561
    [4.848439]
    [4.848439]
    ..
  • edit in libpijul/src/change.rs at line 659
    [4.851667]
    [4.851667]
    encoding: Option<Encoding>,
  • edit in libpijul/src/change.rs at line 1066
    [4.865376]
    [4.865376]
    encoding,
  • edit in libpijul/src/change.rs at line 1072
    [4.865612]
    [4.865612]
    encoding,
  • edit in libpijul/src/change/text_changes.rs at line 295
    [4.48443]
    [4.48443]
    const BINARY_LABEL: &str = "binary";
  • edit in libpijul/src/change/text_changes.rs at line 385
    [4.51767]
    [4.51767]
    encoding,
  • edit in libpijul/src/change/text_changes.rs at line 404
    [4.52540]
    [4.52540]
    };
    let encoding_label = match encoding {
    Some(encoding) => encoding.label(),
    _ => BINARY_LABEL,
  • replacement in libpijul/src/change/text_changes.rs at line 411
    [4.52618][4.52618:52730]()
    "File addition: {:?} in {:?} {:o}\n up",
    name, parent, perms.0
    [4.52618]
    [4.52730]
    "File addition: {:?} in {:?} {:o} {:?}\n up",
    name, parent, perms.0, encoding_label
  • replacement in libpijul/src/change/text_changes.rs at line 516
    [4.57069][2.0:127]()
    Regex::new(r#"^(?P<n>\d+)\. File addition: "(?P<name>[^"]*)" in "(?P<parent>[^"]*)" (?P<perm>\d+)"#).unwrap();
    [4.57069]
    [4.57195]
    Regex::new(r#"^(?P<n>\d+)\. File addition: "(?P<name>[^"]*)" in "(?P<parent>[^"]*)" (?P<perm>\d+) "(?P<encoding>[^"]*)""#).unwrap();
  • edit in libpijul/src/change/text_changes.rs at line 588
    [4.60586]
    [4.60586]
    let encoding_label = cap.name("encoding").unwrap().as_str();
    let encoding = if encoding_label != BINARY_LABEL {
    Some(Encoding::for_label(encoding_label))
    } else {
    None
    };
  • edit in libpijul/src/change/text_changes.rs at line 601
    [4.60862]
    [4.60862]
    encoding,