Fixing a number of bugs related to encodings (extra newlines + misdetection in linux2x)

pmeunier
Jul 3, 2021, 10:32 AM
4DNDMC7IUZNYLDEQQYYF5K3G2QWWXGQENTEWPNTM6XKQEFPW7L3QC

Dependencies

  • [2] NG3Z3DOK roundtrip text encoding when recording
  • [3] SGXOEWHU Adding a patched chardetng (temporarily)
  • [4] TGA6QXGI Initial support for binary diffs (conflicts are not yet supported in the output)
  • [5] QNJBR73K don't return Result for infallible functions
  • [6] Q3GU26WD merge with changes from sanakirja v1.1.2
  • [7] CCFJ7VO3 Renaming "Record" to "Hunk" in the changes
  • [8] 6CZYYOG7 Faster guessing of encoding
  • [9] ZXTHL45O address clippy lints
  • [10] WZVCLZKY address clippy lints
  • [11] VO5OQW4W Removing anyhow in libpijul
  • [12] I24UEJQL Various post-fire fixes
  • [13] UM5DLRPB store new non-UTF-8 files raw and decode to display the contents
  • [14] SXEYMYF7 Fixing the bad changes in history (unfortunately, by rebooting).
  • [15] YWUZQU3T Formatting (for some reason, this previously escaped the hooks)
  • [16] IACED7RW text_encoding module
  • [17] XR7MNOMU file encoding in updates
  • [18] QJXNUQFJ Solving conflicts
  • [19] I52XSRUH Massive cleanup, and simplification
  • [20] YN63NUZO Sanakirja 1.0
  • [21] Z6FWHKCA Improving the UI around zombie conflicts
  • [22] LV34DUJY Formatting
  • [23] F6S2RUVP Fixing an index out of bounds on binary files smaller than a full hash window
  • [24] IYJZVLET Cleaning up the literate programming bits
  • [25] CCLLB7OI Upgrading to Sanakirja 0.15 + version bump
  • [26] ZSF3YFZT encoded file deletion
  • [*] EEBKW7VT Keys and identities

Change contents

  • replacement in libpijul/src/text_encoding.rs at line 2
    [5.34][5.34:35]()
    [5.34]
    [5.35]
    use std::borrow::Cow;
  • replacement in libpijul/src/text_encoding.rs at line 17
    [5.424][5.424:595]()
    pub(crate) fn decode(&self, text: &[u8]) -> String {
    let (decoded, ..) = self.0.decode(&text);
    String::from_utf8(decoded.as_bytes().to_vec()).unwrap()
    [5.424]
    [5.595]
    pub(crate) fn decode<'a>(&self, text: &'a [u8]) -> Cow<'a, str> {
    self.0.decode(&text).0
  • replacement in libpijul/src/text_encoding.rs at line 21
    [2.1][2.1:145]()
    pub(crate) fn encode(&self, text: &str) -> Vec<u8> {
    let (encoded, ..) = self.0.encode(text);
    encoded.into_owned().to_vec()
    [2.1]
    [2.145]
    pub(crate) fn encode<'a>(&self, text: &'a str) -> Cow<'a, [u8]> {
    self.0.encode(text).0
  • replacement in libpijul/src/diff/mod.rs at line 16
    [5.793459][5.793459:793495]()
    #[derive(Debug, Hash, Clone, Copy)]
    [5.793459]
    [5.793495]
    #[derive(Hash, Clone, Copy)]
  • edit in libpijul/src/diff/mod.rs at line 22
    [5.793593]
    [4.11]
    ptr: *const u8,
  • edit in libpijul/src/diff/mod.rs at line 25
    [4.14]
    [4.14]
    impl<'a> std::fmt::Debug for Line<'a> {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
    write!(fmt, "Line {{ l: {:?} }}", std::str::from_utf8(self.l))
    }
    }
  • edit in libpijul/src/diff/mod.rs at line 38
    [4.198]
    [4.198]
    ptr: std::ptr::null(),
  • replacement in libpijul/src/diff/mod.rs at line 51
    [5.794015][5.794015:794064]()
    self.l == b.l && self.cyclic == b.cyclic
    [5.794015]
    [5.794064]
    (self.ptr == b.ptr || self.l == b.l) && self.cyclic == b.cyclic
  • edit in libpijul/src/diff/mod.rs at line 100
    [4.1344]
    [4.1344]
    ptr: l.as_ptr(),
  • edit in libpijul/src/diff/mod.rs at line 116
    [4.1757]
    [4.1757]
    ptr: l.as_ptr(),
  • edit in libpijul/src/diff/mod.rs at line 139
    [5.783]
    [4.1803]
    debug!("encoding = {:?}", encoding);
  • edit in libpijul/src/diff/mod.rs at line 141
    [4.1860][4.1860:1978]()
    // self.diff_binary(changes, txn, txn.graph(channel), path, inode, a, &b)?;
    // return Ok(());
  • replacement in libpijul/src/diff/mod.rs at line 151
    [5.68][5.68:115]()
    trace!("{:?} {:?}", lines_a, lines_b);
    [5.68]
    [5.796653]
    if log::log_enabled!(log::Level::Trace) {
    for l in lines_a.iter() {
    trace!("a: {:?}", l)
    }
    for l in lines_b.iter() {
    trace!("b: {:?}", l)
    }
    }
  • edit in libpijul/src/diff/mod.rs at line 190
    [5.797337][5.797337:797338](),[5.797338][5.116096:116146](),[5.116146][5.797383:797440](),[5.797383][5.797383:797440](),[5.797440][5.116147:116175](),[5.116175][5.797470:797588](),[5.797470][5.797470:797588](),[5.797588][5.116176:116221](),[5.116221][5.86503:86541](),[5.86541][5.52:129](),[5.797638][5.52:129](),[5.129][5.86542:86778](),[5.86778][5.797895:798199](),[5.797895][5.797895:798199](),[5.798199][5.116222:116349](),[5.116349][5.136081:136139](),[5.136139][5.798352:798400](),[5.798352][5.798352:798400](),[5.798400][5.136140:136198](),[5.136198][5.798456:798787](),[5.798456][5.798456:798787](),[5.798787][5.136199:136320](),[5.136320][5.798904:798951](),[5.798904][5.798904:798951](),[5.798951][5.136321:136509](),[5.136509][5.799133:799187](),[5.799133][5.799133:799187](),[5.799187][5.136510:136578](),[5.136578][5.799253:799309](),[5.799253][5.799253:799309](),[5.799309][5.136579:136648](),[5.136648][5.799376:799524](),[5.799376][5.799376:799524](),[5.799524][5.136649:136705](),[5.136705][5.799578:799632](),[5.799578][5.799578:799632](),[5.799632][5.136706:136774](),[5.136774][5.799698:799869](),[5.799698][5.799698:799869](),[5.799869][5.86779:86822](),[5.2400][5.799918:800168](),[5.86822][5.799918:800168](),[5.799918][5.799918:800168](),[5.800168][5.1103:1135](),[5.1135][5.800168:800193](),[5.800168][5.800168:800193](),[5.800193][5.86823:86869](),[5.2451][5.548:592](),[5.86869][5.548:592](),[5.800245][5.548:592](),[5.592][5.800348:800587](),[5.800348][5.800348:800587](),[5.800587][5.136775:136879](),[5.136879][5.800691:800730](),[5.800691][5.800691:800730](),[5.800730][5.1136:1164](),[5.1164][5.116350:116377](),[5.800730][5.116350:116377](),[5.116377][5.800741:800747](),[5.800741][5.800741:800747]()
    fn diff_binary<T: GraphTxnT, C: ChangeStore>(
    &mut self,
    changes: &C,
    txn: &T,
    channel: &T::Graph,
    path: String,
    inode: Position<Option<ChangeId>>,
    ret: &crate::alive::Graph,
    b: &[u8],
    ) -> Result<(), TxnErr<T::GraphError>> {
    self.has_binary_files = true;
    use crate::change::{Atom, EdgeMap, Hunk, Local, NewEdge, NewVertex};
    let mut contents = self.contents.lock().unwrap();
    let pos = contents.len();
    contents.extend_from_slice(&b[..]);
    let pos_end = contents.len();
    contents.push(0);
    std::mem::drop(contents);
    let mut edges = Vec::new();
    let mut deleted = Vec::new();
    for v in ret.lines.iter() {
    debug!("v.vertex = {:?}, inode = {:?}", v.vertex, inode);
    if Some(v.vertex.change) == inode.change && v.vertex.end == inode.pos {
    continue;
    }
    for e in iter_adjacent(txn, channel, v.vertex, EdgeFlags::PARENT, EdgeFlags::all())? {
    let e = e?;
    if e.flag().contains(EdgeFlags::PSEUDO) {
    continue;
    }
    if e.flag().contains(EdgeFlags::FOLDER) {
    if log_enabled!(log::Level::Debug) {
    let f = std::fs::File::create("debug_diff_binary").unwrap();
    ret.debug(changes, txn, channel, false, true, f).unwrap();
    }
    panic!("e.flag.contains(EdgeFlags::FOLDER)");
    }
    if e.flag().contains(EdgeFlags::PARENT) {
    if e.flag().contains(EdgeFlags::DELETED) {
    deleted.push(NewEdge {
    previous: e.flag() - EdgeFlags::PARENT,
    flag: e.flag() - EdgeFlags::PARENT,
    from: e.dest().to_option(),
    to: v.vertex.to_option(),
    introduced_by: Some(e.introduced_by()),
    })
    } else {
    let previous = e.flag() - EdgeFlags::PARENT;
    edges.push(NewEdge {
    previous,
    flag: previous | EdgeFlags::DELETED,
    from: e.dest().to_option(),
    to: v.vertex.to_option(),
    introduced_by: Some(e.introduced_by()),
    })
    }
    }
    }
    }
    // Kill all of `ret`, add `b` instead.
    if !deleted.is_empty() {
    self.actions.push(Hunk::Edit {
    local: Local {
    line: 0,
    path: path.clone(),
    },
    change: Atom::EdgeMap(EdgeMap {
    edges: deleted,
    inode,
    }),
    encoding: None,
    })
    }
    self.actions.push(Hunk::Replacement {
    local: Local { line: 0, path },
    change: Atom::EdgeMap(EdgeMap { edges, inode }),
    replacement: Atom::NewVertex(NewVertex {
    up_context: vec![inode],
    down_context: Vec::new(),
    flag: EdgeFlags::empty(),
    start: ChangePosition(pos.into()),
    end: ChangePosition(pos_end.into()),
    inode,
    }),
    encoding: None,
    });
    Ok(())
    }
  • replacement in libpijul/src/diff/bin.rs at line 49
    [4.3860][4.3860:3884]()
    let mut i = window;
    [4.3860]
    [4.3884]
    let mut i = window.min(b.len());
  • edit in libpijul/src/diff/bin.rs at line 62
    [4.4302]
    [4.4302]
    ptr: old.as_ptr(),
  • edit in libpijul/src/diff/bin.rs at line 68
    [4.4496]
    [4.4496]
    } else {
    break
  • replacement in libpijul/src/diff/bin.rs at line 95
    [4.5184][4.5184:5254]()
    Chunk::Old { start, end, .. } => lines.push(super::Line {
    [4.5184]
    [4.5254]
    Chunk::Old { start, end, ptr, .. } => lines.push(super::Line {
  • edit in libpijul/src/diff/bin.rs at line 97
    [4.5289]
    [4.5289]
    ptr,
  • edit in libpijul/src/diff/bin.rs at line 118
    [4.5732]
    [4.5732]
    ptr: *const u8,
  • replacement in libpijul/src/change/text_changes.rs at line 1281
    [2.987][2.987:1033]()
    None => String::from(h).into_bytes(),
    [2.987]
    [2.1033]
    None => std::borrow::Cow::Borrowed(h.as_bytes()),
  • replacement in libpijul/src/change/text_changes.rs at line 1320
    [5.522][5.6111:6169]()
    for a in encoding.decode(&contents).split('\n') {
    [5.522]
    [5.1236]
    let dec = encoding.decode(&contents);
    let dec = if dec.ends_with("\n") {
    &dec[..dec.len() - 1]
    } else {
    &dec
    };
    for a in dec.split('\n') {
  • edit in chardetng/src/lib.rs at line 2950
    [3.128274]
    [3.128274]
  • replacement in chardetng/src/lib.rs at line 2962
    [3.128794][3.128794:128849]()
    if self.non_ascii_seen == 0 && self.esc_seen {
    [3.128794]
    [3.128849]
    if self.non_ascii_seen == 0
    && self.esc_seen
    {
  • replacement in chardetng/src/lib.rs at line 2966
    [3.128923][3.128923:128967]()
    return (ISO_2022_JP, true);
    [3.128923]
    [3.128967]
    return (ISO_2022_JP, true)
  • replacement in chardetng/src/lib.rs at line 3041
    [3.132034][3.132034:132063]()
    (encoding, max > 0)
    [3.132034]
    [3.132063]
    (encoding, max >= 0)
  • edit in chardetng/src/lib.rs at line 3386
    [3.143335]
    [3.143335]
    }
    #[test]
    fn test_en_windows1252() {
    // "Don't "
    check_bytes(&[68,111,110,180,116,32], WINDOWS_1252);
  • edit in Cargo.lock at line 21
    [5.1030901]
    [28.16895]
    name = "adler32"
    version = "1.2.0"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
    [[package]]
  • edit in Cargo.lock at line 212
    [5.2369][5.2369:2512]()
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "81a81b0d8f8ee23417182818b4f06312c5f535c2b04eef1773f7c24bbdf8c500"
  • edit in Cargo.lock at line 213
    [5.2529]
    [5.2529]
    "arrayvec",
  • edit in Cargo.lock at line 215
    [5.2547]
    [5.2547]
    "detone",
  • edit in Cargo.lock at line 218
    [5.466]
    [5.2580]
    "rayon",
  • edit in Cargo.lock at line 326
    [5.5083]
    [5.5083]
    name = "crossbeam-channel"
    version = "0.5.1"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
    dependencies = [
    "cfg-if 1.0.0",
    "crossbeam-utils",
    ]
    [[package]]
  • edit in Cargo.lock at line 439
    [5.7564]
    [5.7564]
    name = "detone"
    version = "1.0.0"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "f7104c193859c8141dcbb2008bd3d93e581d0fa7bb47b0d9f5e15c89d1b55514"
    [[package]]
  • edit in Cargo.lock at line 1074
    [5.22974]
    [5.22974]
    "adler32",
  • edit in Cargo.lock at line 1705
    [5.39097]
    [5.39097]
    ]
    [[package]]
    name = "rayon"
    version = "1.5.1"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
    dependencies = [
    "autocfg",
    "crossbeam-deque",
    "either",
    "rayon-core",
  • edit in Cargo.lock at line 1720
    [5.39310]
    [5.39310]
    name = "rayon-core"
    version = "1.9.1"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
    dependencies = [
    "crossbeam-channel",
    "crossbeam-deque",
    "crossbeam-utils",
    "lazy_static",
    "num_cpus",
    ]
    [[package]]