text file decoding for new files

[?]
Dec 11, 2020, 6:32 AM
VMOYG7MKEWTUEEY2EOL256RWCVPGRD63IFOSKXHBGJ6VSRITLMOAC

Dependencies

  • [2] 6HNRL5RT detect non-utf8 text files
  • [3] SXEYMYF7 Fixing the bad changes in history (unfortunately, by rebooting).
  • [4] VO5OQW4W Removing anyhow in libpijul

Change contents

  • replacement in libpijul/src/tests/text.rs at line 34
    [2.969][2.969:1116]()
    for l in std::str::from_utf8(&v).unwrap().lines() {
    error!("{:?}", l);
    }
    let lines = std::str::from_utf8(&v).unwrap().lines();
    [2.969]
    [2.1116]
    let lines: Vec<&str> = std::str::from_utf8(&v)
    .unwrap()
    .lines()
    .filter(|l| l.starts_with("+"))
    .collect();
  • replacement in libpijul/src/tests/text.rs at line 41
    [2.1132][2.1132:1331]()
    1,
    lines
    .clone()
    .filter(|l| l.starts_with("+") && l.contains("French / Français (Windows CP 1252)"))
    .count()
    );
    assert_eq!(
    1,
    [2.1132]
    [2.1331]
    vec!["+ French / Français (Windows CP 1252)", "+ €‚ƒ„…†‡, Salut"],
  • edit in libpijul/src/tests/text.rs at line 43
    [2.1345][2.1345:1455]()
    .filter(|l| l.starts_with("+") && l.contains("€‚ƒ„…†‡, Salut"))
    .count()
  • replacement in libpijul/src/tests/text.rs at line 91
    [2.2793][2.2793:2882]()
    for l in std::str::from_utf8(&v).unwrap().lines() {
    error!("{:?}", l);
    }
    [2.2793]
    [2.2882]
    let lines: Vec<&str> = std::str::from_utf8(&v)
    .unwrap()
    .lines()
    .filter(|l| l.starts_with(|c| c == '-' || c == '+'))
    .collect();
  • replacement in libpijul/src/tests/text.rs at line 97
    [2.2898][2.2898:3128]()
    1,
    std::str::from_utf8(&v)
    .unwrap()
    .lines()
    .filter(|l| l.starts_with("-")
    && l.contains("French / Français (ISO Latin-1 / ISO 8859-1)"))
    .count()
    [2.2898]
    [2.3128]
    vec![
    "- French / Français (ISO Latin-1 / ISO 8859-1)",
    "+ Français / French (ISO Latin-1 / ISO 8859-1)"
    ],
    lines
  • edit in libpijul/src/record.rs at line 2
    [3.488937]
    [3.488937]
    use std::collections::{HashMap, HashSet};
    use chardetng::EncodingDetector;
    // use encoding_rs::Encoding;
  • edit in libpijul/src/record.rs at line 16
    [3.489225][3.489225:489267]()
    use std::collections::{HashMap, HashSet};
  • replacement in libpijul/src/record.rs at line 243
    [3.496552][3.496552:496630]()
    working_copy.read_file(&item.full_path, &mut self.rec.contents)?;
    [3.496552]
    [3.496630]
    let mut uncoded = Vec::new();
    working_copy.read_file(&item.full_path, &mut uncoded)?;
    let encoding = if tree_magic_mini::from_u8(&uncoded).starts_with("text/") {
    let mut detector = EncodingDetector::new();
    detector.feed(&uncoded, true);
    let encoding = detector.guess(None, true);
    debug!("guessed encoding = {:?}", encoding.name());
    let (decoded, encoding, malformed) = encoding.decode(&uncoded);
    debug!("final encoding = {:?}", encoding.name());
    if !malformed {
    self.rec.contents.append(&mut decoded.as_bytes().to_vec());
    Some(encoding)
    } else {
    warn!("text file was malformed");
    self.rec.contents.append(&mut uncoded);
    None
    }
    } else {
    self.rec.contents.append(&mut uncoded);
    None
    };
  • edit in libpijul/src/change/text_changes.rs at line 1
    [3.37976][3.37977:38019](),[3.38019][2.5213:5246]()
    use super::*;
    use crate::changestore::*;
    use chardetng::EncodingDetector;
  • edit in libpijul/src/change/text_changes.rs at line 5
    [3.38112]
    [3.38112]
    use super::*;
    use crate::changestore::*;
  • replacement in libpijul/src/change/text_changes.rs at line 1161
    [2.5313][2.5313:5752](),[2.5752][3.84993:85003](),[3.84993][3.84993:85003](),[3.85003][2.5753:5804]()
    let mut detector = EncodingDetector::new();
    detector.feed(&contents, true);
    let encoding = detector.guess(None, true);
    debug!("guessed encoding = {:?}", encoding.name());
    let (contents, encoding, malformed) = encoding.decode(&contents);
    debug!("final encoding = {:?}", encoding.name());
    if malformed {
    warn!("text file was malformed, should probably try binary instead")
    }
    for a in contents.split_terminator('\n') {
    [2.5313]
    [2.5804]
    for a in std::str::from_utf8(&contents)
    .unwrap()
    .split_terminator('\n')
    {