text file decoding for new files
[?]
Dec 11, 2020, 6:32 AM
VMOYG7MKEWTUEEY2EOL256RWCVPGRD63IFOSKXHBGJ6VSRITLMOAC
Dependencies
- [2] 6HNRL5RT detect non-utf8 text files
- [3] SXEYMYF7 Fixing the bad changes in history (unfortunately, by rebooting).
- [4] VO5OQW4W Removing anyhow in libpijul
Change contents
- replacement in libpijul/src/tests/text.rs at line 34
for l in std::str::from_utf8(&v).unwrap().lines() {error!("{:?}", l);}let lines = std::str::from_utf8(&v).unwrap().lines();let lines: Vec<&str> = std::str::from_utf8(&v).unwrap().lines().filter(|l| l.starts_with("+")).collect(); - replacement in libpijul/src/tests/text.rs at line 41
1,lines.clone().filter(|l| l.starts_with("+") && l.contains("French / Français (Windows CP 1252)")).count());assert_eq!(1,vec!["+ French / Français (Windows CP 1252)", "+ €‚ƒ„…†‡, Salut"], - edit in libpijul/src/tests/text.rs at line 43
.filter(|l| l.starts_with("+") && l.contains("€‚ƒ„…†‡, Salut")).count() - replacement in libpijul/src/tests/text.rs at line 91
for l in std::str::from_utf8(&v).unwrap().lines() {error!("{:?}", l);}let lines: Vec<&str> = std::str::from_utf8(&v).unwrap().lines().filter(|l| l.starts_with(|c| c == '-' || c == '+')).collect(); - replacement in libpijul/src/tests/text.rs at line 97
1,std::str::from_utf8(&v).unwrap().lines().filter(|l| l.starts_with("-")&& l.contains("French / Français (ISO Latin-1 / ISO 8859-1)")).count()vec!["- French / Français (ISO Latin-1 / ISO 8859-1)","+ Français / French (ISO Latin-1 / ISO 8859-1)"],lines - edit in libpijul/src/record.rs at line 2
use std::collections::{HashMap, HashSet};use chardetng::EncodingDetector;// use encoding_rs::Encoding; - edit in libpijul/src/record.rs at line 16
use std::collections::{HashMap, HashSet}; - replacement in libpijul/src/record.rs at line 243
working_copy.read_file(&item.full_path, &mut self.rec.contents)?;let mut uncoded = Vec::new();working_copy.read_file(&item.full_path, &mut uncoded)?;let encoding = if tree_magic_mini::from_u8(&uncoded).starts_with("text/") {let mut detector = EncodingDetector::new();detector.feed(&uncoded, true);let encoding = detector.guess(None, true);debug!("guessed encoding = {:?}", encoding.name());let (decoded, encoding, malformed) = encoding.decode(&uncoded);debug!("final encoding = {:?}", encoding.name());if !malformed {self.rec.contents.append(&mut decoded.as_bytes().to_vec());Some(encoding)} else {warn!("text file was malformed");self.rec.contents.append(&mut uncoded);None}} else {self.rec.contents.append(&mut uncoded);None}; - edit in libpijul/src/change/text_changes.rs at line 1
use super::*;use crate::changestore::*;use chardetng::EncodingDetector; - edit in libpijul/src/change/text_changes.rs at line 5
use super::*;use crate::changestore::*; - replacement in libpijul/src/change/text_changes.rs at line 1161[2.5313]→[2.5313:5752](∅→∅),[2.5752]→[3.84993:85003](∅→∅),[3.84993]→[3.84993:85003](∅→∅),[3.85003]→[2.5753:5804](∅→∅)
let mut detector = EncodingDetector::new();detector.feed(&contents, true);let encoding = detector.guess(None, true);debug!("guessed encoding = {:?}", encoding.name());let (contents, encoding, malformed) = encoding.decode(&contents);debug!("final encoding = {:?}", encoding.name());if malformed {warn!("text file was malformed, should probably try binary instead")}for a in contents.split_terminator('\n') {for a in std::str::from_utf8(&contents).unwrap().split_terminator('\n'){