F6O6FGOJ762C5CFA4R5K4BTUHTERHT2AOWGQS4S3QYUI7QU6N4IQC 6YZAVBWU6E5FYOI5JGEIPXGZLIKAW6LS2AOFIQWEE5DMOPPCD5PQC YBJRDOTCX3ZRDB5EVXJBR55FX3CADCSIGMYWNYVC2PD5W3GXR3DQC A5YBC77VWH2LXCZJOPZORQJI5ZYABSCHJWVX5HVNWPM5RABXESLQC V55EAIWQXWER2HWKZHPJBV7DDJMSPSPWSO3FSSAYODJHVDBHUN6QC HOJZI52YIXKAYF766WR3SAOIFZH6YRMDOUE23VWEYNBZRBGEU25AC MJDGPSHGF62FTVWZBE7MFNJTUQD42OBVJEOSVPBT553UFJLTEMXQC QMAUTRB6R5R7ABWT2JIDEA7LMILZOS3PGPZIF3YUFKRVLW6HGKTQC WI2BVQ6JOJBM4OC5KSZBMTDPBWESIR7GD72B5TLO7H2SY7QBDHJAC NWJD6VM6POMYKQTTPP3X6LVCWU3FHLDRIHMCSC2PPUT7JWNY42LAC CALXOZXANFZ64NBZBTR2KYTZ6ZLLCJXNFAEALBB2EYAVDVJJ6X6AC BFN2VHZS7VCBUHQ4S3CQ3LFQV2V4M6VANNAF32XMRFQVWRGYSZ6AC 3SYSJKYLVCXR54LRUPL6GOQISSJS6XWK4M6PRQRCKZN7F23NNVEAC 23SFYK4Q5NKBPJG53PQNPWQH6UOUU2YKJEL7RLXYBRLJOJYV7AWQC OPXFZKEBDHZZLXEJ2JRDYBOJH6YIN7UZNZYHVHMWMQVDTE2ZD53QC XSZZB47UXR6KGYFZZQFQR63X2LDKOH6TPNNBRRGHUCI5JJ4JIWVAC 3BK22XE5LPOH2EK5AMRXFXHNQNCJ54HEPYRINHJT4DA7INT32I7AC VCNKFNUF7OWVSWC6I5D25KUZ3XZZICZ3LHWVPF2N5ZSP7LQ2JOUQC 6F7Q4ZLR5DGYT557MYMSHMZGQ7EVEB3LZGLZFCWHGIOI66STIANQC I56UGW7UUKLSR4753EYRGNROZB5PD522REEOGHVAQOZZTSVRUEEQC DXAYDIMQ7BYEI3ASOHKADQWSMVJOA2ZVNEL2TDENJCJKX2U4GMWAC PTWZYQFRWWUOE2WMQT26CKZKFSHAIJVJS3QWHJFYUFDRRTVPHSUAC 7MJOO4E2VGNT7FKBOJUX6JDG4OET6V7DH3JIERUQXXJPM2AJQCNQC WH57EHNML4OTGQQZBT2SG6SOFTBOD6OJPJYHJVGPH22CSSOE25AAC IFQPVMBD552DZ3B5HCM6W6MI2SB6576ZYJNU5KVA3O4YPZAUEFHAC K2SQTVJDXCETCK5KXP72JVCKUIK5V4XNBHRUHKRMOF2524D44WUAC UPWS6J3BIHQKXSSWHD7CFLJOXWT3MRABFRVQ4T4NRYFALBAKJOOQC FU6P5QLG4GVLHVB4O5TCEPJF4X4FGDUBONQFRYP4U5KEPIYLUWJQC YGZ3VCW4OAJYPI2CYK3MTABNFY7Y2ENSSTFE5ZZ4K6HK57FCU3XQC 6LF2U2Y6QOQ7BREI6L26RXMQNKNEBOW2YUUHEIYELDP2C7PJG5XAC G5WLRXODOQR3PLO7G4RS2T3COBA3TXGOJJQ6DG5T2HA7C5K4APHAC TJHMERBNK3WE3XRVX7RJV4G2IF46HCFDDGQYIFKCZV26LYSKQZ4AC OLT666N4VXRYJAVF4ZBYL3FDCQB5N42BFUCMVFWN4LP5AANKWGPQC SWWE2R6MVBX5CNM6X3WLXZTSRTU53PBJL7WJSFVF77XBPXDX4COAC use crate::prelude::pijul;/// Binary data returns `None`.// Detection logic borrowed from <https://github.com/Wilfred/difftastic/blob/1436c8eac39dcea07f8c24a8128284a25b416e8d/src/files.rs#L141>.pub fn detect(data: &[u8]) -> Option<pijul::Encoding> {if std::str::from_utf8(data).is_ok() {return Some(pijul::Encoding(encoding_rs::UTF_8));}// Only consider the first 1,000 bytes, as tree_magic_mini// considers the entire file, which is very slow on large files.let mut magic_bytes = data;if magic_bytes.len() > 1000 {magic_bytes = &magic_bytes[..1000];}let mime = tree_magic_mini::from_u8(magic_bytes);dbg!(mime);// Use MIME type detection to guess whether a file is binary. This// has false positives and false negatives, so only check the MIME// type after allowing perfect text files (see issue #433).match mime {// Treat pdf as binary."application/pdf" => return None,// application/* is a mix of stuff, application/json is fine// but application/zip is binary that often decodes as valid// UTF-16.//// See// <https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/MIME_types/Common_types>// for a list of MIME types."application/x-archive"| "application/x-bzip"| "application/x-bzip2"| "application/x-7zip-compressed"| "application/gzip"| "application/zip"| "application/zstd"| "application/octet-stream" => return None,// Treat all image content as binary.v if v.starts_with("image/") => return None,// Treat all audio content as binary.v if v.starts_with("audio/") => return None,// Treat all video content as binary.v if v.starts_with("video/") => return None,// Treat all font content as binary.v if v.starts_with("font/") => return None,_ => {}}// If the input bytes are *almost* valid UTF-8, treat them as// UTF-8. This is helpful when the user has written a small number// of bad bytes to a file. Users would still like to be able to// diff these files.let utf8_string = String::from_utf8_lossy(data).to_string();let num_utf8_invalid = utf8_string.chars().take(50000).filter(|c| *c == std::char::REPLACEMENT_CHARACTER || *c == '\0').count();if num_utf8_invalid <= 2 {return Some(pijul::Encoding(encoding_rs::UTF_8));}// Fallback to pijul encoding detectionpijul::change::get_encoding(data)}
}}// Detection logic borrowed from <https://github.com/Wilfred/difftastic/blob/1436c8eac39dcea07f8c24a8128284a25b416e8d/src/files.rs#L141>./// Binary data returns `None`fn detect_encoding(data: &[u8]) -> Option<pijul::Encoding> {if std::str::from_utf8(data).is_ok() {return Some(pijul::Encoding(encoding_rs::UTF_8));}// Only consider the first 1,000 bytes, as tree_magic_mini// considers the entire file, which is very slow on large files.let mut magic_bytes = data;if magic_bytes.len() > 1000 {magic_bytes = &magic_bytes[..1000];}let mime = tree_magic_mini::from_u8(magic_bytes);// Use MIME type detection to guess whether a file is binary. This// has false positives and false negatives, so only check the MIME// type after allowing perfect text files (see issue #433).match mime {// Treat pdf as binary."application/pdf" => return None,// application/* is a mix of stuff, application/json is fine// but application/zip is binary that often decodes as valid// UTF-16.//// See// <https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/MIME_types/Common_types>// for a list of MIME types."application/x-archive" => return None,"application/x-bzip" => return None,"application/x-bzip2" => return None,"application/x-7zip-compressed" => return None,"application/gzip" => return None,"application/zip" => return None,"application/zstd" => return None,// Treat all image content as binary.v if v.starts_with("image/") => return None,// Treat all audio content as binary.v if v.starts_with("audio/") => return None,// Treat all video content as binary.v if v.starts_with("video/") => return None,// Treat all font content as binary.v if v.starts_with("font/") => return None,_ => {}
// If the input bytes are *almost* valid UTF-8, treat them as// UTF-8. This is helpful when the user has written a small number// of bad bytes to a file. Users would still like to be able to// diff these files.let utf8_string = String::from_utf8_lossy(data).to_string();let num_utf8_invalid = utf8_string.chars().take(50000).filter(|c| *c == std::char::REPLACEMENT_CHARACTER || *c == '\0').count();if num_utf8_invalid <= 2 {return Some(pijul::Encoding(encoding_rs::UTF_8));}// Fallback to pijul encoding detectionpijul::change::get_encoding(data)
unimplemented!("The change and replacement have different encoding!");
error!("The change and replacement have different encoding! Change: {change_contents:?}, replacement: {replacement_contents:?}");without.push(DiffWithoutContents::Replacement {line: *line,change_contents: UndecodableContents::UnknownEncoding,replacement_contents: UndecodableContents::UnknownEncoding,});