Extract balance deduplication logic into shared utility

korrat
Jun 25, 2023, 6:53 PM
NQ455YWRMOM3SUX3A7W2SR4VAI3DYBXBKBMYDGNQIGUICOVC2WOAC

Dependencies

  • [2] R5K55SCB Move tagging of directives with source to framework runner
  • [3] SPZVD25W Fix importer after changed metadata format
  • [4] RCS5VP3A Add an importer for PayPal account statements
  • [*] QNGOXZL4 Add a basic framework
  • [*] I2P2FTLE add basic parser for german decimals
  • [*] 6MR76MLL Replace build script with cargo-px
  • [*] UESS5YZE migrate dependencies into workspace manifest
  • [*] YDK6X6PP add a library of important types for beancount
  • [*] 24CCPM5O Update dependencies
  • [*] 5S4MZHL5 pretty print decimals using icu

Change contents

  • edit in importers/paypal/src/lib.rs at line 5
    [4.106][4.106:137]()
    use std::collections::HashMap;
  • edit in importers/paypal/src/lib.rs at line 6
    [4.138][4.138:172]()
    use alloc::collections::BTreeMap;
  • edit in importers/paypal/src/lib.rs at line 23
    [4.747]
    [4.747]
    use hashbrown::HashMap;
  • replacement in importers/paypal/src/lib.rs at line 196
    [4.6446][4.6446:6494]()
    Balance::new(date, account, amount)
    [4.6446]
    [4.6494]
    let mut balance = Balance::new(date, account, amount);
    balance.add_meta(common_keys::TIMESTAMP, timestamp.format(&Rfc3339).unwrap());
    balance
  • replacement in importers/paypal/src/lib.rs at line 224
    [4.7112][4.7112:7243]()
    // TODO post-process results to eliminate duplicate balance statements
    let directives = self
    .importer
    [4.7112]
    [4.7243]
    self.importer
  • replacement in importers/paypal/src/lib.rs at line 226
    [4.7286][4.7286:8053](),[4.8053][3.20:199](),[3.199][4.8132:8511](),[4.8132][4.8132:8511]()
    .map_err(Self::Error::from)?;
    let mut balances = BTreeMap::new();
    let mut directives: Vec<_> = directives
    .into_iter()
    .filter_map(|directive| {
    if let Directive::Balance(balance) = directive {
    use alloc::collections::btree_map::Entry;
    match balances.entry((
    balance.date,
    balance.account.clone(),
    balance.amount.commodity,
    )) {
    Entry::Vacant(entry) => {
    entry.insert(balance);
    }
    Entry::Occupied(mut entry) => {
    let other = entry.get();
    if other.meta[common_keys::IMPORTED_RECORD]
    < balance.meta[common_keys::IMPORTED_RECORD]
    {
    *entry.get_mut() = balance;
    }
    }
    }
    None
    } else {
    Some(directive)
    }
    })
    .collect();
    directives.extend(balances.into_values().map(Directive::from));
    Ok(directives)
    [4.7286]
    [4.8511]
    .map_err(Self::Error::from)
  • edit in importers/paypal/Cargo.toml at line 16
    [4.15276]
    [4.15276]
    hashbrown.workspace = true
  • file addition: utilities.rs (---r------)
    [6.24]
    use core::hash::BuildHasher as _;
    use core::hash::Hash as _;
    use core::hash::Hasher as _;
    use beancount_types::Acc;
    use beancount_types::Account;
    use beancount_types::Amount;
    use beancount_types::Balance;
    use beancount_types::Commodity;
    use beancount_types::Directive;
    use camino::Utf8Path;
    use delegate::delegate;
    use hashbrown::hash_map::RawEntryMut;
    use hashbrown::HashMap;
    use time::Date;
    use crate::ImporterProtocol;
    /// Importer adapter that removes redundant balance directives from the
    /// output of a wrapped importer.
    ///
    /// Balances count as duplicates when they share the same date, account and
    /// commodity. Of each such group only the balance whose `key` value is
    /// greatest is kept; on a tie the balance encountered first wins.
    pub struct DeduplicateBalances<I, T, F>
    where
        F: Fn(&Balance) -> &T,
        I: ImporterProtocol,
        T: Ord,
    {
        /// The wrapped importer whose extracted directives are post-processed.
        inner: I,
        /// Projects a balance onto the value used to rank duplicates.
        key: F,
    }
    impl<I, T, F> DeduplicateBalances<I, T, F>
    where
    I: ImporterProtocol,
    T: Ord,
    F: Fn(&Balance) -> &T,
    {
    pub fn new(inner: I, key: F) -> Self {
    Self { inner, key }
    }
    }
    impl<I, T, F> DeduplicateBalances<I, T, F>
    where
        F: Fn(&Balance) -> &T,
        I: ImporterProtocol,
        T: Ord,
    {
        /// Applies the configured key function to `balance`.
        fn key<'b>(&self, balance: &'b Balance) -> &'b T {
            let project = &self.key;
            project(balance)
        }

        /// Records `balance` in `map`, keyed by its date, account and commodity.
        ///
        /// If the slot is already taken, the stored balance is replaced only when
        /// the new balance has a strictly greater key; otherwise the new balance
        /// is dropped.
        fn upsert(&self, map: &mut HashMap<StorageKey, Balance>, balance: Balance) {
            // Borrowed view of the identifying fields, so probing the map does
            // not require cloning the account.
            let query = QueryKey {
                date: balance.date,
                account: &balance.account,
                commodity: &balance.amount.commodity,
            };

            // Hash the borrowed key with the map's own hasher. This relies on
            // `QueryKey` and `StorageKey` hashing identically.
            let mut state = map.hasher().build_hasher();
            query.hash(&mut state);
            let hash = state.finish();

            let slot = map
                .raw_entry_mut()
                .from_hash(hash, |stored| stored == query);

            match slot {
                RawEntryMut::Vacant(vacant) => {
                    // First balance for this key: only now is an owned key built.
                    vacant.insert(query.into(), balance);
                }
                RawEntryMut::Occupied(mut occupied) => {
                    if self.key(occupied.get()) < self.key(&balance) {
                        occupied.insert(balance);
                    }
                }
            }
        }
    }
    impl<I, T, F> ImporterProtocol for DeduplicateBalances<I, T, F>
    where
    F: Fn(&Balance) -> &T,
    I: ImporterProtocol,
    T: Ord,
    {
    type Error = I::Error;
    delegate! {
    to (self.inner) {
    fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
    fn date(&self, file: &Utf8Path) -> Option<Result<Date, Self::Error>>;
    fn filename(&self, file: &Utf8Path) -> Option<Result<String, Self::Error>>;
    fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
    fn name(&self) -> &'static str;
    fn typetag_deserialize(&self);
    }
    }
    fn extract(
    &self,
    file: &camino::Utf8Path,
    existing: &[Directive],
    ) -> Result<Vec<Directive>, Self::Error> {
    let directives = self.inner.extract(file, existing)?;
    let mut balances = HashMap::new();
    let mut directives: Vec<_> = directives
    .into_iter()
    .filter_map(|directive| {
    if let Directive::Balance(balance) = directive {
    self.upsert(&mut balances, balance);
    None
    } else {
    Some(directive)
    }
    })
    .collect();
    directives.extend(balances.into_values().map(Directive::from));
    Ok(directives)
    }
    }
    /// Extension methods available on every importer.
    pub trait ImporterProtocolExt {
        /// Wraps this importer in a [`DeduplicateBalances`] adapter that ranks
        /// duplicate balances by the value `key` returns for each of them.
        fn deduplicate_balances_by<T, F>(self, key: F) -> DeduplicateBalances<Self, T, F>
        where
            F: Fn(&Balance) -> &T,
            Self: ImporterProtocol + Sized,
            T: Ord,
        {
            DeduplicateBalances::new(self, key)
        }
    }
    // Blanket implementation: every sized importer gets the extension methods.
    impl<I: ImporterProtocol + Sized> ImporterProtocolExt for I {}
    /// Owned map key identifying a balance by date, account and commodity.
    ///
    /// The field order must stay in sync with `QueryKey`, because lookups hash a
    /// `QueryKey` and compare it against stored `StorageKey`s.
    #[derive(Debug, PartialEq, Eq, Hash)]
    struct StorageKey {
        date: Date,
        account: Account,
        commodity: Commodity,
    }
    impl From<QueryKey<'_>> for StorageKey {
    fn from(query: QueryKey) -> Self {
    let QueryKey {
    date,
    account,
    commodity,
    } = query;
    let account = account.to_owned();
    let commodity = *commodity;
    Self {
    date,
    account,
    commodity,
    }
    }
    }
    impl PartialEq<QueryKey<'_>> for &StorageKey {
    fn eq(&self, other: &QueryKey) -> bool {
    self.date == other.date
    && self.account == other.account
    && &self.commodity == other.commodity
    }
    }
    /// Borrowed counterpart of `StorageKey`, used to probe the map without
    /// cloning the account.
    ///
    /// The field order must stay in sync with `StorageKey` so that both hash the
    /// same way.
    #[derive(Hash, Debug)]
    struct QueryKey<'q> {
        date: Date,
        account: &'q Acc,
        commodity: &'q Commodity,
    }
  • edit in framework/src/lib.rs at line 17
    [6.8981]
    [6.8981]
    pub mod utilities;
  • edit in framework/Cargo.toml at line 19
    [2.2178]
    [2.2178]
    hashbrown.workspace = true
  • edit in Cargo.toml at line 90
    [8.3979]
    [9.1583]
    [workspace.dependencies.hashbrown]
    features = ["rayon", "serde"]
    version = "0.14.0"
  • edit in Cargo.lock at line 61
    [10.26521]
    [10.26521]
    [[package]]
    name = "allocator-api2"
    version = "0.2.15"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"
  • edit in Cargo.lock at line 262
    [6.15406]
    [6.15406]
    "hashbrown 0.14.0",
  • edit in Cargo.lock at line 1592
    [11.6235]
    [11.6235]
    dependencies = [
    "ahash 0.8.3",
    ]
    [[package]]
    name = "hashbrown"
    version = "0.14.0"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
  • edit in Cargo.lock at line 1603
    [11.6268]
    [12.20214]
    "allocator-api2",
    "rayon",
    "serde",
  • edit in Cargo.lock at line 2894
    [4.15918]
    [4.15918]
    "hashbrown 0.14.0",