QNGOXZL4NWSLMBCXA4GL46V5W3I6MXZZARCYKOPJHABBK46C7JJAC
ZVTVMOZQCMKERJXJOVZWYYIHY7GCHAA4RTVCUZBIHXWDQJIACPMAC
5PYSO4HI4ATDTZ3CWI7HGGPZDEG52YELOOGXCSU6Q4KPFMFR5KHAC
QRIJE4AQWN7A7O2CO7FXRV4FXZ5RHONQKRGHYXAD7WSECJYK2MFAC
6MR76MLLBLJUVM4K4VI3SVDYLN67FDG3VTLP4S2QD4Q2QO5UKPKAC
I2P2FTLEKLICJKHQ3FHOLRQRQYGZCJTCTU2MWXU2TMIRIKG6YFCQC
R7S2CWF72WQTE447Q62PKCVJSSCEXHMVUJ33Z6IN6NREOUSD4WNQC
D6UTHZA4XNAR2PTG4YEZFNNH3OTSNOWGDSVBYDRE5R2YSV7MPN6AC
2Z4EGCWQV2GF3CZC6IBGLTOS6JN3NMNBNQMUOZTIOCRO54VB6DSQC
use core::mem;
use alloc::borrow::Cow;
use beancount_pretty_printer::PrettyPrinter;
use beancount_tree_writer::Config as TreeWriterConfig;
use beancount_tree_writer::TreeWriter;
use beancount_types::Directive;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use miette::Context as _;
use miette::IntoDiagnostic as _;
use miette::Report as ErrorReport;
use miette::Result; // TODO
use crate::ImporterProtocol;
use crate::ImporterRegistry;
mod archive;
/// Collects the settings for an import run and turns them into a [`Runner`].
///
/// Obtain one through [`Runner::builder`] or [`Builder::default`], adjust it with the setter
/// methods, and finish with [`Builder::build`].
#[derive(Debug)]
pub struct Builder {
// Base directory below which imported files are archived.
archive_directory: Utf8PathBuf,
// When set, the runner prints the extracted directives instead of writing them and leaves the
// source files where they are.
dry_run: bool,
// Importers that are asked, in turn, whether they can handle a file.
importers: ImporterRegistry<ErrorReport>,
// When set, archiving may replace a file that already exists at the destination.
overwrite: bool,
}
impl Builder {
pub fn archive_directory(&mut self, directory: Utf8PathBuf) -> &mut Self {
self.archive_directory = directory;
self
}
pub fn build(&mut self) -> Runner {
let Self {
archive_directory,
dry_run,
importers,
overwrite,
} = mem::take(self);
let archive_directory = archive_directory.canonicalize_utf8().unwrap();
Runner {
archive_directory,
dry_run,
importers,
overwrite,
}
}
pub fn dry_run(&mut self) -> &mut Self {
self.dry_run = true;
self
}
pub fn overwrite(&mut self) -> &mut Self {
self.overwrite = true;
self
}
pub fn register_importer(
&mut self,
importer: impl ImporterProtocol<Error = ErrorReport> + 'static,
) -> &mut Self {
self.importers.register_importer(importer);
self
}
pub fn with_dry_run(&mut self, dry_run: bool) -> &mut Self {
self.dry_run = dry_run;
self
}
pub fn with_overwrite(&mut self, overwrite: bool) -> &mut Self {
self.overwrite = overwrite;
self
}
}
impl Default for Builder {
fn default() -> Self {
let (dry_run, importers, overwrite) = Default::default();
Self {
archive_directory: Utf8PathBuf::from("archive"),
dry_run,
importers,
overwrite,
}
}
}
/// Imports files: identifies an importer for each file, archives the file, extracts its
/// directives, and finally writes (or, in dry-run mode, prints) everything that was extracted.
///
/// Construct one through [`Runner::builder`].
#[derive(Debug)]
pub struct Runner {
// Base directory below which imported files are archived.
archive_directory: Utf8PathBuf,
// When set, directives are printed to standard output and no file is moved.
dry_run: bool,
// Importers that are asked, in turn, whether they can handle a file.
importers: ImporterRegistry<ErrorReport>,
// When set, archiving may replace a file that already exists at the destination.
overwrite: bool,
}
impl Runner {
pub fn builder() -> Builder {
Builder::default()
}
}
impl Runner {
    /// Imports all `paths` and outputs the directives extracted from them.
    ///
    /// Nothing is output when no directives were extracted. Otherwise the directives are pretty
    /// printed to standard output in dry-run mode, or handed to a [`TreeWriter`] configured with
    /// `tree_writer_config`.
    ///
    /// # Errors
    ///
    /// Fails if the paths cannot be processed or if printing or writing the directives fails.
    #[tracing::instrument]
    pub fn run(&self, paths: &[Utf8PathBuf], tree_writer_config: TreeWriterConfig) -> Result<()> {
        let directives = self.process_paths(paths)?;

        let count = directives.len();
        if count == 0 {
            tracing::warn!("extracted no directives");
            return Ok(());
        }
        tracing::info!("extracted {} directives", count);

        if !self.dry_run {
            TreeWriter::new(tree_writer_config).write_directives(directives)?;
            tracing::info!("successfully wrote tree of accounts");
            return Ok(());
        }

        // Dry run: show what would have been written instead of touching the tree.
        let printer_config = beancount_pretty_printer::Config::derive_from_directives(&directives);
        let mut printer = PrettyPrinter::unbuffered(printer_config, std::io::stdout().lock());
        printer.print_directives(&directives).into_diagnostic()?;
        Ok(())
    }
}
impl Runner {
    /// Moves `file` to its place in the archive and returns the path it can be read from.
    ///
    /// The file is left untouched (and its original path returned) in dry-run mode or when it
    /// already sits at its archive destination.
    ///
    /// # Errors
    ///
    /// Fails if the destination cannot be determined, if the destination already exists and
    /// overwriting is disabled, or if moving the file fails.
    #[tracing::instrument(fields(importer = importer.name()), skip(self))]
    fn archive_file<'a>(
        &self,
        importer: &(dyn ImporterProtocol<Error = ErrorReport>),
        file: &'a Utf8Path,
    ) -> Result<Cow<'a, Utf8Path>> {
        let destination = archive::file_name(importer, &self.archive_directory, file)?;

        if self.dry_run || file == destination {
            return Ok(Cow::Borrowed(file));
        }

        tracing::info!(?file, ?destination, "archiving file");

        if destination.exists() {
            miette::ensure!(
                self.overwrite,
                "destination {destination:?} already exists",
                destination = destination,
            );
        }

        archive::move_file(file, &destination).wrap_err("while moving file into archive")?;

        Ok(Cow::Owned(destination))
    }

    /// Returns the first registered importer that claims `file`, if any.
    ///
    /// An importer that fails while identifying the file is treated as not claiming it; the
    /// failure is logged at debug level so that it is not lost entirely.
    #[tracing::instrument(skip(self))]
    fn identify_file(&self, file: &Utf8Path) -> Option<&dyn ImporterProtocol<Error = ErrorReport>> {
        self.importers
            .iter()
            .find(|importer| match importer.identify(file) {
                Ok(identified) => identified,
                Err(error) => {
                    tracing::debug!(
                        importer = importer.name(),
                        ?error,
                        "importer failed to identify file"
                    );
                    false
                }
            })
    }

    /// Processes every entry of the directory `path`, appending to `directives`.
    ///
    /// # Errors
    ///
    /// Fails if the directory or one of its entries cannot be read. Errors that occur while
    /// processing an individual entry are logged by [`Self::process_path`] instead.
    #[tracing::instrument(skip(self, directives))]
    fn process_dir(&self, directives: &mut Vec<Directive>, path: &Utf8Path) -> Result<()> {
        let entries = path
            .read_dir_utf8()
            .into_diagnostic()
            .wrap_err("could not read directory")?;

        for entry in entries {
            let entry = entry.into_diagnostic()?;
            self.process_path(directives, entry.path());
        }

        Ok(())
    }

    /// Identifies, archives, and extracts a single file, appending to `directives`.
    ///
    /// Files that no importer identifies are skipped with a warning.
    ///
    /// # Errors
    ///
    /// Fails if archiving the file or extracting its directives fails.
    #[tracing::instrument(skip(self, directives))]
    fn process_file(&self, directives: &mut Vec<Directive>, file: &Utf8Path) -> Result<()> {
        let Some(importer) = self.identify_file(file) else {
            tracing::warn!(%file, "ignoring file since no importer could identify it");
            return Ok(());
        };

        // From here on `file` refers to wherever the file lives after archiving.
        let file = self
            .archive_file(importer, file)
            .wrap_err("while archiving file")?;

        // let file = file.strip_prefix(&self.archive_directory).unwrap_or(&file);

        let mut extracted_directives = importer
            .extract(&file, &[]) // TODO load existing transactions
            .wrap_err_with(|| format!("error in importer {:?}", importer.name()))?;

        directives.append(&mut extracted_directives);

        Ok(())
    }

    /// Processes `path` as a directory or a file; failures are logged rather than returned so
    /// that one bad entry does not stop the remaining ones from being imported.
    #[tracing::instrument(skip(self, directives))]
    fn process_path(&self, directives: &mut Vec<Directive>, path: &Utf8Path) {
        let (kind, result) = if path.is_dir() {
            ("directory", self.process_dir(directives, path))
        } else {
            ("file", self.process_file(directives, path))
        };

        if let Err(error) = result {
            tracing::error!(%path, ?error, "error while importing {kind}");
        }
    }

    /// Processes all `paths` and returns the directives extracted from them.
    ///
    /// All paths are canonicalized before any of them is processed. Processing moves files into
    /// the archive, so validating up front ensures that an invalid path cannot abort the run
    /// after files have already been moved and thereby discard their extracted directives.
    ///
    /// # Errors
    ///
    /// Fails if any of the paths cannot be canonicalized; nothing has been processed in that
    /// case.
    #[tracing::instrument(skip(self))]
    fn process_paths(&self, paths: &[Utf8PathBuf]) -> Result<Vec<Directive>> {
        let paths = paths
            .iter()
            .map(|path| {
                path.canonicalize_utf8()
                    .into_diagnostic()
                    .wrap_err_with(|| format!("could not canonicalize path {path:?}"))
            })
            .collect::<Result<Vec<_>>>()?;

        let mut directives = Vec::new();
        for path in &paths {
            self.process_path(&mut directives, path);
        }

        Ok(directives)
    }
}
use std::fs;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use miette::IntoDiagnostic;
use miette::Result;
use time::OffsetDateTime;
use time_tz::OffsetDateTimeExt as _;
use crate::ImporterProtocol;
/// Computes the path under which `file` should be archived.
///
/// The result consists of `base`, one directory per segment of the account the importer reports
/// for the file, and a final component of the form `{date}.{name}` where
///
/// * `name` is the file name suggested by the importer, or else the file's own name, and
/// * `date` is the date reported by the importer, or else the file's creation time in the system
///   time zone. Where the platform or filesystem does not record a creation time, the
///   modification time is used instead.
///
/// # Errors
///
/// Fails if the importer reports an error, if `file` has no file name to fall back to, if the
/// file's metadata or the system time zone cannot be determined, or if the chosen name contains
/// the path separator character.
#[tracing::instrument(fields(importer = importer.name()))]
pub(crate) fn file_name(
    importer: &dyn ImporterProtocol<Error = miette::Report>,
    base: &Utf8Path,
    file: &Utf8Path,
) -> Result<Utf8PathBuf> {
    use std::fmt::Write;

    let account = importer.account(file)?;

    let suggested_name = importer.filename(file).transpose()?;
    let name = match suggested_name.as_deref() {
        Some(name) => name,
        // A path such as `/` or one ending in `..` has no final component to fall back to.
        None => file
            .file_name()
            .ok_or_else(|| miette::miette!("path {:?} has no file name", file))?,
    };

    let date = importer.date(file).unwrap_or_else(|| {
        let metadata = file.metadata().into_diagnostic()?;
        // Creation time is not available everywhere; fall back to the modification time.
        let timestamp = metadata
            .created()
            .or_else(|_| metadata.modified())
            .into_diagnostic()?;
        let timestamp = OffsetDateTime::from(timestamp);
        let tz = time_tz::system::get_timezone().into_diagnostic()?;
        let timestamp = timestamp.to_timezone(tz);
        Ok(timestamp.date())
    })?;

    let sep = std::path::MAIN_SEPARATOR;

    // The returned filename cannot contain the file path separator character.
    miette::ensure!(
        !name.contains(sep),
        "filename contains path separator character"
    );

    // TODO: reject names that already look like they start with a date, i.e. names matching
    // `\d\d\d\d-\d\d-\d\d\.`.

    // Prepend account directory and date prefix.
    let mut path = base.to_string();
    let additional = account.len() + 10 + 1 + name.len() + 5 + 2; // account name + date + dot + name + extra separators
    path.reserve(additional);
    account
        .segments()
        .for_each(|segment| write!(path, "{sep}{segment}").unwrap());
    write!(path, "{sep}{date}.{name}").unwrap();

    Ok(Utf8PathBuf::from(path))
}
/// Renames `file` to `destination`, first creating any missing parent directories of
/// `destination`.
///
/// # Errors
///
/// Fails if the parent directories cannot be created or if the rename fails.
#[tracing::instrument]
pub(crate) fn move_file(file: &Utf8Path, destination: &Utf8Path) -> Result<()> {
    destination
        .parent()
        .map(fs::create_dir_all)
        .transpose()
        .into_diagnostic()?;

    fs::rename(file, destination).into_diagnostic()
}
extern crate alloc;
use core::hash::Hash;
use alloc::collections::BTreeSet;
use alloc::rc::Rc;
use alloc::sync::Arc;
use beancount_types::Account;
use beancount_types::Directive;
use camino::Utf8Path;
use delegate::delegate;
use time::Date;
pub mod runner;
/// Interface every importer implements so that it can be driven by the [`runner`].
///
/// An importer decides whether it can handle a file, tells the framework where the file belongs,
/// and extracts directives from it.
pub trait ImporterProtocol {
/// Error type returned by the fallible methods of this importer.
type Error;
/// Returns the account `file` belongs to.
///
/// The runner's archive uses the segments of this account as the directories a file is
/// archived under.
fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
/// Returns the date to associate with `file`, if the importer can determine one.
///
/// The default implementation returns `None`; the runner's archive then derives a date from
/// the file's metadata.
fn date(&self, _file: &Utf8Path) -> Option<Result<Date, Self::Error>> {
None
}
/// Extracts the directives contained in `file`.
///
/// `existing` holds directives that are already known. NOTE(review): the runner currently
/// always passes an empty slice here.
fn extract(
&self,
file: &Utf8Path,
existing: &[Directive],
) -> Result<Vec<Directive>, Self::Error>;
/// Returns the name `file` should be archived under, if the importer wants to override it.
///
/// The default implementation returns `None`; the runner's archive then keeps the file's own
/// name.
fn filename(&self, _file: &Utf8Path) -> Option<Result<String, Self::Error>> {
None
}
/// Returns whether this importer is able to handle `file`.
fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
/// Returns the name of this importer.
///
/// The name is what [`ImporterRegistry`] uses to order importers and to detect duplicates; it
/// also appears in log output.
fn name(&self) -> &'static str;
// NOTE(review): the purpose of this hidden method is not evident from this file; its name
// suggests it relates to the `typetag` crate — confirm before relying on it.
#[doc(hidden)]
fn typetag_deserialize(&self);
}
/// A shared reference to an importer is itself an importer: every method is forwarded to the
/// referenced value.
impl<I> ImporterProtocol for &I
where
I: ImporterProtocol + ?Sized,
{
type Error = I::Error;
delegate! {
// `*self` is the `&I` behind the `&&I` receiver, so the calls below reach `I`'s methods.
to (*self) {
fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
fn date(&self, _file: &Utf8Path) -> Option<Result<Date, Self::Error>>;
fn extract(&self, file: &Utf8Path, existing: &[Directive]) -> Result<Vec<Directive>, Self::Error>;
fn filename(&self, file: &Utf8Path) -> Option<Result<String, Self::Error>>;
fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
fn name(&self) -> &'static str;
fn typetag_deserialize(&self);
}
}
}
/// An [`Arc`] holding an importer is itself an importer: every method is forwarded to the value
/// it points to.
impl<I> ImporterProtocol for Arc<I>
where
I: ImporterProtocol + ?Sized,
{
type Error = I::Error;
delegate! {
// `**self` dereferences first the `&Arc<I>` receiver and then the `Arc` itself.
to (**self) {
fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
fn date(&self, _file: &Utf8Path) -> Option<Result<Date, Self::Error>>;
fn extract(&self, file: &Utf8Path, existing: &[Directive]) -> Result<Vec<Directive>, Self::Error>;
fn filename(&self, file: &Utf8Path) -> Option<Result<String, Self::Error>>;
fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
fn name(&self) -> &'static str;
fn typetag_deserialize(&self);
}
}
}
/// A [`Box`] holding an importer is itself an importer: every method is forwarded to the value
/// it points to. Because `I` may be unsized, this also covers `Box<dyn ImporterProtocol>`.
impl<I> ImporterProtocol for Box<I>
where
I: ImporterProtocol + ?Sized,
{
type Error = I::Error;
delegate! {
// `**self` dereferences first the `&Box<I>` receiver and then the `Box` itself.
to (**self) {
fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
fn date(&self, _file: &Utf8Path) -> Option<Result<Date, Self::Error>>;
fn extract(&self, file: &Utf8Path, existing: &[Directive]) -> Result<Vec<Directive>, Self::Error>;
fn filename(&self, file: &Utf8Path) -> Option<Result<String, Self::Error>>;
fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
fn name(&self) -> &'static str;
fn typetag_deserialize(&self);
}
}
}
/// An [`Rc`] holding an importer is itself an importer: every method is forwarded to the value
/// it points to.
impl<I> ImporterProtocol for Rc<I>
where
I: ImporterProtocol + ?Sized,
{
type Error = I::Error;
delegate! {
// `**self` dereferences first the `&Rc<I>` receiver and then the `Rc` itself.
to (**self) {
fn account(&self, file: &Utf8Path) -> Result<Account, Self::Error>;
fn date(&self, _file: &Utf8Path) -> Option<Result<Date, Self::Error>>;
fn extract(&self, file: &Utf8Path, existing: &[Directive]) -> Result<Vec<Directive>, Self::Error>;
fn filename(&self, file: &Utf8Path) -> Option<Result<String, Self::Error>>;
fn identify(&self, file: &Utf8Path) -> Result<bool, Self::Error>;
fn name(&self) -> &'static str;
fn typetag_deserialize(&self);
}
}
}
/// A set of importers sharing the error type `E`, keyed and ordered by importer name.
#[derive(Debug)]
pub struct ImporterRegistry<E> {
// Ordered by importer name; two importers with the same name count as the same entry.
importers: BTreeSet<NamedImporter<E>>,
}
impl<E> Default for ImporterRegistry<E> {
fn default() -> Self {
let importers = Default::default();
Self { importers }
}
}
impl<E> ImporterRegistry<E> {
    /// Adds `importer` to the registry.
    ///
    /// If an importer with the same name is already present, the existing one is kept and a
    /// warning is logged.
    pub fn register_importer<I>(&mut self, importer: I) -> &mut Self
    where
        I: ImporterProtocol<Error = E> + 'static,
    {
        // Grab the name first: the importer is moved into the registry below.
        let name = importer.name();

        let newly_registered = self.importers.insert(NamedImporter::new(importer));
        if !newly_registered {
            tracing::warn!(importer = name, "importer has already been registered");
        }

        self
    }
}
impl<E> ImporterRegistry<E> {
pub fn iter(&self) -> impl Iterator<Item = &dyn ImporterProtocol<Error = E>> {
self.importers.iter().map(|importer| &*importer.0)
}
}
/// A boxed importer whose identity (equality, ordering, and hash) is its name alone.
struct NamedImporter<E>(Box<dyn ImporterProtocol<Error = E>>);
impl<E> NamedImporter<E> {
fn new<I>(importer: I) -> Self
where
I: ImporterProtocol<Error = E> + 'static,
{
Self(Box::new(importer))
}
}
impl<E> core::fmt::Debug for NamedImporter<E> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_tuple("NamedImporter")
.field(&self.0.name())
.finish()
}
}
// Equality is defined through the name comparison in `PartialEq`/`Ord`.
impl<E> Eq for NamedImporter<E> {}
impl<E> From<Box<dyn ImporterProtocol<Error = E>>> for NamedImporter<E> {
fn from(value: Box<dyn ImporterProtocol<Error = E>>) -> Self {
Self(value)
}
}
impl<E> Hash for NamedImporter<E> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.name().hash(state);
}
}
impl<E> Ord for NamedImporter<E> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.0.name().cmp(other.0.name())
}
}
impl<E> PartialEq for NamedImporter<E> {
    /// Two importers are equal exactly when [`Ord::cmp`] considers them equal.
    fn eq(&self, other: &Self) -> bool {
        matches!(Ord::cmp(self, other), std::cmp::Ordering::Equal)
    }
}
// Defers to `Ord::cmp` so that the partial and total orders always agree.
impl<E> PartialOrd for NamedImporter<E> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
[package]
name = "beancount-importers-framework"
edition.workspace = true
publish.workspace = true
rust-version.workspace = true
version.workspace = true
[dependencies]
# Workspace dependencies
beancount-pretty-printer.path = "../common/beancount-pretty-printer"
beancount-tree-writer.path = "../common/beancount-tree-writer"
beancount-types.path = "../common/beancount-types"
# Inherited dependencies
camino.workspace = true
clap.workspace = true
delegate.workspace = true
inventory.workspace = true
miette.workspace = true
time-tz.workspace = true
time.workspace = true
tracing.workspace = true