Add basic parser for German decimals

korrat
Sep 9, 2022, 7:11 PM
I2P2FTLEKLICJKHQ3FHOLRQRQYGZCJTCTU2MWXU2TMIRIKG6YFCQC

Dependencies

Change contents

  • file addition: common (d--r------)
    [2.1]
  • file addition: german-decimal (d--r------)
    [0.18]
  • file addition: tests (d--r------)
    [0.46]
  • file addition: basic.rs (---r------)
    [0.65]
    use rust_decimal_macros::dec;
    /// A plain integer without any separators parses to the same value.
    #[test]
    fn accepts_integer() {
        let parsed = german_decimal::parse("1").unwrap();
        assert_eq!(parsed, dec!(1));
    }
    /// A comma is interpreted as the decimal separator.
    #[test]
    fn accepts_decimals() {
        assert_eq!(german_decimal::parse("0,3").unwrap(), dec!(0.3));
    }
    /// A dot in the integer part is a group separator and does not affect the value.
    #[test]
    fn accepts_group_seperators() {
        assert_eq!(german_decimal::parse("2.500").unwrap(), dec!(2500));
    }
    /// Group separators and a decimal separator can be combined in one number.
    #[test]
    fn accepts_group_seperators_and_decimals() {
        assert_eq!(
            german_decimal::parse("2.500,12345").unwrap(),
            dec!(2500.12345)
        );
    }
    /// A leading `-` yields a negative number.
    #[test]
    fn accepts_minus_sign() {
        let parsed = german_decimal::parse("-2.500,12345").unwrap();
        assert_eq!(parsed, dec!(-2500.12345));
    }
    /// A leading `+` is accepted and yields a positive number.
    #[test]
    fn accepts_plus_sign() {
        let parsed = german_decimal::parse("+2.500,12345").unwrap();
        assert_eq!(parsed, dec!(2500.12345));
    }
    /// The empty string is not a number.
    #[test]
    fn rejects_empty_numbers() {
        let outcome = german_decimal::parse("");
        outcome.unwrap_err();
    }
    /// A group separator must not appear before the first digit.
    #[test]
    fn rejects_leading_separator() {
        let outcome = german_decimal::parse(".500,12345");
        outcome.unwrap_err();
    }
    /// A lone decimal separator contains no digits and is rejected.
    #[test]
    fn rejects_only_decimal_separator() {
        let outcome = german_decimal::parse(",");
        outcome.unwrap_err();
    }
    /// A lone `+` contains no digits and is rejected.
    #[test]
    fn rejects_only_plus_sign() {
        let outcome = german_decimal::parse("+");
        outcome.unwrap_err();
    }
    /// A lone `-` contains no digits and is rejected.
    #[test]
    fn rejects_only_minus_sign() {
        let outcome = german_decimal::parse("-");
        outcome.unwrap_err();
    }
    /// A `-` is only valid as the very first character.
    #[test]
    fn rejects_interior_minus_sign() {
        let outcome = german_decimal::parse("12-345");
        outcome.unwrap_err();
    }
    /// A `+` is only valid as the very first character.
    #[test]
    fn rejects_interior_plus_sign() {
        let outcome = german_decimal::parse("12+345");
        outcome.unwrap_err();
    }
    /// Letters are not part of a decimal number.
    #[test]
    fn rejects_letters() {
        let outcome = german_decimal::parse("a");
        outcome.unwrap_err();
    }
    /// An underscore is rejected like any other unknown symbol.
    #[test]
    fn rejects_symbols() {
        let outcome = german_decimal::parse("_");
        outcome.unwrap_err();
    }
    /// Group separators are only allowed in the integer part, not after the comma.
    #[test]
    fn rejects_group_separators_in_decimals() {
        let outcome = german_decimal::parse("2.500,123.456");
        outcome.unwrap_err();
    }
  • file addition: src (d--r------)
    [0.46]
  • file addition: lib.rs (---r------)
    [0.1856]
    //! This is based on a copy of [`rust_decimal::str`], with adaptations to handle German decimals
    //! (`,` as the decimal separator and `.` as the group separator).
    use rust_decimal::Decimal;
    use rust_decimal::Error;
    // Smallest value that no longer fits in 96 bits; `overflow_128` reports any
    // accumulator at or above this value as overflowed.
    const OVERFLOW_U96: u128 = 1u128 << 96;
    // Threshold checked by `overflow_64`: once the 64-bit accumulator of a long
    // input reaches it, parsing continues in the 128-bit path (`handle_full_128`).
    const WILL_OVERFLOW_U64: u64 = u64::MAX / 10 - u8::MAX as u64;
    // Inputs shorter than this many bytes are parsed without overflow checks
    // (see `parse`).
    const BYTES_TO_OVERFLOW_U64: usize = 18; // We can probably get away with less
    /// Parses a decimal number written in German notation.
    ///
    /// `,` is the decimal separator and `.` is accepted as a group separator in the
    /// integer part. The number may start with a single `+` or `-`.
    ///
    /// # Errors
    ///
    /// Returns an error if the input is empty, contains no digits, contains an
    /// unexpected character, or does not fit into a [`Decimal`].
    #[inline]
    pub fn parse(value: &str) -> Result<Decimal, Error> {
        let input = value.as_bytes();
        // Only inputs of at least `BYTES_TO_OVERFLOW_U64` bytes take the path that
        // performs overflow checks.
        let needs_overflow_checks = input.len() >= BYTES_TO_OVERFLOW_U64;
        if needs_overflow_checks {
            parse_str_radix_10_dispatch::<true>(input)
        } else {
            parse_str_radix_10_dispatch::<false>(input)
        }
    }
    /// Starts the parser on the first byte of `bytes`, or fails if there is none.
    ///
    /// `BIG` selects the variant that performs overflow checks.
    #[inline]
    fn parse_str_radix_10_dispatch<const BIG: bool>(bytes: &[u8]) -> Result<Decimal, Error> {
        if let Some((&first, remaining)) = bytes.split_first() {
            // Initial state: no separator, no sign, no digit, first byte.
            byte_dispatch_u64::<false, false, false, BIG, true>(remaining, 0, 0, first)
        } else {
            tail_error("invalid decimal: empty")
        }
    }
    /// Returns `true` once `val` has reached the `WILL_OVERFLOW_U64` threshold.
    #[inline]
    fn overflow_64(val: u64) -> bool {
        WILL_OVERFLOW_U64 <= val
    }
    /// Returns `true` if `val` has reached `OVERFLOW_U96`, i.e. no longer fits in 96 bits.
    #[inline]
    pub fn overflow_128(val: u128) -> bool {
        OVERFLOW_U96 <= val
    }
    /// Continues with the next byte, or finishes the number when the input is exhausted.
    ///
    /// The const parameters carry the parser state:
    ///
    /// * `SAW_DECIMAL_SEPARATOR` - the decimal separator `,` has already been seen
    /// * `NEGATIVE` - a leading `-` was seen, so the number is negative
    /// * `SAW_DIGIT` - a digit has been seen (finishing without one is an error)
    /// * `BIG` - the input is long enough to take the path with overflow checks
    #[inline]
    fn dispatch_next<
        const SAW_DECIMAL_SEPARATOR: bool,
        const NEGATIVE: bool,
        const SAW_DIGIT: bool,
        const BIG: bool,
    >(
        bytes: &[u8],
        data64: u64,
        scale: u8,
    ) -> Result<Decimal, Error> {
        match bytes.split_first() {
            Some((&current, remaining)) => {
                byte_dispatch_u64::<SAW_DECIMAL_SEPARATOR, NEGATIVE, SAW_DIGIT, BIG, false>(
                    remaining, data64, scale, current,
                )
            }
            None => handle_data::<NEGATIVE, SAW_DIGIT>(data64 as u128, scale),
        }
    }
    /// Handles a byte that is neither a digit nor a valid decimal separator.
    ///
    /// Accepts a sign as the very first byte and a group separator `.` in the
    /// integer part of a non-empty number; everything else is an error.
    #[inline(never)]
    fn non_digit_dispatch_u64<
        const SAW_DECIMAL_SEPARATOR: bool,
        const NEG: bool,
        const NON_EMPTY: bool,
        const BIG: bool,
        const FIRST: bool,
    >(
        bytes: &[u8],
        data64: u64,
        scale: u8,
        b: u8,
    ) -> Result<Decimal, Error> {
        // A sign is only valid as the first byte of the input.
        let sign_allowed = FIRST && !NON_EMPTY;
        // Group separators need something in front of them and may not follow the
        // decimal separator.
        let group_separator_allowed = !SAW_DECIMAL_SEPARATOR && NON_EMPTY;

        if b == b'-' && sign_allowed {
            dispatch_next::<false, true, false, BIG>(bytes, data64, scale)
        } else if b == b'+' && sign_allowed {
            dispatch_next::<false, false, false, BIG>(bytes, data64, scale)
        } else if b == b'.' && group_separator_allowed {
            handle_separator::<SAW_DECIMAL_SEPARATOR, NEG, BIG>(bytes, data64, scale)
        } else {
            tail_invalid_digit(b)
        }
    }
    /// Classifies the byte `b` and forwards to the matching handler.
    ///
    /// Digits and the first decimal separator `,` are handled here directly; all
    /// other bytes are passed on to `non_digit_dispatch_u64`.
    #[inline]
    fn byte_dispatch_u64<
        const SAW_DECIMAL_SEPARATOR: bool,
        const NEGATIVE: bool,
        const NON_EMPTY: bool,
        const BIG: bool,
        const FIRST: bool,
    >(
        bytes: &[u8],
        data64: u64,
        scale: u8,
        b: u8,
    ) -> Result<Decimal, Error> {
        if matches!(b, b'0'..=b'9') {
            return handle_digit_64::<SAW_DECIMAL_SEPARATOR, NEGATIVE, BIG>(
                bytes,
                data64,
                scale,
                b - b'0',
            );
        }
        if b == b',' && !SAW_DECIMAL_SEPARATOR {
            return handle_point::<NEGATIVE, NON_EMPTY, BIG>(bytes, data64, scale);
        }
        non_digit_dispatch_u64::<SAW_DECIMAL_SEPARATOR, NEGATIVE, NON_EMPTY, BIG, FIRST>(
            bytes, data64, scale, b,
        )
    }
    /// Appends `digit` to the 64-bit accumulator and continues parsing.
    ///
    /// After the decimal separator every digit also increases the scale. For `BIG`
    /// inputs this switches to the 128-bit path once the accumulator reaches the
    /// `overflow_64` threshold, and reports an underflow when more input follows
    /// after the scale has reached 28.
    #[inline(never)]
    fn handle_digit_64<const SAW_DECIMAL_SEPARATOR: bool, const NEGATIVE: bool, const BIG: bool>(
        bytes: &[u8],
        data64: u64,
        scale: u8,
        digit: u8,
    ) -> Result<Decimal, Error> {
        // The accumulator cannot overflow here: only `BIG` inputs can get large
        // enough, and they leave this path once `overflow_64` triggers.
        let accumulated = data64 * 10 + digit as u64;
        let scale = if SAW_DECIMAL_SEPARATOR { scale + 1 } else { 0 };

        match bytes.split_first() {
            // End of input: the digit just consumed completes the number.
            None => handle_data::<NEGATIVE, true>(accumulated as u128, scale),
            Some((&upcoming, remaining)) => {
                if SAW_DECIMAL_SEPARATOR && BIG && scale >= 28 {
                    Err(Error::Underflow)
                } else if BIG && overflow_64(accumulated) {
                    handle_full_128::<SAW_DECIMAL_SEPARATOR, NEGATIVE>(
                        accumulated as u128,
                        remaining,
                        scale,
                        upcoming,
                    )
                } else {
                    byte_dispatch_u64::<SAW_DECIMAL_SEPARATOR, NEGATIVE, true, BIG, false>(
                        remaining,
                        accumulated,
                        scale,
                        upcoming,
                    )
                }
            }
        }
    }
    /// Records that the decimal separator has been seen and continues with the next byte.
    ///
    /// `NON_EMPTY` is passed on unchanged as `dispatch_next`'s `SAW_DIGIT` flag, so
    /// input that ends right after a leading `,` is still rejected as having no digits.
    #[inline(never)]
    fn handle_point<const NEG: bool, const NON_EMPTY: bool, const BIG: bool>(
        bytes: &[u8],
        data64: u64,
        scale: u8,
    ) -> Result<Decimal, Error> {
        dispatch_next::<
            true, // SAW_DECIMAL_SEPARATOR
            NEG,
            NON_EMPTY,
            BIG,
        >(bytes, data64, scale)
    }
    /// Skips a group separator and continues with the next byte.
    ///
    /// The accumulated value and scale are passed on unchanged.
    #[inline(never)]
    fn handle_separator<const SAW_DECIMAL_SEPARATOR: bool, const NEG: bool, const BIG: bool>(
        bytes: &[u8],
        data64: u64,
        scale: u8,
    ) -> Result<Decimal, Error> {
        dispatch_next::<
            SAW_DECIMAL_SEPARATOR,
            NEG,
            true, // SAW_DIGIT
            BIG,
        >(bytes, data64, scale)
    }
    #[cold]
    fn tail_error(from: &'static str) -> Result<Decimal, Error> {
    Err(from.into())
    }
    /// Builds the error for a byte that is not valid at its position.
    ///
    /// A `,` gets a dedicated message; every other byte is reported as an unknown
    /// character.
    #[inline(never)]
    #[cold]
    fn tail_invalid_digit(digit: u8) -> Result<Decimal, Error> {
        let message = if digit == b',' {
            "invalid decimal: two decimal points"
        } else {
            "invalid decimal: unknown character"
        };
        tail_error(message)
    }
    /// Continues parsing in 128-bit arithmetic after the 64-bit accumulator got too large.
    ///
    /// * `data` - the value accumulated so far
    /// * `bytes` - the input remaining after `next_byte`
    /// * `scale` - the number of digits seen after the decimal separator
    /// * `next_byte` - the byte to process now
    ///
    /// # Errors
    ///
    /// Returns an error if the value no longer fits in 96 bits, if more input follows
    /// after the scale has reached 28, or if `next_byte` is not valid at this position.
    #[inline(never)]
    #[cold]
    fn handle_full_128<const SAW_DECIMAL_SEPARATOR: bool, const NEG: bool>(
        mut data: u128,
        bytes: &[u8],
        scale: u8,
        next_byte: u8,
    ) -> Result<Decimal, Error> {
        let b = next_byte;
        match b {
            b'0'..=b'9' => {
                let digit = u128::from(b - b'0');
                // If the data is going to overflow then we should go into recovery mode
                let next = (data * 10) + digit;
                if overflow_128(next) {
                    if !SAW_DECIMAL_SEPARATOR {
                        tail_error("invalid decimal: overflow from too many digits")
                    } else {
                        Err(Error::Underflow)
                    }
                } else {
                    data = next;
                    let scale = scale + SAW_DECIMAL_SEPARATOR as u8;
                    if let Some((next, bytes)) = bytes.split_first() {
                        let next = *next;
                        if SAW_DECIMAL_SEPARATOR && scale >= 28 {
                            Err(Error::Underflow)
                        } else {
                            handle_full_128::<SAW_DECIMAL_SEPARATOR, NEG>(data, bytes, scale, next)
                        }
                    } else {
                        handle_data::<NEG, true>(data, scale)
                    }
                }
            }
            b',' if !SAW_DECIMAL_SEPARATOR => {
                // This call won't tail?
                if let Some((next, bytes)) = bytes.split_first() {
                    handle_full_128::<true, NEG>(data, bytes, scale, *next)
                } else {
                    handle_data::<NEG, true>(data, scale)
                }
            }
            // Group separators are only valid in the integer part. Without this guard
            // a long input could contain `.` after the decimal separator, which the
            // 64-bit path (`non_digit_dispatch_u64`) rejects.
            b'.' if !SAW_DECIMAL_SEPARATOR => {
                if let Some((next, bytes)) = bytes.split_first() {
                    handle_full_128::<SAW_DECIMAL_SEPARATOR, NEG>(data, bytes, scale, *next)
                } else {
                    handle_data::<NEG, true>(data, scale)
                }
            }
            b => tail_invalid_digit(b),
        }
    }
    /// Builds the error for input that ended without containing any digit.
    #[inline(never)]
    fn tail_empty() -> Result<Decimal, Error> {
        let message: &'static str = "invalid decimal: no digits found";
        tail_error(message)
    }
    /// Turns the accumulated value into a [`Decimal`].
    ///
    /// * `NEG` - whether the resulting number is negative
    /// * `HAS` - whether any digit was seen; if not, the input is rejected
    ///
    /// `data` is expected to fit in 96 bits (checked in debug builds only).
    #[inline]
    fn handle_data<const NEG: bool, const HAS: bool>(data: u128, scale: u8) -> Result<Decimal, Error> {
        debug_assert_eq!(data >> 96, 0);
        if HAS {
            // Split the value into the three 32-bit words `from_parts` takes.
            let lo = data as u32;
            let mid = (data >> 32) as u32;
            let hi = (data >> 64) as u32;
            Ok(Decimal::from_parts(lo, mid, hi, NEG, scale as u32))
        } else {
            tail_empty()
        }
    }
  • file addition: Cargo.toml (---r------)
    [0.46]
    [package]
    name = "german-decimal"
    version = "0.0.0-dev.0"
    edition = "2021"
    [dependencies]
    rust_decimal = "1.26.1"
    [dev-dependencies]
    rust_decimal_macros = "1.26.1"
  • file addition: Cargo.toml (---r------)
    [2.1]
    [workspace]
    members = ["common/german-decimal"]
  • file addition: Cargo.lock (---r------)
    [2.1]
    # This file is automatically @generated by Cargo.
    # It is not intended for manual editing.
    version = 3
    [[package]]
    name = "arrayvec"
    version = "0.7.2"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
    [[package]]
    name = "autocfg"
    version = "1.1.0"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
    [[package]]
    name = "german-decimal"
    version = "0.0.0-dev.0"
    dependencies = [
    "rust_decimal",
    "rust_decimal_macros",
    ]
    [[package]]
    name = "num-traits"
    version = "0.2.15"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
    dependencies = [
    "autocfg",
    ]
    [[package]]
    name = "proc-macro2"
    version = "1.0.43"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
    dependencies = [
    "unicode-ident",
    ]
    [[package]]
    name = "quote"
    version = "1.0.21"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
    dependencies = [
    "proc-macro2",
    ]
    [[package]]
    name = "rust_decimal"
    version = "1.26.1"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "ee9164faf726e4f3ece4978b25ca877ddc6802fa77f38cdccb32c7f805ecd70c"
    dependencies = [
    "arrayvec",
    "num-traits",
    "serde",
    ]
    [[package]]
    name = "rust_decimal_macros"
    version = "1.26.1"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "4903d8db81d2321699ca8318035d6ff805c548868df435813968795a802171b2"
    dependencies = [
    "quote",
    "rust_decimal",
    ]
    [[package]]
    name = "serde"
    version = "1.0.144"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860"
    [[package]]
    name = "unicode-ident"
    version = "1.0.3"
    source = "registry+https://github.com/rust-lang/crates.io-index"
    checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
  • file addition: .ignore (---r------)
    [2.1]
    # Created by https://www.toptal.com/developers/gitignore/api/rust,visualstudiocode,windows,linux
    # Edit at https://www.toptal.com/developers/gitignore?templates=rust,visualstudiocode,windows,linux
    ### Linux ###
    *~
    # temporary files which can be created if a process still has a handle open of a deleted file
    .fuse_hidden*
    # KDE directory preferences
    .directory
    # Linux trash folder which might appear on any partition or disk
    .Trash-*
    # .nfs files are created when an open file is removed but is still being accessed
    .nfs*
    ### Rust ###
    # Generated by Cargo
    # will have compiled files and executables
    debug/
    target/
    # These are backup files generated by rustfmt
    **/*.rs.bk
    # MSVC Windows builds of rustc generate these, which store debugging information
    *.pdb
    ### VisualStudioCode ###
    .vscode/*
    !.vscode/settings.json
    !.vscode/tasks.json
    !.vscode/launch.json
    !.vscode/extensions.json
    !.vscode/*.code-snippets
    # Local History for Visual Studio Code
    .history/
    # Built Visual Studio Code Extensions
    *.vsix
    ### VisualStudioCode Patch ###
    # Ignore all local history of files
    .history
    .ionide
    # Support for Project snippet scope
    .vscode/*.code-snippets
    # Ignore code-workspaces
    *.code-workspace
    ### Windows ###
    # Windows thumbnail cache files
    Thumbs.db
    Thumbs.db:encryptable
    ehthumbs.db
    ehthumbs_vista.db
    # Dump file
    *.stackdump
    # Folder config file
    [Dd]esktop.ini
    # Recycle Bin used on file shares
    $RECYCLE.BIN/
    # Windows Installer files
    *.cab
    *.msi
    *.msix
    *.msm
    *.msp
    # Windows shortcuts
    *.lnk
    # End of https://www.toptal.com/developers/gitignore/api/rust,visualstudiocode,windows,linux