Port validation code from PR

finchie
Oct 8, 2023, 3:53 AM
23NVAJMBPRFDINHF3UOWCZOF3GCTCRPHYDHXKPCIPEWQI5ZAORUQC

Dependencies

Change contents

  • file addition: src (d--r------)
    [2.1]
  • file addition: lib.rs (----------)
    [0.15]
    pub mod email;
  • file addition: email.rs (----------)
    [0.15]
    // Heavily modified version of https://github.com/Keats/validator/blob/master/validator/src/validation/email.rs
    extern crate alloc;
    use alloc::borrow::Cow;
    use idna::domain_to_ascii;
    use lazy_static::lazy_static;
    use regex::Regex;
    use std::{net::IpAddr, str::FromStr};
    // Punctuation accepted in the user part; letters and digits are accepted separately
    // through `char::is_alphanumeric`.
    const VALID_USER_CHARACTERS: &str = r#".!#$%&'*+/=?^_`{|}~-"#;
    // Punctuation accepted in a non-literal domain; letters and digits are accepted separately
    // through `char::is_alphanumeric`.
    const VALID_DOMAIN_CHARACTERS: &str = r#".-"#;
    lazy_static! {
        // Regex from the specs
        // https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
        // It will mark esoteric email addresses like quoted string as invalid
        static ref EMAIL_USER_RE: Regex = Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z")
            .expect("hard-coded user regex must compile");
        // One or more `.`-separated labels, each 1 to 63 characters long, made of letters, digits
        // and inner hyphens.
        static ref EMAIL_DOMAIN_RE: Regex = Regex::new(
            r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$"
        )
        .expect("hard-coded domain regex must compile");
        // literal form, ipv4 or ipv6 address (SMTP 4.1.3)
        // Anchored on both sides so that text in front of the opening bracket cannot be ignored,
        // and restricted to the characters an IPv4/IPv6 address is written with (hex digits,
        // `:` and `.`). Capture group 1 is the address between the brackets.
        static ref EMAIL_LITERAL_RE: Regex = Regex::new(r"(?i)^\[([a-f0-9:.]+)\]\z")
            .expect("hard-coded literal regex must compile");
    }
    /// Represents the error states of an email address
    ///
    /// This enum is always constructed along with an accompanying span that indicates where the
    /// error occurred. For more details, see the [`ErrorSpan`] struct.
    ///
    /// Errors that point at a single position are reported with an empty range (`n..n`).
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum Error {
    /// The length of the domain is greater than the maximum of 255 characters
    ///
    /// Error spans: domain
    DomainTooLong,
    /// An unknown error in the domain that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
    ///
    /// Error spans: domain
    DomainRegexFailed,
    /// The provided domain does not contain any characters
    ///
    /// Error spans: email
    EmptyDomain,
    /// The provided email does not contain any characters
    ///
    /// Error spans: email (the empty range `0..0`)
    EmptyEmail,
    /// An element of the domain does not contain any characters
    ///
    /// Error spans: the period preceding the empty element
    EmptySubdomain,
    /// The provided user does not contain any characters
    ///
    /// Error spans: email
    EmptyUser,
    /// The provided domain contains invalid characters
    ///
    /// Error spans: invalid character
    InvalidDomainCharacter,
    /// The provided user contains invalid characters
    ///
    /// Error spans: invalid character
    InvalidUserCharacter,
    /// The provided IP is invalid
    ///
    /// Error spans: IP address between the square brackets (the range ends at the position of
    /// its last character)
    InvalidIP,
    /// An element of the domain begins with a hyphen (`-`)
    ///
    /// Error spans: leading hyphen character
    LeadingHyphen,
    /// The domain begins with a period (`.`)
    ///
    /// Error spans: leading period character
    LeadingPeriod,
    /// The provided email does not contain the required `@` character
    ///
    /// Error spans: email
    MissingAtCharacter,
    /// The length of an element in the domain is greater than the maximum of 63 characters
    ///
    /// Error spans: invalid subdomain
    SubdomainTooLong,
    /// An element of the domain ends with a hyphen (`-`)
    ///
    /// Error spans: the position immediately after the trailing hyphen character
    TrailingHyphen,
    /// The domain ends with a period (`.`)
    ///
    /// Error spans: trailing period character
    TrailingPeriod,
    /// The length of the user was greater than the maximum of 64 characters
    ///
    /// Error spans: invalid user
    UserTooLong,
    /// An unknown error in the user that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
    ///
    /// Error spans: user
    UserRegexFailed,
    /// The provided domain could not be converted to its ASCII form, as used for an [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
    ///
    /// Error spans: email
    Uts46,
    }
    /// Upon encountering an error, [`validate_email`] will return this struct containing the first error reached.
    /// The field `error_type` indicates which rule has not been met, and the `span` field provides a simple range
    /// indicating which portion of the email is at fault. This may be a single position (reported as an empty
    /// range `n..n`), or an entire string. Each [`Error`] variant documents the range it spans.
    ///
    /// Example of a single-position span:
    /// ```
    /// # use span_validator::email::{Error, ErrorSpan, validate_email};
    /// // invalid"@example.com
    /// //        ^
    ///
    /// assert_eq!(
    ///     validate_email(r#"invalid"@example.com"#),
    ///     Err(ErrorSpan { error_type: Error::InvalidUserCharacter, span: 7..7 })
    /// );
    /// ```
    /// Example of a span with distinct start and end (the period before an empty subdomain):
    /// ```
    /// // invalid@example..com
    /// //                ^
    /// # use span_validator::email::{Error, ErrorSpan, validate_email};
    /// assert_eq!(
    ///     validate_email("invalid@example..com"),
    ///     Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 })
    /// );
    /// ```
    ///
    /// Example of an address-long span:
    /// ```
    /// # use span_validator::email::{Error, ErrorSpan, validate_email};
    /// // no-user.com
    /// // ^^^^^^^^^^^
    /// assert_eq!(
    ///     validate_email("no-user.com"),
    ///     Err(ErrorSpan { error_type: Error::MissingAtCharacter, span: 0..11 })
    /// );
    /// ```
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct ErrorSpan {
    /// The rule that the email failed to satisfy
    pub error_type: Error,
    /// The portion of the email the error refers to
    pub span: core::ops::Range<usize>,
    }
    /// Validates an email address, returning the first rule violation together with its location.
    ///
    /// The address is split on its last `@`: everything before it is the user, everything after
    /// it is the domain. The domain is converted to its ASCII form with `domain_to_ascii` and
    /// validated first; the user is validated afterwards, so a domain error takes precedence
    /// when both parts are invalid.
    ///
    /// # Errors
    ///
    /// Returns an [`ErrorSpan`] holding the violated rule and the range it applies to:
    /// * [`Error::EmptyEmail`] when the input is empty
    /// * [`Error::MissingAtCharacter`] when the input contains no `@`
    /// * [`Error::Uts46`] when the domain cannot be converted to its ASCII form
    /// * any error produced while validating the domain or the user
    ///
    /// NOTE(review): domain spans are computed from the lengths of the ASCII-converted domain,
    /// so for internationalized domains they may not line up with the original text — confirm
    /// whether that is intended.
    pub fn validate_email<'email, T>(value: T) -> Result<(), ErrorSpan>
    where
        T: Into<Cow<'email, str>>,
    {
        let email = value.into();
        let email_end = email.len();

        // Email must not be empty
        if email.is_empty() {
            return Err(ErrorSpan {
                error_type: Error::EmptyEmail,
                span: 0..0,
            });
        }

        // User: everything up until the last `@` character
        // Domain: everything after the last `@` character
        // A single `rsplit_once` both detects a missing `@` and yields the two halves, without
        // collecting into an intermediate `Vec` or indexing into it.
        let (user, raw_domain) = match email.rsplit_once('@') {
            Some(halves) => halves,
            None => {
                // example.com
                // ^^^^^^^^^^^
                return Err(ErrorSpan {
                    error_type: Error::MissingAtCharacter,
                    span: 0..email_end,
                });
            }
        };

        // Convert the domain to its ASCII form, see
        // [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
        let domain = match domain_to_ascii(raw_domain) {
            Ok(ascii_domain) => ascii_domain,
            Err(_domain_errors) => {
                return Err(ErrorSpan {
                    error_type: Error::Uts46,
                    span: 0..email_end,
                });
            }
        };

        // Validate the email domain, then the email user; the first failure is returned
        validate_domain(&domain, user.len(), email_end)?;
        validate_user(user, email_end)
    }
    /// Validate the user part of an email (everything before the last `@`).
    ///
    /// * `user` - the user part to check
    /// * `email_end` - length of the whole email, used for errors that span the entire address
    ///
    /// Checks are applied in order and the first failure is returned: invalid character,
    /// empty user, user longer than 64, and finally the regex from the whatwg spec.
    fn validate_user(user: &str, email_end: usize) -> Result<(), ErrorSpan> {
        // Ensure validity of all characters in user.
        // `char_indices` yields byte offsets, the same unit as the `len()`-based spans below, so
        // the reported position stays on a character boundary even after multi-byte characters.
        let first_invalid = user
            .char_indices()
            .find(|&(_, character)| {
                !character.is_alphanumeric() && !VALID_USER_CHARACTERS.contains(character)
            });
        if let Some((offset, _)) = first_invalid {
            // user"@example.com
            //     ^
            return Err(ErrorSpan {
                error_type: Error::InvalidUserCharacter,
                span: offset..offset,
            });
        }

        // validate the length of the user, BEFORE doing the regex
        // according to RFC5321 the max length of the local part (the user) is 64 characters
        // https://datatracker.ietf.org/doc/html/rfc5321#section-4.5.3.1.1
        if user.is_empty() {
            // @example.com
            // ^^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::EmptyUser,
                span: 0..email_end,
            });
        }

        // User has a maximum length of 64
        if user.len() > 64 {
            // pretend_this_is_too_long@example.com
            // ^^^^^^^^^^^^^^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::UserTooLong,
                span: 0..user.len(),
            });
        }

        // Final check, match user against regex from the spec
        if !EMAIL_USER_RE.is_match(user) {
            // exotic_case@example.com
            // ^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::UserRegexFailed,
                span: 0..user.len(),
            });
        }

        Ok(())
    }
    /// Validate the email domain.
    ///
    /// A domain can either be an IP address literal surrounded by square brackets
    /// (e.g. `[127.0.0.1]`) or a host name made of elements separated by periods (`.`).
    ///
    /// * `domain` - the domain to check; the caller passes the ASCII form of the domain
    /// * `user_end` - length of the user part, i.e. the position of the separating `@`
    /// * `email_end` - length of the whole email
    ///
    /// `user_end` and `email_end` are only used to express the returned spans relative to the
    /// whole email. Checks are applied in order and the first failure is returned.
    ///
    /// NOTE(review): `TrailingHyphen` reports the position right after the hyphen, unlike
    /// `LeadingHyphen`/`TrailingPeriod` which report the character itself. The existing tests
    /// pin this value, so it is kept as is — confirm whether it is intended.
    fn validate_domain(domain: &str, user_end: usize, email_end: usize) -> Result<(), ErrorSpan> {
        // Position of the first domain character, right after the `@`
        let domain_start = user_end + 1;

        // Email domains can be an IP address surrounded by square brackets (`[]`)
        let literal_ip = domain
            .strip_prefix('[')
            .and_then(|rest| rest.strip_suffix(']'));

        // Ensure validity of all characters in domain
        if let Some(potential_ip) = literal_ip {
            if IpAddr::from_str(potential_ip).is_err() {
                // Skip the opening bracket
                let ip_start = domain_start + 1;
                // The range ends at the last character of the address. `saturating_sub` keeps
                // the range well-formed (`start..start`) for the empty literal `[]`, where a
                // plain `- 1` would put the end before the start.
                let ip_last = ip_start + potential_ip.len().saturating_sub(1);
                // user@[127.0.0.256]
                //       ^^^^^^^^^^^
                return Err(ErrorSpan {
                    error_type: Error::InvalidIP,
                    span: ip_start..ip_last,
                });
            }
        } else {
            let first_invalid = domain.char_indices().find(|&(_, character)| {
                !character.is_alphanumeric() && !VALID_DOMAIN_CHARACTERS.contains(character)
            });
            if let Some((offset, _)) = first_invalid {
                let location = domain_start + offset;
                // user@*example.com
                //      ^
                return Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: location..location,
                });
            }
        }

        // Domain cannot be empty
        if domain.is_empty() {
            // user@
            // ^^^^^
            return Err(ErrorSpan {
                error_type: Error::EmptyDomain,
                span: 0..email_end,
            });
        }

        // Domain has a maximum length of 255
        if domain.len() > 255 {
            // user@pretend_this_is_too_long.com
            //      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::DomainTooLong,
                span: domain_start..email_end,
            });
        }

        // Domain cannot begin with a leading period (`.`)
        if domain.starts_with('.') {
            // user@.example.com
            //      ^
            return Err(ErrorSpan {
                error_type: Error::LeadingPeriod,
                span: domain_start..domain_start,
            });
        }

        // Domain cannot end with a trailing period (`.`)
        if domain.ends_with('.') {
            let column = email_end - 1;
            // user@example.com.
            //                 ^
            return Err(ErrorSpan {
                error_type: Error::TrailingPeriod,
                span: column..column,
            });
        }

        // Position of the subdomain currently being checked
        let mut span_start = domain_start;
        for subdomain in domain.split('.') {
            let span_end = span_start + subdomain.len();

            // Subdomain has a maximum length of 63
            if subdomain.len() > 63 {
                // user@valid.valid.pretend_this_is_too_long.com
                //                  ^^^^^^^^^^^^^^^^^^^^^^^^
                return Err(ErrorSpan {
                    error_type: Error::SubdomainTooLong,
                    span: span_start..span_end,
                });
            }

            // Subdomain cannot be empty; the span covers the period in front of it
            if subdomain.is_empty() {
                // user@example..com
                //             ^
                return Err(ErrorSpan {
                    error_type: Error::EmptySubdomain,
                    span: span_start - 1..span_start,
                });
            }

            // Subdomain cannot have a leading hyphen (`-`)
            if subdomain.starts_with('-') {
                // user@-example.com
                //      ^
                return Err(ErrorSpan {
                    error_type: Error::LeadingHyphen,
                    span: span_start..span_start,
                });
            }

            // Subdomain cannot have a trailing hyphen (`-`); the reported position is the one
            // right after the hyphen
            if subdomain.ends_with('-') {
                // user@example.com-
                //                  ^
                return Err(ErrorSpan {
                    error_type: Error::TrailingHyphen,
                    span: span_end..span_end,
                });
            }

            // Account for extra '.' character between subdomains
            span_start = span_end + 1;
        }

        // Final check, match domain against regex from the spec
        if !validate_domain_part(domain) {
            // user@exotic_case.com
            //      ^^^^^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::DomainRegexFailed,
                span: domain_start..email_end,
            });
        }

        Ok(())
    }
    /// Returns `true` when `domain` matches the host-name regex, or when it matches the
    /// literal regex and the captured text parses as an IP address.
    #[must_use]
    fn validate_domain_part(domain: &str) -> bool {
        // The host-name form is tried first; the literal form is only consulted when it fails.
        EMAIL_DOMAIN_RE.is_match(domain)
            || EMAIL_LITERAL_RE
                .captures(domain)
                // Group 1 holds the text between the square brackets
                .and_then(|captures| captures.get(1))
                .map_or(false, |address| IpAddr::from_str(address.as_str()).is_ok())
    }
    #[cfg(test)]
    mod tests {
    use super::{validate_email, Error, ErrorSpan};
    /// Table-driven test: every entry pairs an input address with the exact result
    /// `validate_email` must return, including the error span.
    #[test]
    fn test_validate_email() {
    // Test cases taken from Django
    // https://github.com/django/django/blob/master/tests/validators/tests.py#L48
    let tests =
    vec![
    // Valid addresses: plain host names, bracketed IP literals, hyphens and an IDN domain
    ("email@here.com", Ok(())),
    ("weirder-email@here.and.there.com", Ok(())),
    (r#"!def!xyz%abc@example.com"#, Ok(())),
    ("email@[127.0.0.1]", Ok(())),
    ("email@[2001:dB8::1]", Ok(())),
    ("email@[2001:dB8:0:0:0:0:0:1]", Ok(())),
    ("email@[::fffF:127.0.0.1]", Ok(())),
    ("example@valid-----hyphens.com", Ok(())),
    ("example@valid-with-hyphens.com", Ok(())),
    ("test@domain.with.idn.tld.उदाहरण.परीक्षा", Ok(())),
    // Quoted user parts are rejected at the opening quote
    (
    r#""test@test"@example.com"#,
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 0..0,
    }),
    ),
    // max length for domain name labels is 63 characters per RFC 1034
    ("a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", Ok(())),
    ("a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.atm", Ok(())),
    (
    "a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bbbbbbbbbb.atm",
    Ok(()),
    ),
    // 64 * a
    (
    "a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    Err(ErrorSpan { error_type: Error::SubdomainTooLong, span: 6..70 }),
    ),
    // Structural errors: empty input, missing `@`, empty user or domain
    ("", Err(ErrorSpan { error_type: Error::EmptyEmail, span: 0..0 })),
    (
    "abc",
    Err(ErrorSpan {
    error_type: Error::MissingAtCharacter,
    span: 0..3,
    }),
    ),
    ("abc@", Err(ErrorSpan { error_type: Error::EmptyDomain, span: 0..4 })),
    ("@abc", Err(ErrorSpan { error_type: Error::EmptyUser, span: 0..4 })),
    ("abc@bar", Ok(())),
    (
    "a @x.cz",
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 1..1,
    }),
    ),
    (
    "abc@.com",
    Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 4..4 }),
    ),
    // The address is split on the last `@`, so the first one belongs to the user
    (
    "something@@somewhere.com",
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 9..9,
    }),
    ),
    // ("email@127.0.0.1", Ok(())),
    // Invalid IP literals; the span ends at the last character of the address
    (
    "email@[127.0.0.256]",
    Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..17 }),
    ),
    (
    "email@[2001:db8::12345]",
    Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..21 }),
    ),
    (
    "email@[2001:db8:0:0:0:0:1]",
    Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
    ),
    (
    "email@[::ffff:127.0.0.256]",
    Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
    ),
    // Hyphen placement; a trailing hyphen is reported at the position right after it
    (
    "example@invalid-.com",
    Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 16..16 }),
    ),
    (
    "example@-invalid.com",
    Err(ErrorSpan { error_type: Error::LeadingHyphen, span: 8..8 }),
    ),
    (
    "example@invalid.com-",
    Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 20..20 }),
    ),
    (
    "example@inv-.alid-.com",
    Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
    ),
    (
    "example@inv-.-alid.com",
    Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
    ),
    (
    r#"test@example.com\n\n<script src="x.js">"#,
    Err(ErrorSpan {
    error_type: Error::InvalidDomainCharacter,
    span: 16..16,
    }),
    ),
    (
    r#""\\\011"@here.com"#,
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 0..0,
    }),
    ),
    (
    r#""\\\012"@here.com"#,
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 0..0,
    }),
    ),
    (
    "trailingdot@shouldfail.com.",
    Err(ErrorSpan { error_type: Error::TrailingPeriod, span: 26..26 }),
    ),
    // Trailing newlines in username or domain not allowed
    (
    "a@b.com\n",
    Err(ErrorSpan {
    error_type: Error::InvalidDomainCharacter,
    span: 7..7,
    }),
    ),
    (
    "a\n@b.com",
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 1..1,
    }),
    ),
    (
    r#""test@test"\n@example.com"#,
    Err(ErrorSpan {
    error_type: Error::InvalidUserCharacter,
    span: 0..0,
    }),
    ),
    // With the trailing newline the domain no longer ends in `]`, so it is checked as a
    // host name and the opening bracket is the first invalid character
    (
    "a@[127.0.0.1]\n",
    Err(ErrorSpan {
    error_type: Error::InvalidDomainCharacter,
    span: 2..2,
    }),
    ),
    // underscores are not allowed
    (
    "John.Doe@exam_ple.com",
    Err(ErrorSpan {
    error_type: Error::InvalidDomainCharacter,
    span: 13..13,
    }),
    ),
    // Empty subdomain: the span covers the period in front of the empty element
    (
    "invalid@example..com",
    Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 }),
    ),
    (
    "invalid@.example.com",
    Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 8..8 }),
    ),
    ];
    // Each input must produce exactly the expected result, span included
    for (input, expected) in tests {
    // println!("{} - {}", input, expected);
    assert_eq!(
    validate_email(input),
    expected,
    "Email `{input}` was not classified correctly",
    );
    }
    }
    }
  • file addition: Cargo.toml (----------)
    [2.1]
    [package]
    name = "span-validator"
    version = "0.1.0"
    edition = "2021"
    # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
    [dependencies]
    idna = "0.4.0"
    lazy_static = "1.4.0"
    regex = "1.9.6"
  • file addition: .ignore (----------)
    [2.1]
    .git
    .DS_Store
    # Added by cargo
    /target
    /Cargo.lock