//! User-friendly validation for common types of input!
// Heavily modified version of https://github.com/Keats/validator/blob/master/validator/src/validation/email.rs

extern crate alloc;

use alloc::borrow::Cow;
use idna::domain_to_ascii;
use lazy_static::lazy_static;
use regex::Regex;
use std::{net::IpAddr, str::FromStr};

// Punctuation accepted in the user (local) part of an address. The use site
// additionally accepts any character for which `char::is_alphanumeric` is true.
// This is the same punctuation set that appears in `EMAIL_USER_RE`.
const VALID_USER_CHARACTERS: &str = r#".!#$%&'*+/=?^_`{|}~-"#;
// Punctuation accepted in a non-literal domain, on top of alphanumeric characters:
// the label separator (`.`) and the hyphen (`-`).
const VALID_DOMAIN_CHARACTERS: &str = r#".-"#;

lazy_static! {
    // Patterns adapted from the WHATWG definition of a valid e-mail address:
    // https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
    // Esoteric addresses (for example quoted strings) are deliberately rejected.

    // User (local) part: one or more ASCII letters, digits or permitted punctuation.
    static ref EMAIL_USER_RE: Regex = Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap();
    // Host name: dot-separated labels of 1 to 63 characters that start and end with a
    // letter or digit and may contain hyphens in between.
    static ref EMAIL_DOMAIN_RE: Regex = Regex::new(
        r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$"
    ).unwrap();
    // Address literal: an IPv4 or IPv6 address wrapped in square brackets (SMTP 4.1.3).
    // Only hex digits, `:` and `.` may appear between the brackets; `(?i)` makes the
    // `a-f` range cover both cases. The pattern is anchored on both sides so that the
    // whole domain has to be the literal. The captured text is still handed to
    // `IpAddr` for the real validity check.
    static ref EMAIL_LITERAL_RE: Regex = Regex::new(r"(?i)^\[([a-f0-9:.]+)\]\z").unwrap();
}

/// Represents the error states of an email address
///
/// This enum is always constructed along with an accompanying span that indicates where the
/// error occurred. For more details, see the [`ErrorSpan`] struct.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
    /// The length of the domain is greater than the maximum of 255 characters
    ///
    /// Error spans: domain
    DomainTooLong,
    /// An unknown error in the domain that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
    ///
    /// Error spans: domain
    DomainRegexFailed,
    /// The provided domain does not contain any characters
    ///
    /// Error spans: email
    EmptyDomain,
    /// The provided email does not contain any characters
    ///
    /// Error spans: email
    EmptyEmail,
    /// An element of the domain does not contain any characters
    ///
    /// Error spans: domain
    EmptySubdomain,
    /// The provided user does not contain any characters
    ///
    /// Error spans: email
    EmptyUser,
    /// The provided domain contains invalid characters
    ///
    /// Error spans: invalid character
    InvalidDomainCharacter,
    /// The provided user contains invalid characters
    ///
    /// Error spans: invalid character
    InvalidUserCharacter,
    /// The provided IP is invalid
    ///
    /// Error spans: IP address
    InvalidIP,
    /// An element of the domain begins with a hyphen (`-`)
    ///
    /// Error spans: leading hyphen character
    LeadingHyphen,
    /// An element of the domain begins with a period (`.`)
    ///
    /// Error spans: leading period character
    LeadingPeriod,
    /// The provided email does not contain the required `@` character
    ///
    /// Error spans: email
    MissingAtCharacter,
    /// The length of an element in the domain is greater than the maximum of 63 characters
    ///
    /// Error spans: invalid subdomain
    SubdomainTooLong,
    /// An element of the domain ends with a hyphen (`-`)
    ///
    /// Error spans: trailing hyphen character
    TrailingHyphen,

    /// An element of the domain ends with a period (`.`)
    ///
    /// Error spans: trailing period character
    TrailingPeriod,
    /// The length of the user was greater than the maximum of 64 characters
    ///
    /// Error spans: invalid user
    UserTooLong,
    /// An unknown error in the user that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
    ///
    /// Error spans: user
    UserRegexFailed,
    /// The provided domain was unable to be converted into an [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
    ///
    /// Error spans: domain
    Uts46,
}

/// The error value returned by [`validate_email`]: which rule failed, and where.
///
/// Upon encountering an error, [`validate_email`] will return this struct containing the first error reached.
/// The field `error_type` indicates which rule has not been met, and the `span` field provides a simple character range
/// indicating which portion of the email is at fault. This may be a single character, or an entire string. Each enum variant
/// documents the range it spans.
///
/// A span that points at one character is reported with equal start and end (for example `7..7`).
///
/// Example of a single-character span:
/// ```
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// // invalid"@example.com
/// //        ^
///
/// assert_eq!(
///     validate_email(r#"invalid"@example.com"#),
///     Err(ErrorSpan { error_type: Error::InvalidUserCharacter, span: 7..7 })
/// );
/// ```
/// Example of a span inside the domain (the two consecutive periods):
/// ```
/// // invalid@example..com
/// //                ^^
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// assert_eq!(
///     validate_email("invalid@example..com"),
///     Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 })
/// );
/// ```
///
/// Example of an address-long span:
/// ```
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// // no-user.com
/// // ^^^^^^^^^^^
/// assert_eq!(
///     validate_email("no-user.com"),
///     Err(ErrorSpan { error_type: Error::MissingAtCharacter, span: 0..11 })
/// );
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ErrorSpan {
    /// The rule that the address violated.
    pub error_type: Error,
    /// The location of the fault within the address.
    // NOTE(review): the validators do not use one uniform end convention. Whole-string
    // spans end at the string length (`0..11` above), single characters use
    // `index..index`, and the IP literal span ends at its last character. Confirm the
    // intended convention before slicing the input with this range.
    pub span: core::ops::Range<usize>,
}

/// Validates an email address and reports the first violated rule together with its location.
///
/// The address is split at its **last** `@`: everything before it is the user part and
/// everything after it is the domain. The domain is converted to its ASCII form with
/// `idna::domain_to_ascii` and validated first; the user part is validated afterwards.
///
/// # Errors
///
/// Returns an [`ErrorSpan`] describing the first problem found:
///
/// * [`Error::EmptyEmail`] (span `0..0`) when the input is empty,
/// * [`Error::MissingAtCharacter`] (spanning the whole input) when there is no `@`,
/// * [`Error::Uts46`] (spanning the domain) when the domain cannot be converted to ASCII,
/// * otherwise whichever error the domain or user validation reports.
///
/// # Examples
///
/// ```
/// # use span_validator::email::validate_email;
/// assert!(validate_email("email@here.com").is_ok());
/// assert!(validate_email("no-user.com").is_err());
/// ```
pub fn validate_email<'email, T>(value: T) -> Result<(), ErrorSpan>
where
    T: Into<Cow<'email, str>>,
{
    let email = value.into();

    // Email must not be empty
    if email.is_empty() {
        return Err(ErrorSpan {
            error_type: Error::EmptyEmail,
            span: 0..0,
        });
    }

    // Split at the last `@`; an address without one cannot be validated any further.
    let (user, raw_domain) = match email.rsplit_once('@') {
        Some(parts) => parts,
        None => {
            // example.com
            // ^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::MissingAtCharacter,
                span: 0..email.len(),
            });
        }
    };

    // Convert the domain to an [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name).
    // A failed conversion is reported against the domain, like the other domain-wide errors.
    let domain = domain_to_ascii(raw_domain).map_err(|_domain_errors| ErrorSpan {
        error_type: Error::Uts46,
        span: user.len() + 1..email.len(),
    })?;

    // The domain is checked before the user, so a domain error wins when both are invalid.
    validate_domain(&domain, user.len(), email.len())?;
    validate_user(user, email.len())
}

fn validate_user(user: &str, email_end: usize) -> Result<(), ErrorSpan> {
    // Ensure validity of all characters in user
    for (index, character) in user.chars().enumerate() {
        if !character.is_alphanumeric() && !VALID_USER_CHARACTERS.contains(character) {
            // user"@example.com
            //     ^
            return Err(ErrorSpan {
                error_type: Error::InvalidUserCharacter,
                span: index..index,
            });
        }
    }

    // validate the length of each part of the email, BEFORE doing the regex
    // according to RFC5321 the max length of the subdomain is 64 characters
    // and the max length of the domain part is 255 characters
    // https://datatracker.ietf.org/doc/html/rfc5321#section-4.5.3.1.1
    if user.is_empty() {
        // @example.com
        // ^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::EmptyUser,
            span: 0..email_end,
        });
    }

    // User has a maximum length of 64
    if user.len() > 64 {
        // pretend_this_is_too_long@example.com
        // ^^^^^^^^^^^^^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::UserTooLong,
            span: 0..user.len(),
        });
    }

    // Final check, match user against regex from the spec
    if !EMAIL_USER_RE.is_match(user) {
        // exotic_case@example.com
        // ^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::UserRegexFailed,
            span: 0..user.len(),
        });
    }

    Ok(())
}

/// Validate the email domain.
///
/// A domain can either be an
fn validate_domain(domain: &str, user_end: usize, email_end: usize) -> Result<(), ErrorSpan> {
    // Email domains can be an IP address surrounded by square brackets (`[]`)
    let is_literal_address = match (domain.chars().next(), domain.chars().last()) {
        (Some('['), Some(']')) => true,
        _ => false,
    };

    // Ensure validity of all characters in domain
    if is_literal_address {
        let potential_ip = domain.get(1..domain.len() - 1).unwrap();
        let ip_range_start = user_end + 2;
        if IpAddr::from_str(potential_ip).is_err() {
            // user@[127.0.0.256]
            //        ^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::InvalidIP,
                span: ip_range_start..ip_range_start + potential_ip.len() - 1,
            });
        }
    } else {
        for (index, character) in domain.chars().enumerate() {
            if !character.is_alphanumeric() && !VALID_DOMAIN_CHARACTERS.contains(character) {
                let location = index + user_end + 1;

                // user@*example.com
                //      ^
                return Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: location..location,
                });
            }
        }
    }

    // Domain cannot be empty
    if domain.is_empty() {
        // user@
        // ^^^^^
        return Err(ErrorSpan {
            error_type: Error::EmptyDomain,
            span: 0..email_end,
        });
    }

    // Domain has a maximum length of 255
    if domain.len() > 255 {
        // user@pretend_this_is_too_long.com
        //      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::DomainTooLong,
            span: (user_end + 1)..email_end,
        });
    }

    // Domain cannot begin with a leading period (`.`)
    if domain.starts_with('.') {
        let column = user_end + 1;

        // user@.example.com
        //      ^
        return Err(ErrorSpan {
            error_type: Error::LeadingPeriod,
            span: column..column,
        });
    }

    // Domain cannot end with a trailing period (`.`)
    if domain.ends_with('.') {
        let column = email_end - 1;

        // user@example.com.
        //                 ^
        return Err(ErrorSpan {
            error_type: Error::TrailingPeriod,
            span: column..column,
        });
    }

    let mut span_start = user_end + 1;
    for subdomain in domain.split('.') {
        // Subdomain has a maximum length of 63
        if subdomain.len() > 63 {
            // user@valid.valid.pretend_this_is_too_long.com
            //                  ^^^^^^^^^^^^^^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::SubdomainTooLong,
                span: span_start..span_start + subdomain.len(),
            });
        }

        // Subdomain cannot be empty
        if subdomain.is_empty() {
            // user@example..com
            //             ^^
            return Err(ErrorSpan {
                error_type: Error::EmptySubdomain,
                span: span_start - 1..span_start,
            });
        }

        // Subdomain cannot have a leading hyphen (`-`)
        if subdomain.starts_with('-') {
            // user@-example.com
            //      ^
            return Err(ErrorSpan {
                error_type: Error::LeadingHyphen,
                span: span_start..span_start,
            });
        }

        // Subdomain cannot have a trailing hypen (`-`)
        if subdomain.ends_with('-') {
            // user@example.com-
            //                 ^
            return Err(ErrorSpan {
                error_type: Error::TrailingHyphen,
                span: span_start + subdomain.len()..span_start + subdomain.len(),
            });
        }

        // Account for extra '.' character between subdomains
        span_start += subdomain.len() + 1;
    }

    // Final check, match domain against regex from the spec
    if !validate_domain_part(domain) {
        // user@exotic_case.com
        //      ^^^^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::DomainRegexFailed,
            span: user_end + 1..email_end,
        });
    }

    Ok(())
}

/// Returns `true` when `domain` is acceptable as the domain part of an address.
///
/// The domain is accepted if it matches the host-name pattern `EMAIL_DOMAIN_RE`.
/// Failing that, it is accepted if it matches the bracketed literal pattern
/// `EMAIL_LITERAL_RE` and the text between the brackets parses as an IP address.
#[must_use]
fn validate_domain_part(domain: &str) -> bool {
    EMAIL_DOMAIN_RE.is_match(domain)
        || EMAIL_LITERAL_RE
            .captures(domain)
            .and_then(|caps| caps.get(1))
            .map_or(false, |address| IpAddr::from_str(address.as_str()).is_ok())
}

// Table-driven tests for `validate_email`: each entry pairs an input address with the
// exact `Result` (including the error span) that validation is expected to produce.
#[cfg(test)]
mod tests {
    use super::{validate_email, Error, ErrorSpan};

    /// Runs every `(input, expected)` pair through `validate_email` and compares the
    /// complete result, so both the error kind and its span are pinned.
    #[test]
    fn test_validate_email() {
        // Test cases taken from Django
        // https://github.com/django/django/blob/master/tests/validators/tests.py#L48
        let tests =
            vec![
            // --- Valid addresses ---
            ("email@here.com", Ok(())),
            ("weirder-email@here.and.there.com", Ok(())),
            (r#"!def!xyz%abc@example.com"#, Ok(())),
            // Bracketed IPv4 / IPv6 literals
            ("email@[127.0.0.1]", Ok(())),
            ("email@[2001:dB8::1]", Ok(())),
            ("email@[2001:dB8:0:0:0:0:0:1]", Ok(())),
            ("email@[::fffF:127.0.0.1]", Ok(())),
            ("example@valid-----hyphens.com", Ok(())),
            ("example@valid-with-hyphens.com", Ok(())),
            // Internationalized domain, converted to ASCII before validation
            ("test@domain.with.idn.tld.उदाहरण.परीक्षा", Ok(())),
            // Quoted user parts are rejected at the leading quote (index 0)
            (
                r#""test@test"@example.com"#,
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 0..0,
                }),
            ),
            // max length for domain name labels is 63 characters per RFC 1034
            ("a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", Ok(())),
            ("a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.atm", Ok(())),
            (
                "a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bbbbbbbbbb.atm",
                Ok(()),
            ),
            // 64 * a
            (
                "a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                Err(ErrorSpan { error_type: Error::SubdomainTooLong, span: 6..70 }),
            ),
            // --- Structural errors: these span the whole input ---
            ("", Err(ErrorSpan { error_type: Error::EmptyEmail, span: 0..0 })),
            (
                "abc",
                Err(ErrorSpan {
                    error_type: Error::MissingAtCharacter,
                    span: 0..3,
                }),
            ),
            ("abc@", Err(ErrorSpan { error_type: Error::EmptyDomain, span: 0..4 })),
            ("@abc", Err(ErrorSpan { error_type: Error::EmptyUser, span: 0..4 })),
            ("abc@bar", Ok(())),
            (
                "a @x.cz",
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 1..1,
                }),
            ),
            (
                "abc@.com",
                Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 4..4 }),
            ),
            // The split happens at the last `@`, so the first `@` belongs to the user
            (
                "something@@somewhere.com",
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 9..9,
                }),
            ),
            // NOTE(review): disabled case; confirm the intended result before re-enabling.
            // ("email@127.0.0.1", Ok(())),
            // --- Invalid IP literals: the span starts just after `[` and its end is
            // computed as start + address length - 1 ---
            (
                "email@[127.0.0.256]",
                Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..17 }),
            ),
            (
                "email@[2001:db8::12345]",
                Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..21 }),
            ),
            (
                "email@[2001:db8:0:0:0:0:1]",
                Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
            ),
            (
                "email@[::ffff:127.0.0.256]",
                Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
            ),
            // --- Hyphen placement: a trailing hyphen is reported at the label's end
            // offset (label start + label length), a leading hyphen at the label start ---
            (
                "example@invalid-.com",
                Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 16..16 }),
            ),
            (
                "example@-invalid.com",
                Err(ErrorSpan { error_type: Error::LeadingHyphen, span: 8..8 }),
            ),
            (
                "example@invalid.com-",
                Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 20..20 }),
            ),
            // Only the first offending label is reported
            (
                "example@inv-.alid-.com",
                Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
            ),
            (
                "example@inv-.-alid.com",
                Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
            ),
            // Raw string: `\n` here is a literal backslash followed by `n`, and the
            // backslash at index 16 is the first invalid domain character
            (
                r#"test@example.com\n\n<script src="x.js">"#,
                Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: 16..16,
                }),
            ),
            (
                r#""\\\011"@here.com"#,
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 0..0,
                }),
            ),
            (
                r#""\\\012"@here.com"#,
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 0..0,
                }),
            ),
            (
                "trailingdot@shouldfail.com.",
                Err(ErrorSpan { error_type: Error::TrailingPeriod, span: 26..26 }),
            ),
            // Trailing newlines in username or domain not allowed
            (
                "a@b.com\n",
                Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: 7..7,
                }),
            ),
            (
                "a\n@b.com",
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 1..1,
                }),
            ),
            (
                r#""test@test"\n@example.com"#,
                Err(ErrorSpan {
                    error_type: Error::InvalidUserCharacter,
                    span: 0..0,
                }),
            ),
            // With the trailing newline the domain no longer ends in `]`, so it is not
            // treated as an IP literal and the `[` itself is the invalid character
            (
                "a@[127.0.0.1]\n",
                Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: 2..2,
                }),
            ),
            // underscores are not allowed
            (
                "John.Doe@exam_ple.com",
                Err(ErrorSpan {
                    error_type: Error::InvalidDomainCharacter,
                    span: 13..13,
                }),
            ),
            (
                "invalid@example..com",
                Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 }),
            ),
            (
                "invalid@.example.com",
                Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 8..8 }),
            ),
        ];

        for (input, expected) in tests {
            // println!("{} - {}", input, expected);
            assert_eq!(
                validate_email(input),
                expected,
                "Email `{input}` was not classified correctly",
            );
        }
    }
}