This is pretty much just the changes from https://github.com/Keats/validator/pull/272, just in its own crate.
23NVAJMBPRFDINHF3UOWCZOF3GCTCRPHYDHXKPCIPEWQI5ZAORUQC
pub mod email;
// Heavily modified version of https://github.com/Keats/validator/blob/master/validator/src/validation/email.rs
extern crate alloc;
use alloc::borrow::Cow;
use idna::domain_to_ascii;
use lazy_static::lazy_static;
use regex::Regex;
use std::{net::IpAddr, str::FromStr};
// Punctuation accepted in the user part of an address, in addition to any
// character for which `char::is_alphanumeric` is true (see `validate_user`).
const VALID_USER_CHARACTERS: &str = r#".!#$%&'*+/=?^_`{|}~-"#;
// Punctuation accepted in a non-literal domain, in addition to any character
// for which `char::is_alphanumeric` is true (see `validate_domain`).
const VALID_DOMAIN_CHARACTERS: &str = r#".-"#;
lazy_static! {
    // Regexes from the spec:
    // https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address
    // They mark esoteric email addresses, such as quoted strings, as invalid.

    // User (local part): one or more ASCII letters, digits or the listed punctuation.
    static ref EMAIL_USER_RE: Regex = Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap();

    // Domain: dot-separated labels of 1 to 63 characters that start and end
    // with a letter or digit and may contain hyphens in between.
    static ref EMAIL_DOMAIN_RE: Regex = Regex::new(
        r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$"
    ).unwrap();

    // Literal form, IPv4 or IPv6 address in square brackets (SMTP 4.1.3).
    // The class is `a-f` (hex digits; `(?i)` covers upper case). The previous
    // `A-f` range also admitted `G-Z`, `[`, `\`, `]`, `^`, `_` and a backtick.
    // The leading `^` makes the whole domain the literal, not just a suffix.
    static ref EMAIL_LITERAL_RE: Regex = Regex::new(r"(?i)^\[([a-f0-9:\.]+)\]\z").unwrap();
}
/// Represents the error states of an email address
///
/// Each variant names the validation rule that was violated and documents which part of the
/// email its accompanying span covers.
///
/// This enum is always constructed along with an accompanying span that indicates where the
/// error occurred. For more details, see the [`ErrorSpan`] struct.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
/// The length of the domain is greater than the maximum of 255 characters
///
/// Error spans: domain
DomainTooLong,
/// An unknown error in the domain that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
///
/// Error spans: domain
DomainRegexFailed,
/// The provided domain does not contain any characters
///
/// Error spans: email
EmptyDomain,
/// The provided email does not contain any characters
///
/// Error spans: email
EmptyEmail,
/// An element of the domain does not contain any characters
///
/// Error spans: domain
EmptySubdomain,
/// The provided user does not contain any characters
///
/// Error spans: email
EmptyUser,
/// The provided domain contains invalid characters
///
/// Error spans: invalid character
InvalidDomainCharacter,
/// The provided user contains invalid characters
///
/// Error spans: invalid character
InvalidUserCharacter,
/// The provided IP is invalid
///
/// Error spans: IP address
InvalidIP,
/// An element of the domain begins with a hyphen (`-`)
///
/// Error spans: leading hyphen character
LeadingHyphen,
/// An element of the domain begins with a period (`.`)
///
/// Error spans: leading period character
LeadingPeriod,
/// The provided email does not contain the required `@` character
///
/// Error spans: email
MissingAtCharacter,
/// The length of an element in the domain is greater than the maximum of 63 characters
///
/// Error spans: invalid subdomain
SubdomainTooLong,
/// An element of the domain ends with a hyphen (`-`)
///
/// Error spans: trailing hyphen character
TrailingHyphen,
/// An element of the domain ends with a period (`.`)
///
/// Error spans: trailing period character
TrailingPeriod,
/// The length of the user was greater than the maximum of 64 characters
///
/// Error spans: invalid user
UserTooLong,
/// An unknown error in the user that was caught by the [regex from the whatwg spec](https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address)
///
/// Error spans: user
UserRegexFailed,
/// The provided domain was unable to be converted into an [IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
///
/// Error spans: domain
Uts46,
}
/// The first validation error found in an email, together with its location.
///
/// Upon encountering an error, [`validate_email`] will return this struct containing the first error reached.
/// The field `error_type` indicates which rule has not been met, and the `span` field provides a simple character range
/// indicating which portion of the email is at fault. This may be a single character, or an entire string. Each enum variant
/// documents the range it spans.
///
/// Example of a single-character span. Note that a single offending character is reported
/// as an empty range (`start == end`) positioned at that character:
/// ```
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// // invalid"@example.com
/// //        ^
///
/// assert_eq!(
///     validate_email(r#"invalid"@example.com"#),
///     Err(ErrorSpan { error_type: Error::InvalidUserCharacter, span: 7..7 })
/// );
/// ```
/// Example of a span with distinct start and end offsets:
/// ```
/// // invalid@example..com
/// //                ^
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// assert_eq!(
///     validate_email("invalid@example..com"),
///     Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 })
/// );
/// ```
///
/// Example of an address-long span:
/// ```
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// // no-user.com
/// // ^^^^^^^^^^^
/// assert_eq!(
///     validate_email("no-user.com"),
///     Err(ErrorSpan { error_type: Error::MissingAtCharacter, span: 0..11 })
/// );
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ErrorSpan {
/// The rule that the email failed to satisfy.
pub error_type: Error,
/// The portion of the email at fault; see the [`Error`] variant for what it covers.
pub span: core::ops::Range<usize>,
}
/// Validates an email address, reporting the first rule it violates.
///
/// The address is split at its **last** `@`: everything before it is the user,
/// everything after it is the domain. The domain is converted to its ASCII
/// form with `idna::domain_to_ascii` and validated first; the user is
/// validated afterwards.
///
/// # Errors
///
/// Returns an [`ErrorSpan`] describing the first failed check:
/// * [`Error::EmptyEmail`] (span `0..0`) when the input is empty,
/// * [`Error::MissingAtCharacter`] (spans the whole input) when there is no `@`,
/// * [`Error::Uts46`] (spans the domain) when the ASCII conversion fails,
/// * otherwise whatever the domain or user validation reports.
///
/// # Examples
///
/// ```
/// # use span_validator::email::{Error, ErrorSpan, validate_email};
/// assert_eq!(validate_email("email@here.com"), Ok(()));
/// assert_eq!(
///     validate_email("abc"),
///     Err(ErrorSpan { error_type: Error::MissingAtCharacter, span: 0..3 })
/// );
/// ```
pub fn validate_email<'email, T>(value: T) -> Result<(), ErrorSpan>
where
    T: Into<Cow<'email, str>>,
{
    let email = value.into();

    // Email must not be empty
    if email.is_empty() {
        return Err(ErrorSpan {
            error_type: Error::EmptyEmail,
            span: 0..0,
        });
    }

    // Split at the last `@`; an address without one is rejected outright.
    let (user, raw_domain) = match email.rsplit_once('@') {
        Some(parts) => parts,
        None => {
            // example.com
            // ^^^^^^^^^^^
            return Err(ErrorSpan {
                error_type: Error::MissingAtCharacter,
                span: 0..email.len(),
            });
        }
    };

    // Convert the domain to its ASCII (IDNA) form. A failure is a problem with
    // the domain, so the span covers the domain only, as documented on
    // `Error::Uts46`: it starts right after the `@`.
    let domain = domain_to_ascii(raw_domain).map_err(|_domain_errors| ErrorSpan {
        error_type: Error::Uts46,
        span: user.len() + 1..email.len(),
    })?;

    // The domain is checked before the user, so a domain error wins when both
    // parts are invalid.
    validate_domain(&domain, user.len(), email.len())?;
    validate_user(user, email.len())
}
/// Validates the user part of an email (everything before the last `@`).
///
/// `email_end` is the length of the complete email; it is only used for the
/// span of [`Error::EmptyUser`], which covers the whole address.
///
/// Checks run in this order and the first failure is returned:
/// 1. every character is alphanumeric or in `VALID_USER_CHARACTERS`
///    ([`Error::InvalidUserCharacter`], empty span at the offending character),
/// 2. the user is not empty ([`Error::EmptyUser`]),
/// 3. the user is at most 64 bytes long ([`Error::UserTooLong`], spans the user),
/// 4. the user matches `EMAIL_USER_RE` ([`Error::UserRegexFailed`], spans the user).
fn validate_user(user: &str, email_end: usize) -> Result<(), ErrorSpan> {
    let is_permitted =
        |character: char| character.is_alphanumeric() || VALID_USER_CHARACTERS.contains(character);

    // `char_indices` yields byte offsets, matching the byte lengths (`len()`)
    // used by every other span; a plain character count would drift as soon as
    // a multi-byte character precedes the offending one.
    if let Some((offset, _)) = user
        .char_indices()
        .find(|&(_, character)| !is_permitted(character))
    {
        // user"@example.com
        //     ^
        return Err(ErrorSpan {
            error_type: Error::InvalidUserCharacter,
            span: offset..offset,
        });
    }

    // Validate the length BEFORE running the regex. According to RFC 5321 the
    // maximum length of the user (local part) is 64 octets.
    // https://datatracker.ietf.org/doc/html/rfc5321#section-4.5.3.1.1
    if user.is_empty() {
        // @example.com
        // ^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::EmptyUser,
            span: 0..email_end,
        });
    }

    if user.len() > 64 {
        // pretend_this_is_too_long@example.com
        // ^^^^^^^^^^^^^^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::UserTooLong,
            span: 0..user.len(),
        });
    }

    // Final check: the spec regex is ASCII-only, so it rejects non-ASCII
    // alphanumerics that the character scan above lets through.
    if !EMAIL_USER_RE.is_match(user) {
        // exotic_case@example.com
        // ^^^^^^^^^^^
        return Err(ErrorSpan {
            error_type: Error::UserRegexFailed,
            span: 0..user.len(),
        });
    }

    Ok(())
}
/// Validate the email domain.
///
/// A domain can either be an
fn validate_domain(domain: &str, user_end: usize, email_end: usize) -> Result<(), ErrorSpan> {
// Email domains can be an IP address surrounded by square brackets (`[]`)
let is_literal_address = match (domain.chars().next(), domain.chars().last()) {
(Some('['), Some(']')) => true,
_ => false,
};
// Ensure validity of all characters in domain
if is_literal_address {
let potential_ip = domain.get(1..domain.len() - 1).unwrap();
let ip_range_start = user_end + 2;
if IpAddr::from_str(potential_ip).is_err() {
// user@[127.0.0.256]
// ^^^^^^^^^^^
return Err(ErrorSpan {
error_type: Error::InvalidIP,
span: ip_range_start..ip_range_start + potential_ip.len() - 1,
});
}
} else {
for (index, character) in domain.chars().enumerate() {
if !character.is_alphanumeric() && !VALID_DOMAIN_CHARACTERS.contains(character) {
let location = index + user_end + 1;
// user@*example.com
// ^
return Err(ErrorSpan {
error_type: Error::InvalidDomainCharacter,
span: location..location,
});
}
}
}
// Domain cannot be empty
if domain.is_empty() {
// user@
// ^^^^^
return Err(ErrorSpan {
error_type: Error::EmptyDomain,
span: 0..email_end,
});
}
// Domain has a maximum length of 255
if domain.len() > 255 {
// user@pretend_this_is_too_long.com
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
return Err(ErrorSpan {
error_type: Error::DomainTooLong,
span: (user_end + 1)..email_end,
});
}
// Domain cannot begin with a leading period (`.`)
if domain.starts_with('.') {
let column = user_end + 1;
// user@.example.com
// ^
return Err(ErrorSpan {
error_type: Error::LeadingPeriod,
span: column..column,
});
}
// Domain cannot end with a trailing period (`.`)
if domain.ends_with('.') {
let column = email_end - 1;
// user@example.com.
// ^
return Err(ErrorSpan {
error_type: Error::TrailingPeriod,
span: column..column,
});
}
let mut span_start = user_end + 1;
for subdomain in domain.split('.') {
// Subdomain has a maximum length of 63
if subdomain.len() > 63 {
// user@valid.valid.pretend_this_is_too_long.com
// ^^^^^^^^^^^^^^^^^^^^^^^^
return Err(ErrorSpan {
error_type: Error::SubdomainTooLong,
span: span_start..span_start + subdomain.len(),
});
}
// Subdomain cannot be empty
if subdomain.is_empty() {
// user@example..com
// ^^
return Err(ErrorSpan {
error_type: Error::EmptySubdomain,
span: span_start - 1..span_start,
});
}
// Subdomain cannot have a leading hyphen (`-`)
if subdomain.starts_with('-') {
// user@-example.com
// ^
return Err(ErrorSpan {
error_type: Error::LeadingHyphen,
span: span_start..span_start,
});
}
// Subdomain cannot have a trailing hypen (`-`)
if subdomain.ends_with('-') {
// user@example.com-
// ^
return Err(ErrorSpan {
error_type: Error::TrailingHyphen,
span: span_start + subdomain.len()..span_start + subdomain.len(),
});
}
// Account for extra '.' character between subdomains
span_start += subdomain.len() + 1;
}
// Final check, match domain against regex from the spec
if !validate_domain_part(domain) {
// user@exotic_case.com
// ^^^^^^^^^^^^^^^
return Err(ErrorSpan {
error_type: Error::DomainRegexFailed,
span: user_end + 1..email_end,
});
}
Ok(())
}
/// Returns `true` when `domain` matches `EMAIL_DOMAIN_RE`, or, failing that,
/// when `EMAIL_LITERAL_RE` captures a bracketed address that parses as an
/// [`IpAddr`].
#[must_use]
fn validate_domain_part(domain: &str) -> bool {
    // Only evaluated when the plain-domain regex did not match.
    let is_bracketed_ip = || {
        EMAIL_LITERAL_RE
            .captures(domain)
            .and_then(|groups| groups.get(1))
            .map_or(false, |address| IpAddr::from_str(address.as_str()).is_ok())
    };

    EMAIL_DOMAIN_RE.is_match(domain) || is_bracketed_ip()
}
// Table-driven tests for `validate_email`: each entry pairs an input with the
// exact `Result` (including the error span) the validator must return.
#[cfg(test)]
mod tests {
use super::{validate_email, Error, ErrorSpan};
#[test]
fn test_validate_email() {
// Test cases taken from Django
// https://github.com/django/django/blob/master/tests/validators/tests.py#L48
let tests =
vec![
("email@here.com", Ok(())),
("weirder-email@here.and.there.com", Ok(())),
(r#"!def!xyz%abc@example.com"#, Ok(())),
("email@[127.0.0.1]", Ok(())),
("email@[2001:dB8::1]", Ok(())),
("email@[2001:dB8:0:0:0:0:0:1]", Ok(())),
("email@[::fffF:127.0.0.1]", Ok(())),
("example@valid-----hyphens.com", Ok(())),
("example@valid-with-hyphens.com", Ok(())),
("test@domain.with.idn.tld.उदाहरण.परीक्षा", Ok(())),
// quoted-string users are rejected at the opening quote
(
r#""test@test"@example.com"#,
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 0..0,
}),
),
// max length for domain name labels is 63 characters per RFC 1034
("a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", Ok(())),
("a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.atm", Ok(())),
(
"a@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bbbbbbbbbb.atm",
Ok(()),
),
// 64 * a: one character over the label limit
(
"a@atm.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Err(ErrorSpan { error_type: Error::SubdomainTooLong, span: 6..70 }),
),
("", Err(ErrorSpan { error_type: Error::EmptyEmail, span: 0..0 })),
(
"abc",
Err(ErrorSpan {
error_type: Error::MissingAtCharacter,
span: 0..3,
}),
),
("abc@", Err(ErrorSpan { error_type: Error::EmptyDomain, span: 0..4 })),
("@abc", Err(ErrorSpan { error_type: Error::EmptyUser, span: 0..4 })),
("abc@bar", Ok(())),
(
"a @x.cz",
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 1..1,
}),
),
(
"abc@.com",
Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 4..4 }),
),
// the split happens at the last `@`, so the first `@` is part of the user
(
"something@@somewhere.com",
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 9..9,
}),
),
// NOTE(review): disabled case (IP address without brackets); the intended
// result is not established here, so re-enable or delete once confirmed.
// ("email@127.0.0.1", Ok(())),
(
"email@[127.0.0.256]",
Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..17 }),
),
(
"email@[2001:db8::12345]",
Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..21 }),
),
(
"email@[2001:db8:0:0:0:0:1]",
Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
),
(
"email@[::ffff:127.0.0.256]",
Err(ErrorSpan { error_type: Error::InvalidIP, span: 7..24 }),
),
(
"example@invalid-.com",
Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 16..16 }),
),
(
"example@-invalid.com",
Err(ErrorSpan { error_type: Error::LeadingHyphen, span: 8..8 }),
),
(
"example@invalid.com-",
Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 20..20 }),
),
// only the first offending label is reported
(
"example@inv-.alid-.com",
Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
),
(
"example@inv-.-alid.com",
Err(ErrorSpan { error_type: Error::TrailingHyphen, span: 12..12 }),
),
(
r#"test@example.com\n\n<script src="x.js">"#,
Err(ErrorSpan {
error_type: Error::InvalidDomainCharacter,
span: 16..16,
}),
),
(
r#""\\\011"@here.com"#,
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 0..0,
}),
),
(
r#""\\\012"@here.com"#,
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 0..0,
}),
),
(
"trailingdot@shouldfail.com.",
Err(ErrorSpan { error_type: Error::TrailingPeriod, span: 26..26 }),
),
// Trailing newlines in username or domain not allowed
(
"a@b.com\n",
Err(ErrorSpan {
error_type: Error::InvalidDomainCharacter,
span: 7..7,
}),
),
(
"a\n@b.com",
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 1..1,
}),
),
(
r#""test@test"\n@example.com"#,
Err(ErrorSpan {
error_type: Error::InvalidUserCharacter,
span: 0..0,
}),
),
// the trailing newline stops the domain from being treated as a bracketed
// literal, so the opening `[` is reported as an invalid character
(
"a@[127.0.0.1]\n",
Err(ErrorSpan {
error_type: Error::InvalidDomainCharacter,
span: 2..2,
}),
),
// underscores are not allowed
(
"John.Doe@exam_ple.com",
Err(ErrorSpan {
error_type: Error::InvalidDomainCharacter,
span: 13..13,
}),
),
(
"invalid@example..com",
Err(ErrorSpan { error_type: Error::EmptySubdomain, span: 15..16 }),
),
(
"invalid@.example.com",
Err(ErrorSpan { error_type: Error::LeadingPeriod, span: 8..8 }),
),
];
for (input, expected) in tests {
// The assertion message names the input that was misclassified.
assert_eq!(
validate_email(input),
expected,
"Email `{input}` was not classified correctly",
);
}
}
}
[package]
name = "span-validator"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
idna = "0.4.0"
lazy_static = "1.4.0"
regex = "1.9.6"
.git
.DS_Store
# Added by cargo
/target
/Cargo.lock