use icu_locid::extensions::unicode::{key, value, Attributes, Keywords, Unicode};
use icu_locid::extensions::Extensions;
use icu_locid::subtags::{self, Language, Variants};
use icu_locid::{LanguageIdentifier, Locale};
use crate::fetch::unix::LocaleCategory;
/// Unicode extension keywords holding the single pair `va` = `posix`.
///
/// Used as the `keywords` of the Unicode extension attached to every
/// `Locale` built by `PosixLocale::icu_locale`.
const POSIX_KEYWORD: Keywords = Keywords::new_single(key!("va"), value!("posix"));
/// Kind of optional component that can follow the language in a locale
/// string, identified by the separator character that introduces it.
#[derive(Debug, Clone, Copy)]
enum OptionalSubtagType {
    /// Introduced by `_`.
    Territory,
    /// Introduced by `.`.
    Codeset,
    /// Introduced by `@`.
    Modifier,
}

impl OptionalSubtagType {
    /// Maps a separator character to the kind of subtag it introduces.
    ///
    /// Returns `None` for every character other than `_`, `.` and `@`.
    const fn try_from_char(source: char) -> Option<Self> {
        let kind = match source {
            '_' => Self::Territory,
            '.' => Self::Codeset,
            '@' => Self::Modifier,
            // Anything else is ordinary subtag content, not a separator.
            _ => return None,
        };
        Some(kind)
    }
}
/// Position and kind of a separator found inside a locale string.
#[derive(Debug, Clone, Copy)]
struct SubtagIndex {
    /// Which optional subtag the separator introduces.
    separator: OptionalSubtagType,
    /// Byte offset of the separator within the string that was searched.
    separator_index: usize,
}

impl SubtagIndex {
    /// Builds a `SubtagIndex` from an `(index, character)` pair.
    ///
    /// The index is stored unchanged as `separator_index`. Returns `None`
    /// when the character is not a recognised separator.
    const fn try_from_char(captures: (usize, char)) -> Option<Self> {
        let (index, source) = captures;
        match OptionalSubtagType::try_from_char(source) {
            Some(separator) => Some(Self {
                separator,
                separator_index: index,
            }),
            None => None,
        }
    }

    /// Finds the first separator located at or after byte offset
    /// `index_offset` in `source`.
    ///
    /// The reported `separator_index` is a byte offset measured from the
    /// start of `source`, so it can be used directly to slice `source` and
    /// to compare against `source.len()`.
    ///
    /// Returns `None` when no separator is found, or when `index_offset`
    /// lies past the end of `source` or not on a character boundary.
    fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {
        // `chars().enumerate()` would count characters, which diverges from
        // byte offsets as soon as a multi-byte character appears; slicing
        // with such an index is wrong or panics. `char_indices` yields byte
        // offsets relative to the sub-slice, so the offset is added back.
        source
            .get(index_offset..)?
            .char_indices()
            .find_map(|(index, character)| Self::try_from_char((index + index_offset, character)))
    }
}
/// A single optional subtag located by `OptionalSubtagsIterator`.
///
/// Both indices refer to the string the iterator was created with.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtag {
/// Index just past the separator that introduces this subtag.
start_index: usize,
/// Index of the next separator, or the length of the string when there is
/// no further separator.
end_index: usize,
/// Kind of subtag, as determined by the introducing separator.
subtag_type: OptionalSubtagType,
}
/// Iterator over the optional subtags of a locale string tail, i.e. the part
/// that begins with the first separator.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtagsIterator<'locale> {
    /// The string being scanned; empty or starting with a separator.
    source: &'locale str,
    /// Separator introducing the subtag that the next call to `next` yields.
    current_subtag: Option<SubtagIndex>,
    /// Separator following `current_subtag`, if any.
    next_subtag: Option<SubtagIndex>,
}

impl<'locale> OptionalSubtagsIterator<'locale> {
    /// Creates an iterator over the subtags contained in `source`.
    ///
    /// An empty `source` produces an iterator that yields nothing.
    ///
    /// # Panics
    ///
    /// Panics if `source` is non-empty and its first character is not one of
    /// the recognised separators.
    fn new(source: &'locale str) -> Self {
        let current_subtag = source.chars().next().map(|first_character| {
            // The message is only formatted on the failure path; building it
            // eagerly would allocate on every successful construction.
            SubtagIndex::try_from_char((0, first_character)).unwrap_or_else(|| {
                panic!(
                    "The first character in `{}` ('{}') is not a valid separator.",
                    source, first_character
                )
            })
        });

        Self {
            source,
            current_subtag,
            // Offset 1 skips the leading separator inspected above.
            next_subtag: SubtagIndex::from_str_with_offset(source, 1),
        }
    }
}
impl<'locale> Iterator for OptionalSubtagsIterator<'locale> {
    type Item = OptionalSubtag;

    /// Yields the subtag introduced by the current separator.
    ///
    /// The subtag starts right after that separator and ends at the following
    /// separator, or at the end of the source when there is none. The
    /// iterator then advances so that the following separator becomes the
    /// current one.
    fn next(&mut self) -> Option<Self::Item> {
        let active = self.current_subtag.take()?;
        let upcoming = self.next_subtag.take();
        let source_len = self.source.len();

        // Without a further separator the subtag runs to the end of the source.
        let end_index = match upcoming {
            Some(index) => index.separator_index,
            None => source_len,
        };

        // Look one separator further ahead only if there is text left to scan.
        self.next_subtag = if end_index >= source_len {
            None
        } else {
            SubtagIndex::from_str_with_offset(self.source, end_index + 1)
        };
        self.current_subtag = upcoming;

        Some(OptionalSubtag {
            start_index: active.separator_index + 1,
            end_index,
            subtag_type: active.separator,
        })
    }
}
/// A locale string of the form `language[_territory][.codeset][@modifier]`,
/// split into its components.
///
/// All fields borrow from the string that was parsed.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct PosixLocale<'locale> {
    /// Everything before the first `_`, `.` or `@`.
    pub language: &'locale str,
    /// Text following a `_` separator, if present.
    pub territory: Option<&'locale str>,
    /// Text following a `.` separator, if present.
    pub codeset: Option<&'locale str>,
    /// Text following a `@` separator, if present.
    pub modifier: Option<&'locale str>,
}

impl<'locale> PosixLocale<'locale> {
    /// Splits `source` into language, territory, codeset and modifier.
    ///
    /// The order of the optional subtags is not enforced. If the same
    /// separator occurs more than once, the last occurrence wins.
    ///
    /// # Panics
    ///
    /// Panics if the language part is empty, i.e. `source` is empty or starts
    /// with one of the separators `_`, `.` or `@`.
    pub fn from_str(source: &'locale str) -> Self {
        // `str::find` reports a byte offset, which is what slicing requires.
        // Counting characters instead would mis-slice or panic whenever the
        // language part contains a multi-byte character.
        let language_end_bound = source
            .find(|character: char| OptionalSubtagType::try_from_char(character).is_some())
            .unwrap_or(source.len());

        let mut locale = PosixLocale {
            language: &source[..language_end_bound],
            ..Default::default()
        };
        assert!(!locale.language.is_empty());

        for subtag in OptionalSubtagsIterator::new(&source[language_end_bound..]) {
            // The iterator reports indices relative to the tail it was given;
            // shift them back so they address the full `source`.
            let start_index = subtag.start_index + language_end_bound;
            let end_index = subtag.end_index + language_end_bound;
            assert!(start_index <= source.len());
            assert!(end_index <= source.len());

            let subtag_slice = &source[start_index..end_index];
            match subtag.subtag_type {
                OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),
                OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),
                OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),
            }
        }

        locale
    }

    /// Converts this locale into an `icu_locid` [`Locale`].
    ///
    /// Only `language` and `territory` are used; `codeset` and `modifier` are
    /// ignored. The resulting locale always carries the Unicode extension
    /// keyword from `POSIX_KEYWORD`.
    ///
    /// # Errors
    ///
    /// Returns the parser error if `language` is not accepted as a language
    /// subtag, or if `territory` is present and not accepted as a region
    /// subtag.
    pub fn icu_locale(&self) -> Result<Locale, icu_locid::ParserError> {
        let language = subtags::Language::try_from_bytes(self.language.as_bytes())?;
        let region = self
            .territory
            .map(|territory| subtags::Region::try_from_bytes(territory.as_bytes()))
            .transpose()?;

        let id = LanguageIdentifier {
            language,
            script: None,
            region,
            variants: Variants::new(),
        };
        let extensions = Extensions::from_unicode(Unicode {
            keywords: POSIX_KEYWORD,
            attributes: Attributes::new(),
        });

        Ok(Locale { id, extensions })
    }
}
/// Collects the language identifiers for the locales reported by `category`.
///
/// Each locale string is parsed with `PosixLocale::from_str` and converted
/// with `PosixLocale::icu_locale`. Strings whose conversion fails are
/// silently skipped. Only the language identifier of each converted locale is
/// returned; its extensions are dropped.
///
/// # Panics
///
/// Panics if `PosixLocale::from_str` panics on one of the locale strings.
pub fn get_locales(category: LocaleCategory) -> Vec<LanguageIdentifier> {
    category
        .get_locales()
        .iter()
        .filter_map(|locale_str| {
            let converted = PosixLocale::from_str(locale_str).icu_locale();
            match converted {
                Ok(locale) => Some(locale.id),
                Err(_) => None,
            }
        })
        .collect()
}