Includes a basic implementation that splits POSIX locales into their various subtags, with the goal of eventually transforming these subtags into `icu_locid::Locale`
structs.
YNEOCYMGMSHQGCL5TOIGWDDKHE4BZ5M7FGY5I6B2V6JO6ZRCLETAC
use locale_select::unix::PosixLocale;
/// `en_US.utf8` parses into a language, territory and codeset, with no modifier.
#[test]
fn simple_en_us() {
    let expected = PosixLocale {
        language: "en",
        territory: Some("US"),
        codeset: Some("utf8"),
        modifier: None,
    };

    let actual = PosixLocale::from_str("en_US.utf8");

    assert_eq!(actual, expected);
}
/// The kinds of optional subtag that can follow the language in a POSIX locale.
///
/// Each kind is introduced by its own separator character; see
/// [`OptionalSubtagType::try_from_char`] for the mapping.
#[derive(Debug, Clone, Copy)]
enum OptionalSubtagType {
    /// Introduced by `_`.
    Territory,
    /// Introduced by `.`.
    Codeset,
    /// Introduced by `@`.
    Modifier,
}

impl OptionalSubtagType {
    /// Classifies `source` as a subtag separator.
    ///
    /// Returns the subtag kind that the separator introduces, or `None` when
    /// `source` is not one of `_`, `.` or `@`.
    const fn try_from_char(source: char) -> Option<Self> {
        let subtag_type = match source {
            '_' => Self::Territory,
            '.' => Self::Codeset,
            '@' => Self::Modifier,
            // Anything else is ordinary subtag content, not a separator
            _ => return None,
        };
        Some(subtag_type)
    }
}
/// The kind and position of a single subtag separator within a string.
#[derive(Debug, Clone, Copy)]
struct SubtagIndex {
    /// Which optional subtag the separator introduces.
    separator: OptionalSubtagType,
    /// Position of the separator within the string that was searched.
    ///
    /// [`SubtagIndex::from_str_with_offset`] reports this as a byte offset so
    /// that it can be used directly to slice the `&str`.
    separator_index: usize,
}

impl SubtagIndex {
    /// Builds a `SubtagIndex` from an `(index, character)` pair.
    ///
    /// Takes a tuple so that it can be passed directly to iterator adaptors
    /// such as `find_map`. Returns `None` when the character is not a
    /// separator.
    const fn try_from_char(captures: (usize, char)) -> Option<Self> {
        let (index, source) = captures;
        match OptionalSubtagType::try_from_char(source) {
            Some(separator) => Some(Self {
                separator,
                separator_index: index,
            }),
            None => None,
        }
    }

    /// Finds the first separator in `source` at or after byte offset
    /// `index_offset`.
    ///
    /// The reported `separator_index` is a byte offset into `source` (not a
    /// character count), so slicing stays correct when `source` contains
    /// multi-byte characters. Returns `None` when no separator is found,
    /// including when `index_offset` is past the end of `source`.
    fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {
        source
            .char_indices()
            // Byte offsets are increasing, so this skips exactly the prefix before the offset
            .skip_while(|&(index, _)| index < index_offset)
            .find_map(Self::try_from_char)
    }
}
/// The location and kind of one optional subtag, as produced by
/// `OptionalSubtagsIterator::next`.
///
/// Both indices are relative to the string the iterator was created from.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtag {
    /// Index of the start of the subtag's value: one past its separator.
    start_index: usize,
    /// Index one past the end of the subtag's value: the position of the next
    /// separator, or the length of the source for the last subtag.
    end_index: usize,
    /// The kind of subtag, as determined by the separator that introduced it.
    subtag_type: OptionalSubtagType,
}
/// Walks the optional subtags of a locale string, one separator at a time.
///
/// The source is expected to be the part of a locale that follows the
/// language, so it is either empty or starts with a separator character.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtagsIterator<'locale> {
    /// The string being scanned; all yielded indices are relative to it.
    source: &'locale str,
    /// Separator of the subtag that the next call to `next` will yield.
    current_subtag: Option<SubtagIndex>,
    /// Separator that terminates `current_subtag`, if there is one.
    next_subtag: Option<SubtagIndex>,
}

impl<'locale> OptionalSubtagsIterator<'locale> {
    /// Creates an iterator over the optional subtags in `source`.
    ///
    /// An empty `source` produces an iterator that yields nothing.
    ///
    /// # Panics
    ///
    /// Panics if `source` is non-empty and its first character is not a
    /// subtag separator.
    fn new(source: &'locale str) -> Self {
        // `None` here means the source is empty and the iterator yields nothing
        let current_subtag = source.chars().next().map(|first_character| {
            // The panic message is only built when the lookup actually fails
            SubtagIndex::try_from_char((0, first_character)).unwrap_or_else(|| {
                panic!(
                    "The first character in `{source}` ('{first_character}') is not a valid separator."
                )
            })
        });

        Self {
            source,
            current_subtag,
            next_subtag: SubtagIndex::from_str_with_offset(source, 1),
        }
    }

    /// Returns the location of the next optional subtag, or `None` once every
    /// subtag has been yielded.
    ///
    /// The returned range starts one past the subtag's separator and ends at
    /// the following separator, or at the end of the source for the last
    /// subtag.
    fn next(&mut self) -> Option<OptionalSubtag> {
        // If the current subtag is empty, all work is done
        let current_subtag = self.current_subtag.take()?;
        let next_subtag = self.next_subtag.take();
        let source = self.source;

        // The subtag ends at the next separator, or at the end of the source
        // when this is the last subtag
        let next_separator = next_subtag.map_or(source.len(), |next| next.separator_index);

        // Advance the internal state: the look-ahead separator becomes the
        // current one, and the search resumes just past it
        self.current_subtag = next_subtag;
        self.next_subtag = next_subtag
            .and_then(|next| SubtagIndex::from_str_with_offset(source, next.separator_index + 1));

        Some(OptionalSubtag {
            start_index: current_subtag.separator_index + 1,
            end_index: next_separator,
            subtag_type: current_subtag.separator,
        })
    }
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct PosixLocale<'locale> {
pub language: &'locale str,
pub territory: Option<&'locale str>,
pub codeset: Option<&'locale str>,
pub modifier: Option<&'locale str>,
}
impl<'locale> PosixLocale<'locale> {
pub fn from_str(source: &'locale str) -> Self {
let additional_subtags_start = source
.chars()
.position(|character| OptionalSubtagType::try_from_char(character).is_some());
let language_end_bound = additional_subtags_start.unwrap_or(source.len());
let mut locale = PosixLocale {
language: &source[..language_end_bound],
..Default::default()
};
assert!(!locale.language.is_empty());
let mut subtags_iter = OptionalSubtagsIterator::new(&source[language_end_bound..]);
while let Some(subtag) = subtags_iter.next() {
let OptionalSubtag {
start_index,
end_index,
..
} = subtag;
// Offset based on language boundary
let start_index = start_index + language_end_bound;
let end_index = end_index + language_end_bound;
assert!(start_index <= source.len());
assert!(end_index <= source.len());
let subtag_slice = &source[start_index..end_index];
match subtag.subtag_type {
OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),
OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),
OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),
};
}
locale
}
}
/// POSIX locale handling; exposes [`unix::PosixLocale`].
pub mod unix;
[package]
name = "locale_select"
version = "0.1.0"
edition = "2021"
[dependencies]
icu_locid = "1.4.0"
[lints]
workspace = true