Includes a basic implementation of splitting POSIX locales into their various subtags, with the goal being to eventually transform these subtags into icu_locid::Locale structs.
YNEOCYMGMSHQGCL5TOIGWDDKHE4BZ5M7FGY5I6B2V6JO6ZRCLETAC
use locale_select::unix::PosixLocale;

/// Parses `en_US.utf8` and checks that the language, territory and codeset
/// subtags are extracted while the modifier stays unset.
#[test]
fn simple_en_us() {
    let expected = PosixLocale {
        language: "en",
        territory: Some("US"),
        codeset: Some("utf8"),
        modifier: None,
    };

    let parsed_locale = PosixLocale::from_str("en_US.utf8");

    assert_eq!(parsed_locale, expected);
}
/// The kind of optional subtag that a separator character introduces.
#[derive(Debug, Clone, Copy)]
enum OptionalSubtagType {
    /// Introduced by `_`.
    Territory,
    /// Introduced by `.`.
    Codeset,
    /// Introduced by `@`.
    Modifier,
}

impl OptionalSubtagType {
    /// Maps a separator character to the subtag type it introduces.
    ///
    /// Returns `None` when `source` is not one of `_`, `.` or `@`.
    const fn try_from_char(source: char) -> Option<Self> {
        match source {
            '_' => Some(Self::Territory),
            '.' => Some(Self::Codeset),
            '@' => Some(Self::Modifier),
            _ => None,
        }
    }
}

/// A separator found in a locale string, together with where it was found.
#[derive(Debug, Clone, Copy)]
struct SubtagIndex {
    /// The subtag type the separator introduces.
    separator: OptionalSubtagType,
    /// Byte offset of the separator within the string that was searched.
    separator_index: usize,
}

impl SubtagIndex {
    /// Builds a `SubtagIndex` from an `(index, character)` pair.
    ///
    /// Takes a tuple so it can be passed directly to iterator adaptors such as
    /// `find_map`. Returns `None` when the character is not a separator.
    const fn try_from_char(captures: (usize, char)) -> Option<Self> {
        let (index, source) = captures;
        if let Some(separator) = OptionalSubtagType::try_from_char(source) {
            Some(Self {
                separator,
                separator_index: index,
            })
        } else {
            None
        }
    }

    /// Finds the first separator at or after byte offset `index_offset`.
    ///
    /// The reported `separator_index` is a byte offset relative to the start
    /// of `source`, so it can be used directly for slicing. Returns `None` when
    /// there is no further separator, or when `index_offset` is past the end of
    /// `source` or not on a character boundary.
    fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {
        // `get` instead of indexing: an out-of-range offset (e.g. offset 1 into
        // an empty string) simply means "no more separators".
        let remainder = source.get(index_offset..)?;
        remainder
            .char_indices()
            .map(|(index, character)| (index + index_offset, character))
            .find_map(Self::try_from_char)
    }
}

/// The location and type of one optional subtag within the searched string.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtag {
    /// Byte offset of the first byte of the subtag (just after its separator).
    start_index: usize,
    /// Byte offset one past the last byte of the subtag.
    end_index: usize,
    /// Which optional subtag this is.
    subtag_type: OptionalSubtagType,
}

/// Iterates over the optional subtags of a locale string whose language
/// subtag has already been stripped, i.e. a string that is either empty or
/// starts with a separator.
#[derive(Debug, Clone)]
struct OptionalSubtagsIterator<'locale> {
    source: &'locale str,
    /// Separator of the subtag that the next call to `next` will yield.
    current_subtag: Option<SubtagIndex>,
    /// Separator following `current_subtag`, if any; marks where it ends.
    next_subtag: Option<SubtagIndex>,
}

impl<'locale> OptionalSubtagsIterator<'locale> {
    /// Creates an iterator over the optional subtags in `source`.
    ///
    /// An empty `source` produces an empty iterator.
    ///
    /// # Panics
    ///
    /// Panics if `source` is non-empty and its first character is not one of
    /// the separators `_`, `.` or `@`.
    fn new(source: &'locale str) -> Self {
        let current_subtag = source.chars().next().map(|first_character| {
            // Build the message lazily so the happy path does not allocate.
            SubtagIndex::try_from_char((0, first_character)).unwrap_or_else(|| {
                panic!(
                    "The first character in `{}` ('{}') is not a valid separator.",
                    source, first_character
                )
            })
        });

        Self {
            source,
            current_subtag,
            // A valid first character is an ASCII separator, so byte offset 1
            // is the start of the first subtag's content.
            next_subtag: SubtagIndex::from_str_with_offset(source, 1),
        }
    }
}

impl<'locale> Iterator for OptionalSubtagsIterator<'locale> {
    type Item = OptionalSubtag;

    fn next(&mut self) -> Option<Self::Item> {
        // If the current subtag is empty, all work is done.
        let current_subtag = self.current_subtag.take()?;
        let next_subtag = self.next_subtag.take();
        let source = self.source;

        // The subtag ends at the next separator, or at the end of the source
        // when this is the last subtag.
        let next_separator = next_subtag.map_or(source.len(), |next| next.separator_index);

        // Advance the internal state for the following iteration.
        self.current_subtag = next_subtag;
        self.next_subtag = next_subtag
            .and_then(|next| SubtagIndex::from_str_with_offset(source, next.separator_index + 1));

        Some(OptionalSubtag {
            start_index: current_subtag.separator_index + 1,
            end_index: next_separator,
            subtag_type: current_subtag.separator,
        })
    }
}

/// A POSIX locale identifier split into its subtags.
///
/// Every field borrows from the string that was parsed.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct PosixLocale<'locale> {
    /// Everything before the first separator.
    pub language: &'locale str,
    /// The subtag introduced by `_`, if present.
    pub territory: Option<&'locale str>,
    /// The subtag introduced by `.`, if present.
    pub codeset: Option<&'locale str>,
    /// The subtag introduced by `@`, if present.
    pub modifier: Option<&'locale str>,
}

impl<'locale> PosixLocale<'locale> {
    /// Splits `source` into a language subtag and optional territory, codeset
    /// and modifier subtags.
    ///
    /// Subtags are recognised purely by their separator (`_`, `.`, `@`); if the
    /// same separator occurs more than once, the last occurrence wins. All
    /// offsets are byte offsets, so non-ASCII subtag content is handled.
    ///
    /// # Panics
    ///
    /// Panics if the language subtag is empty, i.e. `source` is empty or
    /// starts with a separator.
    pub fn from_str(source: &'locale str) -> Self {
        // `str::find` yields a byte offset, which is what slicing requires.
        let language_end_bound = source
            .find(|character: char| OptionalSubtagType::try_from_char(character).is_some())
            .unwrap_or(source.len());
        let (language, optional_subtags) = source.split_at(language_end_bound);

        let mut locale = PosixLocale {
            language,
            ..Default::default()
        };
        assert!(
            !locale.language.is_empty(),
            "locale `{}` has an empty language subtag",
            source
        );

        for subtag in OptionalSubtagsIterator::new(optional_subtags) {
            // Subtag offsets are relative to `optional_subtags`.
            let subtag_slice = &optional_subtags[subtag.start_index..subtag.end_index];
            match subtag.subtag_type {
                OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),
                OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),
                OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),
            }
        }

        locale
    }
}
pub mod unix;
[package]name = "locale_select"version = "0.1.0"edition = "2021"[dependencies]icu_locid = "1.4.0"[lints]workspace = true