Create `locale_select` crate

finchie
Mar 5, 2024, 4:00 PM
YNEOCYMGMSHQGCL5TOIGWDDKHE4BZ5M7FGY5I6B2V6JO6ZRCLETAC

Dependencies

  • [2] VNSHGQYN Support using glob paths in `localize` macro
  • [3] O77KA6C4 Create `fluent_embed` crate
  • [4] SHNZZSZG Create `cli_macros` shim crate
  • [5] VZYZRAO4 Move `output-macros` crate into workspace
  • [*] KDUI7LHJ
  • [*] UKFEFT6L Create basic `Output` proc-macro

Change contents

  • file addition: locale_select (d--r------)
    [7.1]
  • file addition: tests (d--r------)
    [0.25]
  • file addition: unix.rs (----------)
    [0.44]
    use locale_select::unix::PosixLocale;
    /// A plain ASCII `language_TERRITORY.codeset` identifier is split into its
    /// language, territory and codeset subtags, with no modifier present.
    #[test]
    fn simple_en_us() {
        // Spell out the expectation first so the assertion reads as "actual vs. expected".
        let expected = PosixLocale {
            language: "en",
            territory: Some("US"),
            codeset: Some("utf8"),
            modifier: None,
        };

        let actual = PosixLocale::from_str("en_US.utf8");

        assert_eq!(actual, expected);
    }
  • file addition: src (d--r------)
    [0.25]
  • file addition: unix.rs (----------)
    [0.454]
    // Parsing of POSIX-style locale identifiers of the shape
    // `language[_territory][.codeset][@modifier]`.
    //
    // Every index in this module is a *byte* offset, so it can be used directly
    // to slice the source `&str`. The three separator characters are ASCII
    // (one byte each), which guarantees that `separator_index + 1` is always a
    // valid character boundary.

    /// The kind of optional subtag, identified by the separator that introduces it.
    #[derive(Debug, Clone, Copy)]
    enum OptionalSubtagType {
        /// Introduced by `_`.
        Territory,
        /// Introduced by `.`.
        Codeset,
        /// Introduced by `@`.
        Modifier,
    }

    impl OptionalSubtagType {
        /// Maps a separator character to the subtag type it introduces.
        ///
        /// Returns `None` when `source` is not one of `_`, `.` or `@`.
        const fn try_from_char(source: char) -> Option<Self> {
            match source {
                '_' => Some(Self::Territory),
                '.' => Some(Self::Codeset),
                '@' => Some(Self::Modifier),
                _ => None,
            }
        }
    }

    /// The location of a separator together with the subtag type it introduces.
    #[derive(Debug, Clone, Copy)]
    struct SubtagIndex {
        /// Which optional subtag follows the separator.
        separator: OptionalSubtagType,
        /// Byte offset of the separator within the string that was scanned.
        separator_index: usize,
    }

    impl SubtagIndex {
        /// Builds a `SubtagIndex` from an `(index, character)` pair.
        ///
        /// The tuple parameter lets this be passed straight to `find_map` over
        /// `char_indices()`. Returns `None` when the character is not a separator.
        const fn try_from_char(captures: (usize, char)) -> Option<Self> {
            let (index, source) = captures;
            match OptionalSubtagType::try_from_char(source) {
                Some(separator) => Some(Self {
                    separator,
                    separator_index: index,
                }),
                None => None,
            }
        }

        /// Finds the first separator located at byte offset `index_offset` or later.
        ///
        /// Uses `char_indices` (byte offsets) rather than `chars().enumerate()`
        /// (character counts) so that the reported index stays valid for slicing
        /// even when `source` contains multi-byte characters.
        fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {
            source
                .char_indices()
                .skip_while(|&(index, _)| index < index_offset)
                .find_map(Self::try_from_char)
        }
    }

    /// A located optional subtag: the byte range of its value and its type.
    #[derive(Debug, Clone, Copy)]
    struct OptionalSubtag {
        /// Byte offset of the first byte of the value (just past the separator).
        start_index: usize,
        /// Byte offset one past the end of the value (next separator or end of input).
        end_index: usize,
        /// Which optional subtag this is.
        subtag_type: OptionalSubtagType,
    }

    /// Iterates over the optional subtags of a locale, given the part of the
    /// identifier that starts at the first separator.
    ///
    /// Yielded indices are byte offsets relative to that substring.
    #[derive(Debug, Clone)]
    struct OptionalSubtagsIterator<'locale> {
        source: &'locale str,
        /// Separator that opens the subtag the next call to `next` will yield.
        current_subtag: Option<SubtagIndex>,
        /// Separator that terminates that subtag, if there is one.
        next_subtag: Option<SubtagIndex>,
    }

    impl<'locale> OptionalSubtagsIterator<'locale> {
        /// Creates an iterator over `source`, which must be empty or start with a
        /// separator (`_`, `.` or `@`).
        ///
        /// # Panics
        ///
        /// Panics if `source` is non-empty and its first character is not a separator.
        fn new(source: &'locale str) -> Self {
            // An empty source has no first character and yields an empty iterator.
            let current_subtag = source.chars().next().map(|first_character| {
                // The message is only formatted on the failure path.
                SubtagIndex::try_from_char((0, first_character)).unwrap_or_else(|| {
                    panic!(
                        "The first character in `{}` ('{}') is not a valid separator.",
                        source, first_character
                    )
                })
            });

            Self {
                source,
                current_subtag,
                next_subtag: SubtagIndex::from_str_with_offset(source, 1),
            }
        }
    }

    impl<'locale> Iterator for OptionalSubtagsIterator<'locale> {
        type Item = OptionalSubtag;

        fn next(&mut self) -> Option<Self::Item> {
            // No opening separator left: every subtag has been yielded.
            let current_subtag = self.current_subtag.take()?;
            let next_subtag = self.next_subtag.take();

            // The value runs up to the next separator, or to the end of the
            // source when this is the last subtag.
            let source_len = self.source.len();
            let next_separator = next_subtag.map_or(source_len, |next| next.separator_index);

            // Advance the internal state for the following call.
            self.current_subtag = next_subtag;
            self.next_subtag = if next_separator < source_len {
                SubtagIndex::from_str_with_offset(self.source, next_separator + 1)
            } else {
                None
            };

            Some(OptionalSubtag {
                start_index: current_subtag.separator_index + 1,
                end_index: next_separator,
                subtag_type: current_subtag.separator,
            })
        }
    }

    /// A POSIX locale identifier (`language[_territory][.codeset][@modifier]`)
    /// split into its subtags, each borrowed from the original string.
    #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
    pub struct PosixLocale<'locale> {
        /// Everything before the first separator; never empty after parsing.
        pub language: &'locale str,
        /// Value following `_`, if present.
        pub territory: Option<&'locale str>,
        /// Value following `.`, if present.
        pub codeset: Option<&'locale str>,
        /// Value following `@`, if present.
        pub modifier: Option<&'locale str>,
    }

    impl<'locale> PosixLocale<'locale> {
        /// Parses `source` into its language and optional subtags without allocating.
        ///
        /// Subtags are recognised purely by their separator, in whatever order
        /// they appear. If a separator occurs more than once, the last occurrence
        /// wins. Multi-byte (non-ASCII) characters are handled correctly because
        /// all slicing is done with byte offsets.
        ///
        /// # Panics
        ///
        /// Panics if the language subtag is empty, i.e. `source` is empty or
        /// starts with a separator.
        pub fn from_str(source: &'locale str) -> Self {
            // `str::find` reports a byte offset, unlike `chars().position(..)`,
            // which counts characters and cannot be used to slice safely.
            let language_end_bound = source
                .find(|character: char| OptionalSubtagType::try_from_char(character).is_some())
                .unwrap_or(source.len());
            let (language, optional_subtags) = source.split_at(language_end_bound);

            assert!(
                !language.is_empty(),
                "locale `{}` does not start with a language subtag",
                source
            );

            let mut locale = PosixLocale {
                language,
                ..Default::default()
            };

            for subtag in OptionalSubtagsIterator::new(optional_subtags) {
                // Iterator indices are relative to `optional_subtags`, so slice
                // that substring directly instead of re-applying the offset.
                let subtag_slice = &optional_subtags[subtag.start_index..subtag.end_index];
                match subtag.subtag_type {
                    OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),
                    OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),
                    OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),
                }
            }

            locale
        }
    }
  • file addition: lib.rs (----------)
    [0.454]
    pub mod unix;
  • file addition: Cargo.toml (----------)
    [0.25]
    [package]
    name = "locale_select"
    version = "0.1.0"
    edition = "2021"
    [dependencies]
    icu_locid = "1.4.0"
    [lints]
    workspace = true
  • replacement in Cargo.toml at line 2
    [3.210][3.1034:1092]()
    members = ["fluent_embed", "output-macros", "cli_macros"]
    [3.210]
    [3.238]
    members = ["fluent_embed", "output-macros", "cli_macros", "locale_select"]
  • edit in Cargo.lock at line 203
    [2.2199]
    [2.2199]
    name = "locale_select"
    version = "0.1.0"
    dependencies = [
    "icu_locid",
    ]
    [[package]]