The env_preferences crate is being developed under ICU4X by a GSoC student, so it makes sense to switch over. The implementation is probably very flaky, but should work for now.
UN2XEIEUIB4ERS3IXOHQT2GCPBKK3JKHCGEVKQFP4SCV5AONFXMQC HL6ZEJTNQGFQDFI6MJHKITGZHCXC3OYC2O7KRD42D36PEU5C5YVAC BXLE3JXY37S6M7DGPM4KCL3ABT64HUVJ3NEUOFK56IFDOMUEJ3SQC CFJKYXUX4FF2DVAOJ3RRTI4JZPP5GMMDTJCEYM2IS57SCRKGZI6AC C6W7N6N57UCNHEV55HEZ3G7WN2ZOBGMFBB5M5ZPDB2HNNHHTOPBQC KF65O6ODA2UE2GYYTXFINCJW54CN62LB65NQLZNI5UM2W76ABEJAC KDUI7LHJRRQRFYPY7ANUNXG6XCUKQ4YYOEL5NG5Y6BRMV6GQ5M7AC YNEOCYMGMSHQGCL5TOIGWDDKHE4BZ5M7FGY5I6B2V6JO6ZRCLETAC WBI5HFOBBUMDSGKY2RX3YA6N7YDCJEP23JNEJ7PG5VZXHLYIRJRQC HCGVXOF7P3KKS2IMGVJWI2POVOZQFPXH26YVBJZRSOYSUM4CHUBQC BFL2Y7GN6NBXXNAUSD4M6T6CIVQ2OLERPE2CAFSLRF377WFFTVCQC JZXXFWQKOYAFQLQZDRALXG4KGEDR7JKO3AZ5Q5X7IQTS7BCJP3QAC LIH6JCXY5GMYQPU5L6HY2NOMJDMEW54THPKJ6YXI62Y2SVXFIAXQC T6JEWQJ7KI4SQFGIZNRKCWD5DEUVTIPEWXU7AX6WM7IU4DBSQZRQC HHJDRLLNN36UNIA7STAXEEVBCEMPJNB7SJQOS3TJLLYN4AEZ4MHQC 3NMKD6I57ONAGHEN4PZIAV2KPYESVR4JL3DTWSHXKCMVJBEQ4GIQC VZYZRAO4EXCHW2LBVFG5ELSWG5SCNDREMJ6RKQ4EKQGI2T7SD3ZQC KZLFC7OWYNK3G5YNHRANUK3VUVCM6W6J34N7UABYA24XMZWAVVHQC F5LG7WENUUDRSCTDMA4M6BAC5RWTGQO45C4ZEBZDX6FHCTTHBVGQC UKFEFT6LSI4K7X6UHQFZYD52DILKXMZMYSO2UYS2FCHNPXIF4BEQC VNSHGQYNPGKGGPYNVP4Z2RWD7JCSDJVYAADD6UXWBYL6ZRXKLE4AC SHNZZSZGIBTTD4IV5SMW5BIN5DORUWQVTVTNB5RMRD5CTFNOMJ6AC 6ABVDTXZOHVUDZDKDQS256F74LFIMM5DO3OZWHKRXZBUTPII4WAQC O77KA6C4UJGZXVGPEA7WCRQH6XYQJPWETSPDXI3VOKOSRQND7JEQC BANMRGROVYKYRJ4N2P4HSOJ2JVV6VSEB3W34BFXPOEFND5O36CGAC [package]name = "locale_select"version = "0.1.0"edition = "2021"[dependencies]fluent-langneg = { version = "0.14.0", features = ["cldr"] }icu_locid = "1.5.0"libc = "0.2.153"[dev-dependencies]gettext-rs = "0.7.0"[lints]workspace = true
pub mod unix;pub fn match_locales(available: &[LanguageIdentifier],default: &LanguageIdentifier,) -> LanguageIdentifier {// TODO: requesting locales should have platform-specific logiclet requested = unix::get_locales(fetch::unix::LocaleCategory::LC_MESSAGES);let supported = fluent_langneg::negotiate_languages(&requested,&available,Some(&default),NegotiationStrategy::Matching,);// TODO: properly handle this caseif let [single_locale] = supported[..] {// TODO: this is wasteful but avoids dealing with lifetimes for nowsingle_locale.to_owned()} else {todo!("Multiple locales returned, which is not yet handled. Got: {supported:#?}");}}// TODO: this can probably be enums, not stringsuse fluent_langneg::NegotiationStrategy;use icu_locid::LanguageIdentifier;pub mod fetch;
#[derive(Debug, Clone, Copy)]enum OptionalSubtagType {Territory,Codeset,Modifier,}impl OptionalSubtagType {const fn try_from_char(source: char) -> Option<Self> {match source {'_' => Some(Self::Territory),'.' => Some(Self::Codeset),'@' => Some(Self::Modifier),_ => None,}}}#[derive(Debug, Clone, Copy)]struct SubtagIndex {separator: OptionalSubtagType,separator_index: usize,}impl SubtagIndex {const fn try_from_char(captures: (usize, char)) -> Option<Self> {// Closure captures a tuple of length 2, destructure for readabilitylet (index, source) = captures;if let Some(separator) = OptionalSubtagType::try_from_char(source) {Some(Self {separator,separator_index: index,})} else {None}}fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {source.chars().enumerate().skip(index_offset).find_map(Self::try_from_char)}}#[derive(Debug, Clone, Copy)]struct OptionalSubtag {start_index: usize,end_index: usize,subtag_type: OptionalSubtagType,}#[derive(Debug, Clone, Copy)]struct OptionalSubtagsIterator<'locale> {source: &'locale str,current_subtag: Option<SubtagIndex>,next_subtag: Option<SubtagIndex>,}impl<'locale> OptionalSubtagsIterator<'locale> {fn new(source: &'locale str) -> Self {let current_subtag = if let Some(first_character) = source.chars().next() {let subtag = SubtagIndex::try_from_char((0, first_character)).expect(&format!("The first character in `{source}` ('{first_character}') is not a valid separator."));Some(subtag)} else {// The source locale is empty, return an empty iteratorNone};Self {source,current_subtag,next_subtag: SubtagIndex::from_str_with_offset(&source, 1),}}impl<'locale> Iterator for OptionalSubtagsIterator<'locale> {type Item = OptionalSubtag;fn next(&mut self) -> Option<Self::Item> {// If the current subtag is empty, all work is donelet current_subtag = self.current_subtag.take()?;let next_subtag = self.next_subtag.take();// Get the index of the next separator// If this is the last subtag then this is the length of the sourcelet 
next_separator = next_subtag.as_ref().map(|next_index| next_index.separator_index).unwrap_or(self.source.len());// Modify internal state for the next iterationself.current_subtag = next_subtag;self.next_subtag = if next_separator < self.source.len() {SubtagIndex::from_str_with_offset(&self.source, next_separator + 1)} else {None};Some(OptionalSubtag {start_index: current_subtag.separator_index + 1,end_index: next_separator,subtag_type: current_subtag.separator,})}}#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]pub struct PosixLocale<'locale> {pub language: &'locale str,pub territory: Option<&'locale str>,pub codeset: Option<&'locale str>,pub modifier: Option<&'locale str>,}impl<'locale> PosixLocale<'locale> {pub fn from_str(source: &'locale str) -> Self {let additional_subtags_start = source.chars().position(|character| OptionalSubtagType::try_from_char(character).is_some());let language_end_bound = additional_subtags_start.unwrap_or(source.len());let mut locale = PosixLocale {language: &source[..language_end_bound],..Default::default()};assert!(!locale.language.is_empty());let mut subtags_iter = OptionalSubtagsIterator::new(&source[language_end_bound..]);while let Some(subtag) = subtags_iter.next() {let OptionalSubtag {start_index,end_index,..} = subtag;// Offset based on language boundarylet start_index = start_index + language_end_bound;let end_index = end_index + language_end_bound;assert!(start_index <= source.len());assert!(end_index <= source.len());let subtag_slice = &source[start_index..end_index];match subtag.subtag_type {OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),};}locale}}pub fn icu_locale(&self) -> Result<Locale, icu_locid::ParserError> {let language = subtags::Language::try_from_bytes(self.language.as_bytes())?;let region = if let Some(territory) = self.territory 
{Some(subtags::Region::try_from_bytes(territory.as_bytes())?)} else {None};// TODO: should script/variants always be empty?let language_id = LanguageIdentifier {language,script: None,region,variants: Variants::new(),};// TODO: should attributes always be empty?let unicode_extensions = Unicode {keywords: POSIX_KEYWORD,attributes: Attributes::new(),};let extensions = Extensions::from_unicode(unicode_extensions);Ok(Locale {id: language_id,extensions,})}}pub fn get_locales(category: LocaleCategory) -> Vec<LanguageIdentifier> {category.get_locales_custom().iter().map(|locale_str| PosixLocale::from_str(locale_str)).map(|posix_locale| posix_locale.icu_locale()).filter_map(|potential_locale| potential_locale.ok())// TODO: is it ok to strip this posix metadata from the locale?.map(|locale| locale.id).collect()}use icu_locid::extensions::unicode::{key, value, Attributes, Keywords, Unicode};use icu_locid::extensions::Extensions;use icu_locid::subtags::{self, Variants};use icu_locid::{LanguageIdentifier, Locale};const POSIX_KEYWORD: Keywords = Keywords::new_single(key!("va"), value!("posix"));use crate::fetch::unix::LocaleCategory;
use std::env;use std::ffi::CStr;const NUL_BYTE: &[u8] = b"\0";macro_rules! repr_lc {($($variant:ident),+) => {#[derive(Clone, Copy, Debug)]#[allow(non_camel_case_types)] // Required for parity with C enumpub enum LocaleCategory {$($variant,)*}impl TryFrom<i32> for LocaleCategory {type Error = ();fn try_from(value: i32) -> Result<Self, Self::Error> {match value {$(libc::$variant => Ok(Self::$variant),)*_ => Err(())}}}impl Into<i32> for LocaleCategory {fn into(self) -> i32 {match self {$(Self::$variant => libc::$variant,)*}}}impl LocaleCategory {fn as_str(&self) -> &str {match self {$(Self::$variant => stringify!($variant),)*}}}}}repr_lc! {LC_ALL,LC_CTYPE,LC_COLLATE,LC_MESSAGES,LC_MONETARY,LC_NUMERIC,LC_TIME,LC_ADDRESS,LC_IDENTIFICATION,LC_MEASUREMENT,LC_NAME,LC_PAPER,LC_TELEPHONE}// TODO: handle and document safety invariantsfn get_locale_libc(category: i32) -> String {let empty_cstr = CStr::from_bytes_with_nul(NUL_BYTE).unwrap();let locale_string_pointer = unsafe { libc::setlocale(category, empty_cstr.as_ptr()) };let locale_c_str = unsafe { CStr::from_ptr(locale_string_pointer) };locale_c_str.to_str().unwrap().to_string()}impl LocaleCategory {/// Query the locale following the POSIX spec:/// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_02////// Order of precedence:/// 1. LC_ALL/// 2. LC_{NUMERIC, TIME, etc}/// 3. LANG/// 4. Default locale (handled by caller, this function will return None)pub fn get_locales_custom(&self) -> Vec<String> {let mut locales = Vec::with_capacity(3);if let Ok(global_locale) = env::var("LC_ALL") {locales.push(global_locale);}if let Ok(category_locale) = env::var(self.as_str()) {locales.push(category_locale);}}}if let Ok(lang) = env::var("LANG") {locales.push(lang);}locales}pub fn get_locales_libc(self) -> Vec<String> {let global_locale = get_locale_libc(libc::LC_ALL);let locale_for_category = get_locale_libc(self.into());vec![global_locale, locale_for_category]
use std::collections::HashSet;use gettextrs::LocaleCategory as GettextCategory;use locale_select::fetch::unix::LocaleCategory as LocaleSelectCategory;use locale_select::unix::PosixLocale;const GETTEXT_CATEGORIES: [GettextCategory; 13] = [GettextCategory::LcAll,GettextCategory::LcCType,GettextCategory::LcCollate,GettextCategory::LcMessages,GettextCategory::LcMonetary,GettextCategory::LcNumeric,GettextCategory::LcTime,GettextCategory::LcAddress,GettextCategory::LcIdentification,GettextCategory::LcMeasurement,GettextCategory::LcName,GettextCategory::LcPaper,GettextCategory::LcTelephone,];#[test]fn simple_en_us() {let locale = "en_US.utf8";let parsed_locale = PosixLocale::from_str(locale);assert_eq!(parsed_locale,PosixLocale {language: "en",territory: Some("US"),codeset: Some("utf8"),modifier: None,});}#[test]/// Exactly compare the output of get_locales_libc() with get_locales_custom()fn compare_libc_with_custom_impl_exact() {for gettext_category in GETTEXT_CATEGORIES {let locale_select_category =LocaleSelectCategory::try_from(gettext_category as i32).unwrap();let libc_locales = locale_select_category.get_locales_libc();let custom_locales = locale_select_category.get_locales_custom();assert_eq!(libc_locales, custom_locales);}}#[test]/// Compare the output of get_locales_libc() with get_locales_custom() using a HashSet////// This will make sure that both functions return the same data, even if it's not/// in the same order or items are duplicated. 
If the `_exact()` variant of this test/// fails, this test may still pass.fn compare_libc_with_custom_impl_hash_set() {for gettext_category in GETTEXT_CATEGORIES {let locale_select_category =LocaleSelectCategory::try_from(gettext_category as i32).unwrap();let libc_locales: HashSet<String> =HashSet::from_iter(locale_select_category.get_locales_libc().into_iter());let custom_locales: HashSet<String> =HashSet::from_iter(locale_select_category.get_locales_custom().into_iter());assert_eq!(libc_locales.symmetric_difference(&custom_locales).collect::<Vec<_>>(),Vec::<&String>::new());}}#[test]/// Compare get_locales_libc() with the implementation from gettext-rsfn compare_libc_with_gettext() {for gettext_category in GETTEXT_CATEGORIES {let locale_select_category =LocaleSelectCategory::try_from(gettext_category as i32).unwrap();let libc_locales = locale_select_category.get_locales_libc();let gettext_locales =String::from_utf8(gettextrs::setlocale(gettext_category, b"").unwrap()).unwrap();assert_eq!(libc_locales[0], gettext_locales);}}#[test]/// Compare get_locales_custom() with the implementation from gettext-rsfn compare_custom_with_gettext() {for gettext_category in GETTEXT_CATEGORIES {let locale_select_category =LocaleSelectCategory::try_from(gettext_category as i32).unwrap();let custom_locales = locale_select_category.get_locales_custom();let gettext_locales =String::from_utf8(gettextrs::setlocale(gettext_category, b"").unwrap()).unwrap();assert_eq!(custom_locales[0], gettext_locales);}}assert_eq!(parsed_locale.icu_locale().unwrap(),"en-US-u-va-posix".parse().unwrap());
# `locale_select`

A simple library for selecting the user's locale preferences for various ICU4X modules, such as calendar, datetime and decimals.

## Alternatives

This library is specific to ICU4X, but there are many Rust APIs with a similar focus. Here are some that I could find, and what they seem to do differently:

- [`simple-locale`](https://github.com/johnstonskj/simple-locale): very close to this crate, but doesn't use ICU4X libraries
- [`locale_settings`](https://docs.rs/locale-settings/latest/locale_settings): unmaintained(?) version of `simple-locale::settings`
- [`sys_locale`](https://github.com/1password/sys-locale): single locale instead of per-category
- [`utf8-locale`](https://gitlab.com/ppentchev/utf8-locale), [`locale-config`](https://github.com/rust-locale/locale_config): categories are less strongly typed
- [`gettextrs::setlocale()`](https://docs.rs/gettext-rs/latest/gettextrs/fn.setlocale.html): `gettext` bindings, this function can query the locale by setting the locale to an empty string

## Useful links

- POSIX:
  - Locale category data: https://www.man7.org/linux/man-pages/man5/locale.5.html
  - Locale category definitions: https://www.man7.org/linux/man-pages/man7/locale.7.html
  - Description of locale names: https://www.gnu.org/software/libc/manual/html_node/Locale-Names.html
  - Locale spec: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html
  - Localization variables spec: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_02
- Unicode handling of POSIX identifiers: https://unicode.org/reports/tr35/tr35.html#Legacy_Variants
let selected_locale = locale_select::match_locales(
let canonical_locale = <Self as Localize<W>>::CANONICAL_LOCALE;// MacOS and Windows return in order of preference, but Linux returns a HashMaplet requested_locales = get_locales();let selected_locale = fluent_langneg::negotiate_languages(&requested_locales,
#[cfg(target_os = "linux")]// TODO: does not add `-u-va-posix` extension, see https://unicode.org/reports/tr35/tr35.html#Legacy_Variantspub fn get_locales() -> Vec<LanguageIdentifier> {let mut retrieved_locales = env_preferences::get_locales().unwrap();let locale_order = vec![retrieved_locales.remove(&LocaleCategory::All),retrieved_locales.remove(&LocaleCategory::Messages),std::env::var("LANG").ok(),];locale_order.into_iter().filter_map(|optional_locale| optional_locale).map(|locale| LanguageIdentifier::try_from_bytes(locale.as_bytes()).unwrap()).collect()}#[cfg(any(target_os = "macos", target_os = "windows"))]pub fn get_locales() -> Vec<LanguageIdentifier> {env_preferences::get_locales().unwrap().map(|locale| LanguageIdentifier::try_from_bytes(locale.as_bytes()).unwrap()).collect()}
][[package]]name = "gettext-rs"version = "0.7.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "e49ea8a8fad198aaa1f9655a2524b64b70eb06b2f3ff37da407566c93054f364"dependencies = ["gettext-sys","locale_config",][[package]]name = "gettext-sys"version = "0.21.3"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "c63ce2e00f56a206778276704bbe38564c8695249fdc8f354b4ef71c57c3839d"dependencies = ["cc","temp-dir",
name = "locale_config"version = "0.3.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "08d2c35b16f4483f6c26f0e4e9550717a2f6575bcd6f12a53ff0c490a94a6934"dependencies = ["lazy_static","objc","objc-foundation","regex","winapi",][[package]]name = "locale_select"version = "0.1.0"dependencies = ["fluent-langneg","gettext-rs","icu_locid","libc",][[package]]name = "malloc_buf"version = "0.0.6"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb"dependencies = ["libc",][[package]]
name = "objc"version = "0.2.7"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1"dependencies = ["malloc_buf",][[package]]name = "objc-foundation"version = "0.1.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "1add1b659e36c9607c7aab864a76c7a4c2760cd0cd2e120f3fb8b952c7e22bf9"dependencies = ["block","objc","objc_id",][[package]]name = "objc_id"version = "0.1.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "c92d4ddb4bd7b50d730c215ff871754d0da6b2178849f8a2a2ab69712d0c073b"dependencies = ["objc",][[package]]
name = "winapi"version = "0.3.9"
name = "windows"version = "0.56.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "1de69df01bdf1ead2f4ac895dc77c9351aefff65b2f3db429a343f9cbf05e132"dependencies = ["windows-core","windows-targets 0.52.6",][[package]]name = "windows-core"version = "0.56.0"