// Experimenting with more structured ways to handle command-line input/output in Rust
use icu_locid::extensions::unicode::{key, value, Attributes, Keywords, Unicode};
use icu_locid::extensions::Extensions;
use icu_locid::subtags::{self, Language, Variants};
use icu_locid::{LanguageIdentifier, Locale};

use crate::fetch::unix::LocaleCategory;

/// Unicode extension keywords (`va` = `posix`) attached to every [`Locale`] built by
/// [`PosixLocale::icu_locale`].
const POSIX_KEYWORD: Keywords = Keywords::new_single(key!("va"), value!("posix"));

/// The kind of optional locale subtag, identified by the separator that precedes it.
#[derive(Debug, Clone, Copy)]
enum OptionalSubtagType {
    /// Introduced by `_`.
    Territory,
    /// Introduced by `.`.
    Codeset,
    /// Introduced by `@`.
    Modifier,
}

impl OptionalSubtagType {
    /// Maps a separator character to the subtag kind it introduces.
    ///
    /// Returns `None` for any character other than `_`, `.` or `@`.
    const fn try_from_char(source: char) -> Option<Self> {
        match source {
            '_' => Some(Self::Territory),
            '.' => Some(Self::Codeset),
            '@' => Some(Self::Modifier),
            _ => None,
        }
    }
}

/// A separator located in a string: the subtag kind it introduces and its position.
#[derive(Debug, Clone, Copy)]
struct SubtagIndex {
    /// Kind of subtag that follows the separator.
    separator: OptionalSubtagType,
    /// Byte offset of the separator character within the scanned string.
    separator_index: usize,
}

impl SubtagIndex {
    /// Builds a `SubtagIndex` from an `(index, character)` pair.
    ///
    /// Returns `None` when the character is not a separator. The index is stored as given.
    const fn try_from_char(captures: (usize, char)) -> Option<Self> {
        // Takes a tuple so it can be handed `(index, char)` iterator items directly
        let (separator_index, character) = captures;

        match OptionalSubtagType::try_from_char(character) {
            Some(separator) => Some(Self {
                separator,
                separator_index,
            }),
            None => None,
        }
    }

    /// Finds the first separator in `source` at or after byte offset `index_offset`.
    ///
    /// The returned `separator_index` is a byte offset into `source` (not a character
    /// count), so it can be compared with `str::len` and used as a slice bound even when
    /// `source` contains multi-byte characters.
    ///
    /// Returns `None` when no separator is found, or when `index_offset` is past the end
    /// of `source` or does not fall on a character boundary.
    fn from_str_with_offset(source: &str, index_offset: usize) -> Option<Self> {
        // `get` (unlike indexing) never panics on an out-of-range or mid-character offset
        let remainder = source.get(index_offset..)?;

        remainder
            .char_indices()
            // `char_indices` is relative to `remainder`; shift back to offsets in `source`
            .find_map(|(index, character)| Self::try_from_char((index + index_offset, character)))
    }
}

/// Location and kind of one optional subtag, as produced by `OptionalSubtagsIterator`.
///
/// The indices are relative to the string the iterator was created from and describe
/// the half-open range `start_index..end_index`.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtag {
    /// Index just past the separator that introduces this subtag.
    start_index: usize,
    /// Index of the next separator, or the length of the string for the last subtag.
    end_index: usize,
    /// Kind of subtag, determined by the introducing separator.
    subtag_type: OptionalSubtagType,
}

/// Iterator over the optional subtags of a locale string.
///
/// The string it scans is expected to begin at the first separator, i.e. with the
/// language part already removed.
#[derive(Debug, Clone, Copy)]
struct OptionalSubtagsIterator<'locale> {
    /// The text being scanned.
    source: &'locale str,
    /// Separator opening the subtag that the next call to `next` will yield.
    current_subtag: Option<SubtagIndex>,
    /// Separator following `current_subtag`, if any; it marks where that subtag ends.
    next_subtag: Option<SubtagIndex>,
}

impl<'locale> OptionalSubtagsIterator<'locale> {
    /// Creates an iterator over the subtags in `source`.
    ///
    /// An empty `source` produces an iterator that yields nothing.
    ///
    /// # Panics
    ///
    /// Panics if `source` is non-empty and its first character is not one of the
    /// separators `_`, `.` or `@`.
    fn new(source: &'locale str) -> Self {
        let current_subtag = source.chars().next().map(|first_character| {
            // The message is only built on the failure path; explicit arguments keep
            // `panic!` formatting the text independent of the edition in use.
            SubtagIndex::try_from_char((0, first_character)).unwrap_or_else(|| {
                panic!(
                    "The first character in `{}` ('{}') is not a valid separator.",
                    source, first_character
                )
            })
        });

        Self {
            source,
            current_subtag,
            // Look for the second separator, starting right after the first character
            next_subtag: SubtagIndex::from_str_with_offset(source, 1),
        }
    }
}

impl<'locale> Iterator for OptionalSubtagsIterator<'locale> {
    type Item = OptionalSubtag;

    /// Yields the subtag opened by the current separator and advances to the next one.
    fn next(&mut self) -> Option<Self::Item> {
        // If the current subtag is empty, all work is done
        let current_subtag = self.current_subtag.take()?;
        let next_subtag = self.next_subtag.take();

        // The subtag ends at the next separator, or at the end of the source for the
        // last subtag
        let source_len = self.source.len();
        let next_separator = next_subtag.map_or(source_len, |next_index| next_index.separator_index);

        // Shift the window forward for the following iteration
        self.current_subtag = next_subtag;
        self.next_subtag = if next_separator < source_len {
            SubtagIndex::from_str_with_offset(self.source, next_separator + 1)
        } else {
            None
        };

        Some(OptionalSubtag {
            // Skip over the separator character itself
            start_index: current_subtag.separator_index + 1,
            end_index: next_separator,
            subtag_type: current_subtag.separator,
        })
    }
}

/// A locale string split into its components, each borrowed from the original string.
///
/// The optional components are the pieces of text that follow the `_`, `.` and `@`
/// separators respectively; a component is `None` when its separator is absent.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct PosixLocale<'locale> {
    /// Text before the first separator.
    pub language: &'locale str,
    /// Text following a `_` separator.
    pub territory: Option<&'locale str>,
    /// Text following a `.` separator.
    pub codeset: Option<&'locale str>,
    /// Text following a `@` separator.
    pub modifier: Option<&'locale str>,
}

impl<'locale> PosixLocale<'locale> {
    /// Splits a locale string such as `en_US.UTF-8@euro` into its components.
    ///
    /// Everything before the first `_`, `.` or `@` becomes the language; each following
    /// separator introduces the territory, codeset or modifier respectively. If the same
    /// separator occurs more than once, the last occurrence wins.
    ///
    /// # Panics
    ///
    /// Panics if the language part is empty (the string is empty or starts with a
    /// separator).
    pub fn from_str(source: &'locale str) -> Self {
        // `str::find` returns a byte offset, which is what the slicing below requires
        // (a character count would mis-slice or panic on non-ASCII input)
        let language_end_bound = source
            .find(|character: char| OptionalSubtagType::try_from_char(character).is_some())
            .unwrap_or(source.len());

        let mut locale = PosixLocale {
            language: &source[..language_end_bound],
            ..Default::default()
        };

        assert!(!locale.language.is_empty());

        for subtag in OptionalSubtagsIterator::new(&source[language_end_bound..]) {
            // The iterator's indices are relative to the subtag portion of the string;
            // shift them so they refer to positions in `source`
            let start_index = subtag.start_index + language_end_bound;
            let end_index = subtag.end_index + language_end_bound;
            assert!(start_index <= source.len());
            assert!(end_index <= source.len());

            let subtag_slice = &source[start_index..end_index];

            match subtag.subtag_type {
                OptionalSubtagType::Territory => locale.territory = Some(subtag_slice),
                OptionalSubtagType::Codeset => locale.codeset = Some(subtag_slice),
                OptionalSubtagType::Modifier => locale.modifier = Some(subtag_slice),
            };
        }

        locale
    }

    /// Converts this locale into an ICU [`Locale`].
    ///
    /// The language and, when present, the territory (as the region) are parsed into
    /// ICU subtags. The codeset and modifier are not used. The resulting locale always
    /// carries the `POSIX_KEYWORD` Unicode extension.
    ///
    /// # Errors
    ///
    /// Returns the parser error if the language or the territory is rejected by
    /// `icu_locid`.
    pub fn icu_locale(&self) -> Result<Locale, icu_locid::ParserError> {
        let language = subtags::Language::try_from_bytes(self.language.as_bytes())?;

        // `transpose` turns Option<Result<..>> into Result<Option<..>> so `?` can
        // propagate a parse failure while an absent territory stays `None`
        let region = self
            .territory
            .map(|territory| subtags::Region::try_from_bytes(territory.as_bytes()))
            .transpose()?;

        // TODO: should script/variants always be empty?
        let language_id = LanguageIdentifier {
            language,
            script: None,
            region,
            variants: Variants::new(),
        };

        // TODO: should attributes always be empty?
        let unicode_extensions = Unicode {
            keywords: POSIX_KEYWORD,
            attributes: Attributes::new(),
        };
        let extensions = Extensions::from_unicode(unicode_extensions);

        Ok(Locale {
            id: language_id,
            extensions,
        })
    }
}

/// Returns the language identifiers for the locale strings reported by
/// `category.get_locales()`.
///
/// Each string is parsed with [`PosixLocale::from_str`] and converted with
/// [`PosixLocale::icu_locale`]; strings whose conversion fails are skipped.
pub fn get_locales(category: LocaleCategory) -> Vec<LanguageIdentifier> {
    category
        .get_locales()
        .iter()
        .filter_map(|locale_str| {
            let locale = PosixLocale::from_str(locale_str).icu_locale().ok()?;
            // TODO: is it ok to strip this posix metadata from the locale?
            Some(locale.id)
        })
        .collect()
}