//! Analyze dependencies of cargo projects
use std::cell::OnceCell;
use std::collections::HashMap;
use std::convert::Infallible;
use std::str::FromStr;

use analyzeme::ProfilingData;
use camino::Utf8PathBuf;
use cargo_metadata::Edition;
use serde::de::value::StrDeserializer;
use serde::Deserialize;

/// When a crate name is set to 3 underscores (`___`), we assume this is cargo
/// probing rustc without actually compiling anything. Clarification:
/// https://rust-lang.zulipchat.com/#narrow/stream/246057-t-cargo/topic/New.20visualizations.20for.20cargo-timing.2Ehtml/near/436821099
const CARGO_PROBE_CRATE_NAME: &str = "___";
/// File extension (without the leading dot) that a file must have to be
/// loaded as a self-profile.
const PROFDATA_EXTENSION: &str = "mm_profdata";
/// Directory that is scanned for self-profile files. The path is relative, so
/// it is resolved against the current working directory of the process.
const PROFILES_DIR: &str = "target/profiles";

/// An output kind parsed from the values of rustc's `emit` argument.
///
/// Variants deserialize from their kebab-case names (for example `dep-info`
/// or `llvm-ir`) unless an explicit `rename` attribute overrides that name.
#[derive(Clone, Copy, Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Emit {
    /// Deserialized from `asm`.
    #[serde(rename = "asm")]
    Assembly,
    /// Deserialized from `dep-info`.
    DepInfo,
    /// Deserialized from `link`.
    Link,
    /// Deserialized from `llvm-bc`.
    #[serde(rename = "llvm-bc")]
    LlvmBitcode,
    /// Deserialized from `llvm-ir`.
    LlvmIr,
    /// Deserialized from `metadata`.
    Metadata,
    /// Deserialized from `mir`.
    Mir,
    /// Deserialized from `obj`.
    #[serde(rename = "obj")]
    Object,
}

/// The input that a rustc invocation was asked to compile.
#[derive(Clone, Debug)]
pub enum RustcFilename {
    /// The filename argument was a single hyphen (`-`), which instructs rustc
    /// to read the source from standard input.
    StandardInput,
    /// Any other filename argument, stored as given (it may be a relative path
    /// and is not checked for existence).
    File(Utf8PathBuf),
}

impl FromStr for RustcFilename {
    type Err = Infallible;

    /// Interprets the filename argument of a rustc invocation.
    ///
    /// A lone hyphen maps to [`RustcFilename::StandardInput`]; every other
    /// string is wrapped, unvalidated, in [`RustcFilename::File`]. This
    /// conversion never fails.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Cargo sets the filename to be a single hyphen (`-`: U+002D) when probing rustc.
        // This instructs rustc to read the file from standard input.
        if s == "-" {
            return Ok(Self::StandardInput);
        }

        // TODO: some build-scripts seem to generate relative paths, so checking
        // for existence here would fail:
        // assert!(path.is_file(), "Path does not exist: {path:#?}");
        Ok(Self::File(Utf8PathBuf::from(s)))
    }
}

/// The arguments of a single rustc invocation, as recovered by
/// [`RustcArgs::parse_from`] from a whitespace-separated command line.
#[derive(Clone, Debug)]
// TODO: investigate using `rustc --print` to validate this information
// TODO: reference data directly
pub struct RustcArgs {
    /// The single argument that is not a `-`/`--` option (or a lone `-`).
    pub filename: RustcFilename,
    /// Value of the `crate-name` argument.
    pub crate_name: String,
    /// Value of the `edition` argument; `Edition::E2015` when it is absent.
    pub edition: Edition,
    /// Values of the `crate-type` argument, split on commas; empty when absent.
    pub crate_types: Vec<super::TargetKind>,
    /// Values of the `emit` argument, split on commas; empty when absent.
    pub emit: Vec<Emit>,
    /// Raw values of every `extern` argument, in order of appearance.
    pub externs: Vec<String>,
    /// Raw values of every `C` argument, in order of appearance.
    pub codegen_options: Vec<String>,
    /// Raw values of every `Z` argument, in order of appearance.
    pub unstable_options: Vec<String>,
    /// Every remaining argument, keyed by its name without the leading
    /// dashes, with its values in order of appearance.
    pub extra_args: HashMap<String, Vec<String>>,
}

impl RustcArgs {
    /// Removes `key` from `map` and returns its only value.
    ///
    /// Returns `None` when the argument was never recorded.
    ///
    /// # Panics
    ///
    /// Panics if the argument was recorded with anything other than exactly
    /// one value.
    fn single_value(map: &mut HashMap<String, Vec<String>>, key: &str) -> Option<String> {
        let mut values = map.remove(key)?;

        assert_eq!(
            values.len(),
            1,
            "Expected exactly one value for argument `{key}`, got: {values:?}"
        );
        values.pop()
    }

    /// Returns the number of `(` minus the number of `)` in `source`.
    ///
    /// A positive result means parentheses are still open, a negative result
    /// means more were closed than opened.
    fn count_parens(source: &str) -> i32 {
        source.chars().fold(0, |count, character| match character {
            '(' => count + 1,
            ')' => count - 1,
            _ => count,
        })
    }

    /// Returns `true` for rustc flags that are never followed by a value, so
    /// the parser must not consume the next chunk on their behalf.
    fn takes_no_value(flag: &str) -> bool {
        matches!(
            flag,
            "-h" | "--help" | "-g" | "-O" | "--test" | "-v" | "--verbose" | "-V" | "--version"
        )
    }

    /// Re-assembles an argument that was split on whitespace although it
    /// contains parentheses, e.g. `cfg(feature, values("default", "simd"))`.
    ///
    /// Starting from `source`, further chunks are pulled from `iterator` until
    /// every opening parenthesis has been closed. Chunks are re-joined with a
    /// single space so that the whitespace which originally separated them is
    /// not lost (otherwise `values("a b")` would turn into `values("ab")`).
    ///
    /// # Panics
    ///
    /// Panics if `source` closes more parentheses than it opens, or if
    /// `iterator` is exhausted before the parentheses are balanced.
    fn balance_parens<'a>(source: &'a str, iterator: &mut impl Iterator<Item = &'a str>) -> String {
        let mut buffer = source.to_string();

        // If there are more closing parentheses than opening, there's a bug
        // (most likely from this argument parsing code)
        let mut open_parens = Self::count_parens(source);
        assert!(
            open_parens >= 0,
            "Got too many closing parentheses while parsing argument: `{source}`"
        );

        while open_parens > 0 {
            // `unwrap_or_else` keeps the message formatting off the happy path
            let next_chunk = iterator.next().unwrap_or_else(|| {
                panic!("Ran out of arguments while searching for closing parenthesis: `{buffer}`")
            });
            buffer.push(' ');
            buffer.push_str(next_chunk);

            open_parens += Self::count_parens(next_chunk);
        }

        buffer
    }

    /// Parses a whitespace-separated rustc command line such as
    /// `rustc --crate-name foo --edition=2021 src/lib.rs -C opt-level=3`.
    ///
    /// Options may be written as `--name=value`, `--name value` or
    /// `-N value`. Flags that take no value (see [`Self::takes_no_value`]) are
    /// recorded in `extra_args` with an empty value list. The only argument
    /// that is not an option is taken to be the filename.
    ///
    /// # Panics
    ///
    /// Panics if `source` is empty, does not start with `rustc`, contains no
    /// or several filenames, lacks `--crate-name`, ends with an option that is
    /// missing its value, or contains values that cannot be deserialized.
    pub fn parse_from(source: &str) -> Self {
        let mut args_map: HashMap<String, Vec<String>> = HashMap::new();
        let mut arg_chunks = source.split_ascii_whitespace();

        let command_name = arg_chunks.next().expect("Arguments cannot be empty");
        assert_eq!(command_name, "rustc");

        let filename: OnceCell<String> = OnceCell::new();

        while let Some(initial_chunk) = arg_chunks.next() {
            // Need to handle cases such as `--check-cfg cfg(feature, values("default", "simd"))`
            // To do this, consume more chunks until the parentheses are balanced
            let owned_chunk = Self::balance_parens(initial_chunk, &mut arg_chunks);
            let chunk = owned_chunk.as_str();

            // Strip the leading dashes; a lone `-` is a filename (standard input), not an option
            let arg = if let Some(long_arg) = chunk.strip_prefix("--") {
                long_arg
            } else if let Some(short_arg) =
                chunk.strip_prefix('-').filter(|rest| !rest.is_empty())
            {
                short_arg
            } else {
                // `OnceCell::set` hands back the *rejected* value on failure,
                // so the first filename has to be read from the cell itself
                if filename.set(chunk.to_string()).is_err() {
                    let previous_filename = filename
                        .get()
                        .expect("cell is initialized whenever `set` fails");
                    panic!("Found multiple unexpected arguments, unable to parse filename. First: `{previous_filename}` Second: `{chunk}`");
                }
                continue;
            };

            let (arg_name, value) = if let Some((name, value)) = arg.split_once('=') {
                (name, value.to_string())
            } else if Self::takes_no_value(chunk) {
                // Record the flag without swallowing the next chunk as its value
                args_map.entry(arg.to_string()).or_default();
                continue;
            } else {
                let value = arg_chunks.next().unwrap_or_else(|| {
                    panic!("No arguments left, but expected value for argument: `{chunk}`")
                });

                (arg, Self::balance_parens(value, &mut arg_chunks))
            };

            args_map
                .entry(arg_name.to_string())
                .or_default()
                .push(value);
        }

        Self {
            filename: RustcFilename::from_str(
                filename.get().expect("Missing filename in arguments"),
            )
            .unwrap(),
            crate_name: Self::single_value(&mut args_map, "crate-name")
                .expect("Missing `--crate-name` in arguments"),
            // rustc defaults to the 2015 edition when none is given
            edition: Self::single_value(&mut args_map, "edition")
                .map(|edition| deserialize_string(&edition))
                .unwrap_or(Edition::E2015),
            crate_types: args_map
                .remove("crate-type")
                .map(deserialize_vec)
                .unwrap_or_default(),
            emit: args_map
                .remove("emit")
                .map(deserialize_vec)
                .unwrap_or_default(),
            externs: args_map.remove("extern").unwrap_or_default(),
            codegen_options: args_map.remove("C").unwrap_or_default(),
            unstable_options: args_map.remove("Z").unwrap_or_default(),
            // Whatever has not been claimed above is kept verbatim
            extra_args: args_map,
        }
    }
}

/// A single rustc self-profile together with the invocation that produced it.
#[derive(Debug)]
pub struct SelfProfile {
    /// The rustc arguments parsed from the command line stored in the
    /// profile's metadata.
    pub args: RustcArgs,
    /// The profiling data loaded from the profile file.
    pub data: ProfilingData,
}

/// All self-profiles found on disk, split by the kind of rustc invocation.
#[derive(Debug)]
pub struct ProfileCollection {
    /// Profiles of invocations whose input was a file.
    pub crates: Vec<SelfProfile>,
    /// Profiles of invocations that read from standard input, i.e. cargo
    /// probing rustc.
    pub probes: Vec<SelfProfile>,
}

impl ProfileCollection {
    pub fn new() -> Self {
        let mut crate_profiles = Vec::new();
        let mut probe_profiles = Vec::new();

        let valid_profiles = std::fs::read_dir(PROFILES_DIR)
            .unwrap()
            .into_iter()
            .filter_map(|potential_entry| potential_entry.ok())
            .filter_map(|entry| Utf8PathBuf::from_path_buf(entry.path()).ok())
            .filter(|path| path.is_file())
            .filter(|path| path.extension() == Some(&PROFDATA_EXTENSION));

        for profile_path in valid_profiles {
            let relative_filename = profile_path.file_name().unwrap();
            let (file_name, _extension) = relative_filename.rsplit_once('.').unwrap();
            // Self-profiles have paths like:
            // CRATE_NAME-RUSTC_PID.mm_profdata
            // We need to special-case the crate name cargo uses when probing rustc;
            // these profiles are not associated to any specific crate in the graph
            let (crate_name, process_id) = file_name.rsplit_once('-').unwrap();

            // Make sure PID is only numbers
            assert!(
                process_id.chars().all(|character| character.is_numeric()),
                "{}",
                process_id
            );

            let data = ProfilingData::new(profile_path.as_std_path()).unwrap();

            let cmd = &data.metadata().cmd;
            let args = RustcArgs::parse_from(cmd);

            match args.filename {
                RustcFilename::StandardInput => {
                    if crate_name != CARGO_PROBE_CRATE_NAME {
                        if let Some(suffix) = crate_name.strip_prefix("probe") {
                            assert!(
                                suffix.chars().all(|c| c.is_numeric()),
                                "Unexpected suffix for probe crate name: {suffix}"
                            );
                        } else {
                            panic!("Unexpected probe with name: {crate_name}")
                        }
                    }

                    probe_profiles.push(SelfProfile { args, data });
                }
                RustcFilename::File(_) => {
                    crate_profiles.push(SelfProfile { args, data });
                }
            }
        }

        Self {
            crates: crate_profiles,
            probes: probe_profiles,
        }
    }
}

/// Deserializes a single value of type `T` from the plain string `source`,
/// e.g. an enum variant from its serialized name.
///
/// # Panics
///
/// Panics if `source` is not a valid representation of `T`; the message names
/// the offending string.
fn deserialize_string<'a, T: Deserialize<'a>>(source: &str) -> T {
    let deserializer = StrDeserializer::<serde::de::value::Error>::new(source);
    T::deserialize(deserializer)
        .unwrap_or_else(|error| panic!("Failed to deserialize `{source}`: {error}"))
}

fn deserialize_vec<'a, T: Deserialize<'a>>(source: Vec<String>) -> Vec<T> {
    source
        .iter()
        // .map(|arg| {
        //     // Can't split on commas if inside (parentheses)
        //     // Example: `--check-cfg cfg(feature, values("default", "simd"))`
        //     let mut split_points = Vec::new();
        //     let mut open_parens = 0;
        //     for (index, character) in arg.chars().enumerate() {
        //         match character {
        //             '(' => open_parens += 1,
        //             ')' => open_parens -= 1,
        //             ',' => {
        //                 if open_parens == 0 {
        //                     split_points.push(index)
        //                 }
        //             }
        //             _ => (),
        //         }
        //     }
        // })
        .map(|arg| arg.split_terminator(','))
        .flatten()
        .map(deserialize_string::<T>)
        .collect()
}