Refactor measurements into `annotations` module

finchie
Apr 30, 2024, 7:10 AM
BRXHJFU7ANVWOFXBNR5EYUGDURCFCZIF66VDV6L3JBYC45CAFNJAC

Dependencies

  • [2] PJPTNU2S Skip re-generating `--timings=json` on every run
  • [3] XVQXXAGZ Add support for generating treemaps
  • [4] ZPFD3275 Switch from `cargo_metadata`+`petgraph` to `guppy`
  • [5] UQJO24KB Use `forceatlas2` to construct graph layout
  • [6] B2L26LOA Store index of dependency nodes
  • [7] LOR3KOXG Parse JSON output from `cargo build --timings`
  • [8] JVYWRCPT Add basic chart visualisation
  • [9] T34OV3YQ Store timings output in `timings::Output` struct
  • [10] YA5ITLOV Add support for Sankey diagrams
  • [11] 475UXTLY Use multiple force-directed layout algorithms to improve graph readability
  • [12] 2H6RJC35 Account for floating point inaccuracy in histogram calculation
  • [13] UXJFRBBL Move graph functionality into `graph` module
  • [14] ZEN3WUPD Add support for histogram charts
  • [15] C43IWI7G Move visualization logic into separate module
  • [16] OPTMCUTB Use timings `duration` to set size of rendered node
  • [17] 7CVIL7UJ Create simple metadata parser

Change contents

  • replacement in src/visualize/sankey.rs at line 1
    [4.34][4.35:55]()
    use crate::timings;
    [4.34]
    [4.55]
    use crate::{Annotations, Measurement, Variable};
  • replacement in src/visualize/sankey.rs at line 3
    [4.56][4.56:215]()
    use guppy::graph::{DependencyDirection, PackageGraph};
    pub fn nodes(graph: &PackageGraph) -> Vec<&str> {
    graph.packages().map(|pkg| pkg.name()).collect()
    [4.56]
    [4.215]
    pub fn nodes<'graph>(annotations: &'graph Annotations) -> Vec<&'graph str> {
    annotations
    .packages()
    .map(|id| annotations.metadata(id).unwrap().name())
    .collect()
  • replacement in src/visualize/sankey.rs at line 10
    [4.218][4.218:450]()
    pub fn links<'graph>(
    graph: &'graph PackageGraph,
    timings: &timings::Output,
    ) -> Vec<(&'graph str, &'graph str, f64)> {
    let package_set = graph.resolve_all();
    let mut links = Vec::with_capacity(package_set.len());
    [4.218]
    [4.450]
    pub fn links<'graph>(annotations: &'graph Annotations) -> Vec<(&'graph str, &'graph str, f64)> {
    let links = annotations.links().collect::<Vec<_>>();
    let mut sankey_links = Vec::with_capacity(links.len());
  • replacement in src/visualize/sankey.rs at line 14
    [4.451][4.451:1157]()
    for link in package_set.links(DependencyDirection::Forward) {
    let (from, to) = link.endpoints();
    let time_taken = timings.pkg_time(to.id()).unwrap_or(0_f64);
    // Can't just set the edge weight to duration - if the package has N dependents
    // it would appear to take N times longer, just need to divide duration by
    // direct dependents to fix
    let direct_dependents = to
    .direct_links_directed(DependencyDirection::Reverse)
    .count();
    // Make sure to not divide by 0
    let edge_weight = if direct_dependents == 0 {
    time_taken
    } else {
    time_taken / (direct_dependents as f64)
    };
    [4.451]
    [4.1157]
    for (from, to) in links {
    let edge_weight = annotations
    .variable(to, Variable::UnitDuration, Measurement::Relative)
    .unwrap_or(0_f64);
  • replacement in src/visualize/sankey.rs at line 19
    [4.1158][4.1158:1217]()
    links.push((from.name(), to.name(), edge_weight));
    [4.1158]
    [4.1217]
    let (from_meta, to_meta) = (
    annotations.metadata(from).unwrap(),
    annotations.metadata(to).unwrap(),
    );
    sankey_links.push((from_meta.name(), to_meta.name(), edge_weight));
  • replacement in src/visualize/sankey.rs at line 26
    [4.1224][4.1224:1234]()
    links
    [4.1224]
    [4.1234]
    sankey_links
  • edit in src/visualize/mod.rs at line 1
    [4.54][4.55:76]()
    use crate::timings;
  • edit in src/visualize/mod.rs at line 4
    [4.1339][4.171:203](),[4.171][4.171:203]()
    use guppy::graph::PackageGraph;
  • replacement in src/visualize/mod.rs at line 18
    [4.277][4.277:359]()
    pub fn for_style(style: Style, graph: &PackageGraph, timings: &timings::Output) {
    [4.277]
    [4.359]
    pub fn for_style(style: Style, annotations: &crate::Annotations) {
  • replacement in src/visualize/mod.rs at line 28
    [4.830][4.830:881]()
    .data(graph::data(graph, timings))
    [4.830]
    [4.881]
    .data(graph::data(annotations))
  • replacement in src/visualize/mod.rs at line 32
    [4.1431][4.1431:1522]()
    .nodes(sankey::nodes(graph))
    .links(sankey::links(graph, timings))
    [4.1431]
    [4.1522]
    .nodes(sankey::nodes(annotations))
    .links(sankey::links(annotations))
  • replacement in src/visualize/mod.rs at line 41
    [4.466][4.466:510]()
    .data(histogram::data(timings))
    [4.466]
    [4.510]
    .data(histogram::data(annotations))
  • replacement in src/visualize/mod.rs at line 45
    [3.215][3.215:268]()
    .data(treemap::data(graph, timings))
    [3.215]
    [3.268]
    .data(treemap::data(annotations, &annotations.root_packages()))
  • replacement in src/visualize/mod.rs at line 51
    [3.565][3.565:635]()
    .levels(treemap::levels(timings.pkg_times().count()))
    [3.565]
    [3.635]
    .levels(treemap::levels(annotations.links().count()))
  • replacement in src/visualize/mod.rs at line 61
    [4.662][4.662:719]()
    let (x_axis, y_axis) = histogram::axes(timings);
    [4.662]
    [4.719]
    let (x_axis, y_axis) = histogram::axes(annotations);
  • replacement in src/visualize/histogram.rs at line 1
    [4.817][4.818:838]()
    use crate::timings;
    [4.817]
    [4.838]
    use crate::{Annotations, Measurement, Variable};
  • replacement in src/visualize/histogram.rs at line 26
    [4.1371][4.1371:1493]()
    pub fn axes(timings: &timings::Output) -> (Axis, Axis) {
    let pkg_durations: Vec<f64> = timings.pkg_times().collect();
    [4.1371]
    [4.1493]
    pub fn axes(annotations: &Annotations) -> (Axis, Axis) {
    let packages = annotations.packages();
    let pkg_durations = packages
    .filter_map(|id| annotations.variable(id, Variable::UnitDuration, Measurement::Exact))
    .collect::<Vec<_>>();
  • replacement in src/visualize/histogram.rs at line 49
    [4.2176][4.2176:2300]()
    pub fn data(timings: &timings::Output) -> Vec<DataPoint> {
    let pkg_durations: Vec<f64> = timings.pkg_times().collect();
    [4.2176]
    [4.2300]
    pub fn data(annotations: &Annotations) -> Vec<DataPoint> {
    let packages = annotations.packages();
    let pkg_durations = packages
    .filter_map(|id| annotations.variable(id, Variable::UnitDuration, Measurement::Exact))
    .collect::<Vec<_>>();
  • replacement in src/visualize/graph.rs at line 1
    [4.146][4.241:261](),[4.261][4.147:178](),[4.146][4.147:178]()
    use crate::timings;
    use std::collections::HashMap;
    [4.146]
    [4.178]
    use crate::{Annotations, Measurement, Variable};
  • replacement in src/visualize/graph.rs at line 4
    [4.204][4.204:295]()
    use guppy::{
    graph::{DependencyDirection, PackageGraph, PackageSet},
    PackageId,
    };
    [4.204]
    [4.295]
    use guppy::PackageId;
    use indexmap::IndexMap;
  • replacement in src/visualize/graph.rs at line 8
    [4.315][4.315:352](),[4.352][4.390:421]()
    package_set: &'graph PackageSet,
    timings: &timings::Output,
    [4.315]
    [4.352]
    annotations: &'graph Annotations,
  • replacement in src/visualize/graph.rs at line 10
    [4.428][4.428:540]()
    let link_index: HashMap<&PackageId, usize> = package_set
    .package_ids(DependencyDirection::Forward)
    [4.428]
    [4.540]
    let link_index: IndexMap<&PackageId, usize> = annotations
    .packages()
  • replacement in src/visualize/graph.rs at line 13
    [4.561][4.561:601]()
    .map(|(index, id)| (id, index))
    [4.561]
    [4.601]
    .map(|(index, id)| (*id, index))
  • replacement in src/visualize/graph.rs at line 16
    [4.622][4.622:821]()
    let edges = package_set
    .links(DependencyDirection::Forward)
    .map(|link| (link.from(), link.to()))
    .map(|(from, to)| (link_index.get(from.id()), link_index.get(to.id())))
    [4.622]
    [4.821]
    let edges = annotations
    .links()
    .map(|(from, to)| (link_index.get(from), link_index.get(to)))
  • replacement in src/visualize/graph.rs at line 22
    [4.932][4.932:1011](),[4.1011][4.422:480]()
    let sizes = package_set
    .package_ids(DependencyDirection::Forward)
    .map(|id| timings.pkg_time(id).unwrap_or(0_f64));
    [4.932]
    [4.1036]
    let sizes = link_index.keys().map(|id| {
    annotations
    .variable(id, Variable::UnitDuration, Measurement::Exact)
    .unwrap_or(0_f64)
    });
  • edit in src/visualize/graph.rs at line 35
    [4.1215][4.1215:1395]()
    // TODO: validate that PackageSet::package_ids() is stable; it is used twice (link_index, here)
    // so need to validate ordering is consistent (or, find a cleaner solution)
  • replacement in src/visualize/graph.rs at line 38
    [4.1442][4.1442:1548]()
    .zip(package_set.package_ids(DependencyDirection::Forward))
    .map(|(node, id)| (id, node))
    [4.1442]
    [4.1548]
    .zip(annotations.packages())
    .map(|(node, id)| (*id, node))
  • replacement in src/visualize/graph.rs at line 42
    [4.1551][4.262:356]()
    pub fn data(graph: &PackageGraph, timings: &timings::Output) -> charming::series::GraphData {
    [4.1551]
    [4.1618]
    pub fn data(annotations: &Annotations) -> charming::series::GraphData {
  • replacement in src/visualize/graph.rs at line 46
    [4.1725][4.1725:1768](),[4.1768][4.481:529]()
    let package_set = graph.resolve_all();
    let layout = layout(&package_set, timings);
    [4.1725]
    [4.1807]
    let layout = layout(annotations);
  • replacement in src/visualize/graph.rs at line 50
    [4.385][4.385:708]()
    let unit_time = if let Some(duration) = timings.pkg_time(id) {
    duration
    } else {
    // TODO: once using the resolved crate graph, `None` should never appear
    println!("Queried node outside cargo's unit graph: {id}");
    0_f64
    };
    [4.385]
    [4.708]
    let unit_time = annotations
    .variable(id, Variable::UnitDuration, Measurement::Exact)
    .unwrap_or(0_f64);
  • replacement in src/visualize/graph.rs at line 57
    [4.875][4.875:945]()
    name: graph.metadata(id).unwrap().name().to_string(),
    [4.875]
    [4.945]
    name: annotations.metadata(id).unwrap().name().to_string(),
  • replacement in src/visualize/graph.rs at line 68
    [4.2190][4.2190:2376]()
    let links = graph
    .query_forward(graph.package_ids())
    .unwrap()
    .resolve()
    .links(DependencyDirection::Forward)
    .map(|link| link.endpoints())
    [4.2190]
    [4.2376]
    let links = annotations
    .links()
  • replacement in src/visualize/graph.rs at line 71
    [4.2438][4.2438:2542]()
    source: source.id().repr().to_string(),
    target: target.id().repr().to_string(),
    [4.2438]
    [4.2542]
    source: source.repr().to_string(),
    target: target.repr().to_string(),
  • replacement in src/main.rs at line 3
    [4.63][4.2712:2725](),[4.2712][4.2712:2725]()
    mod timings;
    [4.2712]
    [4.1121]
    mod annotations;
  • edit in src/main.rs at line 6
    [4.1078]
    [4.706]
    pub use annotations::{Annotations, Measurement, Variable};
  • replacement in src/main.rs at line 11
    [2.387][2.387:440]()
    let timings = timings::Output::new(source_data);
    [2.387]
    [4.2757]
    let timings = annotations::timings::Output::new(source_data);
  • replacement in src/main.rs at line 16
    [4.897][3.1291:1370]()
    visualize::for_style(visualize::Style::Treemap, &package_graph, &timings);
    [4.897]
    [4.924]
    let annotations = annotations::Annotations::new(&package_graph, timings.repr);
    visualize::for_style(visualize::Style::Treemap, &annotations);
  • file addition: annotations (d--r------)
    [4.15]
  • file move: timings.rs (----------)timings.rs (----------)
    [0.3345]
    [4.35]
  • replacement in src/annotations/timings.rs at line 78
    [4.166][4.166:210]()
    repr: HashMap<PackageId, Vec<Message>>,
    [4.166]
    [4.210]
    pub repr: HashMap<PackageId, Vec<Message>>,
  • edit in src/annotations/timings.rs at line 143
    [4.2251][4.0:181](),[4.181][3.1116:1225](),[3.1225][4.3272:3632](),[4.181][4.3272:3632](),[4.181][4.2689:2695](),[4.2251][4.2689:2695](),[4.3632][4.2689:2695](),[4.2689][4.2689:2695](),[4.2695][3.1226:1284]()
    }
    pub fn pkg_time(&self, pkg: &PackageId) -> Option<f64> {
    self.repr
    .get(pkg)
    .map(|timings| timings.iter().map(|msg| msg.duration).sum())
    }
    pub fn pkg_messages(&self, pkg: &PackageId) -> Option<&Vec<Message>> {
    self.repr.get(pkg)
    }
    // TODO: this returns each total package time, but it would be interesting to filter by
    // crate type (lib, binary, proc_macro), target, build script runs etc
    pub fn pkg_times<'s>(&'s self) -> impl Iterator<Item = f64> + 's {
    self.repr
    .values()
    .map(|messages| messages.iter().map(|msg| msg.duration).sum())
    }
    pub fn len(&self) -> usize {
    self.repr.len()
  • file addition: mod.rs (----------)
    [0.3345]
    use std::collections::HashMap;
    use guppy::graph::{DependencyDirection, PackageGraph, PackageMetadata};
    use guppy::PackageId;
    use indexmap::IndexMap;
    use petgraph::data::{Element, FromElements};
    use petgraph::graph::NodeIndex;
    use petgraph::matrix_graph::Zero;
    use petgraph::{Direction, Graph};
    pub mod timings;
    /// How a [`Variable`] should be reported by `Annotations::variable`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub enum Measurement {
        /// The exact value divided by the number of direct dependents of the
        /// package (left undivided when the package has no dependents).
        Relative,
        /// The value exactly as computed, without any scaling.
        Exact,
    }
    /// Which quantity `Annotations::variable` should compute for a package.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub enum Variable {
        /// Sum of the durations of the timing messages recorded for the package itself.
        UnitDuration,
        /// The package's own duration plus the durations recorded for its
        /// direct dependencies.
        TotalDuration,
    }
    /// A single package in the annotated dependency graph.
    #[derive(Debug, Clone)]
    pub struct Node<'graph> {
        /// Identifier of the package this node stands for.
        id: &'graph PackageId,
        /// Timing messages attached to this package, or `None` when none were
        /// supplied for it.
        timings: Option<Vec<timings::Message>>,
    }

    impl Node<'_> {
        /// Borrows the timing messages attached to this node, if there are any.
        pub fn timings(&self) -> Option<&Vec<timings::Message>> {
            let Self { timings, .. } = self;
            timings.as_ref()
        }
    }
    /// A package dependency graph whose nodes are annotated with build timings.
    ///
    /// Wraps a guppy [`PackageGraph`] and mirrors its packages and links in a
    /// petgraph [`Graph`] so that per-package measurements can be queried by
    /// [`PackageId`].
    #[derive(Debug, Clone)]
    pub struct Annotations<'graph> {
        /// Source graph; used for package metadata and root-package queries.
        package_graph: &'graph PackageGraph,
        /// Annotated mirror of `package_graph`: one node per package, one edge per link.
        graph: Graph<Node<'graph>, ()>,
        /// Maps each package id to the index of its node in `graph`, in insertion order.
        node_indices: IndexMap<&'graph PackageId, usize>,
    }

    impl<'graph> Annotations<'graph> {
        /// Builds the annotated graph from `package_graph`.
        ///
        /// Each package receives the timing messages stored under its id in
        /// `timings` (or `None` when there is no entry). Entries in `timings`
        /// whose id is not part of `package_graph` are dropped.
        ///
        /// # Panics
        ///
        /// Panics if a link reported by `package_graph` refers to a package id
        /// that `package_graph.package_ids()` did not yield.
        pub fn new(
            package_graph: &'graph PackageGraph,
            mut timings: HashMap<PackageId, Vec<timings::Message>>,
        ) -> Self {
            let node_indices = package_graph
                .package_ids()
                .enumerate()
                .map(|(index, id)| (id, index))
                .collect::<IndexMap<&PackageId, usize>>();

            // Iterate over node_indices.keys() to preserve ordering: the n-th node
            // added to the graph must be the package stored at index n.
            let nodes = node_indices.keys().copied().map(|id| Element::Node {
                weight: Node {
                    id,
                    timings: timings.remove(id),
                },
            });

            let package_set = package_graph.resolve_all();
            let edges = package_set
                .links(DependencyDirection::Forward)
                .map(|link| Element::Edge {
                    source: *node_indices
                        .get(link.from().id())
                        .expect("link source must be a package of the same graph"),
                    target: *node_indices
                        .get(link.to().id())
                        .expect("link target must be a package of the same graph"),
                    weight: (),
                });

            let graph = Graph::from_elements(nodes.chain(edges));

            Self {
                package_graph,
                graph,
                node_indices,
            }
        }

        /// Returns the metadata of the root packages of the fully resolved package graph.
        pub fn root_packages(&self) -> Vec<PackageMetadata> {
            self.package_graph
                .resolve_all()
                .root_packages(DependencyDirection::Forward)
                .collect()
        }

        /// Looks up the petgraph node index for `id`, or `None` if the package is unknown.
        fn node_index(&self, id: &PackageId) -> Option<NodeIndex> {
            let node_index = *self.node_indices.get(id)?;
            Some(NodeIndex::new(node_index))
        }

        /// Returns the annotated node for `id`, or `None` if the package is unknown.
        pub fn pkg(&self, id: &PackageId) -> Option<&Node<'_>> {
            let node_index = self.node_index(id)?;
            Some(&self.graph[node_index])
        }

        /// Iterates over the ids of all packages, in node-index order.
        pub fn packages(&self) -> impl Iterator<Item = &&PackageId> {
            self.node_indices.keys()
        }

        /// Iterates over all links as `(source, target)` pairs of package ids.
        pub fn links(&self) -> impl Iterator<Item = (&PackageId, &PackageId)> {
            self.graph
                .raw_edges()
                .iter()
                .map(|edge| (self.graph[edge.source()].id, self.graph[edge.target()].id))
        }

        /// Fetches the guppy metadata for `id` from the underlying package graph.
        ///
        /// # Errors
        ///
        /// Returns whatever error `PackageGraph::metadata` reports for `id`.
        pub fn metadata(&self, id: &PackageId) -> Result<PackageMetadata, guppy::Error> {
            self.package_graph.metadata(id)
        }

        /// Counts the incoming edges of `id`, i.e. its direct dependents.
        ///
        /// Returns `None` if the package is unknown.
        pub fn dependents(&self, id: &PackageId) -> Option<usize> {
            let node_index = self.node_index(id)?;
            let dependents = self
                .graph
                .edges_directed(node_index, Direction::Incoming)
                .count();
            Some(dependents)
        }

        /// Computes `variable` for the package `id`, reported as `measurement`.
        ///
        /// * [`Variable::UnitDuration`] sums the durations of the package's own
        ///   timing messages; it is `None` when the package has no timings.
        /// * [`Variable::TotalDuration`] adds the durations recorded for the
        ///   package's direct dependencies (outgoing neighbours) to its own
        ///   duration, treating a missing own duration as `0`.
        ///
        /// With [`Measurement::Relative`] the exact value is divided by the
        /// number of direct dependents, unless there are none.
        ///
        /// Returns `None` if the package is unknown.
        pub fn variable(
            &self,
            id: &PackageId,
            variable: Variable,
            measurement: Measurement,
        ) -> Option<f64> {
            let exact_measurement: f64 = match variable {
                Variable::UnitDuration => self
                    .pkg(id)?
                    .timings()?
                    .iter()
                    .map(|msg| msg.duration)
                    .sum(),
                Variable::TotalDuration => {
                    let node_index = self.node_index(id)?;
                    let dependency_time = self
                        .graph
                        .neighbors_directed(node_index, Direction::Outgoing)
                        .filter_map(|dep| self.graph[dep].timings.as_ref())
                        .flatten()
                        .map(|msg| msg.duration)
                        .sum::<f64>();
                    // Always ask for the exact own duration here: the relative
                    // scaling is applied once, below, to the whole total.
                    // Forwarding `measurement` would divide the package's own
                    // time by its dependents twice.
                    let self_time = self
                        .variable(id, Variable::UnitDuration, Measurement::Exact)
                        .unwrap_or(0_f64);
                    dependency_time + self_time
                }
            };

            Some(match measurement {
                Measurement::Exact => exact_measurement,
                Measurement::Relative => {
                    let dependents = self.dependents(id)? as f64;
                    // Make sure to not divide by 0
                    if dependents.is_zero() {
                        exact_measurement
                    } else {
                        exact_measurement / dependents
                    }
                }
            })
        }
    }
  • edit in Cargo.toml at line 11
    [4.1354]
    [4.2917]
    indexmap = "2.2.6"
    petgraph = "0.6.4"
  • edit in Cargo.lock at line 160
    [4.2087]
    [4.3022]
    "indexmap",
    "petgraph",