//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the functionality used by the BottleneckAnalysis
/// view to report bottleneck information.
///
//===----------------------------------------------------------------------===//

#include "Views/BottleneckAnalysis.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/Support.h"
#include "llvm/Support/Format.h"

namespace llvm {
namespace mca {

#define DEBUG_TYPE "llvm-mca"

PressureTracker::PressureTracker(const MCSchedModel &Model)
    : SM(Model),
      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
  computeProcResourceMasks(SM, ProcResID2Mask);

  // Ignore the invalid resource at index zero.
  unsigned NextResourceUsersIdx = 0;
  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
    NextResourceUsersIdx += ProcResource.NumUnits;
    uint64_t ResourceMask = ProcResID2Mask[I];
    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
  }

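  // Each processor resource unit gets its own (instruction IID, used cycles)
  // slot in ResourceUsers. Slots start out with an invalid user (~0U).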
  ResourceUsers.resize(NextResourceUsersIdx);
  std::fill(ResourceUsers.begin(), ResourceUsers.end(),
            std::make_pair<unsigned, unsigned>(~0U, 0U));
}

void PressureTracker::getResourceUsers(uint64_t ResourceMask,
                                       SmallVectorImpl<User> &Users) const {
  unsigned Index = getResourceStateIndex(ResourceMask);
  unsigned ProcResID = ResIdx2ProcResID[Index];
  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
    const User U = getResourceUser(ProcResID, I);
    if (U.second && IPI.find(U.first) != IPI.end())
      Users.emplace_back(U);
  }
}

void PressureTracker::onInstructionDispatched(unsigned IID) {
  IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
}

void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }

void PressureTracker::handleInstructionIssuedEvent(
    const HWInstructionIssuedEvent &Event) {
  unsigned IID = Event.IR.getSourceIndex();
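  // For every resource unit consumed by the issued instruction, record this
  // instruction as the most recent user, together with its resource usage.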
  for (const ResourceUse &Use : Event.UsedResources) {
    const ResourceRef &RR = Use.first;
    unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
    Index += countTrailingZeros(RR.second);
    ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
  }
}

void PressureTracker::updateResourcePressureDistribution(
    uint64_t CumulativeMask) {
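  // Walk the pending mask one resource at a time. A simple processor resource
  // gets its counter bumped directly; a resource group credits every one of
  // its sub-resources instead.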
  while (CumulativeMask) {
    uint64_t Current = CumulativeMask & (-CumulativeMask);
    unsigned ResIdx = getResourceStateIndex(Current);
    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
    uint64_t Mask = ProcResID2Mask[ProcResID];

    if (Mask == Current) {
      ResourcePressureDistribution[ProcResID]++;
      CumulativeMask ^= Current;
      continue;
    }

    Mask ^= Current;
    while (Mask) {
      uint64_t SubUnit = Mask & (-Mask);
      ResIdx = getResourceStateIndex(SubUnit);
      ProcResID = ResIdx2ProcResID[ResIdx];
      ResourcePressureDistribution[ProcResID]++;
      Mask ^= SubUnit;
    }

    CumulativeMask ^= Current;
  }
}

void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES: {
    const uint64_t ResourceMask = Event.ResourceMask;
    updateResourcePressureDistribution(Event.ResourceMask);

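    // Only charge a resource-pressure cycle to affected instructions whose
    // critical resource mask overlaps with the resources reported by this
    // event.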
    for (const InstRef &IR : Event.AffectedInstructions) {
      const Instruction &IS = *IR.getInstruction();
      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
      if (!BusyResources)
        continue;

      unsigned IID = IR.getSourceIndex();
      IPI[IID].ResourcePressureCycles++;
    }
    break;
  }

  case HWPressureEvent::REGISTER_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].RegisterPressureCycles++;
    }
    break;

  case HWPressureEvent::MEMORY_DEPS:
    for (const InstRef &IR : Event.AffectedInstructions) {
      unsigned IID = IR.getSourceIndex();
      IPI[IID].MemoryPressureCycles++;
    }
  }
}

#ifndef NDEBUG
void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
                                         const DependencyEdge &DepEdge,
                                         MCInstPrinter &MCIP) const {
  unsigned FromIID = DepEdge.FromIID;
  unsigned ToIID = DepEdge.ToIID;
  assert(FromIID < ToIID && "Graph should be acyclic!");

  const DependencyEdge::Dependency &DE = DepEdge.Dep;
  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");

  OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
  if (DE.Type == DependencyEdge::DT_REGISTER) {
    OS << " - REGISTER: ";
    MCIP.printRegName(OS, DE.ResourceOrRegID);
  } else if (DE.Type == DependencyEdge::DT_MEMORY) {
    OS << " - MEMORY";
  } else {
    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
           "Unsupported dependency type!");
    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
  }
  OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG

void DependencyGraph::pruneEdges(unsigned Iterations) {
  for (DGNode &N : Nodes) {
    unsigned NumPruned = 0;
    const unsigned Size = N.OutgoingEdges.size();
    // Use a cut-off threshold to prune edges with a low frequency: edges seen
    // in at most 10% of the iterations are removed.
    for (unsigned I = 0, E = Size; I < E; ++I) {
      DependencyEdge &Edge = N.OutgoingEdges[I];
      if (Edge.Frequency == Iterations)
        continue;
      double Factor = (double)Edge.Frequency / Iterations;
      if (0.10 < Factor)
        continue;
      Nodes[Edge.ToIID].NumPredecessors--;
      std::swap(Edge, N.OutgoingEdges[E - 1]);
      --E;
      ++NumPruned;
    }

    if (NumPruned)
      N.OutgoingEdges.resize(Size - NumPruned);
  }
}

void DependencyGraph::initializeRootSet(
    SmallVectorImpl<unsigned> &RootSet) const {
  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
    const DGNode &N = Nodes[I];
    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
      RootSet.emplace_back(I);
  }
}

void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
                                            unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero.  The
  // cost of a node is a measure of criticality: the higher the cost, the
  // bigger the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the resource
  // pressure. Resource interferences with low frequency values are ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet.  A node N is added to the `unvisited nodes` if all its
  // predecessors have been visited already.
  //
  // For simplicity, every node tracks the number of visited predecessors in
  // field `NumVisitedPredecessors`.  When that counter reaches the total
  // number of predecessors (field `NumPredecessors`), the node is added to
  // the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  // new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  // is empty. This algorithm works under the assumption that the graph is
  // acyclic.
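  //
  // Example: given edges A->B (cost 2) and B->C (cost 5), with A in the
  // RootSet, the propagated costs are A=0, B=2 and C=7, and the critical
  // predecessor of C is the B->C edge.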
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far.  If
        // so, update the total cost of the destination node (ToIID), as well
        // as its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}

void DependencyGraph::getCriticalSequence(
    SmallVectorImpl<const DependencyEdge *> &Seq) const {
  // At this stage, the nodes of the graph have already been visited, and costs
  // have been propagated through the edges (see method
  // `propagateThroughEdges()`).

  // Identify the node N with the highest cost in the graph. By construction,
  // that node is the last instruction of our critical sequence.
  // Field N.Depth gives the total length of the sequence.
  //
  // To obtain the sequence of critical edges, we simply follow the chain of
  // critical predecessors starting from node N (field
  // DGNode::CriticalPredecessor).
  const auto It = std::max_element(
      Nodes.begin(), Nodes.end(),
      [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
  unsigned IID = std::distance(Nodes.begin(), It);
  Seq.resize(Nodes[IID].Depth);
  for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
    const DGNode &N = Nodes[IID];
    DE = &N.CriticalPredecessor;
    IID = N.CriticalPredecessor.FromIID;
  }
}

void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
                                          const MCInst &MCI,
                                          bool UseDifferentColor) const {
  FOS.PadToColumn(14);
  if (UseDifferentColor)
    FOS.changeColor(raw_ostream::CYAN, true, false);
  FOS << printInstructionString(MCI);
  if (UseDifferentColor)
    FOS.resetColor();
}

void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  const DependencyEdge &FirstEdge = *Seq[0];
  ArrayRef<llvm::MCInst> Source = getSource();
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, Source[FromIID], HasColors);
    FOS << "\n |\n |    < loop carried > \n |";
  } else {
    while (CurrentIID < FromIID) {
      FOS << "\n        " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

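  // Print the instructions in source order and annotate the edges of the
  // critical sequence: '+----<' marks the source of the first edge, '+---->'
  // the destination of each edge, and '< loop carried >' flags edges that
  // wrap around to the next loop iteration. Dependency details start at
  // column 58.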
  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n |      " << CurrentIID << ".";
      printInstruction(FOS, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[CurrentIID], HasColors);
    } else {
      FOS << "\n |\n |    < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  while (CurrentIID < Source.size()) {
    FOS << "\n        " << CurrentIID << ".";
    printInstruction(FOS, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}

#ifndef NDEBUG
void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
  OS << "\nREG DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nMEM DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
        dumpDependencyEdge(OS, DE, MCIP);

  OS << "\nRESOURCE DEPS\n";
  for (const DGNode &Node : Nodes)
    for (const DependencyEdge &DE : Node.OutgoingEdges)
      if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
        dumpDependencyEdge(OS, DE, MCIP);
}
#endif // NDEBUG

void DependencyGraph::addDependency(unsigned From, unsigned To,
                                    DependencyEdge::Dependency &&Dep) {
  DGNode &NodeFrom = Nodes[From];
  DGNode &NodeTo = Nodes[To];
  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;

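  // If an edge towards the same destination and for the same register or
  // resource already exists, coalesce the two: accumulate the cost and bump
  // the edge frequency.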
  auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
    return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
  });

  if (It != Vec.end()) {
    It->Dep.Cost += Dep.Cost;
    It->Frequency++;
    return;
  }

  DependencyEdge DE = {Dep, From, To, 1};
  Vec.emplace_back(DE);
  NodeTo.NumPredecessors++;
}

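// The dependency graph allocates three nodes per source instruction. This
// allows loop-carried dependencies to be modeled by linking an instruction in
// one copy of the block to the matching instruction in the next copy (see
// addRegisterDep, addMemoryDep and addResourceDep below).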
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
      DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}

void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
                                        unsigned RegID, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
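    // Model the loop-carried edge twice: from the first copy of the block
    // into the second, and from the second copy into the third.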
    DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
    DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
    return;
  }
  DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
}

void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
                                      unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addMemoryDep(From, To + SourceSize, Cost);
    DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
    return;
  }
  DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
}

void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
                                        uint64_t Mask, unsigned Cost) {
  bool IsLoopCarried = From >= To;
  unsigned SourceSize = getSource().size();
  if (IsLoopCarried) {
    DG.addResourceDep(From, To + SourceSize, Mask, Cost);
    DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
    return;
  }
  DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
}

void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  if (Event.Type != HWInstructionEvent::Issued)
    return;

  ArrayRef<llvm::MCInst> Source = getSource();
  const Instruction &IS = *Event.IR.getInstruction();
  unsigned To = IID % Source.size();

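  // Pressure cycles observed for this instruction are given extra weight
  // (a factor of 2) when computing edge costs.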
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}

void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
  assert(Event.Reason != HWPressureEvent::INVALID &&
         "Unexpected invalid event!");

  Tracker.handlePressureEvent(Event);

  switch (Event.Reason) {
  default:
    break;

  case HWPressureEvent::RESOURCES:
    PressureIncreasedBecauseOfResources = true;
    break;
  case HWPressureEvent::REGISTER_DEPS:
    PressureIncreasedBecauseOfRegisterDependencies = true;
    break;
  case HWPressureEvent::MEMORY_DEPS:
    PressureIncreasedBecauseOfMemoryDependencies = true;
    break;
  }
}

void BottleneckAnalysis::onCycleEnd() {
  ++TotalCycles;

  bool PressureIncreasedBecauseOfDataDependencies =
      PressureIncreasedBecauseOfRegisterDependencies ||
      PressureIncreasedBecauseOfMemoryDependencies;
  if (!PressureIncreasedBecauseOfResources &&
      !PressureIncreasedBecauseOfDataDependencies)
    return;

  ++BPI.PressureIncreaseCycles;
  if (PressureIncreasedBecauseOfRegisterDependencies)
    ++BPI.RegisterDependencyCycles;
  if (PressureIncreasedBecauseOfMemoryDependencies)
    ++BPI.MemoryDependencyCycles;
  if (PressureIncreasedBecauseOfDataDependencies)
    ++BPI.DataDependencyCycles;
  if (PressureIncreasedBecauseOfResources)
    ++BPI.ResourcePressureCycles;
  PressureIncreasedBecauseOfResources = false;
  PressureIncreasedBecauseOfRegisterDependencies = false;
  PressureIncreasedBecauseOfMemoryDependencies = false;
}

void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
    return;
  }

  double PressurePerCycle =
      (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
  double ResourcePressurePerCycle =
      (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
  double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
  double RegDepPressurePerCycle =
      (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
  double MemDepPressurePerCycle =
      (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;

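  // Percentages are rounded to two decimal digits before being printed.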
  OS << "\n\nCycles with backend pressure increase [ "
     << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";

  OS << "\nThroughput Bottlenecks: "
     << "\n  Resource Pressure       [ "
     << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
     << "% ]";

  if (BPI.PressureIncreaseCycles) {
    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
      unsigned ResourceCycles = Distribution[I];
      if (ResourceCycles) {
        double Frequency = (double)ResourceCycles * 100 / TotalCycles;
        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
        OS << "\n  - " << PRDesc.Name << "  [ "
           << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
      }
    }
  }

  OS << "\n  Data Dependencies:      [ "
     << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
  OS << "\n  - Register Dependencies [ "
     << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]";
  OS << "\n  - Memory Dependencies   [ "
     << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
     << "% ]\n";
}

void BottleneckAnalysis::printView(raw_ostream &OS) const {
  std::string Buffer;
  raw_string_ostream TempStream(Buffer);
  printBottleneckHints(TempStream);
  TempStream.flush();
  OS << Buffer;
  printCriticalSequence(OS);
}

} // namespace mca
} // namespace llvm