#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>
using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"
namespace {
static cl::opt<bool> SampleProfileEvenCountDistribution(
"sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
cl::desc("Try to evenly distribute counts when there are multiple equally "
"likely options."));
static cl::opt<unsigned> SampleProfileMaxDfsCalls(
"sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
cl::desc("Maximum number of dfs iterations for even count distribution."));
static cl::opt<unsigned> SampleProfileProfiCostInc(
"sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
cl::desc("A cost of increasing a block's count by one."));
static cl::opt<unsigned> SampleProfileProfiCostDec(
"sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
cl::desc("A cost of decreasing a block's count by one."));
static cl::opt<unsigned> SampleProfileProfiCostIncZero(
"sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
cl::desc("A cost of increasing a count of zero-weight block by one."));
static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
"sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
cl::desc("A cost of increasing the entry block's count by one."));
static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
"sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
cl::desc("A cost of decreasing the entry block's count by one."));
static constexpr int64_t INF = ((int64_t)1) << 50;
class MinCostMaxFlow {
public:
void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
Source = SourceNode;
Target = SinkNode;
Nodes = std::vector<Node>(NodeCount);
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
if (SampleProfileEvenCountDistribution)
AugmentingEdges =
std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
}
int64_t run() {
size_t AugmentationIters = applyFlowAugmentation();
int64_t TotalCost = 0;
int64_t TotalFlow = 0;
for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
for (auto &Edge : Edges[Src]) {
if (Edge.Flow > 0) {
TotalCost += Edge.Cost * Edge.Flow;
if (Src == Source)
TotalFlow += Edge.Flow;
}
}
}
LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
<< " iterations with " << TotalFlow << " total flow"
<< " of " << TotalCost << " cost\n");
(void)TotalFlow;
(void)AugmentationIters;
return TotalCost;
}
void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
assert(Capacity > 0 && "adding an edge of zero capacity");
assert(Src != Dst && "loop edge are not supported");
Edge SrcEdge;
SrcEdge.Dst = Dst;
SrcEdge.Cost = Cost;
SrcEdge.Capacity = Capacity;
SrcEdge.Flow = 0;
SrcEdge.RevEdgeIndex = Edges[Dst].size();
Edge DstEdge;
DstEdge.Dst = Src;
DstEdge.Cost = -Cost;
DstEdge.Capacity = 0;
DstEdge.Flow = 0;
DstEdge.RevEdgeIndex = Edges[Src].size();
Edges[Src].push_back(SrcEdge);
Edges[Dst].push_back(DstEdge);
}
void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
addEdge(Src, Dst, INF, Cost);
}
const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
std::vector<std::pair<uint64_t, int64_t>> Flow;
for (auto &Edge : Edges[Src]) {
if (Edge.Flow > 0)
Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
}
return Flow;
}
int64_t getFlow(uint64_t Src, uint64_t Dst) const {
int64_t Flow = 0;
for (auto &Edge : Edges[Src]) {
if (Edge.Dst == Dst) {
Flow += Edge.Flow;
}
}
return Flow;
}
static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
static constexpr uint64_t MinBaseDistance = 10000;
private:
size_t applyFlowAugmentation() {
size_t AugmentationIters = 0;
while (findAugmentingPath()) {
uint64_t PathCapacity = computeAugmentingPathCapacity();
while (PathCapacity > 0) {
bool Progress = false;
if (SampleProfileEvenCountDistribution) {
identifyShortestEdges(PathCapacity);
auto AugmentingOrder = findAugmentingDAG();
Progress = augmentFlowAlongDAG(AugmentingOrder);
PathCapacity = computeAugmentingPathCapacity();
}
if (!Progress) {
augmentFlowAlongPath(PathCapacity);
PathCapacity = 0;
}
AugmentationIters++;
}
}
return AugmentationIters;
}
uint64_t computeAugmentingPathCapacity() {
uint64_t PathCapacity = INF;
uint64_t Now = Target;
while (Now != Source) {
uint64_t Pred = Nodes[Now].ParentNode;
auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
PathCapacity = std::min(PathCapacity, EdgeCapacity);
Now = Pred;
}
return PathCapacity;
}
bool findAugmentingPath() {
for (auto &Node : Nodes) {
Node.Distance = INF;
Node.ParentNode = uint64_t(-1);
Node.ParentEdgeIndex = uint64_t(-1);
Node.Taken = false;
}
std::queue<uint64_t> Queue;
Queue.push(Source);
Nodes[Source].Distance = 0;
Nodes[Source].Taken = true;
while (!Queue.empty()) {
uint64_t Src = Queue.front();
Queue.pop();
Nodes[Src].Taken = false;
if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
break;
if (Nodes[Src].Distance > Nodes[Target].Distance)
continue;
for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
auto &Edge = Edges[Src][EdgeIdx];
if (Edge.Flow < Edge.Capacity) {
uint64_t Dst = Edge.Dst;
int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
if (Nodes[Dst].Distance > NewDistance) {
Nodes[Dst].Distance = NewDistance;
Nodes[Dst].ParentNode = Src;
Nodes[Dst].ParentEdgeIndex = EdgeIdx;
if (!Nodes[Dst].Taken) {
Queue.push(Dst);
Nodes[Dst].Taken = true;
}
}
}
}
}
return Nodes[Target].Distance != INF;
}
void augmentFlowAlongPath(uint64_t PathCapacity) {
assert(PathCapacity > 0 && "found an incorrect augmenting path");
uint64_t Now = Target;
while (Now != Source) {
uint64_t Pred = Nodes[Now].ParentNode;
auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
Edge.Flow += PathCapacity;
RevEdge.Flow -= PathCapacity;
Now = Pred;
}
}
std::vector<uint64_t> findAugmentingDAG() {
typedef std::pair<uint64_t, uint64_t> StackItemType;
std::stack<StackItemType> Stack;
std::vector<uint64_t> AugmentingOrder;
for (auto &Node : Nodes) {
Node.Discovery = 0;
Node.Finish = 0;
Node.NumCalls = 0;
Node.Taken = false;
}
uint64_t Time = 0;
Nodes[Target].Taken = true;
Stack.emplace(Source, 0);
Nodes[Source].Discovery = ++Time;
while (!Stack.empty()) {
auto NodeIdx = Stack.top().first;
auto EdgeIdx = Stack.top().second;
if (EdgeIdx < Edges[NodeIdx].size()) {
auto &Edge = Edges[NodeIdx][EdgeIdx];
auto &Dst = Nodes[Edge.Dst];
Stack.top().second++;
if (Edge.OnShortestPath) {
if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
Dst.Discovery = ++Time;
Stack.emplace(Edge.Dst, 0);
Dst.NumCalls++;
} else if (Dst.Taken && Dst.Finish != 0) {
Nodes[NodeIdx].Taken = true;
}
}
} else {
Stack.pop();
if (!Nodes[NodeIdx].Taken) {
Nodes[NodeIdx].Discovery = 0;
} else {
Nodes[NodeIdx].Finish = ++Time;
if (NodeIdx != Source) {
assert(!Stack.empty() && "empty stack while running dfs");
Nodes[Stack.top().first].Taken = true;
}
AugmentingOrder.push_back(NodeIdx);
}
}
}
std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
for (size_t Src : AugmentingOrder) {
AugmentingEdges[Src].clear();
for (auto &Edge : Edges[Src]) {
uint64_t Dst = Edge.Dst;
if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
Nodes[Dst].Finish < Nodes[Src].Finish) {
AugmentingEdges[Src].push_back(&Edge);
}
}
assert((Src == Target || !AugmentingEdges[Src].empty()) &&
"incorrectly constructed augmenting edges");
}
return AugmentingOrder;
}
bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
for (uint64_t Src : AugmentingOrder) {
Nodes[Src].FracFlow = 0;
Nodes[Src].IntFlow = 0;
for (auto &Edge : AugmentingEdges[Src]) {
Edge->AugmentedFlow = 0;
}
}
uint64_t MaxFlowAmount = INF;
Nodes[Source].FracFlow = 1.0;
for (uint64_t Src : AugmentingOrder) {
assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
"incorrectly computed fractional flow");
uint64_t Degree = AugmentingEdges[Src].size();
for (auto &Edge : AugmentingEdges[Src]) {
double EdgeFlow = Nodes[Src].FracFlow / Degree;
Nodes[Edge->Dst].FracFlow += EdgeFlow;
if (Edge->Capacity == INF)
continue;
uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
}
}
if (MaxFlowAmount == 0)
return false;
Nodes[Source].IntFlow = MaxFlowAmount;
for (uint64_t Src : AugmentingOrder) {
if (Src == Target)
break;
uint64_t Degree = AugmentingEdges[Src].size();
uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
for (auto &Edge : AugmentingEdges[Src]) {
uint64_t Dst = Edge->Dst;
uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
Nodes[Dst].IntFlow += EdgeFlow;
Nodes[Src].IntFlow -= EdgeFlow;
Edge->AugmentedFlow += EdgeFlow;
}
}
assert(Nodes[Target].IntFlow <= MaxFlowAmount);
Nodes[Target].IntFlow = 0;
for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
uint64_t Src = AugmentingOrder[Idx - 1];
for (auto &Edge : AugmentingEdges[Src]) {
uint64_t Dst = Edge->Dst;
if (Nodes[Dst].IntFlow == 0)
continue;
uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
Nodes[Dst].IntFlow -= EdgeFlow;
Nodes[Src].IntFlow += EdgeFlow;
Edge->AugmentedFlow -= EdgeFlow;
}
}
bool HasSaturatedEdges = false;
for (uint64_t Src : AugmentingOrder) {
assert(Src == Source || Nodes[Src].IntFlow == 0);
for (auto &Edge : AugmentingEdges[Src]) {
assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
Edge->Flow += Edge->AugmentedFlow;
RevEdge.Flow -= Edge->AugmentedFlow;
if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
HasSaturatedEdges = true;
}
}
return HasSaturatedEdges;
}
void identifyShortestEdges(uint64_t PathCapacity) {
assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
for (size_t Src = 0; Src < Nodes.size(); Src++) {
if (Nodes[Src].Distance > Nodes[Target].Distance)
continue;
for (auto &Edge : Edges[Src]) {
uint64_t Dst = Edge.Dst;
Edge.OnShortestPath =
Src != Target && Dst != Source &&
Nodes[Dst].Distance <= Nodes[Target].Distance &&
Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
Edge.Capacity > Edge.Flow &&
uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
}
}
}
struct Node {
int64_t Distance;
uint64_t ParentNode;
uint64_t ParentEdgeIndex;
bool Taken;
double FracFlow;
uint64_t IntFlow;
uint64_t Discovery;
uint64_t Finish;
uint64_t NumCalls;
};
struct Edge {
int64_t Cost;
int64_t Capacity;
int64_t Flow;
uint64_t Dst;
uint64_t RevEdgeIndex;
bool OnShortestPath;
uint64_t AugmentedFlow;
};
std::vector<Node> Nodes;
std::vector<std::vector<Edge>> Edges;
uint64_t Source;
uint64_t Target;
std::vector<std::vector<Edge *>> AugmentingEdges;
};
constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
class FlowAdjuster {
public:
FlowAdjuster(FlowFunction &Func) : Func(Func) {
assert(Func.Blocks[Func.Entry].isEntry() &&
"incorrect index of the entry block");
}
void run() {
joinIsolatedComponents();
rebalanceUnknownSubgraphs();
}
private:
void joinIsolatedComponents() {
auto Visited = BitVector(NumBlocks(), false);
findReachable(Func.Entry, Visited);
for (uint64_t I = 0; I < NumBlocks(); I++) {
auto &Block = Func.Blocks[I];
if (Block.Flow > 0 && !Visited[I]) {
auto Path = findShortestPath(I);
assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
"incorrectly computed path adjusting control flow");
Func.Blocks[Func.Entry].Flow += 1;
for (auto &Jump : Path) {
Jump->Flow += 1;
Func.Blocks[Jump->Target].Flow += 1;
findReachable(Jump->Target, Visited);
}
}
}
}
void findReachable(uint64_t Src, BitVector &Visited) {
if (Visited[Src])
return;
std::queue<uint64_t> Queue;
Queue.push(Src);
Visited[Src] = true;
while (!Queue.empty()) {
Src = Queue.front();
Queue.pop();
for (auto Jump : Func.Blocks[Src].SuccJumps) {
uint64_t Dst = Jump->Target;
if (Jump->Flow > 0 && !Visited[Dst]) {
Queue.push(Dst);
Visited[Dst] = true;
}
}
}
}
std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
std::vector<FlowJump *> Result;
Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
return Result;
}
std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
if (Source == Target)
return std::vector<FlowJump *>();
if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
return std::vector<FlowJump *>();
auto Distance = std::vector<int64_t>(NumBlocks(), INF);
auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
Distance[Source] = 0;
std::set<std::pair<uint64_t, uint64_t>> Queue;
Queue.insert(std::make_pair(Distance[Source], Source));
while (!Queue.empty()) {
uint64_t Src = Queue.begin()->second;
Queue.erase(Queue.begin());
if (Src == Target ||
(Func.Blocks[Src].isExit() && Target == AnyExitBlock))
break;
for (auto Jump : Func.Blocks[Src].SuccJumps) {
uint64_t Dst = Jump->Target;
int64_t JumpDist = jumpDistance(Jump);
if (Distance[Dst] > Distance[Src] + JumpDist) {
Queue.erase(std::make_pair(Distance[Dst], Dst));
Distance[Dst] = Distance[Src] + JumpDist;
Parent[Dst] = Jump;
Queue.insert(std::make_pair(Distance[Dst], Dst));
}
}
}
if (Target == AnyExitBlock) {
for (uint64_t I = 0; I < NumBlocks(); I++) {
if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
Target = I;
}
}
}
}
assert(Parent[Target] != nullptr && "a path does not exist");
std::vector<FlowJump *> Result;
uint64_t Now = Target;
while (Now != Source) {
assert(Now == Parent[Now]->Target && "incorrect parent jump");
Result.push_back(Parent[Now]);
Now = Parent[Now]->Source;
}
std::reverse(Result.begin(), Result.end());
return Result;
}
int64_t jumpDistance(FlowJump *Jump) const {
uint64_t BaseDistance =
std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
std::min(Func.Blocks[Func.Entry].Flow,
MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
if (Jump->IsUnlikely)
return MinCostMaxFlow::AuxCostUnlikely;
if (Jump->Flow > 0)
return BaseDistance + BaseDistance / Jump->Flow;
return BaseDistance * NumBlocks();
};
uint64_t NumBlocks() const { return Func.Blocks.size(); }
void rebalanceUnknownSubgraphs() {
for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
auto SrcBlock = &Func.Blocks[I];
if (!canRebalanceAtRoot(SrcBlock))
continue;
std::vector<FlowBlock *> UnknownBlocks;
std::vector<FlowBlock *> KnownDstBlocks;
findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
FlowBlock *DstBlock = nullptr;
if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
DstBlock))
continue;
if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
continue;
rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
}
}
bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
return false;
bool HasUnknownSuccs = false;
for (auto Jump : SrcBlock->SuccJumps) {
if (Func.Blocks[Jump->Target].UnknownWeight) {
HasUnknownSuccs = true;
break;
}
}
if (!HasUnknownSuccs)
return false;
return true;
}
void findUnknownSubgraph(const FlowBlock *SrcBlock,
std::vector<FlowBlock *> &KnownDstBlocks,
std::vector<FlowBlock *> &UnknownBlocks) {
auto Visited = BitVector(NumBlocks(), false);
std::queue<uint64_t> Queue;
Queue.push(SrcBlock->Index);
Visited[SrcBlock->Index] = true;
while (!Queue.empty()) {
auto &Block = Func.Blocks[Queue.front()];
Queue.pop();
for (auto Jump : Block.SuccJumps) {
if (ignoreJump(SrcBlock, nullptr, Jump))
continue;
uint64_t Dst = Jump->Target;
if (Visited[Dst])
continue;
Visited[Dst] = true;
if (!Func.Blocks[Dst].UnknownWeight) {
KnownDstBlocks.push_back(&Func.Blocks[Dst]);
} else {
Queue.push(Dst);
UnknownBlocks.push_back(&Func.Blocks[Dst]);
}
}
}
}
bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
const std::vector<FlowBlock *> &KnownDstBlocks,
const std::vector<FlowBlock *> &UnknownBlocks,
FlowBlock *&DstBlock) {
if (UnknownBlocks.empty())
return false;
if (KnownDstBlocks.size() > 1)
return false;
DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
for (auto Block : UnknownBlocks) {
if (Block->SuccJumps.empty()) {
if (DstBlock != nullptr)
return false;
continue;
}
size_t NumIgnoredJumps = 0;
for (auto Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
NumIgnoredJumps++;
}
if (NumIgnoredJumps == Block->SuccJumps.size())
return false;
}
return true;
}
bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
const FlowJump *Jump) {
if (Jump->IsUnlikely && Jump->Flow == 0)
return true;
auto JumpSource = &Func.Blocks[Jump->Source];
auto JumpTarget = &Func.Blocks[Jump->Target];
if (DstBlock != nullptr && JumpTarget == DstBlock)
return false;
if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
return true;
if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
return true;
return false;
}
bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
std::vector<FlowBlock *> &UnknownBlocks) {
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
auto fillInDegree = [&](const FlowBlock *Block) {
for (auto Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
LocalInDegree[Jump->Target]++;
}
};
fillInDegree(SrcBlock);
for (auto Block : UnknownBlocks) {
fillInDegree(Block);
}
if (LocalInDegree[SrcBlock->Index] > 0)
return false;
std::vector<FlowBlock *> AcyclicOrder;
std::queue<uint64_t> Queue;
Queue.push(SrcBlock->Index);
while (!Queue.empty()) {
FlowBlock *Block = &Func.Blocks[Queue.front()];
Queue.pop();
if (DstBlock != nullptr && Block == DstBlock)
break;
if (Block->UnknownWeight && Block != SrcBlock)
AcyclicOrder.push_back(Block);
for (auto Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
uint64_t Dst = Jump->Target;
LocalInDegree[Dst]--;
if (LocalInDegree[Dst] == 0) {
Queue.push(Dst);
}
}
}
if (UnknownBlocks.size() != AcyclicOrder.size())
return false;
UnknownBlocks = AcyclicOrder;
return true;
}
void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
const FlowBlock *DstBlock,
const std::vector<FlowBlock *> &UnknownBlocks) {
assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
uint64_t BlockFlow = 0;
for (auto Jump : SrcBlock->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
BlockFlow += Jump->Flow;
}
rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
for (auto Block : UnknownBlocks) {
assert(Block->UnknownWeight && "incorrect unknown subgraph");
uint64_t BlockFlow = 0;
for (auto Jump : Block->PredJumps) {
BlockFlow += Jump->Flow;
}
Block->Flow = BlockFlow;
rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
}
}
void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
const FlowBlock *Block, uint64_t BlockFlow) {
size_t BlockDegree = 0;
for (auto Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
BlockDegree++;
}
if (DstBlock == nullptr && BlockDegree == 0)
return;
assert(BlockDegree > 0 && "all outgoing jumps are ignored");
uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
for (auto Jump : Block->SuccJumps) {
if (ignoreJump(SrcBlock, DstBlock, Jump))
continue;
uint64_t Flow = std::min(SuccFlow, BlockFlow);
Jump->Flow = Flow;
BlockFlow -= Flow;
}
assert(BlockFlow == 0 && "not all flow is propagated");
}
static constexpr uint64_t AnyExitBlock = uint64_t(-1);
FlowFunction &Func;
};
void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();
assert(NumBlocks > 1 && "Too few blocks in a function");
LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
if (Func.Blocks[Func.Entry].Weight == 0) {
Func.Blocks[Func.Entry].Weight = 1;
}
uint64_t S = 3 * NumBlocks;
uint64_t T = S + 1;
uint64_t S1 = S + 2;
uint64_t T1 = S + 3;
Network.initialize(3 * NumBlocks + 4, S1, T1);
for (uint64_t B = 0; B < NumBlocks; B++) {
auto &Block = Func.Blocks[B];
assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
"non-zero weight of a block w/o weight except for an entry");
uint64_t Bin = 3 * B;
uint64_t Bout = 3 * B + 1;
uint64_t Baux = 3 * B + 2;
if (Block.Weight > 0) {
Network.addEdge(S1, Bout, Block.Weight, 0);
Network.addEdge(Bin, T1, Block.Weight, 0);
}
assert((!Block.isEntry() || !Block.isExit()) &&
"a block cannot be an entry and an exit");
if (Block.isEntry()) {
Network.addEdge(S, Bin, 0);
} else if (Block.isExit()) {
Network.addEdge(Bout, T, 0);
}
int64_t AuxCostInc = SampleProfileProfiCostInc;
int64_t AuxCostDec = SampleProfileProfiCostDec;
if (Block.UnknownWeight) {
AuxCostInc = 0;
AuxCostDec = 0;
} else {
if (Block.Weight == 0) {
AuxCostInc = SampleProfileProfiCostIncZero;
}
if (Block.isEntry()) {
AuxCostInc = SampleProfileProfiCostIncEntry;
AuxCostDec = SampleProfileProfiCostDecEntry;
}
}
if (Block.HasSelfEdge) {
AuxCostDec = 0;
}
Network.addEdge(Bin, Baux, AuxCostInc);
Network.addEdge(Baux, Bout, AuxCostInc);
if (Block.Weight > 0) {
Network.addEdge(Bout, Baux, AuxCostDec);
Network.addEdge(Baux, Bin, AuxCostDec);
}
}
for (auto &Jump : Func.Jumps) {
uint64_t Src = Jump.Source;
uint64_t Dst = Jump.Target;
if (Src != Dst) {
uint64_t SrcOut = 3 * Src + 1;
uint64_t DstIn = 3 * Dst;
uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
Network.addEdge(SrcOut, DstIn, Cost);
}
}
Network.addEdge(T, S, 0);
}
void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();
for (uint64_t Src = 0; Src < NumBlocks; Src++) {
auto &Block = Func.Blocks[Src];
uint64_t SrcOut = 3 * Src + 1;
int64_t Flow = 0;
for (auto &Adj : Network.getFlow(SrcOut)) {
uint64_t DstIn = Adj.first;
int64_t DstFlow = Adj.second;
bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
if (!IsAuxNode || Block.HasSelfEdge) {
Flow += DstFlow;
}
}
Block.Flow = Flow;
assert(Flow >= 0 && "negative block flow");
}
for (auto &Jump : Func.Jumps) {
uint64_t Src = Jump.Source;
uint64_t Dst = Jump.Target;
int64_t Flow = 0;
if (Src != Dst) {
uint64_t SrcOut = 3 * Src + 1;
uint64_t DstIn = 3 * Dst;
Flow = Network.getFlow(SrcOut, DstIn);
} else {
uint64_t SrcOut = 3 * Src + 1;
uint64_t SrcAux = 3 * Src + 2;
int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
if (AuxFlow > 0)
Flow = AuxFlow;
}
Jump.Flow = Flow;
assert(Flow >= 0 && "negative jump flow");
}
}
#ifndef NDEBUG
void verifyWeights(const FlowFunction &Func) {
const uint64_t NumBlocks = Func.Blocks.size();
auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
for (auto &Jump : Func.Jumps) {
InFlow[Jump.Target] += Jump.Flow;
OutFlow[Jump.Source] += Jump.Flow;
}
uint64_t TotalInFlow = 0;
uint64_t TotalOutFlow = 0;
for (uint64_t I = 0; I < NumBlocks; I++) {
auto &Block = Func.Blocks[I];
if (Block.isEntry()) {
TotalInFlow += Block.Flow;
assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
} else if (Block.isExit()) {
TotalOutFlow += Block.Flow;
assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
} else {
assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
}
}
assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
for (auto &Jump : Func.Jumps) {
if (Jump.Flow > 0) {
PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
}
}
std::queue<uint64_t> Queue;
auto Visited = BitVector(NumBlocks, false);
Queue.push(Func.Entry);
Visited[Func.Entry] = true;
while (!Queue.empty()) {
uint64_t Src = Queue.front();
Queue.pop();
for (uint64_t Dst : PositiveFlowEdges[Src]) {
if (!Visited[Dst]) {
Queue.push(Dst);
Visited[Dst] = true;
}
}
}
for (uint64_t I = 0; I < NumBlocks; I++) {
auto &Block = Func.Blocks[I];
assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
}
}
#endif
}
void llvm::applyFlowInference(FlowFunction &Func) {
auto InferenceNetwork = MinCostMaxFlow();
initializeNetwork(InferenceNetwork, Func);
InferenceNetwork.run();
extractWeights(InferenceNetwork, Func);
auto Adjuster = FlowAdjuster(Func);
Adjuster.run();
#ifndef NDEBUG
verifyWeights(Func);
#endif
}