#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <set>
#include <sstream>
using namespace llvm;
#define DEBUG_TYPE "chr"
#define CHR_DEBUG(X) LLVM_DEBUG(X)
// When set, shouldApply() returns true for every function, bypassing both the
// filter lists and the profile-hotness check.
static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
    cl::desc("Apply CHR for all functions"));
// Bias ratio threshold. checkBias() treats a direction as biased when its
// probability is >= this value (scaled to parts-per-million in
// getCHRBiasThreshold()).
static cl::opt<double> CHRBiasThreshold(
    "chr-bias-threshold", cl::init(0.99), cl::Hidden,
    cl::desc("CHR considers a branch bias greater than this ratio as biased"));
// Minimum number of biased branches/selects a scope must contain to be kept by
// filterScopes() (see hasAtLeastTwoBiasedBranches()).
static cl::opt<unsigned> CHRMergeThreshold(
    "chr-merge-threshold", cl::init(2), cl::Hidden,
    cl::desc("CHR merges a group of N branches/selects where N >= this value"));
// Path of a newline-separated file of module names; read by
// parseCHRFilterFiles() into CHRModules.
static cl::opt<std::string> CHRModuleList(
    "chr-module-list", cl::init(""), cl::Hidden,
    cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
// Path of a newline-separated file of function names; read by
// parseCHRFilterFiles() into CHRFunctions.
static cl::opt<std::string> CHRFunctionList(
    "chr-function-list", cl::init(""), cl::Hidden,
    cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
// Name filters filled by parseCHRFilterFiles() and consulted by shouldApply()
// whenever at least one of the two list options is non-empty.
static StringSet<> CHRModules;
static StringSet<> CHRFunctions;
static void parseCHRFilterFiles() {
if (!CHRModuleList.empty()) {
auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
if (!FileOrErr) {
errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
std::exit(1);
}
StringRef Buf = FileOrErr->get()->getBuffer();
SmallVector<StringRef, 0> Lines;
Buf.split(Lines, '\n');
for (StringRef Line : Lines) {
Line = Line.trim();
if (!Line.empty())
CHRModules.insert(Line);
}
}
if (!CHRFunctionList.empty()) {
auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
if (!FileOrErr) {
errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
std::exit(1);
}
StringRef Buf = FileOrErr->get()->getBuffer();
SmallVector<StringRef, 0> Lines;
Buf.split(Lines, '\n');
for (StringRef Line : Lines) {
Line = Line.trim();
if (!Line.empty())
CHRFunctions.insert(Line);
}
}
}
namespace {
/// Simple counters collected while CHR runs on a function.
struct CHRStats {
  // Bumped in CHR::findScope(): once for each region whose entry branch is a
  // CHR candidate and once for each select instruction scanned.
  uint64_t NumBranches = 0;
  // NOTE(review): the two delta counters are not written in the part of the
  // file visible here; presumably they are updated during the transformation.
  uint64_t NumBranchesDelta = 0;
  uint64_t WeightedNumBranchesDelta = 0;

  CHRStats() = default;

  /// Writes the three counters to OS on one line, without a trailing newline.
  void print(raw_ostream &OS) const {
    OS << "CHRStats: NumBranches " << NumBranches;
    OS << " NumBranchesDelta " << NumBranchesDelta;
    OS << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
  }
};
struct RegInfo {
RegInfo() = default;
RegInfo(Region *RegionIn) : R(RegionIn) {}
Region *R = nullptr;
bool HasBranch = false;
SmallVector<SelectInst *, 8> Selects;
};
typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
class CHRScope {
public:
CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
assert(RI.R && "Null RegionIn");
RegInfos.push_back(RI);
}
Region *getParentRegion() {
assert(RegInfos.size() > 0 && "Empty CHRScope");
Region *Parent = RegInfos[0].R->getParent();
assert(Parent && "Unexpected to call this on the top-level region");
return Parent;
}
BasicBlock *getEntryBlock() {
assert(RegInfos.size() > 0 && "Empty CHRScope");
return RegInfos.front().R->getEntry();
}
BasicBlock *getExitBlock() {
assert(RegInfos.size() > 0 && "Empty CHRScope");
return RegInfos.back().R->getExit();
}
bool appendable(CHRScope *Next) {
BasicBlock *NextEntry = Next->getEntryBlock();
if (getExitBlock() != NextEntry)
return false;
Region *LastRegion = RegInfos.back().R;
for (BasicBlock *Pred : predecessors(NextEntry))
if (!LastRegion->contains(Pred))
return false;
return true;
}
void append(CHRScope *Next) {
assert(RegInfos.size() > 0 && "Empty CHRScope");
assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
assert(getParentRegion() == Next->getParentRegion() &&
"Must be siblings");
assert(getExitBlock() == Next->getEntryBlock() &&
"Must be adjacent");
RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
Subs.append(Next->Subs.begin(), Next->Subs.end());
}
void addSub(CHRScope *SubIn) {
#ifndef NDEBUG
bool IsChild = false;
for (RegInfo &RI : RegInfos)
if (RI.R == SubIn->getParentRegion()) {
IsChild = true;
break;
}
assert(IsChild && "Must be a child");
#endif
Subs.push_back(SubIn);
}
CHRScope *split(Region *Boundary) {
assert(Boundary && "Boundary null");
assert(RegInfos.begin()->R != Boundary &&
"Can't be split at beginning");
auto BoundaryIt = llvm::find_if(
RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
if (BoundaryIt == RegInfos.end())
return nullptr;
ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
DenseSet<Region *> TailRegionSet;
for (const RegInfo &RI : TailRegInfos)
TailRegionSet.insert(RI.R);
auto TailIt =
std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
assert(Sub && "null Sub");
Region *Parent = Sub->getParentRegion();
if (TailRegionSet.count(Parent))
return false;
assert(llvm::any_of(
RegInfos,
[&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
"Must be in head");
return true;
});
ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
assert(HoistStopMap.empty() && "MapHoistStops must be empty");
auto *Scope = new CHRScope(TailRegInfos, TailSubs);
RegInfos.erase(BoundaryIt, RegInfos.end());
Subs.erase(TailIt, Subs.end());
return Scope;
}
bool contains(Instruction *I) const {
BasicBlock *Parent = I->getParent();
for (const RegInfo &RI : RegInfos)
if (RI.R->contains(Parent))
return true;
return false;
}
void print(raw_ostream &OS) const;
SmallVector<RegInfo, 8> RegInfos; SmallVector<CHRScope *, 8> Subs;
Instruction *BranchInsertPoint;
DenseSet<Region *> TrueBiasedRegions;
DenseSet<Region *> FalseBiasedRegions;
SmallVector<RegInfo, 8> CHRRegions;
DenseSet<SelectInst *> TrueBiasedSelects;
DenseSet<SelectInst *> FalseBiasedSelects;
HoistStopMapTy HoistStopMap;
private:
CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
: RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
};
class CHR {
public:
CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
ProfileSummaryInfo &PSIin, RegionInfo &RIin,
OptimizationRemarkEmitter &OREin)
: F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
~CHR() {
for (CHRScope *Scope : Scopes) {
delete Scope;
}
}
bool run();
private:
void findScopes(SmallVectorImpl<CHRScope *> &Output) {
Region *R = RI.getTopLevelRegion();
if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
Output.push_back(Scope);
}
}
CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
SmallVectorImpl<CHRScope *> &Scopes);
CHRScope *findScope(Region *R);
void checkScopeHoistable(CHRScope *Scope);
void splitScopes(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output);
SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
CHRScope *Outer,
DenseSet<Value *> *OuterConditionValues,
Instruction *OuterInsertPoint,
SmallVectorImpl<CHRScope *> &Output,
DenseSet<Instruction *> &Unhoistables);
void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
void filterScopes(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output);
void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output);
void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
void sortScopes(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output);
void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
void cloneScopeBlocks(CHRScope *Scope,
BasicBlock *PreEntryBlock,
BasicBlock *ExitBlock,
Region *LastRegion,
ValueToValueMapTy &VMap);
BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
BasicBlock *EntryBlock,
BasicBlock *NewEntryBlock,
ValueToValueMapTy &VMap);
void fixupBranchesAndSelects(CHRScope *Scope,
BasicBlock *PreEntryBlock,
BranchInst *MergedBR,
uint64_t ProfileCount);
void fixupBranch(Region *R,
CHRScope *Scope,
IRBuilder<> &IRB,
Value *&MergedCondition, BranchProbability &CHRBranchBias);
void fixupSelect(SelectInst* SI,
CHRScope *Scope,
IRBuilder<> &IRB,
Value *&MergedCondition, BranchProbability &CHRBranchBias);
void addToMergedCondition(bool IsTrueBiased, Value *Cond,
Instruction *BranchOrSelect,
CHRScope *Scope,
IRBuilder<> &IRB,
Value *&MergedCondition);
Function &F;
BlockFrequencyInfo &BFI;
DominatorTree &DT;
ProfileSummaryInfo &PSI;
RegionInfo &RI;
OptimizationRemarkEmitter &ORE;
CHRStats Stats;
DenseSet<Region *> TrueBiasedRegionsGlobal;
DenseSet<Region *> FalseBiasedRegionsGlobal;
DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
DenseMap<Region *, BranchProbability> BranchBiasMap;
DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
DenseSet<CHRScope *> Scopes;
};
}
/// Streams Stats by delegating to CHRStats::print(). Its only use in the
/// visible code is inside a CHR_DEBUG statement (in dumpIR()), hence the
/// unused attribute.
static inline
raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
                                              const CHRStats &Stats) {
  Stats.print(OS);
  return OS;
}
/// Streams Scope by delegating to CHRScope::print().
static inline
raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
  Scope.print(OS);
  return OS;
}
/// Decides whether CHR should run on F.
///
/// Precedence: the force flag wins; otherwise, if either filter list option is
/// set, F is accepted only when its module name or its own name is listed;
/// otherwise F is accepted when the profile summary says its entry is hot.
static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
  if (ForceCHR)
    return true;

  const bool HasFilterLists =
      !CHRModuleList.empty() || !CHRFunctionList.empty();
  if (!HasFilterLists) {
    assert(PSI.hasProfileSummary() && "Empty PSI?");
    return PSI.isFunctionEntryHot(&F);
  }

  return CHRModules.count(F.getParent()->getName()) ||
         CHRFunctions.count(F.getName());
}
/// Debug helper: prints a header line (label, module name, function name and,
/// when Stats is non-null, the statistics) followed by a dump of F. All output
/// goes through CHR_DEBUG.
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
                                         CHRStats *Stats) {
  StringRef ModuleName = F.getParent()->getName();
  StringRef FuncName = F.getName();
  // Both names are referenced only inside CHR_DEBUG; the casts keep builds
  // where the macro expands to nothing free of unused-variable warnings.
  (void)ModuleName;
  (void)FuncName;
  CHR_DEBUG({
    dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " << FuncName;
    if (Stats)
      dbgs() << " " << *Stats;
    dbgs() << "\n";
    F.dump();
  });
}
/// Prints the scope as
///   CHRScope[<#regions>, Regions[<name>[ B][ S<n>], ...], Parent <name>,
///   Subs[<sub>, ...]]
/// where " B" marks a region with a biased branch and " S<n>" gives the number
/// of recorded selects. The parent part is omitted when the first region has
/// no parent. Sub-scopes are printed recursively.
void CHRScope::print(raw_ostream &OS) const {
  assert(!RegInfos.empty() && "Empty CHRScope");
  OS << "CHRScope[" << RegInfos.size() << ", Regions[";
  for (const RegInfo &Info : RegInfos) {
    OS << Info.R->getNameStr();
    if (Info.HasBranch)
      OS << " B";
    if (!Info.Selects.empty())
      OS << " S" << Info.Selects.size();
    OS << ", ";
  }

  if (Region *Parent = RegInfos.front().R->getParent())
    OS << "], Parent " << Parent->getNameStr();
  else
    OS << "]";

  OS << ", Subs[";
  for (const CHRScope *Sub : Subs) {
    Sub->print(OS);
    OS << ", ";
  }
  OS << "]]";
}
/// True if I belongs to one of the instruction classes CHR is willing to
/// hoist: binary operators, casts, selects, GEPs, compares, and the vector /
/// aggregate insert, extract and shuffle instructions.
static bool isHoistableInstructionType(Instruction *I) {
  return isa<BinaryOperator, CastInst, SelectInst, GetElementPtrInst, CmpInst,
             InsertElementInst, ExtractElementInst, ShuffleVectorInst,
             ExtractValueInst, InsertValueInst>(I);
}
/// True if I is of a hoistable instruction class and is safe to execute
/// speculatively (queried without a context instruction, with DT supplied).
static bool isHoistable(Instruction *I, DominatorTree &DT) {
  return isHoistableInstructionType(I) &&
         isSafeToSpeculativelyExecute(I, nullptr, &DT);
}
/// Returns the set of "base values" V is computed from, memoized in Visited.
///
/// - A non-hoistable instruction is its own (only) base value.
/// - A hoistable instruction's bases are the union of its operands' bases.
/// - A function argument is its own base value.
/// - Any other value has no base values.
///
/// The returned reference points into Visited, so callers must consume it
/// before inserting anything else into that map.
static const std::set<Value *> &
getBaseValues(Value *V, DominatorTree &DT,
              DenseMap<Value *, std::set<Value *>> &Visited) {
  auto Cached = Visited.find(V);
  if (Cached != Visited.end())
    return Cached->second;

  std::set<Value *> Bases;
  if (auto *Inst = dyn_cast<Instruction>(V)) {
    if (isHoistable(Inst, DT)) {
      for (Value *Operand : Inst->operands()) {
        // Copied out immediately; the next recursive call may grow Visited.
        const std::set<Value *> &OperandBases =
            getBaseValues(Operand, DT, Visited);
        Bases.insert(OperandBases.begin(), OperandBases.end());
      }
    } else {
      Bases.insert(Inst);
    }
  } else if (isa<Argument>(V)) {
    Bases.insert(V);
  }
  return Visited.insert(std::make_pair(V, std::move(Bases))).first->second;
}
/// Returns true if V can be made available at InsertPoint, i.e. V is not an
/// instruction, or it already dominates InsertPoint, or it is a hoistable
/// instruction all of whose operands can (recursively) be made available.
/// Instructions listed in Unhoistables are always rejected.
///
/// When HoistStops is non-null, it receives the instructions at which the
/// recursion stopped because they already dominate InsertPoint. Results for
/// instructions are memoized in Visited; a memoized answer is returned
/// without touching HoistStops.
static bool
checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
                DenseSet<Instruction *> &Unhoistables,
                DenseSet<Instruction *> *HoistStops,
                DenseMap<Instruction *, bool> &Visited) {
  assert(InsertPoint && "Null InsertPoint");
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true; // Non-instructions never need hoisting.

  auto Cached = Visited.find(Inst);
  if (Cached != Visited.end())
    return Cached->second;

  assert(DT.getNode(Inst->getParent()) && "DT must contain I's parent block");
  assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");

  // Stores the verdict for Inst and hands it back for a one-line return.
  auto Record = [&Visited, Inst](bool Verdict) {
    Visited[Inst] = Verdict;
    return Verdict;
  };

  if (Unhoistables.count(Inst))
    return Record(false);

  if (DT.dominates(Inst, InsertPoint)) {
    // Already available at the insert point: hoisting stops here.
    if (HoistStops)
      HoistStops->insert(Inst);
    return Record(true);
  }

  if (!isHoistable(Inst, DT))
    return Record(false);

  // Collect the operands' stops separately so that a failure part-way through
  // leaves the caller's set untouched.
  DenseSet<Instruction *> OperandStops;
  for (Value *Operand : Inst->operands())
    if (!checkHoistValue(Operand, InsertPoint, DT, Unhoistables, &OperandStops,
                         Visited))
      return Record(false);

  CHR_DEBUG(dbgs() << "checkHoistValue " << *Inst << "\n");
  if (HoistStops)
    HoistStops->insert(OperandStops.begin(), OperandStops.end());
  return Record(true);
}
/// Extracts two-way branch probabilities from a !prof node.
///
/// MD must be a 3-operand node of the form {"branch_weights", T, F} with
/// constant-integer weights. On success TrueProb/FalseProb are set to
/// T/(T+F) and F/(T+F) and true is returned. False is returned, leaving the
/// outputs untouched, when MD is null, has a different shape or name, has a
/// non-constant-integer weight, or both weights are zero.
static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
                        BranchProbability &FalseProb) {
  // Check the operand count before touching any operand so that a malformed
  // node is rejected instead of being indexed out of range.
  if (!MD || MD->getNumOperands() != 3)
    return false;
  auto *MDName = dyn_cast_or_null<MDString>(MD->getOperand(0));
  if (!MDName || MDName->getString() != "branch_weights")
    return false;
  // Use the checked (dyn_) extractors: the plain extract<> is a hard cast and
  // never yields null, which would make the test below meaningless.
  auto *TrueWeight =
      mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(1));
  auto *FalseWeight =
      mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
  if (!TrueWeight || !FalseWeight)
    return false;
  uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
  uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
  uint64_t SumWt = TrueWt + FalseWt;

  assert(SumWt >= TrueWt && SumWt >= FalseWt &&
         "Overflow calculating branch probabilities.");

  // Guard against 0-to-0 weights, which would divide by zero below.
  if (SumWt == 0)
    return false;

  TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
  FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
  return true;
}
/// Converts the chr-bias-threshold ratio into a BranchProbability with
/// parts-per-million resolution (the scaled ratio is truncated to an integer).
static BranchProbability getCHRBiasThreshold() {
  const double Ratio = CHRBiasThreshold;
  const auto Numerator = static_cast<uint64_t>(Ratio * 1000000);
  return BranchProbability::getBranchProbability(Numerator, 1000000);
}
template <typename K, typename S, typename M>
static bool checkBias(K *Key, BranchProbability TrueProb,
BranchProbability FalseProb, S &TrueSet, S &FalseSet,
M &BiasMap) {
BranchProbability Threshold = getCHRBiasThreshold();
if (TrueProb >= Threshold) {
TrueSet.insert(Key);
BiasMap[Key] = TrueProb;
return true;
} else if (FalseProb >= Threshold) {
FalseSet.insert(Key);
BiasMap[Key] = FalseProb;
return true;
}
return false;
}
/// Checks whether the conditional branch BI, which ends region R's entry
/// block, is biased, and records the result for R in the given sets and map.
///
/// Returns false for an unconditional branch or one without usable
/// branch_weights metadata. "True" here means the successor that is NOT R's
/// exit: if successor 0 is the exit, the two probabilities are swapped before
/// classification. Exactly one successor must be R's exit.
static bool checkBiasedBranch(BranchInst *BI, Region *R,
                              DenseSet<Region *> &TrueBiasedRegionsGlobal,
                              DenseSet<Region *> &FalseBiasedRegionsGlobal,
                              DenseMap<Region *, BranchProbability> &BranchBiasMap) {
  if (!BI->isConditional())
    return false;

  BranchProbability ThenProb, ElseProb;
  const bool HasWeights =
      checkMDProf(BI->getMetadata(LLVMContext::MD_prof), ThenProb, ElseProb);
  if (!HasWeights)
    return false;

  BasicBlock *const Exit = R->getExit();
  BasicBlock *IfThen = BI->getSuccessor(0);
  BasicBlock *IfElse = BI->getSuccessor(1);
  assert((IfThen == Exit || IfElse == Exit) &&
         IfThen != IfElse &&
         "Invariant from findScopes");

  // Normalize so that "then" is always the side that stays inside the region.
  if (IfThen == Exit) {
    std::swap(IfThen, IfElse);
    std::swap(ThenProb, ElseProb);
  }

  CHR_DEBUG({
    dbgs() << "BI " << *BI << " ";
    dbgs() << "ThenProb " << ThenProb << " ";
    dbgs() << "ElseProb " << ElseProb << "\n";
  });
  return checkBias(R, ThenProb, ElseProb,
                   TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
                   BranchBiasMap);
}
/// Checks whether select SI is biased according to its branch_weights
/// metadata and records the result for SI in the given sets and map.
/// Returns false when SI has no usable weights or is not biased. R is not
/// consulted here.
static bool checkBiasedSelect(
    SelectInst *SI, Region *R,
    DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
    DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
    DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
  BranchProbability TrueProb, FalseProb;
  const bool HasWeights =
      checkMDProf(SI->getMetadata(LLVMContext::MD_prof), TrueProb, FalseProb);
  if (!HasWeights)
    return false;

  CHR_DEBUG({
    dbgs() << "SI " << *SI << " ";
    dbgs() << "TrueProb " << TrueProb << " ";
    dbgs() << "FalseProb " << FalseProb << "\n";
  });
  return checkBias(SI, TrueProb, FalseProb,
                   TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
                   SelectBiasMap);
}
/// Returns the instruction in front of which the merged branch for RI's
/// region would be inserted: the first entry in RI.Selects that lives in the
/// region's entry block, or the entry block's terminator if there is none.
static Instruction* getBranchInsertPoint(RegInfo &RI) {
  BasicBlock *EntryBB = RI.R->getEntry();
  auto LivesInEntry = [EntryBB](SelectInst *SI) {
    return SI->getParent() == EntryBB;
  };

  Instruction *HoistPoint = EntryBB->getTerminator();
  auto FirstEntrySelect = llvm::find_if(RI.Selects, LivesInEntry);
  if (FirstEntrySelect != RI.Selects.end())
    HoistPoint = *FirstEntrySelect;
  assert(HoistPoint && "Null HoistPoint");

#ifndef NDEBUG
  // Cross-check: walking the entry block in instruction order, the first
  // instruction that is one of the recorded selects must be the hoist point.
  for (Instruction &I : *EntryBB) {
    auto *SI = dyn_cast<SelectInst>(&I);
    if (!SI || !llvm::is_contained(RI.Selects, SI))
      continue;
    assert(&I == HoistPoint &&
           "HoistPoint must be the first one in Selects");
    break;
  }
#endif
  return HoistPoint;
}
/// Tries to build a single-region scope for R.
///
/// R is rejected (nullptr) if its entry block belongs to a nested region, if
/// the entry has a predecessor inside R, if any block of R has its address
/// taken, or if R contains a call to the coro.id intrinsic.
///
/// Otherwise a scope is created when (a) R is not the top-level region and
/// its entry ends in a conditional branch with two distinct successors, one
/// of which is R's exit, and/or (b) blocks directly in R (not in subregions)
/// contain select instructions. Biased selects are recorded in the scope's
/// RegInfo; non-biased branches/selects produce missed-optimization remarks.
/// A created scope is registered in Scopes and passed through
/// checkScopeHoistable() before being returned.
CHRScope * CHR::findScope(Region *R) {
  BasicBlock *Entry = R->getEntry();
  BasicBlock *Exit = R->getExit();
  assert(Entry && "Entry must not be null");
  assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
         "Only top level region has a null exit");
  CHR_DEBUG({
    if (Entry)
      dbgs() << "Entry " << Entry->getName() << "\n";
    else
      dbgs() << "Entry null\n";
    if (Exit)
      dbgs() << "Exit " << Exit->getName() << "\n";
    else
      dbgs() << "Exit null\n";
  });

  // The entry block must belong to R itself rather than to a subregion.
  if (RI.getRegionFor(Entry) != R)
    return nullptr;
  // Reject an entry block that is reachable from inside the region.
  if (llvm::any_of(predecessors(Entry),
                   [R](BasicBlock *Pred) { return R->contains(Pred); }))
    return nullptr;
  // Reject regions with address-taken blocks or a coro.id call.
  for (BasicBlock *BB : R->blocks()) {
    if (BB->hasAddressTaken())
      return nullptr;
    for (Instruction &I : *BB) {
      auto *II = dyn_cast<IntrinsicInst>(&I);
      if (II && II->getIntrinsicID() == Intrinsic::coro_id)
        return nullptr;
    }
  }

  CHRScope *Found = nullptr;

  // Part 1: the region's entry branch (never for the top-level region, which
  // has no exit block).
  if (Exit) {
    auto *EntryBr = dyn_cast<BranchInst>(Entry->getTerminator());
    CHR_DEBUG({
      if (EntryBr)
        dbgs() << "BI.isConditional " << EntryBr->isConditional() << "\n";
      else
        dbgs() << "BI null\n";
    });
    if (EntryBr && EntryBr->isConditional()) {
      BasicBlock *S0 = EntryBr->getSuccessor(0);
      BasicBlock *S1 = EntryBr->getSuccessor(1);
      CHR_DEBUG({
        dbgs() << "S0 " << S0->getName() << "\n";
        dbgs() << "S1 " << S1->getName() << "\n";
      });
      const bool OneSideExits = S0 != S1 && (S0 == Exit || S1 == Exit);
      if (OneSideExits) {
        RegInfo Info(R);
        Info.HasBranch = checkBiasedBranch(
            EntryBr, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
            BranchBiasMap);
        Found = new CHRScope(Info);
        Scopes.insert(Found);
        CHR_DEBUG(dbgs() << "Found a region with a branch\n");
        ++Stats.NumBranches;
        if (!Info.HasBranch) {
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased",
                                            EntryBr)
                   << "Branch not biased";
          });
        }
      }
    }
  }

  // Part 2: selects located in blocks that belong directly to R.
  SmallVector<SelectInst *, 8> Candidates;
  for (RegionNode *Node : R->elements()) {
    if (Node->isSubRegion())
      continue;
    for (Instruction &I : *Node->getEntry()) {
      if (auto *SI = dyn_cast<SelectInst>(&I)) {
        Candidates.push_back(SI);
        ++Stats.NumBranches;
      }
    }
  }
  if (!Candidates.empty()) {
    if (Found) {
      CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
    } else {
      CHR_DEBUG(dbgs() << "Found a select-only region\n");
      Found = new CHRScope(RegInfo(R));
      Scopes.insert(Found);
    }
    // Either way the scope has exactly one RegInfo at this point; biased
    // selects are appended to it, the others only get a remark.
    RegInfo &Info = Found->RegInfos[0];
    for (SelectInst *SI : Candidates) {
      if (checkBiasedSelect(SI, R,
                            TrueBiasedSelectsGlobal,
                            FalseBiasedSelectsGlobal,
                            SelectBiasMap)) {
        Info.Selects.push_back(SI);
        continue;
      }
      ORE.emit([&]() {
        return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
               << "Select not biased";
      });
    }
  }

  if (Found)
    checkScopeHoistable(Found);
  return Found;
}
/// Prunes the selects recorded for the first region of Scope so that the
/// condition of every remaining select, and of the region's biased branch if
/// it has one, passes checkHoistValue() against the region's insert point
/// (see getBranchInsertPoint()). Dropped selects are reported through ORE.
/// Does nothing when the region has neither a biased branch nor selects.
void CHR::checkScopeHoistable(CHRScope *Scope) {
  // Note: this local RI (a RegInfo) shadows the RegionInfo member RI.
  RegInfo &RI = Scope->RegInfos[0];
  Region *R = RI.R;
  BasicBlock *EntryBB = R->getEntry();
  auto *Branch = RI.HasBranch ?
      cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
  SmallVector<SelectInst *, 8> &Selects = RI.Selects;
  if (RI.HasBranch || !Selects.empty()) {
    Instruction *InsertPoint = getBranchInsertPoint(RI);
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
    // The selects themselves are passed to checkHoistValue() as unhoistable:
    // a condition that depends on another recorded select is rejected.
    DenseSet<Instruction *> Unhoistables;
    for (SelectInst *SI : Selects) {
      Unhoistables.insert(SI);
    }
    // Step 1: drop every select (other than the insert point itself) whose
    // condition cannot be hoisted to the insert point. Erasing returns the
    // next iterator, so the loop only advances manually when nothing is
    // erased.
    for (auto it = Selects.begin(); it != Selects.end(); ) {
      SelectInst *SI = *it;
      if (SI == InsertPoint) {
        ++it;
        continue;
      }
      DenseMap<Instruction *, bool> Visited;
      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
                                         DT, Unhoistables, nullptr, Visited);
      if (!IsHoistable) {
        CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE,
                                          "DropUnhoistableSelect", SI)
              << "Dropped unhoistable select";
        });
        it = Selects.erase(it);
        // Once dropped, the select no longer blocks hoisting of later ones.
        Unhoistables.erase(SI);
      } else
        ++it;
    }
    // Recompute the insert point from the pruned select list.
    InsertPoint = getBranchInsertPoint(RI);
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
    // Step 2: if the insert point is a select (i.e. not the branch) and the
    // branch condition cannot be hoisted there, give up on the entry-block
    // selects and use the branch itself as the insert point.
    if (RI.HasBranch && InsertPoint != Branch) {
      DenseMap<Instruction *, bool> Visited;
      bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
                                         DT, Unhoistables, nullptr, Visited);
      if (!IsHoistable) {
        assert(InsertPoint != Branch && "Branch must not be the hoist point");
        CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
        CHR_DEBUG(
            for (SelectInst *SI : Selects) {
              dbgs() << "SI " << *SI << "\n";
            });
        // A remark is emitted for every remaining select, although only the
        // ones in the entry block are erased below.
        for (SelectInst *SI : Selects) {
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE,
                                            "DropSelectUnhoistableBranch", SI)
                << "Dropped select due to unhoistable branch";
          });
        }
        llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
          return SI->getParent() == EntryBB;
        });
        Unhoistables.clear();
        InsertPoint = Branch;
      }
    }
    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
#ifndef NDEBUG
    // Sanity checks (asserts builds only): nothing that remains is already
    // above the insert point, and every remaining condition is hoistable.
    if (RI.HasBranch) {
      assert(!DT.dominates(Branch, InsertPoint) &&
             "Branch can't be already above the hoist point");
      DenseMap<Instruction *, bool> Visited;
      assert(checkHoistValue(Branch->getCondition(), InsertPoint,
                             DT, Unhoistables, nullptr, Visited) &&
             "checkHoistValue for branch");
    }
    for (auto *SI : Selects) {
      assert(!DT.dominates(SI, InsertPoint) &&
             "SI can't be already above the hoist point");
      DenseMap<Instruction *, bool> Visited;
      assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
                             Unhoistables, nullptr, Visited) &&
             "checkHoistValue for selects");
    }
    CHR_DEBUG(dbgs() << "Result\n");
    if (RI.HasBranch) {
      CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
    }
    for (auto *SI : Selects) {
      CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
    }
#endif
  }
}
/// Recursively discovers scopes in R and its subregions.
///
/// Scopes found for consecutive subregions are merged with
/// CHRScope::append() as long as CHRScope::appendable() allows it. Each
/// resulting run becomes a sub-scope of R's own scope if R has one (see
/// findScope()); otherwise it is appended to Scopes. Returns R's own scope or
/// nullptr. NextRegion and ParentRegion are not used by this function.
///
/// Note: the Scopes parameter shadows the CHR::Scopes member here.
CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
                           SmallVectorImpl<CHRScope *> &Scopes) {
  CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
  CHRScope *Result = findScope(R);

  // Run: the scope currently being grown from adjacent subregion scopes.
  // Finished: completed runs, in subregion order.
  CHRScope *Run = nullptr;
  SmallVector<CHRScope *, 8> Finished;
  auto FlushRun = [&]() {
    if (Run)
      Finished.push_back(Run);
    Run = nullptr;
  };

  for (auto It = R->begin(), End = R->end(); It != End; ++It) {
    Region *SubR = It->get();
    auto Following = std::next(It);
    Region *NextSubR = Following != End ? Following->get() : nullptr;
    CHR_DEBUG(dbgs() << "Looking at subregion " << SubR->getNameStr()
              << "\n");
    CHRScope *SubScope = findScopes(SubR, NextSubR, R, Scopes);
    CHR_DEBUG({
      if (SubScope)
        dbgs() << "Subregion Scope " << *SubScope << "\n";
      else
        dbgs() << "Subregion Scope null\n";
    });

    if (!SubScope) {
      // A subregion without a scope ends the current run.
      FlushRun();
      continue;
    }
    if (Run && Run->appendable(SubScope)) {
      Run->append(SubScope);
      continue;
    }
    // Either there is no run yet or SubScope cannot extend it: start anew.
    FlushRun();
    Run = SubScope;
  }
  FlushRun();

  if (Result) {
    for (CHRScope *Sub : Finished)
      Result->addSub(Sub);
  } else {
    Scopes.append(Finished.begin(), Finished.end());
  }
  return Result;
}
/// Collects the condition values recorded for RI's region: the condition of
/// the entry block's branch when RI.HasBranch is set, plus the condition of
/// every select in RI.Selects.
static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
  DenseSet<Value *> Conditions;
  if (RI.HasBranch) {
    Instruction *Term = RI.R->getEntry()->getTerminator();
    Conditions.insert(cast<BranchInst>(Term)->getCondition());
  }
  for (SelectInst *SI : RI.Selects)
    Conditions.insert(SI->getCondition());
  return Conditions;
}
/// Decides whether the conditions in ConditionValues must be kept apart from
/// those in PrevConditionValues.
///
/// Returns true (split) if any value in ConditionValues fails
/// checkHoistValue() against InsertPoint, or if both sets are non-empty and
/// their base values (see getBaseValues()) have nothing in common. Returns
/// false otherwise.
static bool shouldSplit(Instruction *InsertPoint,
                        DenseSet<Value *> &PrevConditionValues,
                        DenseSet<Value *> &ConditionValues,
                        DominatorTree &DT,
                        DenseSet<Instruction *> &Unhoistables) {
  assert(InsertPoint && "Null InsertPoint");
  CHR_DEBUG(
      dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
      for (Value *V : PrevConditionValues) {
        dbgs() << *V << ", ";
      }
      dbgs() << " ConditionValues ";
      for (Value *V : ConditionValues) {
        dbgs() << *V << ", ";
      }
      dbgs() << "\n");

  // Criterion 1: every new condition must be hoistable to the insert point.
  // Each query gets its own memo table.
  for (Value *Cond : ConditionValues) {
    DenseMap<Instruction *, bool> Memo;
    if (checkHoistValue(Cond, InsertPoint, DT, Unhoistables, nullptr, Memo))
      continue;
    CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *Cond << "\n");
    return true;
  }

  // Criterion 2 only applies when there is something on both sides.
  if (PrevConditionValues.empty() || ConditionValues.empty()) {
    CHR_DEBUG(dbgs() << "No split\n");
    return false;
  }

  // Gather the base values of both sides, sharing one cache across all
  // getBaseValues() calls.
  DenseMap<Value *, std::set<Value *>> BaseCache;
  auto CollectBases = [&](DenseSet<Value *> &Values) {
    std::set<Value *> Collected;
    for (Value *V : Values) {
      const std::set<Value *> &Part = getBaseValues(V, DT, BaseCache);
      Collected.insert(Part.begin(), Part.end());
    }
    return Collected;
  };
  const std::set<Value *> PrevBases = CollectBases(PrevConditionValues);
  const std::set<Value *> Bases = CollectBases(ConditionValues);
  CHR_DEBUG(
      dbgs() << "PrevBases ";
      for (Value *V : PrevBases) {
        dbgs() << *V << ", ";
      }
      dbgs() << " Bases ";
      for (Value *V : Bases) {
        dbgs() << *V << ", ";
      }
      dbgs() << "\n");

  std::vector<Value *> Common;
  std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
                        Bases.end(), std::back_inserter(Common));
  if (Common.empty()) {
    CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
    return true;
  }

  CHR_DEBUG(dbgs() << "No split\n");
  return false;
}
/// Adds to Output every select recorded in Scope's regions and, recursively,
/// in all of its sub-scopes.
static void getSelectsInScope(CHRScope *Scope,
                              DenseSet<Instruction *> &Output) {
  for (RegInfo &Info : Scope->RegInfos)
    Output.insert(Info.Selects.begin(), Info.Selects.end());
  for (CHRScope *Nested : Scope->Subs)
    getSelectsInScope(Nested, Output);
}
/// Splits every scope in Input with splitScope(), treating each as a
/// top-level scope (no outer scope). For each input scope, the selects found
/// anywhere inside it are passed along as the unhoistable set. All resulting
/// scopes are appended to Output, each with BranchInsertPoint set.
void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
                      SmallVectorImpl<CHRScope *> &Output) {
  for (CHRScope *TopScope : Input) {
    assert(!TopScope->BranchInsertPoint &&
           "BranchInsertPoint must not be set");
    DenseSet<Instruction *> ScopeSelects;
    getSelectsInScope(TopScope, ScopeSelects);
    splitScope(TopScope, nullptr, nullptr, nullptr, Output, ScopeSelects);
  }
  assert(llvm::all_of(Output,
                      [](CHRScope *Scope) {
                        return Scope->BranchInsertPoint != nullptr;
                      }) &&
         "BranchInsertPoint must be set");
}
/// Splits Scope into pieces wherever shouldSplit() requires it and recurses
/// into the sub-scopes of every piece.
///
/// \param Scope                 scope to split; it keeps the leading regions.
/// \param Outer                 enclosing scope, or nullptr at the top level.
/// \param OuterConditionValues  condition values accumulated for Outer
///                              (required when Outer is non-null).
/// \param OuterInsertPoint      insert point of Outer (required when Outer is
///                              non-null).
/// \param Output                receives every piece that is split from its
///                              outer scope, with BranchInsertPoint set.
/// \param Unhoistables          instructions that checkHoistValue() must
///                              treat as not hoistable.
/// \returns the pieces that were NOT split from Outer and therefore remain
///          nested in it; always empty when Outer is null.
SmallVector<CHRScope *, 8> CHR::splitScope(
    CHRScope *Scope,
    CHRScope *Outer,
    DenseSet<Value *> *OuterConditionValues,
    Instruction *OuterInsertPoint,
    SmallVectorImpl<CHRScope *> &Output,
    DenseSet<Instruction *> &Unhoistables) {
  if (Outer) {
    assert(OuterConditionValues && "Null OuterConditionValues");
    assert(OuterInsertPoint && "Null OuterInsertPoint");
  }
  // State of the piece currently being accumulated.
  bool PrevSplitFromOuter = true;
  DenseSet<Value *> PrevConditionValues;
  Instruction *PrevInsertPoint = nullptr;
  // Parallel vectors, one entry per finished piece.
  SmallVector<CHRScope *, 8> Splits;
  SmallVector<bool, 8> SplitsSplitFromOuter;
  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
  SmallVector<Instruction *, 8> SplitsInsertPoints;
  // Iterate over a copy: Scope->split() below erases from Scope->RegInfos and
  // Scope itself is re-pointed to the tail piece.
  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); for (RegInfo &RI : RegInfos) {
    Instruction *InsertPoint = getBranchInsertPoint(RI);
    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
    CHR_DEBUG(
        dbgs() << "ConditionValues ";
        for (Value *V : ConditionValues) {
          dbgs() << *V << ", ";
        }
        dbgs() << "\n");
    if (RI.R == RegInfos[0].R) {
      // First region: decide whether the whole scope separates from Outer.
      if (Outer) {
        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
        CHR_DEBUG(dbgs() << "Should split from outer at "
                  << RI.R->getNameStr() << "\n");
        if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
                        ConditionValues, DT, Unhoistables)) {
          PrevConditionValues = ConditionValues;
          PrevInsertPoint = InsertPoint;
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE,
                                            "SplitScopeFromOuter",
                                            RI.R->getEntry()->getTerminator())
                << "Split scope from outer due to unhoistable branch/select "
                << "and/or lack of common condition values";
          });
        } else {
          // Stays nested in Outer: continue with Outer's conditions and
          // insert point.
          PrevSplitFromOuter = false;
          PrevConditionValues = *OuterConditionValues;
          PrevConditionValues.insert(ConditionValues.begin(),
                                     ConditionValues.end());
          PrevInsertPoint = OuterInsertPoint;
        }
      } else {
        CHR_DEBUG(dbgs() << "Outer null\n");
        PrevConditionValues = ConditionValues;
        PrevInsertPoint = InsertPoint;
      }
    } else {
      // Later regions: decide whether to cut the scope in front of RI.R.
      CHR_DEBUG(dbgs() << "Should split from prev at "
                << RI.R->getNameStr() << "\n");
      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
                      DT, Unhoistables)) {
        // Finish the current piece and continue with the tail. The tail is
        // registered in the Scopes member, which the CHR destructor frees.
        CHRScope *Tail = Scope->split(RI.R);
        Scopes.insert(Tail);
        Splits.push_back(Scope);
        SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
        SplitsConditionValues.push_back(PrevConditionValues);
        SplitsInsertPoints.push_back(PrevInsertPoint);
        Scope = Tail;
        PrevConditionValues = ConditionValues;
        PrevInsertPoint = InsertPoint;
        PrevSplitFromOuter = true;
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE,
                                          "SplitScopeFromPrev",
                                          RI.R->getEntry()->getTerminator())
              << "Split scope from previous due to unhoistable branch/select "
              << "and/or lack of common condition values";
        });
      } else {
        // Not separated: fold this region's conditions into the current piece.
        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
      }
    }
  }
  // Record the last (or only) piece.
  Splits.push_back(Scope);
  SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  SplitsConditionValues.push_back(PrevConditionValues);
  assert(PrevInsertPoint && "Null PrevInsertPoint");
  SplitsInsertPoints.push_back(PrevInsertPoint);
  assert(Splits.size() == SplitsConditionValues.size() &&
         Splits.size() == SplitsSplitFromOuter.size() &&
         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
  // Recurse: split the sub-scopes of every piece against that piece. Only the
  // sub-pieces that remain nested are kept in Split->Subs; the others were
  // already appended to Output by the recursive call.
  for (size_t I = 0; I < Splits.size(); ++I) {
    CHRScope *Split = Splits[I];
    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
    Instruction *SplitInsertPoint = SplitsInsertPoints[I];
    SmallVector<CHRScope *, 8> NewSubs;
    DenseSet<Instruction *> SplitUnhoistables;
    getSelectsInScope(Split, SplitUnhoistables);
    for (CHRScope *Sub : Split->Subs) {
      SmallVector<CHRScope *, 8> SubSplits = splitScope(
          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
          SplitUnhoistables);
      llvm::append_range(NewSubs, SubSplits);
    }
    Split->Subs = NewSubs;
  }
  // Distribute the pieces: those split from the outer scope go to Output with
  // their insert point set; the rest are returned to the caller.
  SmallVector<CHRScope *, 8> Result;
  for (size_t I = 0; I < Splits.size(); ++I) {
    CHRScope *Split = Splits[I];
    if (SplitsSplitFromOuter[I]) {
      Output.push_back(Split);
      Split->BranchInsertPoint = SplitsInsertPoints[I];
      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
                << "\n");
    } else {
      Result.push_back(Split);
    }
  }
  if (!Outer)
    assert(Result.empty() &&
           "If no outer (top-level), must return no nested ones");
  return Result;
}
/// For every scope in Scopes, fills its True/FalseBiasedRegions and
/// True/FalseBiasedSelects sets from the scope and all scopes nested in it.
/// The region sets must be empty on entry.
void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
  for (CHRScope *Outermost : Scopes) {
    assert(Outermost->TrueBiasedRegions.empty() && Outermost->FalseBiasedRegions.empty() && "Empty");
    // The scope is its own outermost scope; nested scopes report into it.
    classifyBiasedScopes(Outermost, Outermost);
    CHR_DEBUG(
        dbgs() << "classifyBiasedScopes " << *Outermost << "\n";
        dbgs() << "TrueBiasedRegions ";
        for (Region *R : Outermost->TrueBiasedRegions)
          dbgs() << R->getNameStr() << ", ";
        dbgs() << "\n" << "FalseBiasedRegions ";
        for (Region *R : Outermost->FalseBiasedRegions)
          dbgs() << R->getNameStr() << ", ";
        dbgs() << "\n" << "TrueBiasedSelects ";
        for (SelectInst *SI : Outermost->TrueBiasedSelects)
          dbgs() << *SI << ", ";
        dbgs() << "\n" << "FalseBiasedSelects ";
        for (SelectInst *SI : Outermost->FalseBiasedSelects)
          dbgs() << *SI << ", ";
        dbgs() << "\n";);
  }
}
/// Copies the bias classification of everything in Scope (and, recursively,
/// its sub-scopes) from the function-wide sets into OutermostScope's sets.
/// Every region with HasBranch set and every recorded select is expected to
/// be in one of the global true/false sets.
void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
  for (RegInfo &Info : Scope->RegInfos) {
    if (Info.HasBranch) {
      Region *BranchRegion = Info.R;
      if (TrueBiasedRegionsGlobal.contains(BranchRegion)) {
        OutermostScope->TrueBiasedRegions.insert(BranchRegion);
      } else if (FalseBiasedRegionsGlobal.contains(BranchRegion)) {
        OutermostScope->FalseBiasedRegions.insert(BranchRegion);
      } else {
        llvm_unreachable("Must be biased");
      }
    }
    for (SelectInst *Sel : Info.Selects) {
      if (TrueBiasedSelectsGlobal.contains(Sel)) {
        OutermostScope->TrueBiasedSelects.insert(Sel);
      } else if (FalseBiasedSelectsGlobal.contains(Sel)) {
        OutermostScope->FalseBiasedSelects.insert(Sel);
      } else {
        llvm_unreachable("Must be biased");
      }
    }
  }
  for (CHRScope *Nested : Scope->Subs)
    classifyBiasedScopes(Nested, OutermostScope);
}
// Return true if Scope carries enough biased branches and selects to be worth
// merging: the combined size of its four bias sets must reach
// CHRMergeThreshold (which defaults to 2, hence the function name).
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
  unsigned NumBiasedRegions =
      Scope->TrueBiasedRegions.size() + Scope->FalseBiasedRegions.size();
  unsigned NumBiasedSelects =
      Scope->TrueBiasedSelects.size() + Scope->FalseBiasedSelects.size();
  unsigned NumBiased = NumBiasedRegions + NumBiasedSelects;
  return NumBiased >= CHRMergeThreshold;
}
// Copy into Output the scopes of Input that pass
// hasAtLeastTwoBiasedBranches(), preserving their order.
//
// For every scope that is dropped, a debug line with the four bias-set sizes
// is printed and a missed-optimization remark is emitted, anchored at the
// terminator of the entry block of the scope's first region.
void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
                       SmallVectorImpl<CHRScope *> &Output) {
  for (CHRScope *Scope : Input) {
    if (hasAtLeastTwoBiasedBranches(Scope)) {
      Output.push_back(Scope);
      continue;
    }

    // Too few biased branches/selects: report and drop the scope.
    CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
                     << Scope->TrueBiasedRegions.size()
                     << " falsy-regions "
                     << Scope->FalseBiasedRegions.size()
                     << " true-selects "
                     << Scope->TrueBiasedSelects.size()
                     << " false-selects "
                     << Scope->FalseBiasedSelects.size()
                     << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(
                 DEBUG_TYPE, "DropScopeWithOneBranchOrSelect",
                 Scope->RegInfos[0].R->getEntry()->getTerminator())
             << "Drop scope with < "
             << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
             << " biased branch(es) or select(s)";
    });
  }
}
// Compute the CHR regions and hoist stops for every scope in Input and append
// each scope to Output in the same order.
//
// Each scope acts as its own outermost scope for the recursive overload, so
// the results for the scope and its sub-scopes land in that scope's
// CHRRegions and HoistStopMap. With debug output enabled, both are printed.
void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
                        SmallVectorImpl<CHRScope *> &Output) {
  for (CHRScope *Scope : Input) {
    // Both containers are expected to be untouched before this phase.
    assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
           "Empty");
    setCHRRegions(Scope, Scope);
    Output.push_back(Scope);
    CHR_DEBUG(
        dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
        // Bind each entry by reference: the mapped value is a set of
        // instructions, and a by-value loop variable would copy that whole
        // set on every iteration just to print it.
        for (const auto &Entry : Scope->HoistStopMap) {
          Region *R = Entry.first;
          dbgs() << "Region " << R->getNameStr() << "\n";
          for (Instruction *I : Entry.second) {
            dbgs() << "HoistStop " << *I << "\n";
          }
        }
        dbgs() << "CHRRegions" << "\n";
        for (RegInfo &RI : Scope->CHRRegions) {
          dbgs() << RI.R->getNameStr() << "\n";
        });
  }
}
// For each region of Scope that has a branch and/or selects, collect the
// hoist stops of their conditions and register the region on OutermostScope
// (appended to CHRRegions, with its stops stored in HoistStopMap). Then do
// the same for every sub-scope of Scope.
//
// Hoistability is checked relative to OutermostScope's BranchInsertPoint, and
// all selects of Scope are passed to checkHoistValue as unhoistable.
void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
  DenseSet<Instruction *> Unhoistables;
  for (RegInfo &RI : Scope->RegInfos)
    Unhoistables.insert(RI.Selects.begin(), RI.Selects.end());

  Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
  for (RegInfo &RI : Scope->RegInfos) {
    Region *R = RI.R;
    DenseSet<Instruction *> HoistStops;
    bool IsHoisted = false;

    // Shared by the branch and the selects: run the hoistability check on one
    // condition (which is handed HoistStops to fill in) and remember that
    // this region has something to hoist.
    auto CollectHoistStops = [&](Value *Cond) {
      DenseMap<Instruction *, bool> Visited;
      bool IsHoistable = checkHoistValue(Cond, InsertPoint, DT, Unhoistables,
                                         &HoistStops, Visited);
      assert(IsHoistable && "Must be hoistable");
      (void)(IsHoistable); // Only read by the assert.
      IsHoisted = true;
    };

    if (RI.HasBranch) {
      assert((OutermostScope->TrueBiasedRegions.contains(R) ||
              OutermostScope->FalseBiasedRegions.contains(R)) &&
             "Must be truthy or falsy");
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
      CollectHoistStops(BI->getCondition());
    }

    for (SelectInst *SI : RI.Selects) {
      assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
              OutermostScope->FalseBiasedSelects.contains(SI)) &&
             "Must be true or false biased");
      CollectHoistStops(SI->getCondition());
    }

    if (!IsHoisted)
      continue;
    OutermostScope->CHRRegions.push_back(RI);
    OutermostScope->HoistStopMap[R] = HoistStops;
  }

  for (CHRScope *Nested : Scope->Subs)
    setCHRRegions(Nested, OutermostScope);
}
// Strict ordering for scopes: Scope1 sorts before Scope2 when the first
// region of Scope1 has a smaller depth than the first region of Scope2.
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
  Region *FirstRegion1 = Scope1->RegInfos[0].R;
  Region *FirstRegion2 = Scope2->RegInfos[0].R;
  return FirstRegion1->getDepth() < FirstRegion2->getDepth();
}
// Make Output a copy of Input, stably sorted with CHRScopeSorter (by the
// depth of each scope's first region). Scopes of equal depth keep the
// relative order they had in Input.
void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
                     SmallVectorImpl<CHRScope *> &Output) {
  Output.resize(Input.size());
  std::copy(Input.begin(), Input.end(), Output.begin());
  std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
}
// Move the instruction V, preceded recursively by its operands, to just
// before HoistPoint, and record every moved instruction in HoistedSet.
//
// Nothing happens when V is not an instruction, or when the instruction
//   - is HoistPoint itself,
//   - is one of the hoist stops registered for region R in HoistStopMap,
//   - is a phi contained in TrivialPHIs,
//   - is already in HoistedSet, or
//   - already dominates HoistPoint according to DT.
// R must have an entry in HoistStopMap.
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
                       HoistStopMapTy &HoistStopMap,
                       DenseSet<Instruction *> &HoistedSet,
                       DenseSet<PHINode *> &TrivialPHIs,
                       DominatorTree &DT) {
  auto StopsIt = HoistStopMap.find(R);
  assert(StopsIt != HoistStopMap.end() && "Region must be in hoist stop map");
  DenseSet<Instruction *> &HoistStops = StopsIt->second;

  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return;

  if (I == HoistPoint || HoistStops.count(I))
    return;
  if (auto *PN = dyn_cast<PHINode>(I)) {
    if (TrivialPHIs.count(PN))
      return;
  }
  if (HoistedSet.count(I))
    return;

  assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
  assert(DT.getNode(I->getParent()) && "DT must contain I's block");
  assert(DT.getNode(HoistPoint->getParent()) &&
         "DT must contain HoistPoint block");

  // Already above the hoist point; leave it where it is.
  if (DT.dominates(I, HoistPoint))
    return;

  // Operands first, so that they end up ahead of I once I is moved.
  for (Value *Operand : I->operands())
    hoistValue(Operand, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs,
               DT);

  I->moveBefore(HoistPoint);
  HoistedSet.insert(I);
  CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
}
// Hoist, to just before HoistPoint, the condition of every biased branch and
// every biased select found in the CHR regions of Scope.
//
// A single HoistedSet is shared across all conditions of the scope, so an
// instruction feeding several conditions is only moved once.
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
                                 DenseSet<PHINode *> &TrivialPHIs,
                                 DominatorTree &DT) {
  DenseSet<Instruction *> HoistedSet;
  auto HoistCondition = [&](Value *Cond, Region *R) {
    hoistValue(Cond, HoistPoint, R, Scope->HoistStopMap, HoistedSet,
               TrivialPHIs, DT);
  };

  for (const RegInfo &RI : Scope->CHRRegions) {
    Region *R = RI.R;

    bool RegionIsBiased = Scope->TrueBiasedRegions.count(R) ||
                          Scope->FalseBiasedRegions.count(R);
    if (RI.HasBranch && RegionIsBiased) {
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
      HoistCondition(BI->getCondition(), R);
    }

    for (SelectInst *SI : RI.Selects) {
      bool SelectIsBiased = Scope->TrueBiasedSelects.count(SI) ||
                            Scope->FalseBiasedSelects.count(SI);
      if (SelectIsBiased)
        HoistCondition(SI->getCondition(), R);
    }
  }
}
// Try to negate ICmp in place instead of materializing a separate negation.
//
// This is only done when every user of ICmp other than ExcludedUser is either
// a conditional branch or a select whose condition is ICmp. In that case each
// such branch gets its successors swapped, each such select gets its values
// and profile metadata swapped, the predicate of ICmp is inverted, and true
// is returned. Otherwise nothing is modified and false is returned.
//
// A swapped select that is in one of Scope's bias sets is additionally
// inserted into the opposite set.
// NOTE(review): the select is not erased from the set it was originally in,
// so afterwards it is a member of both sets -- confirm this is intended.
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
                                                 Instruction *ExcludedUser,
                                                 CHRScope *Scope) {
  // A user can absorb the negation if it is the excluded user (left alone),
  // a conditional branch, or a select that uses ICmp as its condition.
  auto CanAbsorbNegation = [&](User *U) {
    if (U == ExcludedUser)
      return true;
    if (auto *Br = dyn_cast<BranchInst>(U))
      return Br->isConditional();
    if (auto *Sel = dyn_cast<SelectInst>(U))
      return Sel->getCondition() == ICmp;
    return false;
  };
  if (!llvm::all_of(ICmp->users(), CanAbsorbNegation))
    return false;

  for (User *U : ICmp->users()) {
    if (U == ExcludedUser)
      continue;

    if (auto *BI = dyn_cast<BranchInst>(U)) {
      assert(BI->isConditional() && "Must be conditional");
      BI->swapSuccessors();
    } else if (auto *SI = dyn_cast<SelectInst>(U)) {
      SI->swapValues();
      SI->swapProfMetadata();
      if (Scope->TrueBiasedSelects.count(SI)) {
        assert(!Scope->FalseBiasedSelects.contains(SI) &&
               "Must not be already in");
        Scope->FalseBiasedSelects.insert(SI);
      } else if (Scope->FalseBiasedSelects.count(SI)) {
        assert(!Scope->TrueBiasedSelects.contains(SI) &&
               "Must not be already in");
        Scope->TrueBiasedSelects.insert(SI);
      }
    } else {
      llvm_unreachable("Must be a branch or a select");
    }
  }

  ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
  return true;
}
// For every instruction defined in the blocks of Scope that has a qualifying
// user, insert a phi at the front of ExitBlock whose incoming value is that
// instruction on every predecessor edge of ExitBlock, and rewrite the
// qualifying users to use the phi instead. Each inserted phi is recorded in
// TrivialPHIs.
//
// A user qualifies if it is an instruction that either
//   - lives outside the blocks of Scope and is not a phi in ExitBlock, or
//   - is a phi in EntryBlock.
static void insertTrivialPHIs(CHRScope *Scope,
                              BasicBlock *EntryBlock, BasicBlock *ExitBlock,
                              DenseSet<PHINode *> &TrivialPHIs) {
  SmallSetVector<BasicBlock *, 8> BlocksInScope;
  for (RegInfo &RI : Scope->RegInfos)
    for (BasicBlock *BB : RI.R->blocks())
      BlocksInScope.insert(BB);

  CHR_DEBUG({
    dbgs() << "Inserting redundant phis\n";
    for (BasicBlock *BB : BlocksInScope)
      dbgs() << "BlockInScope " << BB->getName() << "\n";
  });

  for (BasicBlock *BB : BlocksInScope) {
    for (Instruction &I : *BB) {
      // Collect first and rewrite afterwards, so the use list of I is not
      // changed while it is being walked.
      SmallVector<Instruction *, 8> Users;
      for (User *U : I.users()) {
        auto *UI = dyn_cast<Instruction>(U);
        if (!UI)
          continue;

        BasicBlock *UserBlock = UI->getParent();
        bool UserIsPhi = isa<PHINode>(UI);
        if (!BlocksInScope.contains(UserBlock) &&
            !(UserIsPhi && UserBlock == ExitBlock)) {
          CHR_DEBUG(dbgs() << "V " << I << "\n");
          CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
          Users.push_back(UI);
        } else if (UserBlock == EntryBlock && UserIsPhi) {
          CHR_DEBUG(dbgs() << "V " << I << "\n");
          CHR_DEBUG(dbgs()
                    << "Used at entry block (for a back edge) by a phi user "
                    << *UI << "\n");
          Users.push_back(UI);
        }
      }

      if (Users.empty())
        continue;

      // phi [I, P0], [I, P1], ... over all predecessors of ExitBlock.
      PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
                                    &ExitBlock->front());
      for (BasicBlock *Pred : predecessors(ExitBlock))
        PN->addIncoming(&I, Pred);
      TrivialPHIs.insert(PN);
      CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");

      for (Instruction *UI : Users) {
        unsigned NumOps = UI->getNumOperands();
        for (unsigned OpIdx = 0; OpIdx < NumOps; ++OpIdx)
          if (UI->getOperand(OpIdx) == &I)
            UI->setOperand(OpIdx, PN);
        CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
      }
    }
  }
}
// Assertion-only sanity check: every region in Scope->CHRRegions must either
// be in one of Scope's biased-region sets or own at least one select that is
// in one of Scope's biased-select sets. The body is compiled out when NDEBUG
// is defined.
static void LLVM_ATTRIBUTE_UNUSED
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
#ifndef NDEBUG
  for (RegInfo &RI : Scope->CHRRegions) {
    bool HasBiasedBranchOrSelect = Scope->TrueBiasedRegions.count(RI.R) ||
                                   Scope->FalseBiasedRegions.count(RI.R);
    if (!HasBiasedBranchOrSelect)
      HasBiasedBranchOrSelect = llvm::any_of(RI.Selects, [&](SelectInst *SI) {
        return Scope->TrueBiasedSelects.count(SI) ||
               Scope->FalseBiasedSelects.count(SI);
      });
    assert(HasBiasedBranchOrSelect &&
           "Must have biased branch or select");
  }
#endif
}
// Assertion-only sanity check run after hoisting: for every biased branch and
// biased select in the CHR regions of Scope, a condition that is an
// instruction must either sit in PreEntryBlock or not be contained in Scope.
// Each inspected condition is also printed when debug output is enabled.
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
    CHRScope *Scope, BasicBlock *PreEntryBlock) {
  CHR_DEBUG(dbgs() << "Biased regions condition values \n");

  auto CheckCondition = [&](Value *V) {
    CHR_DEBUG(dbgs() << *V << "\n");
    if (auto *I = dyn_cast<Instruction>(V)) {
      (void)(I); // Only read by the assert.
      assert((I->getParent() == PreEntryBlock ||
              !Scope->contains(I)) &&
             "Must have been hoisted to PreEntryBlock or outside the scope");
    }
  };

  for (RegInfo &RI : Scope->CHRRegions) {
    Region *R = RI.R;
    bool RegionIsBiased = Scope->TrueBiasedRegions.count(R) ||
                          Scope->FalseBiasedRegions.count(R);
    if (RI.HasBranch && RegionIsBiased) {
      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
      CheckCondition(BI->getCondition());
    }

    for (SelectInst *SI : RI.Selects) {
      bool SelectIsBiased = Scope->TrueBiasedSelects.count(SI) ||
                            Scope->FalseBiasedSelects.count(SI);
      if (SelectIsBiased)
        CheckCondition(SI->getCondition());
    }
  }
}
// Apply the CHR transformation to a single scope.
//
// Steps, in order:
//   1. If the last region has an exit block, insert trivial phis there.
//   2. Split the entry block of the first region at the scope's
//      BranchInsertPoint; the new block becomes the entry of the first
//      region and the old entry block becomes the pre-entry block.
//   3. Clone the blocks of the scope.
//   4. Replace the pre-entry block's terminator with the merged conditional
//      branch that chooses between the original blocks and the clones.
//   5. Hoist the biased conditions in front of that branch.
//   6. Build the merged condition and fold the biased branches and selects.
// The profile count of the entry block is read before any IR is changed and
// passed on to the final step (0 if it is unavailable).
void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
  CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");

  assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
  Region *FirstRegion = Scope->RegInfos.front().R;
  Region *LastRegion = Scope->RegInfos.back().R;
  BasicBlock *EntryBlock = FirstRegion->getEntry();
  BasicBlock *ExitBlock = LastRegion->getExit();
  Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);

  if (ExitBlock)
    insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);

  CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
                   << " at " << *Scope->BranchInsertPoint << "\n");
  BasicBlock *NewEntryBlock =
      SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
         "NewEntryBlock's only pred must be EntryBlock");
  FirstRegion->replaceEntryRecursive(NewEntryBlock);
  // From here on the old entry block plays the role of the pre-entry block.
  BasicBlock *PreEntryBlock = EntryBlock;

  ValueToValueMapTy VMap;
  cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);

  BranchInst *MergedBr =
      createMergedBranch(PreEntryBlock, EntryBlock, NewEntryBlock, VMap);

#ifndef NDEBUG
  assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
#endif

  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);

#ifndef NDEBUG
  assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
#endif

  fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
                          ProfileCount.value_or(0));
}
// Clone every block of every region of Scope into F (clones get the
// ".nonchr" name suffix) and record the original-to-clone mapping in VMap.
//
// After cloning:
//   - if ExitBlock is non-null, the blocks from the first clone through the
//     end of F are moved to just before ExitBlock;
//   - the instructions of the clones are remapped through VMap;
//   - if ExitBlock is non-null, each phi in ExitBlock gains, for every
//     incoming edge from a block inside LastRegion, a matching incoming edge
//     from that block's clone, carrying the remapped value when VMap has one
//     and the original value otherwise.
// PreEntryBlock must not be among the cloned blocks.
void CHR::cloneScopeBlocks(CHRScope *Scope,
                           BasicBlock *PreEntryBlock,
                           BasicBlock *ExitBlock,
                           Region *LastRegion,
                           ValueToValueMapTy &VMap) {
  SmallVector<BasicBlock*, 8> NewBlocks;
  for (RegInfo &RI : Scope->RegInfos) {
    for (BasicBlock *BB : RI.R->blocks()) {
      assert(BB != PreEntryBlock && "Don't copy the preetntry block");
      BasicBlock *Clone = CloneBasicBlock(BB, VMap, ".nonchr", &F);
      NewBlocks.push_back(Clone);
      VMap[BB] = Clone;
    }
  }

  if (ExitBlock)
    F.getBasicBlockList().splice(ExitBlock->getIterator(),
                                 F.getBasicBlockList(),
                                 NewBlocks[0]->getIterator(), F.end());

  // Make the clones refer to each other rather than to the originals.
  for (BasicBlock *Clone : NewBlocks)
    for (Instruction &I : *Clone)
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

  if (!ExitBlock)
    return;

  for (PHINode &PN : ExitBlock->phis()) {
    // Fix the bound up front: addIncoming() below grows the phi, and only
    // the incoming edges that existed before must be visited.
    unsigned NumOrigIncoming = PN.getNumIncomingValues();
    for (unsigned Idx = 0; Idx < NumOrigIncoming; ++Idx) {
      BasicBlock *Pred = PN.getIncomingBlock(Idx);
      if (!LastRegion->contains(Pred))
        continue;

      Value *V = PN.getIncomingValue(Idx);
      auto MappedIt = VMap.find(V);
      if (MappedIt != VMap.end())
        V = MappedIt->second;
      assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
      PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
    }
  }
}
// Replace the unconditional terminator of PreEntryBlock (which must branch to
// NewEntryBlock) with a conditional branch whose first successor is
// NewEntryBlock and whose second successor is the clone of NewEntryBlock
// recorded in VMap.
//
// The condition is a constant true placeholder. Returns the new branch.
BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
                                    BasicBlock *EntryBlock,
                                    BasicBlock *NewEntryBlock,
                                    ValueToValueMapTy &VMap) {
  auto *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
         "SplitBlock did not work correctly!");
  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
         "NewEntryBlock's only pred must be EntryBlock");
  assert(VMap.find(NewEntryBlock) != VMap.end() &&
         "NewEntryBlock must have been copied");

  // Remove the old terminator.
  OldBR->dropAllReferences();
  OldBR->eraseFromParent();

  BasicBlock *NonCHREntry = cast<BasicBlock>(VMap[NewEntryBlock]);
  Value *PlaceholderCond = ConstantInt::getTrue(F.getContext());
  BranchInst *NewBR =
      BranchInst::Create(NewEntryBlock, NonCHREntry, PlaceholderCond);
  PreEntryBlock->getInstList().push_back(NewBR);

  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
         "NewEntryBlock's only pred must be EntryBlock");
  return NewBR;
}
// Build the merged condition for Scope and install it on MergedBR.
//
// Every branch and select in Scope->CHRRegions is folded by fixupBranch() /
// fixupSelect(), each of which contributes to the merged condition and lowers
// the running bias to the smallest bias seen. Afterwards the statistics are
// updated (number of folded branches/selects minus one, also weighted by
// ProfileCount), an optimization remark is emitted, and MergedBR receives the
// merged condition together with branch weights obtained by scaling the
// final bias and its complement by 1000.
void CHR::fixupBranchesAndSelects(CHRScope *Scope,
                                  BasicBlock *PreEntryBlock,
                                  BranchInst *MergedBR,
                                  uint64_t ProfileCount) {
  Value *MergedCondition = ConstantInt::getTrue(F.getContext());
  BranchProbability CHRBranchBias(1, 1);
  uint64_t NumCHRedBranches = 0;
  // New instructions go right before the terminator of PreEntryBlock.
  IRBuilder<> IRB(PreEntryBlock->getTerminator());

  for (RegInfo &RI : Scope->CHRRegions) {
    if (RI.HasBranch) {
      fixupBranch(RI.R, Scope, IRB, MergedCondition, CHRBranchBias);
      ++NumCHRedBranches;
    }
    for (SelectInst *SI : RI.Selects) {
      fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
      ++NumCHRedBranches;
    }
  }

  // The merged branch itself remains, hence the "- 1".
  uint64_t BranchesRemoved = NumCHRedBranches - 1;
  Stats.NumBranchesDelta += BranchesRemoved;
  Stats.WeightedNumBranchesDelta += BranchesRemoved * ProfileCount;

  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "CHR",
                              MergedBR->getSuccessor(0)->getTerminator())
           << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
           << " branches or selects";
  });

  MergedBR->setCondition(MergedCondition);

  uint32_t Weights[] = {
      static_cast<uint32_t>(CHRBranchBias.scale(1000)),
      static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
  };
  MDBuilder MDB(F.getContext());
  MDNode *WeightsMD = MDB.createBranchWeights(Weights);
  MergedBR->setMetadata(LLVMContext::MD_prof, WeightsMD);
  CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
                   << "\n");
}
// Fold the biased branch that terminates the entry block of R.
//
// The branch's bias lowers CHRBranchBias if it is smaller. The hot target is
// the successor that enters the region when R is true-biased, and the region
// exit otherwise. The branch condition (negated if the hot target is not
// successor 0) is added to MergedCondition, and the branch condition is then
// replaced by the constant that selects the hot target.
void CHR::fixupBranch(Region *R, CHRScope *Scope,
                      IRBuilder<> &IRB,
                      Value *&MergedCondition,
                      BranchProbability &CHRBranchBias) {
  bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
         "Must be truthy or falsy");

  auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
         "Must be in the bias map");
  BranchProbability Bias = BranchBiasMap[R];
  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  // Keep the minimum bias seen so far.
  if (Bias < CHRBranchBias)
    CHRBranchBias = Bias;

  BasicBlock *Succ0 = BI->getSuccessor(0);
  BasicBlock *Succ1 = BI->getSuccessor(1);
  BasicBlock *RegionExitBlock = R->getExit();
  assert(RegionExitBlock && "Null ExitBlock");
  assert((Succ1 == RegionExitBlock || Succ0 == RegionExitBlock) &&
         Succ1 != Succ0 && "Invariant from findScopes");

  // IfThen is the successor that is not the region exit whenever successor 1
  // is the exit; otherwise IfThen is successor 1 and IfElse is successor 0.
  bool Succ1IsExit = Succ1 == RegionExitBlock;
  BasicBlock *IfThen = Succ1IsExit ? Succ0 : Succ1;
  BasicBlock *IfElse = Succ1IsExit ? Succ1 : Succ0;
  CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
                   << " IfElse " << IfElse->getName() << "\n");

  Value *Cond = BI->getCondition();
  BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
  bool ConditionTrue = HotTarget == Succ0;
  addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
                       MergedCondition);
  // Re-read the successor: the call above must not have reordered BI's own
  // successors.
  assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
         "The successor shouldn't change");

  LLVMContext &Ctx = F.getContext();
  Value *NewCondition =
      ConditionTrue ? ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
  BI->setCondition(NewCondition);
}
// Fold the biased select SI.
//
// The select's bias lowers CHRBranchBias if it is smaller. The select's
// condition (negated when SI is not true-biased) is added to MergedCondition,
// and the condition of SI is then replaced by constant true for a true-biased
// select and constant false otherwise.
void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
                      IRBuilder<> &IRB,
                      Value *&MergedCondition,
                      BranchProbability &CHRBranchBias) {
  bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  assert((IsTrueBiased ||
          Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
  assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
         "Must be in the bias map");

  BranchProbability Bias = SelectBiasMap[SI];
  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  // Keep the minimum bias seen so far.
  if (Bias < CHRBranchBias)
    CHRBranchBias = Bias;

  addToMergedCondition(IsTrueBiased, SI->getCondition(), SI, Scope, IRB,
                       MergedCondition);

  LLVMContext &Ctx = F.getContext();
  Value *NewCondition =
      IsTrueBiased ? ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
  SI->setCondition(NewCondition);
}
// AND one more term into MergedCondition.
//
// The term is Cond when IsTrueBiased is set and its negation otherwise. For
// the negation, an icmp is inverted in place when
// negateICmpIfUsedByBranchOrSelectOnly() allows it; in every other case an
// xor with true is emitted. A term that comes from a select and is not
// guaranteed to be free of undef/poison is frozen first. The conjunction is
// built with IRB.CreateLogicalAnd.
void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
                               Instruction *BranchOrSelect, CHRScope *Scope,
                               IRBuilder<> &IRB, Value *&MergedCondition) {
  if (!IsTrueBiased) {
    auto *ICmp = dyn_cast<ICmpInst>(Cond);
    bool NegatedInPlace =
        ICmp &&
        negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope);
    if (!NegatedInPlace)
      Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
  }

  bool NeedsFreeze = isa<SelectInst>(BranchOrSelect) &&
                     !isGuaranteedNotToBeUndefOrPoison(Cond);
  if (NeedsFreeze)
    Cond = IRB.CreateFreeze(Cond);

  MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
}
// Transform every scope in CHRScopes, in order. One TrivialPHIs set is shared
// by all of the scopes. With debug output enabled, the IR is dumped after
// each scope, labelled with a running index.
void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
  DenseSet<PHINode *> TrivialPHIs;
  unsigned ScopeIndex = 0;
  for (CHRScope *Scope : CHRScopes) {
    transformScopes(Scope, TrivialPHIs);
    CHR_DEBUG({
      std::ostringstream Label;
      Label << " after transformScopes " << ScopeIndex++;
      dumpIR(F, Label.str().c_str(), nullptr);
    });
    (void)ScopeIndex; // Only used by the debug output above.
  }
}
// Debug helper: print Label followed by the number of scopes on one line,
// then each scope on a line of its own, all to dbgs().
static void LLVM_ATTRIBUTE_UNUSED
dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
  raw_ostream &OS = dbgs();
  OS << Label << " " << Scopes.size() << "\n";
  for (CHRScope *S : Scopes)
    OS << *S << "\n";
}
// Run CHR on the function. Returns true if the function was changed.
//
// Bails out with false when shouldApply() rejects the function. Otherwise the
// scopes go through the pipeline
//   find -> split -> classify bias -> filter -> set CHR regions -> sort
// and, if any scope is left at the end, all remaining scopes are transformed.
// When something was transformed, a remark with the static and the
// profile-weighted branch reduction is emitted.
bool CHR::run() {
  if (!shouldApply(F, PSI))
    return false;

  CHR_DEBUG(dumpIR(F, "before", nullptr));

  bool Changed = false;
  {
    CHR_DEBUG(
        dbgs() << "RegionInfo:\n";
        RI.print(dbgs()));

    SmallVector<CHRScope *, 8> AllScopes;
    findScopes(AllScopes);
    CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));

    SmallVector<CHRScope *, 8> SplitScopes;
    splitScopes(AllScopes, SplitScopes);
    CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));

    classifyBiasedScopes(SplitScopes);
    CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");

    SmallVector<CHRScope *, 8> FilteredScopes;
    filterScopes(SplitScopes, FilteredScopes);
    CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));

    SmallVector<CHRScope *, 8> SetScopes;
    setCHRRegions(FilteredScopes, SetScopes);
    CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));

    SmallVector<CHRScope *, 8> SortedScopes;
    sortScopes(SetScopes, SortedScopes);
    CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));

    CHR_DEBUG(
        dbgs() << "RegionInfo:\n";
        RI.print(dbgs()));

    Changed = !SortedScopes.empty();
    if (Changed)
      transformScopes(SortedScopes);
  }

  if (!Changed)
    return false;

  CHR_DEBUG(dumpIR(F, "after", &Stats));
  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
           << ore::NV("Function", &F) << " "
           << "Reduced the number of branches in hot paths by "
           << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
           << " (static) and "
           << ore::NV("WeightedNumBranchesDelta",
                      Stats.WeightedNumBranchesDelta)
           << " (weighted by PGO count)";
  });
  return true;
}
namespace llvm {

// Constructing the pass loads the optional module/function filter files.
ControlHeightReductionPass::ControlHeightReductionPass() {
  parseCHRFilterFiles();
}

// New-pass-manager entry point: run CHR on F.
//
// Returns PreservedAnalyses::all() when nothing was changed (including when
// no cached profile summary is available) and PreservedAnalyses::none()
// otherwise.
PreservedAnalyses ControlHeightReductionPass::run(
    Function &F,
    FunctionAnalysisManager &FAM) {
  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  // getCachedResult() hands back a pointer, which is null when the module
  // analysis has not been computed. CHR stores the profile summary by
  // reference, so without one there is nothing it can safely do: skip the
  // function rather than dereference a null pointer.
  auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  if (!PSI)
    return PreservedAnalyses::all();

  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  bool Changed = CHR(F, BFI, DT, *PSI, RI, ORE).run();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

} // namespace llvm