#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <utility>
using namespace llvm;
namespace llvm {
class BlockFrequencyInfo;
class LPMUpdater;
}
#define DEBUG_TYPE "licm"
STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
cl::opt<unsigned> llvm::SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop,
bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(
Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags);
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
MemoryUse &MU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
static SmallVector<SmallSetVector<Value *, 8>, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
namespace {
struct LoopInvariantCodeMotion {
bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation)
: LicmMssaOptCap(LicmMssaOptCap),
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
LicmAllowSpeculation(LicmAllowSpeculation) {}
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
bool LicmAllowSpeculation;
};
struct LegacyLICMPass : public LoopPass {
static char ID; LegacyLICMPass(
unsigned LicmMssaOptCap = SetLicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation = true)
: LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
LicmAllowSpeculation) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
return false;
LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
<< L->getHeader()->getNameOrAsOperand() << "\n");
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
BlockFrequencyInfo *BFI =
hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
: nullptr;
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
return LICM.runOnLoop(
L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(), BFI,
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent()),
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, MSSA, &ORE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
AU.addPreserved<LazyBlockFrequencyInfoPass>();
AU.addPreserved<LazyBranchProbabilityInfoPass>();
}
private:
LoopInvariantCodeMotion LICM;
};
}
PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &) {
if (!AR.MSSA)
report_fatal_error("LICM requires MemorySSA (loop-mssa)");
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
Opts.AllowSpeculation);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
&AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<MemorySSAAnalysis>();
return PA;
}
void LICMPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
OS << "<";
OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
OS << ">";
}
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
if (!AR.MSSA)
report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
OptimizationRemarkEmitter ORE(LN.getParent());
LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
Opts.AllowSpeculation);
Loop &OutermostLoop = LN.getOutermostLoop();
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
&AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
if (!Changed)
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<MemorySSAAnalysis>();
return PA;
}
void LNICMPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
OS << "<";
OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
OS << ">";
}
char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation) {
return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
LicmAllowSpeculation);
}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
MemorySSA *MSSA)
: SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
IsSink, L, MSSA) {}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
Loop *L, MemorySSA *MSSA)
: LicmMssaOptCap(LicmMssaOptCap),
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
IsSink(IsSink) {
assert(((L != nullptr) == (MSSA != nullptr)) &&
"Unexpected values for SinkAndHoistLICMFlags");
if (!MSSA)
return;
unsigned AccessCapCount = 0;
for (auto *BB : L->getBlocks())
if (const auto *Accesses = MSSA->getBlockAccesses(BB))
for (const auto &MA : *Accesses) {
(void)MA;
++AccessCapCount;
if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
NoOfMemAccTooLarge = true;
return;
}
}
}
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
bool LoopNestMode) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
MSSA->ensureOptimizedUses();
if (hasDisableLICMTransformsHint(L)) {
return false;
}
bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
return llvm::any_of(*BB, [](Instruction &I) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
});
});
MemorySSAUpdater MSSAU(MSSA);
SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
true, L, MSSA);
BasicBlock *Preheader = L->getLoopPreheader();
ICFLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(L);
if (L->hasDedicatedExits())
Changed |= LoopNestMode
? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
DT, BFI, TLI, TTI, L, MSSAU,
&SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE);
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
LicmAllowSpeculation);
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
!Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
return isa<CatchSwitchInst>(Exit->getTerminator());
});
if (!HasCatchSwitch) {
SmallVector<Instruction *, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
MSSAInsertPts.push_back(nullptr);
}
PredIteratorCache PIC;
bool Promoted = false;
bool LocalPromoted;
do {
LocalPromoted = false;
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
if (Promoted)
formLCSSARecursively(*L, *DT, LI, SE);
Changed |= Promoted;
}
}
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
return Changed;
}
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
Loop *CurLoop, MemorySSAUpdater &MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
bool Changed = false;
for (DomTreeNode *DTN : reverse(Worklist)) {
BasicBlock *BB = DTN->getBlock();
if (inSubLoop(BB, CurLoop, LI))
continue;
for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
Instruction &I = *--II;
if (isInstructionTriviallyDead(&I, TLI)) {
LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
salvageKnowledge(&I);
salvageDebugInfo(I);
++II;
eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
bool FreeInLoop = false;
bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
eraseInstruction(I, *SafetyInfo, MSSAU);
}
Changed = true;
}
}
}
}
if (VerifyMemorySSA)
MSSAU.getMemorySSA()->verifyMemorySSA();
return Changed;
}
bool llvm::sinkRegionForLoopNest(
DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
bool Changed = false;
SmallPriorityWorklist<Loop *, 4> Worklist;
Worklist.insert(CurLoop);
appendLoopsToWorklist(*CurLoop, Worklist);
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
}
return Changed;
}
namespace {
class ControlFlowHoister {
private:
LoopInfo *LI;
DominatorTree *DT;
Loop *CurLoop;
MemorySSAUpdater &MSSAU;
DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
public:
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
MemorySSAUpdater &MSSAU)
: LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
void registerPossiblyHoistableBranch(BranchInst *BI) {
if (!ControlFlowHoisting || !BI->isConditional() ||
!CurLoop->hasLoopInvariantOperands(BI))
return;
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
TrueDest == FalseDest)
return;
SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
BasicBlock *CommonSucc = nullptr;
if (TrueDestSucc.count(FalseDest)) {
CommonSucc = FalseDest;
} else if (FalseDestSucc.count(TrueDest)) {
CommonSucc = TrueDest;
} else {
set_intersect(TrueDestSucc, FalseDestSucc);
if (TrueDestSucc.size() == 1)
CommonSucc = *TrueDestSucc.begin();
else if (!TrueDestSucc.empty()) {
Function *F = TrueDest->getParent();
auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
auto It = llvm::find_if(*F, IsSucc);
assert(It != F->end() && "Could not find successor in function");
CommonSucc = &*It;
}
}
if (CommonSucc && DT->dominates(BI, CommonSucc))
HoistableBranches[BI] = CommonSucc;
}
bool canHoistPHI(PHINode *PN) {
if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
return false;
SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
BasicBlock *BB = PN->getParent();
for (BasicBlock *PredBB : predecessors(BB))
PredecessorBlocks.insert(PredBB);
if (PredecessorBlocks.size() != pred_size(BB))
return false;
for (auto &Pair : HoistableBranches) {
if (Pair.second == BB) {
if (Pair.first->getSuccessor(0) == BB) {
PredecessorBlocks.erase(Pair.first->getParent());
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
} else if (Pair.first->getSuccessor(1) == BB) {
PredecessorBlocks.erase(Pair.first->getParent());
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
} else {
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
}
}
}
return PredecessorBlocks.empty();
}
BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
if (!ControlFlowHoisting)
return CurLoop->getLoopPreheader();
if (HoistDestinationMap.count(BB))
return HoistDestinationMap[BB];
auto HasBBAsSuccessor =
[&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
Pair.first->getSuccessor(1) == BB);
};
auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
if (It == HoistableBranches.end()) {
LLVM_DEBUG(dbgs() << "LICM using "
<< InitialPreheader->getNameOrAsOperand()
<< " as hoist destination for "
<< BB->getNameOrAsOperand() << "\n");
HoistDestinationMap[BB] = InitialPreheader;
return InitialPreheader;
}
BranchInst *BI = It->first;
assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
HoistableBranches.end() &&
"BB is expected to be the target of at most one branch");
LLVMContext &C = BB->getContext();
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
BasicBlock *CommonSucc = HoistableBranches[BI];
BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
auto CreateHoistedBlock = [&](BasicBlock *Orig) {
if (HoistDestinationMap.count(Orig))
return HoistDestinationMap[Orig];
BasicBlock *New =
BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
HoistDestinationMap[Orig] = New;
DT->addNewBlock(New, HoistTarget);
if (CurLoop->getParentLoop())
CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
++NumCreatedBlocks;
LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
<< " as hoist destination for " << Orig->getName()
<< "\n");
return New;
};
BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
if (!HoistCommonSucc->getTerminator()) {
BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
assert(TargetSucc && "Expected hoist target to have a single successor");
HoistCommonSucc->moveBefore(TargetSucc);
BranchInst::Create(TargetSucc, HoistCommonSucc);
}
if (!HoistTrueDest->getTerminator()) {
HoistTrueDest->moveBefore(HoistCommonSucc);
BranchInst::Create(HoistCommonSucc, HoistTrueDest);
}
if (!HoistFalseDest->getTerminator()) {
HoistFalseDest->moveBefore(HoistCommonSucc);
BranchInst::Create(HoistCommonSucc, HoistFalseDest);
}
if (HoistTarget == InitialPreheader) {
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
DT->changeImmediateDominator(HeaderNode, PreheaderNode);
for (auto &Pair : HoistDestinationMap)
if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
Pair.second = HoistCommonSucc;
}
ReplaceInstWithInst(
HoistTarget->getTerminator(),
BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
++NumClonedBranches;
assert(CurLoop->getLoopPreheader() &&
"Hoisting blocks should not have destroyed preheader");
return HoistDestinationMap[BB];
}
};
}
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, bool LoopNestMode,
bool AllowSpeculation) {
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
SmallVector<Instruction *, 16> HoistedInstructions;
LoopBlocksRPO Worklist(CurLoop);
Worklist.perform(LI);
bool Changed = false;
for (BasicBlock *BB : Worklist) {
if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
continue;
for (Instruction &I : llvm::make_early_inc_range(*BB)) {
if (Constant *C = ConstantFoldInstruction(
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
I.replaceAllUsesWith(C);
if (isInstructionTriviallyDead(&I, TLI))
eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
}
if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
CurLoop->isLoopInvariant(I.getOperand(1))) {
auto Divisor = I.getOperand(1);
auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
ReciprocalDivisor->insertBefore(&I);
auto Product =
BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
Product->setFastMathFlags(I.getFastMathFlags());
SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
eraseInstruction(I, *SafetyInfo, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, MSSAU, SE, ORE);
HoistedInstructions.push_back(ReciprocalDivisor);
Changed = true;
continue;
}
auto IsInvariantStart = [&](Instruction &I) {
using namespace PatternMatch;
return I.use_empty() &&
match(&I, m_Intrinsic<Intrinsic::invariant_start>());
};
auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
};
if ((IsInvariantStart(I) || isGuard(&I)) &&
CurLoop->hasLoopInvariantOperands(&I) &&
MustExecuteWithoutWritesBefore(I)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
Changed = true;
continue;
}
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
if (CFH.canHoistPHI(PN)) {
for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
PN->setIncomingBlock(
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
Changed = true;
continue;
}
}
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
CFH.registerPossiblyHoistableBranch(BI);
}
}
Instruction *HoistPoint = nullptr;
if (ControlFlowHoisting) {
for (Instruction *I : reverse(HoistedInstructions)) {
if (!llvm::all_of(I->uses(),
[&](Use &U) { return DT->dominates(I, U); })) {
BasicBlock *Dominator =
DT->getNode(I->getParent())->getIDom()->getBlock();
if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
if (HoistPoint)
assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
"New hoist point expected to dominate old hoist point");
HoistPoint = Dominator->getTerminator();
}
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getNameOrAsOperand()
<< ": " << *I << "\n");
moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
HoistPoint = I;
Changed = true;
}
}
}
if (VerifyMemorySSA)
MSSAU.getMemorySSA()->verifyMemorySSA();
#ifdef EXPENSIVE_CHECKS
if (Changed) {
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"Dominator tree verification failed");
LI->verify(*DT);
}
#endif
return Changed;
}
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getOperand(0);
const DataLayout &DL = LI->getModule()->getDataLayout();
const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
if (LocSizeInBits.isScalable())
return false;
auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
LI->getPointerAddressSpace());
unsigned BitcastsVisited = 0;
while (Addr->getType() != PtrInt8Ty) {
auto *BC = dyn_cast<BitCastInst>(Addr);
if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
return false;
Addr = BC->getOperand(0);
}
if (isa<Constant>(Addr))
return false;
unsigned UsesVisited = 0;
for (auto *U : Addr->users()) {
if (++UsesVisited > MaxNumUsesTraversed)
return false;
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
!II->use_empty())
continue;
ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
if (InvariantSize->isNegative())
continue;
uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits &&
DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
return true;
}
return false;
}
namespace {
bool isHoistableAndSinkableInst(Instruction &I) {
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
isa<InsertValueInst>(I) || isa<FreezeInst>(I));
}
bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
for (auto *BB : L->getBlocks())
if (MSSAU.getMemorySSA()->getBlockDefs(BB))
return false;
return true;
}
bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
for (const auto &Acc : *Accs) {
if (isa<MemoryPhi>(&Acc))
continue;
const auto *MUD = cast<MemoryUseOrDef>(&Acc);
if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
return false;
}
}
return true;
}
}
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, MemorySSAUpdater &MSSAU,
bool TargetExecutesOncePerLoop,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
if (!isHoistableAndSinkableInst(I))
return false;
MemorySSA *MSSA = MSSAU.getMemorySSA();
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
return false;
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
if (LI->hasMetadata(LLVMContext::MD_invariant_load))
return true;
if (LI->isAtomic() && !TargetExecutesOncePerLoop)
return false;
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
bool Invalidated = pointerInvalidatedByLoop(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
ORE->emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
<< "failed to move load with loop-invariant address "
"because the loop may invalidate its value";
});
return !Invalidated;
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
if (isa<DbgInfoIntrinsic>(I))
return false;
if (CI->mayThrow())
return false;
if (CI->isConvergent())
return false;
using namespace PatternMatch;
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
return true;
if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
return true;
FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == FMRB_DoesNotAccessMemory)
return true;
if (AAResults::onlyReadsMemory(Behavior)) {
if (AAResults::onlyAccessesArgPointees(Behavior)) {
for (Value *Op : CI->args())
if (Op->getType()->isPointerTy() &&
pointerInvalidatedByLoop(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
Flags))
return false;
return true;
}
if (isReadOnly(MSSAU, CurLoop))
return true;
}
return false;
} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isUnordered())
return false;
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
return false;
auto *SIMD = MSSA->getMemoryAccess(SI);
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses)
if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
auto *MD = MU->getDefiningAccess();
if (!MSSA->isLiveOnEntryDef(MD) &&
CurLoop->contains(MD->getBlock()))
return false;
if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; assert(!LI->isUnordered() && "Expected unordered load");
return false;
}
if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
if (isModOrRefSet(MRI))
return false;
}
}
}
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
Flags.incrementClobberingCalls();
return MSSA->isLiveOnEntryDef(Source) ||
!CurLoop->contains(Source->getBlock());
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
return true;
}
static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
for (const Value *IncValue : PN.incoming_values())
if (IncValue != &I)
return false;
return true;
}
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
const TargetTransformInfo *TTI) {
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) !=
TargetTransformInfo::TCC_Free)
return false;
const BasicBlock *BB = GEP->getParent();
for (const User *U : GEP->users()) {
const Instruction *UI = cast<Instruction>(U);
if (CurLoop->contains(UI) &&
(BB != UI->getParent() ||
(!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
return false;
}
return true;
} else
return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
TargetTransformInfo::TCC_Free;
}
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop,
bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
const BasicBlock *BB = PN->getParent();
if (isa<CatchSwitchInst>(BB->getTerminator()))
return false;
if (isa<CallInst>(I))
if (!BlockColors.empty() &&
BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
return false;
if (LoopNestMode) {
while (isa<PHINode>(UI) && UI->hasOneUser() &&
UI->getNumOperands() == 1) {
if (!CurLoop->contains(UI))
break;
UI = cast<Instruction>(UI->user_back());
}
}
}
if (CurLoop->contains(UI)) {
if (IsFree) {
FreeInLoop = true;
continue;
}
return false;
}
}
return true;
}
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
Instruction *New;
if (auto *CI = dyn_cast<CallInst>(&I)) {
const auto &BlockColors = SafetyInfo->getBlockColors();
SmallVector<OperandBundleDef, 1> OpBundles;
for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
BundleIdx != BundleEnd; ++BundleIdx) {
OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
if (Bundle.getTagID() == LLVMContext::OB_funclet)
continue;
OpBundles.emplace_back(Bundle);
}
if (!BlockColors.empty()) {
const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
assert(CV.size() == 1 && "non-unique color for exit block!");
BasicBlock *BBColor = CV.front();
Instruction *EHPad = BBColor->getFirstNonPHI();
if (EHPad->isEHPad())
OpBundles.emplace_back("funclet", EHPad);
}
New = CallInst::Create(CI, OpBundles);
} else {
New = I.clone();
}
ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
if (!I.getName().empty())
New->setName(I.getName() + ".le");
if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
New, nullptr, New->getParent(), MemorySSA::Beginning);
if (NewMemAcc) {
if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
MSSAU.insertDef(MemDef, true);
else {
auto *MemUse = cast<MemoryUse>(NewMemAcc);
MSSAU.insertUse(MemUse, true);
}
}
}
for (Use &Op : New->operands())
if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
auto *OInst = cast<Instruction>(Op.get());
PHINode *OpPN =
PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
OInst->getName() + ".lcssa", &ExitBlock.front());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
Op = OpPN;
}
return New;
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU) {
MSSAU.removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
I.eraseFromParent();
}
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU,
ScalarEvolution *SE) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
if (SE)
SE->forgetValue(&I);
}
static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
MemorySSAUpdater &MSSAU) {
assert(isTriviallyReplaceablePHI(*TPN, *I) &&
"Expect only trivially replaceable PHI");
BasicBlock *ExitBlock = TPN->getParent();
Instruction *New;
auto It = SunkCopies.find(ExitBlock);
if (It != SunkCopies.end())
New = It->second;
else
New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
*I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
return New;
}
static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
BasicBlock *BB = PN->getParent();
if (!BB->canSplitPredecessors())
return false;
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
for (BasicBlock *BBPred : predecessors(BB)) {
if (isa<IndirectBrInst>(BBPred->getTerminator()))
return false;
}
return true;
}
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
LoopInfo *LI, const Loop *CurLoop,
LoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
#endif
BasicBlock *ExitBB = PN->getParent();
assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
const auto &BlockColors = SafetyInfo->getBlockColors();
SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
while (!PredBBs.empty()) {
BasicBlock *PredBB = *PredBBs.begin();
assert(CurLoop->contains(PredBB) &&
"Expect all predecessors are in the loop");
if (PN->getBasicBlockIndex(PredBB) >= 0) {
BasicBlock *NewPred = SplitBlockPredecessors(
ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
if (!BlockColors.empty())
SafetyInfo->copyColors(NewPred, PredBB);
}
PredBBs.remove(PredBB);
}
}
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
BlockFrequencyInfo *BFI, const Loop *CurLoop,
ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU,
OptimizationRemarkEmitter *ORE) {
bool Changed = false;
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
SmallPtrSet<Instruction *, 8> VisitedUsers;
for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
auto *User = cast<Instruction>(*UI);
Use &U = UI.getUse();
++UI;
if (VisitedUsers.count(User) || CurLoop->contains(User))
continue;
if (!DT->isReachableFromEntry(User->getParent())) {
U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
PHINode *PN = cast<PHINode>(User);
BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {
U = PoisonValue::get(I.getType());
Changed = true;
continue;
}
VisitedUsers.insert(PN);
if (isTriviallyReplaceablePHI(*PN, I))
continue;
if (!canSplitPredecessors(PN, SafetyInfo))
return Changed;
splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
UI = I.user_begin();
UE = I.user_end();
}
if (VisitedUsers.empty())
return Changed;
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
<< "sinking " << ore::NV("Inst", &I);
});
if (isa<LoadInst>(I))
++NumMovedLoads;
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumSunk;
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
#endif
SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
if (CurLoop->contains(User))
continue;
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
eraseInstruction(*PN, *SafetyInfo, MSSAU);
Changed = true;
}
return Changed;
}
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
<< I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
<< ore::NV("Inst", &I);
});
if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
I.dropUndefImplyingAttrsAndUnknownMetadata();
if (isa<PHINode>(I))
moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
else
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
I.updateLocationAfterHoist();
if (isa<LoadInst>(I))
++NumMovedLoads;
else if (isa<CallInst>(I))
++NumMovedCalls;
++NumHoisted;
}
static bool isSafeToExecuteUnconditionally(
Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
bool AllowSpeculation) {
if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;
bool GuaranteedToExecute =
SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
if (!GuaranteedToExecute) {
auto *LI = dyn_cast<LoadInst>(&Inst);
if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
ORE->emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
<< "failed to hoist load with loop-invariant address "
"because load is conditionally executed";
});
}
return GuaranteedToExecute;
}
namespace {
class LoopPromoter : public LoadAndStorePromoter {
Value *SomePtr; const SmallSetVector<Value *, 8> &PointerMustAliases;
SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
MemorySSAUpdater &MSSAU;
LoopInfo &LI;
DebugLoc DL;
Align Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
bool CanInsertStoresInExitBlocks;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
return V;
Instruction *I = cast<Instruction>(V);
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
I->getName() + ".lcssa", &BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PN->addIncoming(I, Pred);
return PN;
}
public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
const SmallSetVector<Value *, 8> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo),
CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
else
Ptr = cast<StoreInst>(I)->getPointerOperand();
return PointerMustAliases.count(Ptr);
}
void insertStoresInLoopExitBlocks() {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
MemoryAccess *NewMemAcc;
if (!MSSAInsertPoint) {
NewMemAcc = MSSAU.createMemoryAccessInBB(
NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
} else {
NewMemAcc =
MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
MSSAInsertPts[i] = NewMemAcc;
MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
}
}
void doExtraRewritesBeforeFinalDeletion() override {
if (CanInsertStoresInExitBlocks)
insertStoresInLoopExitBlocks();
}
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
MSSAU.removeMemoryAccess(I);
}
bool shouldDelete(Instruction *I) const override {
if (isa<StoreInst>(I))
return CanInsertStoresInExitBlocks;
return true;
}
};
bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
DominatorTree *DT) {
return !PointerMayBeCapturedBefore(V, true,
true,
L->getHeader()->getTerminator(), DT);
}
bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
DominatorTree *DT) {
bool RequiresNoCaptureBeforeUnwind;
if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
return false;
return !RequiresNoCaptureBeforeUnwind ||
isNotCapturedBeforeOrInLoop(Object, L, DT);
}
}
bool llvm::promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &PointerMustAliases,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
"Unexpected Input to promoteLoopAccessesToScalars");
Value *SomePtr = *PointerMustAliases.begin();
BasicBlock *Preheader = CurLoop->getLoopPreheader();
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
bool StoreIsGuanteedToExecute = false;
bool FoundLoadToPromote = false;
SmallVector<Instruction *, 64> LoopUses;
Align Alignment;
bool SawUnorderedAtomic = false;
bool SawNotAtomic = false;
AAMDNodes AATags;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
bool IsKnownThreadLocalObject = false;
if (SafetyInfo->anyBlockMayThrow()) {
Value *Object = getUnderlyingObject(SomePtr);
if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
return false;
IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
}
Type *AccessTy = nullptr;
for (Value *ASIV : PointerMustAliases) {
for (Use &U : ASIV->uses()) {
Instruction *UI = dyn_cast<Instruction>(U.getUser());
if (!UI || !CurLoop->contains(UI))
continue;
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
if (!Load->isUnordered())
return false;
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
FoundLoadToPromote = true;
Align InstAlignment = Load->getAlign();
if (!DereferenceableInPH || (InstAlignment > Alignment))
if (isSafeToExecuteUnconditionally(
*Load, DT, TLI, CurLoop, SafetyInfo, ORE,
Preheader->getTerminator(), AllowSpeculation)) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
continue;
if (!Store->isUnordered())
return false;
SawUnorderedAtomic |= Store->isAtomic();
SawNotAtomic |= !Store->isAtomic();
Align InstAlignment = Store->getAlign();
bool GuaranteedToExecute =
SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
StoreIsGuanteedToExecute |= GuaranteedToExecute;
if (!DereferenceableInPH || !SafeToInsertStore ||
(InstAlignment > Alignment)) {
if (GuaranteedToExecute) {
DereferenceableInPH = true;
SafeToInsertStore = true;
Alignment = std::max(Alignment, InstAlignment);
}
}
if (!SafeToInsertStore)
SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
return DT->dominates(Store->getParent(), Exit);
});
if (!DereferenceableInPH) {
DereferenceableInPH = isDereferenceableAndAlignedPointer(
Store->getPointerOperand(), Store->getValueOperand()->getType(),
Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI);
}
} else
return false;
if (!AccessTy)
AccessTy = getLoadStoreType(UI);
else if (AccessTy != getLoadStoreType(UI))
return false;
if (LoopUses.empty()) {
AATags = UI->getAAMetadata();
} else if (AATags) {
AATags = AATags.merge(UI->getAAMetadata());
}
LoopUses.push_back(UI);
}
}
if (SawUnorderedAtomic && SawNotAtomic)
return false;
if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
return false;
if (!DereferenceableInPH)
return false;
if (!SafeToInsertStore) {
if (IsKnownThreadLocalObject)
SafeToInsertStore = true;
else {
Value *Object = getUnderlyingObject(SomePtr);
SafeToInsertStore =
(isNoAliasCall(Object) || isa<AllocaInst>(Object)) &&
isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
}
}
if (!SafeToInsertStore && !FoundLoadToPromote)
return false;
if (SafeToInsertStore)
LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
<< '\n');
else
LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
<< '\n');
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
LoopUses[0])
<< "Moving accesses to memory location out of the loop";
});
++NumPromoted;
std::vector<const DILocation *> LoopUsesLocs;
for (auto U : LoopUses)
LoopUsesLocs.push_back(U->getDebugLoc().get());
auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
SafeToInsertStore);
LoadInst *PreheaderLoad = nullptr;
if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
PreheaderLoad =
new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DebugLoc());
if (AATags)
PreheaderLoad->setAAMetadata(AATags);
MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
MSSAU.insertUse(NewMemUse, true);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
} else {
SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
}
if (VerifyMemorySSA)
MSSAU.getMemorySSA()->verifyMemorySSA();
Promoter.run(LoopUses);
if (VerifyMemorySSA)
MSSAU.getMemorySSA()->verifyMemorySSA();
if (PreheaderLoad && PreheaderLoad->use_empty())
eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
}
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn) {
for (const BasicBlock *BB : L->blocks())
if (const auto *Accesses = MSSA->getBlockAccesses(BB))
for (const auto &Access : *Accesses)
if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
Fn(MUD->getMemoryInst());
}
static SmallVector<SmallSetVector<Value *, 8>, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
AliasSetTracker AST(*AA);
auto IsPotentiallyPromotable = [L](const Instruction *I) {
if (const auto *SI = dyn_cast<StoreInst>(I))
return L->isLoopInvariant(SI->getPointerOperand());
if (const auto *LI = dyn_cast<LoadInst>(I))
return L->isLoopInvariant(LI->getPointerOperand());
return false;
};
SmallPtrSet<Value *, 16> AttemptingPromotion;
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
if (IsPotentiallyPromotable(I)) {
AttemptingPromotion.insert(I);
AST.add(I);
}
});
SmallVector<const AliasSet *, 8> Sets;
for (AliasSet &AS : AST)
if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
Sets.push_back(&AS);
if (Sets.empty())
return {};
foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
if (AttemptingPromotion.contains(I))
return;
llvm::erase_if(Sets, [&](const AliasSet *AS) {
return AS->aliasesUnknownInst(I, *AA);
});
});
SmallVector<SmallSetVector<Value *, 8>, 0> Result;
for (const AliasSet *Set : Sets) {
SmallSetVector<Value *, 8> PointerMustAliases;
for (const auto &ASI : *Set)
PointerMustAliases.insert(ASI.getValue());
Result.push_back(std::move(PointerMustAliases));
}
return Result;
}
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags) {
if (!Flags.getIsSink()) {
MemoryAccess *Source;
if (Flags.tooManyClobberingCalls())
Source = MU->getDefiningAccess();
else {
Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
Flags.incrementClobberingCalls();
}
return !MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock());
}
if (Flags.tooManyMemoryAccesses())
return true;
for (auto *BB : CurLoop->getBlocks())
if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
return true;
if (!CurLoop->contains(&I))
return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
return false;
}
bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
if (const auto *Accesses = MSSA.getBlockDefs(&BB))
for (const auto &MA : *Accesses)
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
return true;
return false;
}
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
return LI->getLoopFor(BB) != CurLoop;
}