#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "loop-reduce"
// Hard-coded limits. Their uses lie outside this excerpt; judging by the names
// they presumably bound the number of IV users handled and the size of SCEV
// debug-value salvage expressions -- TODO confirm at the use sites.
static const unsigned MaxIVUsers = 200;
static const unsigned MaxSCEVSalvageExpressionSize = 64;
// Hidden flag "enable-lsr-phielim" (defaults to true).
static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
// Hidden flag "lsr-insns-cost" (defaults to true). Cost::RateFormula,
// Cost::isLess and Cost::print consult it to decide whether the instruction
// count takes part in the cost.
static cl::opt<bool> InsnsCost(
"lsr-insns-cost", cl::Hidden, cl::init(true),
cl::desc("Add instruction count to a LSR cost model"));
// Hidden flag "lsr-exp-narrow" (defaults to false).
static cl::opt<bool> LSRExpNarrow(
"lsr-exp-narrow", cl::Hidden, cl::init(false),
cl::desc("Narrow LSR complex solution using"
" expectation of registers number"));
// Hidden flag "lsr-filter-same-scaled-reg" (defaults to true).
static cl::opt<bool> FilterSameScaledReg(
"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
// Hidden flag "lsr-preferred-addressing-mode"; accepts "none", "preindexed"
// or "postindexed" and defaults to TTI::AMK_None.
static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
"lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
cl::desc("A flag that overrides the target's preferred addressing mode."),
cl::values(clEnumValN(TTI::AMK_None,
"none",
"Don't prefer any addressing mode"),
clEnumValN(TTI::AMK_PreIndexed,
"preindexed",
"Prefer pre-indexed addressing mode"),
clEnumValN(TTI::AMK_PostIndexed,
"postindexed",
"Prefer post-indexed addressing mode")));
// Hidden flag "lsr-complexity-limit"; the default is the largest uint16_t
// value (65535).
static cl::opt<unsigned> ComplexityLimit(
"lsr-complexity-limit", cl::Hidden,
cl::init(std::numeric_limits<uint16_t>::max()),
cl::desc("LSR search space complexity limit"));
// Hidden flag "lsr-setupcost-depth-limit" (defaults to 7). Passed as the
// initial recursion depth to getSetupCost() from Cost::RateRegister.
static cl::opt<unsigned> SetupCostDepthLimit(
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
cl::desc("The limit on recursion depth for LSRs setup cost"));
// "stress-ivchain" is a real command-line option only when NDEBUG is not
// defined; otherwise StressIVChain is a plain bool that is always false.
#ifndef NDEBUG
static cl::opt<bool> StressIVChain(
"stress-ivchain", cl::Hidden, cl::init(false),
cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif
namespace {
/// Describes a memory access: the type of the accessed value together with
/// the address space of the pointer used for the access.
struct MemAccessTy {
  /// Sentinel meaning "address space not known".
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  /// Type of the accessed value (null for a default-constructed object).
  Type *MemTy = nullptr;
  /// Address space of the access, or UnknownAddressSpace.
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  /// Two access descriptions are equal when both the type and the address
  /// space match.
  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  /// Build a description whose memory type is `void` in \p Ctx, optionally
  /// with a known address space \p AS.
  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  /// Return the accessed type. This is a pure accessor, so it is
  /// const-qualified and can be used on const objects as well.
  Type *getType() const { return MemTy; }
};
/// Per-register bookkeeping stored by RegUseTracker.
class RegSortData {
public:
// Bit i is set while the use with index i references this register
// (set by RegUseTracker::countRegister, cleared by dropRegister).
SmallBitVector UsedByIndices;
// Writes "[NumUses=<number of set bits>]" to OS.
void print(raw_ostream &OS) const;
// Prints to errs() followed by a newline.
void dump() const;
};
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the number of uses that currently reference this register, in the
/// form "[NumUses=N]".
void RegSortData::print(raw_ostream &OS) const {
  const auto NumUses = UsedByIndices.count();
  OS << "[NumUses=";
  OS << NumUses;
  OS << ']';
}
/// Debugging aid: print this object to the error stream, newline-terminated.
LLVM_DUMP_METHOD void RegSortData::dump() const {
  auto &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
namespace {
/// Records, for every register (a SCEV expression), the set of use indices
/// that reference it.
class RegUseTracker {
using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
// Usage bits per register, keyed by the register's SCEV.
RegUsesTy RegUsesMap;
// Registers in the order they were first passed to countRegister(); this is
// the sequence exposed through begin()/end().
SmallVector<const SCEV *, 16> RegSequence;
public:
// Mark Reg as referenced by use LUIdx, registering Reg if it is new.
void countRegister(const SCEV *Reg, size_t LUIdx);
// Clear the mark that use LUIdx references Reg (Reg must be known).
void dropRegister(const SCEV *Reg, size_t LUIdx);
// For every register, copy the bit of LastLUIdx into slot LUIdx and then
// truncate the bit vector to at most LastLUIdx entries.
void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
// True if some use other than LUIdx references Reg.
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
// Usage bits of a known register (asserts that Reg is known).
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
// Forget all registers.
void clear();
using iterator = SmallVectorImpl<const SCEV *>::iterator;
using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;
// Iteration visits registers in RegSequence order.
iterator begin() { return RegSequence.begin(); }
iterator end() { return RegSequence.end(); }
const_iterator begin() const { return RegSequence.begin(); }
const_iterator end() const { return RegSequence.end(); }
};
}
/// Record that the use with index \p LUIdx references register \p Reg.
/// A register seen for the first time is also appended to RegSequence.
void RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  auto Inserted = RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  const bool IsNewRegister = Inserted.second;
  if (IsNewRegister)
    RegSequence.push_back(Reg);

  // Grow the bit vector only when it is too short to hold bit LUIdx.
  auto &Uses = Inserted.first->second.UsedByIndices;
  const size_t Needed = LUIdx + 1;
  if (Uses.size() < Needed)
    Uses.resize(Needed);
  Uses.set(LUIdx);
}
/// Clear the record that the use with index \p LUIdx references \p Reg.
/// \p Reg must already be tracked and its bit vector must cover \p LUIdx.
void RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  auto It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());

  auto &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);

  RSD.UsedByIndices.reset(LUIdx);
}
void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
for (auto &Pair : RegUsesMap) {
SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
}
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
if (I == RegUsesMap.end())
return false;
const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
return UsedByIndices.find_next(i) != -1;
}
/// Return the usage bit vector of \p Reg, which must be a tracked register.
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  auto I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  const auto &Data = I->second;
  return Data.UsedByIndices;
}
/// Drop all tracked registers and their usage information.
void RegUseTracker::clear() {
  // The two containers are independent, so the order of clearing is
  // irrelevant.
  RegSequence.clear();
  RegUsesMap.clear();
}
namespace {
/// One candidate way of computing a value. As printed by print(), a formula
/// is the sum
///   BaseGV + BaseOffset + sum(BaseRegs) + Scale*ScaledReg + imm(UnfoldedOffset).
struct Formula {
// Optional global symbol term.
GlobalValue *BaseGV = nullptr;
// Constant offset term.
int64_t BaseOffset = 0;
// Expected to be true exactly when BaseRegs is non-empty; print() reports a
// mismatch as an error marker.
bool HasBaseReg = false;
// Multiplier applied to ScaledReg; 0 when there is no scaled register.
int64_t Scale = 0;
// Registers that are added without scaling.
SmallVector<const SCEV *, 4> BaseRegs;
// Register that is multiplied by Scale, or null.
const SCEV *ScaledReg = nullptr;
// Additional immediate, printed as "imm(...)"; a non-zero value is counted
// as one extra base add by Cost::RateFormula.
int64_t UnfoldedOffset = 0;
Formula() = default;
// Initialize the formula from expression S with respect to loop L.
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
// Check / establish the canonical register arrangement for loop L.
bool isCanonical(const Loop &L) const;
void canonicalize(const Loop &L);
// Turn a 1*ScaledReg term back into a base register; false if Scale != 1.
bool unscale();
// True for a formula consisting of exactly one base register and nothing else.
bool hasZeroEnd() const;
// Number of registers referenced (base registers plus the scaled one).
size_t getNumRegs() const;
// Type of the first available component, or null if there is none.
Type *getType() const;
// Remove S, which must refer to an element of BaseRegs.
void deleteBaseReg(const SCEV *&S);
// True if S is the scaled register or one of the base registers.
bool referencesReg(const SCEV *S) const;
// True if any register of this formula is also used by a use other than LUIdx.
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
ScalarEvolution &SE) {
if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (const SCEV *S : Add->operands())
DoInitialMatch(S, L, Good, Bad, SE);
return;
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero() && AR->isAffine()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop(), SCEV::FlagAnyWrap),
L, Good, Bad, SE);
return;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (const SCEV *S : MyGood)
Good.push_back(SE.getMulExpr(NegOne, S));
for (const SCEV *S : MyBad)
Bad.push_back(SE.getMulExpr(NegOne, S));
return;
}
Bad.push_back(S);
}
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
canonicalize(*L);
}
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
return SCEVExprContains(S, [&L](const SCEV *S) {
return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
});
}
bool Formula::isCanonical(const Loop &L) const {
if (!ScaledReg)
return BaseRegs.size() <= 1;
if (Scale != 1)
return true;
if (Scale == 1 && BaseRegs.empty())
return false;
if (containsAddRecDependentOnLoop(ScaledReg, L))
return true;
return none_of(BaseRegs, [&L](const SCEV *S) {
return containsAddRecDependentOnLoop(S, L);
});
}
void Formula::canonicalize(const Loop &L) {
if (isCanonical(L))
return;
if (BaseRegs.empty()) {
assert(ScaledReg && "Expected 1*reg => reg");
assert(Scale == 1 && "Expected 1*reg => reg");
BaseRegs.push_back(ScaledReg);
Scale = 0;
ScaledReg = nullptr;
return;
}
if (!ScaledReg) {
ScaledReg = BaseRegs.pop_back_val();
Scale = 1;
}
if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
auto I = find_if(BaseRegs, [&L](const SCEV *S) {
return containsAddRecDependentOnLoop(S, L);
});
if (I != BaseRegs.end())
std::swap(ScaledReg, *I);
}
assert(isCanonical(L) && "Failed to canonicalize?");
}
/// If the scaled register has scale 1, move it into the list of base
/// registers and clear the scaled slot. Returns whether that happened.
bool Formula::unscale() {
  const bool HasUnitScale = (Scale == 1);
  if (HasUnitScale) {
    BaseRegs.push_back(ScaledReg);
    ScaledReg = nullptr;
    Scale = 0;
  }
  return HasUnitScale;
}
/// True when the formula has no offsets, no scaled register and exactly one
/// base register.
bool Formula::hasZeroEnd() const {
  const bool NoOffsets = !UnfoldedOffset && !BaseOffset;
  const bool SingleBaseRegOnly = BaseRegs.size() == 1 && !ScaledReg;
  return NoOffsets && SingleBaseRegOnly;
}
/// Number of registers the formula references: all base registers plus the
/// scaled register, if present.
size_t Formula::getNumRegs() const {
  size_t Count = BaseRegs.size();
  if (ScaledReg)
    ++Count;
  return Count;
}
/// Return the type of the first available component, looked up in the order
/// first base register, scaled register, global value; null if none exists.
Type *Formula::getType() const {
  if (!BaseRegs.empty())
    return BaseRegs.front()->getType();
  if (ScaledReg)
    return ScaledReg->getType();
  if (BaseGV)
    return BaseGV->getType();
  return nullptr;
}
void Formula::deleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
/// True if \p S is the scaled register or one of the base registers.
bool Formula::referencesReg(const SCEV *S) const {
  if (S == ScaledReg)
    return true;
  return is_contained(BaseRegs, S);
}
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
for (const SCEV *BaseReg : BaseRegs)
if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
return true;
return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the formula as a " + "-separated list of its non-trivial terms:
/// global, base offset, each base register as "reg(...)", the scaled register
/// as "Scale*reg(...)" and the unfolded offset as "imm(...)". A mismatch
/// between HasBaseReg and BaseRegs is reported as an "**error" term.
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  // Emit the separator before every term except the first one.
  auto BeginTerm = [&]() {
    if (!First)
      OS << " + ";
    First = false;
  };

  if (BaseGV) {
    BeginTerm();
    BaseGV->printAsOperand(OS, false);
  }

  if (BaseOffset != 0) {
    BeginTerm();
    OS << BaseOffset;
  }

  for (const SCEV *BaseReg : BaseRegs) {
    BeginTerm();
    OS << "reg(" << *BaseReg << ')';
  }

  const bool NoBaseRegs = BaseRegs.empty();
  if (HasBaseReg && NoBaseRegs) {
    BeginTerm();
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !NoBaseRegs) {
    BeginTerm();
    OS << "**error: !HasBaseReg**";
  }

  if (Scale != 0) {
    BeginTerm();
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }

  if (UnfoldedOffset != 0) {
    BeginTerm();
    OS << "imm(" << UnfoldedOffset << ')';
  }
}
/// Debugging aid: print the formula to the error stream, newline-terminated.
LLVM_DUMP_METHOD void Formula::dump() const {
  auto &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
/// Return true if sign-extending \p AR to an integer type one bit wider still
/// yields an add recurrence.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  const auto WideBits = SE.getTypeSizeInBits(AR->getType()) + 1;
  Type *WideTy = IntegerType::get(SE.getContext(), WideBits);
  const auto *Extended = SE.getSignExtendExpr(AR, WideTy);
  return isa<SCEVAddRecExpr>(Extended);
}
/// Return true if sign-extending \p A to an integer type one bit wider still
/// yields an add expression.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  const auto WideBits = SE.getTypeSizeInBits(A->getType()) + 1;
  Type *WideTy = IntegerType::get(SE.getContext(), WideBits);
  const auto *Extended = SE.getSignExtendExpr(A, WideTy);
  return isa<SCEVAddExpr>(Extended);
}
/// Return true if sign-extending \p M to an integer type that is
/// (bit width * number of operands) bits wide still yields a multiply
/// expression.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  const auto WideBits =
      SE.getTypeSizeInBits(M->getType()) * M->getNumOperands();
  Type *WideTy = IntegerType::get(SE.getContext(), WideBits);
  const auto *Extended = SE.getSignExtendExpr(M, WideTy);
  return isa<SCEVMulExpr>(Extended);
}
/// Try to compute LHS /s RHS as an expression with no remainder.
///
/// \returns the quotient expression, or nullptr when the division cannot be
/// shown to be exact. With \p IgnoreSignificantBits set, the checks that the
/// operand still has the same kind after sign extension (is*SExtable) are
/// skipped when distributing the division over addrec/add/mul operands.
///
/// A constant divided by the constant zero is reported as "not exact"
/// (nullptr) instead of being handed to APInt::srem/sdiv, which must not be
/// called with a zero divisor.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // x /s x is 1 for any kind of expression.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Special constant divisors.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // x /s -1 is expressed as x * -1; not done for pointer-typed LHS.
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
    // x /s 1 is x.
    if (RA == 1)
      return LHS;
  }

  // Constant divided by constant: exact only when the remainder is zero.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    // Division by zero is never exact; bail out before srem/sdiv see it.
    if (RA.isZero() || LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Affine add recurrence: divide start and step separately.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step)
        return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start)
        return nullptr;
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Sum: every addend must be exactly divisible.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op)
          return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Product: it suffices that one factor is exactly divisible.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      // Special case C1*X*Y /s C2*X*Y: divide the leading constants when the
      // remaining operands are identical.
      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
          const SCEVConstant *RC =
              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
          if (LC && RC) {
            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
            if (LOps == ROps)
              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
          }
        }
      }

      // Replace the first divisible factor by its quotient; keep the others.
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Any other kind of expression: unknown.
  return nullptr;
}
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getAPInt().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->operands());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
SCEV::FlagAnyWrap);
return Result;
}
return 0;
}
/// If \p S contains a global value at a position this function looks at (an
/// unknown wrapping a GlobalValue, the last operand of an add, or the first
/// operand of an add recurrence, recursively), return it and rewrite \p S so
/// that the global is replaced by zero. Returns nullptr and leaves \p S
/// untouched otherwise.
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue());
    if (!GV)
      return nullptr;
    S = SE.getConstant(GV->getType(), 0);
    return GV;
  }

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> Operands(Add->operands());
    GlobalValue *Symbol = ExtractSymbol(Operands.back(), SE);
    if (Symbol)
      S = SE.getAddExpr(Operands);
    return Symbol;
  }

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> Operands(AR->operands());
    GlobalValue *Symbol = ExtractSymbol(Operands.front(), SE);
    if (Symbol)
      S = SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
    return Symbol;
  }

  return nullptr;
}
/// Return true if \p Inst uses \p OperandVal as the address of a memory
/// access. Every load counts as an address use regardless of \p OperandVal;
/// for stores, atomics and the handled intrinsics the operand must be (one
/// of) the pointer operand(s). Other intrinsics are resolved through
/// TTI.getTgtMemIntrinsic.
static bool isAddressUse(const TargetTransformInfo &TTI,
                         Instruction *Inst, Value *OperandVal) {
  if (isa<LoadInst>(Inst))
    return true;

  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return SI->getPointerOperand() == OperandVal;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::prefetch:
    case Intrinsic::masked_load:
      // Pointer is argument 0.
      return II->getArgOperand(0) == OperandVal;
    case Intrinsic::masked_store:
      // Pointer is argument 1.
      return II->getArgOperand(1) == OperandVal;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Both destination and source are addresses.
      return II->getArgOperand(0) == OperandVal ||
             II->getArgOperand(1) == OperandVal;
    default: {
      MemIntrinsicInfo IntrInfo;
      return TTI.getTgtMemIntrinsic(II, IntrInfo) &&
             IntrInfo.PtrVal == OperandVal;
    }
    }
  }

  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return RMW->getPointerOperand() == OperandVal;

  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    return CmpX->getPointerOperand() == OperandVal;

  return false;
}
/// Compute the memory access description (accessed type and address space)
/// for \p Inst using \p OperandVal. The type defaults to the instruction's
/// own type and the address space to "unknown"; both are refined per kind of
/// instruction. A pointer-typed result is finally replaced by a pointer to i1
/// in the same address space so that all pointer accesses look alike.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
                                 Instruction *Inst, Value *OperandVal) {
  MemAccessTy Result(Inst->getType(), MemAccessTy::UnknownAddressSpace);

  // Address space of a pointer-typed value.
  auto AddrSpaceOf = [](const Value *Ptr) {
    return Ptr->getType()->getPointerAddressSpace();
  };

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    // For a store the accessed type is that of the stored value.
    Result.MemTy = SI->getOperand(0)->getType();
    Result.AddrSpace = SI->getPointerAddressSpace();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    Result.AddrSpace = LI->getPointerAddressSpace();
  } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    Result.AddrSpace = RMW->getPointerAddressSpace();
  } else if (const AtomicCmpXchgInst *CmpX =
                 dyn_cast<AtomicCmpXchgInst>(Inst)) {
    Result.AddrSpace = CmpX->getPointerAddressSpace();
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::prefetch:
    case Intrinsic::memset:
      Result.AddrSpace = AddrSpaceOf(II->getArgOperand(0));
      Result.MemTy = OperandVal->getType();
      break;
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      Result.AddrSpace = AddrSpaceOf(OperandVal);
      Result.MemTy = OperandVal->getType();
      break;
    case Intrinsic::masked_load:
      Result.AddrSpace = AddrSpaceOf(II->getArgOperand(0));
      break;
    case Intrinsic::masked_store:
      Result.MemTy = II->getOperand(0)->getType();
      Result.AddrSpace = AddrSpaceOf(II->getArgOperand(1));
      break;
    default: {
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal)
        Result.AddrSpace = AddrSpaceOf(IntrInfo.PtrVal);
      break;
    }
    }
  }

  // Canonicalize every pointer type to "pointer to i1" in its address space.
  if (PointerType *PTy = dyn_cast<PointerType>(Result.MemTy)) {
    Type *OneBitTy = IntegerType::get(PTy->getContext(), 1);
    Result.MemTy = PointerType::get(OneBitTy, PTy->getAddressSpace());
  }

  return Result;
}
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
if (SE.isSCEVable(PN.getType()) &&
(SE.getEffectiveSCEVType(PN.getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(&PN) == AR)
return true;
}
return false;
}
static bool isHighCostExpansion(const SCEV *S,
SmallPtrSetImpl<const SCEV*> &Processed,
ScalarEvolution &SE) {
switch (S->getSCEVType()) {
case scUnknown:
case scConstant:
return false;
case scTruncate:
return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
Processed, SE);
case scZeroExtend:
return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
Processed, SE);
case scSignExtend:
return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
Processed, SE);
default:
break;
}
if (!Processed.insert(S).second)
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (const SCEV *S : Add->operands()) {
if (isHighCostExpansion(S, Processed, SE))
return true;
}
return false;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
if (Mul->getNumOperands() == 2) {
if (isa<SCEVConstant>(Mul->getOperand(0)))
return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
Value *UVal = U->getValue();
for (User *UR : UVal->users()) {
Instruction *UI = dyn_cast<Instruction>(UR);
if (UI && UI->getOpcode() == Instruction::Mul &&
SE.isSCEVable(UI->getType())) {
return SE.getSCEV(UI) == Mul;
}
}
}
}
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (isExistingPhi(AR, SE))
return false;
}
return true;
}
// Forward declaration: LSRUse is defined further down but is already needed
// by the prototypes below and by the Cost class.
namespace {
class LSRUse;
}
// Prototype only; the definition is not part of this excerpt. Used by
// Cost::RateFormula to decide whether a scaled register saves a base add.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
// Prototype only; the definition is not part of this excerpt. Its result is
// accumulated into the scale cost by Cost::RateFormula.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F,
const Loop &L);
namespace {
/// Accumulates the cost of a set of formulae for one loop. The individual
/// metrics live in a TargetTransformInfo::LSRCost; a "losing" cost is
/// represented by all metrics being the maximum unsigned value.
class Cost {
  const Loop *L = nullptr;
  ScalarEvolution *SE = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  TargetTransformInfo::LSRCost C;
  TTI::AddressingModeKind AMK = TTI::AMK_None;

public:
  Cost() = delete;

  /// Create a zero cost for loop \p L.
  Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
       TTI::AddressingModeKind AMK) :
    L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
    C.Insns = 0;
    C.NumRegs = 0;
    C.AddRecCost = 0;
    C.NumIVMuls = 0;
    C.NumBaseAdds = 0;
    C.ImmCost = 0;
    C.SetupCost = 0;
    C.ScaleCost = 0;
  }

  /// Compare against \p Other; see the definition for the exact ordering.
  bool isLess(const Cost &Other);

  /// Turn this cost into a losing one (all metrics at their maximum).
  void Lose();

#ifndef NDEBUG
  /// Consistency check: either no metric combination has all bits set, or
  /// every metric is the all-ones value. This is a read-only query and is
  /// therefore const-qualified.
  bool isValid() const {
    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
  }
#endif

  /// True once Lose() has been applied. Read-only query, const-qualified so
  /// that it can also be asked of a const Cost.
  bool isLoser() const {
    assert(isValid() && "invalid cost");
    return C.NumRegs == ~0u;
  }

  /// Add the cost of formula \p F (belonging to use \p LU) to this cost.
  void RateFormula(const Formula &F,
                   SmallPtrSetImpl<const SCEV *> &Regs,
                   const DenseSet<const SCEV *> &VisitedRegs,
                   const LSRUse &LU,
                   SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

  void print(raw_ostream &OS) const;
  void dump() const;

private:
  /// Account for a single register of formula \p F.
  void RateRegister(const Formula &F, const SCEV *Reg,
                    SmallPtrSetImpl<const SCEV *> &Regs);

  /// Account for \p Reg unless it was rated before; records losing registers
  /// in \p LoserRegs when that set is provided.
  void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                           SmallPtrSetImpl<const SCEV *> &Regs,
                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
/// One place in the IR where an operand of an instruction is to be replaced.
struct LSRFixup {
// The instruction that uses the value.
Instruction *UserInst = nullptr;
// The operand of UserInst that is to be replaced.
Value *OperandValToReplace = nullptr;
// Set of loops, printed by print() as "PostIncLoop=<header>" entries.
PostIncLoopSet PostIncLoops;
// Constant offset of this fixup; LSRUse::pushFixup folds it into the use's
// Min/MaxOffset and Cost::RateFormula adds it to the formula's BaseOffset.
int64_t Offset = 0;
LSRFixup() = default;
// True if the use does not happen inside L (for phi users, the incoming
// blocks that supply OperandValToReplace are what counts).
bool isUseFullyOutsideLoop(const Loop *L) const;
void print(raw_ostream &OS) const;
void dump() const;
};
/// DenseMapInfo-style traits for using a SmallVector of SCEV pointers as a
/// DenseSet key. The empty and tombstone keys are one-element vectors holding
/// the pointer values -1 and -2 respectively.
struct UniquifierDenseMapInfo {
  static SmallVector<const SCEV *, 4> getEmptyKey() {
    return SmallVector<const SCEV *, 4>(1, reinterpret_cast<const SCEV *>(-1));
  }

  static SmallVector<const SCEV *, 4> getTombstoneKey() {
    return SmallVector<const SCEV *, 4>(1, reinterpret_cast<const SCEV *>(-2));
  }

  /// Hash over all elements of the key, in order.
  static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
    auto Hash = hash_combine_range(V.begin(), V.end());
    return static_cast<unsigned>(Hash);
  }

  /// Keys are equal when they hold the same pointers in the same order.
  static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
                      const SmallVector<const SCEV *, 4> &RHS) {
    return LHS == RHS;
  }
};
/// A group of fixups that share the candidate formulae kept in Formulae.
class LSRUse {
  /// Sorted register lists of the formulae inserted so far; used to reject
  /// formulae that reference the same set of registers.
  DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;

public:
  /// The kind of use.
  enum KindType {
    Basic,
    Special,
    Address,
    ICmpZero
  };

  using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

  KindType Kind;
  MemAccessTy AccessTy;

  /// The fixups belonging to this use.
  SmallVector<LSRFixup, 8> Fixups;

  /// Smallest / largest fixup offset seen by pushFixup(). They start at the
  /// opposite extremes so that the first pushed fixup sets both.
  int64_t MinOffset = std::numeric_limits<int64_t>::max();
  int64_t MaxOffset = std::numeric_limits<int64_t>::min();

  bool AllFixupsOutsideLoop = true;

  /// When set, InsertFormula refuses any formula beyond the first one.
  bool RigidFormula = false;

  Type *WidestFixupType = nullptr;

  /// Candidate formulae for this use.
  SmallVector<Formula, 12> Formulae;

  SmallPtrSet<const SCEV *, 4> Regs;

  LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

  /// Append a default-constructed fixup and return a reference to it.
  LSRFixup &getNewFixup() {
    Fixups.emplace_back();
    return Fixups.back();
  }

  /// Append a copy of \p f and widen [MinOffset, MaxOffset] to cover its
  /// offset.
  void pushFixup(LSRFixup &f) {
    Fixups.push_back(f);
    MaxOffset = std::max(MaxOffset, f.Offset);
    MinOffset = std::min(MinOffset, f.Offset);
  }

  bool HasFormulaWithSameRegs(const Formula &F) const;
  float getNotSelectedProbability(const SCEV *Reg) const;
  bool InsertFormula(const Formula &F, const Loop &L);
  void DeleteFormula(Formula &F);
  void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
  void print(raw_ostream &OS) const;
  void dump() const;
};
}
// Prototype of the overload that takes the addressing-mode components
// individually (the definition is not part of this excerpt). Fixup is
// optional and defaults to null; Cost::RateFormula passes the fixup's user
// instruction here.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
/// Estimate the setup cost of \p Reg by counting the unknown and constant
/// leaves reachable within \p Depth levels of recursion.
///
/// Unknowns and constants cost 1. Once \p Depth reaches 0 anything else costs
/// 0. An add recurrence is charged for its start value only, a cast for its
/// operand, an n-ary expression for the sum of its operands and a udiv for
/// both sides. Any other expression costs 0.
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
    return 1;
  if (Depth == 0)
    return 0;
  // Must be tested before SCEVNAryExpr: only the start value is charged.
  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
    return getSetupCost(S->getStart(), Depth - 1);
  if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
    return getSetupCost(S->getOperand(), Depth - 1);
  if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
    // Seed with an unsigned zero: std::accumulate's accumulator has the type
    // of the initial value, and the partial sums here are unsigned.
    return std::accumulate(S->op_begin(), S->op_end(), 0u,
                           [&](unsigned i, const SCEV *Reg) {
                             return i + getSetupCost(Reg, Depth - 1);
                           });
  if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
    return getSetupCost(S->getLHS(), Depth - 1) +
           getSetupCost(S->getRHS(), Depth - 1);
  return 0;
}
// Add the cost of one register Reg of formula F to this cost. Regs is the set
// of registers already accounted for; it is consulted (not modified here) to
// avoid rating an add recurrence's step twice.
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// Add recurrence of a loop other than L.
if (AR->getLoop() != L) {
// An already existing phi costs nothing, unless post-indexed addressing
// is preferred.
if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
return;
// The recurrence's loop does not contain L: reject the formula.
if (!AR->getLoop()->contains(L)) {
Lose();
return;
}
// Otherwise it just occupies one register; none of the costs below apply.
++C.NumRegs;
return;
}
// Add recurrence of L itself: normally costs 1, but may be free when the
// target has legal post-increment indexed loads or stores for this type.
unsigned LoopCost = 1;
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
// Pre-indexed preference: free if the constant step equals the formula's
// base offset.
if (AMK == TTI::AMK_PreIndexed) {
if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
if (Step->getAPInt() == F.BaseOffset)
LoopCost = 0;
// Post-indexed preference: free if the step is constant and the start is a
// non-constant value that is invariant in L.
} else if (AMK == TTI::AMK_PostIndexed) {
const SCEV *LoopStep = AR->getStepRecurrence(*SE);
if (isa<SCEVConstant>(LoopStep)) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) &&
SE->isLoopInvariant(LoopStart, L))
LoopCost = 0;
}
}
}
C.AddRecCost += LoopCost;
// A non-affine recurrence or a non-constant step needs operand 1 in a
// register of its own; rate it unless it has been counted already.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
RateRegister(F, AR->getOperand(1), Regs);
if (isLoser())
return;
}
}
}
// Reached for non-recurrences and for recurrences of L.
++C.NumRegs;
C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
// Clamp the setup cost to 1 << 16.
C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
// Count multiplications that evolve with the loop.
C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE->hasComputableLoopEvolution(Reg, L);
}
/// Rate register \p Reg of formula \p F unless it is already in \p Regs.
/// A register known to be a loser (listed in \p LoserRegs, if given) makes
/// the whole cost lose immediately; a register that turns the cost into a
/// loser while being rated is added to \p LoserRegs.
void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
                               SmallPtrSetImpl<const SCEV *> &Regs,
                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
  if (LoserRegs && LoserRegs->count(Reg)) {
    Lose();
    return;
  }

  // Only registers seen for the first time contribute to the cost.
  const bool NewlyInserted = Regs.insert(Reg).second;
  if (!NewlyInserted)
    return;

  RateRegister(F, Reg, Regs);
  if (LoserRegs && isLoser())
    LoserRegs->insert(Reg);
}
// Add the cost of formula F, which belongs to use LU, to this cost.
//   Regs        - registers already accounted for; newly rated ones are added.
//   VisitedRegs - registers that must not be used; a formula referencing one
//                 of them makes the cost lose.
//   LoserRegs   - optional cache of registers known to make the cost lose.
// A cost that is already a loser is left unchanged.
void Cost::RateFormula(const Formula &F,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
if (isLoser())
return;
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Remember the previous totals so the instruction count below can be
// updated by what this formula added.
unsigned PrevAddRecCost = C.AddRecCost;
unsigned PrevNumRegs = C.NumRegs;
unsigned PrevNumBaseAdds = C.NumBaseAdds;
// Rate the scaled register first, then every base register.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Lose();
return;
}
RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
if (isLoser())
return;
}
for (const SCEV *BaseReg : F.BaseRegs) {
if (VisitedRegs.count(BaseReg)) {
Lose();
return;
}
RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
if (isLoser())
return;
}
// Adds needed to combine the registers: one less than the number of
// registers, and one fewer still when the formula has a scale and the
// addressing mode is completely folded.
size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
C.NumBaseAdds +=
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
// A non-zero unfolded offset costs one more add.
C.NumBaseAdds += (F.UnfoldedOffset != 0);
// NOTE(review): the result of getValue() is dereferenced without a check,
// i.e. the scaling factor cost is assumed to be valid here -- confirm.
C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
// Immediate cost, evaluated for every fixup of the use.
for (const LSRFixup &Fixup : LU.Fixups) {
int64_t O = Fixup.Offset;
// The sum is formed in uint64_t (the fixup offset is cast first), so the
// addition itself is unsigned arithmetic.
int64_t Offset = (uint64_t)O + F.BaseOffset;
// A global base is charged a flat 64; otherwise a non-zero offset is charged
// its minimum number of signed bits.
if (F.BaseGV)
C.ImmCost += 64; else if (Offset != 0)
C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
// For address uses, a non-zero offset that this particular user instruction
// cannot fold costs one more add.
if (LU.Kind == LSRUse::Address && Offset != 0 &&
!isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
C.NumBaseAdds++;
}
// Without the instruction-count model there is nothing left to do.
if (!InsnsCost) {
assert(isValid() && "invalid cost");
return;
}
// Registers beyond (number of target registers - 1) each count as one
// additional instruction; only the part added by this formula is charged
// when the limit had been exceeded before.
unsigned TTIRegNum = TTI->getNumberOfRegisters(
TTI->getRegisterClassForType(false, F.getType())) - 1;
if (C.NumRegs > TTIRegNum) {
if (PrevNumRegs > TTIRegNum)
C.Insns += (C.NumRegs - PrevNumRegs);
else
C.Insns += (C.NumRegs - TTIRegNum);
}
// An ICmpZero use whose formula is not a single plain base register needs one
// more instruction, unless the target can macro-fuse the compare.
if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
!TTI->canMacroFuseCmp())
C.Insns++;
// Add recurrences added by this formula count as instructions.
C.Insns += (C.AddRecCost - PrevAddRecCost);
// Base adds added by this formula count as instructions, except for ICmpZero
// uses.
if (LU.Kind != LSRUse::ICmpZero)
C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
assert(isValid() && "invalid cost");
}
/// Mark this cost as a loser by setting every metric to the largest unsigned
/// value.
void Cost::Lose() {
  const unsigned Worst = std::numeric_limits<unsigned>::max();
  C.Insns = Worst;
  C.NumRegs = Worst;
  C.AddRecCost = Worst;
  C.NumIVMuls = Worst;
  C.NumBaseAdds = Worst;
  C.ImmCost = Worst;
  C.SetupCost = Worst;
  C.ScaleCost = Worst;
}
/// Return true if this cost is lower than \p Other. When the instruction
/// count option was given explicitly on the command line and is enabled,
/// differing instruction counts decide; otherwise the target's comparison is
/// used.
bool Cost::isLess(const Cost &Other) {
  const bool InsnsOptionGiven = InsnsCost.getNumOccurrences() > 0;
  if (InsnsOptionGiven && InsnsCost && C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

  return TTI->isLSRCostLess(C, Other.C);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the cost in human-readable form: the instruction count (only when
/// the instruction cost model is enabled), the register count and every other
/// metric that is non-zero.
void Cost::print(raw_ostream &OS) const {
  // Plural suffix for a count.
  auto Plural = [](auto Count) { return Count == 1 ? "" : "s"; };

  if (InsnsCost)
    OS << C.Insns << " instruction" << Plural(C.Insns) << ' ';

  OS << C.NumRegs << " reg" << Plural(C.NumRegs);

  if (C.AddRecCost != 0)
    OS << ", with addrec cost " << C.AddRecCost;

  if (C.NumIVMuls != 0)
    OS << ", plus " << C.NumIVMuls << " IV mul" << Plural(C.NumIVMuls);

  if (C.NumBaseAdds != 0)
    OS << ", plus " << C.NumBaseAdds << " base add" << Plural(C.NumBaseAdds);

  if (C.ScaleCost != 0)
    OS << ", plus " << C.ScaleCost << " scale cost";

  if (C.ImmCost != 0)
    OS << ", plus " << C.ImmCost << " imm cost";

  if (C.SetupCost != 0)
    OS << ", plus " << C.SetupCost << " setup cost";
}
/// Debugging aid: print the cost to the error stream, newline-terminated.
LLVM_DUMP_METHOD void Cost::dump() const {
  auto &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == OperandValToReplace &&
L->contains(PN->getIncomingBlock(i)))
return false;
return true;
}
return !L->contains(UserInst);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Write a description of this fixup to \p OS: the user instruction, the
/// operand being replaced, the header of every post-increment loop, and the
/// offset when it is non-zero.
///
/// A store user is shown as "store <stored value>", a void-typed user by its
/// opcode name, and any other user as an operand reference.
void LSRFixup::print(raw_ostream &OS) const {
  OS << "UserInst=";
  StoreInst *Store = dyn_cast<StoreInst>(UserInst);
  if (Store) {
    OS << "store ";
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (!UserInst->getType()->isVoidTy()) {
    UserInst->printAsOperand(OS, false);
  } else {
    OS << UserInst->getOpcodeName();
  }

  OS << ", OperandValToReplace=";
  OperandValToReplace->printAsOperand(OS, false);

  for (const Loop *PostIncLoop : PostIncLoops) {
    OS << ", PostIncLoop=";
    PostIncLoop->getHeader()->printAsOperand(OS, false);
  }

  if (Offset == 0)
    return;
  OS << ", Offset=" << Offset;
}
/// Print this fixup to errs(), followed by a newline.
LLVM_DUMP_METHOD void LSRFixup::dump() const {
  raw_ostream &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
llvm::sort(Key);
return Uniquifier.count(Key);
}
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
unsigned FNum = 0;
for (const Formula &F : Formulae)
if (F.referencesReg(Reg))
FNum++;
return ((float)(Formulae.size() - FNum)) / Formulae.size();
}
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
assert(F.isCanonical(L) && "Invalid canonical representation");
if (!Formulae.empty() && RigidFormula)
return false;
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
llvm::sort(Key);
if (!Uniquifier.insert(Key).second)
return false;
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
for (const SCEV *BaseReg : F.BaseRegs)
assert(!BaseReg->isZero() && "Zero allocated in a base register!");
#endif
Formulae.push_back(F);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
if (F.ScaledReg)
Regs.insert(F.ScaledReg);
return true;
}
/// Remove formula \p F from Formulae by swapping it with the last element
/// (unless it already is the last element) and popping the back.
///
/// This does not preserve the order of the remaining formulae.
void LSRUse::DeleteFormula(Formula &F) {
  Formula &Last = Formulae.back();
  if (&F != &Last)
    std::swap(F, Last);
  Formulae.pop_back();
}
/// Rebuild Regs from the formulae currently held by this use, and tell
/// \p RegUses to drop, for use index \p LUIdx, every register that was in the
/// old set but is no longer referenced.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
  // Keep the previous set around so stale registers can be detected below.
  SmallPtrSet<const SCEV *, 4> PreviousRegs = std::move(Regs);
  Regs.clear();

  for (const Formula &F : Formulae) {
    if (F.ScaledReg)
      Regs.insert(F.ScaledReg);
    Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
  }

  for (const SCEV *Reg : PreviousRegs) {
    if (Regs.count(Reg))
      continue;
    RegUses.dropRegister(Reg, LUIdx);
  }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Write a description of this use to \p OS: its kind (with access type and
/// address space for Address uses), the comma-separated offsets of all fixups,
/// and optional markers for AllFixupsOutsideLoop and WidestFixupType.
void LSRUse::print(raw_ostream &OS) const {
  OS << "LSR Use: Kind=";
  switch (Kind) {
  case Basic:
    OS << "Basic";
    break;
  case Special:
    OS << "Special";
    break;
  case ICmpZero:
    OS << "ICmpZero";
    break;
  case Address: {
    OS << "Address of ";
    Type *MemTy = AccessTy.MemTy;
    // Pointer access types are abbreviated rather than printed in full.
    if (MemTy->isPointerTy())
      OS << "pointer";
    else
      OS << *MemTy;
    OS << " in addrspace(" << AccessTy.AddrSpace << ')';
    break;
  }
  }

  OS << ", Offsets={";
  bool IsFirst = true;
  for (const LSRFixup &Fixup : Fixups) {
    if (!IsFirst)
      OS << ',';
    IsFirst = false;
    OS << Fixup.Offset;
  }
  OS << '}';

  if (AllFixupsOutsideLoop)
    OS << ", all-fixups-outside-loop";

  if (WidestFixupType)
    OS << ", widest fixup type: " << *WidestFixupType;
}
/// Print this use to errs(), followed by a newline.
LLVM_DUMP_METHOD void LSRUse::dump() const {
  raw_ostream &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
/// Return true if an expression of the shape
///   BaseGV + BaseOffset + (base register, if HasBaseReg) + Scale * register
/// can be folded completely into a use of kind \p Kind.
///
/// - Address: the target decides through isLegalAddressingMode, which is also
///   given \p Fixup.
/// - ICmpZero: no global is allowed; at most two non-trivial parts are
///   allowed; the scale must be 0 or -1; and a non-zero offset must be a legal
///   icmp immediate (the offset is negated first when there is no scale).
/// - Basic: only a plain register (no global, no scale, no offset).
/// - Special: like Basic, but a scale of -1 is accepted as well.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale,
                                 Instruction *Fixup) {
  switch (Kind) {
  case LSRUse::Address:
    return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
                                     HasBaseReg, Scale, AccessTy.AddrSpace,
                                     Fixup);

  case LSRUse::ICmpZero: {
    if (BaseGV)
      return false;

    const bool HasScale = Scale != 0;
    if (HasScale && HasBaseReg && BaseOffset != 0)
      return false;
    if (HasScale && Scale != -1)
      return false;

    if (BaseOffset == 0)
      return true;

    // Negate through uint64_t so the negation itself cannot overflow.
    if (!HasScale)
      BaseOffset = -(uint64_t)BaseOffset;
    return TTI.isLegalICmpImmediate(BaseOffset);
  }

  case LSRUse::Basic:
    return !BaseGV && Scale == 0 && BaseOffset == 0;

  case LSRUse::Special:
    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}
/// Return true if the addressing expression is completely folded for both
/// ends of the fixup offset range, i.e. with \p BaseOffset + \p MinOffset and
/// with \p BaseOffset + \p MaxOffset as the effective immediate.
///
/// Returns false if either sum wraps around: the sums are formed in uint64_t
/// and the direction of the change is compared against the sign of the addend.
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 int64_t MinOffset, int64_t MaxOffset,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
                                 GlobalValue *BaseGV, int64_t BaseOffset,
                                 bool HasBaseReg, int64_t Scale) {
  const int64_t LowImm = (int64_t)((uint64_t)BaseOffset + MinOffset);
  if ((LowImm > BaseOffset) != (MinOffset > 0))
    return false;

  const int64_t HighImm = (int64_t)((uint64_t)BaseOffset + MaxOffset);
  if ((HighImm > BaseOffset) != (MaxOffset > 0))
    return false;

  if (!isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, LowImm, HasBaseReg,
                            Scale))
    return false;
  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, HighImm, HasBaseReg,
                              Scale);
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, MemAccessTy AccessTy,
const Formula &F, const Loop &L) {
assert((F.isCanonical(L) || F.Scale != 0));
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
/// Return true if the described expression is acceptable for a use of kind
/// \p Kind over the offset range [\p MinOffset, \p MaxOffset].
///
/// That is the case when it is completely folded as given, or when the scale
/// is 1 and the expression is completely folded once re-expressed with a base
/// register and no scale.
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
                       int64_t MaxOffset, LSRUse::KindType Kind,
                       MemAccessTy AccessTy, GlobalValue *BaseGV,
                       int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
  if (isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
                           BaseOffset, HasBaseReg, Scale))
    return true;

  if (Scale != 1)
    return false;

  return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
                              BaseGV, BaseOffset, true, 0);
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
MemAccessTy AccessTy, const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
for (const LSRFixup &Fixup : LU.Fixups)
if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
(F.BaseOffset + Fixup.Offset), F.HasBaseReg,
F.Scale, Fixup.UserInst))
return false;
return true;
}
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
F.Scale);
}
/// Return the cost of the scale used by formula \p F in use \p LU.
///
/// - No scale: 0.
/// - Formula not completely folded for this use: 1 if the scale is not 1,
///   otherwise 0.
/// - Folded Address use: the larger of the target's scaling-factor costs at
///   the use's minimum and maximum offsets (both asserted to be valid).
/// - Folded ICmpZero / Basic / Special use: 0.
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
                                            const LSRUse &LU, const Formula &F,
                                            const Loop &L) {
  if (!F.Scale)
    return 0;

  const bool Folded = isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset,
                                           LU.Kind, LU.AccessTy, F, L);
  if (!Folded)
    return F.Scale != 1;

  switch (LU.Kind) {
  case LSRUse::Address: {
    // Ask the target for the cost at one concrete fixup offset.
    auto CostAtOffset = [&](int64_t FixupOffset) -> InstructionCost {
      return TTI.getScalingFactorCost(LU.AccessTy.MemTy, F.BaseGV,
                                      F.BaseOffset + FixupOffset, F.HasBaseReg,
                                      F.Scale, LU.AccessTy.AddrSpace);
    };
    InstructionCost CostAtMin = CostAtOffset(LU.MinOffset);
    InstructionCost CostAtMax = CostAtOffset(LU.MaxOffset);

    assert(CostAtMin.isValid() && CostAtMax.isValid() &&
           "Legal addressing mode has an illegal cost!");
    return std::max(CostAtMin, CostAtMax);
  }
  case LSRUse::ICmpZero:
  case LSRUse::Basic:
  case LSRUse::Special:
    return 0;
  }

  llvm_unreachable("Invalid LSRUse Kind!");
}
/// Return true if the immediate \p BaseOffset and symbol \p BaseGV can be
/// folded into a use of kind \p Kind under a conservative probe shape.
///
/// A zero offset with no symbol is trivially foldable. Otherwise the probe
/// uses a scale of -1 for ICmpZero uses and a scale of 1 for all other kinds;
/// when there is no base register, that scale of 1 is rewritten as "base
/// register present, scale 0" before querying.
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                             LSRUse::KindType Kind, MemAccessTy AccessTy,
                             GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg) {
  if (!BaseGV && BaseOffset == 0)
    return true;

  int64_t Scale;
  if (Kind == LSRUse::ICmpZero) {
    Scale = -1;
  } else if (HasBaseReg) {
    Scale = 1;
  } else {
    // The single scaled register takes the role of the base register.
    Scale = 0;
    HasBaseReg = true;
  }

  return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
                              HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
MemAccessTy AccessTy, const SCEV *S,
bool HasBaseReg) {
if (S->isZero()) return true;
int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
if (!S->isZero()) return false;
if (BaseOffset == 0 && !BaseGV) return true;
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale);
}
namespace {
/// One link of an IV chain: the instruction that uses the induction value,
/// the operand of that instruction which carries it, and the SCEV recorded as
/// this link's increment.
///
/// For the first link of a chain, ChainInstruction stores the operand's own
/// SCEV in IncExpr; for later links it stores the difference to the previous
/// link's operand.
struct IVInc {
  Instruction *UserInst;
  Value *IVOperand;
  const SCEV *IncExpr;

  IVInc(Instruction *User, Value *Operand, const SCEV *Increment)
      : UserInst(User), IVOperand(Operand), IncExpr(Increment) {}
};
/// A chain of IV links. Incs[0] is the head of the chain; every later element
/// is an increment relative to the link before it.
struct IVChain {
  SmallVector<IVInc, 1> Incs;
  /// Base expression the chain was created with; ChainInstruction compares
  /// it against the result of getExprBase for a candidate operand.
  const SCEV *ExprBase = nullptr;

  IVChain() = default;
  IVChain(const IVInc &Head, const SCEV *Base)
      : Incs(1, Head), ExprBase(Base) {}

  using const_iterator = SmallVectorImpl<IVInc>::const_iterator;

  /// Iteration deliberately skips the head: begin() points at the first
  /// increment. The chain must not be empty.
  const_iterator begin() const {
    assert(!Incs.empty());
    return Incs.begin() + 1;
  }
  const_iterator end() const { return Incs.end(); }

  /// True if the chain has at least one increment in addition to its head.
  bool hasIncs() const { return Incs.size() > 1; }

  /// Append link \p X to the end of the chain.
  void add(const IVInc &X) { Incs.push_back(X); }

  /// User instruction of the last link in the chain.
  Instruction *tailUserInst() const { return Incs.back().UserInst; }

  /// Decide whether extending this chain by \p IncExpr to reach \p OperExpr
  /// is worthwhile (defined out of line).
  bool isProfitableIncrement(const SCEV *OperExpr, const SCEV *IncExpr,
                             ScalarEvolution &);
};
/// Per-chain bookkeeping of instructions that use a chain's IV operands
/// without being links of the chain themselves.
///
/// ChainInstruction collects such users in NearUsers and moves them to
/// FarUsers once the chain advances by a non-zero increment; isProfitableChain
/// rejects a chain whose FarUsers set is not empty.
struct ChainUsers {
  SmallPtrSet<Instruction *, 4> FarUsers;
  SmallPtrSet<Instruction *, 4> NearUsers;
};
// Holds the state for running loop strength reduction on one loop (L), and
// declares the phases of the transformation as private member functions.
// Only the constructor, the Changed/ScalarEvolutionIVs accessors and the
// printing helpers are public.
class LSRInstance {
// Analyses and target hooks, held by reference for the lifetime of the
// instance.
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
AssumptionCache &AC;
TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
// The loop being processed.
Loop *const L;
// Nullable pointer (unlike the references above).
// NOTE(review): presumably null when MemorySSA is not being preserved -
// confirm at the construction site.
MemorySSAUpdater *MSSAU;
TTI::AddressingModeKind AMK;
mutable SCEVExpander Rewriter;
// Set by the optimization routines whenever they modify the IR; exposed
// through getChanged().
bool Changed = false;
// Insertion point for the IV increment; assigned by OptimizeLoopTermCond.
Instruction *IVIncInsertPos = nullptr;
// Stride ratios and types gathered by CollectInterestingTypesAndFactors.
SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
SmallSetVector<Type *, 4> Types;
// All uses under consideration; indices into this vector are used as use
// identifiers (see UseMap, getUse and DeleteUse).
mutable SmallVector<LSRUse, 16> Uses;
RegUseTracker RegUses;
// Upper bound on the number of IV chains ChainInstruction will create
// (ignored when StressIVChain is set).
static const unsigned MaxChains = 8;
SmallVector<IVChain, MaxChains> IVChainVec;
SmallPtrSet<Use*, MaxChains> IVIncSet;
// Exposed read-only through getScalarEvolutionIVs().
SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
// --- Pre-pass optimizations on the loop's IV users and exit conditions.
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
// --- IV chain collection and generation.
void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec);
void FinalizeChain(IVChain &Chain);
void CollectChains();
void GenerateIVChain(const IVChain &Chain,
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
// --- Collection of types, factors, fixups and initial formulae.
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
// Maps an (expression, kind) pair to the index of its LSRUse in Uses.
using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, MemAccessTy AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
MemAccessTy AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
void CollectLoopInvariantFixupsAndFormulae();
// --- Formula generation.
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, unsigned Depth,
size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, size_t Idx,
bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist,
size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateCrossUseConstantOffsets();
void GenerateAllReuseFormulae();
// --- Search-space filtering and narrowing.
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
void NarrowSearchSpaceByFilterPostInc();
void NarrowSearchSpaceByDeletingCostlyFormulas();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
// --- Solver.
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
// --- Expansion and rewriting of the chosen solution.
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const;
Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
BasicBlock::iterator IP,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
// True if any IR was modified.
bool getChanged() const { return Changed; }
const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
return ScalarEvolutionIVs;
}
// Debug printing helpers.
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
void print_uses(raw_ostream &OS) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
// Look for an IV user that converts an integer induction PHI to floating
// point (uitofp / sitofp) and replace that conversion with a separate
// floating-point induction variable ("shadow IV").
//
// At most one cast is rewritten per call: the loop breaks after the first
// successful transformation. Nothing is done if the loop's backedge-taken
// count cannot be computed.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
UI != E; ) {
// Advance the iterator before examining the candidate; the candidate's
// user instruction may be erased at the bottom of the loop body.
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Type *DestTy = nullptr;
bool IsSigned = false;
// Only unsigned/signed int-to-FP casts are candidates.
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
IsSigned = false;
DestTy = UCast->getDestTy();
}
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
IsSigned = true;
DestTy = SCast->getDestTy();
}
if (!DestTy) continue;
// The target must support the FP destination type.
if (!TTI.isTypeLegal(DestTy)) continue;
// The cast's operand must be a PHI with exactly two incoming values.
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
// The PHI must be an add recurrence that is known not to wrap in the
// signedness matching the cast.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
if (!AR) continue;
if (IsSigned && !AR->hasNoSignedWrap()) continue;
if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
// The integer type must not be wider than the FP mantissa, otherwise the
// FP IV could not represent every integer value exactly.
Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
continue;
// Determine which incoming slot is the entry (preheader) edge and which is
// the latch edge.
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
// The start value must be a constant integer; convert it to FP using the
// signedness of the cast.
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
(double)Init->getSExtValue() :
(double)Init->getZExtValue());
// The latch value must be an add or sub of the PHI and a constant.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
// Only strictly positive step constants are handled; the step is converted
// below with getZExtValue().
if (!C->getValue().isStrictlyPositive()) continue;
// Create the FP PHI next to the integer PHI and the FP increment next to
// the integer increment.
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
// The cast is now redundant: forward its users to the FP PHI and erase it.
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
Changed = true;
break;
}
}
/// Search the IV users for the entry whose user instruction is \p Cond.
///
/// On success, store a pointer to that entry in \p CondUse and return true.
/// Otherwise return false and leave \p CondUse untouched.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
  for (IVStrideUse &Candidate : IU) {
    if (Candidate.getUser() != Cond)
      continue;
    CondUse = &Candidate;
    return true;
  }
  return false;
}
// If the equality comparison Cond tests the IV against a select whose SCEV is
// the loop's iteration count expressed as a max, replace Cond with an
// inequality comparison against the non-constant max operand and delete the
// select (and the compare feeding it, if it becomes unused).
//
// Returns the new comparison on success, or Cond unchanged when the pattern
// does not match. On success CondUse's user is updated to the new comparison.
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Only eq/ne comparisons are candidates.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
// The right-hand side must be a select used only by this comparison.
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// The select must compute exactly backedge-taken count + 1.
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Find the max expression and choose the predicate that goes with it.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_SLT;
Max = S;
} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_ULT;
Max = U;
} else {
return Cond;
}
// Only two-operand max expressions are handled.
if (Max->getNumOperands() != 2)
return Cond;
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
// The first max operand must be zero for the "or equal" predicate and one
// for the strict predicates.
if (!MaxLHS ||
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
return Cond;
// The compared IV must be the affine recurrence {1,+,1}.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(SE) != One)
return Cond;
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
// Find an IR value corresponding to MaxRHS to use as the new right-hand
// side of the comparison.
Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// For the "or equal" predicate, look for a select arm of the form n + 1
// where n has the SCEV MaxRHS, and use n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
NewRHS = SU->getValue();
else
return Cond;
// Pred was chosen for an ICMP_NE original; invert it for ICMP_EQ.
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Build the replacement comparison at Cond's position.
ICmpInst *NewCond =
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
NewCond->setDebugLoc(Cond->getDebugLoc());
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
// Grab the select's condition before erasing the select so it can be
// cleaned up if it ends up without users.
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
return NewCond;
}
// Try to make the loop's exit comparisons use the post-incremented IV value,
// and choose IVIncInsertPos, the position at which the IV increment is to be
// inserted.
//
// If no exiting block is the latch, nothing is transformed and
// IVIncInsertPos is simply the latch terminator.
void
LSRInstance::OptimizeLoopTermCond() {
// Comparisons that were switched to the post-inc value.
SmallPtrSet<Instruction *, 4> PostIncs;
BasicBlock *LatchBlock = L->getLoopLatch();
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
return LatchBlock != BB;
})) {
IVIncInsertPos = LatchBlock->getTerminator();
return;
}
for (BasicBlock *ExitingBlock : ExitingBlocks) {
// Only conditional branches on an icmp are handled.
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
continue;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
// The comparison must be a known IV user.
IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
// May replace Cond (and update CondUse) when the trip count is a max.
Cond = OptimizeMax(Cond, CondUse);
// The exiting block must dominate the latch.
if (!DT.dominates(ExitingBlock, LatchBlock))
continue;
// For a non-latch exit, decline the transformation if some other IV user
// that does not properly dominate the exiting block might share the IV
// through a stride ratio of +/-1 or a legal addressing-mode scale.
if (LatchBlock != ExitingBlock)
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
const SCEV *A = IU.getStride(*CondUse, L);
const SCEV *B = IU.getStride(*UI, L);
if (!A || !B) continue;
// Sign-extend the narrower stride so both have the same width.
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
SE.getTypeSizeInBits(B->getType()))
B = SE.getSignExtendExpr(B, A->getType());
else
A = SE.getSignExtendExpr(A, B->getType());
}
// Only a constant exact quotient B / A is of interest.
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
const ConstantInt *C = D->getValue();
if (C->isOne() || C->isMinusOne())
goto decline_post_inc;
// Quotients that do not fit comfortably in int64_t, or that cannot be
// negated, are declined outright.
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
// For address uses, decline if the quotient or its negation is a legal
// addressing-mode scale on the target.
if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
MemAccessTy AccessTy = getAccessType(
TTI, UI->getUser(), UI->getOperandValToReplace());
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy.MemTy, nullptr,
0,
false, Scale,
AccessTy.AddrSpace))
goto decline_post_inc;
Scale = -Scale;
if (TTI.isLegalAddressingMode(AccessTy.MemTy, nullptr,
0,
false, Scale,
AccessTy.AddrSpace))
goto decline_post_inc;
}
}
}
LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n');
// Make sure the comparison sits directly before the branch: move it if the
// branch is its only user, otherwise clone it for the branch and register
// the clone as a new IV user.
if (Cond->getNextNonDebugInstruction() != TermBr) {
if (Cond->hasOneUse()) {
Cond->moveBefore(TermBr);
} else {
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond);
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
CondUse->transformToPostInc(L);
Changed = true;
PostIncs.insert(Cond);
// Jump target for exits that must keep using the pre-inc value.
decline_post_inc:;
}
// Start at the latch terminator and move the insertion point up so that it
// is at, or in a block dominating, every post-inc comparison.
IVIncInsertPos = L->getLoopLatch()->getTerminator();
for (Instruction *Inst : PostIncs) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
Inst->getParent());
if (BB == Inst->getParent())
IVIncInsertPos = Inst;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
}
/// Try to widen use \p LU so that it also covers a fixup at \p NewOffset with
/// the given \p Kind and \p AccessTy.
///
/// Fails (returns false, \p LU untouched) when the kinds differ, or when
/// \p NewOffset lies outside the current [MinOffset, MaxOffset] range and the
/// resulting span is not always foldable. For Address uses whose memory types
/// differ, the merged access type falls back to the "unknown" type for the
/// new access's context and address space.
///
/// On success the use's offset range and access type are updated and true is
/// returned.
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
                                     bool HasBaseReg, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy) {
  if (LU.Kind != Kind)
    return false;

  MemAccessTy MergedAccessTy = AccessTy;
  if (Kind == LSRUse::Address && AccessTy.MemTy != LU.AccessTy.MemTy)
    MergedAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
                                             AccessTy.AddrSpace);

  int64_t MergedMin = LU.MinOffset;
  int64_t MergedMax = LU.MaxOffset;

  if (NewOffset < LU.MinOffset) {
    // The range grows downwards; the widened span must still be foldable.
    const int64_t Span = LU.MaxOffset - NewOffset;
    if (!isAlwaysFoldable(TTI, Kind, MergedAccessTy, nullptr, Span,
                          HasBaseReg))
      return false;
    MergedMin = NewOffset;
  } else if (NewOffset > LU.MaxOffset) {
    // The range grows upwards; the widened span must still be foldable.
    const int64_t Span = NewOffset - LU.MinOffset;
    if (!isAlwaysFoldable(TTI, Kind, MergedAccessTy, nullptr, Span,
                          HasBaseReg))
      return false;
    MergedMax = NewOffset;
  }

  LU.MinOffset = MergedMin;
  LU.MaxOffset = MergedMax;
  LU.AccessTy = MergedAccessTy;
  return true;
}
std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
MemAccessTy AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
if (!isAlwaysFoldable(TTI, Kind, AccessTy, nullptr,
Offset, true)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
if (!P.second) {
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
return std::make_pair(LUIdx, Offset);
}
size_t LUIdx = Uses.size();
P.first->second = LUIdx;
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
}
/// Remove use \p LU (located at index \p LUIdx) from Uses by swapping it with
/// the last use and popping the back, then let RegUses mirror the same
/// swap-and-drop using the old last index.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
  LSRUse &LastUse = Uses.back();
  if (&LU != &LastUse)
    std::swap(LU, LastUse);
  Uses.pop_back();

  // After the pop, Uses.size() is the index the last use had before.
  const size_t FormerLastIdx = Uses.size();
  RegUses.swapAndDropUse(LUIdx, FormerLastIdx);
}
/// Find another use that is compatible with \p OrigLU and owns a formula
/// identical to \p OrigF in everything but the base offset, where that
/// formula's base offset is zero.
///
/// A candidate use must be a different object than \p OrigLU, must not be an
/// ICmpZero use, and must match \p OrigLU in kind, access type and widest
/// fixup type. Within a candidate, at most one formula can match the registers
/// and symbols; if that one has a non-zero base offset the candidate is
/// abandoned.
///
/// Returns the matching use, or nullptr if there is none.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                       const LSRUse &OrigLU) {
  for (LSRUse &LU : Uses) {
    if (&LU == &OrigLU)
      continue;
    if (LU.Kind == LSRUse::ICmpZero || LU.Kind != OrigLU.Kind)
      continue;
    if (!(OrigLU.AccessTy == LU.AccessTy))
      continue;
    if (LU.WidestFixupType != OrigLU.WidestFixupType)
      continue;
    if (!LU.HasFormulaWithSameRegs(OrigF))
      continue;

    for (const Formula &F : LU.Formulae) {
      const bool SameRegsAndSymbols =
          F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg &&
          F.BaseGV == OrigF.BaseGV && F.Scale == OrigF.Scale &&
          F.UnfoldedOffset == OrigF.UnfoldedOffset;
      if (!SameRegsAndSymbols)
        continue;

      if (F.BaseOffset == 0)
        return &LU;
      // This was the only formula that could match; move on to the next use.
      break;
    }
  }

  return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
SmallVector<const SCEV *, 4> Worklist;
for (const IVStrideUse &U : IU) {
const SCEV *Expr = IU.getExpr(U);
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
Worklist.push_back(Expr);
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (AR->getLoop() == L)
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
std::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
Factors.insert(Factor->getAPInt().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
Factors.insert(Factor->getAPInt().getSExtValue());
}
}
if (Types.size() == 1)
Types.clear();
LLVM_DEBUG(print_factors_and_types(dbgs()));
}
/// Starting at \p OI, return the first operand in [\p OI, \p OE) that is an
/// instruction of SCEV-able type whose SCEV is an add recurrence on loop
/// \p L. Returns \p OE if there is no such operand.
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
              Loop *L, ScalarEvolution &SE) {
  for (; OI != OE; ++OI) {
    auto *Oper = dyn_cast<Instruction>(*OI);
    if (!Oper || !SE.isSCEVable(Oper->getType()))
      continue;

    const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper));
    if (AR && AR->getLoop() == L)
      return OI;
  }
  return OI;
}
/// If \p Oper is a trunc instruction, return the (wider) value being
/// truncated; otherwise return \p Oper itself.
static Value *getWideOperand(Value *Oper) {
  auto *Trunc = dyn_cast<TruncInst>(Oper);
  return Trunc ? Trunc->getOperand(0) : Oper;
}
/// Return true if \p LVal and \p RVal have compatible types for chaining:
/// either the very same type, or both pointer types in the same address
/// space.
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
  Type *LType = LVal->getType();
  Type *RType = RVal->getType();

  if (LType == RType)
    return true;
  if (!LType->isPointerTy() || !RType->isPointerTy())
    return false;
  return LType->getPointerAddressSpace() == RType->getPointerAddressSpace();
}
/// Return the "base" sub-expression of \p S, or nullptr if \p S reduces to a
/// constant.
///
/// Truncates, zero/sign extensions and add recurrences are looked through
/// (via their operand or start value). For an add expression the operands are
/// scanned from last to first: a nested add is descended into, a mul operand
/// is skipped, and any other operand is returned as the base. If every
/// operand of the add is a mul, the add itself is returned. Any other kind of
/// expression is its own base.
static const SCEV *getExprBase(const SCEV *S) {
  // Each iteration either returns or replaces S by a sub-expression.
  for (;;) {
    switch (S->getSCEVType()) {
    default:
      return S;
    case scConstant:
      return nullptr;
    case scTruncate:
      S = cast<SCEVTruncateExpr>(S)->getOperand();
      continue;
    case scZeroExtend:
      S = cast<SCEVZeroExtendExpr>(S)->getOperand();
      continue;
    case scSignExtend:
      S = cast<SCEVSignExtendExpr>(S)->getOperand();
      continue;
    case scAddRecExpr:
      S = cast<SCEVAddRecExpr>(S)->getStart();
      continue;
    case scAddExpr: {
      const SCEV *NestedAdd = nullptr;
      for (const SCEV *Op : reverse(cast<SCEVAddExpr>(S)->operands())) {
        if (Op->getSCEVType() == scAddExpr) {
          NestedAdd = Op;
          break;
        }
        if (Op->getSCEVType() != scMulExpr)
          return Op;
      }
      if (!NestedAdd)
        return S;
      S = NestedAdd;
      continue;
    }
    }
  }
}
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution &SE) {
if (StressIVChain)
return true;
if (!isa<SCEVConstant>(IncExpr)) {
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
return false;
}
SmallPtrSet<const SCEV*, 8> Processed;
return !isHighCostExpansion(IncExpr, Processed, SE);
}
/// Decide whether forming \p Chain is expected to pay off.
///
/// Always true when StressIVChain is set. A chain without increments, or one
/// that has entries in \p Users, is rejected. If the target reports any chain
/// element (head or increment) as profitable, the chain is accepted outright.
/// Otherwise a small signed cost is computed, starting at 1:
///   - minus 1 if the tail is a PHI whose SCEV equals the head's IncExpr,
///   - minus 1 if there is more than one non-zero constant increment,
///   - plus 1 for each variable increment that differs from the previous
///     variable increment,
///   - minus 1 for each variable increment that repeats the previous one.
/// The chain is profitable only if the final cost is negative.
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction *> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;

  if (!Chain.hasIncs())
    return false;

  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst
                    : Users) { dbgs() << " " << *Inst << "\n"; });
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  const IVInc &Head = Chain.Incs[0];
  Instruction *Tail = Chain.tailUserInst();

  int RegDelta = 1;
  if (isa<PHINode>(Tail) && SE.getSCEV(Tail) == Head.IncExpr) {
    --RegDelta;
  }

  if (TTI.isProfitableLSRChainElement(Head.UserInst))
    return true;

  unsigned ConstIncs = 0;
  unsigned FreshVarIncs = 0;
  unsigned RepeatedVarIncs = 0;
  const SCEV *PrevVarInc = nullptr;
  // Range-for over an IVChain skips the head and visits only the increments.
  for (const IVInc &Inc : Chain) {
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
      return true;

    const SCEV *Step = Inc.IncExpr;
    if (Step->isZero())
      continue;
    if (isa<SCEVConstant>(Step)) {
      ++ConstIncs;
      continue;
    }

    if (Step == PrevVarInc)
      ++RepeatedVarIncs;
    else
      ++FreshVarIncs;
    PrevVarInc = Step;
  }

  if (ConstIncs > 1)
    --RegDelta;
  RegDelta += FreshVarIncs;
  RegDelta -= RepeatedVarIncs;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: "
                    << RegDelta << "\n");
  return RegDelta < 0;
}
// Try to add the use of IVOper by UserInst to an existing IV chain, or start
// a new chain with it as the head, and update the chain's entry in
// ChainUsersVec accordingly.
//
// ChainUsersVec is indexed in parallel with IVChainVec and is grown here
// whenever a new chain is created.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
// Work with the value before any trunc.
Value *const NextIV = getWideOperand(IVOper);
const SCEV *const OperExpr = SE.getSCEV(NextIV);
const SCEV *const OperExprBase = getExprBase(OperExpr);
// Search the existing chains for one this operand can extend. On success
// ChainIdx identifies the chain and LastIncExpr holds the increment.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
// Unless stress-testing, only chains with the same expression base qualify.
if (!StressIVChain && Chain.ExprBase != OperExprBase)
continue;
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
// A PHI user is not appended to a chain whose tail is already a PHI.
if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
continue;
// The increment from the chain's last operand must be computable and
// invariant in loop L.
const SCEV *PrevExpr = SE.getSCEV(PrevIV);
const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
continue;
if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
LastIncExpr = IncExpr;
break;
}
}
if (ChainIdx == NChains) {
// No existing chain fits: consider starting a new one. PHI users never
// start a chain, and the number of chains is capped at MaxChains unless
// stress-testing.
if (isa<PHINode>(UserInst))
return;
if (NChains >= MaxChains && !StressIVChain) {
LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
// For a chain head, the recorded "increment" is the operand's full
// expression, which must be an add recurrence.
LastIncExpr = OperExpr;
if (!isa<SCEVAddRecExpr>(LastIncExpr))
return;
++NChains;
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
OperExprBase));
ChainUsersVec.resize(NChains);
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
<< ") IV=" << *LastIncExpr << "\n");
} else {
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
<< ") IV+" << *LastIncExpr << "\n");
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// When the chain advances by a non-zero amount, all users that were "near"
// so far become "far" users.
if (!LastIncExpr->isZero()) {
ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
NearUsers.end());
NearUsers.clear();
}
// Every other instruction using IVOper becomes a near user of the chain,
// with the exceptions handled below.
for (User *U : IVOper->users()) {
Instruction *OtherUse = dyn_cast<Instruction>(U);
if (!OtherUse)
continue;
// Skip users that are links of this chain. The scan starts at Incs.begin(),
// so it includes the chain head (unlike Chain.begin()).
IVChain::const_iterator IncIter = Chain.Incs.begin();
IVChain::const_iterator IncEnd = Chain.Incs.end();
for( ; IncIter != IncEnd; ++IncIter) {
if (IncIter->UserInst == OtherUse)
break;
}
if (IncIter != IncEnd)
continue;
// Skip users that have a non-unknown SCEV of their own and are known to
// IVUsers as a user or operand.
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
continue;
}
NearUsers.insert(OtherUse);
}
// UserInst is now a link of the chain, so it no longer counts as a far user.
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
void LSRInstance::CollectChains() {
LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
BasicBlock *LoopHeader = L->getHeader();
for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
LatchPath.push_back(Rung->getBlock());
}
LatchPath.push_back(LoopHeader);
for (BasicBlock *BB : reverse(LatchPath)) {
for (Instruction &I : *BB) {
if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
continue;
if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
continue;
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
ChainUsersVec[ChainIdx].NearUsers.erase(&I);
}
SmallPtrSet<Instruction*, 4> UniqueOperands;
User::op_iterator IVOpEnd = I.op_end();
User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst).second)
ChainInstruction(&I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} } for (PHINode &PN : L->getHeader()->phis()) {
if (!SE.isSCEVable(PN.getType()))
continue;
Instruction *IncV =
dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
ChainInstruction(&PN, IncV, ChainUsersVec);
}
unsigned ChainIdx = 0;
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
if (ChainIdx != UsersIdx)
IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
FinalizeChain(IVChainVec[ChainIdx]);
++ChainIdx;
}
IVChainVec.resize(ChainIdx);
}
/// Registers the IV operand use of every increment yielded by iterating
/// \p Chain in IVIncSet. \p Chain must contain at least one element.
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (const IVInc &Link : Chain) {
    Instruction *LinkUser = Link.UserInst;
    LLVM_DEBUG(dbgs() << " Inc: " << *LinkUser << "\n");

    // Locate the operand slot of the user that holds the chained IV value.
    User::op_iterator Slot = find(LinkUser->operands(), Link.IVOperand);
    assert(Slot != LinkUser->op_end() && "cannot find IV operand");
    IVIncSet.insert(Slot);
  }
}
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
return false;
if (IncConst->getAPInt().getMinSignedBits() > 64)
return false;
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, nullptr,
IncOffset, false))
return false;
return true;
}
/// Rewrites the IV operand of every increment yielded by iterating \p Chain so
/// that it is computed from the chain's running value (IVSrc) plus an expanded
/// increment. Every replaced operand that is an Instruction is appended to
/// \p DeadInsts. If none of the head user's IV operands has the head's
/// expression, the function returns before rewriting any instruction.
void LSRInstance::GenerateIVChain(const IVChain &Chain,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = nullptr;
// Scan the head user's IV operands for one whose SCEV, taken either directly
// or through getWideOperand, equals Head.IncExpr. IVSrc keeps the
// getWideOperand result of the operand examined last.
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
if (SE.getSCEV(*IVOpIter) == Head.IncExpr
|| SE.getSCEV(IVSrc) == Head.IncExpr) {
break;
}
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
// No matching operand: leave the chain untouched.
if (IVOpIter == IVOpEnd) {
LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
return;
}
assert(IVSrc && "Failed to find IV chain source");
LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
// Increment accumulated since IVSrc was last updated; null means none.
const SCEV *LeftOverExpr = nullptr;
for (const IVInc &Inc : Chain) {
// Code for a phi user is inserted before the latch terminator instead of at
// the phi itself.
Instruction *InsertPt = Inc.UserInst;
if (isa<PHINode>(InsertPt))
InsertPt = L->getLoopLatch()->getTerminator();
// IVOper is the value that will replace this user's IV operand.
Value *IVOper = IVSrc;
if (!Inc.IncExpr->isZero()) {
// Bring the increment to IntTy (sign-extending when widening) and add it to
// whatever is still pending.
const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
if (LeftOverExpr && !LeftOverExpr->isZero()) {
// Materialize IVSrc + LeftOverExpr at the insertion point.
Rewriter.clearPostInc();
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
SE.getUnknown(IncV));
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
// When canFoldIVIncExpr rejects the pending increment, the freshly expanded
// value becomes the new running value and the pending increment is cleared;
// otherwise IVSrc stays and the increment keeps accumulating.
if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = nullptr;
}
}
// The original operand may have a different type than the chain value; the
// assertion requires it to be no wider, and the value is converted with a
// trunc-or-bitcast.
Type *OperTy = Inc.IVOperand->getType();
if (IVTy != OperTy) {
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
"cannot extend a chained IV");
IRBuilder<> Builder(InsertPt);
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
}
Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
// Queue the replaced operand in DeadInsts.
if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
DeadInsts.emplace_back(OperandIsInstr);
}
// When the chain ends in a phi, also redirect every compatible header phi
// whose latch value has the same SCEV as the final IVSrc to use IVSrc.
if (isa<PHINode>(Chain.tailUserInst())) {
for (PHINode &Phi : L->getHeader()->phis()) {
if (!isCompatibleIVType(&Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
Phi.getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
Type *PostIncTy = PostIncV->getType();
// A type difference is only expected when the phi's value is a pointer; a
// pointer cast carrying PostIncV's debug location is emitted at the latch
// terminator.
if (IVTy != PostIncTy) {
assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
Phi.replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.emplace_back(PostIncV);
}
}
}
/// For every IVStrideUse in IU whose operand is not already in IVIncSet,
/// determines the use kind, obtains an LSRUse through getUse, records a fixup
/// on it and, if the LSRUse has no formulae yet, seeds it with an initial
/// formula. Equality compares may have their operands swapped in the IR (which
/// sets Changed) and are expressed as a difference with kind ICmpZero.
void LSRInstance::CollectFixupsAndInitialFormulae() {
BranchInst *ExitBranch = nullptr;
bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
for (const IVStrideUse &U : IU) {
Instruction *UserInst = U.getUser();
// Operands already recorded in IVIncSet are skipped here.
User::op_iterator UseI =
find(UserInst->operands(), U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI)) {
LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
continue;
}
// Default to a Basic use; address uses also carry their access type.
LSRUse::KindType Kind = LSRUse::Basic;
MemAccessTy AccessTy;
if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
Kind = LSRUse::Address;
AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
}
const SCEV *S = IU.getExpr(U);
PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
// When the target reported it can save the compare, the compare that is the
// condition of ExitBranch gets no fixup at all.
if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
continue;
if (CI->isEquality()) {
// Make sure the operand being replaced is operand 0; NV is the other one.
// Swapping modifies the IR, hence Changed.
Value *NV = CI->getOperand(1);
if (NV == U.getOperandValToReplace()) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
Changed = true;
}
// Express the comparison as (N - S) with kind ICmpZero. First choice: N is
// loop-invariant, safe to expand, and (for pointers) shares S's pointer base.
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
(!NV->getType()->isPointerTy() ||
SE.getPointerBase(N) == SE.getPointerBase(S))) {
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
} else if (L->isLoopInvariant(NV) &&
(!isa<Instruction>(NV) ||
DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
!NV->getType()->isPointerTy()) {
// Fallback: a non-pointer, loop-invariant value that is not an instruction
// or dominates the header is wrapped in a SCEVUnknown instead of using its
// analyzed expression.
N = SE.getUnknown(NV);
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
assert(!isa<SCEVCouldNotCompute>(S));
}
// Add the negation of every factor present before this loop (except -1), then
// -1 itself. The bound `e` is captured up front, so factors inserted by the
// loop are not revisited.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
}
// getUse yields the index of the LSRUse to attach to and an offset for the
// fixup.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
size_t LUIdx = P.first;
int64_t Offset = P.second;
LSRUse &LU = Uses[LUIdx];
// Record the fixup on that use.
LSRFixup &LF = LU.getNewFixup();
LF.UserInst = UserInst;
LF.OperandValToReplace = U.getOperandValToReplace();
LF.PostIncLoops = TmpPostIncLoops;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// Track the widest operand type seen among this use's fixups.
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
// A use without any formula gets its initial formula from S.
if (LU.Formulae.empty()) {
InsertInitialFormula(S, LU, LUIdx);
CountRegisters(LU.Formulae.back(), LUIdx);
}
}
LLVM_DEBUG(print_fixups(dbgs()));
}
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
size_t LUIdx) {
if (!Rewriter.isSafeToExpand(S))
LU.RigidFormula = true;
Formula F;
F.initialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.countRegister(F.ScaledReg, LUIdx);
for (const SCEV *BaseReg : F.BaseRegs)
RegUses.countRegister(BaseReg, LUIdx);
}
/// Tries to add \p F to \p LU. On success the registers of \p F are counted
/// for \p LUIdx and true is returned; if LSRUse::InsertFormula declines, this
/// returns false and counts nothing. \p F must be legal for the use.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
         "Formula is illegal");

  const bool Added = LU.InsertFormula(F, *L);
  if (Added)
    CountRegisters(F, LUIdx);
  return Added;
}
/// Starting from every register in RegUses, walks the operands of the SCEV
/// expressions down to SCEVUnknown leaves. For each leaf whose value is not
/// an instruction inside the loop (and not undef), the value's uses are
/// scanned; the first use that passes all filters below receives a fixup and a
/// supplemental formula, after which the scan of that value stops.
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 32> Visited;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
// Each expression is processed at most once.
if (!Visited.insert(S).second)
continue;
// Interior nodes only contribute their operands to the worklist.
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
const Value *V = US->getValue();
// Skip values defined by an instruction inside the loop, and undef values.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
continue;
for (const Use &U : V->uses()) {
// Only instruction users are considered.
const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
if (!UserInst)
continue;
// Skip users that are EH pads.
if (UserInst->isEHPad())
continue;
// Skip users that live in a different function than the loop.
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// The block where the use takes effect: for a phi this is the incoming block
// of the operand, otherwise the user's own block. It must be dominated by
// the loop header.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Skip uses whose block is terminated by an EH pad instruction.
if (UseBB->getTerminator()->isEHPad())
continue;
// For phi users, check every incoming edge that carries this same value: if
// any of those incoming blocks ends in an EH pad terminator, skip the use.
if (isa<PHINode>(UserInst)) {
const auto *PhiNode = cast<PHINode>(UserInst);
bool HasIncompatibleEHPTerminatedBlock = false;
llvm::Value *ExpectedValue = U;
for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
if (PhiNode->getIncomingValue(I) == ExpectedValue) {
if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
HasIncompatibleEHPTerminatedBlock = true;
break;
}
}
}
if (HasIncompatibleEHPTerminatedBlock) {
continue;
}
}
// Skip users located in a block terminated by a catchswitch.
if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
continue;
if (SE.isSCEVable(UserInst->getType())) {
const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
// Users that SCEV models as a real expression are skipped.
if (!isa<SCEVUnknown>(UserS))
continue;
// A user whose SCEV is this very unknown is looked through: an unknown for
// the user itself is queued so its uses get examined later.
if (UserS == US) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
}
}
// For compares, skip the use when the other operand has a computable
// evolution in this loop. `!U.getOperandNo()` maps operand 0 -> 1 and 1 -> 0.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !U.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
// Attach a Basic fixup for this use and give its LSRUse a formula made of the
// unknown alone.
std::pair<size_t, int64_t> P = getUse(
S, LSRUse::Basic, MemAccessTy());
size_t LUIdx = P.first;
int64_t Offset = P.second;
LSRUse &LU = Uses[LUIdx];
LSRFixup &LF = LU.getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(US, LU, LUIdx);
// NOTE(review): this counts registers for index Uses.size() - 1 rather than
// LUIdx; the two agree only if getUse appended a new use - confirm intent.
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
// Only the first qualifying use of V is recorded.
break;
}
}
}
}
/// Splits \p S into addends, appending the pieces it separates out to \p Ops
/// (each multiplied by \p C when \p C is non-null) and returning whatever part
/// was not appended, or nullptr when everything was appended.
///
/// Sums are split into their operands, an affine add recurrence with a
/// non-zero start has its start split off, and a two-operand product whose
/// first operand is a constant is distributed over its second operand.
/// Recursion stops at depth 3, where \p S is returned unchanged.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  if (Depth >= 3)
    return S;

  // Appends Piece to Ops, scaled by the pending constant factor if any.
  auto PushScaled = [&](const SCEV *Piece) {
    Ops.push_back(C ? SE.getMulExpr(C, Piece) : Piece);
  };

  if (const auto *Sum = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *Term : Sum->operands())
      if (const SCEV *Rest = CollectSubexprs(Term, C, Ops, L, SE, Depth + 1))
        PushScaled(Rest);
    return nullptr;
  }

  if (const auto *Rec = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Start = Rec->getStart();
    if (Start->isZero() || !Rec->isAffine())
      return S;

    const SCEV *Rest = CollectSubexprs(Start, C, Ops, L, SE, Depth + 1);
    // The leftover of the start is emitted as its own piece unless this
    // recurrence belongs to another loop and the leftover is itself an add
    // recurrence.
    if (Rest && (Rec->getLoop() == L || !isa<SCEVAddRecExpr>(Rest))) {
      PushScaled(Rest);
      Rest = nullptr;
    }

    // Nothing was peeled off the start: hand back the original expression.
    if (Rest == Start)
      return S;

    if (!Rest)
      Rest = SE.getConstant(Rec->getType(), 0);
    return SE.getAddRecExpr(Rest, Rec->getStepRecurrence(SE), Rec->getLoop(),
                            SCEV::FlagAnyWrap);
  }

  if (const auto *Prod = dyn_cast<SCEVMulExpr>(S)) {
    if (Prod->getNumOperands() != 2)
      return S;
    const auto *Factor = dyn_cast<SCEVConstant>(Prod->getOperand(0));
    if (!Factor)
      return S;

    // Fold the constant into the pending factor and distribute it.
    C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Factor)) : Factor;
    if (const SCEV *Rest =
            CollectSubexprs(Prod->getOperand(1), C, Ops, L, SE, Depth + 1))
      Ops.push_back(SE.getMulExpr(C, Rest));
    return nullptr;
  }

  return S;
}
static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
LSRUse &LU, const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
if (LU.Kind != LSRUse::Address ||
!LU.AccessTy.getType()->isIntOrIntVectorTy())
return false;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
if (!AR)
return false;
const SCEV *LoopStep = AR->getStepRecurrence(SE);
if (!isa<SCEVConstant>(LoopStep))
return false;
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
return true;
}
return false;
}
/// Helper for GenerateReassociations. Splits one register of \p Base (the
/// scaled register when \p IsScaledReg is set, otherwise BaseRegs[Idx]) into
/// addends with CollectSubexprs and, for each addend, tries a formula in which
/// that addend is separated from the sum of the remaining ones. Formulae that
/// are newly inserted are reassociated further with an increased depth.
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
unsigned Depth, size_t Idx,
bool IsScaledReg) {
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
// In post-indexed addressing mode, registers that mayUsePostIncMode accepts
// are left alone.
if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
return;
SmallVector<const SCEV *, 8> AddOps;
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
if (Remainder)
AddOps.push_back(Remainder);
// A single addend means there is nothing to reassociate.
if (AddOps.size() == 1)
return;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end();
J != JE; ++J) {
// Skip addends that are loop-variant unknowns.
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Skip addends that isAlwaysFoldable already accepts for this use.
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// InnerAddOps = all addends except *J (those before J, then those after).
SmallVector<const SCEV *, 8> InnerAddOps(
((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append(std::next(J),
((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Likewise skip when the only remaining addend is always foldable.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
if (InnerSum->isZero())
continue;
Formula F = Base;
// Place the inner sum: if it is a constant of at most 64 bits that the target
// accepts as an add immediate (together with the current unfolded offset),
// fold it into UnfoldedOffset and drop the register; otherwise it replaces
// the register that was split.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset =
(uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
if (IsScaledReg)
F.ScaledReg = nullptr;
else
F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
} else if (IsScaledReg)
F.ScaledReg = InnerSum;
else
F.BaseRegs[Idx] = InnerSum;
// Place *J the same way: fold it into UnfoldedOffset when it is a suitable
// constant, otherwise add it as a separate base register.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset =
(uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
else
F.BaseRegs.push_back(*J);
// The register set changed, so re-canonicalize before inserting.
F.canonicalize(*L);
// Recurse only on formulae that were actually new. The depth grows by one
// plus floor(log2(AddOps.size())) / 4, so wide sums hit the depth cap sooner.
if (InsertFormula(LU, LUIdx, F))
GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
Depth + 1 + (Log2_32(AddOps.size()) >> 2));
}
}
/// Generates reassociated variants of \p Base for \p LU by splitting each of
/// its base registers and, when the scale is 1, its scaled register. Does
/// nothing once \p Depth has reached 3. \p Base must be canonical.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base, unsigned Depth) {
  assert(Base.isCanonical(*L) && "Input must be in the canonical form");
  if (Depth >= 3)
    return;

  const size_t NumBaseRegs = Base.BaseRegs.size();
  for (size_t RegIdx = 0; RegIdx < NumBaseRegs; ++RegIdx)
    GenerateReassociationsImpl(LU, LUIdx, Base, Depth, RegIdx);

  if (Base.Scale != 1)
    return;
  GenerateReassociationsImpl(LU, LUIdx, Base, Depth, /*Idx=*/-1,
                             /*IsScaledReg=*/true);
}
/// Generates formulae in which several base registers of \p Base are merged
/// into a single summed register.
///
/// Only base registers that properly dominate the loop header and have no
/// computable evolution in the loop are merged; the others are kept as they
/// are. One formula is tried for the sum of the merged registers (when there
/// are at least two) and, if \p Base has an unfolded offset, another one for
/// the sum of the merged registers plus that offset.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                       Formula Base) {
  // At least two combinable parts are needed.
  const size_t NumParts = Base.BaseRegs.size() + (Base.Scale == 1) +
                          (Base.UnfoldedOffset != 0);
  if (NumParts <= 1)
    return;

  Base.unscale();

  Formula NewBase = Base;
  NewBase.BaseRegs.clear();

  SmallVector<const SCEV *, 4> Ops;
  Type *CombinedIntegerType = nullptr;
  for (const SCEV *Reg : Base.BaseRegs) {
    const bool Mergeable = SE.properlyDominates(Reg, L->getHeader()) &&
                           !SE.hasComputableLoopEvolution(Reg, L);
    if (!Mergeable) {
      NewBase.BaseRegs.push_back(Reg);
      continue;
    }
    // The first merged register determines the type used for the offset.
    if (!CombinedIntegerType)
      CombinedIntegerType = SE.getEffectiveSCEVType(Reg->getType());
    Ops.push_back(Reg);
  }

  if (Ops.empty())
    return;

  // Adds NewBase extended by the register Sum, unless Sum is zero. NewBase is
  // read at call time, so later changes to it are picked up.
  auto TryInsert = [&](const SCEV *Sum) {
    if (Sum->isZero())
      return;
    Formula F = NewBase;
    F.BaseRegs.push_back(Sum);
    F.canonicalize(*L);
    (void)InsertFormula(LU, LUIdx, F);
  };

  if (Ops.size() > 1) {
    // getAddExpr is handed a copy because Ops is used again below.
    SmallVector<const SCEV *, 4> Scratch(Ops);
    TryInsert(SE.getAddExpr(Scratch));
  }

  if (NewBase.UnfoldedOffset) {
    assert(CombinedIntegerType && "Missing a type for the unfolded offset");
    Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
                                 true));
    NewBase.UnfoldedOffset = 0;
    TryInsert(SE.getAddExpr(Ops));
  }
}
/// Helper for GenerateSymbolicOffsets. Runs ExtractSymbol on one register of
/// \p Base (the scaled register when \p IsScaledReg is set, otherwise
/// BaseRegs[Idx]); if a global value is found and the register left over is
/// not zero, tries a formula that uses the global as BaseGV and the left-over
/// expression as the register, provided that formula is legal for \p LU.
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base, size_t Idx,
                                              bool IsScaledReg) {
  // Reg is passed to ExtractSymbol and written back into the formula below;
  // presumably ExtractSymbol updates it in place.
  const SCEV *Reg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  GlobalValue *Symbol = ExtractSymbol(Reg, SE);
  if (Reg->isZero() || !Symbol)
    return;

  Formula F = Base;
  F.BaseGV = Symbol;
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;

  if (!IsScaledReg)
    F.BaseRegs[Idx] = Reg;
  else
    F.ScaledReg = Reg;
  (void)InsertFormula(LU, LUIdx, F);
}
/// Tries to move a global symbol out of each register of \p Base into the
/// formula's BaseGV. Formulae that already have a BaseGV are left alone. The
/// scaled register is only considered when the scale is 1.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  if (Base.BaseGV)
    return;

  const size_t NumBaseRegs = Base.BaseRegs.size();
  for (size_t RegIdx = 0; RegIdx < NumBaseRegs; ++RegIdx)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, RegIdx);

  if (Base.Scale == 1)
    GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /*Idx=*/-1,
                                /*IsScaledReg=*/true);
}
/// Helper for GenerateConstantOffsets. For one register G of \p Base (the
/// scaled register when \p IsScaledReg is set, otherwise BaseRegs[Idx]), tries
/// formulae in which a constant is moved between G and the formula's
/// BaseOffset: once for every offset in \p Worklist and once for the immediate
/// that ExtractImmediate finds in G.
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
// Tries the formula with BaseOffset decreased by Offset and the register
// increased by Offset (so the total is unchanged), if that is a legal use.
auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
Formula F = Base;
// The subtraction is done on uint64_t.
F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
// If the register cancelled out entirely, remove it from the formula;
// otherwise store the adjusted register back.
if (NewG->isZero()) {
if (IsScaledReg) {
F.Scale = 0;
F.ScaledReg = nullptr;
} else
F.deleteBaseReg(F.BaseRegs[Idx]);
F.canonicalize(*L);
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
F.BaseRegs[Idx] = NewG;
(void)InsertFormula(LU, LUIdx, F);
}
};
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
// Pre-indexed address uses: when G is an add recurrence with a constant step,
// additionally try every worklist offset reduced by that step.
if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
if (auto *StepRec =
dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
const APInt &StepInt = StepRec->getAPInt();
int64_t Step = StepInt.isNegative() ?
StepInt.getSExtValue() : StepInt.getZExtValue();
for (int64_t Offset : Worklist) {
Offset -= Step;
GenerateOffset(G, Offset);
}
}
}
}
// Try each worklist offset as is.
for (int64_t Offset : Worklist)
GenerateOffset(G, Offset);
// Finally move G's own immediate into BaseOffset. G is written back into the
// formula afterwards, so ExtractImmediate presumably strips the immediate
// from G in place.
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
return;
Formula F = Base;
F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
if (IsScaledReg) {
F.ScaledReg = G;
} else {
F.BaseRegs[Idx] = G;
// Only the base-register case is re-canonicalized here.
F.canonicalize(*L);
}
(void)InsertFormula(LU, LUIdx, F);
}
/// Generates constant-offset variants of \p Base for \p LU. The offsets tried
/// are the use's MinOffset and, when different, its MaxOffset. Every base
/// register is processed, and the scaled register as well when the scale
/// is 1.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  SmallVector<int64_t, 2> Offsets;
  Offsets.push_back(LU.MinOffset);
  if (LU.MinOffset != LU.MaxOffset)
    Offsets.push_back(LU.MaxOffset);

  const size_t NumBaseRegs = Base.BaseRegs.size();
  for (size_t RegIdx = 0; RegIdx < NumBaseRegs; ++RegIdx)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Offsets, RegIdx);

  if (Base.Scale == 1)
    GenerateConstantOffsetsImpl(LU, LUIdx, Base, Offsets, /*Idx=*/-1,
                                /*IsScaledReg=*/true);
}
/// For ICmpZero uses, tries formulae obtained by multiplying every component
/// of \p Base (base offset, use offset, base registers, scaled register and
/// unfolded offset) by each factor in Factors. A factor is abandoned as soon
/// as one of the multiplications cannot be verified by dividing back, or the
/// result is not valid for the formula's integer type.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
// Requires a formula type of at most 64 bits.
Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
// Only uses with a single offset are handled.
if (LU.MinOffset != LU.MaxOffset) return;
// Formulae with pointer-typed registers are not scaled.
if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
return;
for (const SCEV *BaseReg : Base.BaseRegs)
if (BaseReg->getType()->isPointerTy())
return;
assert(!Base.BaseGV && "ICmpZero use is not legal!");
for (int64_t Factor : Factors) {
if (!ConstantInt::isValueValidForType(IntTy, Factor))
continue;
// Scale BaseOffset. INT64_MIN * -1 is rejected up front; any other overflow
// is detected by dividing the (unsigned-computed) product back.
if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
assert(Factor != 0 && "Zero factor not expected!");
if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
continue;
// Scale the use's offset with the same overflow checks.
int64_t Offset = LU.MinOffset;
if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, Offset))
continue;
Formula F = Base;
F.BaseOffset = NewBaseOffset;
// Legality is checked against the scaled use offset.
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Add the difference between the scaled and the original use offset to the
// formula's BaseOffset.
F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
// Scale each base register; give up on this factor (via `next`) when the
// exact signed division does not give back the original register.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Same check for the scaled register.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
// Scale the unfolded offset with the same overflow checks as BaseOffset.
if (F.UnfoldedOffset != 0) {
if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
Factor == -1)
continue;
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
continue;
}
(void)InsertFormula(LU, LUIdx, F);
// Jump target used to continue the factor loop from inside the register loop.
next:;
}
}
/// For each factor in Factors, tries formulae in which one add-recurrence base
/// register of \p Base is replaced by a scaled register holding that
/// recurrence exactly divided by the factor. \p Base is unscaled first; if
/// that fails nothing is generated. May change LU.Kind from Basic to Special.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Type *IntTy = Base.getType();
if (!IntTy) return;
// A formula that already has a scale must first be unscaled successfully.
if (Base.Scale != 0 && !Base.unscale())
return;
assert(Base.Scale == 0 && "unscale did not did its job!");
for (int64_t Factor : Factors) {
// Base is the local by-value copy and is used as a scratch formula:
// pretend one base register has already been moved to the scaled slot.
Base.Scale = Factor;
Base.HasBaseReg = Base.BaseRegs.size() > 1;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
Base)) {
// Not legal as is. A Basic use whose fixups are all outside the loop is
// switched to kind Special when the formula is legal under that kind;
// otherwise this factor is skipped.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
continue;
}
// Skip ICmpZero formulae that would consist of the scaled register only.
if (LU.Kind == LSRUse::ICmpZero &&
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
// Only add recurrences of this loop are candidates, or of any loop when all
// fixups are outside the loop.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
// Divide the recurrence by the factor; a null or zero quotient is unusable.
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
if (!Quotient->isZero()) {
Formula F = Base;
F.ScaledReg = Quotient;
F.deleteBaseReg(F.BaseRegs[i]);
// With scale 1, skip the formula when no base register is left, or when the
// recurrence belongs to another loop and all fixups are outside the loop.
if (F.Scale == 1 && (F.BaseRegs.empty() ||
(AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
continue;
// With scale 1 and all fixups outside the loop, canonicalize before
// inserting.
if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
}
}
/// For every type in Types that differs from the type of \p Base and from
/// which truncation to that type is free on the target, tries a formula whose
/// registers are any-extended to the wider type.
///
/// Nothing is generated for formulae that have a BaseGV, have no type, or
/// involve pointer types. A candidate is dropped when one of the extended
/// registers turns out to be zero, or when none of its registers is used by
/// another use.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  if (Base.BaseGV)
    return;

  Type *DstTy = Base.getType();
  if (!DstTy || DstTy->isPointerTy())
    return;

  // Bail out when any register is pointer-typed.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  for (const SCEV *Reg : Base.BaseRegs)
    if (Reg->getType()->isPointerTy())
      return;

  for (Type *SrcTy : Types) {
    if (SrcTy == DstTy || !TTI.isTruncateFree(SrcTy, DstTy))
      continue;

    Formula F = Base;

    if (F.ScaledReg) {
      const SCEV *Extended = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
      if (Extended->isZero())
        continue;
      F.ScaledReg = Extended;
    }

    // Extend the base registers in place, stopping at the first one that
    // becomes zero.
    bool AllNonZero = true;
    for (const SCEV *&Reg : F.BaseRegs) {
      const SCEV *Extended = SE.getAnyExtendExpr(Reg, SrcTy);
      if (Extended->isZero()) {
        AllNonZero = false;
        break;
      }
      Reg = Extended;
    }
    if (!AllNonZero)
      continue;

    if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
      continue;

    F.canonicalize(*L);
    (void)InsertFormula(LU, LUIdx, F);
  }
}
namespace {

/// A (use index, immediate, register) triple. print() renders it as a request
/// to add the offset Imm in formulae of use LUIdx that reference OrigReg.
struct WorkItem {
  size_t LUIdx;        // Index of the use.
  int64_t Imm;         // Offset to add.
  const SCEV *OrigReg; // Register the formulae must reference.

  WorkItem(size_t UseIndex, int64_t Offset, const SCEV *Reg)
      : LUIdx(UseIndex), Imm(Offset), OrigReg(Reg) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Writes a one-line, human-readable description of the item to \p OS
/// (without a trailing newline).
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg;
  OS << " in use " << LUIdx;
  OS << " , add offset " << Imm;
}

/// Prints the item followed by a newline to errs().
LLVM_DUMP_METHOD void WorkItem::dump() const {
  raw_ostream &Err = errs();
  print(Err);
  Err << '\n';
}
#endif
/// Looks for registers in RegUses that share the same expression once their
/// immediate has been extracted, and generates formulae that shift an offset
/// between such a register and the formula's immediate fields. The work is
/// done in two phases: first a list of (use, offset, register) work items is
/// collected, then formulae are inserted for each item.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Maps an immediate to the original register that carried it.
using ImmMapTy = std::map<int64_t, const SCEV *>;
// Map: immediate-free expression -> (immediate -> original register).
DenseMap<const SCEV *, ImmMapTy> Map;
// Union of the use indices of all registers grouped under each key.
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
// Keys of Map in order of first insertion.
SmallVector<const SCEV *, 8> Sequence;
for (const SCEV *Use : RegUses) {
// Reg is a copy handed to ExtractImmediate and then used as the grouping
// key; presumably ExtractImmediate strips the immediate from it in place.
const SCEV *Reg = Use; int64_t Imm = ExtractImmediate(Reg, SE);
auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, Use));
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
}
// Phase 1: collect work items. UniqueItems keeps each (use, offset) pair from
// being queued more than once.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
for (const SCEV *Reg : Sequence) {
const ImmMapTy &Imms = Map.find(Reg)->second;
// Groups with a single immediate offer nothing to share.
if (Imms.size() == 1)
continue;
LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (const auto &Entry
: Imms) dbgs()
<< ' ' << Entry.first;
dbgs() << '\n');
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
int64_t JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
// Skip non-constant registers when the whole group is used by one use only.
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
<< '\n');
continue;
}
// Compare against three other entries only: the smallest immediate, the
// largest, and the first one not below their average.
int64_t First = Imms.begin()->first;
int64_t Last = std::prev(Imms.end())->first;
// Average of First and Last computed without forming First + Last:
// shared bits plus half of the differing bits.
int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
// Adds 1 when Avg is negative and First and Last differ in their lowest bit.
Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), std::prev(Imms.end()),
Imms.lower_bound(Avg)};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
// Ignore the entry itself and a lower_bound result of end().
if (M == J || M == JE) continue;
// Difference between the two immediates, computed on uint64_t.
int64_t Imm = (uint64_t)JImm - M->first;
for (unsigned LUIdx : UsedByIndices.set_bits())
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
}
// The lookup structures are no longer needed.
Map.clear();
Sequence.clear();
UsedByIndicesMap.clear();
UniqueItems.clear();
// Phase 2: generate formulae for each work item.
for (const WorkItem &WI : WorkItems) {
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
// SCEV constant for -Imm, with the negation done on uint64_t.
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
// Note: the loop variable L shadows the member L here, which is why the body
// refers to the loop as this->L. LE is captured up front, so formulae
// inserted by this loop are not visited.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
Formula F = LU.Formulae[L];
F.unscale();
if (F.ScaledReg == OrigReg) {
// OrigReg is the scaled register: the immediate contributes Imm * Scale.
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Skip if the formula already references the constant -Offset as a register.
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
NewF.BaseOffset = Offset;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// Skip when the new scaled register is a constant whose sign differs from
// the new BaseOffset and |C| * Scale does not exceed |BaseOffset|.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
(C->getAPInt().abs() * APInt(BitWidth, F.Scale))
.ule(std::abs(NewF.BaseOffset)))
continue;
NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Otherwise look for OrigReg among the base registers.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
// Folding into BaseOffset is not legal. Unless post-indexed mode applies or
// the target rejects the add immediate, put Imm into UnfoldedOffset instead.
if (AMK == TTI::AMK_PostIndexed &&
mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
continue;
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
}
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
// Skip the formula when some base register is a constant C such that
// |C + BaseOffset| < |BaseOffset| and C + BaseOffset has at least as many
// trailing zero bits as BaseOffset.
for (const SCEV *NewReg : NewF.BaseRegs)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
if ((C->getAPInt() + NewF.BaseOffset)
.abs()
.slt(std::abs(NewF.BaseOffset)) &&
(C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
// Only the first matching base register yields a formula.
break;
// Jump target: continue with the next base register.
skip_formula:;
}
}
}
}
}
void
LSRInstance::GenerateAllReuseFormulae() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
GenerateCrossUseConstantOffsets();
LLVM_DEBUG(dbgs() << "\n"
"After generating reuse formulae:\n";
print_uses(dbgs()));
}
/// Prunes the formulae of every use. A formula is deleted when its cost is
/// rated a loser, or when another formula of the same use has the same set of
/// registers shared with other uses and is not more expensive. After deleting
/// from a use, its register information is recomputed.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
// Passed to RateFormula when rating each candidate below.
SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
// Maps a sorted list of shared registers to the index of the best formula
// seen so far for that list; cleared after each use.
using BestFormulaeTy =
DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
dbgs() << '\n');
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
// Rate the candidate; losers fall through to the deletion code below.
Cost CostF(L, SE, TTI, AMK);
Regs.clear();
CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
if (CostF.isLoser()) {
LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
dbgs() << "\n");
}
else {
// Key = the formula's registers that are also used by other uses.
SmallVector<const SCEV *, 4> Key;
for (const SCEV *Reg : F.BaseRegs) {
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
if (F.ScaledReg &&
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
Key.push_back(F.ScaledReg);
// Sorted so that the same set of registers always yields the same key.
llvm::sort(Key);
// First formula with this key: keep it and move on.
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
if (P.second)
continue;
// Otherwise compare with the best so far. If F is cheaper, swap the two
// formula objects so that F (the one deleted below) holds the worse one.
Formula &Best = LU.Formulae[P.first->second];
Cost CostBest(L, SE, TTI, AMK);
Regs.clear();
CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
if (CostF.isLess(CostBest))
std::swap(F, Best);
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula ";
Best.print(dbgs()); dbgs() << '\n');
}
#ifndef NDEBUG
ChangedFormulae = true;
#endif
// Delete F and step the index and the bound back so that slot FIdx is
// examined again (presumably DeleteFormula moves another formula into it -
// verify against LSRUse::DeleteFormula).
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
}
// Recompute the use's registers if anything was deleted.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
BestFormulae.clear();
}
LLVM_DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
/// Estimate the size of the search space as the product of the number of
/// formulae of each use, giving up as soon as ComplexityLimit is reached.
///
/// \returns the running product. If a single use has at least
/// ComplexityLimit formulae the result is exactly ComplexityLimit; if the
/// product reaches the limit the (possibly larger) product is returned.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Product = 1;
  for (const LSRUse &Use : Uses) {
    const size_t NumFormulae = Use.Formulae.size();
    // Check the factor before multiplying so an oversized use is reported as
    // the limit itself rather than being folded into the product.
    if (NumFormulae >= ComplexityLimit) {
      Product = ComplexityLimit;
      return Product;
    }
    Product *= NumFormulae;
    if (Product >= ComplexityLimit)
      return Product;
  }
  return Product;
}
/// When the estimated search space is at or above ComplexityLimit, delete
/// formulae that carry a constant or a global value in a base register if the
/// use already has a formula with the same registers once that register is
/// folded into the immediate offset or the BaseGV field.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other "
"formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
// Set when at least one formula of this use is deleted.
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
// Examine each base register of F; the scan stops (break) as soon as F has
// been deleted, because F and the iterators are no longer meaningful.
for (SmallVectorImpl<const SCEV *>::const_iterator
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
// Constant base register: build a copy of F with the constant added to
// BaseOffset (the addition is done on the unsigned value) and the register
// at the same position removed.
Formula NewF = F;
NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
// If the use already has a formula with NewF's registers, F is redundant.
if (LU.HasFormulaWithSameRegs(NewF)) {
LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
// Compensate for the outer loop's ++i and shrink its bound.
--i;
--e;
Any = true;
break;
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
// Global-value base register: only considered when F has no BaseGV yet.
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
if (!F.BaseGV) {
// Build a copy of F with the global moved from the register list to BaseGV.
Formula NewF = F;
NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
// Compensate for the outer loop's ++i and shrink its bound.
--i;
--e;
Any = true;
break;
}
}
}
}
}
// Refresh the use's register bookkeeping only if formulae were removed.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
}
/// When the estimated search space is at or above ComplexityLimit, merge a
/// use into another use that FindUseWithSimilarFormula reports for one of its
/// formulae with a non-zero BaseOffset, moving its fixups over with the
/// offset applied and then deleting the merged use.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
LLVM_DEBUG(
dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (const Formula &F : LU.Formulae) {
// Only formulae with a non-zero offset and a scale of 0 or 1 are candidates.
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
if (!LUThatHas)
continue;
// The target use must be able to accommodate the extra offset for this
// use's kind and access type; otherwise try the next formula.
if (!reconcileNewOffset(*LUThatHas, F.BaseOffset,  false,
LU.Kind, LU.AccessTy))
continue;
LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
// The merged use has all fixups outside the loop only if both uses did.
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Move every fixup to the target use, shifted by the formula's offset.
for (LSRFixup &Fixup : LU.Fixups) {
Fixup.Offset += F.BaseOffset;
LUThatHas->pushFixup(Fixup);
LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
// Re-check the target's formulae against its current MinOffset/MaxOffset
// and drop the ones that are no longer legal. Note that this F shadows the
// outer loop's F.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
LUThatHas->DeleteFormula(F);
// Compensate for the loop's ++i and shrink its bound.
--i;
--e;
Any = true;
}
}
// The target's index is recovered from its address relative to Uses.front().
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
// Remove the merged use, then compensate for the outer loop's ++LUIdx and
// shrink its bound. The break leaves the formula loop, whose range belonged
// to the use that was just deleted.
DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
}
}
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// If the estimated search space is still at or above ComplexityLimit, run
/// FilterOutUndesirableDedicatedRegisters() once more.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
  // Nothing to do while the search space is below the limit.
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
  LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                       "undesirable dedicated registers.\n");

  FilterOutUndesirableDedicatedRegisters();

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// When the estimated search space is at or above ComplexityLimit, keep for
/// each use only one formula per (ScaledReg, Scale) pair. Formulae without a
/// ScaledReg are left untouched.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
LLVM_DEBUG(
dbgs() << "The search space is too complex.\n"
"Narrowing the search space by choosing the best Formula "
"from the Formulae with the same Scale and ScaledReg.\n");
// Key: (ScaledReg, Scale). Value: index of the formula kept for that key
// within the current use.
using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
BestFormulaeTy BestFormulae;
#ifndef NDEBUG
// Debug-only: records whether any formula was deleted.
bool ChangedFormulae = false;
#endif
// Scratch sets handed to RateFormula by the comparison lambda below.
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
dbgs() << '\n');
// Returns true if FA should be preferred over FB. Each base register
// contributes (NumUses - number of uses referencing it + 1) to a formula's
// score, so registers referenced by more uses contribute less; the lower
// score wins. Equal scores are decided by the cost model.
auto IsBetterThan = [&](Formula &FA, Formula &FB) {
size_t FARegNum = 0;
for (const SCEV *Reg : FA.BaseRegs) {
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
FARegNum += (NumUses - UsedByIndices.count() + 1);
}
size_t FBRegNum = 0;
for (const SCEV *Reg : FB.BaseRegs) {
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
FBRegNum += (NumUses - UsedByIndices.count() + 1);
}
if (FARegNum != FBRegNum)
return FARegNum < FBRegNum;
// Tie: rate both formulae, each from an empty register set, and compare.
Cost CostFA(L, SE, TTI, AMK);
Cost CostFB(L, SE, TTI, AMK);
Regs.clear();
CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
Regs.clear();
CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
return CostFA.isLess(CostFB);
};
// Set when at least one formula of this use is deleted.
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
++FIdx) {
Formula &F = LU.Formulae[FIdx];
if (!F.ScaledReg)
continue;
// The first formula seen for a (ScaledReg, Scale) pair is kept for now.
auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
if (P.second)
continue;
// Otherwise compare against the recorded formula. If F wins, swap the two
// in place so the recorded index refers to the winner and F names the
// formula to delete.
Formula &Best = LU.Formulae[P.first->second];
if (IsBetterThan(F, Best))
std::swap(F, Best);
LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
"    in favor of formula ";
Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
ChangedFormulae = true;
#endif
LU.DeleteFormula(F);
// Compensate for the loop's ++FIdx and shrink its bound.
--FIdx;
--NumForms;
Any = true;
}
// Refresh the use's register bookkeeping only if formulae were removed.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
// Keys are per use: start the next use with an empty map.
BestFormulae.clear();
}
LLVM_DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
if (AMK != TTI::AMK_PostIndexed)
return;
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
"Narrowing the search space by choosing the lowest "
"register Formula for PostInc Uses.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
if (LU.Kind != LSRUse::Address)
continue;
if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
!TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
continue;
size_t MinRegs = std::numeric_limits<size_t>::max();
for (const Formula &F : LU.Formulae)
MinRegs = std::min(F.getNumRegs(), MinRegs);
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
++FIdx) {
Formula &F = LU.Formulae[FIdx];
if (F.getNumRegs() > MinRegs) {
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n");
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
break;
}
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// When the estimated search space is at or above ComplexityLimit, reduce
/// every use that has two or more formulae to a single formula, chosen by a
/// score built from per-register "not selected" probabilities.
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
// Registers excluded from scoring: those for which some use reported a
// not-selected probability of 0, plus the registers of formulae already
// chosen below.
SmallPtrSet<const SCEV *, 4> UniqRegs;
LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
// For each register: the product, over the uses that reference it, of the
// non-zero values of LU.getNotSelectedProbability(Reg).
DenseMap <const SCEV *, float> RegNumMap;
for (const SCEV *Reg : RegUses) {
if (UniqRegs.count(Reg))
continue;
float PNotSel = 1;
for (const LSRUse &LU : Uses) {
if (!LU.Regs.count(Reg))
continue;
float P = LU.getNotSelectedProbability(Reg);
// A zero probability is not multiplied in; the register is recorded in
// UniqRegs instead.
if (P != 0.0)
PNotSel *= P;
else
UniqRegs.insert(Reg);
}
RegNumMap.insert(std::make_pair(Reg, PNotSel));
}
LLVM_DEBUG(
dbgs() << "Narrowing the search space by deleting costly formulas\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
// Uses with fewer than two formulae are left as they are.
if (LU.Formulae.size() < 2)
continue;
// Both minima start at the plain register count of the first formula.
float FMinRegNum = LU.Formulae[0].getNumRegs();
float FMinARegNum = LU.Formulae[0].getNumRegs();
size_t MinIdx = 0;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
// FRegNum sums RegNumMap[R] / LU.getNotSelectedProbability(R) over the
// formula's registers that are not in UniqRegs; FARegNum is the same sum
// restricted to registers that are SCEVAddRecExprs.
float FRegNum = 0;
float FARegNum = 0;
for (const SCEV *BaseReg : F.BaseRegs) {
if (UniqRegs.count(BaseReg))
continue;
FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
if (isa<SCEVAddRecExpr>(BaseReg))
FARegNum +=
RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
}
if (const SCEV *ScaledReg = F.ScaledReg) {
if (!UniqRegs.count(ScaledReg)) {
FRegNum +=
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
if (isa<SCEVAddRecExpr>(ScaledReg))
FARegNum +=
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
}
}
// Prefer the smaller FRegNum; on an exact tie prefer the smaller FARegNum.
if (FMinRegNum > FRegNum ||
(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
FMinRegNum = FRegNum;
FMinARegNum = FARegNum;
MinIdx = i;
}
}
LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
dbgs() << " with min reg num " << FMinRegNum << '\n');
// Move the chosen formula to the front, then pop everything behind it.
if (MinIdx != 0)
std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
while (LU.Formulae.size() != 1) {
LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
dbgs() << '\n');
LU.Formulae.pop_back();
}
LU.RecomputeRegs(LUIdx, RegUses);
assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
Formula &F = LU.Formulae[0];
LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
// Registers of the surviving formula are excluded from the scoring of the
// remaining uses.
UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
if (F.ScaledReg)
UniqRegs.insert(F.ScaledReg);
}
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// While the estimated search space is at or above ComplexityLimit, pick the
/// not-yet-taken register referenced by the most uses and, in every use that
/// references it, delete the formulae that do not.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // Registers already picked in earlier rounds.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Find the untaken register with the highest use count; the first one
    // encountered wins ties.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      const unsigned Count = RegUses.getUsedByIndices(Reg).count();
      if (!Best || Count > BestNum) {
        Best = Reg;
        BestNum = Count;
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      // Uses that never reference the winner are left alone.
      if (!LU.Regs.count(Best))
        continue;

      // Delete the formulae that do not reference the winner. The index only
      // advances past formulae that are kept, so the slot of a deleted
      // formula is examined again.
      bool Any = false;
      size_t FIdx = 0;
      size_t NumForms = LU.Formulae.size();
      while (FIdx != NumForms) {
        Formula &F = LU.Formulae[FIdx];
        if (F.referencesReg(Best)) {
          ++FIdx;
          continue;
        }
        LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
        LU.DeleteFormula(F);
        --NumForms;
        Any = true;
        assert(NumForms != 0 &&
               "Use has no formulae left! Is Regs inconsistent?");
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
/// Run the search-space narrowing heuristics in a fixed order. The steps
/// visible in this file check the estimated complexity themselves, so each
/// one only acts while the search space is still at or above the limit.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
// Filtering by (ScaledReg, Scale) is optional and controlled by the
// FilterSameScaledReg setting.
if (FilterSameScaledReg)
NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
NarrowSearchSpaceByFilterPostInc();
// The last step is one of two alternatives, selected by LSRExpNarrow.
if (LSRExpNarrow)
NarrowSearchSpaceByDeletingCostlyFormulas();
else
NarrowSearchSpaceByPickingWinnerRegs();
}
/// Recursive step of the solver: choose a formula for the next undecided use
/// and recurse until every use has one.
///
/// \param Solution      Best complete assignment found so far (output).
/// \param SolutionCost  Cost of \p Solution; updated together with it.
/// \param Workspace     Formulae chosen so far, one per already-decided use.
/// \param CurCost       Accumulated cost of the formulae in \p Workspace.
/// \param CurRegs       Registers used by the formulae in \p Workspace.
/// \param VisitedRegs   Passed through to Cost::RateFormula and extended
///                      below for top-level single-register formulae.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const {
// The number of formulae already chosen is the index of the use to decide.
const LSRUse &LU = Uses[Workspace.size()];
// Registers already in use by earlier choices that this use also references.
SmallSetVector<const SCEV *, 4> ReqRegs;
for (const SCEV *S : CurRegs)
if (LU.Regs.count(S))
ReqRegs.insert(S);
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost(L, SE, TTI, AMK);
for (const Formula &F : LU.Formulae) {
// Pruning, skipped for Address uses under post-indexed addressing: a formula
// is only considered if it references min(F.getNumRegs(), ReqRegs.size()) of
// the required registers.
if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (const SCEV *Reg : ReqRegs) {
if ((F.ScaledReg && F.ScaledReg == Reg) ||
is_contained(F.BaseRegs, Reg)) {
--NumReqRegsToFind;
if (NumReqRegsToFind == 0)
break;
}
}
if (NumReqRegsToFind != 0) {
continue;
}
}
// Rate the candidate on top of the cost and registers accumulated so far.
NewCost = CurCost;
NewRegs = CurRegs;
NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
// Only continue with candidates cheaper than the best complete solution.
if (NewCost.isLess(SolutionCost)) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
// More uses remain: recurse to decide the next one.
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
NewRegs, VisitedRegs);
// After exploring a single-register formula for the first use, record its
// register in VisitedRegs.
if (F.getNumRegs() == 1 && Workspace.size() == 1)
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
// Every use has a formula: this is the new best solution.
LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
dbgs() << ".\nRegs:\n";
for (const SCEV *S : NewRegs) dbgs()
<< "- " << *S << "\n";
dbgs() << '\n');
SolutionCost = NewCost;
Solution = Workspace;
}
// Undo the choice before trying the next formula.
Workspace.pop_back();
}
}
}
/// Choose one formula per use by running the recursive solver.
///
/// \param Solution  Receives the chosen formulae, indexed like Uses. It is
///                  left as the solver left it (empty if the caller passed
///                  it empty and nothing satisfactory was found).
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;

  // The initial "best" cost is marked as a loser; SolveRecurse only records
  // candidates whose cost compares less than the current best.
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();

  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Check the shape of the solution before the debug dump below indexes it
  // once per use.
  assert(Solution.size() == Uses.size() && "Malformed solution!");

  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         "    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });
}
/// Starting from \p IP, walk up the dominator tree looking for an insertion
/// point that is still dominated by every instruction in \p Inputs and does
/// not sit in a deeper or different same-depth loop than the current point.
///
/// \param IP      Initial insertion point.
/// \param Inputs  Instructions that must dominate the returned position.
/// \returns the last position accepted before the walk had to stop.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
// Candidate position under test; IP holds the last accepted position.
Instruction *Tentative = &*IP;
while (true) {
bool AllDominate = true;
Instruction *BetterPos = nullptr;
// A catchswitch is never accepted as a position: keep what we have.
if (isa<CatchSwitchInst>(Tentative))
return IP;
for (Instruction *Inst : Inputs) {
// Reject the candidate if it is an input itself or if some input does not
// dominate it.
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
// For inputs in the candidate's own block, remember the position right
// after the input that is latest in dominance order.
if (Tentative->getParent() == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = &*std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
// Accept the candidate, preferring the position just after a same-block
// input when there is one.
if (BetterPos)
IP = BetterPos->getIterator();
else
IP = Tentative->getIterator();
const Loop *IPLoop = LI.getLoopFor(IP->getParent());
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
// Climb immediate dominators until reaching a block whose loop depth is
// smaller than IP's, or equal to it within the same loop. Running out of
// dominator-tree nodes ends the search with the current IP.
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
if (!Rung) return IP;
Rung = Rung->getIDom();
if (!Rung) return IP;
IDom = Rung->getBlock();
const Loop *IDomLoop = LI.getLoopFor(IDom);
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
if (IDomDepth <= IPLoopDepth &&
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
break;
}
// Next candidate: the terminator of the dominating block just found.
Tentative = IDom->getTerminator();
}
return IP;
}
/// Pick the position at which the expansion for fixup \p LF of use \p LU
/// should be emitted.
///
/// Collects the instructions that must dominate the expansion, lets
/// HoistInsertPosition() choose a position relative to \p LowestIP, and then
/// advances past PHIs, EH pads, debug intrinsics and instructions the
/// rewriter has already inserted.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
  // Instructions the chosen position has to be dominated by.
  SmallVector<Instruction *, 4> Inputs;

  // The value being replaced, if it is an instruction.
  if (auto *OldOperand = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(OldOperand);

  // For ICmpZero uses, the compare's second operand, if it is an instruction.
  if (LU.Kind == LSRUse::ICmpZero) {
    Value *CmpRHS = cast<ICmpInst>(LF.UserInst)->getOperand(1);
    if (auto *CmpRHSInst = dyn_cast<Instruction>(CmpRHS))
      Inputs.push_back(CmpRHSInst);
  }

  // Post-increment use of this loop: the latch terminator when the use is
  // fully outside the loop, otherwise the IV increment insertion position.
  if (LF.PostIncLoops.count(L))
    Inputs.push_back(LF.isUseFullyOutsideLoop(L)
                         ? L->getLoopLatch()->getTerminator()
                         : IVIncInsertPos);

  // Post-increment uses of other loops: the terminator of the nearest common
  // dominator of that loop's exiting blocks.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L)
      continue;

    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (ExitingBlocks.empty())
      continue;

    BasicBlock *CommonDom = ExitingBlocks[0];
    for (auto It = std::next(ExitingBlocks.begin()), End = ExitingBlocks.end();
         It != End; ++It)
      CommonDom = DT.findNearestCommonDominator(CommonDom, *It);
    Inputs.push_back(CommonDom->getTerminator());
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Skip, in this order, PHIs, EH pads and debug intrinsics.
  while (isa<PHINode>(IP))
    ++IP;
  while (IP->isEHPad())
    ++IP;
  while (isa<DbgInfoIntrinsic>(IP))
    ++IP;

  // Skip instructions inserted by the rewriter, but never move past LowestIP.
  while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
    ++IP;

  return IP;
}
/// Emit code computing formula \p F for fixup \p LF of use \p LU and return
/// the resulting value.
///
/// \param LU         The use being rewritten.
/// \param LF         The fixup (user instruction and operand) being rewritten.
/// \param F          The formula selected for \p LU.
/// \param IP         Requested insertion point; it is first adjusted with
///                   AdjustInsertPositionForExpand().
/// \param DeadInsts  For ICmpZero uses, receives the compare's old second
///                   operand when that operand is an instruction.
/// \returns the expanded value, or LF.OperandValToReplace unchanged when the
///          use has a rigid formula.
///
/// For ICmpZero uses this also rewrites operand 1 of the compare
/// (LF.UserInst) as a side effect.
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
                           const Formula &F, BasicBlock::iterator IP,
                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
  // Rigid formulae are not expanded: the original operand is kept.
  if (LU.RigidFormula)
    return LF.OperandValToReplace;

  // Choose where to emit the code and tell the rewriter about the fixup's
  // post-increment loops.
  IP = AdjustInsertPositionForExpand(IP, LF, LU);
  Rewriter.setInsertPoint(&*IP);
  Rewriter.setPostInc(LF.PostIncLoops);

  // Pick the type to expand in: the operand's type when the formula has no
  // type or when both map to the same effective SCEV type, otherwise the
  // formula's type.
  Type *OpTy = LF.OperandValToReplace->getType();
  Type *Ty = F.getType();
  if (!Ty)
    Ty = OpTy;
  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
    Ty = OpTy;
  Type *IntTy = SE.getEffectiveSCEVType(Ty);

  // Operands of the final add expression.
  SmallVector<const SCEV *, 8> Ops;

  // Expand each base register, after undoing post-inc normalization.
  for (const SCEV *Reg : F.BaseRegs) {
    assert(!Reg->isZero() && "Zero allocated in a base register!");
    Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
  }

  // Expand the scaled register. For ICmpZero uses with a scale of -1 the
  // expanded register is kept aside in ICmpScaledV and later becomes the
  // compare's second operand instead of being added to Ops.
  Value *ICmpScaledV = nullptr;
  if (F.Scale != 0) {
    const SCEV *ScaledS = F.ScaledReg;
    // LF.PostIncLoops is passed directly, exactly as for the base registers
    // above; no const_cast is needed.
    ScaledS = denormalizeForPostIncUse(ScaledS, LF.PostIncLoops, SE);

    if (LU.Kind == LSRUse::ICmpZero) {
      if (F.Scale == 1)
        Ops.push_back(
            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
      else {
        assert(F.Scale == -1 &&
               "The only scale supported by ICmpZero uses is -1!");
        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
      }
    } else {
      // For Address uses whose addressing mode is completely folded, first
      // collapse the operands gathered so far into a single value.
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
          isAMCompletelyFolded(TTI, LU, F)) {
        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
      if (F.Scale != 1)
        ScaledS =
            SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
      Ops.push_back(ScaledS);
    }
  }

  // Add the global value, collapsing the operands gathered so far first.
  if (F.BaseGV) {
    if (!Ops.empty()) {
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
    }
    Ops.push_back(SE.getUnknown(F.BaseGV));
  }

  // Collapse everything gathered so far before the immediates are added.
  if (!Ops.empty()) {
    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
    Ops.clear();
    Ops.push_back(SE.getUnknown(FullV));
  }

  // Combined immediate: formula offset plus fixup offset. The addition is
  // performed on the unsigned value.
  int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
  if (Offset != 0) {
    if (LU.Kind == LSRUse::ICmpZero) {
      // For a compare the offset goes to the other side: negated when it
      // becomes the second operand itself, or as-is when the scaled register
      // that was set aside is moved into the sum instead.
      if (!ICmpScaledV)
        ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
      else {
        Ops.push_back(SE.getUnknown(ICmpScaledV));
        ICmpScaledV = ConstantInt::get(IntTy, Offset);
      }
    } else {
      Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
    }
  }

  // The unfolded offset is always added as a separate operand.
  int64_t UnfoldedOffset = F.UnfoldedOffset;
  if (UnfoldedOffset != 0) {
    Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
                                                       UnfoldedOffset)));
  }

  // Emit the final sum; an empty operand list expands to the constant zero.
  const SCEV *FullS = Ops.empty() ?
                      SE.getConstant(IntTy, 0) :
                      SE.getAddExpr(Ops);
  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

  Rewriter.clearPostInc();

  // For ICmpZero uses, replace the compare's second operand.
  if (LU.Kind == LSRUse::ICmpZero) {
    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
    // The old second operand is queued as a deletion candidate.
    if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
      DeadInsts.emplace_back(OperandIsInstr);
    assert(!F.BaseGV && "ICmp does not support folding a global value and "
                           "a scale at the same time!");
    if (F.Scale == -1) {
      // Use the value set aside above, cast to the operand type if needed.
      if (ICmpScaledV->getType() != OpTy) {
        Instruction *Cast =
          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
                                                   OpTy, false),
                           ICmpScaledV, OpTy, "tmp", CI);
        ICmpScaledV = Cast;
      }
      CI->setOperand(1, ICmpScaledV);
    } else {
      assert((F.Scale == 0 || F.Scale == 1) &&
             "ICmp does not support folding a global value and "
             "a scale at the same time!");
      // Compare against the negated offset, cast to the operand type if
      // needed.
      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
                                           -(uint64_t)Offset);
      if (C->getType() != OpTy)
        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                          OpTy, false),
                                  C, OpTy);

      CI->setOperand(1, C);
    }
  }

  return FullV;
}
/// Rewrite a fixup whose user is a PHI node: for every incoming value equal
/// to LF.OperandValToReplace, expand the formula at the end of the
/// corresponding incoming block (splitting the edge first in some cases) and
/// install the result as the new incoming value.
///
/// \param PN         The PHI node being rewritten.
/// \param LU         The use the fixup belongs to.
/// \param LF         The fixup being rewritten.
/// \param F          The formula selected for \p LU.
/// \param DeadInsts  Passed through to Expand().
void LSRInstance::RewriteForPHI(
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
// Expanded value per incoming block, so that several PHI entries for the
// same block share one expansion.
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
bool needUpdateFixups = false;
BasicBlock *BB = PN->getIncomingBlock(i);
// Consider splitting the incoming edge when the PHI has more than one
// entry and the predecessor has several successors, unless the predecessor
// ends in an indirectbr or a catchswitch.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator()) &&
!isa<CatchSwitchInst>(BB->getTerminator())) {
BasicBlock *Parent = PN->getParent();
Loop *PNLoop = LI.getLoopFor(Parent);
// Edges into a loop header are never split.
if (!PNLoop || Parent != PNLoop->getHeader()) {
BasicBlock *NewBB = nullptr;
// Landing pads need their dedicated splitting helper.
if (!Parent->isLandingPad()) {
NewBB =
SplitCriticalEdge(BB, Parent,
CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
.setMergeIdenticalEdges()
.setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
NewBB = NewBBs[0];
}
if (NewBB) {
// When the edge leaves the loop, place the new block right before the
// PHI's block.
if (L->contains(BB) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting may have changed the PHI's entries: refresh the loop bound and
// continue with the entry that now comes from the new block.
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
needUpdateFixups = true;
}
}
}
// Expand once per incoming block; later entries reuse the stored value.
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
Value *FullV =
Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);
// Cast to the type of the replaced operand if the expansion differs.
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
OpTy, false),
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
// After a split, fixups that named this PHI as their user may have to be
// redirected: if a fixup's operand is no longer among the PHI's incoming
// values, look for it in the PHI nodes of the incoming blocks and make the
// PHI that holds it the fixup's user.
if (needUpdateFixups) {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
if (Fixup.UserInst == PN) {
bool foundInOriginalPHI = false;
for (const auto &val : PN->incoming_values())
if (val == Fixup.OperandValToReplace) {
foundInOriginalPHI = true;
break;
}
// The operand is still an incoming value of PN: nothing to redirect.
if (foundInOriginalPHI)
continue;
for (const auto &Block : PN->blocks())
for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
++I) {
PHINode *NewPN = cast<PHINode>(I);
for (const auto &val : NewPN->incoming_values())
if (val == Fixup.OperandValToReplace)
Fixup.UserInst = NewPN;
}
}
}
}
}
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
RewriteForPHI(PN, LU, LF, F, DeadInsts);
} else {
Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
FullV, OpTy, "tmp", LF.UserInst);
FullV = Cast;
}
if (LU.Kind == LSRUse::ICmpZero)
LF.UserInst->setOperand(0, FullV);
else
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
}
if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
DeadInsts.emplace_back(OperandIsInstr);
}
/// Apply the chosen solution to the IR: rewrite every fixup with the formula
/// selected for its use, materialize the IV chains, delete instructions that
/// became dead, and finally try to move simple IV increments in the loop
/// header to the IV increment insertion position.
///
/// \param Solution  One formula per use, indexed like Uses.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Instructions that may have become dead; deleted in one go below.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);

  // Register PHIs that end an IV chain with the rewriter.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Rewrite every fixup of every use with the formula chosen for that use.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      Changed = true;
    }

  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    Changed = true;
  }

  // Remember the IVs the rewriter inserted that are still attached to a
  // block. Handles that are null or do not refer to an instruction are
  // skipped instead of being dereferenced.
  for (const WeakVH &IV : Rewriter.getInsertedIVs()) {
    if (!IV)
      continue;
    auto *IVInst = dyn_cast<Instruction>(&*IV);
    if (IVInst && IVInst->getParent())
      ScalarEvolutionIVs.push_back(IV);
  }

  Rewriter.clear();

  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
                                                                  &TLI, MSSAU);

  // Move the increment of simple add/sub recurrences in the header to
  // IVIncInsertPos when that is possible.
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      // A subtraction only qualifies when the PHI is its first operand.
      if (BO->getOperand(0) != &PN)
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    }

    // Only constant steps are handled.
    if (!isa<Constant>(Step))
      continue;

    // Nothing to do if the increment already lives in the target block.
    if (BO->getParent() == IVIncInsertPos->getParent())
      continue;

    // Every use of the increment must be dominated by the new position.
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
      continue;
    BO->moveBefore(IVIncInsertPos);
    Changed = true;
  }
}
/// Construct the instance and run the whole transformation on loop \p L.
/// All work happens here: collection of uses, formula generation, filtering,
/// solving and, if a solution is found, rewriting of the IR.
///
/// The addressing-mode kind (AMK) comes from PreferredAddresingMode when
/// that option was given explicitly, otherwise from the target.
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
const TargetTransformInfo &TTI, AssumptionCache &AC,
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
? PreferredAddresingMode
: TTI.getPreferredAddressingMode(L, &SE)),
Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) {
// Only loops in loop-simplify form are handled.
if (!L->isLoopSimplifyForm())
return;
// Nothing to do without IV users.
if (IU.empty()) return;
// Give up on loops with more than MaxIVUsers IV users.
unsigned NumUsers = 0;
for (const IVStrideUse &U : IU) {
if (++NumUsers > MaxIVUsers) {
// U is otherwise only referenced inside LLVM_DEBUG.
(void)U;
LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
<< "\n");
return;
}
// Give up when a PHI user sits in a block that starts with a funclet pad or
// catchswitch and one of its incoming blocks starts with a catchswitch.
if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
if (isa<FuncletPadInst>(FirstNonPHI) ||
isa<CatchSwitchInst>(FirstNonPHI))
for (BasicBlock *PredBB : PN->blocks())
if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
return;
}
}
LLVM_DEBUG(dbgs() << "\nLSR on loop ";
L->getHeader()->printAsOperand(dbgs(),  false);
dbgs() << ":\n");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
Rewriter.disableCanonicalMode();
Rewriter.enableLSRMode();
// These two optimizations run before the innermost-loop check below, so
// they also apply to outer loops.
OptimizeShadowIV();
OptimizeLoopTermCond();
// Re-check: the IV user list is consulted again after the optimizations.
if (IU.empty()) return;
// The remaining steps are only performed on innermost loops.
if (!L->isInnermost()) {
LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// IV chains are collected only when the target asks for it or when
// StressIVChain is set.
if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
if (Uses.empty())
return;
LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
// Generate candidates, prune them, then search for the best assignment.
GenerateAllReuseFormulae();
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
SmallVector<const Formula *, 8> Solution;
Solve(Solution);
// These containers are not used after solving; release them.
Factors.clear();
Types.clear();
RegUses.clear();
if (Solution.empty())
return;
#ifndef NDEBUG
// Debug builds verify that every remaining formula is legal for its use.
for (const LSRUse &LU : Uses) {
for (const Formula &F : LU.Formulae)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
F) && "Illegal formula generated!");
};
#endif
ImplementSolution(Solution);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the interesting factors and types on a single line, as a
/// comma-separated list. Factors are written as "*N" and types in
/// parentheses. Nothing is printed when both collections are empty.
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
  if (Factors.empty() && Types.empty())
    return;

  OS << "LSR has identified the following interesting factors and types: ";

  // Empty before the first item, ", " before every later one.
  const char *Separator = "";
  for (int64_t Factor : Factors) {
    OS << Separator << '*' << Factor;
    Separator = ", ";
  }
  for (Type *Ty : Types) {
    OS << Separator << '(' << *Ty << ')';
    Separator = ", ";
  }
  OS << '\n';
}
/// Print every fixup of every use to \p OS, one per line, indented by two
/// spaces.
void LSRInstance::print_fixups(raw_ostream &OS) const {
  OS << "LSR is examining the following fixup sites:\n";
  for (const LSRUse &LU : Uses)
    for (const LSRFixup &LF : LU.Fixups) {
      // Write the indent to OS, not dbgs(), so the whole line ends up on the
      // stream the caller asked for.
      OS << "  ";
      LF.print(OS);
      OS << '\n';
    }
}
/// Print every use to \p OS, each followed by its formulae. Uses are indented
/// by two spaces and their formulae by four.
void LSRInstance::print_uses(raw_ostream &OS) const {
  OS << "LSR is examining the following uses:\n";
  for (const LSRUse &LU : Uses) {
    // Write the indent to OS, not dbgs(), so the whole line ends up on the
    // stream the caller asked for.
    OS << "  ";
    LU.print(OS);
    OS << '\n';
    for (const Formula &F : LU.Formulae) {
      OS << "    ";
      F.print(OS);
      OS << '\n';
    }
  }
}
/// Print the complete state to \p OS: interesting factors and types first,
/// then the fixup sites, then the uses with their formulae.
void LSRInstance::print(raw_ostream &OS) const {
print_factors_and_types(OS);
print_fixups(OS);
print_uses(OS);
}
/// Print the complete state, followed by a newline, to errs().
LLVM_DUMP_METHOD void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
/// LoopPass wrapper for loop strength reduction. Only the interface is
/// declared here; the member functions are defined out of line.
class LoopStrengthReduce : public LoopPass {
public:
// Pass identification; the constructor hands it to the LoopPass base class.
static char ID;
LoopStrengthReduce();
private:
// Per-loop entry point invoked through the LoopPass interface.
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
// Declares the analyses this pass requires and preserves.
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
}
/// Construct the pass and register it with the global pass registry.
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
/// Declare the analyses this pass requires and the ones it preserves.
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
// NOTE(review): LoopSimplifyID is required a second time here, after
// ScalarEvolution and before IVUsers. This looks deliberate (it may affect
// how the pass manager schedules IVUsers) - confirm before removing it.
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsersWrapperPass>();
AU.addPreserved<IVUsersWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
// MemorySSA is preserved but not required.
AU.addPreserved<MemorySSAWrapperPass>();
}
namespace {
/// Wrap the raw element list \p Expr in a range of DIExpression operation
/// iterators covering the whole vector.
static iterator_range<llvm::DIExpression::expr_op_iterator>
ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
  using OpIter = llvm::DIExpression::expr_op_iterator;
  return {OpIter(Expr.begin()), OpIter(Expr.end())};
}
/// Accumulates a DWARF expression (as raw opcodes/operands) together with the
/// IR values that the expression refers to through DW_OP_LLVM_arg, while
/// translating SCEV expressions into debug-value form.
struct SCEVDbgValueBuilder {
  SCEVDbgValueBuilder() = default;
  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

  /// Replaces this builder's contents with a copy of \p Base's contents.
  void clone(const SCEVDbgValueBuilder &Base) {
    Expr = Base.Expr;
    LocationOps = Base.LocationOps;
  }

  /// Drops every expression element and every recorded location.
  void clear() {
    Expr.clear();
    LocationOps.clear();
  }

  /// The DWARF expression elements emitted so far.
  SmallVector<uint64_t, 6> Expr;
  /// The values referenced by the DW_OP_LLVM_arg indexes stored in Expr.
  SmallVector<Value *, 2> LocationOps;

  /// Appends a DWARF opcode to the expression.
  void pushOperator(uint64_t Op) { Expr.push_back(Op); }

  /// Appends a raw operand to the expression.
  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

  /// Emits "DW_OP_LLVM_arg <index>" for \p V. If \p V is already recorded in
  /// LocationOps its existing index is reused; otherwise \p V is appended and
  /// the new index is used.
  void pushLocation(llvm::Value *V) {
    auto Found = llvm::find(LocationOps, V);
    unsigned Slot = 0;
    if (Found == LocationOps.end()) {
      Slot = LocationOps.size();
      LocationOps.push_back(V);
    } else {
      Slot = std::distance(LocationOps.begin(), Found);
    }
    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
    Expr.push_back(Slot);
  }

  /// Emits a location reference for the value wrapped by \p U.
  void pushValue(const SCEVUnknown *U) {
    pushLocation(cast<SCEVUnknown>(U)->getValue());
  }

  /// Emits "DW_OP_consts <value>" for \p C.
  /// \returns false, emitting nothing, when the constant needs more than 64
  /// signed bits.
  bool pushConst(const SCEVConstant *C) {
    const APInt &Imm = C->getAPInt();
    if (Imm.getMinSignedBits() > 64)
      return false;
    pushOperator(llvm::dwarf::DW_OP_consts);
    pushUInt(Imm.getSExtValue());
    return true;
  }

  /// Iterates over the expression built so far, one DWARF operation at a
  /// time.
  iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
    return ToDwarfOpIter(Expr);
  }

  /// Emits every operand of \p CommExpr, placing \p DwarfOp after each operand
  /// except the first. All operands are visited even when an earlier one
  /// failed.
  /// \returns true only if every operand was translated.
  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
                          uint64_t DwarfOp) {
    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
           "Expected arithmetic SCEV type");
    bool AllTranslated = true;
    bool SeenFirstOperand = false;
    for (const SCEV *Operand : CommExpr->operands()) {
      if (!pushSCEV(Operand))
        AllTranslated = false;
      if (SeenFirstOperand)
        pushOperator(DwarfOp);
      SeenFirstOperand = true;
    }
    return AllTranslated;
  }

  /// Emits the operand of the cast \p C followed by
  /// "DW_OP_LLVM_convert <width> <signed|unsigned encoding>", where the width
  /// is the integer bit width of the cast's result type.
  /// \returns whether the cast's operand was translated.
  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
    const uint64_t ToWidth = C->getType()->getIntegerBitWidth();
    const bool OperandTranslated = pushSCEV(C->getOperand(0));
    pushOperator(dwarf::DW_OP_LLVM_convert);
    pushOperator(ToWidth);
    pushOperator(IsSigned ? llvm::dwarf::DW_ATE_signed
                          : llvm::dwarf::DW_ATE_unsigned);
    return OperandTranslated;
  }

  /// Recursively translates \p S into DWARF operations.
  /// Handles constants, unknowns, multiplies, unsigned divides, casts and
  /// adds; add-recurrences and every other SCEV kind are rejected.
  /// \returns false if any part of \p S could not be translated.
  bool pushSCEV(const llvm::SCEV *S) {
    if (const auto *Const = dyn_cast<SCEVConstant>(S))
      return pushConst(Const);

    if (const auto *Unknown = dyn_cast<SCEVUnknown>(S)) {
      if (!Unknown->getValue())
        return false;
      pushLocation(Unknown->getValue());
      return true;
    }

    if (const auto *Mul = dyn_cast<SCEVMulExpr>(S))
      return pushArithmeticExpr(Mul, llvm::dwarf::DW_OP_mul);

    if (const auto *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
      // Both sides are always emitted, even if the first one fails.
      const bool LHSTranslated = pushSCEV(UDiv->getLHS());
      const bool RHSTranslated = pushSCEV(UDiv->getRHS());
      pushOperator(llvm::dwarf::DW_OP_div);
      return LHSTranslated && RHSTranslated;
    }

    if (const auto *Cast = dyn_cast<SCEVCastExpr>(S)) {
      assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
              isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
             "Unexpected cast type in SCEV.");
      return pushCast(Cast, isa<SCEVSignExtendExpr>(Cast));
    }

    if (const auto *Add = dyn_cast<SCEVAddExpr>(S))
      return pushArithmeticExpr(Add, llvm::dwarf::DW_OP_plus);

    // Nested add-recurrences and any remaining SCEV kinds are unsupported.
    return false;
  }

  /// \returns true if \p S is a constant (fitting in 64 signed bits) for which
  /// applying \p Op changes nothing: 0 for plus/minus, 1 for mul/div.
  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
    const auto *C = dyn_cast<SCEVConstant>(S);
    if (!C || C->getAPInt().getMinSignedBits() > 64)
      return false;
    const int64_t I = C->getAPInt().getSExtValue();
    if (Op == llvm::dwarf::DW_OP_plus || Op == llvm::dwarf::DW_OP_minus)
      return I == 0;
    if (Op == llvm::dwarf::DW_OP_mul || Op == llvm::dwarf::DW_OP_div)
      return I == 1;
    return false;
  }

  /// Emits \p S followed by \p Op, unless \p S is the identity operand for
  /// \p Op, in which case nothing is emitted.
  /// \returns false if \p S could not be translated.
  bool pushUnlessIdentity(uint64_t Op, const SCEV *S) {
    if (isIdentityFunction(Op, S))
      return true;
    if (!pushSCEV(S))
      return false;
    pushOperator(Op);
    return true;
  }

  /// Appends "* stride + start" for the affine recurrence \p SAR to the
  /// current expression, skipping either step when it would be a no-op.
  /// \returns false for a nested recurrence or an untranslatable operand.
  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    const SCEV *Start = SAR.getStart();
    if (isa<SCEVAddRecExpr>(Start))
      return false;
    const SCEV *Stride = SAR.getStepRecurrence(SE);
    return pushUnlessIdentity(llvm::dwarf::DW_OP_mul, Stride) &&
           pushUnlessIdentity(llvm::dwarf::DW_OP_plus, Start);
  }

  /// Emits a reference to \p OffsetValue followed by the DWARF operations
  /// that DIExpression::appendOffset generates for \p Offset.
  void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
    pushLocation(OffsetValue);
    DIExpression::appendOffset(Expr, Offset);
    LLVM_DEBUG(
        dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
               << std::to_string(Offset) << "\n");
  }

  /// Rebuilds this builder as \p IterationCount followed by the operations
  /// that turn it into the value of the affine recurrence \p S.
  /// \returns false if \p S is not an affine add-recurrence, exceeds
  /// MaxSCEVSalvageExpressionSize, or cannot be translated.
  bool createIterCountExpr(const SCEV *S,
                           const SCEVDbgValueBuilder &IterationCount,
                           ScalarEvolution &SE) {
    const auto *Rec = dyn_cast<SCEVAddRecExpr>(S);
    if (!Rec)
      return false;

    LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
                      << '\n');

    if (!Rec->isAffine())
      return false;
    if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
      return false;

    // Start from the iteration-count expression and extend it.
    clone(IterationCount);
    return SCEVToValueExpr(*Rec, SE);
  }

  /// Appends "- start / stride" for the affine recurrence \p SAR to the
  /// current expression, skipping either step when it would be a no-op.
  /// \returns false for a nested recurrence or an untranslatable operand.
  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
                           ScalarEvolution &SE) {
    assert(SAR.isAffine() && "Expected affine SCEV");
    const SCEV *Start = SAR.getStart();
    if (isa<SCEVAddRecExpr>(Start)) {
      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
      return false;
    }
    const SCEV *Stride = SAR.getStepRecurrence(SE);
    return pushUnlessIdentity(llvm::dwarf::DW_OP_minus, Start) &&
           pushUnlessIdentity(llvm::dwarf::DW_OP_div, Stride);
  }

  /// Appends this builder's expression to \p DestExpr and merges its
  /// locations into \p DestLocations. Locations already present in
  /// \p DestLocations are reused; DW_OP_LLVM_arg indexes are rewritten to the
  /// positions in \p DestLocations.
  void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
                       SmallVectorImpl<Value *> &DestLocations) {
    assert(!DestLocations.empty() &&
           "Expected the locations vector to contain the IV");
    assert(!LocationOps.empty() &&
           "Expected the location ops to contain the IV.");

    // Remap[i] is the index in DestLocations of this builder's location i.
    SmallVector<uint64_t, 2> Remap;
    for (Value *Loc : LocationOps) {
      auto Existing = llvm::find(DestLocations, Loc);
      // When Loc is absent this distance equals the current size, which is
      // exactly the index Loc receives once appended below.
      Remap.push_back(std::distance(DestLocations.begin(), Existing));
      if (Existing == DestLocations.end())
        DestLocations.push_back(Loc);
    }

    for (const auto &Op : expr_ops()) {
      if (Op.getOp() == dwarf::DW_OP_LLVM_arg) {
        DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
        DestExpr.push_back(Remap[Op.getArg(0)]);
      } else {
        Op.appendToVector(DestExpr);
      }
    }
  }
};
struct DVIRecoveryRec {
DVIRecoveryRec(DbgValueInst *DbgValue)
: DVI(DbgValue), Expr(DbgValue->getExpression()),
HadLocationArgList(false) {}
DbgValueInst *DVI;
DIExpression *Expr;
bool HadLocationArgList;
SmallVector<WeakVH, 2> LocationOps;
SmallVector<const llvm::SCEV *, 2> SCEVs;
SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;
void clear() {
for (auto &RE : RecoveryExprs)
RE.reset();
RecoveryExprs.clear();
}
~DVIRecoveryRec() { clear(); }
};
}
/// Counts the DW_OP_LLVM_arg operations contained in \p Expr.
static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
  unsigned NumArgOps = 0;
  for (const auto &Op : ToDwarfOpIter(Expr))
    NumArgOps += (Op.getOp() == dwarf::DW_OP_LLVM_arg) ? 1u : 0u;
  return NumArgOps;
}
/// Points \p DVI at the single value \p Location and gives it the expression
/// built from \p Ops. \p Ops must not contain any DW_OP_LLVM_arg.
static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location,
                                  SmallVectorImpl<uint64_t> &Ops) {
  assert(
      numLLVMArgOps(Ops) == 0 &&
      "Expected expression that does not contain any DW_OP_llvm_arg operands.");

  auto *LocationMD = ValueAsMetadata::get(Location);
  DVI.setRawLocation(LocationMD);

  auto *NewExpression = DIExpression::get(DVI.getContext(), Ops);
  DVI.setExpression(NewExpression);
}
/// Points \p DVI at an argument list built from \p Locations and gives it the
/// expression built from \p Ops. \p Ops must contain at least one
/// DW_OP_LLVM_arg.
static void updateDVIWithLocations(DbgValueInst &DVI,
                                   SmallVectorImpl<Value *> &Locations,
                                   SmallVectorImpl<uint64_t> &Ops) {
  assert(numLLVMArgOps(Ops) != 0 &&
         "Expected expression that references DIArglist locations using "
         "DW_OP_llvm_arg operands.");

  // Wrap every location as metadata, preserving order.
  SmallVector<ValueAsMetadata *, 3> WrappedLocations;
  for (Value *Location : Locations) {
    ValueAsMetadata *Wrapped = ValueAsMetadata::get(Location);
    WrappedLocations.push_back(Wrapped);
  }

  DVI.setRawLocation(llvm::DIArgList::get(
      DVI.getContext(),
      llvm::ArrayRef<llvm::ValueAsMetadata *>(WrappedLocations)));
  DVI.setExpression(DIExpression::get(DVI.getContext(), Ops));
}
/// Writes \p NewExpr and \p NewLocationOps into the dbg.value held by
/// \p DVIRec, choosing between a single-value location and an argument list
/// depending on how \p NewExpr uses DW_OP_LLVM_arg.
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
                               SmallVectorImpl<Value *> &NewLocationOps,
                               SmallVectorImpl<uint64_t> &NewExpr) {
  DbgValueInst &DVI = *DVIRec.DVI;
  const unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);

  if (NumLLVMArgs > 1 ||
      (NumLLVMArgs == 1 && NewExpr[0] != dwarf::DW_OP_LLVM_arg)) {
    // The argument references cannot be elided, so an argument list is used.
    updateDVIWithLocations(DVI, NewLocationOps, NewExpr);
  } else if (NumLLVMArgs == 1) {
    // A lone DW_OP_LLVM_arg leads the expression: strip it and its index and
    // use a plain single-value location instead.
    assert(NewExpr[1] == 0 &&
           "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
    llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
    updateDVIWithLocation(DVI, NewLocationOps[0], ShortenedOps);
  } else {
    // No argument references at all.
    updateDVIWithLocation(DVI, NewLocationOps[0], NewExpr);
  }

  // When the recorded expression was not complex but the new one is, append
  // DW_OP_stack_value to the new expression.
  DIExpression *SalvageExpr = DVI.getExpression();
  if (DVIRec.Expr->isComplex() || !SalvageExpr->isComplex())
    return;
  SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
  DVI.setExpression(SalvageExpr);
}
/// Returns the value tracked by \p VH, or an i8 undef created in context
/// \p C when the handle is null.
static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
  if (VH)
    return VH;
  return UndefValue::get(llvm::Type::getInt8Ty(C));
}
/// Puts the dbg.value held by \p DVIRec back to its recorded expression and
/// locations. Locations whose handles have become null are replaced with
/// undef. The location format (single value or argument list) follows
/// DVIRec.HadLocationArgList.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                    << "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n');
  assert(DVIRec.Expr && "Expected an expression");
  DVIRec.DVI->setExpression(DVIRec.Expr);

  LLVMContext &Ctx = DVIRec.DVI->getContext();
  if (DVIRec.HadLocationArgList) {
    // Rebuild the argument list from the recorded handles, in order.
    SmallVector<ValueAsMetadata *, 3> MetadataLocs;
    for (WeakVH &Handle : DVIRec.LocationOps)
      MetadataLocs.push_back(
          ValueAsMetadata::get(getValueOrUndef(Handle, Ctx)));
    DVIRec.DVI->setRawLocation(llvm::DIArgList::get(
        Ctx, llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs)));
  } else {
    assert(DVIRec.LocationOps.size() == 1 &&
           "Unexpected number of location ops.");
    Value *CachedValue = getValueOrUndef(DVIRec.LocationOps[0], Ctx);
    DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue));
  }

  LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n');
}
/// Attempts to rewrite the dbg.value held by \p DVIRec in terms of
/// \p LSRInductionVar.
///
/// Only dbg.values that are currently undef are processed. The dbg.value is
/// first restored to its recorded state. Then each recorded location that is
/// no longer available is replaced by an expression derived from its SCEV:
/// either a constant offset from the induction variable or, failing that, an
/// expression built on top of \p IterCountExpr. The pieces are finally merged
/// with the recorded DIExpression and written back.
///
/// \param L                 Unused by this function.
/// \param SE                Used to query and compare SCEVs.
/// \param LSRInductionVar   The induction variable; always location-op 0 of
///                          the rewritten dbg.value.
/// \param DVIRec            Record of the dbg.value to salvage; its
///                          RecoveryExprs entries are populated here.
/// \param SCEVInductionVar  The SCEV of \p LSRInductionVar.
/// \param IterCountExpr     Builder holding the iteration-count expression.
/// \returns true if the dbg.value was rewritten. On a false return after the
/// initial undef check, the dbg.value is left in its restored state.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {
  // Nothing to do unless the dbg.value is currently undef.
  if (!DVIRec.DVI->isUndef())
    return false;

  // Start from the recorded expression, locations and location format.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] is the index in NewLocationOps of recorded location
  // i when that location is still available, and -1 otherwise.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Locations that are still available are carried over unchanged.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // The recorded SCEV is unusable if it refers to erased or undef values.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Try a constant offset from the induction variable first, then fall
    // back to an expression based on the iteration count.
    if (Optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      // An offset needing more than 64 signed bits cannot be passed to
      // createOffsetExpr. Give up here: continuing would leave an empty
      // builder behind, which appendToVectors below asserts against and
      // which would otherwise silently drop this location from the result.
      if (Offset.value().getMinSignedBits() > 64) {
        LLVM_DEBUG(dbgs() << "scev-salvage: Offset for location at index: "
                          << i
                          << " does not fit in 64 bits. Salvage abandoned.\n");
        return false;
      }
      SalvageExpr->createOffsetExpr(Offset.value().getSExtValue(),
                                    LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the generated expressions with the recorded DIExpression.
  SmallVector<uint64_t, 3> NewExpr;
  if (DVIRec.Expr->getNumElements() == 0) {
    // An empty recorded expression has no DW_OP_LLVM_arg for the loop below
    // to expand, so the single recovery expression is appended directly.
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Operations other than DW_OP_LLVM_arg are copied through unchanged.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // No builder means the location was carried over: only its index needs
    // to be remapped.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // Otherwise substitute the recovery expression for the argument.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n");
  return true;
}
/// Builds an iteration-count expression from \p LSRInductionVar and uses it
/// to try salvaging every record in \p DVIToUpdate. Does nothing unless the
/// induction variable's SCEV is an affine add-recurrence no larger than
/// MaxSCEVSalvageExpressionSize that can be translated.
static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
                          llvm::PHINode *LSRInductionVar,
                          SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
  if (DVIToUpdate.empty())
    return;

  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
  assert(SCEVInductionVar &&
         "Anticipated a SCEV for the post-LSR induction variable");

  const auto *IVAddRec = dyn_cast<SCEVAddRecExpr>(SCEVInductionVar);
  if (!IVAddRec || !IVAddRec->isAffine())
    return;
  if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
    return;

  // The iteration count starts from the induction variable itself.
  SCEVDbgValueBuilder IterCountExpr;
  IterCountExpr.pushLocation(LSRInductionVar);
  if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
    return;

  LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
                    << '\n');

  // Individual failures are ignored; each record is attempted independently.
  for (std::unique_ptr<DVIRecoveryRec> &Record : DVIToUpdate)
    SalvageDVI(L, SE, LSRInductionVar, *Record, SCEVInductionVar,
               IterCountExpr);
}
/// Scans every block of \p L for dbg.values that are not undef and whose
/// location operands all have an undef-free SCEV. For each one, a record
/// holding the operands and their SCEVs is appended to
/// \p SalvageableDVISCEVs, and the intrinsic is added to \p DVIHandles.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  // True when every location operand is non-null, has a SCEV-able type, and
  // has a SCEV that contains no undefs.
  auto HasTranslatableLocationOps = [&SE](const DbgValueInst *Candidate) {
    return llvm::all_of(Candidate->location_ops(), [&SE](Value *LocOp) {
      return LocOp && SE.isSCEVable(LocOp->getType()) &&
             !SE.containsUndefs(SE.getSCEV(LocOp));
    });
  };

  for (BasicBlock *Block : L->getBlocks()) {
    for (Instruction &Inst : *Block) {
      auto *DVI = dyn_cast<DbgValueInst>(&Inst);
      if (!DVI || DVI->isUndef())
        continue;
      if (!HasTranslatableLocationOps(DVI))
        continue;

      auto NewRec = std::make_unique<DVIRecoveryRec>(DVI);
      // One (initially null) builder slot per location operand.
      NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps());
      for (Value *LocOp : DVI->location_ops()) {
        NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
        NewRec->LocationOps.push_back(LocOp);
        // Recorded once per operand; left at its default when there are no
        // operands.
        NewRec->HadLocationArgList = DVI->hasArgList();
      }

      SalvageableDVISCEVs.push_back(std::move(NewRec));
      DVIHandles.insert(DVI);
    }
  }
}
/// Picks a PHI node whose SCEV is an affine add-recurrence without undefs.
/// The IVs reported by \p LSR are tried first, then the PHIs in the header
/// of \p L.
/// \returns the first suitable PHI, or nullptr if there is none.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
                                           const LSRInstance &LSR) {
  auto IsSuitableIV = [&SE](PHINode *P) -> bool {
    if (!SE.isSCEVable(P->getType()))
      return false;
    const SCEV *S = SE.getSCEV(P);
    const auto *Rec = dyn_cast<SCEVAddRecExpr>(S);
    return Rec && Rec->isAffine() && !SE.containsUndefs(S);
  };

  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
    // Skip handles that have become null.
    if (IV) {
      PHINode *Candidate = cast<PHINode>(&*IV);
      if (IsSuitableIV(Candidate))
        return Candidate;
    }
  }

  for (PHINode &HeaderPhi : L.getHeader()->phis())
    if (IsSuitableIV(&HeaderPhi))
      return &HeaderPhi;

  return nullptr;
}
/// Runs loop strength reduction on \p L, followed by dead-PHI cleanup,
/// optional congruent-IV elimination, loop-exit-value rewriting, and an
/// attempt to salvage dbg.values gathered before the transformation.
/// \param MSSA  May be null; when non-null a MemorySSAUpdater is created for
///              it and handed to the steps that accept one.
/// \returns true if any step reported a change.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {
  // Record salvageable dbg.values before anything is transformed.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR pass.
  const LSRInstance Reducer(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  bool Changed = Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());

  // Shared cleanup for the two rewriting steps below.
  auto RemoveDeadCode = [&](SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
    Changed = true;
    RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                         MSSAU.get());
    DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
  };

  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    if (numFolded)
      RemoveDeadCode(DeadInsts);
  }

  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", false);
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
                                         UnusedIndVarInLoop, DeadInsts);
    if (Rewrites)
      RemoveDeadCode(DeadInsts);
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // NOTE(review): this walks the loops yielded by iterating LI, not only the
  // loop passed in as L - confirm that this is intended.
  for (auto &CandidateLoop : LI) {
    llvm::PHINode *IV = GetInductionVariable(*CandidateLoop, SE, Reducer);
    if (!IV) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
      continue;
    }
    DbgRewriteSalvageableDVIs(CandidateLoop, SE, IV, SalvageableDVIRecords);
  }

  // Release the recovery state explicitly before returning.
  for (std::unique_ptr<DVIRecoveryRec> &Record : SalvageableDVIRecords)
    Record->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}
/// Legacy pass manager entry point: collects the analyses needed for \p L
/// and forwards to ReduceLoopStrength. MemorySSA is passed only when it is
/// available.
/// \returns false if the loop is skipped, otherwise the result of
/// ReduceLoopStrength.
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  if (skipLoop(L))
    return false;

  // Several analyses are looked up per function.
  auto &F = *L->getHeader()->getParent();

  auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

  auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
  MemorySSA *MSSA = MSSAAnalysis ? &MSSAAnalysis->getMSSA() : nullptr;

  return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
}
/// New pass manager entry point: runs ReduceLoopStrength on \p L.
/// \returns all analyses preserved when nothing changed; otherwise the
/// standard loop-pass preserved set, plus MemorySSA when \p AR provides it.
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
                                              LoopStandardAnalysisResults &AR,
                                              LPMUpdater &) {
  auto &IU = AM.getResult<IVUsersAnalysis>(L, AR);
  const bool Changed = ReduceLoopStrength(&L, IU, AR.SE, AR.DT, AR.LI, AR.TTI,
                                          AR.AC, AR.TLI, AR.MSSA);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    Preserved.preserve<MemorySSAAnalysis>();
  return Preserved;
}
// Definition of the legacy pass's static ID member.
char LoopStrengthReduce::ID = 0;
// Initialization of the legacy pass under the argument "loop-reduce" with the
// description "Loop Strength Reduction", together with the passes it lists as
// dependencies.
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
/// Allocates a new instance of the legacy LoopStrengthReduce pass and returns
/// it to the caller.
Pass *llvm::createLoopStrengthReducePass() {
  Pass *NewPass = new LoopStrengthReduce();
  return NewPass;
}