#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
using namespace llvm;
using namespace PatternMatch;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
// Hidden command-line flag "enable-if-conversion" (default: true).
// canVectorizeWithIfConvert() reports a failure and returns false when this
// flag is turned off.
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
namespace llvm {
// Hidden command-line flag "hints-allow-reordering" (default: true).
// Defined without `static` inside namespace llvm, so it is not file-local.
// In this file it is read by LoopVectorizeHints::allowReordering().
cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
cl::desc("Allow enabling loop hints to reorder "
"FP operations during vectorization."));
}
// Hidden flag "vectorize-scev-check-threshold" (default: 16).
// canVectorize() compares the complexity of the accumulated SCEV predicate
// against this limit when vectorization is not explicitly forced.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
"vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
cl::desc("The maximum number of SCEV checks allowed."));
// Hidden flag "pragma-vectorize-scev-check-threshold" (default: 128).
// canVectorize() uses this limit instead of VectorizeSCEVCheckThreshold when
// the loop hints report FK_Enabled.
static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
"pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
cl::desc("The maximum number of SCEV checks allowed with a "
"vectorize(enable) pragma"));
// Hidden flag "scalable-vectorization" (default: SK_Unspecified).
// Accepted spellings:
//   "off"       -> SK_FixedWidthOnly
//   "preferred" -> SK_PreferScalable
//   "on"        -> SK_PreferScalable (same enumerator as "preferred")
// The LoopVectorizeHints constructor lets any value other than
// SK_Unspecified override the per-loop scalable hint.
static cl::opt<LoopVectorizeHints::ScalableForceKind>
ForceScalableVectorization(
"scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
cl::Hidden,
cl::desc("Control whether the compiler can use scalable vectors to "
"vectorize a loop"),
cl::values(
clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
"Scalable vectorization is disabled."),
clEnumValN(
LoopVectorizeHints::SK_PreferScalable, "preferred",
"Scalable vectorization is available and favored when the "
"cost is inconclusive."),
clEnumValN(
LoopVectorizeHints::SK_PreferScalable, "on",
"Scalable vectorization is available and favored when the "
"cost is inconclusive.")));
// Upper bound that Hint::validate() enforces on an interleave-count hint.
static const unsigned MaxInterleaveFactor = 16;
namespace llvm {
/// Return true if \p Val is an acceptable value for a hint of this Kind.
///
/// Width and interleave hints must satisfy isPowerOf2_32 and must not exceed
/// VectorizerParams::MaxVectorWidth / MaxInterleaveFactor respectively. All
/// other hint kinds only accept 0 or 1. A Kind that matches none of the cases
/// is never valid.
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  bool Accepted = false;
  switch (Kind) {
  case HK_WIDTH:
    Accepted = Val <= VectorizerParams::MaxVectorWidth && isPowerOf2_32(Val);
    break;
  case HK_INTERLEAVE:
    Accepted = Val <= MaxInterleaveFactor && isPowerOf2_32(Val);
    break;
  case HK_FORCE:
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    // Val is unsigned, so "<= 1" is equivalent to "== 0 || == 1".
    Accepted = Val <= 1;
    break;
  }
  return Accepted;
}
/// Build the hint set for loop \p L.
///
/// Hints start from their defaults, are then overwritten by whatever
/// getHintsFromMetadata() finds, and are finally adjusted by command-line
/// overrides. The scalable-vectorization preference is resolved in this
/// order (later steps win):
///   1. \p TTI default, if \p TTI is non-null,
///   2. fixed-width only, if a width hint is set,
///   3. the "scalable-vectorization" flag, if it is not SK_Unspecified,
///   4. fixed-width only, if still unspecified.
///
/// \param InterleaveOnlyWhenForced seeds the default interleave count
///        (converted from bool to the hint's integer value).
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Pick up whatever the loop metadata already says.
  getHintsFromMetadata();

  // A forced interleave count on the command line wins over the metadata.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  auto ScalableIsUnspecified = [this]() {
    return static_cast<LoopVectorizeHints::ScalableForceKind>(Scalable.Value) ==
           SK_Unspecified;
  };

  if (ScalableIsUnspecified()) {
    if (TTI) {
      if (TTI->enableScalableVectorization())
        Scalable.Value = SK_PreferScalable;
      else
        Scalable.Value = SK_FixedWidthOnly;
    }
    // A width hint without a scalable hint is treated as fixed-width.
    if (Width.Value)
      Scalable.Value = SK_FixedWidthOnly;
  }

  // The command-line flag, when given, overrides everything above.
  const auto ForcedScalable = ForceScalableVectorization.getValue();
  if (ForcedScalable != LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForcedScalable;

  // No preference anywhere: fall back to fixed-width vectors.
  if (ScalableIsUnspecified())
    Scalable.Value = SK_FixedWidthOnly;

  // Unless the loop is already marked, mark it as vectorized when both the
  // width and the interleave count are 1.
  if (IsVectorized.Value != 1) {
    const bool WidthIsOne = getWidth() == ElementCount::getFixed(1);
    IsVectorized.Value = WidthIsOne && getInterleave() == 1;
  }

  LLVM_DEBUG({
    if (InterleaveOnlyWhenForced && getInterleave() == 1)
      dbgs() << "LV: Interleaving disabled by the pass manager\n";
  });
}
void LoopVectorizeHints::setAlreadyVectorized() {
LLVMContext &Context = TheLoop->getHeader()->getContext();
MDNode *IsVectorizedMD = MDNode::get(
Context,
{MDString::get(Context, "llvm.loop.isvectorized"),
ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
MDNode *LoopID = TheLoop->getLoopID();
MDNode *NewLoopID =
makePostTransformationMetadata(Context, LoopID,
{Twine(Prefix(), "vectorize.").str(),
Twine(Prefix(), "interleave.").str()},
{IsVectorizedMD});
TheLoop->setLoopID(NewLoopID);
IsVectorized.Value = 1;
}
/// Decide from the hints alone whether vectorizing \p L may be attempted.
///
/// Returns false (after emitting a remark) when
///   - the force hint is FK_Disabled,
///   - \p VectorizeOnlyWhenForced is set and the force hint is not FK_Enabled,
///   - the isvectorized hint is 1.
/// Returns true otherwise. \p F is not used by this function.
bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  const auto Forced = getForce();

  if (Forced == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && Forced != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() != 1)
    return true;

  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
  ORE.emit([&]() {
    OptimizationRemarkAnalysis Remark(vectorizeAnalysisPassName(),
                                      "AllDisabled", L->getStartLoc(),
                                      L->getHeader());
    return Remark << "loop not vectorized: vectorization and interleaving are "
                     "explicitly disabled, or the loop has already been "
                     "vectorized";
  });
  return false;
}
void LoopVectorizeHints::emitRemarkWithHints() const {
using namespace ore;
ORE.emit([&]() {
if (Force.Value == LoopVectorizeHints::FK_Disabled)
return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
TheLoop->getStartLoc(),
TheLoop->getHeader())
<< "loop not vectorized: vectorization is explicitly disabled";
else {
OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
TheLoop->getStartLoc(), TheLoop->getHeader());
R << "loop not vectorized";
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
R << ", Vector Width=" << NV("VectorWidth", getWidth());
if (getInterleave() != 0)
R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
R << ")";
}
return R;
}
});
}
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
if (getWidth() == ElementCount::getFixed(1))
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Disabled)
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
return LV_NAME;
return OptimizationRemarkAnalysis::AlwaysPrint;
}
bool LoopVectorizeHints::allowReordering() const {
ElementCount EC = getWidth();
return HintsAllowReordering &&
(getForce() == LoopVectorizeHints::FK_Enabled ||
EC.getKnownMinValue() > 1);
}
void LoopVectorizeHints::getHintsFromMetadata() {
MDNode *LoopID = TheLoop->getLoopID();
if (!LoopID)
return;
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
const MDString *S = nullptr;
SmallVector<Metadata *, 4> Args;
if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
if (!MD || MD->getNumOperands() == 0)
continue;
S = dyn_cast<MDString>(MD->getOperand(0));
for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
Args.push_back(MD->getOperand(i));
} else {
S = dyn_cast<MDString>(LoopID->getOperand(i));
assert(Args.size() == 0 && "too many arguments for MDString");
}
if (!S)
continue;
StringRef Name = S->getString();
if (Args.size() == 1)
setHint(Name, Args[0]);
}
}
/// Store one metadata hint.
///
/// \p Name must start with Prefix(); the remainder is matched against the
/// names of the known hints. \p Arg must be a ConstantInt. The value is
/// stored only if Hint::validate() accepts it; otherwise a debug message is
/// printed. Names that do not match any known hint are ignored silently.
void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  // Strip the common prefix; bail out if it is not there.
  if (!Name.consume_front(Prefix()))
    return;

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  for (Hint *Candidate :
       {&Width, &Interleave, &Force, &IsVectorized, &Predicate, &Scalable}) {
    if (Name != Candidate->Name)
      continue;

    if (Candidate->validate(Val))
      Candidate->Value = Val;
    else
      LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
    return;
  }
}
/// Return true if \p Lp is uniform with respect to \p OuterLp.
///
/// \p OuterLp itself is always uniform. Any other loop must
///   1. have a canonical induction variable,
///   2. end its latch with a conditional branch,
///   3. branch on a compare instruction, and
///   4. compare the IV's latch update against a value that is invariant in
///      \p OuterLp (in either operand order).
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  PHINode *CanonicalIV = Lp->getCanonicalInductionVariable();
  if (!CanonicalIV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  BasicBlock *LatchBB = Lp->getLoopLatch();
  auto *LatchBranch = dyn_cast<BranchInst>(LatchBB->getTerminator());
  if (!LatchBranch || LatchBranch->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  auto *Compare = dyn_cast<CmpInst>(LatchBranch->getCondition());
  if (!Compare) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *IVUpdate = CanonicalIV->getIncomingValueForBlock(LatchBB);
  auto ComparesUpdateWithInvariant = [&](Value *MaybeUpdate, Value *Bound) {
    return MaybeUpdate == IVUpdate && OuterLp->isLoopInvariant(Bound);
  };

  Value *Lhs = Compare->getOperand(0);
  Value *Rhs = Compare->getOperand(1);
  if (ComparesUpdateWithInvariant(Lhs, Rhs) ||
      ComparesUpdateWithInvariant(Rhs, Lhs))
    return true;

  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
  return false;
}
/// Return true if \p Lp and, recursively, every loop nested inside it are
/// uniform with respect to \p OuterLp (see isUniformLoop). Stops at the first
/// non-uniform loop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  return isUniformLoop(Lp, OuterLp) &&
         llvm::all_of(*Lp, [OuterLp](Loop *Inner) {
           return isUniformLoopNest(Inner, OuterLp);
         });
}
/// Map \p Ty to the integer type used for induction-width bookkeeping.
///
/// Pointer types become the DataLayout's pointer-sized integer type. Types
/// whose scalar size is below 32 bits become i32. Every other type is
/// returned unchanged.
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  const bool NarrowerThan32Bits = Ty->getScalarSizeInBits() < 32;
  return NarrowerThan32Bits ? Type::getInt32Ty(Ty->getContext()) : Ty;
}
/// Return whichever of \p Ty0 and \p Ty1 has more scalar bits after both have
/// been passed through convertPointerToIntegerType(). On a tie the converted
/// \p Ty1 is returned.
static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Type *First = convertPointerToIntegerType(DL, Ty0);
  Type *Second = convertPointerToIntegerType(DL, Ty1);
  const bool FirstIsWider =
      First->getScalarSizeInBits() > Second->getScalarSizeInBits();
  return FirstIsWider ? First : Second;
}
/// Return true if \p Inst is used by an instruction outside \p TheLoop and is
/// not listed in \p AllowedExit. Values in \p AllowedExit are never reported.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  if (AllowedExit.count(Inst))
    return false;

  for (User *U : Inst->users()) {
    auto *UserInst = cast<Instruction>(U);
    if (TheLoop->contains(UserInst))
      continue;
    LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UserInst
                      << '\n');
    return true;
  }
  return false;
}
/// Return true if stores \p A and \p B are known to write to the same
/// address: they are the same instruction, they use the same pointer operand,
/// or \p SE gives both pointer operands the same SCEV.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  if (A == B)
    return true;

  Value *PtrA = A->getPointerOperand();
  Value *PtrB = B->getPointerOperand();
  return PtrA == PtrB || SE->getSCEV(PtrA) == SE->getSCEV(PtrB);
}
/// Classify the stride of \p Ptr when accessed as \p AccessTy in TheLoop.
///
/// \returns 1 or -1 when getPtrStride() reports exactly that stride, and 0
/// for every other result.
///
/// getPtrStride() is allowed to add SCEV predicates only when the enclosing
/// function is not being optimized for size.
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // Both arms of the conditional below are lvalues of the same type, so the
  // reference binds directly to the existing map. Using a temporary
  // `ValueToValueMap()` as the fallback arm would turn the whole expression
  // into a prvalue and copy the symbolic-strides map on every call.
  const ValueToValueMap NoStrides;
  const ValueToValueMap *SymbolicStrides = getSymbolicStrides();
  const ValueToValueMap &Strides =
      SymbolicStrides ? *SymbolicStrides : NoStrides;

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  // Extra predicates mean extra runtime checks, so skip them under optsize.
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}
/// Forward the uniformity query for \p V to the cached LoopAccessInfo.
/// LAI is assigned in canVectorizeMemory(); NOTE(review): this presumably
/// must not be called before that has run - confirm against callers.
bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
/// Legality checks for vectorizing TheLoop as an outer loop.
///
/// Verifies that
///   - every block ends in a BranchInst,
///   - every conditional branch either has a loop-invariant condition or
///     targets a loop header with one of its successors,
///   - the whole loop nest is uniform (isUniformLoopNest), and
///   - the header PHIs are supported (setupOuterLoopInductions).
///
/// Each failure is reported. When extra analysis is allowed by the remark
/// emitter, checking continues after a failure and the combined result is
/// returned; otherwise the function returns false at the first failure.
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");

  bool Result = true;
  const bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Record a failure; the return value says whether to stop immediately.
  auto MustBailOut = [&]() {
    Result = false;
    return !DoExtraAnalysis;
  };

  for (BasicBlock *BB : TheLoop->blocks()) {
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
          "loop control flow is not understood by vectorizer",
          "CFGNotUnderstood", ORE, TheLoop);
      if (MustBailOut())
        return false;
      continue;
    }

    const bool UnsupportedBranch =
        Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1));
    if (!UnsupportedBranch)
      continue;

    reportVectorizationFailure("Unsupported conditional branch",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  if (!isUniformLoopNest(TheLoop, TheLoop)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  return Result;
}
/// Register \p Phi as an induction described by \p ID.
///
/// Besides recording the descriptor this
///   - remembers the first cast instruction of the descriptor so it can be
///     ignored later,
///   - widens WidestIndTy for non-floating-point PHIs,
///   - makes \p Phi the primary induction if it is an integer induction
///     starting at a null constant with a constant step of one, and either no
///     primary induction exists yet or the PHI has the widest type,
///   - adds the PHI and its latch-incoming value to \p AllowedExit, but only
///     while the accumulated SCEV predicate is always true.
void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(Casts.front());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getModule()->getDataLayout();

  // Floating-point inductions do not take part in the widest-type tracking.
  if (!PhiTy->isFloatingPointTy())
    WidestIndTy = WidestIndTy ? getWiderType(DL, PhiTy, WidestIndTy)
                              : convertPointerToIntegerType(DL, PhiTy);

  // Integer induction that starts at zero and steps by one.
  auto StartsAtZeroAndStepsByOne = [&ID]() {
    if (ID.getKind() != InductionDescriptor::IK_IntInduction)
      return false;
    const ConstantInt *Step = ID.getConstIntStepValue();
    if (!Step || !Step->isOne())
      return false;
    auto *Start = dyn_cast<Constant>(ID.getStartValue());
    return Start && Start->isNullValue();
  };

  if (StartsAtZeroAndStepsByOne() &&
      (!PrimaryInduction || PhiTy == WidestIndTy))
    PrimaryInduction = Phi;

  // Outside uses are only tolerated when no SCEV predicate is in effect.
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}
/// Register the header PHIs of an outer loop as inductions.
///
/// Every header PHI must be recognized as an integer induction; each one that
/// is gets recorded through addInductionPhi(). Processing stops at the first
/// PHI that is not, and false is returned.
bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  return llvm::all_of(Header->phis(), [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    const bool IsIntInduction =
        InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction;
    if (!IsIntInduction) {
      LLVM_DEBUG(
          dbgs()
          << "LV: Found unsupported PHI for outer loop vectorization.\n");
      return false;
    }
    addInductionPhi(&Phi, ID, AllowedExit);
    return true;
  });
}
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
const StringRef ScalarName = CI.getCalledFunction()->getName();
bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
if (Scalarize) {
ElementCount WidestFixedVF, WidestScalableVF;
TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
for (ElementCount VF = ElementCount::getFixed(2);
ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
for (ElementCount VF = ElementCount::getScalable(1);
ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
assert((WidestScalableVF.isZero() || !Scalarize) &&
"Caller may decide to scalarize a variant using a scalable VF");
}
return Scalarize;
}
/// Check every instruction of every block in TheLoop for vectorizability.
///
/// While scanning, header PHIs are classified as reductions, inductions or
/// first-order recurrences and recorded in the corresponding members;
/// values that may legally be used outside the loop are collected in
/// AllowedExit. The first unsupported construct is reported through
/// reportVectorizationFailure() and makes the function return false.
///
/// \returns true if no instruction blocks vectorization.
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *Header = TheLoop->getHeader();
for (BasicBlock *BB : TheLoop->blocks()) {
for (Instruction &I : *BB) {
// ---- PHI nodes ---------------------------------------------------------
if (auto *Phi = dyn_cast<PHINode>(&I)) {
Type *PhiTy = Phi->getType();
// Only integer, floating-point and pointer PHIs are accepted.
if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
reportVectorizationFailure("Found a non-int non-pointer PHI",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
return false;
}
// PHIs outside the header are not classified; they are simply allowed to
// have users outside the loop.
if (BB != Header) {
AllowedExit.insert(&I);
continue;
}
// Header PHIs must have exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
reportVectorizationFailure("Found an invalid PHI",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop, Phi);
return false;
}
// Classification order matters: reduction, then induction, then
// first-order recurrence, then a second induction attempt with the extra
// `true` argument.
RecurrenceDescriptor RedDes;
if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
DT, PSE.getSE())) {
Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
AllowedExit.insert(RedDes.getLoopExitInstr());
Reductions[Phi] = RedDes;
continue;
}
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
Requirements->addExactFPMathInst(ID.getExactFPMathInst());
continue;
}
if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
SinkAfter, DT)) {
AllowedExit.insert(Phi);
FirstOrderRecurrences.insert(Phi);
continue;
}
// Last resort: retry induction detection with the final flag set.
// NOTE(review): the flag presumably permits adding SCEV predicates -
// confirm against InductionDescriptor::isInductionPHI.
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
}
// ---- Calls -------------------------------------------------------------
// A call is acceptable if it maps to a vector intrinsic, is a debug-info
// intrinsic, or has a known callee for which either VFDatabase has
// mappings or isTLIScalarize() holds (both require TLI).
auto *CI = dyn_cast<CallInst>(&I);
if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
(!VFDatabase::getMappings(*CI).empty() ||
isTLIScalarize(*TLI, *CI)))) {
// Pick a more helpful remark when the callee is a floating-point library
// function for which TLI reports optimized codegen.
LibFunc Func;
bool IsMathLibCall =
TLI && CI->getCalledFunction() &&
CI->getType()->isFloatingPointTy() &&
TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
TLI->hasOptimizedCodeGen(Func);
if (IsMathLibCall) {
reportVectorizationFailure(
"Found a non-intrinsic callsite",
"library call cannot be vectorized. "
"Try compiling with -fno-math-errno, -ffast-math, "
"or similar flags",
"CantVectorizeLibcall", ORE, TheLoop, CI);
} else {
reportVectorizationFailure("Found a non-intrinsic callsite",
"call instruction cannot be vectorized",
"CantVectorizeLibcall", ORE, TheLoop, CI);
}
return false;
}
// Arguments that the intrinsic requires to stay scalar must be invariant
// in TheLoop.
if (CI) {
auto *SE = PSE.getSE();
Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
"intrinsic instruction cannot be vectorized",
"CantVectorizeIntrinsic", ORE, TheLoop, CI);
return false;
}
}
}
// ---- Result type -------------------------------------------------------
// The result must be void or a valid vector element type, and
// extractelement instructions are rejected outright.
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
isa<ExtractElementInst>(I)) {
reportVectorizationFailure("Found unvectorizable type",
"instruction return type cannot be vectorized",
"CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
return false;
}
// ---- Stores / loads / FP operations -------------------------------------
if (auto *ST = dyn_cast<StoreInst>(&I)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
reportVectorizationFailure("Store instruction cannot be vectorized",
"store instruction cannot be vectorized",
"CantVectorizeStore", ORE, TheLoop, ST);
return false;
}
// Nontemporal stores are probed with a 2-element fixed vector of the
// stored type at the store's alignment.
if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
auto *VecTy = FixedVectorType::get(T, 2);
assert(VecTy && "did not find vectorized version of stored type");
if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
reportVectorizationFailure(
"nontemporal store instruction cannot be vectorized",
"nontemporal store instruction cannot be vectorized",
"CantVectorizeNontemporalStore", ORE, TheLoop, ST);
return false;
}
}
} else if (auto *LD = dyn_cast<LoadInst>(&I)) {
// Same probe for nontemporal loads.
if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
auto *VecTy = FixedVectorType::get(I.getType(), 2);
assert(VecTy && "did not find vectorized version of load type");
if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
reportVectorizationFailure(
"nontemporal load instruction cannot be vectorized",
"nontemporal load instruction cannot be vectorized",
"CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
return false;
}
}
} else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
!I.isFast()) {
// A floating-point call or binary operator without the fast flag set:
// note it on the hints instead of failing.
LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
Hints->setPotentiallyUnsafe();
}
// ---- Uses outside the loop ----------------------------------------------
// A value not yet in AllowedExit that is used outside the loop is only
// tolerated while the accumulated SCEV predicate is always true.
if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
if (PSE.getPredicate().isAlwaysTrue()) {
AllowedExit.insert(&I);
continue;
}
reportVectorizationFailure("Value cannot be used outside the loop",
"value cannot be used outside the loop",
"ValueUsedOutsideLoop", ORE, TheLoop, &I);
return false;
}
} }
// Without a primary induction there must still be at least one induction,
// and at least one of them must have contributed to WidestIndTy.
if (!PrimaryInduction) {
if (Inductions.empty()) {
reportVectorizationFailure("Did not find one integer induction var",
"loop induction variable could not be identified",
"NoInductionVariable", ORE, TheLoop);
return false;
} else if (!WidestIndTy) {
reportVectorizationFailure("Did not find one integer induction var",
"integer loop induction variable could not be identified",
"NoIntegerInductionVariable", ORE, TheLoop);
return false;
} else {
LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
}
}
// Give up (without a remark) if the latch-incoming value of any
// first-order recurrence is itself a key in SinkAfter.
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
Instruction *V =
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
return SinkAfter.find(V) != SinkAfter.end();
}))
return false;
// Drop the primary induction if its type is not the widest induction type.
if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
PrimaryInduction = nullptr;
return true;
}
bool LoopVectorizationLegality::canVectorizeMemory() {
LAI = &(*GetLAA)(*TheLoop);
const OptimizationRemarkAnalysis *LAR = LAI->getReport();
if (LAR) {
ORE->emit([&]() {
return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
"loop not vectorized: ", *LAR);
});
}
if (!LAI->canVectorizeMemory())
return false;
if (!LAI->getStoresToInvariantAddresses().empty()) {
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
if (isInvariantStoreOfReduction(SI) &&
blockNeedsPredication(SI->getParent())) {
reportVectorizationFailure(
"We don't allow storing to uniform addresses",
"write of conditional recurring variant value to a loop "
"invariant address could not be vectorized",
"CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
}
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
ScalarEvolution *SE = PSE.getSE();
SmallVector<StoreInst *, 4> UnhandledStores;
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
if (isInvariantStoreOfReduction(SI)) {
erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
return storeToSameAddress(SE, SI, I) &&
I->getValueOperand()->getType() ==
SI->getValueOperand()->getType();
});
continue;
}
UnhandledStores.push_back(SI);
}
bool IsOK = UnhandledStores.empty();
if (!IsOK) {
reportVectorizationFailure(
"We don't allow storing to uniform addresses",
"write to a loop invariant address could not "
"be vectorized",
"CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
}
}
PSE.addPredicate(LAI->getPSE().getPredicate());
return true;
}
/// Decide whether the loop's floating-point math permits vectorization.
///
/// Returns true right away if no exact-FP instruction was recorded or if the
/// hints allow reordering. Otherwise \p EnableStrictReductions must be set,
/// no induction may carry an exact-FP instruction, and every reduction with
/// exact FP math must be ordered.
bool LoopVectorizationLegality::canVectorizeFPMath(
    bool EnableStrictReductions) {
  if (!Requirements->getExactFPInst() || Hints->allowReordering())
    return true;

  if (!EnableStrictReductions)
    return false;

  for (auto &Induction : getInductionVars()) {
    // The descriptor is copied exactly as before. NOTE(review): presumably
    // getExactFPMathInst() cannot be called on the const map entry - confirm
    // before turning this into a reference.
    InductionDescriptor IndDesc = Induction.second;
    if (IndDesc.getExactFPMathInst())
      return false;
  }

  for (auto &Reduction : getReductionVars()) {
    const RecurrenceDescriptor &RdxDesc = Reduction.second;
    if (RdxDesc.hasExactFPMath() && !RdxDesc.isOrdered())
      return false;
  }
  return true;
}
/// Return true if \p SI is the IntermediateStore of any recorded reduction.
bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
  for (auto &Reduction : getReductionVars())
    if (Reduction.second.IntermediateStore == SI)
      return true;
  return false;
}
/// Return true if \p V is the address written by the IntermediateStore of any
/// recorded reduction, either as the identical value or as a value with the
/// same SCEV. Reductions without an intermediate store are skipped.
bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
  for (auto &Reduction : getReductionVars()) {
    auto *Store = Reduction.second.IntermediateStore;
    if (!Store)
      continue;

    ScalarEvolution *SE = PSE.getSE();
    Value *StoreAddr = Store->getPointerOperand();
    if (V == StoreAddr || SE->getSCEV(V) == SE->getSCEV(StoreAddr))
      return true;
  }
  return false;
}
/// Return true if \p V is a PHI node recorded in Inductions. A null \p V or
/// a non-PHI value yields false.
bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
  // The cast only drops constness so the pointer can be used as a map key.
  auto *PN = dyn_cast_or_null<PHINode>(const_cast<Value *>(V));
  return PN && Inductions.count(PN);
}
/// Return the descriptor recorded for \p Phi if it is an integer or
/// floating-point induction; nullptr if \p Phi is not a recorded induction or
/// is of another kind.
const InductionDescriptor *
LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;

  auto &Desc = getInductionVars().find(Phi)->second;
  const auto Kind = Desc.getKind();
  const bool IsIntOrFp = Kind == InductionDescriptor::IK_IntInduction ||
                         Kind == InductionDescriptor::IK_FpInduction;
  return IsIntOrFp ? &Desc : nullptr;
}
/// Return the descriptor recorded for \p Phi if it is a pointer induction;
/// nullptr if \p Phi is not a recorded induction or is of another kind.
const InductionDescriptor *
LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
  if (!isInductionPhi(Phi))
    return nullptr;

  auto &Desc = getInductionVars().find(Phi)->second;
  const bool IsPointer =
      Desc.getKind() == InductionDescriptor::IK_PtrInduction;
  return IsPointer ? &Desc : nullptr;
}
/// Return true if \p V is an instruction recorded in InductionCastsToIgnore.
bool LoopVectorizationLegality::isCastedInductionVariable(
    const Value *V) const {
  if (auto *Inst = dyn_cast<Instruction>(V))
    return InductionCastsToIgnore.count(Inst) != 0;
  return false;
}
/// Return true if \p V is either a recorded induction PHI or one of the
/// recorded induction cast instructions.
bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
return isInductionPhi(V) || isCastedInductionVariable(V);
}
/// Return true if \p Phi was recorded in FirstOrderRecurrences (populated by
/// canVectorizeInstrs()).
bool LoopVectorizationLegality::isFirstOrderRecurrence(
const PHINode *Phi) const {
return FirstOrderRecurrences.count(Phi);
}
/// Forward to LoopAccessInfo::blockNeedsPredication() for \p BB, using this
/// object's loop and dominator tree.
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
/// Check whether every instruction in \p BB can be executed under a mask.
///
/// \param SafePtrs pointers whose loads need no masking.
/// \param MaskedOp receives each load from a pointer outside \p SafePtrs and
///        each store.
/// \param ConditionalAssumes receives each llvm.assume call found.
/// \returns false as soon as an instruction is found that reads memory
///          without being a load, writes memory without being a store, or
///          may throw. The output sets may already have been extended at
///          that point.
bool LoopVectorizationLegality::blockCanBePredicated(
    BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    SmallPtrSetImpl<const Instruction *> &MaskedOp,
    SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
  for (Instruction &Inst : *BB) {
    // Assumes are collected rather than treated as an obstacle.
    if (match(&Inst, m_Intrinsic<Intrinsic::assume>())) {
      ConditionalAssumes.insert(&Inst);
      continue;
    }

    // Noalias scope declarations are ignored.
    if (isa<NoAliasScopeDeclInst>(&Inst))
      continue;

    if (Inst.mayReadFromMemory()) {
      auto *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        return false;
      const bool KnownSafe = SafePtrs.count(Load->getPointerOperand());
      if (!KnownSafe) {
        MaskedOp.insert(Load);
        continue;
      }
      // A load from a safe pointer still goes through the checks below.
    }

    if (Inst.mayWriteToMemory()) {
      if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
        MaskedOp.insert(Store);
        continue;
      }
      return false;
    }

    if (Inst.mayThrow())
      return false;
  }

  return true;
}
/// Check whether a multi-block loop can be if-converted.
///
/// Fails when the "enable-if-conversion" flag is off, when any block does not
/// end in a BranchInst, or when a block that needs predication cannot be
/// predicated. A set of "safe" pointers is built first: every load/store
/// pointer from blocks that need no predication, plus the pointers of
/// non-vector loads in predicated blocks that are not speculation-suppressed
/// and are dereferenceable and aligned in the loop.
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // Phase 1: gather the pointers that may be accessed without a mask.
  SmallPtrSet<Value *, 8> SafePointers;
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (blockNeedsPredication(BB)) {
      ScalarEvolution &SE = *PSE.getSE();
      for (Instruction &Inst : *BB) {
        LoadInst *Load = dyn_cast<LoadInst>(&Inst);
        if (!Load)
          continue;
        if (!Load->getType()->isVectorTy() &&
            !mustSuppressSpeculation(*Load) &&
            isDereferenceableAndAlignedInLoop(Load, TheLoop, SE, *DT))
          SafePointers.insert(Load->getPointerOperand());
      }
    } else {
      for (Instruction &Inst : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&Inst))
          SafePointers.insert(Ptr);
    }
  }

  // Phase 2: validate terminators and predicability block by block.
  for (BasicBlock *BB : TheLoop->blocks()) {
    if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains a switch statement",
                                 "loop contains a switch statement",
                                 "LoopContainsSwitch", ORE, TheLoop,
                                 BB->getTerminator());
      return false;
    }

    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp,
                              ConditionalAssumes)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select",
          "NoCFGForSelect", ORE, TheLoop,
          BB->getTerminator());
      return false;
    }
  }

  return true;
}
/// Check the control-flow shape of \p Lp: it must have a preheader and
/// exactly one backedge.
///
/// Each failure is reported (against TheLoop). When extra analysis is allowed
/// by the remark emitter both checks are always performed; otherwise the
/// function returns false at the first failure.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
                                                    bool UseVPlanNativePath) {
  assert((UseVPlanNativePath || Lp->isInnermost()) &&
         "VPlan-native path is not enabled.");

  bool Result = true;
  const bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Record a failure; the return value says whether to stop immediately.
  auto MustBailOut = [&]() {
    Result = false;
    return !DoExtraAnalysis;
  };

  if (!Lp->getLoopPreheader()) {
    reportVectorizationFailure("Loop doesn't have a legal pre-header",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  if (Lp->getNumBackEdges() != 1) {
    reportVectorizationFailure("The loop must have a single backedge",
        "loop control flow is not understood by vectorizer",
        "CFGNotUnderstood", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  return Result;
}
/// Apply canVectorizeLoopCFG() to \p Lp and, recursively, to every loop
/// nested inside it.
///
/// When extra analysis is allowed by the remark emitter the whole nest is
/// visited and the combined result returned; otherwise the function returns
/// false at the first failing loop.
bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
    Loop *Lp, bool UseVPlanNativePath) {
  bool Result = true;
  const bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
    if (!DoExtraAnalysis)
      return false;
    Result = false;
  }

  for (Loop *Inner : *Lp) {
    if (canVectorizeLoopNestCFG(Inner, UseVPlanNativePath))
      continue;
    if (!DoExtraAnalysis)
      return false;
    Result = false;
  }

  return Result;
}
/// Top-level legality query: can TheLoop be vectorized?
///
/// Runs, in order, the CFG checks for the whole loop nest and then either the
/// outer-loop checks (for non-innermost loops) or the inner-loop checks:
/// if-conversion (multi-block loops only), per-instruction checks, memory
/// checks and the SCEV-predicate complexity limit.
///
/// When the remark emitter allows extra analysis, inner-loop checking
/// continues past a failure so that every problem gets reported; the return
/// value is still false. An outer-loop failure always returns immediately.
///
/// \param UseVPlanNativePath must be true for non-innermost loops.
/// \returns true only if every check that ran succeeded.
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  bool Result = true;
  const bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Record a failure; the return value says whether to stop immediately.
  auto MustBailOut = [&]() {
    Result = false;
    return !DoExtraAnalysis;
  };

  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (MustBailOut())
      return false;
  }

  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Outer loops take a separate path; the remaining checks are skipped.
  if (!TheLoop->isInnermost()) {
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "unsupported outer loop",
                                 "UnsupportedOuterLoop",
                                 ORE, TheLoop);
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->isInnermost() && "Inner loop expected.");

  // Single-block loops need no if-conversion.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (MustBailOut())
      return false;
  }

  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (MustBailOut())
      return false;
  }

  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (MustBailOut())
      return false;
  }

  // With extra analysis enabled we can get here after a failed check; only
  // announce success when nothing has failed so far, otherwise the debug log
  // would contradict the failures printed just above.
  if (Result) {
    LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                      << (LAI->getRuntimePointerChecking()->Need
                              ? " (with a runtime bound check)"
                              : "")
                      << "!\n");
  }

  // An explicit vectorize(enable) hint buys a larger SCEV-check budget.
  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
    reportVectorizationFailure("Too many SCEV checks needed",
        "Too many SCEV assumptions need to be made and checked at runtime",
        "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (MustBailOut())
      return false;
  }

  return Result;
}
bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
SmallPtrSet<const Value *, 8> ReductionLiveOuts;
for (auto &Reduction : getReductionVars())
ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
for (auto *AE : AllowedExit) {
if (ReductionLiveOuts.count(AE))
continue;
for (User *U : AE->users()) {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
continue;
LLVM_DEBUG(
dbgs()
<< "LV: Cannot fold tail by masking, loop has an outside user for "
<< *UI << "\n");
return false;
}
}
SmallPtrSet<Value *, 8> SafePointers;
SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
for (BasicBlock *BB : TheLoop->blocks()) {
if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
TmpConditionalAssumes)) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
return false;
}
}
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
TmpConditionalAssumes.end());
return true;
}
}