#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."),
cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;
static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
cl::desc("Sets the vectorization interleave count. "
"Zero is autoselect."),
cl::location(
VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;
static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
"runtime-memory-check-threshold", cl::Hidden,
cl::desc("When performing memory disambiguation checks at runtime do not "
"generate more than this number of comparisons (default = 8)."),
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
static cl::opt<unsigned> MemoryCheckMergeThreshold(
"memory-check-merge-threshold", cl::Hidden,
cl::desc("Maximum number of comparisons done when trying to merge "
"runtime memory checks. (default = 100)"),
cl::init(100));
const unsigned VectorizerParams::MaxVectorWidth = 64;
static cl::opt<unsigned>
MaxDependences("max-dependences", cl::Hidden,
cl::desc("Maximum number of dependences collected by "
"loop-access analysis (default = 100)"),
cl::init(100));
static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
cl::desc("Enable symbolic stride memory access versioning"));
static cl::opt<bool> EnableForwardingConflictDetection(
"store-to-load-forwarding-conflict-detection", cl::Hidden,
cl::desc("Enable conflict detection in loop-access analysis"),
cl::init(true));
static cl::opt<unsigned> MaxForkedSCEVDepth(
"max-forked-scev-depth", cl::Hidden,
cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
cl::init(5));
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
Value *llvm::stripIntegerCast(Value *V) {
if (auto *CI = dyn_cast<CastInst>(V))
if (CI->getOperand(0)->getType()->isIntegerTy())
return CI->getOperand(0);
return V;
}
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr) {
const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
if (SI == PtrToStride.end())
return OrigSCEV;
Value *StrideVal = stripIntegerCast(SI->second);
ScalarEvolution *SE = PSE.getSE();
const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
const auto *CT =
static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
PSE.addPredicate(*SE->getEqualPredicate(U, CT));
auto *Expr = PSE.getSCEV(Ptr);
LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
<< " by: " << *Expr << "\n");
return Expr;
}
RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
unsigned Index, RuntimePointerChecking &RtCheck)
: High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
AddressSpace(RtCheck.Pointers[Index]
.PointerValue->getType()
->getPointerAddressSpace()),
NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
Members.push_back(Index);
}
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
Type *AccessTy, bool WritePtr,
unsigned DepSetId, unsigned ASId,
PredicatedScalarEvolution &PSE,
bool NeedsFreeze) {
ScalarEvolution *SE = PSE.getSE();
const SCEV *ScStart;
const SCEV *ScEnd;
if (SE->isLoopInvariant(PtrExpr, Lp)) {
ScStart = ScEnd = PtrExpr;
} else {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = PSE.getBackedgeTakenCount();
ScStart = AR->getStart();
ScEnd = AR->evaluateAtIteration(Ex, *SE);
const SCEV *Step = AR->getStepRecurrence(*SE);
if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
if (CStep->getValue()->isNegative())
std::swap(ScStart, ScEnd);
} else {
ScStart = SE->getUMinExpr(ScStart, ScEnd);
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
NeedsFreeze);
}
void RuntimePointerChecking::tryToCreateDiffCheck(
const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
if (!CanUseDiffCheck)
return;
if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) {
CanUseDiffCheck = false;
return;
}
PointerInfo *Src = &Pointers[CGI.Members[0]];
PointerInfo *Sink = &Pointers[CGJ.Members[0]];
if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
!DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) {
CanUseDiffCheck = false;
return;
}
ArrayRef<unsigned> AccSrc =
DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
ArrayRef<unsigned> AccSink =
DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
if (AccSrc.size() != 1 || AccSink.size() != 1) {
CanUseDiffCheck = false;
return;
}
if (AccSink[0] < AccSrc[0])
std::swap(Src, Sink);
auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
SinkAR->getLoop() != DC.getInnermostLoop()) {
CanUseDiffCheck = false;
return;
}
const DataLayout &DL =
SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
SmallVector<Instruction *, 4> SrcInsts =
DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
SmallVector<Instruction *, 4> SinkInsts =
DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
Type *SrcTy = getLoadStoreType(SrcInsts[0]);
Type *DstTy = getLoadStoreType(SinkInsts[0]);
if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
CanUseDiffCheck = false;
return;
}
unsigned AllocSize =
std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
IntegerType *IntTy =
IntegerType::get(Src->PointerValue->getContext(),
DL.getPointerSizeInBits(CGI.AddressSpace));
auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
Step->getAPInt().abs() != AllocSize) {
CanUseDiffCheck = false;
return;
}
if (Step->getValue()->isNegative())
std::swap(SinkAR, SrcAR);
const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
isa<SCEVCouldNotCompute>(SrcStartInt)) {
CanUseDiffCheck = false;
return;
}
DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
Src->NeedsFreeze || Sink->NeedsFreeze);
}
SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
SmallVector<RuntimePointerCheck, 4> Checks;
for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
if (needsChecking(CGI, CGJ)) {
tryToCreateDiffCheck(CGI, CGJ);
Checks.push_back(std::make_pair(&CGI, &CGJ));
}
}
}
return Checks;
}
void RuntimePointerChecking::generateChecks(
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
assert(Checks.empty() && "Checks is not empty");
groupChecks(DepCands, UseDependencies);
Checks = generateChecks();
}
bool RuntimePointerChecking::needsChecking(
const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
if (needsChecking(M.Members[I], N.Members[J]))
return true;
return false;
}
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
ScalarEvolution *SE) {
const SCEV *Diff = SE->getMinusSCEV(J, I);
const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
if (!C)
return nullptr;
if (C->getValue()->isNegative())
return J;
return I;
}
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
RuntimePointerChecking &RtCheck) {
return addPointer(
Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
}
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
const SCEV *End, unsigned AS,
bool NeedsFreeze,
ScalarEvolution &SE) {
assert(AddressSpace == AS &&
"all pointers in a checking group must be in the same address space");
const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
if (!Min0)
return false;
const SCEV *Min1 = getMinFromExprs(End, High, &SE);
if (!Min1)
return false;
if (Min0 == Start)
Low = Start;
if (Min1 != End)
High = End;
Members.push_back(Index);
this->NeedsFreeze |= NeedsFreeze;
return true;
}
void RuntimePointerChecking::groupChecks(
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
CheckingGroups.clear();
if (!UseDependencies) {
for (unsigned I = 0; I < Pointers.size(); ++I)
CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
return;
}
unsigned TotalComparisons = 0;
DenseMap<Value *, SmallVector<unsigned>> PositionMap;
for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
Iter.first->second.push_back(Index);
}
SmallSet<unsigned, 2> Seen;
for (unsigned I = 0; I < Pointers.size(); ++I) {
if (Seen.count(I))
continue;
MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
Pointers[I].IsWritePtr);
SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
MI != ME; ++MI) {
auto PointerI = PositionMap.find(MI->getPointer());
assert(PointerI != PositionMap.end() &&
"pointer in equivalence class not found in PositionMap");
for (unsigned Pointer : PointerI->second) {
bool Merged = false;
Seen.insert(Pointer);
for (RuntimeCheckingPtrGroup &Group : Groups) {
if (TotalComparisons > MemoryCheckMergeThreshold)
break;
TotalComparisons++;
if (Group.addPointer(Pointer, *this)) {
Merged = true;
break;
}
}
if (!Merged)
Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
}
}
llvm::copy(Groups, std::back_inserter(CheckingGroups));
}
}
bool RuntimePointerChecking::arePointersInSamePartition(
const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
unsigned PtrIdx2) {
return (PtrToPartition[PtrIdx1] != -1 &&
PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}
bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
const PointerInfo &PointerI = Pointers[I];
const PointerInfo &PointerJ = Pointers[J];
if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
return false;
if (PointerI.DependencySetId == PointerJ.DependencySetId)
return false;
if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
return true;
}
void RuntimePointerChecking::printChecks(
raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
unsigned Depth) const {
unsigned N = 0;
for (const auto &Check : Checks) {
const auto &First = Check.first->Members, &Second = Check.second->Members;
OS.indent(Depth) << "Check " << N++ << ":\n";
OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
for (unsigned K = 0; K < First.size(); ++K)
OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
for (unsigned K = 0; K < Second.size(); ++K)
OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
}
}
void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Run-time memory checks:\n";
printChecks(OS, Checks, Depth);
OS.indent(Depth) << "Grouped accesses:\n";
for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
const auto &CG = CheckingGroups[I];
OS.indent(Depth + 2) << "Group " << &CG << ":\n";
OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
<< ")\n";
for (unsigned J = 0; J < CG.Members.size(); ++J) {
OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
<< "\n";
}
}
}
namespace {
class AccessAnalysis {
public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE)
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
unsigned ASId, bool ShouldCheckStride, bool Assume);
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
Loop *TheLoop, const ValueToValueMap &Strides,
Value *&UncomputablePtr, bool ShouldCheckWrap = false);
void buildDependenceSets() {
processMemAccesses();
}
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
void resetDepChecks(MemoryDepChecker &DepChecker) {
CheckDeps.clear();
DepChecker.clearDependences();
}
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
void processMemAccesses();
PtrAccessMap Accesses;
const Loop *TheLoop;
MemAccessInfoList CheckDeps;
SmallPtrSet<Value*, 16> ReadOnlyPtr;
AliasSetTracker AST;
LoopInfo *LI;
MemoryDepChecker::DepCandidates &DepCands;
bool IsRTCheckAnalysisNeeded = false;
PredicatedScalarEvolution &PSE;
};
}
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
const SCEV *PtrScev, Loop *L, bool Assume) {
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR && Assume)
AR = PSE.getAsAddRec(Ptr);
if (!AR)
return false;
return AR->isAffine();
}
static bool isNoWrap(PredicatedScalarEvolution &PSE,
const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
return false;
}
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
function_ref<void(Value *)> AddPointer) {
SmallPtrSet<Value *, 8> Visited;
SmallVector<Value *> WorkList;
WorkList.push_back(StartPtr);
while (!WorkList.empty()) {
Value *Ptr = WorkList.pop_back_val();
if (!Visited.insert(Ptr).second)
continue;
auto *PN = dyn_cast<PHINode>(Ptr);
if (PN && InnermostLoop.contains(PN->getParent()) &&
PN->getParent() != InnermostLoop.getHeader()) {
for (const Use &Inc : PN->incoming_values())
WorkList.push_back(Inc);
} else
AddPointer(Ptr);
}
}
static void
findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
unsigned Depth) {
const SCEV *Scev = SE->getSCEV(Ptr);
if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
!isa<Instruction>(Ptr) || Depth == 0) {
ScevList.push_back(
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
return;
}
Depth--;
auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
return S.second;
};
Instruction *I = cast<Instruction>(Ptr);
unsigned Opcode = I->getOpcode();
switch (Opcode) {
case Instruction::GetElementPtr: {
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
Type *SourceTy = GEP->getSourceElementType();
if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
ScevList.push_back(
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
break;
}
SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
any_of(OffsetScevs, UndefPoisonCheck);
if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
BaseScevs.push_back(BaseScevs[0]);
else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
OffsetScevs.push_back(OffsetScevs[0]);
else {
ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
break;
}
Type *IntPtrTy = SE->getEffectiveSCEVType(
SE->getSCEV(GEP->getPointerOperand())->getType());
const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
const SCEV *Scaled1 = SE->getMulExpr(
Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
const SCEV *Scaled2 = SE->getMulExpr(
Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
ScevList.push_back(std::make_pair(
SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
ScevList.push_back(std::make_pair(
SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
break;
}
case Instruction::Select: {
SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
if (ChildScevs.size() == 2) {
ScevList.push_back(ChildScevs[0]);
ScevList.push_back(ChildScevs[1]);
} else
ScevList.push_back(
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
break;
}
default:
LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
ScevList.push_back(
std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
break;
}
}
static SmallVector<std::pair<const SCEV *, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
const ValueToValueMap &StridesMap, Value *Ptr,
const Loop *L) {
ScalarEvolution *SE = PSE.getSE();
assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
SmallVector<std::pair<const SCEV *, bool>> Scevs;
findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
if (Scevs.size() == 2)
return Scevs;
return {
std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
unsigned ASId, bool ShouldCheckWrap,
bool Assume) {
Value *Ptr = Access.getPointer();
SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
for (auto &P : TranslatedPtrs) {
const SCEV *PtrExpr = P.first;
if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
return false;
if (ShouldCheckWrap) {
if (TranslatedPtrs.size() > 1)
return false;
if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
auto *Expr = PSE.getSCEV(Ptr);
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
return false;
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
}
}
if (TranslatedPtrs.size() == 1)
TranslatedPtrs[0] = std::make_pair(
replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false);
}
for (auto &P : TranslatedPtrs) {
const SCEV *PtrExpr = P.first;
unsigned DepId;
if (isDependencyCheckNeeded()) {
Value *Leader = DepCands.getLeaderValue(Access).getPointer();
unsigned &LeaderId = DepSetId[Leader];
if (!LeaderId)
LeaderId = RunningDepId++;
DepId = LeaderId;
} else
DepId = RunningDepId++;
bool IsWrite = Access.getInt();
RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
P.second);
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
}
return true;
}
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
const ValueToValueMap &StridesMap,
Value *&UncomputablePtr, bool ShouldCheckWrap) {
bool CanDoRT = true;
bool MayNeedRTCheck = false;
if (!IsRTCheckAnalysisNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
unsigned ASId = 0;
for (auto &AS : AST) {
int NumReadPtrChecks = 0;
int NumWritePtrChecks = 0;
bool CanDoAliasSetRT = true;
++ASId;
unsigned RunningDepId = 1;
DenseMap<Value *, unsigned> DepSetId;
SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
SmallVector<MemAccessInfo, 4> AccessInfos;
for (const auto &A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
if (IsWrite)
++NumWritePtrChecks;
else
++NumReadPtrChecks;
AccessInfos.emplace_back(Ptr, IsWrite);
}
if (NumWritePtrChecks == 0 ||
(NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
assert((AS.size() <= 1 ||
all_of(AS,
[this](auto AC) {
MemAccessInfo AccessWrite(AC.getValue(), true);
return DepCands.findValue(AccessWrite) == DepCands.end();
})) &&
"Can only skip updating CanDoRT below, if all entries in AS "
"are reads or there is at most 1 entry");
continue;
}
for (auto &Access : AccessInfos) {
for (const auto &AccessTy : Accesses[Access]) {
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
DepSetId, TheLoop, RunningDepId, ASId,
ShouldCheckWrap, false)) {
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
<< *Access.getPointer() << '\n');
Retries.push_back({Access, AccessTy});
CanDoAliasSetRT = false;
}
}
}
bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
CanDoAliasSetRT = true;
for (auto Retry : Retries) {
MemAccessInfo Access = Retry.first;
Type *AccessTy = Retry.second;
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
DepSetId, TheLoop, RunningDepId, ASId,
ShouldCheckWrap, true)) {
CanDoAliasSetRT = false;
UncomputablePtr = Access.getPointer();
break;
}
}
}
CanDoRT &= CanDoAliasSetRT;
MayNeedRTCheck |= NeedsAliasSetRTCheck;
++ASId;
}
unsigned NumPointers = RtCheck.Pointers.size();
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
if (RtCheck.Pointers[i].DependencySetId ==
RtCheck.Pointers[j].DependencySetId)
continue;
if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
continue;
Value *PtrI = RtCheck.Pointers[i].PointerValue;
Value *PtrJ = RtCheck.Pointers[j].PointerValue;
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
if (ASi != ASj) {
LLVM_DEBUG(
dbgs() << "LAA: Runtime check would require comparison between"
" different address spaces\n");
return false;
}
}
}
if (MayNeedRTCheck && CanDoRT)
RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
<< " pointer comparisons.\n");
RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
if (!CanDoRTIfNeeded)
RtCheck.reset();
return CanDoRTIfNeeded;
}
void AccessAnalysis::processMemAccesses() {
LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
LLVM_DEBUG({
for (auto A : Accesses)
dbgs() << "\t" << *A.first.getPointer() << " ("
<< (A.first.getInt()
? "write"
: (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
: "read"))
<< ")\n";
});
for (const auto &AS : AST) {
bool SetHasWrite = false;
typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
UnderlyingObjToAccessMap ObjToLastAccess;
PtrAccessMap DeferredAccesses;
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
bool UseDeferred = SetIteration > 0;
PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
for (const auto &AC : S) {
if (AC.first.getPointer() != Ptr)
continue;
bool IsWrite = AC.first.getInt();
bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
if (UseDeferred && !IsReadOnlyPtr)
continue;
assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
S.count(MemAccessInfo(Ptr, false))) &&
"Alias-set pointer not in the access set?");
MemAccessInfo Access(Ptr, IsWrite);
DepCands.insert(Access);
if (!UseDeferred && IsReadOnlyPtr) {
DeferredAccesses.insert({Access, {}});
continue;
}
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
if (IsWrite)
SetHasWrite = true;
typedef SmallVector<const Value *, 16> ValueVector;
ValueVector TempObjects;
getUnderlyingObjects(Ptr, TempObjects, LI);
LLVM_DEBUG(dbgs()
<< "Underlying objects for pointer " << *Ptr << "\n");
for (const Value *UnderlyingObj : TempObjects) {
if (isa<ConstantPointerNull>(UnderlyingObj) &&
!NullPointerIsDefined(
TheLoop->getHeader()->getParent(),
UnderlyingObj->getType()->getPointerAddressSpace()))
continue;
UnderlyingObjToAccessMap::iterator Prev =
ObjToLastAccess.find(UnderlyingObj);
if (Prev != ObjToLastAccess.end())
DepCands.unionSets(Access, Prev->second);
ObjToLastAccess[UnderlyingObj] = Access;
LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
}
}
}
}
}
}
static bool isInBoundsGep(Value *Ptr) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
return GEP->isInBounds();
return false;
}
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
PredicatedScalarEvolution &PSE, const Loop *L) {
if (AR->getNoWrapFlags(SCEV::NoWrapMask))
return true;
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || !GEP->isInBounds())
return false;
Value *NonConstIndex = nullptr;
for (Value *Index : GEP->indices())
if (!isa<ConstantInt>(Index)) {
if (NonConstIndex)
return false;
NonConstIndex = Index;
}
if (!NonConstIndex)
return false;
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
if (OBO->hasNoSignedWrap() &&
isa<ConstantInt>(OBO->getOperand(1))) {
auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
}
return false;
}
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap, bool Assume,
bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return 0;
}
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)
AR = PSE.getAsAddRec(Ptr);
if (!AR) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
<< " SCEV: " << *PtrScev << "\n");
return 0;
}
if (Lp != AR->getLoop()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
<< *Ptr << " SCEV: " << *AR << "\n");
return 0;
}
unsigned AddrSpace = Ty->getPointerAddressSpace();
bool IsInBoundsGEP = isInBoundsGep(Ptr);
bool IsNoWrapAddRec = !ShouldCheckWrap ||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
if (!IsNoWrapAddRec && !IsInBoundsGEP &&
NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
} else {
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *AR << "\n");
return 0;
}
}
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
<< " SCEV: " << *AR << "\n");
return 0;
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
int64_t Size = AllocSize.getFixedSize();
const APInt &APStepVal = C->getAPInt();
if (APStepVal.getBitWidth() > 64)
return 0;
int64_t StepVal = APStepVal.getSExtValue();
int64_t Stride = StepVal / Size;
int64_t Rem = StepVal % Size;
if (Rem)
return 0;
if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
(IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
AddrSpace))) {
if (Assume) {
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
<< "inbounds or in address space 0 may wrap:\n"
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
} else
return 0;
}
return Stride;
}
Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
Value *PtrB, const DataLayout &DL,
ScalarEvolution &SE, bool StrictCheck,
bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
assert(cast<PointerType>(PtrA->getType())
->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
assert(cast<PointerType>(PtrB->getType())
->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
if (PtrA == PtrB)
return 0;
if (CheckType && ElemTyA != ElemTyB)
return None;
unsigned ASA = PtrA->getType()->getPointerAddressSpace();
unsigned ASB = PtrB->getType()->getPointerAddressSpace();
if (ASA != ASB)
return None;
unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
int Val;
if (PtrA1 == PtrB1) {
ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
if (ASA != ASB)
return None;
IdxWidth = DL.getIndexSizeInBits(ASA);
OffsetA = OffsetA.sextOrTrunc(IdxWidth);
OffsetB = OffsetB.sextOrTrunc(IdxWidth);
OffsetB -= OffsetA;
Val = OffsetB.getSExtValue();
} else {
const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
const auto *Diff =
dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
if (!Diff)
return None;
Val = Diff->getAPInt().getSExtValue();
}
int Size = DL.getTypeStoreSize(ElemTyA);
int Dist = Val / Size;
if (!StrictCheck || Dist * Size == Val)
return Dist;
return None;
}
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices) {
assert(llvm::all_of(
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
"Expected list of pointer operands.");
Value *Ptr0 = VL[0];
using DistOrdPair = std::pair<int64_t, int>;
auto Compare = llvm::less_first();
std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
Offsets.emplace(0, 0);
int Cnt = 1;
bool IsConsecutive = true;
for (auto *Ptr : VL.drop_front()) {
Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
true);
if (!Diff)
return false;
int64_t Offset = *Diff;
auto Res = Offsets.emplace(Offset, Cnt);
if (!Res.second)
return false;
IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
++Cnt;
}
SortedIndices.clear();
if (!IsConsecutive) {
SortedIndices.resize(VL.size());
Cnt = 0;
for (const std::pair<int64_t, int> &Pair : Offsets) {
SortedIndices[Cnt] = Pair.second;
++Cnt;
}
}
return true;
}
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType) {
Value *PtrA = getLoadStorePointerOperand(A);
Value *PtrB = getLoadStorePointerOperand(B);
if (!PtrA || !PtrB)
return false;
Type *ElemTyA = getLoadStoreType(A);
Type *ElemTyB = getLoadStoreType(B);
Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
true, CheckType);
return Diff && *Diff == 1;
}
void MemoryDepChecker::addAccess(StoreInst *SI) {
visitPointers(SI->getPointerOperand(), *InnermostLoop,
[this, SI](Value *Ptr) {
Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
InstMap.push_back(SI);
++AccessIdx;
});
}
void MemoryDepChecker::addAccess(LoadInst *LI) {
visitPointers(LI->getPointerOperand(), *InnermostLoop,
[this, LI](Value *Ptr) {
Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
InstMap.push_back(LI);
++AccessIdx;
});
}
MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
return VectorizationSafetyStatus::Safe;
case Unknown:
return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
bool MemoryDepChecker::Dependence::isBackward() const {
switch (Type) {
case NoDep:
case Forward:
case ForwardButPreventsForwarding:
case Unknown:
return false;
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return true;
}
llvm_unreachable("unexpected DepType!");
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
return isBackward() || Type == Unknown;
}
bool MemoryDepChecker::Dependence::isForward() const {
switch (Type) {
case Forward:
case ForwardButPreventsForwarding:
return true;
case NoDep:
case Unknown:
case BackwardVectorizable:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return false;
}
llvm_unreachable("unexpected DepType!");
}
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
uint64_t TypeByteSize) {
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
uint64_t MaxVFWithoutSLForwardIssues = std::min(
VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
VF *= 2) {
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
MaxVFWithoutSLForwardIssues = (VF >> 1);
break;
}
}
if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
LLVM_DEBUG(
dbgs() << "LAA: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}
if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
MaxVFWithoutSLForwardIssues !=
VectorizerParams::MaxVectorWidth * TypeByteSize)
MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
return false;
}
void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
if (Status < S)
Status = S;
}
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
const SCEV &BackedgeTakenCount,
const SCEV &Dist, uint64_t Stride,
uint64_t TypeByteSize) {
const uint64_t ByteStride = Stride * TypeByteSize;
const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
const SCEV *CastedDist = &Dist;
const SCEV *CastedProduct = Product;
uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
if (DistTypeSizeBits > ProductTypeSizeBits)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
else
CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
if (SE.isKnownPositive(Minus))
return true;
const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
Minus = SE.getMinusSCEV(NegDist, CastedProduct);
if (SE.isKnownPositive(Minus))
return true;
return false;
}
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
uint64_t TypeByteSize) {
assert(Stride > 1 && "The stride must be greater than 1");
assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
assert(Distance > 0 && "The distance must be non-zero");
if (Distance % TypeByteSize)
return false;
uint64_t ScaledDist = Distance / TypeByteSize;
return ScaledDist % Stride;
}
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx,
const ValueToValueMap &Strides) {
assert (AIdx < BIdx && "Must pass arguments in program order");
Value *APtr = A.getPointer();
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
Type *ATy = getLoadStoreType(InstMap[AIdx]);
Type *BTy = getLoadStoreType(InstMap[BIdx]);
if (!AIsWrite && !BIsWrite)
return Dependence::NoDep;
if (APtr->getType()->getPointerAddressSpace() !=
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
int64_t StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
int64_t StrideBPtr =
getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
if (StrideAPtr < 0) {
std::swap(APtr, BPtr);
std::swap(ATy, BTy);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
std::swap(StrideAPtr, StrideBPtr);
}
const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return Dependence::Unknown;
}
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
bool HasSameSize =
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
uint64_t Stride = std::abs(StrideAPtr);
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
isSafeDependenceDistance(DL, *(PSE.getSE()),
*(PSE.getBackedgeTakenCount()), *Dist, Stride,
TypeByteSize))
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
FoundNonConstantDistanceDependence = true;
return Dependence::Unknown;
}
const APInt &Val = C->getAPInt();
int64_t Distance = Val.getSExtValue();
if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
}
if (Val.isNegative()) {
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
!HasSameSize)) {
LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
}
LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
return Dependence::Forward;
}
if (Val == 0) {
if (HasSameSize)
return Dependence::Forward;
LLVM_DEBUG(
dbgs() << "LAA: Zero dependence difference but different type sizes\n");
return Dependence::Unknown;
}
assert(Val.isStrictlyPositive() && "Expect a positive value");
if (!HasSameSize) {
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
"different type sizes\n");
return Dependence::Unknown;
}
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
VectorizerParams::VectorizationFactor : 1);
unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
VectorizerParams::VectorizationInterleave : 1);
unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
uint64_t MinDistanceNeeded =
TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n');
return Dependence::Backward;
}
if (MinDistanceNeeded > MaxSafeDepDistBytes) {
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
<< MinDistanceNeeded << " size in bytes");
return Dependence::Backward;
}
MaxSafeDepDistBytes =
std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
couldPreventStoreLoadForward(Distance, TypeByteSize))
return Dependence::BackwardVectorizableButPreventsForwarding;
uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
<< " with max VF = " << MaxVF << '\n');
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
return Dependence::BackwardVectorizable;
}
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
for (MemAccessInfo CurAccess : CheckDeps) {
if (Visited.count(CurAccess))
continue;
EquivalenceClasses<MemAccessInfo>::iterator I =
AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
EquivalenceClasses<MemAccessInfo>::member_iterator AI =
AccessSets.member_begin(I);
EquivalenceClasses<MemAccessInfo>::member_iterator AE =
AccessSets.member_end();
while (AI != AE) {
Visited.insert(*AI);
bool AIIsWrite = AI->getInt();
EquivalenceClasses<MemAccessInfo>::member_iterator OI =
(AIIsWrite ? AI : std::next(AI));
while (OI != AE) {
for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
for (std::vector<unsigned>::iterator
I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
I2E = (OI == AI ? I1E : Accesses[*OI].end());
I2 != I2E; ++I2) {
auto A = std::make_pair(&*AI, *I1);
auto B = std::make_pair(&*OI, *I2);
assert(*I1 != *I2);
if (*I1 > *I2)
std::swap(A, B);
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
mergeInStatus(Dependence::isSafeForVectorization(Type));
if (RecordDependences) {
if (Type != Dependence::NoDep)
Dependences.push_back(Dependence(A.second, B.second, Type));
if (Dependences.size() >= MaxDependences) {
RecordDependences = false;
Dependences.clear();
LLVM_DEBUG(dbgs()
<< "Too many dependences, stopped recording\n");
}
}
if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
}
AI++;
}
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return isSafeForVectorization();
}
SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
MemAccessInfo Access(Ptr, isWrite);
auto &IndexVector = Accesses.find(Access)->second;
SmallVector<Instruction *, 4> Insts;
transform(IndexVector,
std::back_inserter(Insts),
[&](unsigned Idx) { return this->InstMap[Idx]; });
return Insts;
}
const char *MemoryDepChecker::Dependence::DepName[] = {
"NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
"BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
void MemoryDepChecker::Dependence::print(
raw_ostream &OS, unsigned Depth,
const SmallVectorImpl<Instruction *> &Instrs) const {
OS.indent(Depth) << DepName[Type] << ":\n";
OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}
bool LoopAccessInfo::canAnalyzeLoop() {
LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
<< TheLoop->getHeader()->getParent()->getName() << ": "
<< TheLoop->getHeader()->getName() << '\n');
if (!TheLoop->isInnermost()) {
LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
}
if (TheLoop->getNumBackEdges() != 1) {
LLVM_DEBUG(
dbgs() << "LAA: loop control flow is not understood by analyzer\n");
recordAnalysis("CFGNotUnderstood")
<< "loop control flow is not understood by analyzer";
return false;
}
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
if (isa<SCEVCouldNotCompute>(ExitCount)) {
recordAnalysis("CantComputeNumberOfIterations")
<< "could not determine number of loop iterations";
LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
return true;
}
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
const TargetLibraryInfo *TLI,
DominatorTree *DT) {
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
bool HasComplexMemInst = false;
HasConvergentOp = false;
PtrRtChecking->Pointers.clear();
PtrRtChecking->Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
const bool EnableMemAccessVersioningOfLoop =
EnableMemAccessVersioning &&
!TheLoop->getHeader()->getParent()->hasOptSize();
for (BasicBlock *BB : TheLoop->blocks()) {
for (Instruction &I : *BB) {
if (auto *Call = dyn_cast<CallBase>(&I)) {
if (Call->isConvergent())
HasConvergentOp = true;
}
if (HasComplexMemInst && HasConvergentOp) {
CanVecMem = false;
return;
}
if (HasComplexMemInst)
continue;
if (I.mayReadFromMemory()) {
auto *Call = dyn_cast<CallInst>(&I);
if (Call && getVectorIntrinsicIDForCall(Call, TLI))
continue;
if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
!VFDatabase::getMappings(*Call).empty())
continue;
auto *Ld = dyn_cast<LoadInst>(&I);
if (!Ld) {
recordAnalysis("CantVectorizeInstruction", Ld)
<< "instruction cannot be vectorized";
HasComplexMemInst = true;
continue;
}
if (!Ld->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleLoad", Ld)
<< "read with atomic ordering or volatile read";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
HasComplexMemInst = true;
continue;
}
NumLoads++;
Loads.push_back(Ld);
DepChecker->addAccess(Ld);
if (EnableMemAccessVersioningOfLoop)
collectStridedAccess(Ld);
continue;
}
if (I.mayWriteToMemory()) {
auto *St = dyn_cast<StoreInst>(&I);
if (!St) {
recordAnalysis("CantVectorizeInstruction", St)
<< "instruction cannot be vectorized";
HasComplexMemInst = true;
continue;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleStore", St)
<< "write with atomic ordering or volatile write";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
HasComplexMemInst = true;
continue;
}
NumStores++;
Stores.push_back(St);
DepChecker->addAccess(St);
if (EnableMemAccessVersioningOfLoop)
collectStridedAccess(St);
}
} }
if (HasComplexMemInst) {
CanVecMem = false;
return;
}
if (!Stores.size()) {
LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
CanVecMem = true;
return;
}
MemoryDepChecker::DepCandidates DependentAccesses;
AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
SmallSet<std::pair<Value *, Type *>, 16> Seen;
SmallPtrSet<Value *, 16> UniformStores;
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
!UniformStores.insert(Ptr).second;
}
Type *AccessTy = getLoadStoreType(ST);
if (Seen.insert({Ptr, AccessTy}).second) {
++NumReadWrites;
MemoryLocation Loc = MemoryLocation::get(ST);
if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
[&Accesses, AccessTy, Loc](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
Accesses.addStore(NewLoc, AccessTy);
});
}
}
if (IsAnnotatedParallel) {
LLVM_DEBUG(
dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
CanVecMem = true;
return;
}
for (LoadInst *LD : Loads) {
Value *Ptr = LD->getPointerOperand();
bool IsReadOnlyPtr = false;
Type *AccessTy = getLoadStoreType(LD);
if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
}
if (UniformStores.count(Ptr)) {
LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n");
HasDependenceInvolvingLoopInvariantAddress = true;
}
MemoryLocation Loc = MemoryLocation::get(LD);
if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
[&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
});
}
if (NumReadWrites == 1 && NumReads == 0) {
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
CanVecMem = true;
return;
}
Accesses.buildDependenceSets();
Value *UncomputablePtr = nullptr;
bool CanDoRTIfNeeded =
Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
SymbolicStrides, UncomputablePtr, false);
if (!CanDoRTIfNeeded) {
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
recordAnalysis("CantIdentifyArrayBounds", I)
<< "cannot identify array bounds";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
CanVecMem = false;
return;
}
LLVM_DEBUG(
dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
CanVecMem = DepChecker->areDepsSafe(
DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
Accesses.resetDepChecks(*DepChecker);
PtrRtChecking->reset();
PtrRtChecking->Need = true;
auto *SE = PSE->getSE();
UncomputablePtr = nullptr;
CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
*PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
if (!CanDoRTIfNeeded) {
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
recordAnalysis("CantCheckMemDepsAtRunTime", I)
<< "cannot check memory dependencies at runtime";
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
return;
}
CanVecMem = true;
}
}
if (HasConvergentOp) {
recordAnalysis("CantInsertRuntimeCheckWithConvergent")
<< "cannot add control dependency to convergent operation";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n");
CanVecMem = false;
return;
}
if (CanVecMem)
LLVM_DEBUG(
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
else
emitUnsafeDependenceRemark();
}
void LoopAccessInfo::emitUnsafeDependenceRemark() {
auto Deps = getDepChecker().getDependences();
if (!Deps)
return;
auto Found = std::find_if(
Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) {
return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
MemoryDepChecker::VectorizationSafetyStatus::Safe;
});
if (Found == Deps->end())
return;
MemoryDepChecker::Dependence Dep = *Found;
LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
OptimizationRemarkAnalysis &R =
recordAnalysis("UnsafeDep", Dep.getDestination(*this))
<< "unsafe dependent memory operations in loop. Use "
"#pragma loop distribute(enable) to allow loop distribution "
"to attempt to isolate the offending operations into a separate "
"loop";
switch (Dep.Type) {
case MemoryDepChecker::Dependence::NoDep:
case MemoryDepChecker::Dependence::Forward:
case MemoryDepChecker::Dependence::BackwardVectorizable:
llvm_unreachable("Unexpected dependence");
case MemoryDepChecker::Dependence::Backward:
R << "\nBackward loop carried data dependence.";
break;
case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
R << "\nForward loop carried data dependence that prevents "
"store-to-load forwarding.";
break;
case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
R << "\nBackward loop carried data dependence that prevents "
"store-to-load forwarding.";
break;
case MemoryDepChecker::Dependence::Unknown:
R << "\nUnknown data dependence.";
break;
}
if (Instruction *I = Dep.getSource(*this)) {
DebugLoc SourceLoc = I->getDebugLoc();
if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
SourceLoc = DD->getDebugLoc();
if (SourceLoc)
R << " Memory location is the same as accessed at "
<< ore::NV("Location", SourceLoc);
}
}
bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
DominatorTree *DT) {
assert(TheLoop->contains(BB) && "Unknown block used");
BasicBlock* Latch = TheLoop->getLoopLatch();
return !DT->dominates(BB, Latch);
}
OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
Instruction *I) {
assert(!Report && "Multiple reports generated");
Value *CodeRegion = TheLoop->getHeader();
DebugLoc DL = TheLoop->getStartLoc();
if (I) {
CodeRegion = I->getParent();
if (I->getDebugLoc())
DL = I->getDebugLoc();
}
Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
CodeRegion);
return *Report;
}
bool LoopAccessInfo::isUniform(Value *V) const {
auto *SE = PSE->getSE();
if (!SE->isSCEVable(V->getType()))
return false;
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
Value *Ptr = getLoadStorePointerOperand(MemAccess);
if (!Ptr)
return;
Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
if (!Stride)
return;
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
"versioning:");
LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
const SCEV *StrideExpr = PSE->getSCEV(Stride);
const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
const SCEV *CastedStride = StrideExpr;
const SCEV *CastedBECount = BETakenCount;
ScalarEvolution *SE = PSE->getSE();
if (BETypeSizeBits >= StrideTypeSizeBits)
CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
else
CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
if (SE->isKnownPositive(StrideMinusBETaken)) {
LLVM_DEBUG(
dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
"Stride==1 predicate will imply that the loop executes "
"at most once.\n");
return;
}
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
SymbolicStrides[Ptr] = Stride;
StrideSet.insert(Stride);
}
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AAResults *AA,
DominatorTree *DT, LoopInfo *LI)
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
PtrRtChecking(nullptr),
DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop()) {
analyzeLoop(AA, LI, TLI, DT);
}
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
OS.indent(Depth) << "Memory dependences are safe";
if (MaxSafeDepDistBytes != -1ULL)
OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
<< " bytes";
if (PtrRtChecking->Need)
OS << " with run-time checks";
OS << "\n";
}
if (HasConvergentOp)
OS.indent(Depth) << "Has convergent operation in loop\n";
if (Report)
OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
if (auto *Dependences = DepChecker->getDependences()) {
OS.indent(Depth) << "Dependences:\n";
for (const auto &Dep : *Dependences) {
Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
OS << "\n";
}
} else
OS.indent(Depth) << "Too many dependences, not recorded\n";
PtrRtChecking->print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Non vectorizable stores to invariant address were "
<< (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
PSE->getPredicate().print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Expressions re-written:\n";
PSE->print(OS, Depth);
}
LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}
const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
if (!LAI)
LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
return *LAI;
}
void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
for (Loop *TopLevelLoop : *LI)
for (Loop *L : depth_first(TopLevelLoop)) {
OS.indent(2) << L->getHeader()->getName() << ":\n";
auto &LAI = LAA.getInfo(L);
LAI.print(OS, 4);
}
}
bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"
INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
AnalysisKey LoopAccessAnalysis::Key;
LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR) {
return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
}
namespace llvm {
Pass *createLAAPass() {
return new LoopAccessLegacyAnalysis();
}
}