#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
using namespace llvm;
using VectorParts = SmallVector<Value *, 2>;
extern cl::opt<bool> EnableVPlanNativePath;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayWriteToMemory();
case VPBranchOnMaskSC:
return false;
case VPWidenIntOrFpInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayWriteToMemory()) &&
"underlying instruction may write to memory");
return false;
}
default:
return true;
}
}
bool VPRecipeBase::mayReadFromMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayReadFromMemory();
case VPBranchOnMaskSC:
return false;
case VPWidenIntOrFpInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayReadFromMemory()) &&
"underlying instruction may read from memory");
return false;
}
default:
return true;
}
}
bool VPRecipeBase::mayHaveSideEffects() const {
switch (getVPDefID()) {
case VPWidenIntOrFpInductionSC:
case VPWidenPointerInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC:
case VPScalarIVStepsSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayHaveSideEffects()) &&
"underlying instruction has side-effects");
return false;
}
case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
default:
return true;
}
}
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
VPValue *ExitValue = getOperand(0);
if (Plan.isUniformAfterVectorization(ExitValue))
Lane = VPLane::getFirstLane();
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
State.Builder.GetInsertBlock());
}
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
"Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(I == BB.end() || I->getParent() == &BB);
Parent = &BB;
BB.getRecipeList().insert(I, this);
}
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
"Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}
void VPRecipeBase::removeFromParent() {
assert(getParent() && "Recipe not in any VPBasicBlock");
getParent()->getRecipeList().remove(getIterator());
Parent = nullptr;
}
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
assert(getParent() && "Recipe not in any VPBasicBlock");
return getParent()->getRecipeList().erase(getIterator());
}
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
removeFromParent();
insertAfter(InsertPos);
}
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
removeFromParent();
insertBefore(BB, I);
}
void VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(DL);
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
Value *V =
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
State.set(this, V, Part);
return;
}
switch (getOpcode()) {
case VPInstruction::Not: {
Value *A = State.get(getOperand(0), Part);
Value *V = Builder.CreateNot(A, Name);
State.set(this, V, Part);
break;
}
case VPInstruction::ICmpULE: {
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
Value *V = Builder.CreateICmpULE(IV, TC, Name);
State.set(this, V, Part);
break;
}
case Instruction::Select: {
Value *Cond = State.get(getOperand(0), Part);
Value *Op1 = State.get(getOperand(1), Part);
Value *Op2 = State.get(getOperand(2), Part);
Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
State.set(this, V, Part);
break;
}
case VPInstruction::ActiveLaneMask: {
Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
Value *ScalarTC = State.get(getOperand(1), Part);
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
auto *PredTy = VectorType::get(Int1Ty, State.VF);
Instruction *Call = Builder.CreateIntrinsic(
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, Name);
State.set(this, Call, Part);
break;
}
case VPInstruction::FirstOrderRecurrenceSplice: {
auto *V1 = State.get(getOperand(0), 0);
Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
if (!PartMinus1->getType()->isVectorTy()) {
State.set(this, PartMinus1, Part);
} else {
Value *V2 = State.get(getOperand(1), Part);
State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
Part);
}
break;
}
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW: {
Value *Next = nullptr;
if (Part == 0) {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
auto *Phi = State.get(getOperand(0), 0);
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
} else {
Next = State.get(this, 0);
}
State.set(this, Next, Part);
break;
}
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::CanonicalIVIncrementForPartNUW: {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
auto *IV = State.get(getOperand(0), VPIteration(0, 0));
if (Part == 0) {
State.set(this, IV, Part);
break;
}
Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
State.set(this, Next, Part);
break;
}
case VPInstruction::BranchOnCond: {
if (Part != 0)
break;
Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
VPRegionBlock *ParentRegion = getParent()->getParent();
VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
BranchInst *CondBr =
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
if (getParent()->isExiting())
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
break;
}
case VPInstruction::BranchOnCount: {
if (Part != 0)
break;
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
Value *Cond = Builder.CreateICmpEQ(IV, TC);
auto *Plan = getParent()->getPlan();
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
State.CFG.VPBB2IRBB[Header]);
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
break;
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
}
void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(FMF);
for (unsigned Part = 0; Part < State.UF; ++Part)
generateInstruction(State, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
VPSlotTracker SlotTracker(getParent()->getPlan());
print(dbgs(), "", SlotTracker);
}
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
if (hasResult()) {
printAsOperand(O, SlotTracker);
O << " = ";
}
switch (getOpcode()) {
case VPInstruction::Not:
O << "not";
break;
case VPInstruction::ICmpULE:
O << "icmp ule";
break;
case VPInstruction::SLPLoad:
O << "combined load";
break;
case VPInstruction::SLPStore:
O << "combined store";
break;
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
case VPInstruction::CanonicalIVIncrement:
O << "VF * UF + ";
break;
case VPInstruction::CanonicalIVIncrementNUW:
O << "VF * UF +(nuw) ";
break;
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
case VPInstruction::CanonicalIVIncrementForPart:
O << "VF * Part + ";
break;
case VPInstruction::CanonicalIVIncrementForPartNUW:
O << "VF * Part +(nuw) ";
break;
case VPInstruction::BranchOnCount:
O << "branch-on-count ";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
O << FMF;
for (const VPValue *Operand : operands()) {
O << " ";
Operand->printAsOperand(O, SlotTracker);
}
if (DL) {
O << ", !dbg ";
DL.print(O);
}
}
#endif
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FCmp) &&
"this op can't take fast-math flags");
FMF = FMFNew;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CALL ";
auto *CI = cast<CallInst>(getUnderlyingInstr());
if (CI->getType()->isVoidTy())
O << "void ";
else {
printAsOperand(O, SlotTracker);
O << " = ";
}
O << "call @" << CI->getCalledFunction()->getName() << "(";
printOperands(O, SlotTracker);
O << ")";
}
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-SELECT ";
printAsOperand(O, SlotTracker);
O << " = select ";
getOperand(0)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(1)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(2)->printAsOperand(O, SlotTracker);
O << (InvariantCond ? " (condition is loop invariant)" : "");
}
#endif
void VPWidenSelectRecipe::execute(VPTransformState &State) {
auto &I = *cast<SelectInst>(getUnderlyingInstr());
State.setDebugLocFromInst(&I);
auto *InvarCond =
InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
Value *Op0 = State.get(getOperand(1), Part);
Value *Op1 = State.get(getOperand(2), Part);
Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
State.set(this, Sel, Part);
State.addMetadata(Sel, &I);
}
}
void VPWidenRecipe::execute(VPTransformState &State) {
auto &I = *cast<Instruction>(getUnderlyingValue());
auto &Builder = State.Builder;
switch (I.getOpcode()) {
case Instruction::Call:
case Instruction::Br:
case Instruction::PHI:
case Instruction::GetElementPtr:
case Instruction::Select:
llvm_unreachable("This instruction is handled by a different recipe.");
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::FNeg:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
State.setDebugLocFromInst(&I);
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Value *, 2> Ops;
for (VPValue *VPOp : operands())
Ops.push_back(State.get(VPOp, Part));
Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
if (auto *VecOp = dyn_cast<Instruction>(V)) {
VecOp->copyIRFlags(&I);
if (State.MayGeneratePoisonRecipes.contains(this))
VecOp->dropPoisonGeneratingFlags();
}
State.set(this, V, Part);
State.addMetadata(V, &I);
}
break;
}
case Instruction::Freeze: {
State.setDebugLocFromInst(&I);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Op = State.get(getOperand(0), Part);
Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze, Part);
}
break;
}
case Instruction::ICmp:
case Instruction::FCmp: {
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = cast<CmpInst>(&I);
State.setDebugLocFromInst(Cmp);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
Value *C = nullptr;
if (FCmp) {
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
Builder.setFastMathFlags(Cmp->getFastMathFlags());
C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
} else {
C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
State.set(this, C, Part);
State.addMetadata(C, &I);
}
break;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto *CI = cast<CastInst>(&I);
State.setDebugLocFromInst(CI);
Type *DestTy = (State.VF.isScalar())
? CI->getType()
: VectorType::get(CI->getType(), State.VF);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
State.set(this, Cast, Part);
State.addMetadata(Cast, &I);
}
break;
}
default:
LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
llvm_unreachable("Unhandled instruction!");
} }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
printOperands(O, SlotTracker);
}
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-INDUCTION";
if (getTruncInst()) {
O << "\\l\"";
O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
O << " +\n" << Indent << "\" ";
getVPValue(0)->printAsOperand(O, SlotTracker);
} else
O << " " << VPlanIngredient(IV);
O << ", ";
getStepValue()->printAsOperand(O, SlotTracker);
}
#endif
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
return StartC && StartC->isZero() && StepC && StepC->isOne();
}
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}
bool VPScalarIVStepsRecipe::isCanonical() const {
auto *CanIV = getCanonicalIV();
if (CanIV->getStartValue() != getStartValue())
return false;
auto *StepVPV = getStepValue();
if (StepVPV->getDef())
return false;
auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
return StepC && StepC->isOne();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << Indent << "= SCALAR-STEPS ";
printOperands(O, SlotTracker);
}
#endif
void VPWidenGEPRecipe::execute(VPTransformState &State) {
auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
auto *Clone = State.Builder.Insert(GEP->clone());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
State.set(this, EntryPart, Part);
State.addMetadata(EntryPart, GEP);
}
} else {
for (unsigned Part = 0; Part < State.UF; ++Part) {
auto *Ptr = IsPtrLoopInvariant
? State.get(getOperand(0), VPIteration(0, 0))
: State.get(getOperand(0), Part);
SmallVector<Value *, 4> Indices;
for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
VPValue *Operand = getOperand(I);
if (IsIndexLoopInvariant[I - 1])
Indices.push_back(State.get(Operand, VPIteration(0, 0)));
else
Indices.push_back(State.get(Operand, Part));
}
bool IsInBounds =
GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
Indices, "", IsInBounds);
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP, Part);
State.addMetadata(NewGEP, GEP);
}
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
O << (IsPtrLoopInvariant ? "Inv" : "Var");
size_t IndicesNumber = IsIndexLoopInvariant.size();
for (size_t I = 0; I < IndicesNumber; ++I)
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
O << " ";
printAsOperand(O, SlotTracker);
O << " = getelementptr ";
printOperands(O, SlotTracker);
}
#endif
void VPBlendRecipe::execute(VPTransformState &State) {
State.setDebugLocFromInst(Phi);
unsigned NumIncoming = getNumIncomingValues();
VectorParts Entry(State.UF);
for (unsigned In = 0; In < NumIncoming; ++In) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *In0 = State.get(getIncomingValue(In), Part);
if (In == 0)
Entry[Part] = In0; else {
Value *Cond = State.get(getMask(In), Part);
Entry[Part] =
State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
}
}
}
for (unsigned Part = 0; Part < State.UF; ++Part)
State.set(this, Entry[Part], Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "BLEND ";
Phi->printAsOperand(O, false);
O << " =";
if (getNumIncomingValues() == 1) {
O << " ";
getIncomingValue(0)->printAsOperand(O, SlotTracker);
} else {
for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
O << " ";
getIncomingValue(I)->printAsOperand(O, SlotTracker);
O << "/";
getMask(I)->printAsOperand(O, SlotTracker);
}
}
}
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
if (isa<FPMathOperator>(getUnderlyingInstr()))
O << getUnderlyingInstr()->getFastMathFlags();
O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (getCondOp()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc->IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
if (!getUnderlyingInstr()->getType()->isVoidTy()) {
printAsOperand(O, SlotTracker);
O << " = ";
}
if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
O << "call @" << CB->getCalledFunction()->getName() << "(";
interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
O, [&O, &SlotTracker](VPValue *Op) {
Op->printAsOperand(O, SlotTracker);
});
O << ")";
} else {
O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
printOperands(O, SlotTracker);
}
if (AlsoPack)
O << " (S->V)";
}
#endif
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Branch on Mask works only on single instance.");
unsigned Part = State.Instance->Part;
unsigned Lane = State.Instance->Lane.getKnownLane();
Value *ConditionBit = nullptr;
VPValue *BlockInMask = getMask();
if (BlockInMask) {
ConditionBit = State.get(BlockInMask, Part);
if (ConditionBit->getType()->isVectorTy())
ConditionBit = State.Builder.CreateExtractElement(
ConditionBit, State.Builder.getInt32(Lane));
} else ConditionBit = State.Builder.getTrue();
auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
assert(isa<UnreachableInst>(CurrentTerminator) &&
"Expected to replace unreachable terminator with conditional branch.");
auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
CondBr->setSuccessor(0, nullptr);
ReplaceInstWithInst(CurrentTerminator, CondBr);
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
cast<Instruction>(State.get(getOperand(0), *State.Instance));
BasicBlock *PredicatedBB = ScalarPredInst->getParent();
BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
assert(PredicatingBB && "Predicated block has no single predecessor.");
assert(isa<VPReplicateRecipe>(getOperand(0)) &&
"operand must be VPReplicateRecipe");
unsigned Part = State.Instance->Part;
if (State.hasVectorValue(getOperand(0), Part)) {
Value *VectorValue = State.get(getOperand(0), Part);
InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); VPhi->addIncoming(IEI, PredicatedBB); if (State.hasVectorValue(this, Part))
State.reset(this, VPhi, Part);
else
State.set(this, VPhi, Part);
State.reset(getOperand(0), VPhi, Part);
} else {
Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
PredicatingBB);
Phi->addIncoming(ScalarPredInst, PredicatedBB);
if (State.hasScalarValue(this, *State.Instance))
State.reset(this, Phi, *State.Instance);
else
State.set(this, Phi, *State.Instance);
State.reset(getOperand(0), Phi, *State.Instance);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "PHI-PREDICATED-INSTRUCTION ";
printAsOperand(O, SlotTracker);
O << " = ";
printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
if (!isStore()) {
getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = ";
}
O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
printOperands(O, SlotTracker);
}
#endif
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *EntryPart = PHINode::Create(
Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(DL);
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = CANONICAL-INDUCTION";
}
#endif
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
return IsScalarAfterVectorization &&
(!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
O << ", " << *IndDesc.getStep();
}
#endif
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "cannot be used in per-lane");
const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, Res, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = EXPAND SCEV " << *Expr;
}
#endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.get(getOperand(0), 0);
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
Value *VStart = VF.isScalar()
? CanonicalIV
: Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
Value *VStep = createStepForVF(Builder, STy, VF, Part);
if (VF.isVector()) {
VStep = Builder.CreateVectorSplat(VF, VStep);
VStep =
Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
}
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
State.set(this, CanonicalVectorIV, Part);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-CANONICAL-INDUCTION ";
printOperands(O, SlotTracker);
}
#endif
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
auto *VectorInit = getStartValue()->getLiveInIRValue();
Type *VecTy = State.VF.isScalar()
? VectorInit->getType()
: VectorType::get(VectorInit->getType(), State.VF);
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
if (State.VF.isVector()) {
auto *IdxTy = Builder.getInt32Ty();
auto *One = ConstantInt::get(IdxTy, 1);
IRBuilder<>::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
VectorInit = Builder.CreateInsertElement(
PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
}
PHINode *EntryPart = PHINode::Create(
VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
EntryPart->addIncoming(VectorInit, VectorPH);
State.set(this, EntryPart, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
PHINode *PN = cast<PHINode>(getUnderlyingValue());
auto &Builder = State.Builder;
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
Type *VecTy =
ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
"recipe must be in the vector loop header");
unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *EntryPart =
PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
}
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
VPValue *StartVPV = getStartValue();
Value *StartV = StartVPV->getLiveInIRValue();
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
if (ScalarPHI) {
Iden = StartV;
} else {
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
StartV = Iden =
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
} else {
Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
RdxDesc.getFastMathFlags());
if (!ScalarPHI) {
Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
Constant *Zero = Builder.getInt32(0);
StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
}
}
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *EntryPart = State.get(this, Part);
Value *StartVal = (Part == 0) ? StartV : Iden;
cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-REDUCTION-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
assert(EnableVPlanNativePath &&
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
VPBasicBlock *Parent = getParent();
VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
unsigned StartIdx = 0;
if (LoopRegion->getEntryBasicBlock() == Parent) {
for (unsigned I = 0; I < getNumOperands(); ++I) {
if (getIncomingBlock(I) ==
LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
StartIdx = I;
}
}
Value *Op0 = State.get(getOperand(StartIdx), 0);
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
State.set(this, VecPhi, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-PHI ";
auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
if (getNumOperands() != OriginalPhi->getNumOperands()) {
O << VPlanIngredient(OriginalPhi);
return;
}
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
Value *StartMask = State.get(getOperand(0), Part);
PHINode *EntryPart =
State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
EntryPart->addIncoming(StartMask, VectorPH);
EntryPart->setDebugLoc(DL);
State.set(this, EntryPart, Part);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "ACTIVE-LANE-MASK-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif