#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "legalizer"
using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;
/// Work out how a value of type \p OrigTy divides into pieces of \p NarrowTy.
///
/// \param OrigTy     Type being split; must be strictly wider than NarrowTy.
/// \param NarrowTy   Type of each full-sized piece.
/// \param LeftoverTy Out argument; must be invalid on entry. Set to the type of
///                   the remainder piece when the split is uneven.
/// \returns {number of NarrowTy pieces, number of leftover pieces}, or
///          {-1, -1} when NarrowTy is a vector and the remaining bits are not
///          a whole number of OrigTy elements.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  const unsigned OrigBits = OrigTy.getSizeInBits();
  const unsigned PieceBits = NarrowTy.getSizeInBits();
  const unsigned WholePieces = OrigBits / PieceBits;
  const unsigned RemainingBits = OrigBits % PieceBits;
  assert(OrigBits > PieceBits);

  // Even split: no leftover type is produced.
  if (RemainingBits == 0)
    return std::make_pair(static_cast<int>(WholePieces), 0);

  if (!NarrowTy.isVector()) {
    LeftoverTy = LLT::scalar(RemainingBits);
  } else {
    // The remainder has to be expressible in whole elements of the original.
    const unsigned EltBits = OrigTy.getScalarSizeInBits();
    if (RemainingBits % EltBits != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(RemainingBits / EltBits), EltBits);
  }

  const int LeftoverPieces = RemainingBits / LeftoverTy.getSizeInBits();
  return std::make_pair(static_cast<int>(WholePieces), LeftoverPieces);
}
/// Return the IR floating-point type whose bit width equals that of the scalar
/// \p Ty, or nullptr if \p Ty is not a scalar or has a width other than
/// 16, 32, 64, 80 or 128 bits.
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  const uint64_t Bits = Ty.getSizeInBits();
  if (Bits == 16)
    return Type::getHalfTy(Ctx);
  if (Bits == 32)
    return Type::getFloatTy(Ctx);
  if (Bits == 64)
    return Type::getDoubleTy(Ctx);
  if (Bits == 80)
    return Type::getX86_FP80Ty(Ctx);
  if (Bits == 128)
    return Type::getFP128Ty(Ctx);
  return nullptr;
}
/// Construct a helper for \p MF that reports changes to \p Observer and emits
/// new instructions through \p Builder. The register info is taken from \p MF;
/// the LegalizerInfo and TargetLowering are both taken from \p MF's subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
LI(*MF.getSubtarget().getLegalizerInfo()),
TLI(*MF.getSubtarget().getTargetLowering()) { }
/// Construct a helper for \p MF that uses the caller-supplied LegalizerInfo
/// \p LI instead of the one owned by the subtarget. Changes are reported to
/// \p Observer and new instructions are emitted through \p B. The register
/// info comes from \p MF and the TargetLowering from \p MF's subtarget.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
: MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
TLI(*MF.getSubtarget().getTargetLowering()) { }
/// Perform one legalization step on \p MI.
///
/// G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS are handed straight to the
/// LegalizerInfo's legalizeIntrinsic hook. For every other opcode the
/// LegalizerInfo is asked for an action, which is dispatched to the matching
/// helper on this class.
///
/// \param MI          Instruction to legalize; the builder is positioned at it.
/// \param LocObserver Forwarded to libcall() for the Libcall action.
/// \returns AlreadyLegal, Legalized or UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  const unsigned Opc = MI.getOpcode();
  const bool IsIntrinsic = Opc == TargetOpcode::G_INTRINSIC ||
                           Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
  if (IsIntrinsic) {
    if (LI.legalizeIntrinsic(*this, MI))
      return Legalized;
    return UnableToLegalize;
  }

  const auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal: {
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  }
  case Libcall: {
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  }
  case NarrowScalar: {
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  }
  case WidenScalar: {
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  }
  case Bitcast: {
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  }
  case Lower: {
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  }
  case FewerElements: {
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  }
  case MoreElements: {
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  }
  case Custom: {
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    if (LI.legalizeCustom(*this, MI))
      return Legalized;
    return UnableToLegalize;
  }
  default:
    break;
  }

  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
  return UnableToLegalize;
}
/// Split \p Reg into \p NumParts pieces of type \p Ty with a single unmerge.
/// The newly created piece registers are appended to \p VRegs; the unmerge
/// defines every register held in \p VRegs afterwards.
void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  int Remaining = NumParts;
  while (Remaining > 0) {
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
    --Remaining;
  }
  MIRBuilder.buildUnmerge(VRegs, Reg);
}
/// Split \p Reg (of type \p RegTy) into as many \p MainTy pieces as fit, plus
/// a leftover piece when the sizes do not divide evenly.
///
/// \param LeftoverTy   Out argument; must be invalid on entry. Left invalid
///                     when the split is even, otherwise set to the leftover
///                     piece's type.
/// \param VRegs        Receives the MainTy-sized pieces.
/// \param LeftoverRegs Receives the leftover piece(s), if any.
/// \returns true on every path visible here.
bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  const unsigned TotalBits = RegTy.getSizeInBits();
  const unsigned PieceBits = MainTy.getSizeInBits();
  const unsigned PieceCount = TotalBits / PieceBits;
  const unsigned ExtraBits = TotalBits - PieceCount * PieceBits;

  // Even split: a single unmerge into MainTy pieces is enough.
  if (ExtraBits == 0) {
    extractParts(Reg, MainTy, PieceCount, VRegs);
    return true;
  }

  if (MainTy.isVector()) {
    // extractVectorParts yields the full-sized sub-vectors followed by the
    // leftover as the final entry.
    SmallVector<Register, 8> Pieces;
    extractVectorParts(Reg, MainTy.getNumElements(), Pieces);
    const ArrayRef<Register> FullPieces =
        ArrayRef<Register>(Pieces).drop_back();
    VRegs.append(FullPieces.begin(), FullPieces.end());
    LeftoverRegs.push_back(Pieces.back());
    LeftoverTy = MRI.getType(LeftoverRegs[0]);
    return true;
  }

  // Irregular scalar split: pull each piece out with an explicit extract.
  LeftoverTy = LLT::scalar(ExtraBits);
  unsigned BitOffset = 0;
  for (unsigned Idx = 0; Idx != PieceCount; ++Idx, BitOffset += PieceBits) {
    Register Piece = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(Piece);
    MIRBuilder.buildExtract(Piece, Reg, BitOffset);
  }

  // BitOffset now sits just past the last full-sized piece.
  for (; BitOffset < TotalBits; BitOffset += ExtraBits) {
    Register Piece = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(Piece);
    MIRBuilder.buildExtract(Piece, Reg, BitOffset);
  }
  return true;
}
/// Split the vector in \p Reg into sub-vectors of \p NumElts elements each (or
/// scalars when \p NumElts is 1), appending them to \p VRegs.
///
/// If the element count is not a multiple of \p NumElts, the vector is first
/// unmerged into individual elements, which are then re-merged into the
/// requested pieces. The remaining element(s) are appended last: as a bare
/// scalar when there is exactly one, otherwise as a shorter vector.
void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
                                         SmallVectorImpl<Register> &VRegs) {
  const LLT VecTy = MRI.getType(Reg);
  assert(VecTy.isVector() && "Expected a vector type");

  const LLT ScalarTy = VecTy.getElementType();
  const LLT PieceTy =
      (NumElts == 1) ? ScalarTy : LLT::fixed_vector(NumElts, ScalarTy);
  const unsigned TotalElts = VecTy.getNumElements();
  const unsigned FullPieces = TotalElts / NumElts;
  const unsigned TailElts = TotalElts % NumElts;

  // Perfect split: one unmerge does the whole job.
  if (TailElts == 0) {
    extractParts(Reg, PieceTy, FullPieces, VRegs);
    return;
  }

  // Irregular split: break everything down to single elements first.
  SmallVector<Register, 8> Scalars;
  extractParts(Reg, ScalarTy, TotalElts, Scalars);
  const ArrayRef<Register> AllScalars(Scalars);

  unsigned Cursor = 0;
  for (unsigned Piece = 0; Piece < FullPieces; ++Piece) {
    VRegs.push_back(
        MIRBuilder.buildMerge(PieceTy, AllScalars.slice(Cursor, NumElts))
            .getReg(0));
    Cursor += NumElts;
  }

  // Whatever is left over goes last.
  if (TailElts == 1) {
    VRegs.push_back(Scalars[Cursor]);
    return;
  }
  const LLT TailTy = LLT::fixed_vector(TailElts, ScalarTy);
  VRegs.push_back(
      MIRBuilder.buildMerge(TailTy, AllScalars.slice(Cursor, TailElts))
          .getReg(0));
}
/// Reassemble \p DstReg (of type \p ResultTy) from \p PartRegs of type
/// \p PartTy plus optional \p LeftoverRegs of type \p LeftoverTy.
///
/// With an invalid \p LeftoverTy there must be no leftover registers, and the
/// parts are joined with a merge, concat-vectors or build-vector depending on
/// whether the result and the parts are vectors. Otherwise vector results go
/// through mergeMixedSubvectors, and scalar results are rebuilt from pieces of
/// the common GCD type.
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  const bool HasLeftover = LeftoverTy.isValid();
  const bool VectorResult = ResultTy.isVector();

  if (!HasLeftover) {
    assert(LeftoverRegs.empty());

    if (!VectorResult)
      MIRBuilder.buildMerge(DstReg, PartRegs);
    else if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  if (VectorResult) {
    // Sub-vectors with differing element counts: merge element by element.
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    SmallVector<Register, 8> Combined;
    Combined.append(PartRegs.begin(), PartRegs.end());
    Combined.append(LeftoverRegs.begin(), LeftoverRegs.end());
    mergeMixedSubvectors(DstReg, Combined);
    return;
  }

  // Scalar result: reduce every input to the common GCD type, then rebuild.
  const LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  SmallVector<Register> GCDPieces;
  for (Register Part : PartRegs)
    extractGCDType(GCDPieces, GCDTy, Part);
  for (Register Extra : LeftoverRegs)
    extractGCDType(GCDPieces, GCDTy, Extra);

  const LLT WideTy =
      buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDPieces);
  buildWidenedRemergeToDst(DstReg, WideTy, GCDPieces);
}
/// Unmerge the vector in \p Reg into its scalar elements and append those
/// element registers to \p Elts.
void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
                                       Register Reg) {
  const LLT VecTy = MRI.getType(Reg);

  // Use a scratch list: extractParts unmerges into every register in the
  // vector it is given, so it must not see the caller's existing entries.
  SmallVector<Register, 8> Scalars;
  extractParts(Reg, VecTy.getScalarType(), VecTy.getNumElements(), Scalars);
  Elts.append(Scalars.begin(), Scalars.end());
}
/// Build \p DstReg from \p PartRegs, where every entry but the last is a
/// vector and the last entry (the leftover) is either a scalar or a vector.
/// All inputs are broken down into scalar elements, which are then joined with
/// a single merge.
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> Scalars;

  for (Register SubVec : PartRegs.drop_back())
    appendVectorElts(Scalars, SubVec);

  // The trailing entry may already be a single element.
  const Register Tail = PartRegs.back();
  if (!MRI.getType(Tail).isScalar())
    appendVectorElts(Scalars, Tail);
  else
    Scalars.push_back(Tail);

  MIRBuilder.buildMerge(DstReg, Scalars);
}
/// Append the registers held by every operand of the G_UNMERGE_VALUES \p MI
/// except the last one to \p Regs, in operand order.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  // The final operand is skipped; everything before it is collected.
  const int ResultCount = MI.getNumOperands() - 1;
  Regs.reserve(Regs.size() + ResultCount);
  for (int OpIdx = 0; OpIdx < ResultCount; ++OpIdx)
    Regs.push_back(MI.getOperand(OpIdx).getReg());
}
/// Append to \p Parts the pieces of \p SrcReg expressed in type \p GCDTy.
/// If \p SrcReg already has that type it is appended as-is; otherwise it is
/// unmerged into \p GCDTy pieces and the unmerge results are appended.
void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  if (MRI.getType(SrcReg) == GCDTy) {
    Parts.push_back(SrcReg);
    return;
  }

  auto Split = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
  getUnmergeResults(Parts, *Split);
}
/// Compute the GCD type of \p SrcReg's type, \p NarrowTy and \p DstTy, split
/// \p SrcReg into pieces of that type (appended to \p Parts), and return the
/// GCD type.
LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  const LLT SrcNarrowGCD = getGCDType(MRI.getType(SrcReg), NarrowTy);
  const LLT CommonTy = getGCDType(SrcNarrowGCD, DstTy);
  extractGCDType(Parts, CommonTy, SrcReg);
  return CommonTy;
}
/// Regroup the \p GCDTy-typed registers in \p VRegs into \p NarrowTy-typed
/// registers that together cover the LCM type of \p DstTy and \p NarrowTy,
/// padding with extra pieces when \p VRegs does not supply enough bits.
///
/// \param PadStrategy G_ZEXT pads with a zero constant, G_ANYEXT pads with
///                    undef, and G_SEXT pads with an arithmetic shift right of
///                    the last source piece by (piece width - 1).
/// \param VRegs       In: source pieces of type \p GCDTy. Out: replaced by the
///                    \p NarrowTy pieces that make up the LCM type.
/// \returns the LCM type of \p DstTy and \p NarrowTy.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
SmallVectorImpl<Register> &VRegs,
unsigned PadStrategy) {
LLT LCMTy = getLCMType(DstTy, NarrowTy);
// NumParts: NarrowTy pieces per LCM value. NumSubParts: GCDTy pieces per
// NarrowTy piece.
int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
int NumOrigSrc = VRegs.size();
Register PadReg;
// A GCDTy-sized padding value is only created when the sources do not already
// cover every sub-part slot.
if (NumOrigSrc < NumParts * NumSubParts) {
if (PadStrategy == TargetOpcode::G_ZEXT)
PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
else if (PadStrategy == TargetOpcode::G_ANYEXT)
PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
else {
assert(PadStrategy == TargetOpcode::G_SEXT);
// Shift the last source piece right arithmetically by its width minus one.
auto ShiftAmt =
MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
}
}
// Remerge collects the final NarrowTy pieces; SubMerge is scratch space for
// the GCDTy pieces that form one NarrowTy piece.
SmallVector<Register, 4> Remerge(NumParts);
SmallVector<Register, 4> SubMerge(NumSubParts);
// Once set, AllPadReg is reused for this and every later NarrowTy piece.
Register AllPadReg;
for (int I = 0; I != NumParts; ++I) {
bool AllMergePartsArePadding = true;
for (int J = 0; J != NumSubParts; ++J) {
int Idx = I * NumSubParts + J;
// Slots past the end of the original sources are filled with padding.
if (Idx >= NumOrigSrc) {
SubMerge[J] = PadReg;
continue;
}
SubMerge[J] = VRegs[Idx];
AllMergePartsArePadding = false;
}
// For undef/zero padding a NarrowTy-sized value is built directly rather
// than merging smaller padding pieces. No branch handles G_SEXT here, so in
// that case AllPadReg stays unset until the merge below is recorded.
if (AllMergePartsArePadding && !AllPadReg) {
if (PadStrategy == TargetOpcode::G_ANYEXT)
AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
else if (PadStrategy == TargetOpcode::G_ZEXT)
AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
}
if (AllPadReg) {
Remerge[I] = AllPadReg;
continue;
}
// A single sub-part needs no merge instruction.
if (NumSubParts == 1)
Remerge[I] = SubMerge[0];
else
Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
// Remember the first all-padding merge so later pieces can reuse it.
if (AllMergePartsArePadding && !AllPadReg)
AllPadReg = Remerge[I];
}
VRegs = std::move(Remerge);
return LCMTy;
}
/// Define \p DstReg from \p RemergeRegs, which together form a value of type
/// \p LCMTy.
///
/// - If \p DstReg already has type \p LCMTy, the pieces are merged straight
///   into it.
/// - If both types are scalar, the pieces are merged to \p LCMTy and the
///   result is truncated into \p DstReg.
/// - If \p LCMTy is a vector, the merged value is unmerged into pieces of
///   \p DstReg's type; \p DstReg receives the first piece and the remaining
///   pieces go to fresh virtual registers.
///
/// Any other combination is treated as unreachable.
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    // Reuse the merge built above; emitting a second identical merge here
    // would leave the first one without any users.
    MIRBuilder.buildUnmerge(UnmergeDefs, Remerge);
    return;
  }

  llvm_unreachable("unhandled case");
}
/// Map a generic opcode and an operand size in bits to the matching
/// RTLIB::Libcall enumerator.
///
/// The enumerator name is formed by pasting \p Size onto a per-opcode prefix.
/// Integer opcodes accept sizes 32, 64 and 128; floating-point opcodes accept
/// 32, 64, 80 and 128. Any other size, or an opcode not listed in the switch,
/// reaches llvm_unreachable.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// RTLIBCASE_INT(P): return RTLIB::P##<Size> for Size in {32, 64, 128}.
#define RTLIBCASE_INT(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
llvm_unreachable("unexpected size"); \
} \
} while (0)
// RTLIBCASE(P): return RTLIB::P##<Size> for Size in {32, 64, 80, 128}.
#define RTLIBCASE(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
return RTLIB::LibcallPrefix##32; \
case 64: \
return RTLIB::LibcallPrefix##64; \
case 80: \
return RTLIB::LibcallPrefix##80; \
case 128: \
return RTLIB::LibcallPrefix##128; \
default: \
llvm_unreachable("unexpected size"); \
} \
} while (0)
// Every macro expansion below returns or is unreachable, so no case falls
// through into the next one.
switch (Opcode) {
// Integer opcodes.
case TargetOpcode::G_SDIV:
RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
RTLIBCASE_INT(CTLZ_I);
// Floating-point opcodes.
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
RTLIBCASE(SUB_F);
case TargetOpcode::G_FMUL:
RTLIBCASE(MUL_F);
case TargetOpcode::G_FDIV:
RTLIBCASE(DIV_F);
case TargetOpcode::G_FEXP:
RTLIBCASE(EXP_F);
case TargetOpcode::G_FEXP2:
RTLIBCASE(EXP2_F);
case TargetOpcode::G_FREM:
RTLIBCASE(REM_F);
case TargetOpcode::G_FPOW:
RTLIBCASE(POW_F);
case TargetOpcode::G_FMA:
RTLIBCASE(FMA_F);
case TargetOpcode::G_FSIN:
RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
RTLIBCASE(COS_F);
case TargetOpcode::G_FLOG10:
RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
RTLIBCASE(FLOOR_F);
case TargetOpcode::G_FMINNUM:
RTLIBCASE(FMIN_F);
case TargetOpcode::G_FMAXNUM:
RTLIBCASE(FMAX_F);
case TargetOpcode::G_FSQRT:
RTLIBCASE(SQRT_F);
case TargetOpcode::G_FRINT:
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
}
// Only reached for an opcode that has no case above.
llvm_unreachable("Unknown libcall function");
}
/// Decide whether the memory libcall that replaces \p MI may be emitted as a
/// tail call.
///
/// Returns false when the caller has return attributes other than NoAlias and
/// NonNull, or has ZExt/SExt on its return. Otherwise the next non-debug
/// instruction must be a return that is not already a tail call. A COPY of
/// \p MI's first operand into a physical register, directly followed by a
/// return with exactly one implicit operand whose operand 0 is that physical
/// register, is also accepted for G_MEMCPY, G_MEMMOVE and G_MEMSET (never for
/// G_BZERO).
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();
  AttributeList CallerAttrs = F.getAttributes();

  // Return attributes other than NoAlias/NonNull rule out a tail call.
  AttrBuilder RetAttrs(F.getContext(), CallerAttrs.getRetAttrs());
  RetAttrs.removeAttribute(Attribute::NoAlias);
  RetAttrs.removeAttribute(Attribute::NonNull);
  if (RetAttrs.hasAttributes())
    return false;

  // Likewise for a sign- or zero-extended return value.
  const bool ExtendsReturn = CallerAttrs.hasRetAttr(Attribute::ZExt) ||
                             CallerAttrs.hasRetAttr(Attribute::SExt);
  if (ExtendsReturn)
    return false;

  const auto BlockEnd = MBB.instr_end();
  auto Following = next_nodbg(MI.getIterator(), BlockEnd);

  if (Following != BlockEnd && Following->isCopy()) {
    switch (MI.getOpcode()) {
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    case TargetOpcode::G_BZERO:
      return false;
    default:
      llvm_unreachable("unsupported opcode");
    }

    // The COPY must move MI's first operand into a physical register.
    const Register CopiedVReg = MI.getOperand(0).getReg();
    if (!CopiedVReg.isVirtual() ||
        CopiedVReg != Following->getOperand(1).getReg())
      return false;

    const Register RetPReg = Following->getOperand(0).getReg();
    if (!RetPReg.isPhysical())
      return false;

    // ... and a return using exactly that register must come right after it.
    auto RetIt = next_nodbg(Following, BlockEnd);
    if (RetIt == BlockEnd || !RetIt->isReturn())
      return false;
    if (RetIt->getNumImplicitOperands() != 1)
      return false;
    if (RetPReg != RetIt->getOperand(0).getReg())
      return false;

    // Continue from the return, skipping the COPY just validated.
    Following = RetIt;
  }

  return Following != BlockEnd && !TII.isTailCall(*Following) &&
         Following->isReturn();
}
/// Emit a call to the external symbol \p Name using calling convention \p CC.
///
/// \param Result Describes where the call's return value goes.
/// \param Args   Arguments to pass, in order.
/// \returns Legalized if the target's CallLowering lowered the call,
///          UnableToLegalize otherwise.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CallLower = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo CallInfo;
  CallInfo.CallConv = CC;
  CallInfo.Callee = MachineOperand::CreateES(Name);
  CallInfo.OrigRet = Result;
  CallInfo.OrigArgs.append(Args.begin(), Args.end());

  return CallLower.lowerCall(MIRBuilder, CallInfo)
             ? LegalizerHelper::Legalized
             : LegalizerHelper::UnableToLegalize;
}
/// Emit a call to the runtime library routine \p Libcall.
///
/// The symbol name and calling convention are looked up in the subtarget's
/// TargetLowering and the call is emitted through the name-based
/// createLibcall overload.
///
/// \param Result Describes where the call's return value goes.
/// \param Args   Arguments to pass, in order.
/// \returns UnableToLegalize if the target provides no name for \p Libcall
///          or the call could not be lowered, Legalized otherwise.
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);

  // Report failure instead of creating an external-symbol operand from a null
  // name; this is the same check createMemLibcall makes.
  if (!Name)
    return LegalizerHelper::UnableToLegalize;

  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}
/// Replace \p MI with a call to the runtime routine selected by its opcode and
/// \p Size. Operand 0 of \p MI receives the result and every later operand is
/// passed as an argument; all of them are given the IR type \p OpType.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  const RTLIB::Libcall Routine = getRTLibDesc(MI.getOpcode(), Size);

  SmallVector<CallLowering::ArgInfo, 3> CallArgs;
  for (unsigned OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx != NumOps;
       ++OpIdx)
    CallArgs.push_back({MI.getOperand(OpIdx).getReg(), OpType, 0});

  return createLibcall(MIRBuilder, Routine,
                       {MI.getOperand(0).getReg(), OpType, 0}, CallArgs);
}
/// Lower the memory instruction \p MI (G_BZERO, G_MEMCPY, G_MEMMOVE or
/// G_MEMSET) to a call to the matching runtime routine.
///
/// Every operand except the last becomes a call argument: pointers are typed
/// as i8* in their address space, everything else as an integer of the
/// operand's bit width. The last operand is an immediate that, when non-zero,
/// requests a tail call if \p MI is in tail position. If the call was lowered
/// as a tail call, every instruction following \p MI in its block is erased.
///
/// \returns UnableToLegalize if the target has no name for the routine or the
///          call could not be lowered, Legalized otherwise. \p MI itself is
///          not erased here.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // IR type used to pass an operand of the given low-level type.
  auto IRTypeFor = [&Ctx](LLT Ty) -> Type * {
    if (Ty.isPointer())
      return Type::getInt8PtrTy(Ctx, Ty.getAddressSpace());
    return IntegerType::get(Ctx, Ty.getSizeInBits());
  };

  // The trailing operand is the tail-call immediate, not an argument.
  const unsigned TailOpIdx = MI.getNumOperands() - 1;
  SmallVector<CallLowering::ArgInfo, 3> CallArgs;
  for (unsigned OpIdx = 0; OpIdx != TailOpIdx; ++OpIdx) {
    const Register ArgReg = MI.getOperand(OpIdx).getReg();
    Type *ArgIRTy = IRTypeFor(MRI.getType(ArgReg));
    CallArgs.push_back({ArgReg, ArgIRTy, 0});
  }

  auto &CallLower = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TargetLower = *MIRBuilder.getMF().getSubtarget().getTargetLowering();

  const unsigned Opc = MI.getOpcode();
  RTLIB::Libcall Routine;
  bool MarkFirstArgReturned = true;
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    Routine = RTLIB::BZERO;
    MarkFirstArgReturned = false;
    break;
  case TargetOpcode::G_MEMCPY:
    Routine = RTLIB::MEMCPY;
    break;
  case TargetOpcode::G_MEMMOVE:
    Routine = RTLIB::MEMMOVE;
    break;
  case TargetOpcode::G_MEMSET:
    Routine = RTLIB::MEMSET;
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  // All routines except BZERO get the 'returned' flag on their first argument.
  if (MarkFirstArgReturned)
    CallArgs[0].Flags[0].setReturned();

  const char *SymbolName = TargetLower.getLibcallName(Routine);
  if (!SymbolName) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo CallInfo;
  CallInfo.CallConv = TargetLower.getLibcallCallingConv(Routine);
  CallInfo.Callee = MachineOperand::CreateES(SymbolName);
  CallInfo.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  CallInfo.IsTailCall =
      MI.getOperand(TailOpIdx).getImm() &&
      isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
  CallInfo.OrigArgs.append(CallArgs.begin(), CallArgs.end());

  if (!CallLower.lowerCall(MIRBuilder, CallInfo))
    return LegalizerHelper::UnableToLegalize;

  if (CallInfo.LoweredTailCall) {
    assert(CallInfo.IsTailCall &&
           "Lowered tail call when it wasn't a tail call?");

    // Erase everything that still follows MI in the block, bracketed by the
    // observer checkpoints.
    LocObserver.checkpoint(true);
    do {
      MachineInstr *Trailing = MI.getNextNode();
      assert(Trailing &&
             (Trailing->isCopy() || Trailing->isReturn() ||
              Trailing->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Trailing->eraseFromParent();
    } while (MI.getNextNode());
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}
/// Select the runtime conversion routine for \p Opcode converting a value of
/// IR type \p FromType to \p ToType. Handles G_FPEXT, G_FPTRUNC, G_FPTOSI,
/// G_FPTOUI, G_SITOFP and G_UITOFP; any other opcode is unreachable.
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  const auto DstVT = MVT::getVT(ToType);
  const auto SrcVT = MVT::getVT(FromType);

  if (Opcode == TargetOpcode::G_FPEXT)
    return RTLIB::getFPEXT(SrcVT, DstVT);
  if (Opcode == TargetOpcode::G_FPTRUNC)
    return RTLIB::getFPROUND(SrcVT, DstVT);
  if (Opcode == TargetOpcode::G_FPTOSI)
    return RTLIB::getFPTOSINT(SrcVT, DstVT);
  if (Opcode == TargetOpcode::G_FPTOUI)
    return RTLIB::getFPTOUINT(SrcVT, DstVT);
  if (Opcode == TargetOpcode::G_SITOFP)
    return RTLIB::getSINTTOFP(SrcVT, DstVT);
  if (Opcode == TargetOpcode::G_UITOFP)
    return RTLIB::getUINTTOFP(SrcVT, DstVT);

  llvm_unreachable("Unsupported libcall function");
}
/// Emit a runtime call implementing the conversion \p MI: operand 1 is passed
/// as the sole argument with IR type \p FromType, and operand 0 receives the
/// result with IR type \p ToType.
static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  const RTLIB::Libcall Routine =
      getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  return createLibcall(MIRBuilder, Routine, {DstReg, ToType, 0},
                       {{SrcReg, FromType, 0}});
}
/// Legalize \p MI by replacing it with a runtime library call.
///
/// Supported opcodes are the integer division/remainder and
/// G_CTLZ_ZERO_UNDEF group, the listed floating-point arithmetic and math
/// operations, FP extend/truncate, FP<->integer conversions (32- and 64-bit
/// only on both sides), and the memory operations G_BZERO, G_MEMCPY,
/// G_MEMMOVE and G_MEMSET. On success \p MI is erased.
///
/// \param LocObserver Forwarded to createMemLibcall for the memory opcodes.
/// \returns Legalized on success; otherwise the failing status from the call
///          emission, or UnableToLegalize for unsupported opcodes and types.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  const unsigned DstBits = DstTy.getSizeInBits();
  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();

  // Each case either bails out directly or records the emission status, which
  // is checked once after the switch.
  LegalizeResult Status = UnableToLegalize;

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *IntTy = IntegerType::get(Ctx, DstBits);
    Status = simpleLibcall(MI, MIRBuilder, DstBits, IntTy);
    break;
  }

  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *FloatTy = getFloatTypeForLLT(Ctx, DstTy);
    const bool SizeHasLibcall =
        DstBits == 32 || DstBits == 64 || DstBits == 80 || DstBits == 128;
    if (!FloatTy || !SizeHasLibcall) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << DstTy << ".\n");
      return UnableToLegalize;
    }
    Status = simpleLibcall(MI, MIRBuilder, DstBits, FloatTy);
    break;
  }

  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *SrcFloatTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *DstFloatTy =
        getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!SrcFloatTy || !DstFloatTy)
      return UnableToLegalize;
    Status = conversionLibcall(MI, MIRBuilder, DstFloatTy, SrcFloatTy);
    break;
  }

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const unsigned SrcBits =
        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    const unsigned ResBits =
        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    const bool ResOk = ResBits == 32 || ResBits == 64;
    const bool SrcOk = SrcBits == 32 || SrcBits == 64;
    if (!ResOk || !SrcOk)
      return UnableToLegalize;

    Type *ResIntTy =
        ResBits == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx);
    Type *SrcFloatTy =
        SrcBits == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    Status = conversionLibcall(MI, MIRBuilder, ResIntTy, SrcFloatTy);
    break;
  }

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    const unsigned SrcBits =
        MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    const unsigned ResBits =
        MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    const bool SrcOk = SrcBits == 32 || SrcBits == 64;
    const bool ResOk = ResBits == 32 || ResBits == 64;
    if (!SrcOk || !ResOk)
      return UnableToLegalize;

    Type *ResFloatTy =
        ResBits == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    Type *SrcIntTy =
        SrcBits == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx);
    Status = conversionLibcall(MI, MIRBuilder, ResFloatTy, SrcIntTy);
    break;
  }

  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    Status =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    break;
  }

  if (Status != Legalized)
    return Status;

  // The call now stands in for the original instruction.
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
uint64_t NarrowSize = NarrowTy.getSizeInBits();
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (SizeOp0 % NarrowSize != 0) {
LLT ImplicitTy = NarrowTy;
if (DstTy.isVector())
ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
MI.eraseFromParent();
return Legalized;
}
int NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
if (DstTy.isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
const APInt &Val = MI.getOperand(1).getCImm()->getValue();
unsigned TotalSize = Ty.getSizeInBits();
unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = TotalSize / NarrowSize;
SmallVector<Register, 4> PartRegs;
for (int I = 0; I != NumParts; ++I) {
unsigned Offset = I * NarrowSize;
auto K = MIRBuilder.buildConstant(NarrowTy,
Val.lshr(Offset).trunc(NarrowSize));
PartRegs.push_back(K.getReg(0));
}
LLT LeftoverTy;
unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
SmallVector<Register, 1> LeftoverRegs;
if (LeftoverBits != 0) {
LeftoverTy = LLT::scalar(LeftoverBits);
auto K = MIRBuilder.buildConstant(
LeftoverTy,
Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
LeftoverRegs.push_back(K.getReg(0));
}
insertParts(MI.getOperand(0).getReg(),
Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
return narrowScalarExt(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_TRUNC: {
if (TypeIdx != 1)
return UnableToLegalize;
uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
return UnableToLegalize;
}
auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FREEZE: {
if (TypeIdx != 0)
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
return UnableToLegalize;
auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
SmallVector<Register, 8> Parts;
for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
Parts.push_back(
MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
}
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_SADDE:
case TargetOpcode::G_SSUBE:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_MUL:
case TargetOpcode::G_UMULH:
return narrowScalarMul(MI, NarrowTy);
case TargetOpcode::G_EXTRACT:
return narrowScalarExtract(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
auto &LoadMI = cast<GLoad>(MI);
Register DstReg = LoadMI.getDstReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
return UnableToLegalize;
if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
MIRBuilder.buildAnyExt(DstReg, TmpReg);
LoadMI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_SEXTLOAD: {
auto &LoadMI = cast<GExtLoad>(MI);
Register DstReg = LoadMI.getDstReg();
Register PtrReg = LoadMI.getPointerReg();
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = LoadMI.getMMO();
unsigned MemSize = MMO.getSizeInBits();
if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
} else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
} else if (MemSize > NarrowSize) {
return UnableToLegalize;
}
if (isa<GZExtLoad>(LoadMI))
MIRBuilder.buildZExt(DstReg, TmpReg);
else
MIRBuilder.buildSExt(DstReg, TmpReg);
LoadMI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_STORE: {
auto &StoreMI = cast<GStore>(MI);
Register SrcReg = StoreMI.getValueReg();
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildTrunc(TmpReg, SrcReg);
MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
StoreMI.eraseFromParent();
return Legalized;
}
return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
}
case TargetOpcode::G_SELECT:
return narrowScalarSelect(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR: {
return narrowScalarBasic(MI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
return narrowScalarShift(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP:
if (TypeIdx == 1)
switch (MI.getOpcode()) {
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTPOP:
return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTRTOINT:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
unsigned NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs(NumParts);
SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
Observer.changingInstr(MI);
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
SrcRegs[i / 2]);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, MI);
for (unsigned i = 0; i < NumParts; ++i) {
DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
MachineInstrBuilder MIB =
MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_INSERT_VECTOR_ELT: {
if (TypeIdx != 2)
return UnableToLegalize;
int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, OpIdx);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_ICMP: {
Register LHS = MI.getOperand(2).getReg();
LLT SrcTy = MRI.getType(LHS);
uint64_t SrcSize = SrcTy.getSizeInBits();
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
return UnableToLegalize;
LLT LeftoverTy; SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
LHSLeftoverRegs))
return UnableToLegalize;
LLT Unused; SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
RHSPartRegs, RHSLeftoverRegs))
return UnableToLegalize;
Register Dst = MI.getOperand(0).getReg();
LLT ResTy = MRI.getType(Dst);
if (ICmpInst::isEquality(Pred)) {
auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
SmallVector<Register, 4> Xors;
for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
auto LHS = std::get<0>(LHSAndRHS);
auto RHS = std::get<1>(LHSAndRHS);
auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
Xors.push_back(Xor);
}
SmallVector<Register, 4> WidenedXors;
for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
auto LHS = std::get<0>(LHSAndRHS);
auto RHS = std::get<1>(LHSAndRHS);
auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
TargetOpcode::G_ZEXT);
Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
}
assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
for (unsigned I = 2, E = Xors.size(); I < E; ++I)
Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
} else {
assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
Register LHSL = LHSPartRegs[0];
Register LHSH = LHSPartRegs[1];
Register RHSL = RHSPartRegs[0];
Register RHSH = RHSPartRegs[1];
MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
}
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_SEXT_INREG: {
if (TypeIdx != 0)
return UnableToLegalize;
int64_t SizeInBits = MI.getOperand(2).getImm();
if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
Observer.changingInstr(MI);
MachineOperand &MO1 = MI.getOperand(1);
auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
MO1.setReg(TruncMIB.getReg(0));
MachineOperand &MO2 = MI.getOperand(0);
Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
MIRBuilder.buildSExt(MO2, DstExt);
MO2.setReg(DstExt);
Observer.changedInstr(MI);
return Legalized;
}
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs;
SmallVector<Register, 2> SrcRegs;
for (int i = 0; i < NumParts; ++i) {
Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
SrcRegs.push_back(SrcReg);
}
MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
Register AshrCstReg =
MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
.getReg(0);
Register FullExtensionReg = 0;
Register PartialExtensionReg = 0;
for (int i = 0; i < NumParts; ++i) {
if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
DstRegs.push_back(SrcRegs[i]);
else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
assert(PartialExtensionReg &&
"Expected to visit partial extension before full");
if (FullExtensionReg) {
DstRegs.push_back(FullExtensionReg);
continue;
}
DstRegs.push_back(
MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
.getReg(0));
FullExtensionReg = DstRegs.back();
} else {
DstRegs.push_back(
MIRBuilder
.buildInstr(
TargetOpcode::G_SEXT_INREG, {NarrowTy},
{SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
.getReg(0));
PartialExtensionReg = DstRegs.back();
}
}
Register DstReg = MI.getOperand(0).getReg();
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_BITREVERSE: {
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
SmallVector<Register, 2> SrcRegs, DstRegs;
unsigned NumParts = SizeOp0 / NarrowSize;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
for (unsigned i = 0; i < NumParts; ++i) {
auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
{SrcRegs[NumParts - 1 - i]});
DstRegs.push_back(DstPart.getReg(0));
}
MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarSrc(MI, NarrowTy, 2);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FPTOSI:
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
}
}
/// Reinterpret \p Val as a scalar with the same total bit width.
///
/// - A value that is already scalar is returned as is.
/// - A pointer is converted with G_PTRTOINT, unless its address space is
///   non-integral, in which case a default-constructed Register is returned
///   and nothing is emitted.
/// - Anything else is asserted to be a vector and is G_BITCAST to the scalar;
///   a vector with pointer elements is passed through G_PTRTOINT first.
///
/// \returns the register holding the scalar value, or Register() on refusal.
Register LegalizerHelper::coerceToScalar(Register Val) {
  const LLT ValTy = MRI.getType(Val);
  if (ValTy.isScalar())
    return Val;

  const DataLayout &Layout = MIRBuilder.getDataLayout();
  const LLT ScalarTy = LLT::scalar(ValTy.getSizeInBits());

  if (ValTy.isPointer()) {
    // Refuse to turn a pointer in a non-integral address space into an
    // integer.
    if (Layout.isNonIntegralAddressSpace(ValTy.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(ScalarTy, Val).getReg(0);
  }

  assert(ValTy.isVector());

  // NOTE(review): for pointer elements the G_PTRTOINT result type is the
  // full-width scalar rather than a vector of integers - confirm intended.
  Register Converted = Val;
  if (ValTy.getElementType().isPointer())
    Converted = MIRBuilder.buildPtrToInt(ScalarTy, Converted).getReg(0);

  return MIRBuilder.buildBitcast(ScalarTy, Converted).getReg(0);
}
/// Replace the register read by operand \p OpIdx of \p MI with a \p WideTy
/// value.
///
/// A new \p ExtOpcode instruction is built at the builder's current insertion
/// point, taking the operand's current register as input and producing a
/// \p WideTy result; the operand is then redirected to that result.
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &SrcMO = MI.getOperand(OpIdx);
  const Register WideReg =
      MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {SrcMO}).getReg(0);
  SrcMO.setReg(WideReg);
}
/// Replace the register read by operand \p OpIdx of \p MI with a \p NarrowTy
/// value obtained by truncating (G_TRUNC) the operand's current register.
void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &SrcMO = MI.getOperand(OpIdx);
  const Register NarrowReg = MIRBuilder.buildTrunc(NarrowTy, SrcMO).getReg(0);
  SrcMO.setReg(NarrowReg);
}
/// Make operand \p OpIdx of \p MI define a fresh \p WideTy register.
///
/// The register the operand used to define is instead defined by a new
/// \p TruncOpcode instruction that reads the wide register. That instruction
/// is emitted one position past the builder's current insertion point
/// (presumably right after \p MI - confirm against callers).
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &DstMO = MI.getOperand(OpIdx);
  const Register WideDef = MRI.createGenericVirtualRegister(WideTy);

  // Step the insertion point forward by one instruction.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // Old destination = TruncOpcode(wide destination); build this while the
  // operand still names the old destination, then retarget the operand.
  MIRBuilder.buildInstr(TruncOpcode, {DstMO}, {WideDef});
  DstMO.setReg(WideDef);
}
/// Make operand \p OpIdx of \p MI define a fresh \p NarrowTy register.
///
/// The register the operand used to define is instead defined by a new
/// \p ExtOpcode instruction that reads the narrow register. That instruction
/// is emitted one position past the builder's current insertion point
/// (presumably right after \p MI - confirm against callers).
void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &DstMO = MI.getOperand(OpIdx);
  const Register NarrowDef = MRI.createGenericVirtualRegister(NarrowTy);

  // Step the insertion point forward by one instruction.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // Old destination = ExtOpcode(narrow destination); build this while the
  // operand still names the old destination, then retarget the operand.
  MIRBuilder.buildInstr(ExtOpcode, {DstMO}, {NarrowDef});
  DstMO.setReg(NarrowDef);
}
/// Make operand \p OpIdx of \p MI define a fresh \p WideTy register and
/// recreate the original destination from it.
///
/// The original destination register is redefined via
/// buildDeleteTrailingVectorElements, emitted one position past the builder's
/// current insertion point.
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &DstMO = MI.getOperand(OpIdx);

  // Step the insertion point forward by one instruction.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  const Register OrigDst = DstMO.getReg();
  const Register WideDst = MRI.createGenericVirtualRegister(WideTy);
  DstMO.setReg(WideDst);
  MIRBuilder.buildDeleteTrailingVectorElements(OrigDst, WideDst);
}
/// Replace the register read by operand \p OpIdx of \p MI with a \p MoreTy
/// value produced by buildPadVectorWithUndefElements from the operand's
/// current register.
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
}
/// Replace the register read by operand \p OpIdx of \p MI with a G_BITCAST of
/// its current register to \p CastTy.
void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &SrcMO = MI.getOperand(OpIdx);
  const Register CastReg = MIRBuilder.buildBitcast(CastTy, SrcMO).getReg(0);
  SrcMO.setReg(CastReg);
}
/// Make operand \p OpIdx of \p MI define a fresh \p CastTy register.
///
/// The register the operand used to define is instead defined by a G_BITCAST
/// of the new register, emitted one position past the builder's current
/// insertion point.
void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &DstMO = MI.getOperand(OpIdx);
  const Register CastDef = MRI.createGenericVirtualRegister(CastTy);

  // Step the insertion point forward by one instruction.
  auto InsertAfter = MIRBuilder.getInsertPt();
  ++InsertAfter;
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), InsertAfter);

  // Old destination = G_BITCAST(new destination); build this while the
  // operand still names the old destination, then retarget the operand.
  MIRBuilder.buildBitcast(DstMO, CastDef);
  DstMO.setReg(CastDef);
}
/// Widen the source type (type index 1) of a G_MERGE_VALUES to \p WideTy.
///
/// Two strategies are used:
///  - If \p WideTy is at least as wide as the destination, the sources are
///    zero-extended to \p WideTy, shifted into position and OR'd together.
///    The result is truncated when \p WideTy is strictly wider, or converted
///    with G_INTTOPTR when the sizes match and the destination is a pointer.
///  - Otherwise every source is split into pieces of
///    gcd(source size, wide size) bits, the piece list is padded with undef
///    up to a whole number of \p WideTy values, the pieces are re-merged into
///    \p WideTy registers, and those are merged into the destination (with a
///    final truncate if the padded width exceeds the destination width).
///
/// \returns UnableToLegalize for any type index other than 1 or for a vector
/// destination; otherwise Legalized, in which case \p MI has been erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy values needed to cover the destination (rounded up).
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Pack the bits directly in the wide type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      // The last OR can define the destination directly when the wide type is
      // exactly the destination type.
      Register NextResult = I + 1 == NumOps && WideTy == DstTy
                                ? DstReg
                                : MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Split the sources into GCD-sized pieces and recombine them into WideTy
  // values covering the next multiple of WideSize at or above DstSize.
  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they are not already GCD-sized.
  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    Register SrcReg = MO.getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Each WideTy value is assembled from PartsPerGCD GCD-sized pieces, so the
  // merges below consume exactly NumMerge * PartsPerGCD pieces in total.
  const int PartsPerGCD = WideSize / GCD;
  const int NumPiecesNeeded = NumMerge * PartsPerGCD;

  // Pad with undef up to the number of pieces actually consumed. The count is
  // in pieces, not bits, so no undef is created when none is needed.
  if (static_cast<int>(Unmerges.size()) < NumPiecesNeeded) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I < NumPiecesNeeded; ++I)
      Unmerges.push_back(UndefReg);
  }

  // Build one WideTy merge per group of PartsPerGCD consecutive pieces.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate is needed when the padded width exceeds the destination width.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Widen the destination type (type index 0) of a G_UNMERGE_VALUES to
/// \p WideTy.
///
/// If \p WideTy is at least as wide as the source, each result is extracted
/// directly with a logical shift right and a truncate. Otherwise the source is
/// any-extended to the LCM type of the source type and \p WideTy, unmerged
/// into \p WideTy pieces, and those pieces are unmerged again (or split and
/// re-merged through a GCD type) to form the original results.
///
/// \returns UnableToLegalize for other type indices, vector sources,
/// non-scalar destinations, and the unsupported pointer cases below; otherwise
/// Legalized, in which case \p MI has been erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
if (TypeIdx != 0)
return UnableToLegalize;
// The source is the last operand; every operand before it is a result.
int NumDst = MI.getNumOperands() - 1;
Register SrcReg = MI.getOperand(NumDst).getReg();
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
return UnableToLegalize;
Register Dst0Reg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst0Reg);
if (!DstTy.isScalar())
return UnableToLegalize;
// Case 1: the wide type covers the whole source, so the results can be
// extracted with shifts instead of another unmerge.
if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
if (SrcTy.isPointer()) {
// A pointer source is first converted to an integer of the same size; this
// is refused for non-integral address spaces.
const DataLayout &DL = MIRBuilder.getDataLayout();
if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
LLVM_DEBUG(
dbgs() << "Not casting non-integral address space integer\n");
return UnableToLegalize;
}
SrcTy = LLT::scalar(SrcTy.getSizeInBits());
SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
}
// Do the shifts in WideTy when it is strictly wider than the source.
if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
SrcTy = WideTy;
SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
}
unsigned DstSize = DstTy.getSizeInBits();
// Result 0 is the low bits; result I is the source shifted right by
// I * DstSize bits and then truncated.
MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
for (int I = 1; I != NumDst; ++I) {
auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
}
MI.eraseFromParent();
return Legalized;
}
// Case 2: the wide type is narrower than the source. Extend the source to the
// LCM type of SrcTy and WideTy if their sizes differ.
LLT LCMTy = getLCMType(SrcTy, WideTy);
Register WideSrc = SrcReg;
if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
// Extending a pointer source is not handled.
if (SrcTy.isPointer()) {
LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
return UnableToLegalize;
}
WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
}
// Split the (possibly extended) source into WideTy pieces.
auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
const LLT GCDTy = getGCDType(WideTy, DstTy);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
// When DstTy has the same size as the GCD type, each WideTy piece can be
// unmerged straight into DstTy results without an intermediate type.
if (PartsPerRemerge == 1) {
const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
for (int I = 0; I != NumUnmerge; ++I) {
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
for (int J = 0; J != PartsPerUnmerge; ++J) {
int Idx = I * PartsPerUnmerge + J;
if (Idx < NumDst)
MIB.addDef(MI.getOperand(Idx).getReg());
else {
// Results past the original count get fresh registers that nothing uses.
MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
}
}
MIB.addUse(Unmerge.getReg(I));
}
} else {
// Otherwise gather GCD-typed parts from every WideTy piece (extractGCDType
// presumably appends them to Parts - confirm) and merge PartsPerRemerge
// consecutive parts into each original result.
SmallVector<Register, 16> Parts;
for (int J = 0; J != NumUnmerge; ++J)
extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
SmallVector<Register, 8> RemergeParts;
for (int I = 0; I != NumDst; ++I) {
for (int J = 0; J < PartsPerRemerge; ++J) {
const int Idx = I * PartsPerRemerge + J;
RemergeParts.emplace_back(Parts[Idx]);
}
MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
RemergeParts.clear();
}
}
MI.eraseFromParent();
return Legalized;
}
/// Widen one of the types of a G_EXTRACT to \p WideTy.
///
/// For type index 0 the G_EXTRACT is replaced by a logical shift right of the
/// source by the extract offset followed by a truncate to the destination
/// (the shift is skipped for offset 0). Vector types, pointer destinations and
/// pointer sources in non-integral address spaces are rejected before any
/// instruction is emitted.
///
/// For any other type index the source operand is any-extended in place. For
/// a vector source this additionally requires the destination to be the
/// element type and the offset to be a multiple of the element size; the
/// offset is rescaled and the destination widened to the scalar type of
/// \p WideTy.
///
/// \returns Legalized on success, UnableToLegalize otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    // Reject pointer destinations up front so that no instruction has been
    // emitted by the time UnableToLegalize is returned.
    if (DstTy.isPointer())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // A pointer source is converted to an integer of the same size first;
      // this is refused for non-integral address spaces.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (Offset == 0) {
      // No shift is needed when extracting from bit 0.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Shift in the source type, or in WideTy if that is wider.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  if (SrcTy.isScalar()) {
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Only whole-element extracts from a vector are handled.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset by the ratio of the wide size to the source size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
/// Widen type index 0 of a G_INSERT to the scalar type \p WideTy.
///
/// Operand 1 is any-extended to \p WideTy and the destination is widened via
/// widenScalarDst; \p MI is updated in place.
///
/// \returns UnableToLegalize for any other type index or a vector \p WideTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  const bool IsSupported = TypeIdx == 0 && !WideTy.isVector();
  if (!IsSupported)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);

  return Legalized;
}
/// Widen a G_[SU]ADDO / G_[SU]SUBO / G_[SU]ADDE / G_[SU]SUBE to \p WideTy.
///
/// For type index 1 only the carry-out definition (operand 1) is widened,
/// plus the carry-in (operand 4) for the carry-consuming opcodes.
///
/// Otherwise both value inputs are sign- or zero-extended (according to the
/// signedness of the opcode), the arithmetic is done in \p WideTy, and the
/// overflow flag is computed by checking whether the wide result differs from
/// the re-extension of its truncation to the original type. \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  const unsigned OrigOpc = MI.getOpcode();

  // Pick the opcode used for the wide arithmetic and note whether the
  // original instruction consumes a carry-in.
  unsigned WideOpc;
  bool TakesCarryIn = false;
  switch (OrigOpc) {
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
    WideOpc = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    WideOpc = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
    WideOpc = TargetOpcode::G_UADDE;
    TakesCarryIn = true;
    break;
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
    WideOpc = TargetOpcode::G_USUBE;
    TakesCarryIn = true;
    break;
  default:
    llvm_unreachable("Unexpected opcode!");
  }

  // Signed variants sign-extend their inputs, unsigned ones zero-extend.
  const bool IsSignedOp = OrigOpc == TargetOpcode::G_SADDO ||
                          OrigOpc == TargetOpcode::G_SSUBO ||
                          OrigOpc == TargetOpcode::G_SADDE ||
                          OrigOpc == TargetOpcode::G_SSUBE;
  const unsigned ExtOpc =
      IsSignedOp ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  Optional<Register> CarryIn = None;
  if (TakesCarryIn)
    CarryIn = MI.getOperand(4).getReg();

  if (TypeIdx == 1) {
    // Only the boolean carry type changes: widen the carry-out def and, if
    // present, the carry-in use.
    const unsigned BoolExtOp =
        MIRBuilder.getBoolExtOp(WideTy.isVector(), false);

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    if (CarryIn)
      widenScalarSrc(MI, WideTy, 4, BoolExtOp);
    Observer.changedInstr(MI);
    return Legalized;
  }

  auto WideLHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(2)});
  auto WideRHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(3)});

  // Perform the arithmetic in the wide type.
  MachineInstrBuilder WideArith;
  if (CarryIn) {
    const LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
    WideArith = MIRBuilder.buildInstr(WideOpc, {WideTy, CarryOutTy},
                                      {WideLHS, WideRHS, *CarryIn});
  } else {
    WideArith = MIRBuilder.buildInstr(WideOpc, {WideTy}, {WideLHS, WideRHS});
  }
  const Register WideResult = WideArith.getReg(0);

  // Overflow is reported when truncating the wide result to the original type
  // and extending it back does not reproduce the wide result.
  const LLT NarrowTy = MRI.getType(MI.getOperand(0).getReg());
  auto Narrowed = MIRBuilder.buildTrunc(NarrowTy, WideResult);
  auto Reextended = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {Narrowed});
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), WideResult,
                       Reextended);

  // The original result is the truncation of the wide result.
  MIRBuilder.buildTrunc(MI.getOperand(0), WideResult);
  MI.eraseFromParent();
  return Legalized;
}
/// Widen a saturating add/sub/shl (G_[SU]ADDSAT, G_[SU]SUBSAT, G_[SU]SHLSAT)
/// to \p WideTy.
///
/// The emitted sequence is:
///   1. extend the operands to \p WideTy,
///   2. shift the LHS (and, for add/sub, the RHS) left by the width difference,
///   3. perform the same saturating operation in \p WideTy,
///   4. shift the result back right (arithmetic if signed, logical otherwise),
///   5. truncate into the original destination.
/// \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  const unsigned Opc = MI.getOpcode();

  const bool IsShift =
      Opc == TargetOpcode::G_SSHLSAT || Opc == TargetOpcode::G_USHLSAT;

  bool IsSigned = false;
  switch (Opc) {
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
    IsSigned = true;
    break;
  default:
    break;
  }

  Register DstReg = MI.getOperand(0).getReg();

  // Number of bits gained by widening; operands are moved up by this much.
  const unsigned WideBits = WideTy.getScalarSizeInBits();
  unsigned SHLAmount = WideBits - MRI.getType(DstReg).getScalarSizeInBits();

  auto WideLHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));

  // A shift amount is zero-extended; an add/sub operand is any-extended.
  MachineInstrBuilder WideRHS;
  if (IsShift)
    WideRHS = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
  else
    WideRHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));

  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
  auto ShiftedLHS = MIRBuilder.buildShl(WideTy, WideLHS, ShiftK);

  // A shift amount is used unchanged; an add/sub operand is shifted up too.
  MachineInstrBuilder ShiftedRHS;
  if (IsShift)
    ShiftedRHS = WideRHS;
  else
    ShiftedRHS = MIRBuilder.buildShl(WideTy, WideRHS, ShiftK);

  auto WideInst = MIRBuilder.buildInstr(Opc, {WideTy},
                                        {ShiftedLHS, ShiftedRHS}, MI.getFlags());

  // Move the result back down with a shift matching the signedness.
  MachineInstrBuilder Result;
  if (IsSigned)
    Result = MIRBuilder.buildAShr(WideTy, WideInst, ShiftK);
  else
    Result = MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MIRBuilder.buildTrunc(DstReg, Result);
  MI.eraseFromParent();
  return Legalized;
}
/// Widen a G_SMULO / G_UMULO to \p WideTy.
///
/// For type index 1 only the overflow definition (operand 1) is widened.
///
/// Otherwise the inputs are sign-/zero-extended, the same multiply-with-
/// overflow opcode is emitted in \p WideTy, and the original result is the
/// truncation of the wide product. Overflow is reported when the wide product
/// differs from its own sign-/zero-extension from the original bit width; if
/// \p WideTy is narrower than twice the original width, the wide operation's
/// own overflow bit is OR'd in as well. \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  }

  const bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  const Register ResultReg = MI.getOperand(0).getReg();
  const Register OverflowReg = MI.getOperand(1).getReg();
  const Register LHSReg = MI.getOperand(2).getReg();
  const Register RHSReg = MI.getOperand(3).getReg();

  const LLT OverflowTy = MRI.getType(OverflowReg);
  const unsigned SrcBitWidth = MRI.getType(LHSReg).getScalarSizeInBits();

  // Extend both inputs according to signedness and multiply in the wide type.
  const unsigned ExtOp =
      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto WideLHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHSReg});
  auto WideRHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHSReg});

  auto WideMulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
                                        {WideLHS, WideRHS});
  const Register WideProduct = WideMulo.getReg(0);
  MIRBuilder.buildTrunc(ResultReg, WideProduct);

  // Re-extend the low SrcBitWidth bits of the product; a mismatch with the
  // full product is treated as overflow of the original operation.
  MachineInstrBuilder ExtProduct =
      IsSigned ? MIRBuilder.buildSExtInReg(WideTy, WideProduct, SrcBitWidth)
               : MIRBuilder.buildZExtInReg(WideTy, WideProduct, SrcBitWidth);

  const bool NeedWideOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
  if (!NeedWideOverflow) {
    // With at least twice the original width the wide operation's overflow
    // bit is not consulted.
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowReg, WideProduct,
                         ExtProduct);
  } else {
    // Otherwise also take the wide operation's overflow bit into account.
    auto HighBitsOverflow = MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy,
                                                 WideProduct, ExtProduct);
    MIRBuilder.buildOr(OverflowReg, WideMulo.getReg(1), HighBitsOverflow);
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_ATOMICRMW_XCHG:
case TargetOpcode::G_ATOMICRMW_ADD:
case TargetOpcode::G_ATOMICRMW_SUB:
case TargetOpcode::G_ATOMICRMW_AND:
case TargetOpcode::G_ATOMICRMW_OR:
case TargetOpcode::G_ATOMICRMW_XOR:
case TargetOpcode::G_ATOMICRMW_MIN:
case TargetOpcode::G_ATOMICRMW_MAX:
case TargetOpcode::G_ATOMICRMW_UMIN:
case TargetOpcode::G_ATOMICRMW_UMAX:
assert(TypeIdx == 0 && "atomicrmw with second scalar type");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ATOMIC_CMPXCHG:
assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
if (TypeIdx == 0) {
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
assert(TypeIdx == 1 &&
"G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_EXTRACT:
return widenScalarExtract(MI, TypeIdx, WideTy);
case TargetOpcode::G_INSERT:
return widenScalarInsert(MI, TypeIdx, WideTy);
case TargetOpcode::G_MERGE_VALUES:
return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO:
case TargetOpcode::G_SADDE:
case TargetOpcode::G_SSUBE:
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
case TargetOpcode::G_UMULO:
case TargetOpcode::G_SMULO:
return widenScalarMulo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_USHLSAT:
return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP: {
if (TypeIdx == 0) {
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
Register SrcReg = MI.getOperand(1).getReg();
unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
? TargetOpcode::G_ANYEXT
: TargetOpcode::G_ZEXT;
auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
LLT CurTy = MRI.getType(SrcReg);
unsigned NewOpc = MI.getOpcode();
if (NewOpc == TargetOpcode::G_CTTZ) {
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
MIBNewOp = MIRBuilder.buildSub(
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
}
MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_BSWAP: {
Observer.changingInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
MI.getOperand(0).setReg(DstExt);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
LLT Ty = MRI.getType(DstReg);
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
MIRBuilder.buildTrunc(DstReg, ShrReg);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BITREVERSE: {
Observer.changingInstr(MI);
Register DstReg = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(DstReg);
unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
Register DstExt = MRI.createGenericVirtualRegister(WideTy);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
MI.getOperand(0).setReg(DstExt);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
MIRBuilder.buildTrunc(DstReg, Shift);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FREEZE:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ABS:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SHL:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
assert(TypeIdx == 1);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIVREM:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
widenScalarSrc(MI, WideTy, 1, CvtOp);
widenScalarDst(MI, WideTy);
} else {
assert(TypeIdx == 1);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_UMIN:
case TargetOpcode::G_UMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UDIVREM:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
Observer.changingInstr(MI);
if (TypeIdx == 0) {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_UITOFP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
else
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_STORE: {
if (TypeIdx != 0)
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
if (!Ty.isScalar())
return UnableToLegalize;
Observer.changingInstr(MI);
unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
widenScalarSrc(MI, WideTy, 0, ExtType);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
MRI.getType(MI.getOperand(0).getReg()));
assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
ExtOpc == TargetOpcode::G_ANYEXT) &&
"Illegal Extend");
const APInt &SrcVal = SrcMO.getCImm()->getValue();
const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
? SrcVal.sext(WideTy.getSizeInBits())
: SrcVal.zext(WideTy.getSizeInBits());
Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
MIRBuilder.setInstrAndDebugLoc(MI);
auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_IMPLICIT_DEF: {
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_BRCOND:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FCMP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_ICMP:
Observer.changingInstr(MI);
if (TypeIdx == 0)
widenScalarDst(MI, WideTy);
else {
unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
MI.getOperand(1).getPredicate()))
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
widenScalarSrc(MI, WideTy, 2, ExtOpcode);
widenScalarSrc(MI, WideTy, 3, ExtOpcode);
}
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTR_ADD:
assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PHI: {
assert(TypeIdx == 0 && "Expecting only Idx 0");
Observer.changingInstr(MI);
for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
}
MachineBasicBlock &MBB = *MI.getParent();
MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
widenScalarDst(MI, WideTy);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
if (TypeIdx == 0) {
Register VecReg = MI.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
Observer.changingInstr(MI);
widenScalarSrc(
MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_INSERT_VECTOR_ELT: {
if (TypeIdx == 1) {
Observer.changingInstr(MI);
Register VecReg = MI.getOperand(1).getReg();
LLT VecTy = MRI.getType(VecReg);
LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideVecTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
if (TypeIdx == 2) {
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
return UnableToLegalize;
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FNEARBYINT:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
assert(TypeIdx == 0);
Observer.changingInstr(MI);
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_FPOWI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTRTOINT:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_BUILD_VECTOR: {
Observer.changingInstr(MI);
const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
if (TypeIdx == 1) {
MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
} else {
widenScalarDst(MI, WideTy, 0);
}
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_SEXT_INREG:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
}
}
/// Build a G_UNMERGE_VALUES of \p Src into pieces of type \p Ty using \p B and
/// append each resulting register to \p Pieces, in operand order.
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto UnmergeMI = B.buildUnmerge(Ty, Src);
  // Every operand except the last one is collected.
  const int NumPieces = UnmergeMI->getNumOperands() - 1;
  for (int PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
    Register Piece = UnmergeMI.getReg(PieceIdx);
    Pieces.push_back(Piece);
  }
}
/// Lower a G_BITCAST in which at least one side is a vector.
///
/// The source is unmerged into pieces, each piece is optionally bitcast to an
/// intermediate type (vector-to-vector case only), and the pieces are merged
/// into the destination register. \p MI is erased on success.
///
/// \returns Legalized on success, or UnableToLegalize when neither the source
/// nor the destination type is a vector.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const bool SrcIsVec = SrcTy.isVector();
  const bool DstIsVec = DstTy.isVector();
  if (!SrcIsVec && !DstIsVec)
    return UnableToLegalize;

  SmallVector<Register, 8> Parts;
  if (!SrcIsVec) {
    // Scalar source, vector destination: split the scalar into pieces of the
    // destination element type.
    getUnmergePieces(Parts, MIRBuilder, SrcReg, DstTy.getElementType());
  } else if (!DstIsVec) {
    // Vector source, scalar destination: split into source elements.
    getUnmergePieces(Parts, MIRBuilder, SrcReg, SrcTy.getElementType());
  } else {
    // Vector to vector: pick the unmerge piece type and the per-piece cast
    // type from the relation between the two element counts.
    const int DstElts = DstTy.getNumElements();
    const int SrcElts = SrcTy.getNumElements();
    const LLT DstEltTy = DstTy.getElementType();

    LLT PieceTy = SrcTy.getElementType(); // Type of each unmerged piece.
    LLT CastPieceTy = DstEltTy;           // Type each piece is bitcast to.
    if (SrcElts < DstElts) {
      // Fewer source elements: every source element becomes a small vector
      // of destination elements.
      CastPieceTy = LLT::fixed_vector(DstElts / SrcElts, DstEltTy);
    } else if (SrcElts > DstElts) {
      // More source elements: unmerge into sub-vectors, each of which is
      // cast to a single destination element.
      PieceTy = LLT::fixed_vector(SrcElts / DstElts, SrcTy.getElementType());
    }

    getUnmergePieces(Parts, MIRBuilder, SrcReg, PieceTy);
    for (Register &Part : Parts)
      Part = MIRBuilder.buildBitcast(CastPieceTy, Part).getReg(0);
  }

  MIRBuilder.buildMerge(DstReg, Parts);
  MI.eraseFromParent();
  return Legalized;
}
/// Emit code computing the bit offset of a narrow element inside the wider
/// element that contains it after a bitcast to wider elements.
///
/// The emitted value is (\p Idx & (Ratio - 1)) << log2(\p OldEltSize), where
/// Ratio is 2^Log2_32(\p NewEltSize / \p OldEltSize). Returns the register
/// holding the result, which has the same type as \p Idx.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const LLT IndexTy = B.getMRI()->getType(Idx);
  const unsigned RatioLog2 = Log2_32(NewEltSize / OldEltSize);

  // Mask keeping only the low RatioLog2 bits of the index, i.e. the position
  // of the narrow element within its wide element.
  const APInt LowIdxBits =
      ~(APInt::getAllOnes(IndexTy.getSizeInBits()) << RatioLog2);
  auto SubIdxMask = B.buildConstant(IndexTy, LowIdxBits);
  auto SubIdx = B.buildAnd(IndexTy, Idx, SubIdxMask);

  // Scale the sub-index by the narrow element size to obtain a bit offset.
  auto OldEltSizeLog2 = B.buildConstant(IndexTy, Log2_32(OldEltSize));
  return B.buildShl(IndexTy, SubIdx, OldEltSizeLog2).getReg(0);
}
/// Legalize G_EXTRACT_VECTOR_ELT by bitcasting its source vector to \p CastTy
/// and re-expressing the extract on the casted value.
///
/// Only type index 1 (the source vector) is supported. Two shapes are handled:
///  - \p CastTy has more (narrower) elements than the source: the narrow
///    elements covering the requested element are extracted, rebuilt into a
///    vector and bitcast to the destination.
///  - \p CastTy has fewer (wider) elements, or is a scalar: the wide element
///    containing the requested element is extracted (or the whole scalar is
///    used), shifted right by the element's bit offset and truncated.
///
/// The G_BITCAST of the source is only emitted once a lowering path has been
/// committed to, so no instruction is left behind when the function returns
/// UnableToLegalize.
///
/// \returns Legalized (and erases \p MI) on success, UnableToLegalize
/// otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();

  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size: each old element maps onto a whole
    // number of new elements.
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

    // Type of the intermediate result holding the pieces of one old element.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy =
        LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // Index of the first narrow element belonging to the requested element.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    // Increasing the vector element size.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // The index/offset math below uses shifts and masks, so the size ratio
    // must be a power of two.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    // A scalar CastTy is itself the single wide element.
    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset of the target element inside the wide element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
        MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element so the target element lands in the low bits.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  // Same element count: nothing to gain from this strategy.
  return UnableToLegalize;
}
/// Emit code that overwrites a bit field of \p TargetReg with \p InsertReg.
///
/// The field is as wide as the type of \p InsertReg and starts at bit
/// \p OffsetBits (a register) of \p TargetReg. Returns the register holding
/// the updated value, which has the same type as \p TargetReg.
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  const LLT WideTy = B.getMRI()->getType(TargetReg);
  const LLT FieldTy = B.getMRI()->getType(InsertReg);

  // Zero-extend the new value and move it to its final position.
  auto WideVal = B.buildZExt(WideTy, InsertReg);
  auto PositionedVal = B.buildShl(WideTy, WideVal, OffsetBits);

  // Build a mask covering the field, move it into position and invert it.
  const APInt FieldBits =
      APInt::getLowBitsSet(WideTy.getSizeInBits(), FieldTy.getSizeInBits());
  auto FieldMask = B.buildConstant(WideTy, FieldBits);
  auto PositionedMask = B.buildShl(WideTy, FieldMask, OffsetBits);
  auto KeepMask = B.buildNot(WideTy, PositionedMask);

  // Clear the field in the old value, then OR in the zero-extended new value.
  auto ClearedTarget = B.buildAnd(WideTy, TargetReg, KeepMask);
  auto Combined = B.buildOr(WideTy, ClearedTarget, PositionedVal);
  return Combined.getReg(0);
}
/// Legalize G_INSERT_VECTOR_ELT by bitcasting the vector to \p CastTy, which
/// must have fewer (wider) elements than the original vector, or be a scalar.
///
/// The wide element containing the target element is extracted (or the whole
/// scalar is used), the new value is inserted into it as a bit field, the
/// wide element is written back, and the result is bitcast to the destination.
///
/// Only type index 0 is supported, and the element size ratio must be a power
/// of two. The G_BITCAST of the source is only emitted after all feasibility
/// checks have passed, so no instruction is left behind when the function
/// returns UnableToLegalize.
///
/// \returns Legalized (and erases \p MI) on success, UnableToLegalize
/// otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  // Only widening of the element type is implemented.
  if (NewNumElts >= OldNumElts)
    return UnableToLegalize;

  if (NewEltSize % OldEltSize != 0)
    return UnableToLegalize;

  // The index/offset math below uses shifts and masks, so the size ratio must
  // be a power of two.
  if (!isPowerOf2_32(NewEltSize / OldEltSize))
    return UnableToLegalize;

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

  // Divide to get the index in the wider element type.
  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

  // A scalar CastTy is itself the single wide element.
  Register ExtractedElt = CastVec;
  if (CastTy.isVector()) {
    ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                        ScaledIdx).getReg(0);
  }

  // Compute the bit offset of the target element inside the wide element.
  Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                             Val, OffsetBits);
  if (CastTy.isVector()) {
    // Write the updated wide element back into the casted vector.
    InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
  }

  MIRBuilder.buildBitcast(Dst, InsertedElt);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower a G_LOAD / G_SEXTLOAD / G_ZEXTLOAD.
///
/// Two strategies are implemented:
///  1. If the memory type does not occupy a whole number of bytes, the load
///     is rewritten as a load of the byte-rounded scalar type followed by
///     G_SEXT_INREG / G_ASSERT_ZEXT and, if needed, a truncate to the
///     destination type. Vectors are not handled on this path.
///  2. Otherwise the access is split into a "large" and a "small" load whose
///     results are combined with a shift and an OR. Non-power-of-2 sizes are
///     split at the largest power of 2 below the size; power-of-2 sizes are
///     split in half, but only when the target reports that the access is
///     not allowed as-is. Vectors are forwarded to reduceLoadStoreWidth.
///
/// \returns Legalized (and erases \p LoadMI) on success, UnableToLegalize
/// otherwise.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
Register DstReg = LoadMI.getDstReg();
Register PtrReg = LoadMI.getPointerReg();
LLT DstTy = MRI.getType(DstReg);
MachineMemOperand &MMO = LoadMI.getMMO();
LLT MemTy = MMO.getMemoryType();
MachineFunction &MF = MIRBuilder.getMF();
unsigned MemSizeInBits = MemTy.getSizeInBits();
// Size in bits once rounded to whole bytes.
unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
// Strategy 1: the memory type is not a whole number of bytes.
if (MemSizeInBits != MemStoreSizeInBits) {
if (MemTy.isVector())
return UnableToLegalize;
// Load the byte-rounded scalar type instead.
LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
Register LoadReg = DstReg;
LLT LoadTy = DstTy;
// If the widened memory type is larger than the destination, load into a
// fresh wide register and truncate afterwards.
if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
LoadTy = WideMemTy;
LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
}
if (isa<GSExtLoad>(LoadMI)) {
// Sign-extending load: plain load, then sign-extend from the original
// memory width.
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
} else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
// Zero-extending load, or a load of exactly the widened memory type:
// plain load, annotated with G_ASSERT_ZEXT at the original memory width.
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
} else {
MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
}
if (DstTy != LoadTy)
MIRBuilder.buildTrunc(DstReg, LoadReg);
LoadMI.eraseFromParent();
return Legalized;
}
// Strategy 2: split the access. Big-endian layouts are rejected here.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
// Non-power-of-2 size: e.g. 24 bits splits into 16 + 8.
LargeSplitSize = PowerOf2Floor(MemSizeInBits);
SmallSplitSize = MemSizeInBits - LargeSplitSize;
} else {
// Power-of-2 size: only split (in half) when the target does not allow
// this access as-is; otherwise there is nothing to do here.
auto &Ctx = MF.getFunction().getContext();
if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
return UnableToLegalize;
SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
}
if (MemTy.isVector()) {
// Vector loads whose memory type differs from the result type are not
// handled; matching ones are reduced to the element type.
if (MemTy != DstTy)
return UnableToLegalize;
return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
}
// Memory operands for the two pieces: the large piece at offset 0 and the
// small piece immediately after it.
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
LLT PtrTy = MRI.getType(PtrReg);
// Both pieces are loaded into a scalar of the next power-of-2 size at or
// above the destination size.
unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
LLT AnyExtTy = LLT::scalar(AnyExtSize);
// The large piece is always zero-extended.
auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
PtrReg, *LargeMMO);
auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
LargeSplitSize / 8);
Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
// The small piece reuses the opcode of the original load.
auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
SmallPtr, *SmallMMO);
// Shift the small piece above the large one and combine them.
auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
// Destination is narrower than the combined value: truncate.
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
} else {
// Same size but different type: the destination must be a pointer.
assert(DstTy.isPointer() && "expected pointer");
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildIntToPtr(DstReg, Or);
}
LoadMI.eraseFromParent();
return Legalized;
}
/// Lower a G_STORE.
///
/// Two strategies are implemented:
///  1. If the memory type does not occupy a whole number of bytes, the value
///     is (any-extended if necessary and) zero-extended in-register from the
///     original store width, then stored with the byte-rounded scalar memory
///     type. Vector sources are not handled on this path.
///  2. Otherwise the store is split into a "large" and a "small" store.
///     Non-power-of-2 sizes are split at the largest power of 2 below the
///     size; power-of-2 sizes are split in half, but only when the target
///     reports that the access is not allowed as-is. Vectors are forwarded
///     to reduceLoadStoreWidth.
///
/// \returns Legalized (and erases \p StoreMI) on success, UnableToLegalize
/// otherwise.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
Register SrcReg = StoreMI.getValueReg();
Register PtrReg = StoreMI.getPointerReg();
LLT SrcTy = MRI.getType(SrcReg);
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand &MMO = **StoreMI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();
unsigned StoreWidth = MemTy.getSizeInBits();
// Size in bits once rounded to whole bytes.
unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
// Strategy 1: the memory type is not a whole number of bytes.
if (StoreWidth != StoreSizeInBits) {
if (SrcTy.isVector())
return UnableToLegalize;
LLT WideTy = LLT::scalar(StoreSizeInBits);
// Widen the source when it is narrower than the byte-rounded memory type.
if (StoreSizeInBits > SrcTy.getSizeInBits()) {
SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
SrcTy = WideTy;
}
// Zero the bits above the original store width before storing.
auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
StoreMI.eraseFromParent();
return Legalized;
}
if (MemTy.isVector()) {
// Vector stores whose memory type differs from the source type are not
// handled; matching ones are reduced to the element type.
if (MemTy != SrcTy)
return UnableToLegalize;
return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
}
// Strategy 2: split the store into two pieces.
unsigned MemSizeInBits = MemTy.getSizeInBits();
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
// Non-power-of-2 size: e.g. 24 bits splits into 16 + 8.
LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
} else {
// Power-of-2 size: only split (in half) when the target does not allow
// this access as-is; otherwise there is nothing to do here.
auto &Ctx = MF.getFunction().getContext();
if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
return UnableToLegalize;
SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
}
// Work on a scalar of the next power-of-2 size at or above the memory size.
unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
const LLT NewSrcTy = LLT::scalar(AnyExtSize);
// Pointers are converted to integers of the same width first.
if (SrcTy.isPointer()) {
const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
}
auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// The small piece is the value shifted right past the large piece.
auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
// Address of the small piece: base pointer plus the large piece's byte size.
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
// Memory operands for the two pieces: the large piece at offset 0 and the
// small piece immediately after it.
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
StoreMI.eraseFromParent();
return Legalized;
}
/// Apply the "bitcast" legalization action to \p MI, changing the type at
/// \p TypeIdx to \p CastTy.
///
/// Supported opcodes are G_LOAD, G_STORE, scalar-condition G_SELECT,
/// G_AND/G_OR/G_XOR, and the vector element insert/extract opcodes (which are
/// delegated to their dedicated helpers). Everything else yields
/// UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
  const unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    // The cast must not change how many bits are accessed in memory.
    MachineMemOperand &MemOp = **MI.memoperands_begin();
    const bool SameSize =
        MemOp.getMemoryType().getSizeInBits() == CastTy.getSizeInBits();
    if (!SameSize)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    // Operand 0 is the loaded value for loads and the stored value for stores.
    if (Opc == TargetOpcode::G_LOAD)
      bitcastDst(MI, CastTy, 0);
    else
      bitcastSrc(MI, CastTy, 0);
    MemOp.setType(CastTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SELECT: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    const LLT CondTy = MRI.getType(MI.getOperand(1).getReg());
    if (CondTy.isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
      return UnableToLegalize;
    }

    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 2);
    bitcastSrc(MI, CastTy, 3);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise operations: cast both inputs and the result.
    Observer.changingInstr(MI);
    bitcastSrc(MI, CastTy, 1);
    bitcastSrc(MI, CastTy, 2);
    bitcastDst(MI, CastTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  default:
    break;
  }
  return UnableToLegalize;
}
/// Replace the opcode of \p MI with \p NewOpcode in place, notifying the
/// change observer before and after the mutation.
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  const auto &NewDesc = MIRBuilder.getTII().get(NewOpcode);
  MI.setDesc(NewDesc);
  Observer.changedInstr(MI);
}
/// Apply the "lower" legalization action to \p MI: rewrite it in terms of
/// other generic instructions.
///
/// Most opcodes are dispatched to a dedicated lowerXXX helper; a handful of
/// simple expansions are implemented inline. \p TypeIdx and \p LowerHintTy
/// are not consulted by this function itself.
///
/// \returns Legalized when \p MI was rewritten (it is usually erased), or
/// UnableToLegalize when no lowering is available.
LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // rem = LHS - (LHS / RHS) * RHS, using the division of matching
    // signedness.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
    MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    Register Res = MI.getOperand(0).getReg();
    Register Overflow = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    // Turn the instruction itself into the G_MUL by dropping the overflow
    // def.
    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);
    Observer.changedInstr(MI);

    auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
    auto Zero = MIRBuilder.buildConstant(Ty, 0);

    // Move insert point forward so we can use the Res register if needed.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // For signed multiply, overflow is detected by checking
    // (hi != (lo >> bitwidth-1)); for unsigned, by checking (hi != 0).
    if (Opcode == TargetOpcode::G_SMULH) {
      auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
      auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
    } else {
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
    }
    return Legalized;
  }
  case TargetOpcode::G_FNEG: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Vector types are not handled here.
    if (Ty.isVector())
      return UnableToLegalize;
    // Negate by flipping the sign bit.
    auto SignMask =
        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
    Register SubByReg = MI.getOperand(1).getReg();
    MIRBuilder.buildXor(Res, SubByReg, SignMask);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB: {
    Register Res = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(Res);

    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). Bail out if
    // G_FNEG is itself marked as Lower for this type.
    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
      return UnableToLegalize;
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    Register Neg = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFNeg(Neg, RHS);
    MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // Rewritten in place as G_FRINT.
    changeOpcode(MI, TargetOpcode::G_FRINT);
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // Plain cmpxchg; success is recomputed as (old value == expected value).
    Register OldValRes = MI.getOperand(0).getReg();
    Register SuccessRes = MI.getOperand(1).getReg();
    Register Addr = MI.getOperand(2).getReg();
    Register CmpVal = MI.getOperand(3).getReg();
    Register NewVal = MI.getOperand(4).getReg();
    MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(cast<GAnyLoad>(MI));
  case TargetOpcode::G_STORE:
    return lowerStore(cast<GStore>(MI));
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    // Res = LHS + RHS; carry-out iff the sum wrapped, i.e. Res <u RHS.
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildAdd(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    Register Res = MI.getOperand(0).getReg();
    Register CarryOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register CarryIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(CarryOut);
    const LLT Ty = MRI.getType(Res);

    // Initial add of the two operands.
    auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);

    // First carry check: LHS + RHS wrapped.
    auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);

    // Add the incoming carry to the partial sum.
    auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
    MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);

    // Second carry check: adding the carry-in can only wrap when the partial
    // sum was all ones, which yields a final result of zero. Comparing the
    // final result against LHS alone misses this case (e.g. RHS all ones
    // with CarryIn set gives Res == LHS although a carry was produced).
    auto Zero = MIRBuilder.buildConstant(Ty, 0);
    auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
    auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
    MIRBuilder.buildOr(CarryOut, Carry, Carry2);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    // Res = LHS - RHS; borrow-out iff LHS <u RHS.
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();

    MIRBuilder.buildSub(Res, LHS, RHS);
    MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    Register Res = MI.getOperand(0).getReg();
    Register BorrowOut = MI.getOperand(1).getReg();
    Register LHS = MI.getOperand(2).getReg();
    Register RHS = MI.getOperand(3).getReg();
    Register BorrowIn = MI.getOperand(4).getReg();
    const LLT CondTy = MRI.getType(BorrowOut);
    const LLT Ty = MRI.getType(Res);

    auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
    auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
    MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);

    // Borrow-out is the borrow-in when the operands are equal, otherwise it
    // is LHS <u RHS.
    auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
    auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
    MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);

    MI.eraseFromParent();
    return Legalized;
  }
  case G_UITOFP:
    return lowerUITOFP(MI);
  case G_SITOFP:
    return lowerSITOFP(MI);
  case G_FPTOUI:
    return lowerFPTOUI(MI);
  case G_FPTOSI:
    return lowerFPTOSI(MI);
  case G_FPTRUNC:
    return lowerFPTRUNC(MI);
  case G_FPOWI:
    return lowerFPOWI(MI);
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
    return lowerMinMax(MI);
  case G_FCOPYSIGN:
    return lowerFCopySign(MI);
  case G_FMINNUM:
  case G_FMAXNUM:
    return lowerFMinNumMaxNum(MI);
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    // Shift left so the field's top bit becomes the sign bit, then shift back
    // arithmetically by the same amount.
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);

    auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
    MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
    MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
    MI.eraseFromParent();
    return Legalized;
  }
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_EXTRACT:
    return lowerExtract(MI);
  case G_INSERT:
    return lowerInsert(MI);
  case G_BSWAP:
    return lowerBswap(MI);
  case G_BITREVERSE:
    return lowerBitreverse(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  case G_UADDSAT:
  case G_USUBSAT: {
    // Use the min/max expansion when G_UMIN is legal or custom for this type;
    // otherwise fall back to the overflow-op expansion.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (LI.isLegalOrCustom({G_UMIN, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SADDSAT:
  case G_SSUBSAT: {
    // Use the min/max expansion only when both G_SMIN and G_SMAX are legal
    // or custom for this type.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
      return lowerAddSubSatToMinMax(MI);
    return lowerAddSubSatToAddoSubo(MI);
  }
  case G_SSHLSAT:
  case G_USHLSAT:
    return lowerShlSat(MI);
  case G_ABS:
    return lowerAbsToAddXor(MI);
  case G_SELECT:
    return lowerSelect(MI);
  case G_SDIVREM:
  case G_UDIVREM:
    return lowerDIVREM(MI);
  case G_FSHL:
  case G_FSHR:
    return lowerFunnelShift(MI);
  case G_ROTL:
  case G_ROTR:
    return lowerRotate(MI);
  case G_MEMSET:
  case G_MEMCPY:
  case G_MEMMOVE:
    return lowerMemCpyFamily(MI);
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  GISEL_VECREDUCE_CASES_NONSEQ
    return lowerVectorReduction(MI);
  }
}
/// Pick the alignment for a stack temporary of type \p Ty: the byte size of
/// the type rounded up to a power of two, but never less than \p MinAlign.
Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
                                                  Align MinAlign) const {
  const Align SizeBasedAlign(PowerOf2Ceil(Ty.getSizeInBytes()));
  if (SizeBasedAlign < MinAlign)
    return MinAlign;
  return SizeBasedAlign;
}
/// Create a stack object of \p Bytes bytes with the given \p Alignment and
/// emit a G_FRAME_INDEX for it.
///
/// \p PtrInfo is overwritten with the fixed-stack pointer info of the new
/// object. The returned instruction defines a pointer in the data layout's
/// alloca address space.
MachineInstrBuilder
LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
                                      MachinePointerInfo &PtrInfo) {
  MachineFunction &Func = MIRBuilder.getMF();
  const DataLayout &Layout = MIRBuilder.getDataLayout();

  const int SlotIdx =
      Func.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
  PtrInfo = MachinePointerInfo::getFixedStack(Func, SlotIdx);

  const unsigned AllocaAS = Layout.getAllocaAddrSpace();
  const LLT SlotPtrTy =
      LLT::pointer(AllocaAS, Layout.getPointerSizeInBits(AllocaAS));
  return MIRBuilder.buildFrameIndex(SlotPtrTy, SlotIdx);
}
/// Emit code restricting a dynamic vector index to the valid element range
/// of \p VecTy.
///
/// Indices matched as integer constants are returned unchanged. For
/// power-of-two element counts the index is masked to its low bits; otherwise
/// it is clamped with an unsigned minimum against the last valid index.
static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
                                        LLT VecTy) {
  auto &RegInfo = *B.getMRI();

  int64_t ConstIdx;
  if (mi_match(IdxReg, RegInfo, m_ICst(ConstIdx)))
    return IdxReg;

  const LLT IndexTy = RegInfo.getType(IdxReg);
  const unsigned NumElts = VecTy.getNumElements();

  if (!isPowerOf2_32(NumElts)) {
    auto LastIdx = B.buildConstant(IndexTy, NumElts - 1);
    return B.buildUMin(IndexTy, IdxReg, LastIdx).getReg(0);
  }

  const APInt LowBits =
      APInt::getLowBitsSet(IndexTy.getSizeInBits(), Log2_32(NumElts));
  auto IdxMask = B.buildConstant(IndexTy, LowBits);
  return B.buildAnd(IndexTy, IdxReg, IdxMask).getReg(0);
}
/// Emit code computing the address of element \p Index of a vector of type
/// \p VecTy stored at \p VecPtr.
///
/// The index is first clamped to the vector's element range, then scaled by
/// the element size in bytes and added to \p VecPtr. The element size must be
/// a whole number of bytes.
Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
                                                  Register Index) {
  const LLT ElementTy = VecTy.getElementType();

  // Element stride in bytes.
  const unsigned EltBytes = ElementTy.getSizeInBits() / 8;
  assert(EltBytes * 8 == ElementTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);

  const LLT IndexTy = MRI.getType(Index);
  auto Stride = MIRBuilder.buildConstant(IndexTy, EltBytes);
  auto ByteOffset = MIRBuilder.buildMul(IndexTy, Index, Stride);

  const LLT VecPtrTy = MRI.getType(VecPtr);
  auto EltPtr = MIRBuilder.buildPtrAdd(VecPtrTy, VecPtr, ByteOffset);
  return EltPtr.getReg(0);
}
#ifndef NDEBUG
/// Debug-only sanity check: returns true when \p MI has no memory operands,
/// its first operand is a vector, and every later operand is either a vector
/// register with the same element count or is listed in \p NonVecOpIndices.
static bool hasSameNumEltsOnAllVectorOperands(
    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
    return false;

  const LLT ResultTy = MRI.getType(MI.getReg(0));
  if (!ResultTy.isVector())
    return false;
  const unsigned ExpectedElts = ResultTy.getNumElements();

  for (unsigned Idx = 1; Idx < MI.getNumOperands(); ++Idx) {
    MachineOperand &Operand = MI.getOperand(Idx);

    const bool IsVectorReg =
        Operand.isReg() && MRI.getType(Operand.getReg()).isVector();
    if (!IsVectorReg) {
      // Non-register and non-vector operands must be declared by the caller.
      if (!is_contained(NonVecOpIndices, Idx))
        return false;
      continue;
    }

    if (MRI.getType(Operand.getReg()).getNumElements() != ExpectedElts)
      return false;
  }

  return true;
}
#endif
/// Append to \p DstOps the destination types obtained by splitting vector
/// type \p Ty into pieces of \p NumElts elements each (a scalar when
/// \p NumElts is 1), followed by one leftover type if the split is uneven.
static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
                       unsigned NumElts) {
  assert(Ty.isVector() && "Expected vector type");

  const LLT ScalarTy = Ty.getElementType();
  LLT PieceTy = ScalarTy;
  if (NumElts != 1)
    PieceTy = LLT::fixed_vector(NumElts, ScalarTy);

  LLT RemainderTy;
  const std::pair<int, int> Breakdown =
      getNarrowTypeBreakDown(Ty, PieceTy, RemainderTy);
  assert(Breakdown.first > 0 && "Error in getNarrowTypeBreakDown");

  // One entry per full-sized piece.
  for (int Remaining = Breakdown.first; Remaining > 0; --Remaining)
    DstOps.push_back(PieceTy);

  if (!RemainderTy.isValid())
    return;

  assert(Breakdown.second == 1 && "expected exactly one leftover");
  DstOps.push_back(RemainderTy);
}
/// Append \p N copies of \p Op to \p Ops as source operands. Register,
/// immediate and predicate operands are supported.
static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
                           MachineOperand &Op) {
  for (unsigned Copy = 0; Copy != N; ++Copy) {
    if (Op.isReg()) {
      Ops.push_back(Op.getReg());
      continue;
    }
    if (Op.isImm()) {
      Ops.push_back(Op.getImm());
      continue;
    }
    if (Op.isPredicate()) {
      Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
      continue;
    }
    llvm_unreachable("Unsupported type");
  }
}
/// Split a vector instruction into several instructions of the same opcode
/// that each operate on \p NumElts elements (plus one leftover instruction if
/// the element count does not divide evenly), then recombine the results.
///
/// Operands whose indices are listed in \p NonVecOpIndices are not split;
/// they are repeated unchanged on every piece. \p MI is erased.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    GenericMachineInstr &MI, unsigned NumElts,
    std::initializer_list<unsigned> NonVecOpIndices) {
  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
         "Non-compatible opcode or not specified non-vector operands");

  const unsigned FullNumElts = MRI.getType(MI.getReg(0)).getNumElements();
  const unsigned DefCount = MI.getNumDefs();
  const unsigned UseCount = MI.getNumOperands() - DefCount;

  // Result types of every piece, per def, and the registers produced for them.
  SmallVector<SmallVector<DstOp, 8>, 2> DefPieces(DefCount);
  SmallVector<SmallVector<Register, 8>, 2> ResultRegs(DefCount);
  for (unsigned DefIdx = 0; DefIdx != DefCount; ++DefIdx)
    makeDstOps(DefPieces[DefIdx], MRI.getType(MI.getReg(DefIdx)), NumElts);

  // Source operands of every piece, per use.
  SmallVector<SmallVector<SrcOp, 8>, 3> UsePieces(UseCount);
  for (unsigned UseNo = 0; UseNo != UseCount; ++UseNo) {
    const unsigned OpIdx = DefCount + UseNo;
    if (!is_contained(NonVecOpIndices, OpIdx)) {
      // Vector operand: split it into sub-vectors.
      SmallVector<Register, 8> SubRegs;
      extractVectorParts(MI.getReg(OpIdx), NumElts, SubRegs);
      for (Register SubReg : SubRegs)
        UsePieces[UseNo].push_back(SubReg);
      continue;
    }
    // Non-vector operand: repeat it once per piece.
    broadcastSrcOp(UsePieces[UseNo], DefPieces[0].size(),
                   MI.getOperand(OpIdx));
  }

  // Emit one narrower instruction per piece.
  const bool HasLeftover = (FullNumElts % NumElts) != 0;
  const unsigned PieceCount = FullNumElts / NumElts + (HasLeftover ? 1 : 0);
  for (unsigned Piece = 0; Piece != PieceCount; ++Piece) {
    SmallVector<DstOp, 2> PieceDefs;
    for (unsigned DefIdx = 0; DefIdx != DefCount; ++DefIdx)
      PieceDefs.push_back(DefPieces[DefIdx][Piece]);

    SmallVector<SrcOp, 3> PieceUses;
    for (unsigned UseNo = 0; UseNo != UseCount; ++UseNo)
      PieceUses.push_back(UsePieces[UseNo][Piece]);

    auto PieceMI = MIRBuilder.buildInstr(MI.getOpcode(), PieceDefs, PieceUses,
                                         MI.getFlags());
    for (unsigned DefIdx = 0; DefIdx != DefCount; ++DefIdx)
      ResultRegs[DefIdx].push_back(PieceMI.getReg(DefIdx));
  }

  // Recombine the pieces into the original result registers.
  for (unsigned DefIdx = 0; DefIdx != DefCount; ++DefIdx) {
    if (HasLeftover)
      mergeMixedSubvectors(MI.getReg(DefIdx), ResultRegs[DefIdx]);
    else
      MIRBuilder.buildMerge(MI.getReg(DefIdx), ResultRegs[DefIdx]);
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Legalize the vector G_PHI \p MI by replacing it with several G_PHIs that
/// each define a piece of at most \p NumElts elements (plus one leftover piece
/// when the element count is not a multiple of \p NumElts) and merging the
/// pieces back into the original destination register.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
                                        unsigned NumElts) {
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  SmallVector<DstOp, 8> OutputOpsPieces;
  SmallVector<Register, 8> OutputRegs;
  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);

  // Incoming operands come in (value, basic block) pairs. Each incoming value
  // is split in front of the first terminator of the block named by the
  // operand that follows it.
  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
  }

  // Build the narrower PHIs right in front of the original one.
  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  MIRBuilder.setInsertPt(*MI.getParent(), MI);
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
    Phi.addDef(
        MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
    OutputRegs.push_back(Phi.getReg(0));

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      // Reuse the incoming-block operand of the j-th (value, block) pair.
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }

  // The instructions that recombine the pieces are not PHIs, so they must not
  // be emitted at the current insertion point (in front of MI): other PHIs may
  // follow MI in the block. Move the insertion point past all PHIs first.
  MachineBasicBlock &MBB = *MI.getParent();
  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());

  // Merge the narrow PHI results into MI's def.
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Legalize a G_UNMERGE_VALUES whose source type (type index 1) is too wide by
/// unmerging in two steps: first the source into \p NarrowTy pieces, then each
/// \p NarrowTy piece into the original destination registers.
///
/// Returns UnableToLegalize unless \p TypeIdx is 1, \p NarrowTy differs from
/// the destination type, and the source / \p NarrowTy / destination sizes
/// divide each other evenly.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
                                                  unsigned TypeIdx,
                                                  LLT NarrowTy) {
  // The source is the last operand; everything before it is a def.
  const int DefCount = MI.getNumOperands() - 1;
  const Register Src = MI.getOperand(DefCount).getReg();
  const LLT DefTy = MRI.getType(MI.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(Src);

  if (TypeIdx != 1 || NarrowTy == DefTy)
    return UnableToLegalize;

  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");

  const bool SrcSplitsEvenly =
      SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() == 0;
  const bool NarrowSplitsEvenly =
      NarrowTy.getSizeInBits() % DefTy.getSizeInBits() == 0;
  if (!SrcSplitsEvenly || !NarrowSplitsEvenly)
    return UnableToLegalize;

  // Step 1: source -> NarrowTy pieces.
  auto OuterUnmerge = MIRBuilder.buildUnmerge(NarrowTy, Src);
  const int PieceCount = OuterUnmerge->getNumOperands() - 1;
  const int DefsPerPiece = DefCount / PieceCount;

  // Step 2: each NarrowTy piece -> the next DefsPerPiece original defs.
  int NextDef = 0;
  for (int Piece = 0; Piece != PieceCount; ++Piece) {
    auto InnerUnmerge = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int End = NextDef + DefsPerPiece; NextDef != End; ++NextDef)
      InnerUnmerge.addDef(MI.getOperand(NextDef).getReg());
    InnerUnmerge.addUse(OuterUnmerge.getReg(Piece));
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Legalize a merge-like instruction (the visible callers pass G_BUILD_VECTOR
/// and G_CONCAT_VECTORS) by first merging the sources into \p NarrowTy pieces
/// and then merging those pieces into the destination.
///
/// For \p TypeIdx 1 the sources are unmerged to scalars and regrouped into
/// \p NarrowTy vectors; otherwise consecutive sources are merged directly.
/// Returns UnableToLegalize when the involved sizes do not divide evenly or
/// \p NarrowTy equals the source type.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
  if (NarrowTy == SrcTy)
    return UnableToLegalize;

  // Registers of the NarrowTy pieces that are finally merged into DstReg.
  SmallVector<Register, 8> NarrowPieces;

  if (TypeIdx == 1) {
    assert(SrcTy.isVector() && "Expected vector types");
    assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
    const bool DstSplitsEvenly =
        DstTy.getSizeInBits() % NarrowTy.getSizeInBits() == 0;
    if (!DstSplitsEvenly ||
        NarrowTy.getNumElements() >= SrcTy.getNumElements())
      return UnableToLegalize;

    // Break every source vector into its scalar elements.
    SmallVector<Register, 8> Scalars;
    const LLT ScalarTy = SrcTy.getScalarType();
    for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
      auto ToScalars =
          MIRBuilder.buildUnmerge(ScalarTy, MI.getOperand(OpIdx).getReg());
      for (unsigned Def = 0; Def < ToScalars->getNumDefs(); ++Def)
        Scalars.push_back(ToScalars.getReg(Def));
    }

    // Regroup consecutive runs of scalars into NarrowTy vectors.
    const unsigned EltsPerPiece = NarrowTy.getNumElements();
    const unsigned PieceCount = DstTy.getNumElements() / EltsPerPiece;
    for (unsigned Piece = 0; Piece < PieceCount; ++Piece) {
      ArrayRef<Register> PieceElts(&Scalars[Piece * EltsPerPiece],
                                   EltsPerPiece);
      NarrowPieces.push_back(
          MIRBuilder.buildMerge(NarrowTy, PieceElts).getReg(0));
    }
  } else {
    assert(TypeIdx == 0 && "Bad type index");
    const bool NarrowHoldsSrcs =
        NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() == 0;
    const bool DstHoldsNarrows =
        DstTy.getSizeInBits() % NarrowTy.getSizeInBits() == 0;
    if (!NarrowHoldsSrcs || !DstHoldsNarrows)
      return UnableToLegalize;

    // Merge each run of SrcsPerPiece consecutive sources into one piece.
    const unsigned PieceCount =
        DstTy.getNumElements() / NarrowTy.getNumElements();
    const unsigned EltsPerSrc = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
    const unsigned SrcsPerPiece = NarrowTy.getNumElements() / EltsPerSrc;
    for (unsigned Piece = 0; Piece < PieceCount; ++Piece) {
      SmallVector<Register, 8> PieceSrcs;
      const unsigned FirstOp = 1 + Piece * SrcsPerPiece;
      for (unsigned Off = 0; Off < SrcsPerPiece; ++Off)
        PieceSrcs.push_back(MI.getOperand(FirstOp + Off).getReg());
      NarrowPieces.push_back(
          MIRBuilder.buildMerge(NarrowTy, PieceSrcs).getReg(0));
    }
  }

  MIRBuilder.buildMerge(DstReg, NarrowPieces);
  MI.eraseFromParent();
  return Legalized;
}
/// Legalize G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT on a too-wide vector.
///
/// With a constant index the source vector is broken into \p NarrowVecTy
/// sized pieces and the operation is performed on the single piece that holds
/// the addressed element. A constant index that is out of bounds produces an
/// undef result. With a non-constant index the instruction is handed to
/// lowerExtractInsertVectorElt. Returns UnableToLegalize when \p NarrowVecTy
/// is not a vector.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // Without a constant index the piece to operate on is unknown.
  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
  if (!MaybeCst)
    return lowerExtractInsertVectorElt(MI);

  // Bounds-check the index as an unsigned value *before* converting it to a
  // host integer. A sign-extended read would turn an index with the top bit
  // set into a negative number that slips past a plain ">=" check and then
  // indexes VecParts out of range.
  const APInt &CstIdx = MaybeCst->Value;
  if (CstIdx.uge(VecTy.getNumElements())) {
    MIRBuilder.buildUndef(DstReg);
    MI.eraseFromParent();
    return Legalized;
  }
  // In range, hence small and non-negative.
  const int64_t IdxVal = CstIdx.getZExtValue();

  SmallVector<Register, 8> VecParts;
  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

  // Turn the GCD-typed parts into a sequence of NarrowVecTy pieces.
  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);

  unsigned NewNumElts = NarrowVecTy.getNumElements();

  // Piece that contains the element, and the element's index inside it.
  LLT IdxTy = MRI.getType(Idx);
  int64_t PartIdx = IdxVal / NewNumElts;
  auto NewIdx = MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

  if (IsInsert) {
    LLT PartTy = MRI.getType(VecParts[PartIdx]);

    // Insert into the selected piece, then rebuild the full result.
    auto InsertPart = MIRBuilder.buildInsertVectorElement(
        PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);
    buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
  } else {
    MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Split the load or store \p LdStMI into several memory accesses of type
/// \p NarrowTy, plus accesses of a leftover type when the value type does not
/// break down evenly.
///
/// Returns UnableToLegalize when \p TypeIdx is not 0, when the access is
/// atomic, when the value size differs from the accessed memory size, or when
/// the value type cannot be broken down into \p NarrowTy pieces.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 0)
return UnableToLegalize;
if (LdStMI.isAtomic())
return UnableToLegalize;
bool IsLoad = isa<GLoad>(LdStMI);
Register ValReg = LdStMI.getReg(0);
Register AddrReg = LdStMI.getPointerReg();
LLT ValTy = MRI.getType(ValReg);
// The register value must cover exactly the accessed memory.
if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
return UnableToLegalize;
}
// -1 marks "no valid breakdown found".
int NumParts = -1;
int NumLeftover = -1;
LLT LeftoverTy;
SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
// A load only needs the type breakdown here (its piece registers are created
// while emitting the narrow loads). A store splits the stored value up front.
if (IsLoad) {
std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
} else {
if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
NarrowLeftoverRegs)) {
NumParts = NarrowRegs.size();
NumLeftover = NarrowLeftoverRegs.size();
}
}
if (NumParts == -1)
return UnableToLegalize;
LLT PtrTy = MRI.getType(AddrReg);
// Offsets added to the address use a scalar as wide as the pointer.
const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
unsigned TotalSize = ValTy.getSizeInBits();
bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
auto MMO = LdStMI.getMMO();
// Emit up to NumParts accesses of type PartTy, the first one at bit offset
// Offset, stopping early once Offset is no longer below TotalSize. Loads
// append the registers they define to ValRegs; stores take the value of the
// Idx-th access from ValRegs[Idx]. Returns the bit offset that follows the
// last emitted access.
auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
unsigned NumParts, unsigned Offset) -> unsigned {
MachineFunction &MF = MIRBuilder.getMF();
unsigned PartSize = PartTy.getSizeInBits();
for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
++Idx) {
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
// Derive the memory operand of the piece from the original one.
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
if (IsLoad) {
Register Dst = MRI.createGenericVirtualRegister(PartTy);
ValRegs.push_back(Dst);
MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
} else {
MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
}
// Big-endian layouts walk the offsets downwards, little-endian upwards.
Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
}
return Offset;
};
// On big-endian targets the first piece sits at the highest offset.
unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
unsigned HandledOffset =
splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
// Continue with the leftover pieces where the NarrowTy pieces stopped.
if (LeftoverTy.isValid())
splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
// For a load, reassemble the loaded pieces into the original destination.
if (IsLoad) {
insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
LeftoverTy, NarrowLeftoverRegs);
}
LdStMI.eraseFromParent();
return Legalized;
}
/// Entry point of the "fewer elements" legalization: dispatch on the opcode of
/// \p MI to the helper that splits the instruction into operations on
/// \p NarrowTy. Opcodes without a case return UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
using namespace TargetOpcode;
GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
// A non-vector NarrowTy requests pieces of a single element.
unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
switch (MI.getOpcode()) {
// Opcodes handled by splitting every operand (no operand is declared as a
// non-vector operand).
case G_IMPLICIT_DEF:
case G_TRUNC:
case G_AND:
case G_OR:
case G_XOR:
case G_ADD:
case G_SUB:
case G_MUL:
case G_PTR_ADD:
case G_SMULH:
case G_UMULH:
case G_FADD:
case G_FMUL:
case G_FSUB:
case G_FNEG:
case G_FABS:
case G_FCANONICALIZE:
case G_FDIV:
case G_FREM:
case G_FMA:
case G_FMAD:
case G_FPOW:
case G_FEXP:
case G_FEXP2:
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
case G_FRINT:
case G_INTRINSIC_ROUND:
case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
case G_BITREVERSE:
case G_SDIV:
case G_UDIV:
case G_SREM:
case G_UREM:
case G_SDIVREM:
case G_UDIVREM:
case G_SMIN:
case G_SMAX:
case G_UMIN:
case G_UMAX:
case G_ABS:
case G_FMINNUM:
case G_FMAXNUM:
case G_FMINNUM_IEEE:
case G_FMAXNUM_IEEE:
case G_FMINIMUM:
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
case G_ROTL:
case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
case G_UADDSAT:
case G_USUBSAT:
case G_UMULO:
case G_SMULO:
case G_SHL:
case G_LSHR:
case G_ASHR:
case G_SSHLSAT:
case G_USHLSAT:
case G_CTLZ:
case G_CTLZ_ZERO_UNDEF:
case G_CTTZ:
case G_CTTZ_ZERO_UNDEF:
case G_CTPOP:
case G_FCOPYSIGN:
case G_ZEXT:
case G_SEXT:
case G_ANYEXT:
case G_FPEXT:
case G_FPTRUNC:
case G_SITOFP:
case G_UITOFP:
case G_FPTOSI:
case G_FPTOUI:
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
case G_UADDO:
case G_USUBO:
case G_UADDE:
case G_USUBE:
case G_SADDO:
case G_SSUBO:
case G_SADDE:
case G_SSUBE:
return fewerElementsVectorMultiEltType(GMI, NumElts);
// Operand 1 of a compare is declared as a non-vector operand, so it is
// repeated for every piece instead of being split.
case G_ICMP:
case G_FCMP:
return fewerElementsVectorMultiEltType(GMI, NumElts, {1 });
// Operand 1 of a select is split only when it is a vector; otherwise it is
// declared as a non-vector operand and repeated for every piece.
case G_SELECT:
if (MRI.getType(MI.getOperand(1).getReg()).isVector())
return fewerElementsVectorMultiEltType(GMI, NumElts);
return fewerElementsVectorMultiEltType(GMI, NumElts, {1 });
case G_PHI:
return fewerElementsVectorPhi(GMI, NumElts);
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
assert(TypeIdx == 0 && "not a vector type index");
return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
// Only type index 1 is accepted for G_CONCAT_VECTORS here.
case G_CONCAT_VECTORS:
if (TypeIdx != 1) return UnableToLegalize;
return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
case G_EXTRACT_VECTOR_ELT:
case G_INSERT_VECTOR_ELT:
return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
// Operand 2 of G_SEXT_INREG is declared as a non-vector operand.
case G_SEXT_INREG:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 });
// The macro below supplies the case labels for the reductions routed to
// fewerElementsVectorReductions (that helper asserts the opcode is not one of
// the sequential FADD/FMUL reductions).
GISEL_VECREDUCE_CASES_NONSEQ
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
case G_SHUFFLE_VECTOR:
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
}
/// Split a G_SHUFFLE_VECTOR into two shuffles (or build-vectors) that each
/// produce one half of the result, and concatenate the halves.
///
/// The requested \p NarrowTy is ignored and replaced by the half-width type.
/// Returns UnableToLegalize when \p TypeIdx is not 0, when the destination and
/// both source types are not all identical, or when the element count is not
/// a power of two.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
if (TypeIdx != 0)
return UnableToLegalize;
Register DstReg = MI.getOperand(0).getReg();
Register Src1Reg = MI.getOperand(1).getReg();
Register Src2Reg = MI.getOperand(2).getReg();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
LLT DstTy = MRI.getType(DstReg);
LLT Src1Ty = MRI.getType(Src1Reg);
LLT Src2Ty = MRI.getType(Src2Reg);
// Only shuffles whose result and sources share one type are handled.
if (DstTy != Src1Ty)
return UnableToLegalize;
if (DstTy != Src2Ty)
return UnableToLegalize;
if (!isPowerOf2_32(DstTy.getNumElements()))
return UnableToLegalize;
// Always split exactly in two, whatever NarrowTy was requested.
NarrowTy =
DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
unsigned NewElts = NarrowTy.getNumElements();
SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
// The four half-width inputs, in mask-index order.
Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
SplitSrc2Regs[1]};
Register Hi, Lo;
// Mask of the half-width shuffle currently being built.
SmallVector<int, 16> Ops;
// Iteration 0 builds Lo, iteration 1 builds Hi.
for (unsigned High = 0; High < 2; ++High) {
Register &Output = High ? Hi : Lo;
// InputUsed records which of the four inputs serve as the (at most two)
// operands of the new shuffle; -1U means "slot still free".
unsigned InputUsed[2] = {-1U, -1U}; unsigned FirstMaskIdx = High * NewElts;
bool UseBuildVector = false;
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
int Idx = Mask[FirstMaskIdx + MaskOffset];
// Half-width input addressed by this mask element. A negative mask element
// becomes a huge unsigned value and is caught by the range check below.
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element does not address any input.
Ops.push_back(-1);
continue;
}
// Make the index relative to the start of its input.
Idx -= Input * NewElts;
// Find the operand slot already holding this input, or claim a free one.
unsigned OpNo;
for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input) {
break;
} else if (InputUsed[OpNo] == -1U) {
InputUsed[OpNo] = Input;
break;
}
}
// More than two distinct inputs are needed: a single shuffle cannot express
// this half, fall back to building it element by element.
if (OpNo >= array_lengthof(InputUsed)) {
UseBuildVector = true;
break;
}
Ops.push_back(Idx + OpNo * NewElts);
}
if (UseBuildVector) {
LLT EltTy = NarrowTy.getElementType();
SmallVector<Register, 16> SVOps;
// Extract every element of this half individually.
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
int Idx = Mask[FirstMaskIdx + MaskOffset];
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// No input addressed: the element is undef.
SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
continue;
}
Idx -= Input * NewElts;
SVOps.push_back(MIRBuilder
.buildExtractVectorElement(
EltTy, Inputs[Input],
MIRBuilder.buildConstant(LLT::scalar(32), Idx))
.getReg(0));
}
Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
} else if (InputUsed[0] == -1U) {
// No input was referenced at all: the whole half is undef.
Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
} else {
Register Op0 = Inputs[InputUsed[0]];
// With a single used input the second shuffle operand is undef.
Register Op1 = InputUsed[1] == -1U
? MIRBuilder.buildUndef(NarrowTy).getReg(0)
: Inputs[InputUsed[1]];
Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
}
Ops.clear();
}
MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
MI.eraseFromParent();
return Legalized;
}
/// Map a G_VECREDUCE_* opcode to the generic binary opcode that combines two
/// partial results of that reduction. Any other opcode is unreachable.
static unsigned getScalarOpcForReduction(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_VECREDUCE_FADD:
    return TargetOpcode::G_FADD;
  case TargetOpcode::G_VECREDUCE_FMUL:
    return TargetOpcode::G_FMUL;
  case TargetOpcode::G_VECREDUCE_FMAX:
    return TargetOpcode::G_FMAXNUM;
  case TargetOpcode::G_VECREDUCE_FMIN:
    return TargetOpcode::G_FMINNUM;
  case TargetOpcode::G_VECREDUCE_ADD:
    return TargetOpcode::G_ADD;
  case TargetOpcode::G_VECREDUCE_MUL:
    return TargetOpcode::G_MUL;
  case TargetOpcode::G_VECREDUCE_AND:
    return TargetOpcode::G_AND;
  case TargetOpcode::G_VECREDUCE_OR:
    return TargetOpcode::G_OR;
  case TargetOpcode::G_VECREDUCE_XOR:
    return TargetOpcode::G_XOR;
  case TargetOpcode::G_VECREDUCE_SMAX:
    return TargetOpcode::G_SMAX;
  case TargetOpcode::G_VECREDUCE_SMIN:
    return TargetOpcode::G_SMIN;
  case TargetOpcode::G_VECREDUCE_UMAX:
    return TargetOpcode::G_UMAX;
  case TargetOpcode::G_VECREDUCE_UMIN:
    return TargetOpcode::G_UMIN;
  default:
    llvm_unreachable("Unhandled reduction");
  }
}
/// Legalize a non-sequential vector reduction whose source vector (type
/// index 1) is too wide.
///
/// - Scalar \p NarrowTy: the source is scalarized and the elements are
///   combined with the matching scalar opcode, as a tree when the element
///   count is a power of two and as a linear chain otherwise.
/// - Vector \p NarrowTy with power-of-two element counts: delegated to
///   tryNarrowPow2Reduction.
/// - Other vector \p NarrowTy: each piece is reduced separately and the
///   partial results are chained with the scalar opcode.
///
/// Returns UnableToLegalize when \p TypeIdx is not 1, when the source element
/// count is not a multiple of the \p NarrowTy element count, or when
/// scalarizing and the destination type differs from \p NarrowTy. No
/// instruction is emitted on any of these paths.
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  unsigned Opc = MI.getOpcode();
  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
         "Sequential reductions not expected");

  if (TypeIdx != 1)
    return UnableToLegalize;

  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  if (NarrowTy.isVector() &&
      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
    return UnableToLegalize;

  unsigned ScalarOpc = getScalarOpcForReduction(Opc);

  if (NarrowTy.isScalar()) {
    // Reject before emitting anything so that a failed attempt leaves no
    // stray unmerge behind.
    if (DstTy != NarrowTy)
      return UnableToLegalize;

    // Scalarize: one part per source element.
    const unsigned NumParts = SrcTy.getNumElements();
    SmallVector<Register> SplitSrcs;
    extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);

    if (isPowerOf2_32(NumParts)) {
      // Combine neighbouring values level by level until one is left.
      SmallVector<Register> PartialResults;
      unsigned NumPartsLeft = NumParts;
      while (NumPartsLeft > 1) {
        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
          PartialResults.emplace_back(
              MIRBuilder
                  .buildInstr(ScalarOpc, {NarrowTy},
                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
                  .getReg(0));
        }
        SplitSrcs = PartialResults;
        PartialResults.clear();
        NumPartsLeft = SplitSrcs.size();
      }
      assert(SplitSrcs.size() == 1);
      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
      MI.eraseFromParent();
      return Legalized;
    }

    // No tree possible: accumulate the elements one after another.
    Register Acc = SplitSrcs[0];
    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
                .getReg(0);
    MIRBuilder.buildCopy(DstReg, Acc);
    MI.eraseFromParent();
    return Legalized;
  }

  // The power-of-two helper splits the source itself and does not consume any
  // per-piece reductions, so hand over before emitting any of them.
  if (isPowerOf2_32(SrcTy.getNumElements()) &&
      isPowerOf2_32(NarrowTy.getNumElements())) {
    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  }

  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
  SmallVector<Register> SplitSrcs;
  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);

  // Reduce every NarrowTy piece on its own.
  SmallVector<Register> PartialReductions;
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    PartialReductions.push_back(
        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
  }

  // Chain the partial results; the last operation writes DstReg directly.
  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
}
/// Narrow the source of the reduction \p MI to \p NarrowTy: split \p SrcReg
/// into \p NarrowTy pieces, combine the pieces pairwise with \p ScalarOpc
/// until a single piece remains, and make that piece the new source operand
/// of \p MI (which is kept, not erased).
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  const unsigned PieceCount =
      SrcTy.getNumElements() / NarrowTy.getNumElements();
  SmallVector<Register> Level;
  extractParts(SrcReg, NarrowTy, PieceCount, Level);

  // Each round combines neighbouring pieces of the current level.
  while (Level.size() > 1) {
    SmallVector<Register> NextLevel;
    for (unsigned Idx = 0; Idx < Level.size() - 1; Idx += 2) {
      auto Combined = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy},
                                            {Level[Idx], Level[Idx + 1]});
      NextLevel.push_back(Combined.getReg(0));
    }
    Level = std::move(NextLevel);
  }

  // Point the original reduction at the single remaining NarrowTy value.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(Level[0]);
  Observer.changedInstr(MI);
  return Legalized;
}
/// Expand the shift \p MI by the constant amount \p Amt into operations on the
/// two \p HalfTy halves of its source, and merge the resulting halves into the
/// destination. Shift-amount constants are created with type \p AmtTy.
///
/// G_SHL and G_LSHR have dedicated branches; any other opcode takes the final
/// branch, which builds arithmetic right shifts.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
const LLT HalfTy, const LLT AmtTy) {
// Split the source into its low (InL) and high (InH) halves.
Register InL = MRI.createGenericVirtualRegister(HalfTy);
Register InH = MRI.createGenericVirtualRegister(HalfTy);
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
// A shift by zero just reassembles the source.
if (Amt.isZero()) {
MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
}
LLT NVT = HalfTy;
unsigned NVTBits = HalfTy.getSizeInBits();
unsigned VTBits = 2 * NVTBits;
SrcOp Lo(Register(0)), Hi(Register(0));
if (MI.getOpcode() == TargetOpcode::G_SHL) {
if (Amt.ugt(VTBits)) {
// Shifted out completely: both halves are zero.
Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
} else if (Amt.ugt(NVTBits)) {
// More than half the width: Lo is zero, Hi comes from InL.
Lo = MIRBuilder.buildConstant(NVT, 0);
Hi = MIRBuilder.buildShl(NVT, InL,
MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
} else if (Amt == NVTBits) {
// Exactly half the width: the low half moves to the high half.
Lo = MIRBuilder.buildConstant(NVT, 0);
Hi = InL;
} else {
// Less than half the width: Hi also receives the bits shifted out of InL.
Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
auto OrLHS =
MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
auto OrRHS = MIRBuilder.buildLShr(
NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
}
} else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
if (Amt.ugt(VTBits)) {
// Shifted out completely: both halves are zero.
Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
} else if (Amt.ugt(NVTBits)) {
// More than half the width: Lo comes from InH, Hi is zero.
Lo = MIRBuilder.buildLShr(NVT, InH,
MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
Hi = MIRBuilder.buildConstant(NVT, 0);
} else if (Amt == NVTBits) {
// Exactly half the width: the high half moves to the low half.
Lo = InH;
Hi = MIRBuilder.buildConstant(NVT, 0);
} else {
// Less than half the width: Lo also receives the bits shifted out of InH.
auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
auto OrRHS = MIRBuilder.buildShl(
NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
}
} else {
// Remaining opcode: same structure as the logical right shift, except that
// vacated positions are filled by arithmetically shifting InH by NVTBits - 1.
if (Amt.ugt(VTBits)) {
Hi = Lo = MIRBuilder.buildAShr(
NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
} else if (Amt.ugt(NVTBits)) {
Lo = MIRBuilder.buildAShr(NVT, InH,
MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
Hi = MIRBuilder.buildAShr(NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
} else if (Amt == NVTBits) {
Lo = InH;
Hi = MIRBuilder.buildAShr(NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
} else {
auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
auto OrRHS = MIRBuilder.buildShl(
NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
}
}
// Reassemble the result from its two halves.
MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
MI.eraseFromParent();
return Legalized;
}
/// Narrow a scalar shift (G_SHL, G_LSHR or G_ASHR).
///
/// For \p TypeIdx 1 only the shift-amount operand (operand 2) is narrowed to
/// \p RequestedTy. Otherwise \p RequestedTy is not consulted: the shift is
/// expanded into operations on the two halves of the destination type,
/// using narrowScalarShiftByConstant when the amount is a known constant and
/// a select-based expansion otherwise.
///
/// Returns UnableToLegalize for vector destinations and for destination
/// sizes that are not a multiple of two bits.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
LLT RequestedTy) {
if (TypeIdx == 1) {
Observer.changingInstr(MI);
narrowScalarSrc(MI, RequestedTy, 2);
Observer.changedInstr(MI);
return Legalized;
}
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
return UnableToLegalize;
Register Amt = MI.getOperand(2).getReg();
LLT ShiftAmtTy = MRI.getType(Amt);
const unsigned DstEltSize = DstTy.getScalarSizeInBits();
if (DstEltSize % 2 != 0)
return UnableToLegalize;
// The expansion always works on exactly half of the destination width.
const unsigned NewBitSize = DstEltSize / 2;
const LLT HalfTy = LLT::scalar(NewBitSize);
const LLT CondTy = LLT::scalar(1);
// A constant shift amount allows a cheaper, select-free expansion.
if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
ShiftAmtTy);
}
// General expansion for an unknown shift amount.
auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
Register InL = MRI.createGenericVirtualRegister(HalfTy);
Register InH = MRI.createGenericVirtualRegister(HalfTy);
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
// AmtExcess = Amt - NewBitSize is used by the "long" case,
// AmtLack = NewBitSize - Amt by the "short" case.
auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
// IsShort: Amt < NewBitSize (unsigned). IsZero: Amt == 0.
auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
Register ResultRegs[2];
switch (MI.getOpcode()) {
case TargetOpcode::G_SHL: {
// Short case (Amt < NewBitSize): Hi combines shifted InH with the bits
// leaving InL.
auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long case (Amt >= NewBitSize): Lo is zero, Hi is computed from InL.
auto LoL = MIRBuilder.buildConstant(HalfTy, 0); auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
// A zero amount selects the unmodified high half.
auto Hi = MIRBuilder.buildSelect(
HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
ResultRegs[0] = Lo.getReg(0);
ResultRegs[1] = Hi.getReg(0);
break;
}
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR: {
// Short case (Amt < NewBitSize): Lo combines shifted InL with the bits
// leaving InH.
auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long case (Amt >= NewBitSize): Hi is zero for G_LSHR and InH shifted
// arithmetically by NewBitSize - 1 for G_ASHR; Lo is computed from InH.
MachineInstrBuilder HiL;
if (MI.getOpcode() == TargetOpcode::G_LSHR) {
HiL = MIRBuilder.buildConstant(HalfTy, 0); } else {
auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); }
auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
{InH, AmtExcess});
// A zero amount selects the unmodified low half.
auto Lo = MIRBuilder.buildSelect(
HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
ResultRegs[0] = Lo.getReg(0);
ResultRegs[1] = Hi.getReg(0);
break;
}
default:
llvm_unreachable("not a shift");
}
// Reassemble the result from {Lo, Hi}.
MIRBuilder.buildMerge(DstReg, ResultRegs);
MI.eraseFromParent();
return Legalized;
}
/// Widen the vector G_PHI \p MI to \p MoreTy in place: every incoming value
/// is widened in its incoming block, then the destination is widened.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);

  // Operands after the def come in (value, basic block) pairs. Widen each
  // value in front of the first terminator of its block.
  const unsigned OperandCount = MI.getNumOperands();
  for (unsigned ValueIdx = 1; ValueIdx != OperandCount; ValueIdx += 2) {
    MachineBasicBlock &IncomingMBB = *MI.getOperand(ValueIdx + 1).getMBB();
    MIRBuilder.setInsertPt(IncomingMBB, IncomingMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, ValueIdx);
  }

  // Position the builder on the instruction just before the first non-PHI
  // of MI's block, then widen the def.
  // NOTE(review): presumably moreElementsVectorDst advances the insertion
  // point by one before emitting, which would explain the decrement - confirm.
  MachineBasicBlock &ParentMBB = *MI.getParent();
  MIRBuilder.setInsertPt(ParentMBB, --ParentMBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);

  Observer.changedInstr(MI);
  return Legalized;
}
/// Entry point of the "more elements" legalization: widen the vector type at
/// \p TypeIdx of \p MI to \p MoreTy. Most opcodes are rewritten in place by
/// widening the relevant source operands and the destination; opcodes
/// without a case (and G_UNMERGE_VALUES) return UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT MoreTy) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
// Only the destination (operand 0) is widened.
case TargetOpcode::G_IMPLICIT_DEF:
case TargetOpcode::G_LOAD: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
// Operand 0 of a store is a use, so it is widened as a source.
case TargetOpcode::G_STORE:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
// Two-source operations: widen both sources and the destination.
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_MUL:
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
case TargetOpcode::G_UMAX:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
case TargetOpcode::G_FMINIMUM:
case TargetOpcode::G_FMAXIMUM: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
// Three-source operations: widen all three sources and the destination.
case TargetOpcode::G_FMA:
case TargetOpcode::G_FSHR:
case TargetOpcode::G_FSHL: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorSrc(MI, MoreTy, 3);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
// Only the source (type index 1) of G_EXTRACT is widened.
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
Observer.changedInstr(MI);
return Legalized;
// Widen source operand 1 and the destination; further operands are left
// untouched.
case TargetOpcode::G_INSERT:
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT: {
Register DstReg = MI.getOperand(0).getReg();
Register CondReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT CondTy = MRI.getType(CondReg);
// Type index 1 is the condition: a scalar condition is turned into a vector
// by splatting it, provided MoreTy has as many elements as the result.
if (TypeIdx == 1) {
if (!CondTy.isScalar() ||
DstTy.getElementCount() != MoreTy.getElementCount())
return UnableToLegalize;
auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(ShufSplat.getReg(0));
Observer.changedInstr(MI);
return Legalized;
}
// Widening the value type is only done for a non-vector condition; the two
// selected values and the destination are widened, the condition is kept.
if (CondTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorSrc(MI, MoreTy, 3);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_UNMERGE_VALUES:
return UnableToLegalize;
case TargetOpcode::G_PHI:
return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
case TargetOpcode::G_SHUFFLE_VECTOR:
return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
// Rebuild the vector with undef elements appended up to MoreTy's element
// count, then drop the trailing elements again to define the original
// destination register.
case TargetOpcode::G_BUILD_VECTOR: {
SmallVector<SrcOp, 8> Elts;
for (auto Op : MI.uses()) {
Elts.push_back(Op.getReg());
}
for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
}
MIRBuilder.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
MI.eraseFromParent();
return Legalized;
}
// NOTE(review): the same MoreTy is applied to both source and destination
// here, and TypeIdx is not checked - confirm this is intended for G_TRUNC.
case TargetOpcode::G_TRUNC: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
default:
return UnableToLegalize;
}
}
/// Widen a G_SHUFFLE_VECTOR to \p MoreTy: both sources and the destination
/// are widened and the shuffle is rebuilt with a mask adjusted to the new
/// source length.
///
/// Returns UnableToLegalize when \p TypeIdx is not 0 or when the destination
/// and both source types are not all identical.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
                                           unsigned int TypeIdx, LLT MoreTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const Register OrigDst = MI.getOperand(0).getReg();
  const Register OrigSrc1 = MI.getOperand(1).getReg();
  const Register OrigSrc2 = MI.getOperand(2).getReg();
  ArrayRef<int> OldMask = MI.getOperand(3).getShuffleMask();
  const LLT OrigDstTy = MRI.getType(OrigDst);
  const LLT OrigSrc1Ty = MRI.getType(OrigSrc1);
  const LLT OrigSrc2Ty = MRI.getType(OrigSrc2);
  const unsigned OldNumElts = OrigDstTy.getNumElements();
  const unsigned NewNumElts = MoreTy.getNumElements();

  const bool SameTypes = OrigDstTy == OrigSrc1Ty && OrigDstTy == OrigSrc2Ty;
  if (!SameTypes)
    return UnableToLegalize;

  moreElementsVectorSrc(MI, MoreTy, 1);
  moreElementsVectorSrc(MI, MoreTy, 2);

  // Indices into the first source stay as they are; indices into the second
  // source move up by the number of elements added to the first source.
  const unsigned SecondSrcShift = NewNumElts - OldNumElts;
  SmallVector<int, 16> WidenedMask;
  for (unsigned Pos = 0; Pos != OldNumElts; ++Pos) {
    const int OldIdx = OldMask[Pos];
    const bool InFirstSrc = OldIdx < static_cast<int>(OldNumElts);
    WidenedMask.push_back(InFirstSrc ? OldIdx : OldIdx + SecondSrcShift);
  }
  // The added result elements do not select any source element.
  for (unsigned Pos = OldNumElts; Pos != NewNumElts; ++Pos)
    WidenedMask.push_back(-1);

  moreElementsVectorDst(MI, MoreTy, 0);

  // Build the replacement shuffle at MI's position from MI's (now widened)
  // operands, then drop MI.
  MIRBuilder.setInstrAndDebugLoc(MI);
  MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                MI.getOperand(1).getReg(),
                                MI.getOperand(2).getReg(), WidenedMask);
  MI.eraseFromParent();
  return Legalized;
}
// Multiply two values that have been split into NarrowTy pieces and store the
// pieces of the product in DstRegs.
//
// Src1Regs/Src2Regs hold the factors, piece 0 being the lowest; the number of
// source pieces is taken from Src1Regs. DstRegs must be pre-sized by the
// caller: its size decides how many product pieces are computed and every
// slot is overwritten.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  // Piece 0 is the low half of the product of the two lowest source pieces.
  unsigned DstIdx = 0;
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carries collected while summing the previous piece. This holds a virtual
  // register, so it is a (default-invalid) Register rather than an
  // uninitialized unsigned.
  Register CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Low halves of every product Src1[DstIdx - i] * Src2[i] for this piece.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // High halves of the products that belonged to the previous piece.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Carries produced while summing the previous piece (none for piece 1).
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    if (DstIdx != DstParts - 1) {
      // A later piece follows: add with overflow detection and accumulate the
      // zero-extended overflow bits into CarrySum.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Last piece: nothing consumes a carry, so plain adds are enough and
      // CarrySum stays invalid.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
// Narrow a scalar add/sub (plain, carry-out, or carry-in/carry-out form) into
// a chain of NarrowTy-sized operations linked through s1 carries. Only type
// index 0 and non-vector results are handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const Register WideDst = MI.getOperand(0).getReg();
  const LLT WideTy = MRI.getType(WideDst);
  if (WideTy.isVector())
    return UnableToLegalize;

  // Classify the opcode as addition or subtraction.
  const unsigned Opcode = MI.getOpcode();
  bool IsSub = false;
  switch (Opcode) {
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
    break;
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
    IsSub = true;
    break;
  default:
    llvm_unreachable("Unexpected add/sub opcode!");
  }
  const bool IsSignedForm =
      Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE ||
      Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE;

  // Opcode used while no carry-in is available, opcode for a piece that
  // consumes a carry, and opcode for the final piece. Only the final piece
  // uses the signed variant, and only for the signed input opcodes.
  const unsigned NoCarryInOpc =
      IsSub ? TargetOpcode::G_USUBO : TargetOpcode::G_UADDO;
  const unsigned CarryOpc =
      IsSub ? TargetOpcode::G_USUBE : TargetOpcode::G_UADDE;
  unsigned FinalOpc = CarryOpc;
  if (IsSignedForm)
    FinalOpc = IsSub ? TargetOpcode::G_SSUBE : TargetOpcode::G_SADDE;

  // The sources follow the explicit defs; a second def is the carry-out and
  // a third use is the carry-in.
  const unsigned NumDefs = MI.getNumExplicitDefs();
  const Register Lhs = MI.getOperand(NumDefs).getReg();
  const Register Rhs = MI.getOperand(NumDefs + 1).getReg();
  Register CarryDst;
  if (NumDefs == 2)
    CarryDst = MI.getOperand(1).getReg();
  Register CarryIn;
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();

  LLT LeftoverTy, IgnoredTy;
  SmallVector<Register, 2> LhsPieces, RhsPieces, LhsRest, RhsRest, OutPieces;
  extractParts(Lhs, WideTy, NarrowTy, LeftoverTy, LhsPieces, LhsRest);
  extractParts(Rhs, WideTy, NarrowTy, IgnoredTy, RhsPieces, RhsRest);

  // Remember how many full-width pieces there are, then process the leftover
  // pieces in the same chain.
  const int FullPieces = LhsPieces.size();
  for (int Rest = 0, NumRest = LhsRest.size(); Rest != NumRest; ++Rest) {
    LhsPieces.push_back(LhsRest[Rest]);
    RhsPieces.push_back(RhsRest[Rest]);
  }
  OutPieces.reserve(LhsPieces.size());

  const int TotalPieces = LhsPieces.size();
  for (int Idx = 0; Idx != TotalPieces; ++Idx) {
    const bool IsFinal = Idx == TotalPieces - 1;
    const Register PieceDst =
        MRI.createGenericVirtualRegister(MRI.getType(LhsPieces[Idx]));
    Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
    // The carry of the final piece goes straight to the original carry def.
    if (IsFinal && CarryDst)
      CarryOut = CarryDst;

    if (CarryIn) {
      MIRBuilder.buildInstr(IsFinal ? FinalOpc : CarryOpc,
                            {PieceDst, CarryOut},
                            {LhsPieces[Idx], RhsPieces[Idx], CarryIn});
    } else {
      MIRBuilder.buildInstr(NoCarryInOpc, {PieceDst, CarryOut},
                            {LhsPieces[Idx], RhsPieces[Idx]});
    }

    OutPieces.push_back(PieceDst);
    CarryIn = CarryOut;
  }

  insertParts(WideDst, WideTy, NarrowTy,
              makeArrayRef(OutPieces).take_front(FullPieces), LeftoverTy,
              makeArrayRef(OutPieces).drop_front(FullPieces));
  MI.eraseFromParent();
  return Legalized;
}
// Narrow a scalar multiply into NarrowTy pieces via multiplyRegisters. For
// G_UMULH twice as many product pieces are computed and only the upper half
// is merged into the result. Vector results and sizes that are not a multiple
// of NarrowTy are rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Lhs = MI.getOperand(1).getReg();
  const Register Rhs = MI.getOperand(2).getReg();

  const LLT WideTy = MRI.getType(Result);
  if (WideTy.isVector())
    return UnableToLegalize;

  const unsigned WideBits = WideTy.getSizeInBits();
  const unsigned NarrowBits = NarrowTy.getSizeInBits();
  if (WideBits % NarrowBits != 0)
    return UnableToLegalize;
  const unsigned PartCount = WideBits / NarrowBits;

  const bool WantHighHalf = MI.getOpcode() == TargetOpcode::G_UMULH;
  const unsigned ProductParts = WantHighHalf ? PartCount * 2 : PartCount;

  SmallVector<Register, 2> LhsParts, RhsParts;
  SmallVector<Register, 2> Product(ProductParts);
  extractParts(Lhs, NarrowTy, PartCount, LhsParts);
  extractParts(Rhs, NarrowTy, PartCount, RhsParts);
  multiplyRegisters(Product, LhsParts, RhsParts, NarrowTy);

  // The result is the last PartCount product pieces: all of them for a plain
  // multiply, the upper half for a high multiply.
  ArrayRef<Register> ResultParts = makeArrayRef(Product).take_back(PartCount);
  MIRBuilder.buildMerge(Result, ResultParts);
  MI.eraseFromParent();
  return Legalized;
}
// Narrow the result type of G_FPTOSI/G_FPTOUI. Only handled for type index 0
// when the source scalar type is 16 bits wide and NarrowTy has at least 16
// (unsigned) or 17 (signed) bits per element; the instruction is then changed
// in place through narrowScalarDst.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const bool Signed = MI.getOpcode() == TargetOpcode::G_FPTOSI;
  const LLT FpTy = MRI.getType(MI.getOperand(1).getReg());

  const bool SrcIs16Bit = FpTy.getScalarType() == LLT::scalar(16);
  const unsigned MinIntBits = Signed ? 17u : 16u;
  if (!SrcIs16Bit || NarrowTy.getScalarSizeInBits() < MinIntBits)
    return UnableToLegalize;

  // The extension opcode handed to narrowScalarDst follows the signedness of
  // the conversion.
  const unsigned ExtOpc = Signed ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Observer.changingInstr(MI);
  narrowScalarDst(MI, NarrowTy, 0, ExtOpc);
  Observer.changedInstr(MI);
  return Legalized;
}
// Narrow the source (type index 1) of a G_EXTRACT: split the source into
// NarrowTy pieces, take from each piece the bits that overlap the extracted
// range, and recombine those segments into the destination. The source size
// must be an exact multiple of NarrowTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // The extracted range is [OpStart, OpStart + OpSize) within the source.
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = DstTy.getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // This piece lies entirely outside the extracted range.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == DstTy) {
      // The extract is exactly this piece; forward it.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // Offset and width of the overlap, relative to this piece.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // Only part of the piece is needed, so a real extract is required.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  if (DstTy.isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}
// Narrow a G_INSERT (type index 0): split the wide source into NarrowTy
// pieces (plus leftover pieces), insert the overlapping segment of the
// inserted value into each affected piece, and merge the pieces back into the
// destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
  LLT LeftoverTy;
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
               LeftoverRegs);

  for (Register Reg : LeftoverRegs)
    SrcRegs.push_back(Reg);

  // The inserted value covers [OpStart, OpStart + OpSize) of the destination.
  uint64_t NarrowSize = NarrowTy.getSizeInBits();
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;

    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The inserted value replaces this piece completely; forward it.
      DstRegs.push_back(OpReg);
      continue;
    }

    Register SrcReg = SrcRegs[I];
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
      // Bring a leftover-typed piece up to NarrowTy so it can be handled like
      // the other pieces.
      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
    }

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // The insert does not touch this piece; keep the original bits.
      DstRegs.push_back(SrcReg);
      continue;
    }

    // Work out which bits of OpReg land in this piece and where.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      // The segment starts at the first bit of OpReg, so it can be neither
      // wider than OpReg nor wider than the room left in this piece after
      // InsertOffset. (Bounding by OpStart + OpSize - DstStart, i.e.
      // InsertOffset + OpSize, would ask for more bits than OpReg holds when
      // the value sits strictly inside the piece at a non-zero offset.)
      SegSize = std::min(NarrowSize - InsertOffset, OpSize);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // Only part of OpReg belongs in this piece, so a real extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  // If extending a leftover piece made the pieces wider than the destination,
  // merge into a temporary of the combined width and truncate.
  uint64_t WideSize = DstRegs.size() * NarrowSize;
  Register DstReg = MI.getOperand(0).getReg();
  if (WideSize > RegTy.getSizeInBits()) {
    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
    MIRBuilder.buildMerge(MergeReg, DstRegs);
    MIRBuilder.buildTrunc(DstReg, MergeReg);
  } else
    MIRBuilder.buildMerge(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
// Narrow a two-source, one-result instruction by applying the same opcode to
// each pair of NarrowTy pieces (and to each pair of leftover pieces) and
// reassembling the result. Expects exactly three operands and type index 0.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  const Register WideDst = MI.getOperand(0).getReg();
  const LLT WideTy = MRI.getType(WideDst);

  assert(MI.getNumOperands() == 3 && TypeIdx == 0);

  SmallVector<Register, 4> OutParts, OutRest;
  SmallVector<Register, 4> LhsParts, LhsRest;
  SmallVector<Register, 4> RhsParts, RhsRest;

  LLT RestTy;
  const bool LhsSplit = extractParts(MI.getOperand(1).getReg(), WideTy,
                                     NarrowTy, RestTy, LhsParts, LhsRest);
  if (!LhsSplit)
    return UnableToLegalize;

  // Same types as above, so this split is expected to succeed as well.
  LLT IgnoredTy;
  const bool RhsSplit = extractParts(MI.getOperand(2).getReg(), WideTy,
                                     NarrowTy, IgnoredTy, RhsParts, RhsRest);
  if (!RhsSplit)
    llvm_unreachable("inconsistent extractParts result");

  const unsigned Opc = MI.getOpcode();

  // Full-width pieces first...
  for (unsigned Idx = 0; Idx != RhsParts.size(); ++Idx) {
    auto Piece =
        MIRBuilder.buildInstr(Opc, {NarrowTy}, {LhsParts[Idx], RhsParts[Idx]});
    OutParts.push_back(Piece.getReg(0));
  }
  // ...then the leftover pieces, in the leftover type.
  for (unsigned Idx = 0; Idx != RhsRest.size(); ++Idx) {
    auto Piece =
        MIRBuilder.buildInstr(Opc, {RestTy}, {LhsRest[Idx], RhsRest[Idx]});
    OutRest.push_back(Piece.getReg(0));
  }

  insertParts(WideDst, WideTy, NarrowTy, OutParts, RestTy, OutRest);
  MI.eraseFromParent();
  return Legalized;
}
// Narrow the result (type index 0) of a scalar extension by running the
// source through extractGCDType, buildLCMMergePieces (with this instruction's
// opcode) and buildWidenedRemergeToDst. Vector results are not handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
                                 LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  if (ResultTy.isVector())
    return UnableToLegalize;

  SmallVector<Register, 8> Pieces;
  const LLT PieceTy = extractGCDType(Pieces, ResultTy, NarrowTy, Input);
  const unsigned ExtOpc = MI.getOpcode();
  const LLT MergedTy =
      buildLCMMergePieces(ResultTy, NarrowTy, PieceTy, Pieces, ExtOpc);
  buildWidenedRemergeToDst(Result, MergedTy, Pieces);

  MI.eraseFromParent();
  return Legalized;
}
// Narrow a G_SELECT (type index 0) with a non-vector condition: select
// between each pair of NarrowTy pieces (and leftover pieces) using the
// original condition, then reassemble the result.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  const Register Cond = MI.getOperand(1).getReg();
  if (MRI.getType(Cond).isVector())
    return UnableToLegalize;

  const Register WideDst = MI.getOperand(0).getReg();
  const LLT WideTy = MRI.getType(WideDst);

  SmallVector<Register, 4> OutParts, OutRest;
  SmallVector<Register, 4> TrueParts, TrueRest;
  SmallVector<Register, 4> FalseParts, FalseRest;

  LLT RestTy;
  const bool TrueSplit = extractParts(MI.getOperand(2).getReg(), WideTy,
                                      NarrowTy, RestTy, TrueParts, TrueRest);
  if (!TrueSplit)
    return UnableToLegalize;

  // Same types as above, so this split is expected to succeed as well.
  LLT IgnoredTy;
  const bool FalseSplit =
      extractParts(MI.getOperand(3).getReg(), WideTy, NarrowTy, IgnoredTy,
                   FalseParts, FalseRest);
  if (!FalseSplit)
    llvm_unreachable("inconsistent extractParts result");

  // Full-width pieces first...
  for (unsigned Idx = 0; Idx != TrueParts.size(); ++Idx) {
    auto Piece = MIRBuilder.buildSelect(NarrowTy, Cond, TrueParts[Idx],
                                        FalseParts[Idx]);
    OutParts.push_back(Piece.getReg(0));
  }
  // ...then the leftover pieces, in the leftover type.
  for (unsigned Idx = 0; Idx != TrueRest.size(); ++Idx) {
    auto Piece =
        MIRBuilder.buildSelect(RestTy, Cond, TrueRest[Idx], FalseRest[Idx]);
    OutRest.push_back(Piece.getReg(0));
  }

  insertParts(WideDst, WideTy, NarrowTy, OutParts, RestTy, OutRest);
  MI.eraseFromParent();
  return Legalized;
}
// Narrow the source (type index 1) of G_CTLZ / G_CTLZ_ZERO_UNDEF when the
// source is a scalar exactly twice as wide as NarrowTy:
//   ctlz(Hi:Lo) = Hi == 0 ? NarrowSize + ctlz(Lo) : ctlz_zero_undef(Hi)
// Any other shape is rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT InputTy = MRI.getType(Input);
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  if (!InputTy.isScalar())
    return UnableToLegalize;
  const uint64_t InputBits = InputTy.getSizeInBits();
  if (InputBits != 2 * HalfBits)
    return UnableToLegalize;

  const bool ZeroIsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
  MachineIRBuilder &B = MIRBuilder;

  auto Halves = B.buildUnmerge(NarrowTy, Input);
  const Register Lo = Halves.getReg(0);
  const Register Hi = Halves.getReg(1);

  auto NarrowZero = B.buildConstant(NarrowTy, 0);
  auto HiIsZero =
      B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, NarrowZero);

  // The low half keeps the zero-undef flavour of the original instruction.
  MachineInstrBuilder LoCount;
  if (ZeroIsUndef)
    LoCount = B.buildCTLZ_ZERO_UNDEF(ResultTy, Lo);
  else
    LoCount = B.buildCTLZ(ResultTy, Lo);

  auto HalfBitsC = B.buildConstant(ResultTy, HalfBits);
  auto CountIfHiZero = B.buildAdd(ResultTy, LoCount, HalfBitsC);
  // This count is only selected when Hi is non-zero, so the zero-undef form
  // is used for it.
  auto HiCount = B.buildCTLZ_ZERO_UNDEF(ResultTy, Hi);
  B.buildSelect(Result, HiIsZero, CountIfHiZero, HiCount);

  MI.eraseFromParent();
  return Legalized;
}
// Narrow the source (type index 1) of G_CTTZ / G_CTTZ_ZERO_UNDEF when the
// source is a scalar exactly twice as wide as NarrowTy:
//   cttz(Hi:Lo) = Lo == 0 ? cttz(Hi) + NarrowSize : cttz_zero_undef(Lo)
// Any other shape is rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT InputTy = MRI.getType(Input);
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  if (!InputTy.isScalar())
    return UnableToLegalize;
  const uint64_t InputBits = InputTy.getSizeInBits();
  if (InputBits != 2 * HalfBits)
    return UnableToLegalize;

  const bool ZeroIsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
  MachineIRBuilder &B = MIRBuilder;

  auto Halves = B.buildUnmerge(NarrowTy, Input);
  const Register Lo = Halves.getReg(0);
  const Register Hi = Halves.getReg(1);

  auto NarrowZero = B.buildConstant(NarrowTy, 0);
  auto LoIsZero =
      B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Lo, NarrowZero);

  // The high half keeps the zero-undef flavour of the original instruction.
  MachineInstrBuilder HiCount;
  if (ZeroIsUndef)
    HiCount = B.buildCTTZ_ZERO_UNDEF(ResultTy, Hi);
  else
    HiCount = B.buildCTTZ(ResultTy, Hi);

  auto HalfBitsC = B.buildConstant(ResultTy, HalfBits);
  auto CountIfLoZero = B.buildAdd(ResultTy, HiCount, HalfBitsC);
  // This count is only selected when Lo is non-zero, so the zero-undef form
  // is used for it.
  auto LoCount = B.buildCTTZ_ZERO_UNDEF(ResultTy, Lo);
  B.buildSelect(Result, LoIsZero, CountIfLoZero, LoCount);

  MI.eraseFromParent();
  return Legalized;
}
// Narrow the source (type index 1) of G_CTPOP when the source is a scalar
// exactly twice as wide as NarrowTy: count each half separately and add the
// two counts. Any other shape is rejected.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  const Register Result = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT InputTy = MRI.getType(MI.getOperand(1).getReg());
  const unsigned HalfBits = NarrowTy.getSizeInBits();

  if (!InputTy.isScalar())
    return UnableToLegalize;
  const uint64_t InputBits = InputTy.getSizeInBits();
  if (InputBits != 2 * HalfBits)
    return UnableToLegalize;

  auto Halves = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
  auto LoCount = MIRBuilder.buildCTPOP(ResultTy, Halves.getReg(0));
  auto HiCount = MIRBuilder.buildCTPOP(ResultTy, Halves.getReg(1));
  MIRBuilder.buildAdd(Result, HiCount, LoCount);

  MI.eraseFromParent();
  return Legalized;
}
// Lower G_CTLZ, G_CTTZ, their *_ZERO_UNDEF variants and G_CTPOP into other
// operations. Any other opcode yields UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // An operation is treated as usable when its action is Legal, Libcall or
  // Custom.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // Rewritten in place to the plain G_CTLZ form.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // Use the zero-undef form and select Len when the input is zero.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise OR the value with itself shifted right by 1, 2, 4, ... so
    // that every bit below the highest set bit becomes set, then compute
    // Len - popcount(x).
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // Rewritten in place to the plain G_CTTZ form.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // Use the zero-undef form and select Len when the input is zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise compute popcount(~x & (x - 1)), or Len - ctlz(~x & (x - 1))
    // when G_CTLZ is usable but G_CTPOP is not.
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // MI is turned into a G_CTPOP of the masked value in place. Bracket the
    // mutation with observer notifications, as the other in-place rewrites in
    // this switch do.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Per 2-bit block: B2Count = x - ((x >> 1) & 0x55...).
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // Per 4-bit block:
    // B4Count = (B2Count & 0x33...) + ((B2Count >> 2) & 0x33...).
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // Per 8-bit block: B8Count = (B4Count + (B4Count >> 4)) & 0x0F...
    // The mask is applied after the add here.
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // Multiplying by 0x0101... sums all per-byte counts into the top byte.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Move the total from the top byte down to the low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
Register Reg, unsigned BW) {
return matchUnaryPredicate(
MRI, Reg,
[=](const Constant *C) {
const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
return !CI || CI->getValue().urem(BW) != 0;
},
true);
}
// Lower G_FSHL/G_FSHR to the funnel shift in the opposite direction. Only
// power-of-two scalar bit widths are handled; otherwise UnableToLegalize is
// returned and MI is left untouched.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  Register Y = MI.getOperand(2).getReg();
  Register Z = MI.getOperand(3).getReg();
  LLT Ty = MRI.getType(Dst);
  LLT ShTy = MRI.getType(Z);

  unsigned BW = Ty.getScalarSizeInBits();

  if (!isPowerOf2_32(BW))
    return UnableToLegalize;

  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;

  if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
    // fshl X, Y, Z -> fshr X, Y, -Z
    // fshr X, Y, Z -> fshl X, Y, -Z
    // The negated amount is built in the shift-amount type: Zero and Z are
    // both ShTy and the result replaces Z as the amount operand.
    auto Zero = MIRBuilder.buildConstant(ShTy, 0);
    Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
  } else {
    // fshl X, Y, Z -> fshr (lshr X, 1), (fshr X, Y, 1), ~Z
    // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
    auto One = MIRBuilder.buildConstant(ShTy, 1);
    if (IsFSHL) {
      // Y must be computed from the original X before X is overwritten.
      Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
    } else {
      // X must be computed from the original Y before Y is overwritten.
      X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
      Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
    }

    Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
  }

  MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
  MI.eraseFromParent();
  return Legalized;
}
// Lower G_FSHL/G_FSHR into a left shift of the first value operand, a logical
// right shift of the second, and an OR of the two.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
  const Register Result = MI.getOperand(0).getReg();
  const Register XReg = MI.getOperand(1).getReg();
  const Register YReg = MI.getOperand(2).getReg();
  const Register AmtReg = MI.getOperand(3).getReg();
  const LLT ValTy = MRI.getType(Result);
  const LLT AmtTy = MRI.getType(AmtReg);
  const unsigned Bits = ValTy.getScalarSizeInBits();
  const bool ShiftsLeft = MI.getOpcode() == TargetOpcode::G_FSHL;

  MachineIRBuilder &B = MIRBuilder;
  Register FromX, FromY;

  if (!isNonZeroModBitWidthOrUndef(MRI, AmtReg, Bits)) {
    // General case. With A = amount % Bits:
    //   fshl: X << A | Y >> 1 >> (Bits - 1 - A)
    //   fshr: X << 1 << (Bits - 1 - A) | Y >> A
    // The inverse shift is done in two steps so that no single shift is by
    // the full bit width.
    auto LowBitsMask = B.buildConstant(AmtTy, Bits - 1);
    Register Amt, InvAmt;
    if (isPowerOf2_32(Bits)) {
      // A = Z & (Bits - 1) and Bits - 1 - A = ~Z & (Bits - 1).
      Amt = B.buildAnd(AmtTy, AmtReg, LowBitsMask).getReg(0);
      auto Flipped = B.buildNot(AmtTy, AmtReg);
      InvAmt = B.buildAnd(AmtTy, Flipped, LowBitsMask).getReg(0);
    } else {
      auto BitsC = B.buildConstant(AmtTy, Bits);
      Amt = B.buildURem(AmtTy, AmtReg, BitsC).getReg(0);
      InvAmt = B.buildSub(AmtTy, LowBitsMask, Amt).getReg(0);
    }

    auto One = B.buildConstant(AmtTy, 1);
    if (ShiftsLeft) {
      FromX = B.buildShl(ValTy, XReg, Amt).getReg(0);
      auto YByOne = B.buildLShr(ValTy, YReg, One);
      FromY = B.buildLShr(ValTy, YByOne, InvAmt).getReg(0);
    } else {
      auto XByOne = B.buildShl(ValTy, XReg, One);
      FromX = B.buildShl(ValTy, XByOne, InvAmt).getReg(0);
      FromY = B.buildLShr(ValTy, YReg, Amt).getReg(0);
    }
  } else {
    // The helper accepted the amount, so a single shift by Bits - A is used.
    // With A = amount % Bits:
    //   fshl: X << A | Y >> (Bits - A)
    //   fshr: X << (Bits - A) | Y >> A
    auto BitsC = B.buildConstant(AmtTy, Bits);
    const Register Amt = B.buildURem(AmtTy, AmtReg, BitsC).getReg(0);
    const Register InvAmt = B.buildSub(AmtTy, BitsC, Amt).getReg(0);
    FromX = B.buildShl(ValTy, XReg, ShiftsLeft ? Amt : InvAmt).getReg(0);
    FromY = B.buildLShr(ValTy, YReg, ShiftsLeft ? InvAmt : Amt).getReg(0);
  }

  B.buildOr(Result, FromX, FromY);
  MI.eraseFromParent();
  return Legalized;
}
// Lower G_FSHL/G_FSHR. If the opposite-direction funnel shift is itself
// marked Lower, expand straight to shifts; otherwise try the
// opposite-direction form first and fall back to shifts if that fails.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
  const LLT ValTy = MRI.getType(MI.getOperand(0).getReg());
  const LLT AmtTy = MRI.getType(MI.getOperand(3).getReg());

  const unsigned Opposite = MI.getOpcode() == TargetOpcode::G_FSHL
                                ? TargetOpcode::G_FSHR
                                : TargetOpcode::G_FSHL;
  const bool OppositeIsLowered =
      LI.getAction({Opposite, {ValTy, AmtTy}}).Action == Lower;

  if (!OppositeIsLowered) {
    const LegalizerHelper::LegalizeResult ViaInverse =
        lowerFunnelShiftWithInverse(MI);
    if (ViaInverse != UnableToLegalize)
      return ViaInverse;
  }
  return lowerFunnelShiftAsShifts(MI);
}
// Replace G_ROTL/G_ROTR with the rotate in the opposite direction by the
// negated amount (0 - Amt, computed in the amount type).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Value = MI.getOperand(1).getReg();
  const Register Amount = MI.getOperand(2).getReg();
  const LLT AmtTy = MRI.getType(Amount);

  auto ZeroAmt = MIRBuilder.buildConstant(AmtTy, 0);
  const unsigned Opposite = MI.getOpcode() == TargetOpcode::G_ROTL
                                ? TargetOpcode::G_ROTR
                                : TargetOpcode::G_ROTL;
  auto NegAmt = MIRBuilder.buildSub(AmtTy, ZeroAmt, Amount);
  MIRBuilder.buildInstr(Opposite, {Result}, {Value, NegAmt});

  MI.eraseFromParent();
  return Legalized;
}
// Lower G_ROTL/G_ROTR. In order of preference: the opposite rotate (power-of-
// two element widths only), a funnel shift with both value operands equal to
// the source, or a pair of shifts OR'ed together.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Amt = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  LLT AmtTy = MRI.getType(Amt);

  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;

  MIRBuilder.setInstrAndDebugLoc(MI);

  // If a rotate in the other direction is supported, use it.
  // NOTE(review): this query passes SrcTy as the second type while the
  // funnel-shift queries below pass AmtTy - confirm which is intended.
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
      isPowerOf2_32(EltSizeInBits))
    return lowerRotateWithReverseRotate(MI);

  // If a funnel shift in either direction is supported, use it.
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    // Emits Opc R1 = R2, R2, R3 and erases MI.
    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
                                Register R3) {
      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
      MI.eraseFromParent();
      return Legalized;
    };
    if (IsFShLegal) {
      return buildFunnelShift(FShOpc, Dst, Src, Amt);
    } else if (isPowerOf2_32(EltSizeInBits)) {
      // Opposite direction: negate the amount. The negation is built in the
      // amount's own type, since the result is used as the amount operand.
      Amt = MIRBuilder.buildNeg(AmtTy, Amt).getReg(0);
      return buildFunnelShift(RevFsh, Dst, Src, Amt);
    }
  }

  auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
  Register ShVal;
  Register RevShiftVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto One = MIRBuilder.buildConstant(AmtTy, 1);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
    RevShiftVal =
        MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
  }
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
  return Legalized;
}
// Convert an s64 source to an s32 result using integer operations only. In
// C-like terms the emitted sequence is:
//   lz = ctlz(u);
//   e  = (u != 0) ? 127 + 63 - lz : 0;
//   u  = (u << lz) & 0x7fffffffffffffff;
//   t  = u & 0xffffffffff;
//   v  = (e << 23) | (uint32_t)(u >> 40);
//   r  = t > 0x8000000000 ? 1 : (t == 0x8000000000 ? v & 1 : 0);
//   result = v + r;
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  assert(MRI.getType(Input) == S64 && MRI.getType(Result) == S32);

  MachineIRBuilder &B = MIRBuilder;
  auto Zero32 = B.buildConstant(S32, 0);
  auto Zero64 = B.buildConstant(S64, 0);

  // Exponent term; forced to zero for a zero input.
  auto LeadingZeros = B.buildCTLZ_ZERO_UNDEF(S32, Input);
  auto ExpBase = B.buildConstant(S32, 127U + 63U);
  auto ExpIfNonZero = B.buildSub(S32, ExpBase, LeadingZeros);
  auto InputNonZero = B.buildICmp(CmpInst::ICMP_NE, S1, Input, Zero64);
  auto Exponent = B.buildSelect(S32, InputNonZero, ExpIfNonZero, Zero32);

  // Shift the leading one up to bit 63 and clear it.
  auto AllButTopBit = B.buildConstant(S64, (-1ULL) >> 1);
  auto Normalized = B.buildShl(S64, Input, LeadingZeros);
  auto Fraction = B.buildAnd(S64, Normalized, AllButTopBit);

  // The low 40 bits do not fit in the result and only steer the rounding.
  auto Low40Mask = B.buildConstant(S64, 0xffffffffffULL);
  auto Discarded = B.buildAnd(S64, Fraction, Low40Mask);

  auto Forty = B.buildConstant(S64, 40);
  auto KeptBits = B.buildLShr(S64, Fraction, Forty);
  auto TwentyThree = B.buildConstant(S32, 23);
  auto ExpField = B.buildShl(S32, Exponent, TwentyThree);
  auto KeptBits32 = B.buildTrunc(S32, KeptBits);
  auto Unrounded = B.buildOr(S32, ExpField, KeptBits32);

  // Add one when the discarded bits exceed half, or equal half while the
  // lowest kept bit is set.
  auto Half = B.buildConstant(S64, 0x8000000000ULL);
  auto AboveHalf = B.buildICmp(CmpInst::ICMP_UGT, S1, Discarded, Half);
  auto ExactlyHalf = B.buildICmp(CmpInst::ICMP_EQ, S1, Discarded, Half);
  auto One = B.buildConstant(S32, 1);
  auto LowestKeptBit = B.buildAnd(S32, Unrounded, One);
  auto TieIncrement = B.buildSelect(S32, ExactlyHalf, LowestKeptBit, Zero32);
  auto Increment = B.buildSelect(S32, AboveHalf, One, TieIncrement);
  B.buildAdd(Result, Unrounded, Increment);

  MI.eraseFromParent();
  return Legalized;
}
// Lower G_UITOFP. An s1 source becomes a select between the FP constants 1.0
// and 0.0; s64 -> s32 goes through lowerU64ToF32BitOps; everything else is
// not handled.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT InputTy = MRI.getType(Input);

  if (InputTy == LLT::scalar(1)) {
    auto OneFP = MIRBuilder.buildFConstant(ResultTy, 1.0);
    auto ZeroFP = MIRBuilder.buildFConstant(ResultTy, 0.0);
    MIRBuilder.buildSelect(Result, Input, OneFP, ZeroFP);
    MI.eraseFromParent();
    return Legalized;
  }

  const bool IsS64ToS32 =
      InputTy == LLT::scalar(64) && ResultTy == LLT::scalar(32);
  if (!IsS64ToS32)
    return UnableToLegalize;
  return lowerU64ToF32BitOps(MI);
}
// Lower G_SITOFP. An s1 source becomes a select between the FP constants -1.0
// and 0.0. s64 -> s32 is expressed as a G_UITOFP of the magnitude followed by
// a conditional negate. Everything else is not handled.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT ResultTy = MRI.getType(Result);
  const LLT InputTy = MRI.getType(Input);

  if (InputTy == S1) {
    auto MinusOneFP = MIRBuilder.buildFConstant(ResultTy, -1.0);
    auto ZeroFP = MIRBuilder.buildFConstant(ResultTy, 0.0);
    MIRBuilder.buildSelect(Result, Input, MinusOneFP, ZeroFP);
    MI.eraseFromParent();
    return Legalized;
  }

  if (!(InputTy == S64 && ResultTy == S32))
    return UnableToLegalize;

  // s = l >> 63 (arithmetic); r = uitofp((l + s) ^ s); result = s ? -r : r
  auto SixtyThree = MIRBuilder.buildConstant(S64, 63);
  auto SignFill = MIRBuilder.buildAShr(S64, Input, SixtyThree);
  auto Biased = MIRBuilder.buildAdd(S64, Input, SignFill);
  auto Magnitude = MIRBuilder.buildXor(S64, Biased, SignFill);
  auto Converted = MIRBuilder.buildUITOFP(S32, Magnitude);
  auto Negated = MIRBuilder.buildFNeg(S32, Converted);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
  auto WasNegative =
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, SignFill, Zero64);
  MIRBuilder.buildSelect(Result, WasNegative, Negated, Converted);

  MI.eraseFromParent();
  return Legalized;
}
// Lower G_FPTOUI for s32/s64 sources and s32/s64 results using G_FPTOSI.
// With T = 2^(result bits - 1): inputs that compare ULT to T use G_FPTOSI
// directly; all others use fptosi(x - T) with the top result bit flipped.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  const Register Result = MI.getOperand(0).getReg();
  const Register Input = MI.getOperand(1).getReg();
  const LLT IntTy = MRI.getType(Result);
  const LLT FpTy = MRI.getType(Input);

  if (!(FpTy == S64 || FpTy == S32))
    return UnableToLegalize;
  if (!(IntTy == S32 || IntTy == S64))
    return UnableToLegalize;

  // T as an integer (only the top bit set) and as an FP value in the source
  // format.
  const APInt TopBit = APInt::getSignMask(IntTy.getSizeInBits());
  const fltSemantics &Semantics = FpTy.getSizeInBits() == 32
                                      ? APFloat::IEEEsingle()
                                      : APFloat::IEEEdouble();
  APFloat ThresholdFP(Semantics, APInt::getZero(FpTy.getSizeInBits()));
  ThresholdFP.convertFromAPInt(TopBit, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder Direct = MIRBuilder.buildFPTOSI(IntTy, Input);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(FpTy, ThresholdFP);
  MachineInstrBuilder Reduced = MIRBuilder.buildFSub(FpTy, Input, Threshold);
  MachineInstrBuilder ReducedInt = MIRBuilder.buildFPTOSI(IntTy, Reduced);
  MachineInstrBuilder TopBitC = MIRBuilder.buildConstant(IntTy, TopBit);
  MachineInstrBuilder Adjusted =
      MIRBuilder.buildXor(IntTy, ReducedInt, TopBitC);

  MachineInstrBuilder BelowThreshold =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Input, Threshold);
  MIRBuilder.buildSelect(Result, BelowThreshold, Direct, Adjusted);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_FPTOSI with integer bit manipulation of the source value.
///
/// Only handled when the source scalar type is s32 and the destination scalar
/// type is s64; anything else returns UnableToLegalize. The source bits are
/// split into exponent, sign and mantissa fields, the mantissa is shifted
/// according to the unbiased exponent, the sign is applied, and a negative
/// exponent selects a result of zero.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
// Scalar types are compared, so vectors of these element types pass too.
if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
return UnableToLegalize;
unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
// ExponentBits = (Src & 0x7F800000) >> 23
auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
// Sign = sext((Src & signmask) >>s (SrcEltBits - 1)): all-ones when the sign
// bit is set, zero otherwise.
auto SignMask = MIRBuilder.buildConstant(SrcTy,
APInt::getSignMask(SrcEltBits));
auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
Sign = MIRBuilder.buildSExt(DstTy, Sign);
// R = zext((Src & 0x007FFFFF) | 0x00800000): the mantissa field with bit 23
// set on top of it.
auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
R = MIRBuilder.buildZExt(DstTy, R);
// Exponent = ExponentBits - 127. Both possible shift amounts are computed:
// (Exponent - 23) for a left shift and (23 - Exponent) for a right shift.
auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
const LLT S1 = LLT::scalar(1);
// Use the left-shifted value when Exponent > 23, else the right-shifted one.
auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
S1, Exponent, ExponentLoBit);
R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
// Conditional negate: (R ^ Sign) - Sign.
auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
// A negative unbiased exponent produces 0.
auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
S1, Exponent, ZeroSrcTy);
auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
MI.eraseFromParent();
return Legalized;
}
/// Lower a scalar G_FPTRUNC whose source is unmerged into two s32 halves,
/// computing the result with 32-bit integer operations and truncating the
/// final 32-bit value into Dst.
///
/// Vector sources are rejected with UnableToLegalize. The caller in this file
/// (lowerFPTRUNC) only dispatches here for s64 -> s16 scalar types.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
if (MRI.getType(Src).isVector()) return UnableToLegalize;
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
// U is the first unmerged half, UH the second.
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);
Register UH = Unmerge.getReg(1);
// E = ((UH >> 20) & 0x7ff) + (-1023 + 15)
// NOTE(review): the bias constants are `unsigned`, so `-ExpBiasf64 +
// ExpBiasf16` is evaluated in unsigned arithmetic before being passed to
// buildConstant — confirm the resulting 32-bit constant is as intended.
auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
E = MIRBuilder.buildAdd(
S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
// M = (UH >> 8) & 0xffe
auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
// M |= (((UH & 0x1ff) | U) != 0): fold the remaining low bits into bit 0.
auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
MIRBuilder.buildConstant(S32, 0x1ff));
MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
auto Zero = MIRBuilder.buildConstant(S32, 0);
auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
M = MIRBuilder.buildOr(S32, M, Lo40Set);
// I = (M != 0 ? 0x0200 : 0) | 0x7c00
auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
// N = M | (E << 12)
auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
auto N = MIRBuilder.buildOr(S32, M, EShl12);
// B = smin(smax(1 - E, 0), 13)
auto One = MIRBuilder.buildConstant(S32, 1);
auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
// D = (M | 0x1000) >> B, with bit 0 set if shifting back left does not
// reproduce the unshifted value (i.e. bits were shifted out).
auto SigSetHigh = MIRBuilder.buildOr(S32, M,
MIRBuilder.buildConstant(S32, 0x1000));
auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
auto D0 = MIRBuilder.buildShl(S32, D, B);
auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
D0, SigSetHigh);
auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
D = MIRBuilder.buildOr(S32, D, D1);
// V = (E < 1) ? D : N
auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
// Drop the low two bits of V and add 1 when the low three bits were 3 or
// greater than 5.
auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
MIRBuilder.buildConstant(S32, 3));
auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
MIRBuilder.buildConstant(S32, 5));
auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
V1 = MIRBuilder.buildOr(S32, V0, V1);
V = MIRBuilder.buildAdd(S32, V, V1);
// V = (E > 30) ? 0x7c00 : V
auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
E, MIRBuilder.buildConstant(S32, 30));
V = MIRBuilder.buildSelect(S32, CmpEGt30,
MIRBuilder.buildConstant(S32, 0x7c00), V);
// V = (E == 1039) ? I : V. Note the predicate is ICMP_EQ despite the
// variable being named "Gt".
auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
E, MIRBuilder.buildConstant(S32, 1039));
V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
// Sign = (UH >> 16) & 0x8000, OR'ed into the result before truncation.
auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
V = MIRBuilder.buildOr(S32, Sign, V);
MIRBuilder.buildTrunc(Dst, V);
MI.eraseFromParent();
return Legalized;
}
/// Lower G_FPTRUNC. The only supported combination is a 64-bit source scalar
/// type with a 16-bit destination scalar type, which is forwarded to
/// lowerFPTRUNC_F64_TO_F16; everything else is reported as UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  const LLT DstEltTy = MRI.getType(MI.getOperand(0).getReg()).getScalarType();
  const LLT SrcEltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();

  const bool IsF64ToF16 =
      DstEltTy == LLT::scalar(16) && SrcEltTy == LLT::scalar(64);
  if (!IsF64ToF16)
    return UnableToLegalize;

  return lowerFPTRUNC_F64_TO_F16(MI);
}
/// Lower G_FPOWI by converting the integer exponent to floating point with
/// G_SITOFP and emitting an FPow with the original instruction's flags.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Base = MI.getOperand(1).getReg();
  Register IntExponent = MI.getOperand(2).getReg();
  const LLT FPTy = MRI.getType(Result);

  auto FPExponent = MIRBuilder.buildSITOFP(FPTy, IntExponent);
  MIRBuilder.buildFPow(Result, Base, FPExponent, MI.getFlags());

  MI.eraseFromParent();
  return Legalized;
}
/// Map an integer min/max opcode to the integer compare predicate under which
/// the first operand is the one to select. Any other opcode is unreachable.
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
  if (Opc == TargetOpcode::G_SMIN)
    return CmpInst::ICMP_SLT;
  if (Opc == TargetOpcode::G_SMAX)
    return CmpInst::ICMP_SGT;
  if (Opc == TargetOpcode::G_UMIN)
    return CmpInst::ICMP_ULT;
  if (Opc == TargetOpcode::G_UMAX)
    return CmpInst::ICMP_UGT;
  llvm_unreachable("not in integer min/max");
}
/// Expand an integer min/max into an integer compare of the two operands
/// followed by a select between them.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register A = MI.getOperand(1).getReg();
  Register B = MI.getOperand(2).getReg();

  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
  // Condition type: the result type with its element size changed to 1 bit.
  const LLT CondTy = MRI.getType(Result).changeElementSize(1);

  auto PickA = MIRBuilder.buildICmp(Pred, CondTy, A, B);
  MIRBuilder.buildSelect(Result, PickA, A, B);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_FCOPYSIGN with integer bit operations:
///   Dst = (Src0 & ~signmask) | (aligned(Src1) & signmask)
/// where aligned(Src1) moves Src1's top bit to the top-bit position of
/// Src0's type when the two operand types differ.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Mag = MI.getOperand(1).getReg();
  Register SignSrc = MI.getOperand(2).getReg();

  const LLT MagTy = MRI.getType(Mag);
  const LLT SignTy = MRI.getType(SignSrc);
  const int MagBits = MagTy.getScalarSizeInBits();
  const int SignBits = SignTy.getScalarSizeInBits();

  auto SignBitMask =
      MIRBuilder.buildConstant(MagTy, APInt::getSignMask(MagBits));
  auto MagnitudeMask = MIRBuilder.buildConstant(
      MagTy, APInt::getLowBitsSet(MagBits, MagBits - 1));

  // Magnitude operand with its top bit cleared.
  Register MagOnly = MIRBuilder.buildAnd(MagTy, Mag, MagnitudeMask).getReg(0);

  // Bring the sign operand into MagTy with its top bit in the top position.
  Register AlignedSign = SignSrc;
  if (MagTy != SignTy) {
    if (MagBits > SignBits) {
      // Narrower sign operand: widen, then shift the top bit up.
      auto ShiftAmt = MIRBuilder.buildConstant(MagTy, MagBits - SignBits);
      auto Widened = MIRBuilder.buildZExt(MagTy, SignSrc);
      AlignedSign = MIRBuilder.buildShl(MagTy, Widened, ShiftAmt).getReg(0);
    } else {
      // Otherwise: shift the top bit down, then truncate.
      auto ShiftAmt = MIRBuilder.buildConstant(SignTy, SignBits - MagBits);
      auto Shifted = MIRBuilder.buildLShr(SignTy, SignSrc, ShiftAmt);
      AlignedSign = MIRBuilder.buildTrunc(MagTy, Shifted).getReg(0);
    }
  }
  Register SignOnly =
      MIRBuilder.buildAnd(MagTy, AlignedSign, SignBitMask).getReg(0);

  // Only the final OR carries the original instruction's flags.
  unsigned Flags = MI.getFlags();
  MIRBuilder.buildOr(Dst, MagOnly, SignOnly, Flags);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_FMINNUM to G_FMINNUM_IEEE; any other opcode reaching this function
/// is lowered to G_FMAXNUM_IEEE.
///
/// Unless the instruction carries the FmNoNans flag, each input that is not
/// known to never be a signaling NaN is first passed through G_FCANONICALIZE.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  const bool IsMin = MI.getOpcode() == TargetOpcode::G_FMINNUM;
  const unsigned IEEEOpc =
      IsMin ? TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  const LLT Ty = MRI.getType(Dst);

  // Returns the register unchanged if it can never hold an sNaN, otherwise a
  // canonicalized copy.
  auto QuietIfNeeded = [&](Register Reg) -> Register {
    if (isKnownNeverSNaN(Reg, MRI))
      return Reg;
    return MIRBuilder.buildFCanonicalize(Ty, Reg, MI.getFlags()).getReg(0);
  };

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    LHS = QuietIfNeeded(LHS);
    RHS = QuietIfNeeded(RHS);
  }

  MIRBuilder.buildInstr(IEEEOpc, {Dst}, {LHS, RHS}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
/// Lower a fused multiply-add style instruction (operands 1..3) into a
/// separate FMul of operands 1 and 2 followed by an FAdd with operand 3. Both
/// new instructions inherit the original flags.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  const LLT Ty = MRI.getType(Result);
  const unsigned Flags = MI.getFlags();

  auto Product =
      MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), Flags);
  MIRBuilder.buildFAdd(Result, Product, MI.getOperand(3), Flags);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower the round intrinsic as:
///   t = trunc(x)
///   d = fabs(x - t)
///   o = copysign(1.0, x)
///   result = t + (d >= 0.5 ? o : 0.0)
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Input = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(Result);
  const LLT BoolTy = Ty.changeElementSize(1);

  auto Truncated = MIRBuilder.buildIntrinsicTrunc(Ty, Input, Flags);
  auto Frac = MIRBuilder.buildFSub(Ty, Input, Truncated, Flags);
  auto AbsFrac = MIRBuilder.buildFAbs(Ty, Frac, Flags);

  auto FPZero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto FPOne = MIRBuilder.buildFConstant(Ty, 1.0);
  auto FPHalf = MIRBuilder.buildFConstant(Ty, 0.5);
  auto SignedOne = MIRBuilder.buildFCopysign(Ty, FPOne, Input);

  auto AtLeastHalf =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, BoolTy, AbsFrac, FPHalf, Flags);
  auto Adjust =
      MIRBuilder.buildSelect(Ty, AtLeastHalf, SignedOne, FPZero, Flags);

  MIRBuilder.buildFAdd(Result, Truncated, Adjust, Flags);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower floor as:
///   t = trunc(x)
///   c = (x < 0) & (x != t)
///   result = t + sitofp(c)
/// The condition is converted with G_SITOFP, so a set condition bit is
/// treated as a signed value when forming the adjustment.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register Result = MI.getOperand(0).getReg();
  Register Input = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(Result);
  const LLT BoolTy = Ty.changeElementSize(1);

  auto Truncated = MIRBuilder.buildIntrinsicTrunc(Ty, Input, Flags);
  auto FPZero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto IsNegative =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, BoolTy, Input, FPZero, Flags);
  auto HasFraction =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, BoolTy, Input, Truncated, Flags);
  auto NeedsAdjust = MIRBuilder.buildAnd(BoolTy, IsNegative, HasFraction);
  auto Adjust = MIRBuilder.buildSITOFP(Ty, NeedsAdjust);

  MIRBuilder.buildFAdd(Result, Truncated, Adjust, Flags);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower a merge of N equally sized source operands into zero-extends, left
/// shifts and ORs on a scalar as wide as the destination:
///   result = zext(src0) | (zext(src1) << PartSize) | ...
/// For a pointer destination the integer result is converted with
/// G_INTTOPTR.
///
/// Returns UnableToLegalize for pointer destinations in a non-integral
/// address space. That check is performed before any instruction is emitted,
/// so a failed lowering does not leave partially built code behind.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  // Bail out early: the final int-to-pointer cast is not allowed here, and
  // failing after building the shift/or chain would strand dead instructions.
  const bool IsPtrDst = DstTy.isPointer();
  if (IsPtrDst && MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
                      DstTy.getAddressSpace())) {
    LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
    return UnableToLegalize;
  }

  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // The last OR writes straight into DstReg when no cast is needed.
    Register NextResult = I + 1 == NumOps && WideTy == DstTy
                              ? DstReg
                              : MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (IsPtrDst)
    MIRBuilder.buildIntToPtr(DstReg, ResultReg);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower an unmerge into a truncate for the first result and
/// shift-right-then-truncate for each following result, operating on the
/// source coerced to a scalar.
///
/// Returns UnableToLegalize for pointer result types or when coerceToScalar
/// does not produce a register.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register WholeReg = MI.getOperand(NumDst).getReg();
  Register FirstDst = MI.getOperand(0).getReg();
  const LLT PartTy = MRI.getType(FirstDst);
  if (PartTy.isPointer())
    return UnableToLegalize;

  WholeReg = coerceToScalar(WholeReg);
  if (!WholeReg)
    return UnableToLegalize;

  const LLT WholeTy = MRI.getType(WholeReg);

  // Part 0 needs no shift.
  MIRBuilder.buildTrunc(FirstDst, WholeReg);

  // Part I lives I * PartBits bits above bit 0.
  const unsigned PartBits = PartTy.getSizeInBits();
  for (unsigned Part = 1; Part != NumDst; ++Part) {
    auto ShiftAmt = MIRBuilder.buildConstant(WholeTy, Part * PartBits);
    auto Shifted = MIRBuilder.buildLShr(WholeTy, WholeReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(Part), Shifted);
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT.
///
/// With a constant index inside [0, NumElts) the vector is split into its
/// elements with extractParts and the requested element is copied out (or
/// replaced and the vector re-merged). Otherwise the vector is stored to a
/// stack temporary and the element (extract) or the whole vector (insert) is
/// loaded back after addressing the element through getVectorElementPointer.
///
/// Returns UnableToLegalize on the stack path when the element type is not
/// byte sized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  // The index is always the last operand.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  unsigned NumElts = VecTy.getNumElements();

  int64_t IdxVal;
  const bool IsConstIdx = mi_match(Idx, MRI, m_ICst(IdxVal));

  // Only take the split-into-elements path for indices that really address
  // one of the NumElts pieces. IdxVal == NumElts or a negative IdxVal would
  // index SrcRegs out of bounds, so those fall through to the stack path like
  // any other out-of-range constant.
  if (IsConstIdx && IdxVal >= 0 && static_cast<uint64_t>(IdxVal) < NumElts) {
    SmallVector<Register, 8> SrcRegs;
    extractParts(SrcVec, EltTy, NumElts, SrcRegs);

    if (InsertVal) {
      SrcRegs[IdxVal] = InsertVal;
      MIRBuilder.buildMerge(DstReg, SrcRegs);
    } else {
      MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  if (!EltTy.isByteSized()) {
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack temporary.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  if (IsConstIdx) {
    // Known offset: refine the pointer info and alignment accordingly.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // Variable offset: fall back to the element alignment and a pointer info
    // that only records the address space.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Overwrite the element in memory, then reload the whole vector.
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_SHUFFLE_VECTOR.
///
/// A scalar destination is a pick between the two (scalar) sources, or undef
/// for a mask value outside {0, 1}. A vector destination is assembled with a
/// build-vector whose elements are undef (negative mask entry), one of the
/// scalar sources, or an element extracted from the selected source vector.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  const LLT Src0Ty = MRI.getType(Src0Reg);
  const LLT DstTy = MRI.getType(DstReg);
  const LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    assert(Mask.size() == 1 && "Expected a single mask element");
    const int Pick = Mask[0];
    Register Val;
    if (Pick == 0)
      Val = Src0Reg;
    else if (Pick == 1)
      Val = Src1Reg;
    else
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);

    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  // A single undef element is created lazily and shared by all undef lanes.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  const LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    Register Lane;
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      Lane = Undef;
    } else if (Src0Ty.isScalar()) {
      Lane = Idx == 0 ? Src0Reg : Src1Reg;
    } else {
      // Mask values below NumElts address Src0, the rest address Src1.
      const int NumElts = Src0Ty.getNumElements();
      const bool FromSrc0 = Idx < NumElts;
      auto IdxK =
          MIRBuilder.buildConstant(IdxTy, FromSrc0 ? Idx : Idx - NumElts);
      auto Extract = MIRBuilder.buildExtractVectorElement(
          EltTy, FromSrc0 ? Src0Reg : Src1Reg, IdxK);
      Lane = Extract.getReg(0);
    }
    BuildVec.push_back(Lane);
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower a dynamic stack allocation by subtracting the allocation size from
/// the stack pointer, optionally masking the result down to the requested
/// alignment, and copying the new value to both the stack pointer register
/// and the destination.
///
/// Returns UnableToLegalize when the frame lowering reports an upward-growing
/// stack.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &FrameLowering = *MF.getSubtarget().getFrameLowering();
  if (FrameLowering.getStackGrowthDirection() ==
      TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  const Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  const LLT PtrTy = MRI.getType(Dst);
  const LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // Read the current stack pointer and view it as an integer.
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto OldSP = MIRBuilder.buildCopy(PtrTy, SPReg);
  auto OldSPInt = MIRBuilder.buildCast(IntPtrTy, OldSP);

  auto Alloc = MIRBuilder.buildSub(IntPtrTy, OldSPInt, AllocSize);
  if (Alignment > Align(1)) {
    // AND with the negated alignment value to clear the low bits.
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  auto NewSP = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, NewSP);
  MIRBuilder.buildCopy(Dst, NewSP);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_EXTRACT.
///
/// For a vector source where the offset and the destination size are both
/// multiples of the element size (and the range fits in the source), the
/// source is unmerged into elements and the needed ones are copied/merged.
/// Otherwise, for a scalar destination taken from a scalar source or from a
/// vector whose element type equals the destination type, the source is
/// (bitcast to an integer if needed,) shifted right by the offset and
/// truncated. Any other combination returns UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const unsigned BitOffset = MI.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(Dst);
  const LLT SrcTy = MRI.getType(Src);

  if (SrcTy.isVector()) {
    const unsigned EltBits = SrcTy.getElementType().getSizeInBits();
    const unsigned DstBits = DstTy.getSizeInBits();

    const bool OnEltBoundary =
        BitOffset % EltBits == 0 && DstBits % EltBits == 0;
    if (OnEltBoundary && BitOffset + DstBits <= SrcTy.getSizeInBits()) {
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);

      // Collect the elements covered by [BitOffset, BitOffset + DstBits).
      const unsigned FirstElt = BitOffset / EltBits;
      const unsigned EndElt = (BitOffset + DstBits) / EltBits;
      SmallVector<Register, 8> Picked;
      for (unsigned Elt = FirstElt; Elt < EndElt; ++Elt)
        Picked.push_back(Unmerge.getReg(Elt));

      if (Picked.size() == 1)
        MIRBuilder.buildCopy(Dst, Picked[0]);
      else
        MIRBuilder.buildMerge(Dst, Picked);

      MI.eraseFromParent();
      return Legalized;
    }
  }

  const bool FromScalar = SrcTy.isScalar();
  const bool FromMatchingVector =
      SrcTy.isVector() && DstTy == SrcTy.getElementType();
  if (!DstTy.isScalar() || !(FromScalar || FromMatchingVector))
    return UnableToLegalize;

  LLT SrcIntTy = SrcTy;
  if (!FromScalar) {
    SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
    Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
  }

  // Move the wanted bits down to bit 0 (nothing to do for offset 0).
  Register Aligned = Src;
  if (BitOffset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, BitOffset);
    Aligned = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt).getReg(0);
  }
  MIRBuilder.buildTrunc(Dst, Aligned);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_INSERT.
///
/// Two strategies are tried in order:
///  1. Vector container with a non-pointer inserted value whose offset and
///     size are multiples of the element size and which fits in the
///     container: unmerge into elements, splice in the new element(s), and
///     re-merge.
///  2. Otherwise, work on an integer the size of the container:
///     Dst = cast((Src & Mask) | (zext(InsertSrc) << Offset)).
/// Returns UnableToLegalize for a vector inserted value, for a vector
/// container whose element type differs from the inserted type, and when a
/// pointer in a non-integral address space would need to be cast.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
Register InsertSrc = MI.getOperand(2).getReg();
uint64_t Offset = MI.getOperand(3).getImm();
// Note: the "destination" type is read from the container source operand.
LLT DstTy = MRI.getType(Src);
LLT InsertTy = MRI.getType(InsertSrc);
// Strategy 1: element-wise replacement.
if (DstTy.isVector() && !InsertTy.isPointer()) {
LLT EltTy = DstTy.getElementType();
unsigned EltSize = EltTy.getSizeInBits();
unsigned InsertSize = InsertTy.getSizeInBits();
if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
(Offset + InsertSize <= DstTy.getSizeInBits())) {
auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
SmallVector<Register, 8> DstElts;
unsigned Idx = 0;
// Elements of Src located before the insertion point.
for (; Idx < Offset / EltSize; ++Idx) {
DstElts.push_back(UnmergeSrc.getReg(Idx));
}
// Replaced elements: either the unmerged pieces of InsertSrc, or InsertSrc
// itself when it is no larger than one element.
if (InsertTy.getSizeInBits() > EltSize) {
auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
++Idx, ++i) {
DstElts.push_back(UnmergeInsertSrc.getReg(i));
}
} else {
DstElts.push_back(InsertSrc);
++Idx;
}
// Elements of Src located after the inserted range.
for (; Idx < DstTy.getNumElements(); ++Idx) {
DstElts.push_back(UnmergeSrc.getReg(Idx));
}
MIRBuilder.buildMerge(Dst, DstElts);
MI.eraseFromParent();
return Legalized;
}
}
// Strategy 2 cannot handle these type combinations.
if (InsertTy.isVector() ||
(DstTy.isVector() && DstTy.getElementType() != InsertTy))
return UnableToLegalize;
const DataLayout &DL = MIRBuilder.getDataLayout();
if ((DstTy.isPointer() &&
DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
(InsertTy.isPointer() &&
DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
return UnableToLegalize;
}
// Strategy 2: bring both values into integer form.
LLT IntDstTy = DstTy;
if (!DstTy.isScalar()) {
IntDstTy = LLT::scalar(DstTy.getSizeInBits());
Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
}
// A non-scalar InsertTy reaching this point is not a vector (rejected
// above), so it is converted with ptrtoint.
if (!InsertTy.isScalar()) {
const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
}
// Widen the inserted value and shift it to its bit position.
Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
if (Offset != 0) {
auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
}
// Mask passed as (loBit = Offset + InsertSize, hiBit = Offset) with wrap;
// NOTE(review): presumably this sets every bit except the inserted field so
// the AND below clears exactly that field — confirm against APInt docs.
APInt MaskVal = APInt::getBitsSetWithWrap(
DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
// Convert the integer result back to the destination register's type.
MIRBuilder.buildCast(Dst, Or);
MI.eraseFromParent();
return Legalized;
}
/// Lower G_SADDO (and, for any other opcode reaching here, the subtract
/// variant) into a plain add/sub plus an overflow flag computed as
///   overflow = (RHS <cond> 0) xor (result <s LHS)
/// where <cond> is "signed less than" for add and "signed greater than" for
/// subtract.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register ValueDst = MI.getOperand(0).getReg();
  Register OverflowDst = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  const LLT ValueTy = MRI.getType(ValueDst);
  const LLT FlagTy = MRI.getType(OverflowDst);

  // The arithmetic result itself.
  if (!IsAdd)
    MIRBuilder.buildSub(ValueDst, LHS, RHS);
  else
    MIRBuilder.buildAdd(ValueDst, LHS, RHS);

  auto Zero = MIRBuilder.buildConstant(ValueTy, 0);

  // The result moved below LHS ...
  auto ResultBelowLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, FlagTy, ValueDst, LHS);
  // ... which is expected exactly when RHS has the matching sign.
  const CmpInst::Predicate RHSPred =
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT;
  auto RHSExpectsDecrease = MIRBuilder.buildICmp(RHSPred, FlagTy, RHS, Zero);

  MIRBuilder.buildXor(OverflowDst, RHSExpectsDecrease, ResultBelowLHS);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower saturating add/sub by clamping the right-hand operand with min/max
/// before a plain add/sub:
///   uadd.sat(a, b) -> a + umin(~a, b)
///   usub.sat(a, b) -> a - umin(a, b)
///   sadd.sat(a, b) -> hi = SMAX - smax(a, 0); lo = SMIN - smin(a, 0);
///                     a + smin(smax(lo, b), hi)
///   ssub.sat(a, b) -> lo = smax(a, -1) - SMAX; hi = smin(a, -1) - SMIN;
///                     a - smin(smax(lo, b), hi)
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  const LLT Ty = MRI.getType(Res);

  const unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SSUBSAT:
    break;
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  }

  const bool IsSigned =
      Opc == TargetOpcode::G_SADDSAT || Opc == TargetOpcode::G_SSUBSAT;
  const bool IsAdd =
      Opc == TargetOpcode::G_UADDSAT || Opc == TargetOpcode::G_SADDSAT;
  const unsigned BaseOp = IsAdd ? TargetOpcode::G_ADD : TargetOpcode::G_SUB;

  if (!IsSigned) {
    // Unsigned: limit RHS to the remaining headroom (~LHS) or to LHS itself.
    Register Limit = LHS;
    if (IsAdd)
      Limit = MIRBuilder.buildNot(Ty, LHS).getReg(0);
    auto Clamped = MIRBuilder.buildUMin(Ty, Limit, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Clamped});

    MI.eraseFromParent();
    return Legalized;
  }

  // Signed: compute the [Lo, Hi] window RHS must stay in, then clamp.
  uint64_t NumBits = Ty.getScalarSizeInBits();
  auto MaxVal = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
  auto MinVal = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));

  MachineInstrBuilder Hi, Lo;
  if (IsAdd) {
    auto Zero = MIRBuilder.buildConstant(Ty, 0);
    auto NonNegLHS = MIRBuilder.buildSMax(Ty, LHS, Zero);
    Hi = MIRBuilder.buildSub(Ty, MaxVal, NonNegLHS);
    auto NonPosLHS = MIRBuilder.buildSMin(Ty, LHS, Zero);
    Lo = MIRBuilder.buildSub(Ty, MinVal, NonPosLHS);
  } else {
    auto NegOne = MIRBuilder.buildConstant(Ty, -1);
    auto LHSAtLeastNegOne = MIRBuilder.buildSMax(Ty, LHS, NegOne);
    Lo = MIRBuilder.buildSub(Ty, LHSAtLeastNegOne, MaxVal);
    auto LHSAtMostNegOne = MIRBuilder.buildSMin(Ty, LHS, NegOne);
    Hi = MIRBuilder.buildSub(Ty, LHSAtMostNegOne, MinVal);
  }

  auto RaisedToLo = MIRBuilder.buildSMax(Ty, Lo, RHS);
  auto RHSClamped = MIRBuilder.buildSMin(Ty, RaisedToLo, Hi);
  MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});

  MI.eraseFromParent();
  return Legalized;
}
/// Lower saturating add/sub through the matching overflow-reporting op:
///   {tmp, ov} = [us](add|sub)o(a, b)
///   result    = ov ? clamp : tmp
/// The clamp is (tmp >>s (bits-1)) + SIGNED_MIN for the signed forms, all
/// ones for unsigned add, and zero for unsigned sub.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  const LLT Ty = MRI.getType(Res);
  const LLT BoolTy = Ty.changeElementSize(1);

  const unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SSUBSAT:
    break;
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  }

  const bool IsSigned =
      Opc == TargetOpcode::G_SADDSAT || Opc == TargetOpcode::G_SSUBSAT;
  const bool IsAdd =
      Opc == TargetOpcode::G_UADDSAT || Opc == TargetOpcode::G_SADDSAT;

  unsigned OverflowOp;
  if (IsSigned)
    OverflowOp = IsAdd ? TargetOpcode::G_SADDO : TargetOpcode::G_SSUBO;
  else
    OverflowOp = IsAdd ? TargetOpcode::G_UADDO : TargetOpcode::G_USUBO;

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Wrapped = OverflowRes.getReg(0);
  Register Overflowed = OverflowRes.getReg(1);

  MachineInstrBuilder Clamp;
  if (!IsSigned) {
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  } else {
    // Sign-fill of the wrapped result plus SIGNED_MIN.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto SignFill = MIRBuilder.buildAShr(Ty, Wrapped, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, SignFill, MinVal);
  }

  MIRBuilder.buildSelect(Res, Overflowed, Clamp, Wrapped);
  MI.eraseFromParent();
  return Legalized;
}
/// Lower a saturating left shift: shift left, shift back right (arithmetic
/// for the signed form, logical for the unsigned form) and compare with the
/// original value. If they differ, the result is replaced with the saturation
/// value: all-ones for unsigned; signed-min or signed-max for signed,
/// depending on whether LHS is negative.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShlSat(MachineInstr &MI) {
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  const bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;

  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  const LLT Ty = MRI.getType(Res);
  const LLT BoolTy = Ty.changeElementSize(1);
  const unsigned BW = Ty.getScalarSizeInBits();

  auto Shifted = MIRBuilder.buildShl(Ty, LHS, RHS);

  // Undo the shift with the matching right shift.
  MachineInstrBuilder RoundTrip;
  if (IsSigned)
    RoundTrip = MIRBuilder.buildAShr(Ty, Shifted, RHS);
  else
    RoundTrip = MIRBuilder.buildLShr(Ty, Shifted, RHS);

  MachineInstrBuilder SatVal;
  if (!IsSigned) {
    SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
  } else {
    auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
    auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
    auto Zero = MIRBuilder.buildConstant(Ty, 0);
    auto LHSNegative =
        MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS, Zero);
    SatVal = MIRBuilder.buildSelect(Ty, LHSNegative, SatMin, SatMax);
  }

  auto Overflowed =
      MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, RoundTrip);
  MIRBuilder.buildSelect(Res, Overflowed, SatVal, Shifted);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_BSWAP into shifts, ANDs and ORs.
///
/// The most and least significant bytes are swapped first with two plain
/// shifts. Each further pair (byte i from the bottom and byte i from the top)
/// is then exchanged by masking and shifting, and OR'ed into the result. The
/// last OR is retargeted to write Dst directly.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  const unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  const unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;

  // Swap most and least significant byte; all other bytes of Res are zero.
  auto OuterShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, OuterShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, OuterShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);

  // Exchange the i-th low byte with the i-th high byte.
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    // Mask selecting exactly byte i. Built with APInt so it is correct for
    // any width: the former `0xFF << (i * 8)` was evaluated in `int`, which
    // sign-extends into the upper bits at i == 3 and is undefined for i >= 4.
    const APInt APMask =
        APInt::getBitsSet(SizeInBytes * 8, i * 8, i * 8 + 8);
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    // Low byte moved up to the mirrored high position: (Src & Mask) << Amt.
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    // High byte moved down to position i: (Src >> Amt) & Mask.
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  }

  // Make the final OR define Dst instead of a fresh temporary.
  Res.getInstr()->getOperand(0).setReg(Dst);

  MI.eraseFromParent();
  return Legalized;
}
/// Build ((Src & Mask) >> N) | ((Src << N) & Mask) into \p Dst and return the
/// final OR. The working type is taken from \p Dst.
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
                                 MachineInstrBuilder Src, APInt Mask) {
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  MachineInstrBuilder ShiftAmt = B.buildConstant(Ty, N);
  MachineInstrBuilder MaskCst = B.buildConstant(Ty, Mask);

  // Masked bits moved down by N.
  auto Selected = B.buildAnd(Ty, Src, MaskCst);
  auto MovedDown = B.buildLShr(Ty, Selected, ShiftAmt);

  // Remaining bits moved up by N into the masked positions.
  auto ShiftedUp = B.buildShl(Ty, Src, ShiftAmt);
  auto MovedUp = B.buildAnd(Ty, ShiftedUp, MaskCst);

  return B.buildOr(Dst, MovedDown, MovedUp);
}
/// Lower G_BITREVERSE as a G_BSWAP followed by three SwapN steps that
/// exchange 4-bit, 2-bit and 1-bit groups using the splatted byte masks 0xF0,
/// 0xCC and 0xAA. The last step writes Dst.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  const unsigned Size = Ty.getSizeInBits();

  // Per-byte patterns replicated across the full width.
  const APInt NibbleMask = APInt::getSplat(Size, APInt(8, 0xF0));
  const APInt PairMask = APInt::getSplat(Size, APInt(8, 0xCC));
  const APInt BitMask = APInt::getSplat(Size, APInt(8, 0xAA));

  MachineInstrBuilder Swapped =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
  MachineInstrBuilder Nibbles = SwapN(4, Ty, MIRBuilder, Swapped, NibbleMask);
  MachineInstrBuilder Pairs = SwapN(2, Ty, MIRBuilder, Nibbles, PairMask);
  SwapN(1, Dst, MIRBuilder, Pairs, BitMask);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_READ_REGISTER (or, for any other opcode reaching here, the write
/// form) into a COPY from/to the physical register named by the metadata
/// operand. Returns UnableToLegalize when the target does not resolve the
/// name to a valid register.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
  MachineFunction &MF = MIRBuilder.getMF();

  // Reads are (value, name); writes are (name, value).
  const bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  const int ValueIdx = IsRead ? 0 : 1;
  const int NameIdx = IsRead ? 1 : 0;

  Register ValueReg = MI.getOperand(ValueIdx).getReg();
  const LLT ValueTy = MRI.getType(ValueReg);

  const MDNode *NameNode = cast<MDNode>(MI.getOperand(NameIdx).getMetadata());
  const MDString *RegName = cast<MDString>(NameNode->getOperand(0));

  Register PhysReg =
      TLI.getRegisterByName(RegName->getString().data(), ValueTy, MF);
  if (!PhysReg.isValid())
    return UnableToLegalize;

  if (!IsRead)
    MIRBuilder.buildCopy(PhysReg, ValueReg);
  else
    MIRBuilder.buildCopy(ValueReg, PhysReg);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower G_SMULH (or, for any other opcode reaching here, the unsigned form)
/// by extending both operands to twice the element width, multiplying,
/// shifting the product right by the original width and truncating.
/// Sign-extend / arithmetic shift are used for the signed form, zero-extend /
/// logical shift otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
  const bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  const unsigned ExtOpc = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  const unsigned ShrOpc = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;

  Register Result = MI.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(Result);
  const auto NarrowBits = NarrowTy.getScalarSizeInBits();
  const LLT WideTy = NarrowTy.changeElementSize(NarrowBits * 2);

  auto WideLHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(1)});
  auto WideRHS = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {MI.getOperand(2)});
  auto Product = MIRBuilder.buildMul(WideTy, WideLHS, WideRHS);

  // Bring the upper half of the product down and keep only that half.
  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, NarrowBits);
  auto HighHalf = MIRBuilder.buildInstr(ShrOpc, {WideTy}, {Product, ShiftAmt});
  MIRBuilder.buildTrunc(Result, HighHalf);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower a vector G_SELECT into bitwise ops:
///   Dst = (Op1 & Mask) | (Op2 & ~Mask)
/// A scalar condition is first sign-extended (or truncated) to the element
/// type and splatted across the destination vector type. Returns
/// UnableToLegalize for non-vector destinations and when the mask and
/// destination sizes differ.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register MaskReg = MI.getOperand(1).getReg();
  Register TrueReg = MI.getOperand(2).getReg();
  Register FalseReg = MI.getOperand(3).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  LLT MaskTy = MRI.getType(MaskReg);

  if (!DstTy.isVector())
    return UnableToLegalize;

  if (MaskTy.isScalar()) {
    Register MaskElt = MaskReg;

    // A condition wider than one bit (but no wider than an element) is
    // sign-extended from its low bit in place first.
    const bool NeedsInRegSExt =
        MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
        MaskTy != LLT::scalar(1);
    if (NeedsInRegSExt)
      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);

    // Match the element type, then replicate into every lane.
    MaskElt =
        MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(), MaskElt).getReg(0);
    MaskReg = MIRBuilder.buildShuffleSplat(DstTy, MaskElt).getReg(0);
    MaskTy = DstTy;
  }

  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits())
    return UnableToLegalize;

  auto InvMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto TruePart = MIRBuilder.buildAnd(MaskTy, TrueReg, MaskReg);
  auto FalsePart = MIRBuilder.buildAnd(MaskTy, FalseReg, InvMask);
  MIRBuilder.buildOr(DstReg, TruePart, FalsePart);

  MI.eraseFromParent();
  return Legalized;
}
/// Split G_SDIVREM / G_UDIVREM into a separate divide (defining operand 0) and
/// remainder (defining operand 1), both reading operands 2 and 3.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
  const bool Signed = MI.getOpcode() == TargetOpcode::G_SDIVREM;
  const unsigned DivOpc =
      Signed ? TargetOpcode::G_SDIV : TargetOpcode::G_UDIV;
  const unsigned RemOpc =
      Signed ? TargetOpcode::G_SREM : TargetOpcode::G_UREM;

  Register QuotientReg = MI.getOperand(0).getReg();
  Register RemainderReg = MI.getOperand(1).getReg();

  // The divide is emitted first, then the remainder.
  MIRBuilder.buildInstr(DivOpc, {QuotientReg},
                        {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(RemOpc, {RemainderReg},
                        {MI.getOperand(2), MI.getOperand(3)});

  MI.eraseFromParent();
  return Legalized;
}
/// Expand %res = G_ABS %a into:
///   %s   = G_ASHR %a, (scalar_size - 1)
///   %t   = G_ADD %a, %s
///   %res = G_XOR %t, %s
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);

  auto SignShiftAmt =
      MIRBuilder.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  auto SignFill = MIRBuilder.buildAShr(Ty, SrcReg, SignShiftAmt);
  auto Sum = MIRBuilder.buildAdd(Ty, SrcReg, SignFill);
  MIRBuilder.buildXor(DstReg, Sum, SignFill);

  MI.eraseFromParent();
  return Legalized;
}
/// Expand %res = G_ABS %a into:
///   %zero = G_CONSTANT 0
///   %neg  = G_SUB %zero, %a
///   %res  = G_SMAX %a, %neg
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
  Register InReg = MI.getOperand(1).getReg();
  LLT InTy = MRI.getType(InReg);

  Register ZeroReg = MIRBuilder.buildConstant(InTy, 0).getReg(0);
  Register NegatedReg = MIRBuilder.buildSub(InTy, ZeroReg, InReg).getReg(0);
  MIRBuilder.buildSMax(MI.getOperand(0), InReg, NegatedReg);

  MI.eraseFromParent();
  return Legalized;
}
/// Lower a vector reduction whose source operand is actually a scalar by
/// rewriting the instruction in place into a COPY.
///
/// Operand 0 is treated as the destination and operand 1 as the source.
/// Returns UnableToLegalize when the source is not a scalar, or when the
/// destination is wider than the source (a plain COPY cannot extend).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  // The destination type must come from the destination register; reading it
  // from the source register would make the width check below a no-op.
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  if (SrcTy.isScalar()) {
    // FIXME: handle the case where the result needs an extension.
    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
      return UnableToLegalize;

    // Mutate the existing instruction into a COPY, notifying the observer
    // before and after the change.
    Observer.changingInstr(MI);
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
    Observer.changedInstr(MI);
    return Legalized;
  }
  return UnableToLegalize;
}
/// Decide whether memory-function lowering should favour code size for \p MF.
/// On Darwin targets the answer is the function's hasMinSize(); on every other
/// target it is the function's hasOptSize().
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  const auto &Fn = MF.getFunction();
  const bool IsDarwin = MF.getTarget().getTargetTriple().isOSDarwin();
  return IsDarwin ? Fn.hasMinSize() : Fn.hasOptSize();
}
// Compute the sequence of access types used to expand the memory operation
// described by Op, appending one LLT per access to MemOps.
//
// Returns false when Op is a memcpy with a fixed destination alignment whose
// source alignment is lower than the destination alignment, or when more than
// Limit accesses would be needed. Returns true once the accumulated access
// sizes have consumed all Op.size() bytes. MemOps may already have received
// entries when false is returned.
//
// SrcAS is accepted but not referenced in this function body.
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
unsigned Limit, const MemOp &Op,
unsigned DstAS, unsigned SrcAS,
const AttributeList &FuncAttributes,
const TargetLowering &TLI) {
if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
// Ask the target for its preferred access type first.
LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
if (Ty == LLT()) {
// No preference from the target: start from s64 and, when the destination
// alignment is fixed, narrow while the type is larger than that alignment
// and the target rejects the misaligned access.
// NOTE(review): each narrowing step builds a scalar whose *bit* width is the
// previous type's *byte* size (s64 -> s8); confirm this is intended.
Ty = LLT::scalar(64);
if (Op.isFixedDstAlign())
while (Op.getDstAlign() < Ty.getSizeInBytes() &&
!TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
Ty = LLT::scalar(Ty.getSizeInBytes());
assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
}
unsigned NumMemOps = 0;
uint64_t Size = Op.size();
// Each outer iteration emits one access and subtracts its size from Size.
while (Size) {
unsigned TySize = Ty.getSizeInBytes();
// The current type is larger than what remains: shrink it, or keep it and
// let it overlap the previous access.
while (TySize > Size) {
LLT NewTy = Ty;
// Left-over pieces use scalars only: a vector is replaced by s64 if it is
// wider than 64 bits, otherwise by s32.
if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
// Step down to the largest power-of-two bit width below the current one.
NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");
bool Fast;
// The misaligned-access query below takes an MVT, derived here from the
// current (not the narrowed) type.
MVT VT = getMVTForLLT(Ty);
// If an access was already emitted, overlap is allowed, the narrowed type
// would not cover the remainder, and the target reports the misaligned
// access as allowed and fast, keep Ty and account for only the remaining
// bytes. The consumers in this file (lowerMemset, lowerMemcpy) then move
// the offset back so that this access overlaps the previous one.
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
TLI.allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
TySize = Size;
else {
Ty = NewTy;
TySize = NewTySize;
}
}
// Give up once the access count exceeds the caller's limit.
if (++NumMemOps > Limit)
return false;
MemOps.push_back(Ty);
Size -= TySize;
}
return true;
}
/// Build an IR type with the same layout as \p Ty using integer elements:
/// a fixed vector of iN for vector LLTs, otherwise a single iN where N is the
/// LLT's size in bits.
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (!Ty.isVector())
    return IntegerType::get(C, Ty.getSizeInBits());

  auto *EltIRTy = IntegerType::get(C, Ty.getScalarSizeInBits());
  return FixedVectorType::get(EltIRTy, Ty.getNumElements());
}
/// Produce a register of type \p Ty holding the memset value \p Val repeated
/// across every byte.
///
/// A constant value stored to a non-vector type is folded into one splatted
/// G_CONSTANT. A constant zero stored to a vector type becomes a G_CONSTANT 0
/// of that type. Otherwise the value is zero-extended (or truncated) to the
/// scalar element type and, for elements wider than 8 bits, multiplied by
/// 0x0101...01 to replicate the byte; vector types are then splatted.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const unsigned EltBits = Ty.getScalarSizeInBits();
  const bool IsVectorTy = Ty.isVector();

  auto KnownVal = getIConstantVRegValWithLookThrough(Val, MRI);
  if (KnownVal) {
    if (!IsVectorTy) {
      // Constant byte, scalar store type: emit the replicated constant.
      APInt Byte = KnownVal->Value.trunc(8);
      APInt Replicated = APInt::getSplat(EltBits, Byte);
      return MIB.buildConstant(Ty, Replicated).getReg(0);
    }
    if (KnownVal->Value == 0)
      return MIB.buildConstant(Ty, 0).getReg(0);
  }

  LLT EltTy = Ty.getScalarType();
  auto Widened = MIB.buildZExtOrTrunc(EltTy, Val);
  if (EltBits > 8) {
    // Multiplying by 0x0101...01 copies the low byte into every byte.
    APInt ByteOnes = APInt::getSplat(EltBits, APInt(8, 0x01));
    auto ByteOnesCst = MIB.buildConstant(EltTy, ByteOnes);
    Val = MIB.buildMul(EltTy, Widened, ByteOnesCst).getReg(0);
  }

  if (IsVectorTy)
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);
  return Val;
}
/// Expand a memset of \p KnownLen bytes at \p Dst with byte value \p Val into
/// a sequence of stores.
///
/// The store types come from findGISelOptimalMemOpLowering, limited by the
/// target's getMaxStoresPerMemset. Returns UnableToLegalize if no lowering is
/// found or a store value cannot be produced; otherwise \p MI is erased and
/// Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  assert(KnownLen != 0 && "Have a zero length memset length!");

  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // The destination alignment may be raised only when Dst is a frame index
  // that is not a fixed object.
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  const bool DstAlignCanChange =
      FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex());

  const bool OptSize = shouldLowerMemFuncForSize(MF);
  const unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);

  const auto &DstMMO = **MI.memoperands_begin();
  const unsigned DstAS = DstMMO.getPointerInfo().getAddrSpace();

  auto KnownVal = getIConstantVRegValWithLookThrough(Val, MRI);
  const bool IsZeroVal = KnownVal && KnownVal->Value == 0;

  std::vector<LLT> MemOps;
  const MemOp SetOp = MemOp::Set(KnownLen, DstAlignCanChange, Alignment,
                                 /*IsZeroMemset=*/IsZeroVal,
                                 /*IsVolatile=*/IsVolatile);
  if (!findGISelOptimalMemOpLowering(MemOps, Limit, SetOp, DstAS, ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Use the ABI alignment of the first store type as the target alignment
    // and raise the stack object's alignment to it when it is lower.
    Type *FirstIRTy = getTypeForLLT(MemOps[0], C);
    const Align ABIAlign = DL.getABITypeAlign(FirstIRTy);
    if (ABIAlign > Alignment) {
      Alignment = ABIAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);

  // Pick the widest store type (first one wins on ties) and materialise the
  // replicated value for it once.
  LLT LargestTy = MemOps[0];
  for (const LLT &Candidate : MemOps)
    if (Candidate.getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = Candidate;

  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
  if (!MemSetValue)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Remaining = KnownLen;
  for (unsigned I = 0, E = MemOps.size(); I != E; ++I) {
    LLT StoreTy = MemOps[I];
    const unsigned StoreBytes = StoreTy.getSizeInBytes();

    if (StoreBytes > Remaining) {
      // The final store is wider than what is left: move the offset back so
      // it overlaps the previous store.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= StoreBytes - Remaining;
    }

    Register StoreVal = MemSetValue;
    if (StoreTy.getSizeInBits() < LargestTy.getSizeInBits()) {
      // Narrower store: truncate the wide value when both types are scalars
      // and the target says the truncate is free, else rebuild the value.
      MVT StoreVT = getMVTForLLT(StoreTy);
      MVT LargestVT = getMVTForLLT(LargestTy);
      const bool BothScalar = !LargestTy.isVector() && !StoreTy.isVector();
      if (BothScalar && TLI.isTruncateFree(LargestVT, StoreVT))
        StoreVal = MIB.buildTrunc(StoreTy, MemSetValue).getReg(0);
      else
        StoreVal = getMemsetValue(Val, StoreTy, MIB);
      if (!StoreVal)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, StoreTy);

    Register StorePtr = Dst;
    if (DstOff != 0) {
      auto OffsetCst =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, OffsetCst).getReg(0);
    }

    MIB.buildStore(StoreVal, StorePtr, *StoreMMO);
    DstOff += StoreBytes;
    Remaining -= StoreBytes;
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Lower a G_MEMCPY_INLINE by extracting its operands, constant length and
/// memory-operand alignments, then forwarding to the explicit overload.
/// A zero-length copy is simply erased. The length is asserted to be a
/// constant.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  // Volatility is read from the first memory operand.
  const MachineMemOperand &DstMMO = **MI.memoperands_begin();
  const bool IsVolatile = DstMMO.isVolatile();

  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  assert(LenVRegAndVal &&
         "inline memcpy with dynamic size is not yet supported");
  const uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const MachineMemOperand &SrcMMO = **std::next(MI.memoperands_begin());
  const Align DstAlign = DstMMO.getBaseAlign();
  const Align SrcAlign = SrcMMO.getBaseAlign();

  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
}
/// Lower a G_MEMCPY_INLINE with already-extracted operands by delegating to
/// lowerMemcpy with the largest representable uint64_t as the limit argument.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
                                   uint64_t KnownLen, Align DstAlign,
                                   Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  const uint64_t MaxLimit = std::numeric_limits<uint64_t>::max();
  return lowerMemcpy(MI, Dst, Src, KnownLen, MaxLimit, DstAlign, SrcAlign,
                     IsVolatile);
}
/// Expand a memcpy of \p KnownLen bytes from \p Src to \p Dst into
/// interleaved load/store pairs.
///
/// The access types come from findGISelOptimalMemOpLowering with \p Limit as
/// the maximum access count. Returns UnableToLegalize if no lowering is
/// found; otherwise \p MI is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  Align Alignment = std::min(DstAlign, SrcAlign);

  // The destination alignment may be raised only when Dst is a frame index
  // that is not a fixed object.
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  const bool DstAlignCanChange =
      FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex());

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  const unsigned DstAS = DstMMO.getPointerInfo().getAddrSpace();
  const unsigned SrcAS = SrcMMO.getPointerInfo().getAddrSpace();

  std::vector<LLT> MemOps;
  const MemOp CopyOp = MemOp::Copy(KnownLen, DstAlignCanChange, Alignment,
                                   SrcAlign, IsVolatile);
  if (!findGISelOptimalMemOpLowering(MemOps, Limit, CopyOp, DstAS, SrcAS,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Start from the ABI alignment of the first access type.
    Type *FirstIRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(FirstIRTy);

    // Without stack realignment, step the candidate down while it still
    // exceeds both the current alignment and the natural stack alignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF)) {
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();
    }

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  unsigned CurrOffset = 0;
  unsigned Remaining = KnownLen;
  for (LLT CopyTy : MemOps) {
    const unsigned CopyBytes = CopyTy.getSizeInBytes();

    // An access wider than what is left is moved back so that it overlaps
    // the previous load/store pair.
    if (CopyBytes > Remaining)
      CurrOffset -= CopyBytes - Remaining;

    auto *LoadMMO = MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyBytes);
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyBytes);

    const bool HasOffset = CurrOffset != 0;

    // Load: the offset constant is typed from the source pointer's width and
    // reused below for the store address.
    Register OffsetReg;
    Register LoadPtr = Src;
    if (HasOffset) {
      LLT SrcPtrTy = MRI.getType(Src);
      OffsetReg =
          MIB.buildConstant(LLT::scalar(SrcPtrTy.getSizeInBits()), CurrOffset)
              .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcPtrTy, Src, OffsetReg).getReg(0);
    }
    auto Loaded = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Store.
    Register StorePtr = Dst;
    if (HasOffset) {
      LLT DstPtrTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstPtrTy, Dst, OffsetReg).getReg(0);
    }
    MIB.buildStore(Loaded, StorePtr, *StoreMMO);

    CurrOffset += CopyBytes;
    Remaining -= CopyBytes;
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Expand a memmove of \p KnownLen bytes from \p Src to \p Dst by emitting
/// all loads first and then all stores.
///
/// The access types come from findGISelOptimalMemOpLowering, limited by the
/// target's getMaxStoresPerMemmove. Returns UnableToLegalize if no lowering
/// is found; otherwise \p MI is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  assert(KnownLen != 0 && "Have a zero length memmove length!");

  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = std::min(DstAlign, SrcAlign);

  // The destination alignment may be raised only when Dst is a frame index
  // that is not a fixed object.
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  const bool DstAlignCanChange =
      FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex());

  const unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  const unsigned DstAS = DstMMO.getPointerInfo().getAddrSpace();
  const unsigned SrcAS = SrcMMO.getPointerInfo().getAddrSpace();

  // The last MemOp::Copy argument is hard-coded to true instead of forwarding
  // the IsVolatile parameter.
  // NOTE(review): presumably this keeps the lowering from producing
  // overlapping accesses - confirm against MemOp::allowOverlap.
  std::vector<LLT> MemOps;
  const MemOp CopyOp =
      MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign, true);
  if (!findGISelOptimalMemOpLowering(MemOps, Limit, CopyOp, DstAS, SrcAS,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Start from the ABI alignment of the first access type.
    Type *FirstIRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(FirstIRTy);

    // Without stack realignment, step the candidate down while it still
    // exceeds both the current alignment and the natural stack alignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF)) {
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();
    }

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);

  // Phase 1: emit every load, remembering the loaded registers in order.
  SmallVector<Register, 16> LoadVals;
  unsigned LoadOffset = 0;
  for (unsigned I = 0, E = MemOps.size(); I != E; ++I) {
    LLT CopyTy = MemOps[I];
    const unsigned CopyBytes = CopyTy.getSizeInBytes();
    auto *LoadMMO = MF.getMachineMemOperand(&SrcMMO, LoadOffset, CopyBytes);

    Register LoadPtr = Src;
    if (LoadOffset != 0) {
      LLT SrcPtrTy = MRI.getType(Src);
      auto OffsetCst = MIB.buildConstant(
          LLT::scalar(SrcPtrTy.getSizeInBits()), LoadOffset);
      LoadPtr = MIB.buildPtrAdd(SrcPtrTy, Src, OffsetCst).getReg(0);
    }

    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    LoadOffset += CopyBytes;
  }

  // Phase 2: emit the matching stores at the same offsets.
  unsigned StoreOffset = 0;
  auto LoadedIt = LoadVals.begin();
  for (LLT CopyTy : MemOps) {
    const unsigned CopyBytes = CopyTy.getSizeInBytes();
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, StoreOffset, CopyBytes);

    Register StorePtr = Dst;
    if (StoreOffset != 0) {
      LLT DstPtrTy = MRI.getType(Dst);
      auto OffsetCst = MIB.buildConstant(
          LLT::scalar(DstPtrTy.getSizeInBits()), StoreOffset);
      StorePtr = MIB.buildPtrAdd(DstPtrTy, Dst, OffsetCst).getReg(0);
    }

    MIB.buildStore(*LoadedIt, StorePtr, *StoreMMO);
    ++LoadedIt;
    StoreOffset += CopyBytes;
  }

  MI.eraseFromParent();
  return Legalized;
}
/// Entry point for lowering G_MEMCPY / G_MEMMOVE / G_MEMSET with a constant
/// length.
///
/// Returns UnableToLegalize when the length is not a known constant, when the
/// operation is volatile, or when \p MaxLen is non-zero and the length
/// exceeds it. A zero-length operation is erased. Otherwise dispatches to the
/// opcode-specific lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register Len = MI.getOperand(2).getReg();

  // The first memory operand supplies the destination alignment. For anything
  // but memset, the second supplies the source alignment and becomes the
  // operand whose volatility is checked below.
  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *CurMMO = *MMOIt;
  Align DstAlign = CurMMO->getBaseAlign();
  Align SrcAlign;
  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    ++MMOIt;
    CurMMO = *MMOIt;
    SrcAlign = CurMMO->getBaseAlign();
  }

  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;

  const uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const bool IsVolatile = CurMMO->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Volatile operations and lengths above the caller's cap are not expanded.
  if (IsVolatile)
    return UnableToLegalize;
  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  switch (Opc) {
  case TargetOpcode::G_MEMCPY: {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    const bool OptSize = shouldLowerMemFuncForSize(MF);
    const uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }
  case TargetOpcode::G_MEMMOVE:
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  case TargetOpcode::G_MEMSET:
    // For memset, operand 1 (named Src here) is the value being stored.
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  default:
    return UnableToLegalize;
  }
}