#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;
// Forward declarations for the ProfileSummaryInfo / BlockFrequencyInfo
// pointers that setupMF() below accepts and forwards to
// InstructionSelector::setupMF().
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}
namespace {
// Pull in the section of AArch64GenGlobalISel.inc that is guarded by
// GET_GLOBALISEL_PREDICATE_BITSET.
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
/// AArch64 implementation of the GlobalISel InstructionSelector interface.
///
/// The class overrides select() and setupMF() and keeps references to the
/// target machine, subtarget, instruction info, register info and register
/// bank info. Further members are spliced in from AArch64GenGlobalISel.inc
/// near the end of the class.
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
/// Selects \p I; overrides InstructionSelector::select().
bool select(MachineInstr &I) override;
/// Returns DEBUG_TYPE, which this file defines as "aarch64-isel".
static const char *getName() { return DEBUG_TYPE; }
/// Per-function setup: forwards to the base class, points the shared builder
/// at \p MF, refreshes the cached per-function state and runs processPHIs().
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
MIB.setMF(MF);
// Cache the attribute query once per function: the flag is true exactly
// when the function does NOT carry SpeculativeLoadHardening.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Reset to an invalid (default-constructed) register for the new function.
MFReturnAddr = Register();
processPHIs(MF);
}
private:
// NOTE(review): selectImpl is presumably defined by the GET_GLOBALISEL_IMPL
// section of AArch64GenGlobalISel.inc included after this class - confirm.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
// Steps around selection of a single instruction; their definitions are not
// visible in this part of the file.
bool preISelLower(MachineInstr &I);
bool earlySelect(MachineInstr &I);
// Invoked once per function from setupMF().
void processPHIs(MachineFunction &MF);
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
bool contractCrossBankCopyIntoStore(MachineInstr &I,
MachineRegisterInfo &MRI);
bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
// Compare-and-branch selection helpers.
bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
MachineIRBuilder &MIB) const;
bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
MachineIRBuilder &MIB) const;
bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
MachineIRBuilder &MIB) const;
bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI);
// Vector shift selection helpers.
bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
// Vector construction / element access helpers. The emit* functions return a
// MachineInstr pointer; see each definition for when that may be null.
MachineInstr *emitScalarToVector(unsigned EltSize,
const TargetRegisterClass *DstRC,
Register Scalar,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
Register EltReg, unsigned LaneIdx,
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitConstantVector(Register Dst, Constant *CV,
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI);
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
// Intrinsic selection helpers.
bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
MachineInstr &I);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Miscellaneous per-opcode selection helpers.
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
// Constant-pool helpers.
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
Register Op2,
MachineIRBuilder &MIRBuilder) const;
// Comparison emitters. emitFPCompare's predicate is optional and defaults to
// None.
MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitFPCompare(Register LHS, Register RHS,
MachineIRBuilder &MIRBuilder,
Optional<CmpInst::Predicate> = None) const;
// Generic instruction emitter; RenderFns defaults to None.
MachineInstr *emitInstr(unsigned Opcode,
std::initializer_list<llvm::DstOp> DstOps,
std::initializer_list<llvm::SrcOp> SrcOps,
MachineIRBuilder &MIRBuilder,
const ComplexRendererFns &RenderFns = None) const;
// Add/sub style emitters. emitAddSub takes a 5x2 opcode table; judging by
// the parameter name it is indexed by addressing mode and operand size -
// confirm against the definition.
MachineInstr *emitAddSub(
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
// Note: the out-of-line definition names the second and third parameters
// True and False rather than LHS and RHS.
MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
AArch64CC::CondCode CC,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
AArch64CC::CondCode Pred,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
MachineIRBuilder &MIRBuilder) const;
// Returns the emitted instruction paired with a condition code.
std::pair<MachineInstr *, AArch64CC::CondCode>
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
// Conjunction / conditional-compare emitters; OutCC is an out-parameter in
// emitConjunction and emitConjunctionRec.
MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
MachineIRBuilder &MIB) const;
MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
CmpInst::Predicate CC,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
MachineIRBuilder &MIB) const;
MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
bool Negate, Register CCOp,
AArch64CC::CondCode Predicate,
MachineIRBuilder &MIB) const;
// Branch emitters targeting a destination basic block.
MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
MachineBasicBlock *DestMBB,
MachineIRBuilder &MIB) const;
// Complex-operand matchers returning ComplexRendererFns.
ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
unsigned Size) const;
// Fixed-width wrappers around selectAddrModeUnscaled: each passes the width
// from its name divided by eight (8 -> 1, 16 -> 2, ..., 128 -> 16).
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
return selectAddrModeUnscaled(Root, 16);
}
ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
MachineRegisterInfo &MRI) const;
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
unsigned Size) const;
// Template form: forwards Width / 8 as the Size argument.
template <int Width>
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
return selectAddrModeIndexed(Root, Width / 8);
}
bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
ComplexRendererFns
selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
MachineOperand &Offset, unsigned SizeInBytes,
bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
// Template form: forwards Width / 8 as SizeInBytes.
template <int Width>
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
return selectAddrModeXRO(Root, Width / 8);
}
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
unsigned SizeInBytes) const;
// Template form: forwards Width / 8 as SizeInBytes.
template <int Width>
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
return selectAddrModeWRO(Root, Width / 8);
}
ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
bool AllowROR = false) const;
// Arithmetic form: relies on the default AllowROR = false.
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
// Logical form: passes AllowROR = true.
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root, true);
}
AArch64_AM::ShiftExtendType
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
// Operand renderers: each appends to MIB based on the given instruction;
// OpIdx defaults to -1.
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
int OpIdx = -1) const;
void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx = -1) const;
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
// G_SELECT and integer-compare folding helpers.
bool tryOptSelect(GSelect &Sel);
bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
// Instruction property queries.
bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
bool isDef32(const MachineInstr &MI) const;
// Target objects bound once in the constructor.
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
// Recomputed in setupMF(): true when the current function lacks the
// SpeculativeLoadHardening attribute.
bool ProduceNonFlagSettingCondBr = false;
// Reset to an invalid register in setupMF().
Register MFReturnAddr;
// Builder shared by the selector; setupMF() binds it to the current function.
MachineIRBuilder MIB;
// Additional member declarations supplied by AArch64GenGlobalISel.inc.
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
}
#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
/// Constructs the selector and binds its target references.
///
/// \p TM, \p STI and \p RBI are stored as references; TII and TRI are taken
/// from \p STI via getInstrInfo() / getRegisterInfo(). The constructor body is
/// empty.
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
: TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
RBI(RBI),
// The remaining member initializers are supplied by the *_INIT sections of
// AArch64GenGlobalISel.inc.
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
/// Picks the register class on bank \p RB used to hold a value of type \p Ty.
///
/// GPR bank: any size up to 32 bits yields a 32-bit GPR class, 64 bits a
/// 64-bit GPR class and 128 bits XSeqPairsClass. For the 32- and 64-bit cases
/// \p GetAllRegSet selects the GPR32all / GPR64all variants instead of
/// GPR32 / GPR64.
/// FPR bank: exactly 8, 16, 32, 64 or 128 bits yield FPR8 ... FPR128.
///
/// \returns the chosen class, or nullptr for any other bank or size.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         bool GetAllRegSet = false) {
  const unsigned BankID = RB.getID();
  const bool OnGPR = BankID == AArch64::GPRRegBankID;
  const bool OnFPR = BankID == AArch64::FPRRegBankID;
  // The type size is only queried once we know the bank is one we handle.
  if (!OnGPR && !OnFPR)
    return nullptr;

  const uint64_t SizeInBits = Ty.getSizeInBits();

  if (OnGPR) {
    if (SizeInBits <= 32) {
      if (GetAllRegSet)
        return &AArch64::GPR32allRegClass;
      return &AArch64::GPR32RegClass;
    }
    if (SizeInBits == 64) {
      if (GetAllRegSet)
        return &AArch64::GPR64allRegClass;
      return &AArch64::GPR64RegClass;
    }
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  // FPR bank: only exact sizes are accepted.
  if (SizeInBits == 8)
    return &AArch64::FPR8RegClass;
  if (SizeInBits == 16)
    return &AArch64::FPR16RegClass;
  if (SizeInBits == 32)
    return &AArch64::FPR32RegClass;
  if (SizeInBits == 64)
    return &AArch64::FPR64RegClass;
  if (SizeInBits == 128)
    return &AArch64::FPR128RegClass;
  return nullptr;
}
/// Picks the register class on bank \p RB for a value of \p SizeInBits bits.
///
/// GPR bank: up to 32 bits -> 32-bit GPR class, 64 -> 64-bit GPR class,
/// 128 -> XSeqPairsClass. \p GetAllRegSet switches the 32/64-bit results to
/// the GPR32all / GPR64all variants.
/// FPR bank: 8/16/32/64/128 bits -> FPR8 ... FPR128.
///
/// \returns the chosen class, or nullptr when the bank or size is not handled.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    // Everything that fits in 32 bits shares the 32-bit class.
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    switch (SizeInBits) {
    case 64:
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    case 128:
      return &AArch64::XSeqPairsClassRegClass;
    default:
      return nullptr;
    }
  case AArch64::FPRRegBankID:
    if (SizeInBits == 8)
      return &AArch64::FPR8RegClass;
    if (SizeInBits == 16)
      return &AArch64::FPR16RegClass;
    if (SizeInBits == 32)
      return &AArch64::FPR32RegClass;
    if (SizeInBits == 64)
      return &AArch64::FPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  default:
    return nullptr;
  }
}
/// Looks up the subregister index that corresponds to register class \p RC.
///
/// The index is chosen from the bit size TRI reports for \p RC:
/// 8 -> bsub, 16 -> hsub, 64 -> dsub. For 32 bits the result is ssub when
/// \p RC is exactly FPR32 and sub_32 for every other class.
///
/// \param RC      Register class to inspect; must not be null (it is
///                dereferenced unconditionally).
/// \param TRI     Register info used to query the size of \p RC.
/// \param SubReg  [out] Receives the subregister index on success. It is left
///                untouched on failure, so callers must check the return
///                value before using it.
/// \returns true if an index was found, false for any other size.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    // Only FPR32 uses ssub; every other 32-bit class uses sub_32.
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    // Terminate the line like the other debug messages in this file so that
    // subsequent debug output does not run onto the same line.
    LLVM_DEBUG(
        dbgs()
        << "Couldn't find appropriate subregister for register class.\n");
    return false;
  }
  return true;
}
/// Returns the minimum size in bits associated with bank \p RB: 32 for the GPR
/// bank and 8 for the FPR bank. Any other bank is treated as unreachable.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  const unsigned BankID = RB.getID();
  if (BankID == AArch64::GPRRegBankID)
    return 32;
  if (BankID == AArch64::FPRRegBankID)
    return 8;
  llvm_unreachable("Tried to get minimum size for unknown register bank.");
}
/// Combines \p Regs into a single tuple register with a REG_SEQUENCE.
///
/// A single register is returned unchanged. For two to four registers a
/// REG_SEQUENCE is built whose result class is looked up with
/// RegClassIDs[NumRegs - 2] and whose i-th input is tagged with SubRegs[i].
///
/// \param Regs         Registers to combine (asserted to be 2..4 when not 1).
/// \param RegClassIDs  Class IDs for the 2-, 3- and 4-register tuples.
/// \param SubRegs      Subregister index for each position in the tuple.
/// \param MIB          Builder used to emit the REG_SEQUENCE.
/// \returns the register defined by the REG_SEQUENCE (or Regs[0]).
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  const unsigned Count = Regs.size();
  if (Count == 1)
    return Regs.front();
  assert(Count >= 2 && Count <= 4 &&
         "Only support between two and 4 registers in a tuple!");

  const TargetRegisterInfo *RegInfo =
      MIB.getMF().getSubtarget().getRegisterInfo();
  const TargetRegisterClass *TupleRC =
      RegInfo->getRegClass(RegClassIDs[Count - 2]);

  MachineInstrBuilder Seq =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {TupleRC}, {});
  // Each input register is followed by the subregister index it occupies.
  const unsigned *NextSubReg = SubRegs;
  for (Register Part : Regs) {
    Seq.addUse(Part);
    Seq.addImm(*NextSubReg++);
  }
  return Seq.getReg(0);
}
/// Builds a tuple from \p Regs using the DD / DDD / DDDD register classes and
/// the dsub0..dsub3 subregister indices. See createTuple() for the details.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  // Indexed by (number of registers - 2).
  static const unsigned DTupleClassIDs[] = {AArch64::DDRegClassID,
                                            AArch64::DDDRegClassID,
                                            AArch64::DDDDRegClassID};
  // Indexed by position within the tuple.
  static const unsigned DTupleSubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                           AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, DTupleClassIDs, DTupleSubRegs, MIB);
}
/// Builds a tuple from \p Regs using the QQ / QQQ / QQQQ register classes and
/// the qsub0..qsub3 subregister indices. See createTuple() for the details.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  // Indexed by (number of registers - 2).
  static const unsigned QTupleClassIDs[] = {AArch64::QQRegClassID,
                                            AArch64::QQQRegClassID,
                                            AArch64::QQQQRegClassID};
  // Indexed by position within the tuple.
  static const unsigned QTupleSubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                           AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, QTupleClassIDs, QTupleSubRegs, MIB);
}
/// Extracts an immediate value from operand \p Root.
///
/// - Immediate operand: its value, converted to uint64_t.
/// - CImm operand: the zero-extended value of the constant.
/// - Register operand: the sign-extended value found by
///   getIConstantVRegValWithLookThrough(), or None if that lookup fails.
/// - Any other operand kind: None.
///
/// \p Root must be attached to an instruction inside a function, because the
/// function's register info is fetched up front regardless of operand kind.
static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  const MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (Root.isImm())
    return static_cast<uint64_t>(Root.getImm());
  if (Root.isCImm())
    return Root.getCImm()->getZExtValue();
  if (!Root.isReg())
    return None;

  auto Cst = getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
  if (!Cst)
    return None;
  return static_cast<uint64_t>(Cst->Value.getSExtValue());
}
static bool unsupportedBinOp(const MachineInstr &I,
const AArch64RegisterBankInfo &RBI,
const MachineRegisterInfo &MRI,
const AArch64RegisterInfo &TRI) {
LLT Ty = MRI.getType(I.getOperand(0).getReg());
if (!Ty.isValid()) {
LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
return true;
}
const RegisterBank *PrevOpBank = nullptr;
for (auto &MO : I.operands()) {
if (!MO.isReg()) {
LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
return true;
}
if (!Register::isVirtualRegister(MO.getReg())) {
LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
return true;
}
const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
if (!OpBank) {
LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
return true;
}
if (PrevOpBank && OpBank != PrevOpBank) {
LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
return true;
}
PrevOpBank = OpBank;
}
return false;
}
/// Maps a generic binary opcode to an AArch64 opcode for the given register
/// bank and operand size.
///
/// Handled combinations:
///  - GPR, 32 bits: G_SHL / G_LSHR / G_ASHR -> LSLVWr / LSRVWr / ASRVWr.
///  - GPR, 64 bits: the same shifts -> LSLVXr / LSRVXr / ASRVXr, plus
///    G_PTR_ADD -> ADDXrr.
///  - FPR, 32 bits: G_FADD / G_FSUB / G_FMUL / G_FDIV -> F*Srr.
///  - FPR, 64 bits: the same -> F*Drr, plus G_OR -> ORRv8i8.
///
/// \returns the mapped opcode, or \p GenericOpc unchanged when no mapping
/// exists.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  const bool OnGPR = RegBankID == AArch64::GPRRegBankID;
  const bool OnFPR = RegBankID == AArch64::FPRRegBankID;
  const bool Is32 = OpSize == 32;
  const bool Is64 = OpSize == 64;

  // Only the two banks at 32 or 64 bits have any mapping at all.
  if ((!OnGPR && !OnFPR) || (!Is32 && !Is64))
    return GenericOpc;

  if (OnGPR) {
    switch (GenericOpc) {
    case TargetOpcode::G_PTR_ADD:
      // Only mapped for 64-bit operands.
      return Is64 ? AArch64::ADDXrr : GenericOpc;
    case TargetOpcode::G_SHL:
      return Is64 ? AArch64::LSLVXr : AArch64::LSLVWr;
    case TargetOpcode::G_LSHR:
      return Is64 ? AArch64::LSRVXr : AArch64::LSRVWr;
    case TargetOpcode::G_ASHR:
      return Is64 ? AArch64::ASRVXr : AArch64::ASRVWr;
    default:
      return GenericOpc;
    }
  }

  // FPR bank.
  switch (GenericOpc) {
  case TargetOpcode::G_FADD:
    return Is64 ? AArch64::FADDDrr : AArch64::FADDSrr;
  case TargetOpcode::G_FSUB:
    return Is64 ? AArch64::FSUBDrr : AArch64::FSUBSrr;
  case TargetOpcode::G_FMUL:
    return Is64 ? AArch64::FMULDrr : AArch64::FMULSrr;
  case TargetOpcode::G_FDIV:
    return Is64 ? AArch64::FDIVDrr : AArch64::FDIVSrr;
  case TargetOpcode::G_OR:
    // Only mapped for 64-bit operands.
    return Is64 ? AArch64::ORRv8i8 : GenericOpc;
  default:
    return GenericOpc;
  }
}
/// Maps a generic load/store opcode to the AArch64 "ui" load/store opcode for
/// the given register bank and access size in bits.
///
/// \p GenericOpc is treated as a store when it equals G_STORE and as a load
/// otherwise. The GPR bank handles 8/16/32/64 bits, the FPR bank
/// 8/16/32/64/128 bits.
///
/// \returns the mapped opcode, or \p GenericOpc unchanged for an unhandled
/// bank or size.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool IsStore = GenericOpc == TargetOpcode::G_STORE;

  if (RegBankID == AArch64::GPRRegBankID) {
    if (OpSize == 8)
      return IsStore ? AArch64::STRBBui : AArch64::LDRBBui;
    if (OpSize == 16)
      return IsStore ? AArch64::STRHHui : AArch64::LDRHHui;
    if (OpSize == 32)
      return IsStore ? AArch64::STRWui : AArch64::LDRWui;
    if (OpSize == 64)
      return IsStore ? AArch64::STRXui : AArch64::LDRXui;
    return GenericOpc;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    if (OpSize == 8)
      return IsStore ? AArch64::STRBui : AArch64::LDRBui;
    if (OpSize == 16)
      return IsStore ? AArch64::STRHui : AArch64::LDRHui;
    if (OpSize == 32)
      return IsStore ? AArch64::STRSui : AArch64::LDRSui;
    if (OpSize == 64)
      return IsStore ? AArch64::STRDui : AArch64::LDRDui;
    if (OpSize == 128)
      return IsStore ? AArch64::STRQui : AArch64::LDRQui;
    return GenericOpc;
  }

  return GenericOpc;
}
/// Inserts a subregister COPY in front of \p I and makes \p I read from it.
///
/// A new COPY with result class \p To is built from subregister \p SubReg of
/// \p SrcReg, and operand 1 of \p I is rewritten to the COPY's result. If
/// operand 0 of \p I is not a physical register it is additionally constrained
/// to \p To.
///
/// \returns true unconditionally.
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder Builder(I);
  MachineInstrBuilder NarrowCopy =
      Builder.buildInstr(TargetOpcode::COPY, {To}, {});
  NarrowCopy.addReg(SrcReg, 0, SubReg);

  // Redirect the original instruction's source to the new copy.
  I.getOperand(1).setReg(NarrowCopy.getReg(0));

  // The destination may not have been constrained yet; do it here unless it
  // is a physical register.
  const Register DstReg = I.getOperand(0).getReg();
  if (!Register::isPhysicalRegister(DstReg))
    RBI.constrainGenericRegister(DstReg, *To, MRI);

  return true;
}
/// Computes the register classes to use for the source and destination of the
/// copy-like instruction \p I (operand 1 is the source, operand 0 the
/// destination).
///
/// Each class comes from getMinClassForRegBank() with GetAllRegSet = true,
/// using the operand's bank and size. As a special case, when the two banks
/// differ and both sizes are 1 bit, both sizes are treated as 32 bits.
///
/// \returns {source class, destination class}; either may be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  const Register Dst = I.getOperand(0).getReg();
  const Register Src = I.getOperand(1).getReg();
  const RegisterBank &DstBank = *RBI.getRegBank(Dst, MRI, TRI);
  const RegisterBank &SrcBank = *RBI.getRegBank(Src, MRI, TRI);
  unsigned DstBits = RBI.getSizeInBits(Dst, MRI, TRI);
  unsigned SrcBits = RBI.getSizeInBits(Src, MRI, TRI);

  // Cross-bank copies of 1-bit values are widened to 32 bits on both sides.
  const bool CrossBank = SrcBank != DstBank;
  if (CrossBank && DstBits == 1 && SrcBits == 1) {
    DstBits = 32;
    SrcBits = 32;
  }

  const TargetRegisterClass *SrcRC =
      getMinClassForRegBank(SrcBank, SrcBits, true);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(DstBank, DstBits, true);
  return {SrcRC, DstRC};
}
/// Turns \p I into a target COPY, inserting whatever subregister copy or
/// promotion is needed to reconcile the source and destination sizes.
///
/// Operand 0 of \p I is the destination, operand 1 the source. For a real
/// COPY the sizes of the chosen source/destination register classes are
/// compared and one of three fix-ups may be applied:
///  - the source bank's minimum size exceeds the destination size: first copy
///    to a temporary on the destination bank, then take a subregister of it;
///  - the source is wider than the destination: take a subregister copy;
///  - the destination is wider than the source: widen with SUBREG_TO_REG.
/// A COPY into a physical register stops there. Otherwise the destination is
/// constrained to its class and the instruction's descriptor is set to COPY.
/// A G_ZEXT is first rewritten to COPY and then run through this function
/// again so the fix-ups above apply to it.
///
/// \returns true on success; false when a register class or subregister index
/// cannot be determined or the destination cannot be constrained. All such
/// checks happen before any new instruction is inserted.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Only real copies need the size fix-ups below.
  if (I.isCopy()) {
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    // Initialized so a failed lookup can never leak an indeterminate index.
    unsigned SubReg = 0;

    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      // The source bank has no register small enough: copy across banks
      // first, then take the subregister on the destination bank.
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, true);
      if (!DstTempRC || !getSubRegForClass(DstRC, TRI, SubReg)) {
        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy\n");
        return false;
      }

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // Source wider than destination: plain subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, true);
      if (!SubRegRC || !getSubRegForClass(SubRegRC, TRI, SubReg)) {
        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy\n");
        return false;
      }
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // Destination wider than source: promote with SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, true);
      if (!PromotionRC || !getSubRegForClass(SrcRC, TRI, SubReg)) {
        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy\n");
        return false;
      }

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }

    // Nothing to constrain when the destination is a physical register.
    if (Register::isPhysicalRegister(DstReg))
      return true;
  }

  // Only the destination is constrained here; the source is left alone.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // A G_ZEXT reaching this point is reduced to a COPY and re-processed so the
  // copy fix-ups above are applied to it.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return true;
}
/// Maps a generic int<->fp conversion opcode to the AArch64 opcode for the
/// given scalar destination and source types.
///
/// Only scalar types of 32 or 64 bits on both sides are mapped, and only for
/// G_SITOFP, G_UITOFP, G_FPTOSI and G_FPTOUI. In the resulting opcode names
/// the W/X letter follows the integer side's size (32/64) and the S/D letter
/// follows the floating-point side's size (32/64).
///
/// \returns the mapped opcode, or \p GenericOpc unchanged when no mapping
/// exists.
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const bool DstSupported = DstSize == 32 || DstSize == 64;
  const bool SrcSupported = SrcSize == 32 || SrcSize == 64;
  if (!DstSupported || !SrcSupported)
    return GenericOpc;

  const bool Dst64 = DstSize == 64;
  const bool Src64 = SrcSize == 64;

  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    // Integer source, floating-point destination.
    if (Dst64)
      return Src64 ? AArch64::SCVTFUXDri : AArch64::SCVTFUWDri;
    return Src64 ? AArch64::SCVTFUXSri : AArch64::SCVTFUWSri;
  case TargetOpcode::G_UITOFP:
    if (Dst64)
      return Src64 ? AArch64::UCVTFUXDri : AArch64::UCVTFUWDri;
    return Src64 ? AArch64::UCVTFUXSri : AArch64::UCVTFUWSri;
  case TargetOpcode::G_FPTOSI:
    // Floating-point source, integer destination.
    if (Dst64)
      return Src64 ? AArch64::FCVTZSUXDr : AArch64::FCVTZSUXSr;
    return Src64 ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUWSr;
  case TargetOpcode::G_FPTOUI:
    if (Dst64)
      return Src64 ? AArch64::FCVTZUUXDr : AArch64::FCVTZUUXSr;
    return Src64 ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUWSr;
  default:
    return GenericOpc;
  }
}
/// Emits a select of \p True / \p False on condition code \p CC into \p Dst.
///
/// Vector types are not handled (nullptr is returned). Operands that are not
/// on the GPR bank get an FCSEL. GPR operands get a CSEL by default, which is
/// replaced by CSNEG / CSINV / CSINC when one of the folds below applies. At
/// most one fold is applied; they are tried in the order: fold a
/// neg/not/add-1 feeding \p False, the same for \p True, then constant
/// true/false values.
///
/// \p True, \p False and \p CC are by-value parameters; the folds rewrite
/// these local copies before the final instruction is built.
///
/// \returns the emitted instruction, or nullptr for vector types.
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
Register False, AArch64CC::CondCode CC,
MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?");
LLT Ty = MRI.getType(True);
if (Ty.isVector())
return nullptr;
const unsigned Size = Ty.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?");
const bool Is32Bit = Size == 32;
// Anything not on the GPR bank is selected with FCSEL and no folding.
if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
return &*FCSel;
}
// Default opcode; the helpers below may replace it.
unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
// Set once any fold succeeds; both helpers bail out when it is already set.
bool Optimized = false;
// Tries to fold the operation defining Reg into the select. On a match, Opc
// is replaced and Reg becomes the operation's input. With Invert set, CC is
// inverted and Reg/OtherReg are swapped, which moves the folded operand into
// the other slot.
auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
&Optimized](Register &Reg, Register &OtherReg,
bool Invert) {
if (Optimized)
return false;
Register MatchReg;
// Reg matches m_Neg(MatchReg): use CSNEG on MatchReg.
if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
// Reg matches m_Not(MatchReg): use CSINV on MatchReg.
if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
// Reg is a G_ADD or G_PTR_ADD of MatchReg and the constant 1: use CSINC on
// MatchReg.
if (mi_match(Reg, MRI,
m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
Reg = MatchReg;
if (Invert) {
CC = AArch64CC::getInvertedCondCode(CC);
std::swap(Reg, OtherReg);
}
return true;
}
return false;
};
// Tries to use CSINC / CSINV with the zero register when the true and/or
// false value is a known constant (0, 1 or -1).
auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
&Optimized]() {
if (Optimized)
return false;
auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
if (!TrueCst && !FalseCst)
return false;
Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
// Both constant: only (0, 1) and (0, -1) are handled here; other pairs
// fall through to the single-constant cases below.
if (TrueCst && FalseCst) {
int64_t T = TrueCst->Value.getSExtValue();
int64_t F = FalseCst->Value.getSExtValue();
// select cc, 0, 1 -> CSINC zreg, zreg, cc
if (T == 0 && F == 1) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
True = ZReg;
False = ZReg;
return true;
}
// select cc, 0, -1 -> CSINV zreg, zreg, cc
if (T == 0 && F == -1) {
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
True = ZReg;
False = ZReg;
return true;
}
}
// Constant true value: move the old false value into the true slot, put the
// zero register in the false slot and invert the condition.
if (TrueCst) {
int64_t T = TrueCst->Value.getSExtValue();
// select cc, 1, f -> CSINC f, zreg, inverted cc
if (T == 1) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
True = False;
False = ZReg;
CC = AArch64CC::getInvertedCondCode(CC);
return true;
}
// select cc, -1, f -> CSINV f, zreg, inverted cc
if (T == -1) {
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
True = False;
False = ZReg;
CC = AArch64CC::getInvertedCondCode(CC);
return true;
}
}
// Constant false value: replace it with the zero register; the condition is
// kept as is.
if (FalseCst) {
int64_t F = FalseCst->Value.getSExtValue();
// select cc, t, 1 -> CSINC t, zreg, cc
if (F == 1) {
Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
False = ZReg;
return true;
}
// select cc, t, -1 -> CSINV t, zreg, cc
if (F == -1) {
Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
False = ZReg;
return true;
}
}
return false;
};
// Order matters: the first successful fold sets Optimized and disables the
// remaining ones.
Optimized |= TryFoldBinOpIntoSelect(False, True, false);
Optimized |= TryFoldBinOpIntoSelect(True, False, true);
Optimized |= TryOptSelectCst();
auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
return &*SelectInst;
}
/// Translates an integer comparison predicate into the AArch64 condition code
/// used to test it. Non-integer predicates are treated as unreachable.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  // Equality.
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  // Signed orderings.
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  // Unsigned orderings.
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}
/// Translates a floating-point comparison predicate into one or two AArch64
/// condition codes.
///
/// \p CondCode always receives a condition. \p CondCode2 is AL unless a second
/// condition is needed, which is the case for FCMP_ONE (MI, GT) and FCMP_UEQ
/// (EQ, VS). Judging by the function name, the two conditions are meant to be
/// OR-ed by the caller - confirm at the call sites.
/// Predicates without a case below are treated as unreachable.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
                                    AArch64CC::CondCode &CondCode,
                                    AArch64CC::CondCode &CondCode2) {
  // Most predicates need a single condition only.
  CondCode2 = AArch64CC::AL;

  switch (CC) {
  // Predicates that need two condition codes.
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    return;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    return;

  // Ordered predicates.
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    return;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    return;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    return;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    return;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    return;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    return;

  // Unordered predicates.
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    return;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    return;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    return;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    return;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    return;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    return;

  default:
    llvm_unreachable("Unknown FP condition!");
  }
}
/// Variant of changeFPCCToORAArch64CC() for the two predicates that need a
/// second condition code.
///
/// FCMP_ONE yields (VC, NE) and FCMP_UEQ yields (PL, LE). Every other
/// predicate is delegated to changeFPCCToORAArch64CC(), which is asserted to
/// leave \p CondCode2 as AL. Judging by the function name, the two conditions
/// are meant to be AND-ed by the caller - confirm at the call sites.
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  if (CC == CmpInst::FCMP_ONE) {
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    return;
  }
  if (CC == CmpInst::FCMP_UEQ) {
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    return;
  }

  // Single-condition predicates share the OR-form mapping.
  CondCode2 = AArch64CC::AL;
  changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
  assert(CondCode2 == AArch64CC::AL);
}
/// Walk backwards from \p Reg through its defining instructions, looking for
/// an earlier register on which an equivalent single-bit test can be done.
///
/// \p Bit and \p Invert are in/out: they are updated whenever the walk steps
/// through a shift (changes the bit index) or an XOR with the tested bit set
/// in its constant (flips the sense of the test).
///
/// \returns the register the bit test should be performed on; this is \p Reg
/// itself when nothing could be folded.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
// Remembers whether a G_ZEXT was crossed; this selects zero- vs.
// sign-extension when an AND/XOR constant is read further down.
bool HasZext = false;
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
unsigned Opc = MI->getOpcode();
// Only look through definitions whose result has a single non-debug use.
if (!MI->getOperand(0).isReg() ||
!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
break;
// Extensions and truncations are stepped through without touching Bit.
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
Opc == TargetOpcode::G_TRUNC) {
if (Opc == TargetOpcode::G_ZEXT)
HasZext = true;
Register NextReg = MI->getOperand(1).getReg();
// The source must also be valid and single-use to be worth folding.
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
break;
Reg = NextReg;
continue;
}
// Look for a supported operation with a constant operand. C is the constant
// and TestReg the non-constant operand.
Optional<uint64_t> C;
Register TestReg;
switch (Opc) {
default:
break;
case TargetOpcode::G_AND:
case TargetOpcode::G_XOR: {
TestReg = MI->getOperand(1).getReg();
Register ConstantReg = MI->getOperand(2).getReg();
auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!VRegAndVal) {
// No constant on the right-hand side: try the operands the other way round.
std::swap(ConstantReg, TestReg);
VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal) {
if (HasZext)
C = VRegAndVal->Value.getZExtValue();
else
C = VRegAndVal->Value.getSExtValue();
}
break;
}
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_SHL: {
// For shifts only operand 2 (the shift amount) is checked for a constant.
TestReg = MI->getOperand(1).getReg();
auto VRegAndVal =
getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
C = VRegAndVal->Value.getSExtValue();
break;
}
}
// No constant or no usable register: stop walking.
if (!C || !TestReg.isValid())
break;
// Decide whether this instruction can be stepped through. NextReg stays
// invalid when it cannot.
Register NextReg;
unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
switch (Opc) {
default:
break;
case TargetOpcode::G_AND:
// Step through the AND only when the mask has the tested bit set.
if ((*C >> Bit) & 1)
NextReg = TestReg;
break;
case TargetOpcode::G_SHL:
// Testing bit b of (x << c) becomes testing bit b - c of x, provided b - c
// is non-negative and inside the register.
if (*C <= Bit && (Bit - *C) < TestRegSize) {
NextReg = TestReg;
Bit = Bit - *C;
}
break;
case TargetOpcode::G_ASHR:
// Testing bit b of (x >>s c) becomes testing bit b + c of x, clamped to the
// most significant bit of x.
NextReg = TestReg;
Bit = Bit + *C;
if (Bit >= TestRegSize)
Bit = TestRegSize - 1;
break;
case TargetOpcode::G_LSHR:
// Testing bit b of (x >>u c) becomes testing bit b + c of x, but only while
// b + c stays inside the register.
if ((Bit + *C) < TestRegSize) {
NextReg = TestReg;
Bit = Bit + *C;
}
break;
case TargetOpcode::G_XOR:
// An XOR is always stepped through; when its constant has the tested bit
// set, the sense of the test is flipped.
if ((*C >> Bit) & 1)
Invert = !Invert;
NextReg = TestReg;
break;
}
// Nothing foldable at this instruction: test the current register.
if (!NextReg.isValid())
return Reg;
Reg = NextReg;
}
return Reg;
}
/// Emit a TBZ/TBNZ that tests bit \p Bit of \p TestReg and branches to
/// \p DstMBB. \p IsNegative selects TBNZ over TBZ.
///
/// The register, bit index and polarity are first passed through
/// getTestBitReg, which may replace all three.
///
/// \returns the emitted test-and-branch instruction.
MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Bit and IsNegative are passed by reference and may be rewritten here.
  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
  const LLT TestTy = MRI.getType(TestReg);
  const unsigned CurrentBits = TestTy.getSizeInBits();
  assert(!TestTy.isVector() && "Expected a scalar!");
  assert(Bit < 64 && "Bit is too large!");

  // Bits 0-31 use the W-register form, bits 32-63 the X-register form. Move
  // the value into a register of the matching width when it differs.
  const bool Is32BitTest = Bit < 32;
  const unsigned RequiredBits = Is32BitTest ? 32 : 64;
  if (CurrentBits != RequiredBits)
    TestReg = moveScalarRegClass(
        TestReg, Is32BitTest ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  unsigned Opc;
  if (Is32BitTest)
    Opc = IsNegative ? AArch64::TBNZW : AArch64::TBZW;
  else
    Opc = IsNegative ? AArch64::TBNZX : AArch64::TBZX;

  auto Branch = MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI);
  return &*Branch;
}
/// Try to turn a G_AND with a power-of-two constant on its right-hand side
/// into a TB(N)Z on the corresponding bit of its left-hand operand.
///
/// \returns true when a test-bit branch to \p DstMBB was emitted, false when
/// operand 2 of \p AndInst is not a constant or not an exact power of two.
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  // Only the right-hand operand is inspected for a constant mask.
  const Register MaskReg = AndInst.getOperand(2).getReg();
  auto Mask = getIConstantVRegValWithLookThrough(MaskReg, *MIB.getMRI());
  if (!Mask)
    return false;

  // A single-bit mask is the same as testing that one bit; a negative result
  // is treated as "not a single-bit mask".
  const int32_t BitIdx = Mask->Value.exactLogBase2();
  if (BitIdx < 0)
    return false;

  emitTestBit(AndInst.getOperand(1).getReg(), BitIdx, Invert, DstMBB, MIB);
  return true;
}
/// Emit a CBZ (or CBNZ when \p IsNegative is set) on \p CompareReg that
/// branches to \p DestMBB. The X-register form is used for 64-bit values and
/// the W-register form for everything narrower.
///
/// \returns the emitted compare-and-branch instruction.
MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
                                                  bool IsNegative,
                                                  MachineBasicBlock *DestMBB,
                                                  MachineIRBuilder &MIB) const {
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");

  const LLT CompareTy = MRI.getType(CompareReg);
  const unsigned Width = CompareTy.getSizeInBits();
  assert(!CompareTy.isVector() && "Expected scalar only?");
  assert(Width <= 64 && "Expected width to be at most 64?");

  unsigned Opc;
  if (Width == 64)
    Opc = IsNegative ? AArch64::CBNZX : AArch64::CBZX;
  else
    Opc = IsNegative ? AArch64::CBNZW : AArch64::CBZW;

  auto Branch = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
  constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI);
  return &*Branch;
}
/// Select a G_BRCOND whose condition is produced by the G_FCMP \p FCmp.
///
/// Emits the floating-point compare followed by one Bcc, plus a second Bcc to
/// the same destination when the predicate maps onto two condition codes.
/// \p I is erased. Always returns true.
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);

  const auto Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  const Register CmpLHS = FCmp.getOperand(2).getReg();
  const Register CmpRHS = FCmp.getOperand(3).getReg();
  emitFPCompare(CmpLHS, CmpRHS, MIB, Pred);

  AArch64CC::CondCode FirstCC, SecondCC;
  changeFCMPPredToAArch64CC(Pred, FirstCC, SecondCC);

  MachineBasicBlock *Target = I.getOperand(1).getMBB();
  const auto EmitBcc = [&](AArch64CC::CondCode CC) {
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(Target);
  };

  EmitBcc(FirstCC);
  // AL in the second slot means a single branch is enough.
  if (SecondCC != AArch64CC::AL)
    EmitBcc(SecondCC);

  I.eraseFromParent();
  return true;
}
/// Try to select a G_BRCOND fed by the G_ICMP \p ICmp as a single TB(N)Z or
/// CB(N)Z, i.e. a conditional branch that does not set flags.
///
/// Handled shapes:
///  - `x sgt -1` and `x slt 0` (constant on the right, x not defined by a
///    G_AND): test the most significant bit of x.
///  - `x eq/ne 0` with x defined by a G_AND: try a test-bit on the AND mask.
///  - `x eq/ne 0` with a scalar x of at most 64 bits: CB(N)Z.
///
/// \returns true (after erasing \p I) when a branch was emitted; false when
/// nothing matched or when ProduceNonFlagSettingCondBr is off.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  const auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *LHSAnd = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // Sign-bit tests. These predicates do not commute, so only a constant on
  // the right-hand side is considered. They are skipped when the left-hand
  // side comes from a G_AND.
  if (RHSCst && !LHSAnd) {
    const int64_t CstVal = RHSCst->Value.getSExtValue();
    const bool IsSgtMinusOne = CstVal == -1 && Pred == CmpInst::ICMP_SGT;
    const bool IsSltZero = CstVal == 0 && Pred == CmpInst::ICMP_SLT;
    if (IsSgtMinusOne || IsSltZero) {
      const uint64_t SignBit = MRI.getType(LHS).getSizeInBits() - 1;
      // `x > -1` branches when the sign bit is clear (TBZ); `x < 0` branches
      // when it is set (TBNZ).
      emitTestBit(LHS, SignBit, /*IsNegative=*/IsSltZero, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Everything below is for eq/ne only.
  if (!ICmpInst::isEquality(Pred))
    return false;

  // eq/ne commute, so a constant on the left-hand side works as well.
  if (!RHSCst) {
    std::swap(RHS, LHS);
    RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
    LHSAnd = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
  }

  // Only comparisons against zero can become TB(N)Z / CB(N)Z.
  if (!RHSCst || !(RHSCst->Value == 0))
    return false;

  const bool BranchIfNonZero = Pred == CmpInst::ICMP_NE;

  // Prefer folding a feeding G_AND into a test-bit branch.
  if (LHSAnd &&
      tryOptAndIntoCompareBranch(*LHSAnd, BranchIfNonZero, DestMBB, MIB)) {
    I.eraseFromParent();
    return true;
  }

  // Otherwise fall back to CB(N)Z when the operand is a scalar of at most
  // 64 bits.
  const LLT LHSTy = MRI.getType(LHS);
  if (LHSTy.isVector() || LHSTy.getSizeInBits() > 64)
    return false;

  emitCBZ(LHS, BranchIfNonZero, DestMBB, MIB);
  I.eraseFromParent();
  return true;
}
/// Select a G_BRCOND whose condition is produced by the G_ICMP \p ICmp.
///
/// First tries the flag-free TB(N)Z / CB(N)Z forms; when those do not apply,
/// emits an integer compare followed by a Bcc on the matching condition code.
/// \p I is erased. Always returns true.
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);

  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  MachineBasicBlock *Target = I.getOperand(1).getMBB();

  // The predicate operand is copied, handed to the compare emitter, and the
  // copy is then read back to pick the branch condition.
  MachineOperand PredOp = ICmp.getOperand(1);
  emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);

  const auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
  const AArch64CC::CondCode BranchCC = changeICMPPredToAArch64CC(Pred);
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(BranchCC).addMBB(Target);

  I.eraseFromParent();
  return true;
}
/// Select a G_BRCOND.
///
/// When the condition is defined by a G_FCMP or G_ICMP, selection is
/// delegated to the corresponding "FedBy" helper. Otherwise bit 0 of the
/// condition register is tested directly:
///  - with ProduceNonFlagSettingCondBr, via TBNZ on bit 0;
///  - without it, via ANDS (tst) on bit 0 followed by a Bcc.
///
/// Both fallbacks branch to the destination block when bit 0 is set.
///
/// \returns true on success; in the tst + Bcc fallback, the result of
/// constraining the Bcc's operands.
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);

  // Prefer selecting the branch together with whatever computes the condition.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  // TB(N)Z does not set flags, so it is only usable when such branches are
  // permitted.
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit=*/0, /*IsNegative=*/true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Flag-setting fallback: tst bit 0, then branch if it was set.
  //
  // ANDSWri takes its mask as an *encoded* logical immediate. The mask 0x1
  // must therefore be encoded; a raw operand of 1 decodes to the mask 0x3 and
  // would also test bit 1.
  auto TstMI =
      MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg})
          .addImm(AArch64_AM::encodeLogicalImmediate(/*imm=*/1, /*regSize=*/32));
  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);

  // ANDS sets Z when the masked value is zero, so "bit 0 set" is NE. This
  // matches the TBNZ path above, which branches when the bit is non-zero.
  auto Bcc = MIB.buildInstr(AArch64::Bcc)
                 .addImm(AArch64CC::NE)
                 .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Return the splat scalar of the vector shift-amount operand \p Reg, as
/// reported by getAArch64VectorSplatScalar on its defining instruction, or
/// None when that helper finds none.
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr &ShiftAmtDef = *MRI.getVRegDef(Reg);
  return getAArch64VectorSplatScalar(ShiftAmtDef, MRI);
}
/// Return the splat shift amount held in \p Reg when it can be used as the
/// immediate of a vector shift-left on \p SrcTy.
///
/// The amount must be non-negative and strictly smaller than the element
/// width, and the element width must be 8, 16, 32 or 64 bits. Returns None
/// otherwise.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
  const Optional<int64_t> SplatAmt = getVectorShiftImm(Reg, MRI);
  if (!SplatAmt)
    return None;

  const int64_t Imm = *SplatAmt;
  if (Imm < 0)
    return None;

  const unsigned EltBits = SrcTy.getElementType().getSizeInBits();
  const bool IsSupportedElt =
      EltBits == 8 || EltBits == 16 || EltBits == 32 || EltBits == 64;
  if (!IsSupportedElt) {
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  }

  // Valid immediates are 0 .. EltBits - 1.
  if (Imm > static_cast<int64_t>(EltBits) - 1)
    return None;
  return Imm;
}
/// Select a vector G_SHL.
///
/// When the shift amount is a splat constant usable as an immediate, the
/// immediate SHL form is emitted; otherwise the register form USHL is used.
/// Returns false for non-vector and unsupported vector types.
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  const Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  const Register ValueReg = I.getOperand(1).getReg();
  const Register AmountReg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Present when the amount can be encoded as an immediate.
  const Optional<int64_t> ImmVal = getVectorSHLImm(Ty, AmountReg, MRI);

  // Supported vector types with their immediate- and register-form opcodes.
  struct ShlOpcodes {
    LLT VecTy;
    unsigned ImmOpc;
    unsigned RegOpc;
  };
  const ShlOpcodes Supported[] = {
      {LLT::fixed_vector(2, 64), AArch64::SHLv2i64_shift, AArch64::USHLv2i64},
      {LLT::fixed_vector(4, 32), AArch64::SHLv4i32_shift, AArch64::USHLv4i32},
      {LLT::fixed_vector(2, 32), AArch64::SHLv2i32_shift, AArch64::USHLv2i32},
      {LLT::fixed_vector(4, 16), AArch64::SHLv4i16_shift, AArch64::USHLv4i16},
      {LLT::fixed_vector(8, 16), AArch64::SHLv8i16_shift, AArch64::USHLv8i16},
      {LLT::fixed_vector(16, 8), AArch64::SHLv16i8_shift, AArch64::USHLv16i8},
      {LLT::fixed_vector(8, 8), AArch64::SHLv8i8_shift, AArch64::USHLv8i8},
  };

  const ShlOpcodes *Match = nullptr;
  for (const ShlOpcodes &Entry : Supported) {
    if (Ty == Entry.VecTy) {
      Match = &Entry;
      break;
    }
  }
  if (!Match) {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  const unsigned Opc = ImmVal ? Match->ImmOpc : Match->RegOpc;
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {ValueReg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(AmountReg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
/// Select a vector G_ASHR / G_LSHR.
///
/// The shift amount is negated with a vector NEG and fed to SSHL (for G_ASHR)
/// or USHL (for G_LSHR). Returns false for non-vector and unsupported vector
/// types.
bool AArch64InstructionSelector::selectVectorAshrLshr(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  const Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  const Register ValueReg = I.getOperand(1).getReg();
  const Register AmountReg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  const bool IsArithmetic = I.getOpcode() == TargetOpcode::G_ASHR;

  // Register class for the negated shift amount.
  const TargetRegisterClass *NegRC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));

  // Supported vector types with their signed/unsigned shift and negate
  // opcodes.
  struct ShrOpcodes {
    LLT VecTy;
    unsigned SignedShl;
    unsigned UnsignedShl;
    unsigned Neg;
  };
  const ShrOpcodes Supported[] = {
      {LLT::fixed_vector(2, 64), AArch64::SSHLv2i64, AArch64::USHLv2i64,
       AArch64::NEGv2i64},
      {LLT::fixed_vector(4, 32), AArch64::SSHLv4i32, AArch64::USHLv4i32,
       AArch64::NEGv4i32},
      {LLT::fixed_vector(2, 32), AArch64::SSHLv2i32, AArch64::USHLv2i32,
       AArch64::NEGv2i32},
      {LLT::fixed_vector(4, 16), AArch64::SSHLv4i16, AArch64::USHLv4i16,
       AArch64::NEGv4i16},
      {LLT::fixed_vector(8, 16), AArch64::SSHLv8i16, AArch64::USHLv8i16,
       AArch64::NEGv8i16},
      {LLT::fixed_vector(16, 8), AArch64::SSHLv16i8, AArch64::USHLv16i8,
       AArch64::NEGv16i8},
      {LLT::fixed_vector(8, 8), AArch64::SSHLv8i8, AArch64::USHLv8i8,
       AArch64::NEGv8i8},
  };

  const ShrOpcodes *Match = nullptr;
  for (const ShrOpcodes &Entry : Supported) {
    if (Ty == Entry.VecTy) {
      Match = &Entry;
      break;
    }
  }
  if (!Match) {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  const unsigned ShiftOpc = IsArithmetic ? Match->SignedShl : Match->UnsignedShl;

  auto Neg = MIB.buildInstr(Match->Neg, {NegRC}, {AmountReg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto Shift = MIB.buildInstr(ShiftOpc, {DstReg}, {ValueReg, Neg});
  constrainSelectedInstRegOperands(*Shift, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
/// AAPCS va_start selection hook. Nothing is selected here: the function
/// unconditionally returns false and leaves \p I untouched.
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
return false;
}
/// Select va_start for Darwin.
///
/// Computes the address of the function's var-args stack slot into a fresh
/// 64-bit register and stores it through the va_list pointer held in operand
/// 0 of \p I, reusing the first memory operand of \p I. \p I is erased.
/// Always returns true.
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  MachineBasicBlock &MBB = *I.getParent();
  const Register VaListPtr = I.getOperand(0).getReg();
  const int VarArgsFI =
      MF.getInfo<AArch64FunctionInfo>()->getVarArgsStackIndex();

  // ArgsAddr = <var-args frame index> + 0
  const Register ArgsAddr =
      MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  auto AddrMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
                    .addDef(ArgsAddr)
                    .addFrameIndex(VarArgsFI)
                    .addImm(0)
                    .addImm(0);
  constrainSelectedInstRegOperands(*AddrMI, TII, TRI, RBI);

  // Store ArgsAddr at [VaListPtr, #0].
  auto StoreMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::STRXui))
                     .addUse(ArgsAddr)
                     .addUse(VaListPtr)
                     .addImm(0)
                     .addMemOperand(*I.memoperands_begin());
  constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}
/// Materialize the 64-bit address of \p V (a GlobalValue or BlockAddress)
/// into the destination register of \p I using one MOVZ followed by three
/// MOVKs, each carrying one 16-bit group of the address (MO_G0 .. MO_G3).
///
/// \p OpFlags are extra target flags OR-ed into the MOVZ's symbol operand.
/// \p I itself is not erased here.
void AArch64InstructionSelector::materializeLargeCMVal(
MachineInstr &I, const Value *V, unsigned OpFlags) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
// MOVZ with group 0: the symbol operand is copied from operand 1 of I and
// then tagged with MO_G0 | MO_NC; the trailing immediate 0 is the shift.
auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
// Builds one MOVK that inserts the group selected by Flags, shifted by
// Offset, into SrcReg. The result goes to ForceDstReg when that is non-zero
// and to a fresh GPR64 virtual register otherwise. Returns the register
// written.
auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
Register ForceDstReg) {
Register DstReg = ForceDstReg
? ForceDstReg
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
// The symbol operand reuses the offset of the MOVZ's symbol operand.
if (auto *GV = dyn_cast<GlobalValue>(V)) {
MovI->addOperand(MF, MachineOperand::CreateGA(
GV, MovZ->getOperand(1).getOffset(), Flags));
} else {
MovI->addOperand(
MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
MovZ->getOperand(1).getOffset(), Flags));
}
MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
return DstReg;
};
// Chain groups 1 and 2 through temporaries; the final MOVK (group 3) writes
// the original destination register of I.
Register DstReg = BuildMovK(MovZ.getReg(0),
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}
/// Rewrite \p I in place before selection proper is attempted.
///
/// Each case only transforms generic MIR (types, operands or the opcode); no
/// case here erases \p I.
///
/// \returns true when \p I was modified.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
case TargetOpcode::G_STORE: {
// First try to store straight from the source of a cross-bank copy.
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
// A stored pointer is replaced by an s64 copy of it constrained to GPR64;
// the stored register itself is not retyped.
if (MRI.getType(SrcOp.getReg()).isPointer()) {
auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
Register NewSrc = Copy.getReg(0);
SrcOp.setReg(NewSrc);
RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
Changed = true;
}
return Changed;
}
case TargetOpcode::G_PTR_ADD:
return convertPtrAddToAdd(I, MRI);
case TargetOpcode::G_LOAD: {
// A load producing a pointer has its destination retyped to s64.
Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
if (!DstTy.isPointer())
return false;
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
case AArch64::G_DUP: {
// A G_DUP of pointers becomes a G_DUP of s64: the destination's element type
// is changed and the source is replaced by an s64 copy in GPR64.
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (!DstTy.getElementType().isPointer())
return false;
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
MRI.setType(I.getOperand(0).getReg(),
DstTy.changeElementType(LLT::scalar(64)));
MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
I.getOperand(1).setReg(NewSrc.getReg(0));
return true;
}
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_SITOFP: {
// Scalar int-to-fp conversions of equal source/destination width whose
// source already lives on the FPR bank are switched to the target-specific
// G_SITOF / G_UITOF opcodes.
Register SrcReg = I.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
return false;
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
if (I.getOpcode() == TargetOpcode::G_SITOFP)
I.setDesc(TII.get(AArch64::G_SITOF));
else
I.setDesc(TII.get(AArch64::G_UITOF));
return true;
}
return false;
}
default:
return false;
}
}
/// Rewrite a G_PTR_ADD in address space 0 into an integer G_ADD (or a G_SUB
/// when the offset is a negation).
///
/// The base pointer is converted with a G_PTRTOINT, which is selected
/// immediately, and the destination is retyped to s64 (or <2 x s64> for
/// vectors).
///
/// \returns true when \p I was rewritten; false for non-zero address spaces
/// or when the G_PTRTOINT could not be selected.
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  const Register DstReg = I.getOperand(0).getReg();
  const Register BaseReg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  if (PtrTy.getAddressSpace() != 0)
    return false;

  const bool IsVector = PtrTy.isVector();
  const LLT IntTy = IsVector ? LLT::fixed_vector(2, 64) : LLT::scalar(64);

  // Integer view of the base pointer, on FPR for vectors and GPR for scalars.
  auto PtrToInt = MIB.buildPtrToInt(IntTy, BaseReg);
  const Register IntBaseReg = PtrToInt.getReg(0);
  MRI.setRegBank(IntBaseReg,
                 RBI.getRegBank(IsVector ? AArch64::FPRRegBankID
                                         : AArch64::GPRRegBankID));

  // %dst(p0) = G_PTR_ADD %base, %off  ->  %dst(int) = G_ADD %intbase, %off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, IntTy);
  I.getOperand(1).setReg(IntBaseReg);

  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // When the offset is a negation of some register, subtract that register
  // instead.
  Register NegatedReg;
  if (mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) {
    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
  }
  return true;
}
/// Try to select a scalar G_SHL by a constant amount as a UBFM.
///
/// The two UBFM immediates come from the selectShiftA_* / selectShiftB_*
/// renderers for the shift-amount operand.
///
/// \returns false when the amount is not a G_CONSTANT, the type is a vector,
/// or either renderer is unavailable; otherwise the result of constraining
/// the new instruction (after \p I has been erased).
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
                                                MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const MachineOperand &AmountOp = I.getOperand(2);
  if (!getIConstantVRegVal(AmountOp.getReg(), MRI))
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;

  const bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto FirstImmFns =
      Is64Bit ? selectShiftA_64(AmountOp) : selectShiftA_32(AmountOp);
  auto SecondImmFns =
      Is64Bit ? selectShiftB_64(AmountOp) : selectShiftB_32(AmountOp);
  if (!FirstImmFns || !SecondImmFns)
    return false;

  const unsigned Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  auto Ubfm = MIB.buildInstr(Opc, {I.getOperand(0).getReg()},
                             {I.getOperand(1).getReg()});

  // Append the rendered immediates in order: first set, then second set.
  for (auto &Render : *FirstImmFns)
    Render(Ubfm);
  for (auto &Render : *SecondImmFns)
    Render(Ubfm);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*Ubfm, TII, TRI, RBI);
}
/// If the value stored by the G_STORE \p I is a copy of a same-sized register
/// living on a different register bank, store that source register directly.
///
/// \returns true when operand 0 of \p I was replaced.
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  MachineOperand &StoredOp = I.getOperand(0);

  const Register CopySrcReg = getSrcRegIgnoringCopies(StoredOp.getReg(), MRI);
  if (!CopySrcReg.isValid())
    return false;

  const LLT CopySrcTy = MRI.getType(CopySrcReg);
  const Register StoredReg = StoredOp.getReg();
  const LLT StoredTy = MRI.getType(StoredReg);

  // The copy source must have a valid type of the same size as the stored
  // value.
  if (!CopySrcTy.isValid() ||
      CopySrcTy.getSizeInBits() != StoredTy.getSizeInBits())
    return false;

  // Same bank on both sides: nothing to fold.
  const bool SameBank = RBI.getRegBank(StoredReg, MRI, TRI) ==
                        RBI.getRegBank(CopySrcReg, MRI, TRI);
  if (SameBank)
    return false;

  StoredOp.setReg(CopySrcReg);
  return true;
}
/// Hand-written selection attempts made from select() before the generated
/// selector (selectImpl) is tried.
///
/// \returns true when \p I was fully handled here; false to let selection
/// continue with the remaining selectors.
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    // A G_DUP of a constant is emitted as a constant vector instead.
    Register Src = I.getOperand(1).getReg();
    auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
    if (!ValAndVReg)
      return false;
    LLVMContext &Ctx = MF.getFunction().getContext();
    Register Dst = I.getOperand(0).getReg();
    auto *CV = ConstantDataVector::getSplat(
        MRI.getType(Dst).getNumElements(),
        ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
                         ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
      return false;
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_SEXT:
    // Give the extend-of-extract selection a chance first.
    if (selectUSMovFromExtend(I, MRI))
      return true;
    return false;
  case TargetOpcode::G_BR:
    return false;
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    // A 32/64-bit zero constant becomes a COPY from WZR/XZR.
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty.getSizeInBits() == 64) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else if (Ty.getSizeInBits() == 32) {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    } else
      return false;

    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }

  case TargetOpcode::G_ADD: {
    // Fold `z + (icmp pred, x, y)` into a compare followed by a CSINC on the
    // inverted condition, so z is incremented exactly when the compare holds.
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    // Scalars of 32 or 64 bits only.
    LLT Ty = MRI.getType(AddLHS);
    if (Ty.isVector())
      return false;
    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;

    // Returns the single-use G_ICMP feeding Reg, or nullptr. For a 64-bit add
    // the compare must be reached through a single-use G_ZEXT and must have
    // 64-bit operands.
    auto MatchCmp = [&](Register Reg) -> MachineInstr * {
      if (!MRI.hasOneNonDBGUse(Reg))
        return nullptr;
      if (Size == 32)
        return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
      Register ZExt;
      if (!mi_match(Reg, MRI,
                    m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
        return nullptr;
      auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
      if (!Cmp ||
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
        return nullptr;
      return Cmp;
    };

    // Try the compare on the right first, then on the left. After the swap
    // AddLHS is always the non-compare operand.
    MachineInstr *Cmp = MatchCmp(AddRHS);
    if (!Cmp) {
      std::swap(AddLHS, AddRHS);
      Cmp = MatchCmp(AddRHS);
      if (!Cmp)
        return false;
    }
    auto &PredOp = Cmp->getOperand(1);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
    const AArch64CC::CondCode InvCC =
        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
    MIB.setInstrAndDebugLoc(I);
    emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
                       /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(/*Dst=*/AddDst, /*Src1=*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_OR: {
    // Match
    //   or (shl ShiftSrc, ShiftImm), (and MaskSrc, (1 << ShiftImm) - 1)
    // and emit it as a single BFM (a bitfield insert of ShiftSrc into
    // MaskSrc).
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);

    if (!Ty.isScalar())
      return false;

    unsigned Size = Ty.getSizeInBits();
    if (Size != 32 && Size != 64)
      return false;

    Register ShiftSrc;
    int64_t ShiftImm;
    Register MaskSrc;
    int64_t MaskImm;
    if (!mi_match(
            Dst, MRI,
            m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
                  m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
      return false;

    // Only shift amounts in [1, Size - 1] are usable:
    //  - a negative amount, or 64 when Size is 64, would make the
    //    `1ULL << ShiftImm` below undefined;
    //  - 0 would give Immr == Size and Size would give Imms == -1, both
    //    outside the [0, Size - 1] range BFM accepts.
    if (ShiftImm <= 0 || ShiftImm >= static_cast<int64_t>(Size))
      return false;

    // The AND mask must keep exactly the low ShiftImm bits.
    if (((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
      return false;

    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_FENCE: {
    // Operand 1 == 0 selects a CompilerBarrier carrying operand 0; anything
    // else becomes a DMB whose option is 0x9 when operand 0 is 4 and 0xb
    // otherwise.
    // NOTE(review): operand 0 is presumably the atomic ordering and operand 1
    // the sync scope - confirm against the G_FENCE definition.
    if (I.getOperand(1).getImm() == 0)
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CompilerBarrier))
          .addImm(I.getOperand(0).getImm());
    else
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::DMB))
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
    return true;
  }
  default:
    return false;
  }
}
bool AArch64InstructionSelector::select(MachineInstr &I) {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
if (Subtarget->requiresStrictAlign()) {
LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
return false;
}
MIB.setInstrAndDebugLoc(I);
unsigned Opcode = I.getOpcode();
if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const RegClassOrRegBank &RegClassOrBank =
MRI.getRegClassOrRegBank(DefReg);
const TargetRegisterClass *DefRC
= RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
if (!DefRC) {
if (!DefTy.isValid()) {
LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
return false;
}
const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
DefRC = getRegClassForTypeOnBank(DefTy, RB);
if (!DefRC) {
LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
return false;
}
}
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
if (I.isCopy())
return selectCopy(I, TII, MRI, TRI, RBI);
return true;
}
if (I.getNumOperands() != I.getNumExplicitOperands()) {
LLVM_DEBUG(
dbgs() << "Generic instruction has unexpected implicit operands\n");
return false;
}
if (preISelLower(I)) {
Opcode = I.getOpcode(); }
if (earlySelect(I))
return true;
if (selectImpl(I, *CoverageInfo))
return true;
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
switch (Opcode) {
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX: {
static const unsigned OpcTable[2][2] = {
{AArch64::UBFMWri, AArch64::UBFMXri},
{AArch64::SBFMWri, AArch64::SBFMXri}};
bool IsSigned = Opcode == TargetOpcode::G_SBFX;
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();
auto Width = Cst2->Value.getZExtValue();
auto BitfieldInst =
MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
.addImm(LSB)
.addImm(LSB + Width - 1);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
}
case TargetOpcode::G_BRCOND:
return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
case AArch64::G_ADD_LOW: {
MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
if (BaseMI->getOpcode() != AArch64::ADRP) {
I.setDesc(TII.get(AArch64::ADDXri));
I.addOperand(MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
assert(TM.getCodeModel() == CodeModel::Small &&
"Expected small code model");
auto Op1 = BaseMI->getOperand(1);
auto Op2 = I.getOperand(2);
auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
.addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
Op1.getTargetFlags())
.addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
Op2.getTargetFlags());
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
}
case TargetOpcode::G_BSWAP: {
Register DstReg = I.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
return false;
}
unsigned NumElts = DstTy.getNumElements();
if (NumElts != 4 && NumElts != 2) {
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
return false;
}
unsigned Opc = 0;
unsigned EltSize = DstTy.getElementType().getSizeInBits();
if (EltSize == 32)
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
: AArch64::REV32v16i8;
else if (EltSize == 64)
Opc = AArch64::REV64v16i8;
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
I.setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (isFP) {
if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s16 << " or " << s32
<< " or " << s64 << " or " << s128 << '\n');
return false;
}
if (RB.getID() != AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant on bank: " << RB
<< ", expected: FPR\n");
return false;
}
if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
if (Ty != p0 && Ty != s8 && Ty != s16) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant, expected: " << s32 << ", " << s64
<< ", or " << p0 << '\n');
return false;
}
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant on bank: " << RB
<< ", expected: GPR\n");
return false;
}
}
if (isFP) {
const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
switch (DefSize) {
default:
llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
case 32:
if (!shouldOptForSize(&MF))
break;
LLVM_FALLTHROUGH;
case 16:
case 64:
case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
return false;
}
MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
I.eraseFromParent();
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
}
assert(DefSize == 32 &&
"Expected constant pool loads for all sizes other than 32!");
const Register DefGPRReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildCopy({DefReg}, {DefGPRReg});
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
return false;
}
MachineOperand &ImmOp = I.getOperand(1);
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
} else if (I.getOperand(1).isImm()) {
uint64_t Val = I.getOperand(1).getImm();
I.getOperand(1).ChangeToImmediate(Val);
}
const unsigned MovOpc =
DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
LLT DstTy = MRI.getType(DstReg);
(void)DstTy;
unsigned SrcSize = SrcTy.getSizeInBits();
if (SrcTy.getSizeInBits() > 64) {
if (SrcTy.getSizeInBits() != 128)
return false;
if (DstTy.getSizeInBits() != 64)
return false;
unsigned Offset = I.getOperand(2).getImm();
if (Offset % 64 != 0)
return false;
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
if (SrcRB.getID() == AArch64::GPRRegBankID) {
MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
.addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
I.eraseFromParent();
return true;
}
unsigned LaneIdx = Offset / 64;
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
Ty.getSizeInBits() - 1);
if (SrcSize < 64) {
assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
"unexpected G_EXTRACT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(DstReg, 0, AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(0).setReg(DstReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INSERT: {
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
unsigned DstSize = DstTy.getSizeInBits();
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
return false;
I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
unsigned LSB = I.getOperand(3).getImm();
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
I.getOperand(3).setImm((DstSize - LSB) % DstSize);
MachineInstrBuilder(MF, I).addImm(Width - 1);
if (DstSize < 64) {
assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
"unexpected G_INSERT types");
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG))
.addDef(SrcReg)
.addImm(0)
.addUse(I.getOperand(2).getReg())
.addImm(AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(2).setReg(SrcReg);
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_FRAME_INDEX: {
if (Ty != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
I.setDesc(TII.get(AArch64::ADDXri));
I.addOperand(MachineOperand::CreateImm(0));
I.addOperand(MachineOperand::CreateImm(0));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
return true;
} else if (TM.getCodeModel() == CodeModel::Tiny) {
I.setDesc(TII.get(AArch64::ADR));
I.getOperand(1).setTargetFlags(OpFlags);
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
MachineInstrBuilder MIB(MF, I);
MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
<< ", expected: " << LLT::pointer(0, 64) << '\n');
return false;
}
uint64_t MemSizeInBytes = LdSt.getMemSize();
unsigned MemSizeInBits = LdSt.getMemSizeInBits();
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;
if (isa<GLoad>(LdSt)) {
static constexpr unsigned LDAPROpcodes[] = {
AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
static constexpr unsigned LDAROpcodes[] = {
AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
ArrayRef<unsigned> Opcodes =
STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
? LDAPROpcodes
: LDAROpcodes;
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
Register ValReg = LdSt.getReg(0);
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
.addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
I.getOperand(0).setReg(NewVal);
}
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
#ifndef NDEBUG
const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
const Register ValReg = LdSt.getReg(0);
const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
.addReg(ValReg, 0, SubReg)
.getReg(0);
RBI.constrainGenericRegister(Copy, *RC, MRI);
LdSt.getOperand(0).setReg(Copy);
} else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
if (RB.getID() == AArch64::FPRRegBankID) {
unsigned SubReg;
LLT MemTy = LdSt.getMMO().getMemoryType();
auto *RC = getRegClassForTypeOnBank(MemTy, RB);
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
Register OldDst = LdSt.getReg(0);
Register NewDst =
MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
LdSt.getOperand(0).setReg(NewDst);
MRI.setRegBank(NewDst, RB);
MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
.addImm(0)
.addUse(NewDst)
.addImm(SubReg);
auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
MIB.setInstr(LdSt);
}
}
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
return nullptr;
auto AddrModeFns =
selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
if (!AddrModeFns) {
I.setDesc(TII.get(NewOpc));
I.addOperand(MachineOperand::CreateImm(0));
return &I;
}
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
Register CurValReg = I.getOperand(0).getReg();
IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
I.eraseFromParent();
return &*NewInst;
};
MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
if (!LoadStore)
return false;
if (Opcode == TargetOpcode::G_STORE) {
auto CVal = getIConstantVRegValWithLookThrough(
LoadStore->getOperand(0).getReg(), MRI);
if (CVal && CVal->Value == 0) {
switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
case AArch64::STRHHui:
case AArch64::STRBBui:
LoadStore->getOperand(0).setReg(AArch64::WZR);
break;
case AArch64::STRXui:
LoadStore->getOperand(0).setReg(AArch64::XZR);
break;
}
}
}
if (IsZExtLoad) {
if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Register DstReg = LoadStore->getOperand(0).getReg();
LoadStore->getOperand(0).setReg(LdReg);
MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
if (Ty != LLT::scalar(64)) {
LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
<< ", expected: " << LLT::scalar(64) << '\n');
return false;
}
unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
: AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
{
Register SrcReg = I.getOperand(1).getReg();
Register ShiftReg = I.getOperand(2).getReg();
const LLT ShiftTy = MRI.getType(ShiftReg);
const LLT SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
ShiftTy.getSizeInBits() == 64) {
assert(!ShiftTy.isVector() && "unexpected vector shift ty");
auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
.addReg(ShiftReg, 0, AArch64::sub_32);
MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
I.getOperand(2).setReg(Trunc.getReg(0));
}
}
LLVM_FALLTHROUGH;
case TargetOpcode::G_OR: {
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
const unsigned OpSize = Ty.getSizeInBits();
const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
if (NewOpc == I.getOpcode())
return false;
I.setDesc(TII.get(NewOpc));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTR_ADD: {
emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SADDO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_USUBO: {
auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
I.getOperand(2), I.getOperand(3), MIB);
Register ZReg = AArch64::WZR;
emitCSINC(I.getOperand(1).getReg(), ZReg, ZReg,
getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
uint64_t Mask = *MaskVal;
I.setDesc(TII.get(AArch64::ANDXri));
I.getOperand(2).ChangeToImmediate(
AArch64_AM::encodeLogicalImmediate(Mask, 64));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
LLVM_DEBUG(
dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
if (DstRB.getID() == AArch64::GPRRegBankID) {
const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
if (!DstRC)
return false;
const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
if (!SrcRC)
return false;
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
} else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
SrcTy == LLT::scalar(64)) {
llvm_unreachable("TableGen can import this case");
return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
LLVM_DEBUG(
dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
I.setDesc(TII.get(TargetOpcode::COPY));
return true;
} else if (DstRB.getID() == AArch64::FPRRegBankID) {
if (DstTy == LLT::fixed_vector(4, 16) &&
SrcTy == LLT::fixed_vector(4, 32)) {
I.setDesc(TII.get(AArch64::XTNv4i16));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
MachineInstr *Extract = emitExtractVectorElt(
DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
if (!Extract)
return false;
I.eraseFromParent();
return true;
}
if (Opcode == TargetOpcode::G_PTRTOINT) {
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
I.setDesc(TII.get(TargetOpcode::COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
}
return false;
}
case TargetOpcode::G_ANYEXT: {
if (selectUSMovFromExtend(I, MRI))
return true;
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
if (RBDst.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
<< ", expected: GPR\n");
return false;
}
const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
if (RBSrc.getID() != AArch64::GPRRegBankID) {
LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
<< ", expected: GPR\n");
return false;
}
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
if (DstSize == 0) {
LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
return false;
}
if (DstSize != 64 && DstSize > 32) {
LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
<< ", expected: 32 or 64\n");
return false;
}
if (DstSize > 32) {
Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
.addDef(ExtSrc)
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32);
I.getOperand(1).setReg(ExtSrc);
}
return selectCopy(I, TII, MRI, TRI, RBI);
}
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
if (selectUSMovFromExtend(I, MRI))
return true;
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
const LLT DstTy = MRI.getType(DefReg);
const LLT SrcTy = MRI.getType(SrcReg);
unsigned DstSize = DstTy.getSizeInBits();
unsigned SrcSize = SrcTy.getSizeInBits();
if (Opcode == TargetOpcode::G_SEXT_INREG)
SrcSize = I.getOperand(2).getImm();
if (DstTy.isVector())
return false;
assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
AArch64::GPRRegBankID &&
"Unexpected ext regbank");
MachineInstr *ExtI;
if (!IsSigned) {
auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
bool IsGPR =
RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
if (LoadMI && IsGPR) {
const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
unsigned BytesLoaded = MemOp->getSize();
if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
return selectCopy(I, TII, MRI, TRI, RBI);
}
if (IsGPR && SrcSize == 32 && DstSize == 64) {
MachineInstr *Def = MRI.getVRegDef(SrcReg);
Register SubregToRegSrc = SrcReg;
if (!Def || !isDef32(*Def)) {
Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
const Register ZReg = AArch64::WZR;
MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
SubregToRegSrc = OrDst;
}
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
.addImm(0)
.addUse(SubregToRegSrc)
.addImm(AArch64::sub_32);
if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
return false;
}
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
return false;
}
I.eraseFromParent();
return true;
}
}
if (DstSize == 64) {
if (Opcode != TargetOpcode::G_SEXT_INREG) {
if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
<< " operand\n");
return false;
}
SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
{&AArch64::GPR64RegClass}, {})
.addImm(0)
.addUse(SrcReg)
.addImm(AArch64::sub_32)
.getReg(0);
}
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
{DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else if (DstSize <= 32) {
ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
{DefReg}, {SrcReg})
.addImm(0)
.addImm(SrcSize - 1);
} else {
return false;
}
constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
SrcTy = MRI.getType(I.getOperand(1).getReg());
const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
if (NewOpc == Opcode)
return false;
I.setDesc(TII.get(NewOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
I.setFlags(MachineInstr::NoFPExcept);
return true;
}
case TargetOpcode::G_FREEZE:
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_INTTOPTR:
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_BITCAST:
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
auto &Sel = cast<GSelect>(I);
const Register CondReg = Sel.getCondReg();
const Register TReg = Sel.getTrueReg();
const Register FReg = Sel.getFalseReg();
if (tryOptSelect(Sel))
return true;
Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
return false;
Sel.eraseFromParent();
return true;
}
case TargetOpcode::G_ICMP: {
if (Ty.isVector())
return selectVectorICmp(I, MRI);
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
return false;
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
const AArch64CC::CondCode InvCC =
changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
emitCSINC(I.getOperand(0).getReg(), AArch64::WZR,
AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
Pred) ||
!emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
return false;
I.eraseFromParent();
return true;
}
case TargetOpcode::G_VASTART:
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
: selectVaStartAAPCS(I, MF, MRI);
case TargetOpcode::G_INTRINSIC:
return selectIntrinsic(I, MRI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWithSideEffects(I, MRI);
case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const Register DstReg = I.getOperand(0).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
return true;
}
case TargetOpcode::G_BLOCK_ADDR: {
if (TM.getCodeModel() == CodeModel::Large) {
materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
I.eraseFromParent();
return true;
} else {
I.setDesc(TII.get(AArch64::MOVaddrBA));
auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
I.getOperand(0).getReg())
.addBlockAddress(I.getOperand(1).getBlockAddress(),
0, AArch64II::MO_PAGE)
.addBlockAddress(
I.getOperand(1).getBlockAddress(), 0,
AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
I.eraseFromParent();
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
case AArch64::G_DUP: {
if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
AArch64::GPRRegBankID)
return false; LLT VecTy = MRI.getType(I.getOperand(0).getReg());
if (VecTy == LLT::fixed_vector(8, 8))
I.setDesc(TII.get(AArch64::DUPv8i8gpr));
else if (VecTy == LLT::fixed_vector(16, 8))
I.setDesc(TII.get(AArch64::DUPv16i8gpr));
else if (VecTy == LLT::fixed_vector(4, 16))
I.setDesc(TII.get(AArch64::DUPv4i16gpr));
else if (VecTy == LLT::fixed_vector(8, 16))
I.setDesc(TII.get(AArch64::DUPv8i16gpr));
else
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
return selectMergeValues(I, MRI);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectShuffleVector(I, MRI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectExtractElt(I, MRI);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMCPY_INLINE:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET:
assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
return selectMOPS(I, MRI);
}
return false;
}
/// Select a G_VECREDUCE_ADD or G_VECREDUCE_FADD.
///
/// The vector being reduced is operand 1 and the result is operand 0. Most
/// supported vector types are handled by rewriting \p I in place to a single
/// target opcode; the <2 x s32> integer add is expanded into a pairwise add
/// followed by a subregister copy.
///
/// \returns true if \p I was selected, false for any other opcode or for a
/// vector type that has no mapping here.
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  const Register SrcVec = I.getOperand(1).getReg();
  const LLT SrcVecTy = MRI.getType(SrcVec);

  // Shared tail for every case that just swaps the opcode of I.
  auto RewriteInPlace = [&](unsigned TargetOpc) -> bool {
    I.setDesc(TII.get(TargetOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  };

  switch (I.getOpcode()) {
  case TargetOpcode::G_VECREDUCE_ADD: {
    if (SrcVecTy == LLT::fixed_vector(2, 32)) {
      // ADDPv2i32 is built with an FPR64 result, so the scalar result is
      // taken from its ssub subregister with a COPY into the destination.
      const Register ResultReg = I.getOperand(0).getReg();
      auto PairwiseAdd = MIB.buildInstr(
          AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, {SrcVec, SrcVec});
      auto LowLaneCopy = MIB.buildInstr(TargetOpcode::COPY, {ResultReg}, {});
      LowLaneCopy.addReg(PairwiseAdd.getReg(0), 0, AArch64::ssub);
      RBI.constrainGenericRegister(LowLaneCopy.getReg(0),
                                   AArch64::FPR32RegClass, MRI);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*PairwiseAdd, TII, TRI, RBI);
    }

    if (SrcVecTy == LLT::fixed_vector(16, 8))
      return RewriteInPlace(AArch64::ADDVv16i8v);
    if (SrcVecTy == LLT::fixed_vector(8, 16))
      return RewriteInPlace(AArch64::ADDVv8i16v);
    if (SrcVecTy == LLT::fixed_vector(4, 32))
      return RewriteInPlace(AArch64::ADDVv4i32v);
    if (SrcVecTy == LLT::fixed_vector(2, 64))
      return RewriteInPlace(AArch64::ADDPv2i64p);

    LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
    return false;
  }

  case TargetOpcode::G_VECREDUCE_FADD: {
    if (SrcVecTy == LLT::fixed_vector(2, 32))
      return RewriteInPlace(AArch64::FADDPv2i32p);
    if (SrcVecTy == LLT::fixed_vector(2, 64))
      return RewriteInPlace(AArch64::FADDPv2i64p);

    LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
    return false;
  }

  default:
    // Not a reduction this routine knows about.
    return false;
  }
}
/// Select G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE or G_MEMSET to the matching
/// MOPS pseudo instruction.
///
/// Operand 0 of \p GI is the destination pointer, operand 1 the source pointer
/// (or, for G_MEMSET, the value to store) and operand 2 the size. Each of the
/// three is first copied into a freshly cloned virtual register, and the
/// pseudo is built on those copies with newly created registers as its defs.
///
/// \returns true once \p GI has been replaced; false, without touching the
/// MIR, if \p GI is not one of the four opcodes listed above.
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  unsigned Mopcode;
  switch (GI.getOpcode()) {
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
    break;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
    break;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;
    break;
  default:
    // Without this, Mopcode would be read uninitialized below. Fail selection
    // before anything has been modified.
    return false;
  }

  auto &DstPtr = GI.getOperand(0);
  auto &SrcOrVal = GI.getOperand(1);
  auto &Size = GI.getOperand(2);

  // Work on clones of the incoming registers rather than the originals.
  // NOTE(review): presumably because the pseudo overwrites its inputs (see
  // the separate def registers below) -- confirm against the pseudo's
  // definition.
  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());

  // For a memset operand 1 is a value and is constrained to GPR64; for
  // copy/move it is a pointer and is constrained to GPR64common.
  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

  MIB.buildCopy(DstPtrCopy, DstPtr);
  MIB.buildCopy(SrcValCopy, SrcOrVal);
  MIB.buildCopy(SizeCopy, Size);

  // The pseudo's results go to fresh registers that nothing else reads here.
  // Note the use order differs from the generic instruction: the set pseudo
  // takes (dst, size, value) while copy/move take (dst, src, size).
  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  if (IsSet) {
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
  } else {
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  }

  GI.eraseFromParent();
  return true;
}
/// Select a G_BRJT into a JumpTableDest32 followed by a BR.
///
/// Operand 0 of \p I is the jump table address register, operand 1 the jump
/// table index and operand 2 the register holding the entry index.
///
/// \returns the result of constraining the JumpTableDest32 operands.
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");

  const Register TableAddr = I.getOperand(0).getReg();
  const unsigned TableIdx = I.getOperand(1).getIndex();
  const Register EntryIdx = I.getOperand(2).getReg();

  // JumpTableDest32 defines the branch destination plus a second register
  // that is not used afterwards.
  const Register BranchDest =
      MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  const Register Scratch = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

  // Record entry info for this table.
  // NOTE(review): the 4 is presumably the entry size in bytes, matching the
  // "32" in JumpTableDest32 -- confirm against setJumpTableEntryInfo.
  auto *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
  FuncInfo->setJumpTableEntryInfo(TableIdx, 4, nullptr);

  auto DestInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                 {BranchDest, Scratch}, {TableAddr, EntryIdx});
  DestInst.addJumpTableIndex(TableIdx);

  // Indirect branch to the computed destination.
  MIB.buildInstr(AArch64::BR, {}, {BranchDest});

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*DestInst, TII, TRI, RBI);
}
/// Select a G_JUMP_TABLE into a MOVaddrJT that defines the same register.
///
/// The jump table index from operand 1 is attached twice: once flagged
/// MO_PAGE and once flagged MO_NC | MO_PAGEOFF.
///
/// \returns the result of constraining the new instruction's operands.
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  const Register TableAddrReg = I.getOperand(0).getReg();
  const unsigned TableIdx = I.getOperand(1).getIndex();

  auto AddrInst = MIB.buildInstr(AArch64::MOVaddrJT, {TableAddrReg}, {});
  AddrInst.addJumpTableIndex(TableIdx, AArch64II::MO_PAGE);
  AddrInst.addJumpTableIndex(TableIdx,
                             AArch64II::MO_NC | AArch64II::MO_PAGEOFF);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*AddrInst, TII, TRI, RBI);
}
/// Select a G_GLOBAL_VALUE that refers to a thread-local global.
///
/// Only MachO targets are handled. The emitted sequence is:
///   1. LOADgot of the global, flagged MO_TLS;
///   2. LDRXui from offset 0 of the address produced by (1);
///   3. COPY of the address from (1) into X0;
///   4. a call through the value loaded in (2), with X0 as implicit use and
///      def and the TLS call-preserved register mask;
///   5. COPY of X0 into the original destination register.
///
/// \returns false on non-MachO targets, true once \p I has been replaced.
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  if (!STI.isTargetMachO())
    return false;

  // The sequence below contains a call, so mark the frame as adjusting the
  // stack.
  MachineFunction &ParentFn = *I.getParent()->getParent();
  ParentFn.getFrameInfo().setAdjustsStack(true);

  const auto &GVOperand = I.getOperand(1);
  assert(GVOperand.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  const GlobalValue *TLSGlobal = GVOperand.getGlobal();

  auto GOTLoad =
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {});
  GOTLoad.addGlobalAddress(TLSGlobal, 0, AArch64II::MO_TLS);
  const Register GOTAddr = GOTLoad.getReg(0);

  auto CalleeLoad = MIB.buildInstr(
      AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, {GOTAddr});
  CalleeLoad.addImm(0);

  // The address loaded from the GOT is passed to the call in X0.
  MIB.buildCopy(Register(AArch64::X0), GOTAddr);

  auto Call = MIB.buildInstr(getBLRCallOpcode(ParentFn), {}, {CalleeLoad});
  Call.addUse(AArch64::X0, RegState::Implicit);
  Call.addDef(AArch64::X0, RegState::Implicit);
  Call.addRegMask(TRI.getTLSCallPreservedMask());

  // The call's result comes back in X0; move it to the requested register.
  const Register ResultReg = I.getOperand(0).getReg();
  MIB.buildCopy(ResultReg, Register(AArch64::X0));
  RBI.constrainGenericRegister(ResultReg, AArch64::GPR64RegClass, MRI);

  I.eraseFromParent();
  return true;
}
/// Select G_INTRINSIC_TRUNC by rewriting \p I in place to the FRINTZ opcode
/// that matches the type of operand 0.
///
/// Supported types: 16/32/64-bit scalars and the vectors 4x16, 8x16, 2x32,
/// 4x32 and 2x64. Any other type, scalar or vector, is rejected.
///
/// \returns false if the type is unsupported, otherwise the result of
/// constraining the rewritten instruction's operands.
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT Ty = MRI.getType(I.getOperand(0).getReg());

  // Opc stays 0 for every type with no FRINTZ mapping.
  unsigned Opc = 0;
  if (!Ty.isVector()) {
    switch (Ty.getSizeInBits()) {
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    default:
      // Unsupported scalar width: do not fall back to the 16-bit opcode,
      // let the check below reject it.
      break;
    }
  } else {
    const unsigned NumElts = Ty.getNumElements();
    switch (Ty.getElementType().getSizeInBits()) {
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    default:
      break;
    }
  }

  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // The generic and target instructions take the same operands here, so only
  // the descriptor changes.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
/// Select G_INTRINSIC_ROUND by rewriting \p I in place to the FRINTA opcode
/// that matches the type of operand 0.
///
/// Supported types: 16/32/64-bit scalars and the vectors 4x16, 8x16, 2x32,
/// 4x32 and 2x64. Any other type, scalar or vector, is rejected.
///
/// \returns false if the type is unsupported, otherwise the result of
/// constraining the rewritten instruction's operands.
bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT Ty = MRI.getType(I.getOperand(0).getReg());

  // Opc stays 0 for every type with no FRINTA mapping.
  unsigned Opc = 0;
  if (!Ty.isVector()) {
    switch (Ty.getSizeInBits()) {
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    default:
      // Unsupported scalar width: do not fall back to the 16-bit opcode,
      // let the check below reject it.
      break;
    }
  } else {
    const unsigned NumElts = Ty.getNumElements();
    switch (Ty.getElementType().getSizeInBits()) {
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    default:
      break;
    }
  }

  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // The generic and target instructions take the same operands here, so only
  // the descriptor changes.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
/// Select a vector G_ICMP \p I into a NEON integer compare.
///
/// Operands read from \p I: 0 is the destination, 1 the predicate, 2 and 3
/// the two sources. The compare opcode is looked up by source element size,
/// destination element count and predicate. The "less than" style predicates
/// reuse the "greater than" opcodes with the sources swapped, and ICMP_NE is
/// emitted as the ICMP_EQ opcode followed by a vector NOT.
///
/// \returns true (and erases \p I) on success; false when the vector shape is
/// outside the opcode table, the table has no entry for it, or no source
/// register class could be determined.
bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // Map the predicate onto a column of the opcode table below. Predicates
  // that have no direct instruction are expressed through their mirrored
  // counterpart with the two sources swapped.
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE: // Shares the EQ column; inverted by a NOT afterwards.
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // Indexed by [Log2(EltSize / 8)][Log2(NumElts / 2)][PredIdx]. A zero entry
  // means there is no instruction for that combination.
  static const unsigned OpcTable[4][4][9] = {
      // 8-bit elements.
      {
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 2 elements
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 4 elements
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8},
      },
      // 16-bit elements.
      {
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 2 elements
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 16 elements
      },
      // 32-bit elements.
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 8 elements
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 16 elements
      },
      // 64-bit elements.
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 4 elements
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 8 elements
          {0, 0, 0, 0, 0, 0, 0, 0, 0}, // 16 elements
      },
  };

  // Reject shapes the table cannot describe before computing indices: an
  // element narrower than 8 bits would make Log2_32 see zero and wrap, and
  // anything above 64 bits / 16 elements would index past the table.
  if (!isPowerOf2_32(SrcEltSize) || SrcEltSize < 8 || SrcEltSize > 64 ||
      !isPowerOf2_32(NumElts) || NumElts < 2 || NumElts > 16) {
    LLVM_DEBUG(dbgs() << "Unsupported vector type for G_ICMP.\n");
    return false;
  }

  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
    return false;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(SrcTy, VecRB, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  // ICMP_NE needs the EQ result inverted; pick the NOT matching the vector
  // width (128-bit sources use the 16-byte form, everything else the 8-byte
  // form).
  unsigned NotOpc = 0;
  if (Pred == ICmpInst::ICMP_NE)
    NotOpc = SrcTy.getSizeInBits() == 128 ? AArch64::NOTv16i8
                                          : AArch64::NOTv8i8;

  if (SwapOperands)
    std::swap(SrcReg, Src2Reg);

  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);

  if (NotOpc) {
    // The NOT defines the original destination directly.
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  } else {
    MIB.buildCopy(DstReg, Cmp.getReg(0));
  }
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
  return true;
}
/// Place \p Scalar into a register of class \p DstRC by inserting it into a
/// subregister of an IMPLICIT_DEF.
///
/// \p EltSize selects the subregister index used for the insert:
/// 16 -> hsub, 32 -> ssub, 64 -> dsub.
///
/// \returns the emitted INSERT_SUBREG, or nullptr when \p EltSize is not one
/// of the sizes above. Nothing is emitted in the nullptr case.
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  // Validate the element size before building anything so that a failed call
  // does not leave a dead IMPLICIT_DEF in the instruction stream.
  unsigned SubregIndex = 0;
  switch (EltSize) {
  case 16:
    SubregIndex = AArch64::hsub;
    break;
  case 32:
    SubregIndex = AArch64::ssub;
    break;
  case 64:
    SubregIndex = AArch64::dsub;
    break;
  default:
    return nullptr;
  }

  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  auto Ins =
      MIRBuilder
          .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
          .addImm(SubregIndex);
  constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
  return &*Ins;
}
/// Select a scalar G_MERGE_VALUES with exactly two sources.
///
/// Two shapes are handled:
///  - an s128 result built from two 64-bit sources, emitted as two lane
///    inserts (lanes 0 and 1) into an IMPLICIT_DEF; and
///  - a 64-bit result built from two 32-bit sources on the GPR bank, emitted
///    as two SUBREG_TO_REGs combined by a BFMXri.
///
/// \returns true (and erases \p I) on success, false for any other shape.
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  // One def plus two sources is the only supported operand count.
  if (I.getNumOperands() != 3)
    return false;

  const Register MergedReg = I.getOperand(0).getReg();
  const Register FirstSrc = I.getOperand(1).getReg();
  const Register SecondSrc = I.getOperand(2).getReg();

  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;

    // Start from an undefined s128 and insert the sources as lanes 0 and 1;
    // only the second insert writes the real destination.
    auto Undef = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *Lane0Ins =
        emitLaneInsert(None, Undef.getReg(0), FirstSrc, 0, RB, MIB);
    if (!Lane0Ins)
      return false;
    MachineInstr *Lane1Ins = emitLaneInsert(
        MergedReg, Lane0Ins->getOperand(0).getReg(), SecondSrc, 1, RB, MIB);
    if (!Lane1Ins)
      return false;

    constrainSelectedInstRegOperands(*Lane0Ins, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Lane1Ins, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  // What remains is the 2 x 32-bit -> 64-bit merge, on GPRs only.
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;
  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  MachineBasicBlock &MBB = *I.getParent();

  // Wrap a 32-bit source in a new GPR64 virtual register via SUBREG_TO_REG.
  auto WidenTo64 = [&](Register Src32) -> MachineInstr * {
    Register Wide = MRI.createVirtualRegister(DstRC);
    return &*BuildMI(MBB, I, I.getDebugLoc(),
                     TII.get(TargetOpcode::SUBREG_TO_REG))
                 .addDef(Wide)
                 .addImm(0)
                 .addUse(Src32)
                 .addImm(AArch64::sub_32);
  };
  MachineInstr *FirstWide = WidenTo64(FirstSrc);
  MachineInstr *SecondWide = WidenTo64(SecondSrc);

  MachineInstr *Combine =
      &*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
            .addDef(MergedReg)
            .addUse(FirstWide->getOperand(0).getReg())
            .addUse(SecondWide->getOperand(0).getReg())
            .addImm(32)
            .addImm(31);

  constrainSelectedInstRegOperands(*FirstWide, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*SecondWide, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*Combine, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
/// Look up the lane-copy opcode and the matching extract subregister index for
/// an element of \p EltSize bits.
///
/// \param [out] CopyOpc       set to the DUPi8/16/32/64 opcode.
/// \param [out] ExtractSubReg set to bsub/hsub/ssub/dsub respectively.
/// \returns true for 8, 16, 32 and 64; false (outputs untouched) otherwise.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  if (EltSize == 8) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    return true;
  }
  if (EltSize == 16) {
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    return true;
  }
  if (EltSize == 32) {
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    return true;
  }
  if (EltSize == 64) {
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    return true;
  }

  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
  return false;
}
/// Emit instructions extracting lane \p LaneIdx of \p VecReg as a value of
/// type \p ScalarTy on bank \p DstRB.
///
/// Lane 0 is read with a subregister COPY. Any other lane uses the lane-copy
/// opcode from getLaneCopyOpcode; a source that is not 128 bits wide is first
/// placed in an FPR128 register through emitScalarToVector.
///
/// \param DstReg destination register; when absent a new virtual register of
///        the destination class is created.
/// \returns the instruction defining the result, or nullptr when an opcode or
///          register class cannot be determined or the widening step fails.
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  unsigned LaneCopyOpc = 0;
  unsigned SubRegIdx = 0;
  if (!getLaneCopyOpcode(LaneCopyOpc, SubRegIdx, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  // The source class itself is not used further; the lookup only verifies
  // that the vector type has one on its bank.
  const LLT VecTy = MRI.getType(VecReg);
  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  if (!getRegClassForTypeOnBank(VecTy, VecRB, true)) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  const Register Dst = DstReg ? *DstReg : MRI.createVirtualRegister(DstRC);

  // Lane 0 needs no lane-copy instruction: read it as a subregister.
  if (LaneIdx == 0) {
    auto SubRegCopy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                          .addReg(VecReg, 0, SubRegIdx);
    RBI.constrainGenericRegister(Dst, *DstRC, MRI);
    return &*SubRegCopy;
  }

  // The lane copy is fed a 128-bit register, so widen narrower sources.
  Register LaneSrc = VecReg;
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *Widened = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!Widened)
      return nullptr;
    LaneSrc = Widened->getOperand(0).getReg();
  }

  MachineInstr *LaneCopy =
      MIRBuilder.buildInstr(LaneCopyOpc, {Dst}, {LaneSrc}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopy, TII, TRI, RBI);
  RBI.constrainGenericRegister(Dst, *DstRC, MRI);
  return LaneCopy;
}
/// Select G_EXTRACT_VECTOR_ELT \p I when the destination is on the FPR bank
/// and the lane index resolves to a constant.
///
/// \returns true (and erases \p I) when the extract was emitted; false when
/// the destination is not an FPR, the index is not a known constant, or
/// emitExtractVectorElt fails.
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  const Register Result = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(Result);
  const Register Vector = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(Vector);
  (void)WideTy;
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  MachineOperand &LaneIdxOp = I.getOperand(2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  // Extracting straight into a GPR is not handled here.
  const RegisterBank &ResultBank = *RBI.getRegBank(Result, MRI, TRI);
  if (ResultBank.getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // A lane index that is not a compile-time constant cannot be selected.
  auto ConstLane = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  if (!ConstLane)
    return false;
  const unsigned Lane = ConstLane->Value.getSExtValue();

  if (!emitExtractVectorElt(Result, ResultBank, NarrowTy, Vector, Lane, MIB))
    return false;

  I.eraseFromParent();
  return true;
}
/// Select an unmerge \p I whose destinations are vectors by emitting one
/// emitExtractVectorElt per destination, using the destination's operand
/// index as the lane index.
///
/// The source is the last operand of \p I. Sources wider than 128 bits are
/// rejected.
///
/// \returns true (and erases \p I) when every extract was emitted.
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const unsigned NumDsts = I.getNumOperands() - 1;
  const Register Source = I.getOperand(NumDsts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SourceTy = MRI.getType(Source);
  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");

  if (SourceTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  // The bank of the first destination is used for all of them.
  const RegisterBank &DstBank =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);

  unsigned Piece = 0;
  while (Piece < NumDsts) {
    const Register PieceDst = I.getOperand(Piece).getReg();
    if (!emitExtractVectorElt(PieceDst, DstBank, NarrowTy, Source, Piece, MIB))
      return false;
    ++Piece;
  }

  I.eraseFromParent();
  return true;
}
/// Select G_UNMERGE_VALUES \p I.
///
/// Only handled when both operand 0 and operand 1 are on the FPR bank. The
/// source is the last operand; all operands before it are destinations.
/// Vector destinations are forwarded to selectSplitVectorUnmerge. Scalar
/// destinations are produced with a subregister COPY for lane 0 and the
/// lane-copy opcode from getLaneCopyOpcode for the remaining lanes.
///
/// \returns true (and erases \p I) on success, false otherwise.
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"unexpected opcode");
// Only operands 0 and 1 are inspected; either one being off the FPR bank
// rejects the instruction.
if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
AArch64::FPRRegBankID ||
RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
AArch64::FPRRegBankID) {
LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n");
return false;
}
// The source is the last operand; NumElts counts the destinations.
unsigned NumElts = I.getNumOperands() - 1;
Register SrcReg = I.getOperand(NumElts).getReg();
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
const LLT WideTy = MRI.getType(SrcReg);
(void)WideTy;
assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
"can only unmerge from vector or s128 types!");
assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!");
// Non-scalar destinations take the split-vector path.
if (!NarrowTy.isScalar())
return selectSplitVectorUnmerge(I, MRI);
unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
return false;
MachineBasicBlock &MBB = *I.getParent();
// One source register is prepared for each of the NumElts - 1 lane copies
// emitted below.
SmallVector<Register, 4> InsertRegs;
unsigned NumInsertRegs = NumElts - 1;
if (NarrowTy.getSizeInBits() * NumElts == 128) {
// The destinations add up to 128 bits: every lane copy reads SrcReg
// directly.
InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
} else {
// Otherwise SrcReg is inserted into a fresh FPR128 register (IMPLICIT_DEF
// followed by INSERT_SUBREG), once per lane copy. The subregister index is
// derived from the register class found for the source's vector type.
const TargetRegisterClass *RC = getRegClassForTypeOnBank(
LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
*RBI.getRegBank(SrcReg, MRI, TRI));
unsigned SubReg = 0;
bool Found = getSubRegForClass(RC, TRI, SubReg);
(void)Found;
assert(Found && "expected to find last operand's subeg idx");
for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
MachineInstr &ImpDefMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
ImpDefReg);
Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
MachineInstr &InsMI =
*BuildMI(MBB, I, I.getDebugLoc(),
TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
.addUse(ImpDefReg)
.addUse(SrcReg)
.addImm(SubReg);
constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
InsertRegs.push_back(InsertReg);
}
}
// Destination 0 is a subregister COPY out of the first prepared register.
Register CopyTo = I.getOperand(0).getReg();
auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
.addReg(InsertRegs[0], 0, ExtractSubReg);
constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
// Destinations 1..NumElts-1 each get a lane copy of the matching lane.
unsigned LaneIdx = 1;
for (Register InsReg : InsertRegs) {
// Note: this CopyTo shadows the outer one, which still names operand 0.
Register CopyTo = I.getOperand(LaneIdx).getReg();
MachineInstr &CopyInst =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
.addUse(InsReg)
.addImm(LaneIdx);
constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
++LaneIdx;
}
// Constrain destination 0 (the outer CopyTo) to the register class that
// operand 1 has at this point.
// NOTE(review): this failure path returns false after instructions have
// already been emitted above.
const TargetRegisterClass *RC =
MRI.getRegClassOrNull(I.getOperand(1).getReg());
if (!RC) {
LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
return false;
}
RBI.constrainGenericRegister(CopyTo, *RC, MRI);
I.eraseFromParent();
return true;
}
/// Select G_CONCAT_VECTORS \p I by forwarding its destination (operand 0) and
/// its two sources (operands 1 and 2) to emitVectorConcat.
///
/// \returns true (and erases \p I) if the concat was emitted.
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  const Register Result = I.getOperand(0).getReg();
  const Register FirstHalf = I.getOperand(1).getReg();
  const Register SecondHalf = I.getOperand(2).getReg();

  if (!emitVectorConcat(Result, FirstHalf, SecondHalf, MIB))
    return false;

  I.eraseFromParent();
  return true;
}
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const {
Type *CPTy = CPVal->getType();
Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
MachineConstantPool *MCP = MF.getConstantPool();
return MCP->getConstantPoolIndex(CPVal, Alignment);
}
/// Emit an ADRP plus an FPR load that reads \p CPVal from the constant pool.
///
/// The load opcode and destination class follow the store size of the
/// constant's type: 16 -> LDRQui/FPR128, 8 -> LDRDui/FPR64,
/// 4 -> LDRSui/FPR32, 2 -> LDRHui/FPR16.
///
/// \returns the load instruction, or nullptr when the store size is none of
/// the above. In the nullptr case no constant-pool entry is requested and no
/// instruction is emitted.
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  auto &MF = MIRBuilder.getMF();

  // Decide how to load first: the size depends only on the constant's type,
  // so an unsupported size can be rejected before the constant pool or the
  // instruction stream is touched.
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  unsigned LoadOpc = 0;
  const TargetRegisterClass *LoadRC = nullptr;
  switch (Size) {
  case 16:
    LoadOpc = AArch64::LDRQui;
    LoadRC = &AArch64::FPR128RegClass;
    break;
  case 8:
    LoadOpc = AArch64::LDRDui;
    LoadRC = &AArch64::FPR64RegClass;
    break;
  case 4:
    LoadOpc = AArch64::LDRSui;
    LoadRC = &AArch64::FPR32RegClass;
    break;
  case 2:
    LoadOpc = AArch64::LDRHui;
    LoadRC = &AArch64::FPR16RegClass;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);

  // ADRP carries the page of the entry; the load adds the page offset.
  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI =
      &*MIRBuilder.buildInstr(LoadOpc, {LoadRC}, {Adrp})
            .addConstantPoolIndex(CPIdx, 0,
                                  AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
                                                    MachineMemOperand::MOLoad,
                                                    Size, Align(Size)));
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
/// Pick the vector-insert opcode and subregister index for inserting an
/// element of \p EltSize bits whose value lives on bank \p RB.
///
/// The GPR bank maps to the INSvi*gpr opcodes (16, 32 and 64 bits); any other
/// bank maps to the INSvi*lane opcodes (8, 16, 32 and 64 bits). Any other
/// size is treated as unreachable.
///
/// \returns the pair (opcode, subregister index).
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc = 0;
  unsigned SubregIdx = 0;
  const bool SourceOnGPR = RB.getID() == AArch64::GPRRegBankID;

  if (SourceOnGPR) {
    switch (EltSize) {
    case 16:
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
      break;
    case 32:
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
      break;
    case 64:
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
      break;
    default:
      llvm_unreachable("invalid elt size!");
    }
  } else {
    switch (EltSize) {
    case 8:
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
      break;
    case 16:
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
      break;
    case 32:
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
      break;
    case 64:
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
      break;
    default:
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
/// Build the already-selected instruction \p Opcode with the given defs and
/// uses, append any operands produced by \p RenderFns, and constrain its
/// register operands.
///
/// \param RenderFns optional list of callbacks, each invoked on the new
///        instruction builder in order.
/// \returns the emitted instruction.
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");

  auto NewMI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns) {
    for (auto &Render : *RenderFns) {
      Render(NewMI);
    }
  }
  constrainSelectedInstRegOperands(*NewMI, TII, TRI, RBI);
  return &*NewMI;
}
/// Emit an add/sub-style instruction computing \p Dst from \p LHS and \p RHS,
/// choosing the operand form \p RHS can be folded into.
///
/// \param AddrModeAndSizeToOpcode opcode table indexed by [form][Is32Bit].
///        Rows, as used below: 0 = arithmetic immediate, 1 = shifted
///        register, 2 = plain register-register, 3 = negated arithmetic
///        immediate, 4 = extended register. Column 0 is used for 64-bit
///        operands, column 1 for 32-bit operands.
///
/// Forms are tried in this order: immediate, negated immediate, extended
/// register, shifted register, then register-register as the fallback.
///
/// \returns the emitted instruction.
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  const bool Is32Bit = Size == 32;

  // Row numbers of the opcode table.
  enum : unsigned {
    ImmRow = 0,
    ShiftedRow = 1,
    RegRegRow = 2,
    NegImmRow = 3,
    ExtendedRow = 4
  };
  const auto OpcodeFor = [&](unsigned Row) {
    return AddrModeAndSizeToOpcode[Row][Is32Bit];
  };

  if (auto ImmFns = selectArithImmed(RHS))
    return emitInstr(OpcodeFor(ImmRow), {Dst}, {LHS}, MIRBuilder, ImmFns);

  if (auto NegImmFns = selectNegArithImmed(RHS))
    return emitInstr(OpcodeFor(NegImmRow), {Dst}, {LHS}, MIRBuilder,
                     NegImmFns);

  if (auto ExtFns = selectArithExtendedRegister(RHS))
    return emitInstr(OpcodeFor(ExtendedRow), {Dst}, {LHS}, MIRBuilder, ExtFns);

  if (auto ShiftFns = selectShiftedRegister(RHS))
    return emitInstr(OpcodeFor(ShiftedRow), {Dst}, {LHS}, MIRBuilder,
                     ShiftFns);

  // Nothing could be folded: both operands are used as plain registers.
  return emitInstr(OpcodeFor(RegRegRow), {Dst}, {LHS, RHS}, MIRBuilder);
}
/// Emit an ADD of \p LHS and \p RHS into \p DefReg through emitAddSub.
///
/// The table's negated-immediate row holds the SUB immediate opcodes.
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  // Each row is {64-bit opcode, 32-bit opcode}.
  const std::array<std::array<unsigned, 2>, 5> AddOpcodes{
      {{AArch64::ADDXri, AArch64::ADDWri},   // immediate
       {AArch64::ADDXrs, AArch64::ADDWrs},   // shifted register
       {AArch64::ADDXrr, AArch64::ADDWrr},   // register-register
       {AArch64::SUBXri, AArch64::SUBWri},   // negated immediate
       {AArch64::ADDXrx, AArch64::ADDWrx}}}; // extended register
  return emitAddSub(AddOpcodes, DefReg, LHS, RHS, MIRBuilder);
}
/// Emit an ADDS of \p LHS and \p RHS into \p Dst through emitAddSub.
///
/// The table's negated-immediate row holds the SUBS immediate opcodes.
MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  // Each row is {64-bit opcode, 32-bit opcode}.
  const std::array<std::array<unsigned, 2>, 5> AddsOpcodes{
      {{AArch64::ADDSXri, AArch64::ADDSWri},   // immediate
       {AArch64::ADDSXrs, AArch64::ADDSWrs},   // shifted register
       {AArch64::ADDSXrr, AArch64::ADDSWrr},   // register-register
       {AArch64::SUBSXri, AArch64::SUBSWri},   // negated immediate
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; // extended register
  return emitAddSub(AddsOpcodes, Dst, LHS, RHS, MIRBuilder);
}
/// Emit a SUBS of \p LHS and \p RHS into \p Dst through emitAddSub.
///
/// The table's negated-immediate row holds the ADDS immediate opcodes.
MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  // Each row is {64-bit opcode, 32-bit opcode}.
  const std::array<std::array<unsigned, 2>, 5> SubsOpcodes{
      {{AArch64::SUBSXri, AArch64::SUBSWri},   // immediate
       {AArch64::SUBSXrs, AArch64::SUBSWrs},   // shifted register
       {AArch64::SUBSXrr, AArch64::SUBSWrr},   // register-register
       {AArch64::ADDSXri, AArch64::ADDSWri},   // negated immediate
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; // extended register
  return emitAddSub(SubsOpcodes, Dst, LHS, RHS, MIRBuilder);
}
/// Emit an ADDS of \p LHS and \p RHS whose result goes to a fresh virtual
/// register; the register class (GPR32 or GPR64) follows the size of \p LHS.
///
/// \returns the instruction produced by emitADDS.
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const TargetRegisterClass *ScratchRC = &AArch64::GPR64RegClass;
  if (MRI.getType(LHS.getReg()).getSizeInBits() == 32)
    ScratchRC = &AArch64::GPR32RegClass;

  const Register Scratch = MRI.createVirtualRegister(ScratchRC);
  return emitADDS(Scratch, LHS, RHS, MIRBuilder);
}
/// Emit an ANDS of \p LHS and \p RHS. The destination is given to the builder
/// as the LLT of \p LHS rather than as a concrete register.
///
/// Operand forms are tried in order: logical immediate (when \p RHS is a
/// constant encodable as one), logical shifted register, and finally plain
/// register-register.
///
/// \returns the emitted instruction.
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  const LLT Ty = MRI.getType(LHS.getReg());
  const unsigned RegSize = Ty.getSizeInBits();
  const bool Is32Bit = RegSize == 32;

  // Try the immediate form first.
  if (auto Const = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    const int64_t Imm = Const->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      const unsigned ImmOpc = Is32Bit ? AArch64::ANDSWri : AArch64::ANDSXri;
      auto Tst = MIRBuilder.buildInstr(ImmOpc, {Ty}, {LHS});
      Tst.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*Tst, TII, TRI, RBI);
      return &*Tst;
    }
  }

  // Then a shifted-register operand.
  if (auto ShiftFns = selectLogicalShiftedRegister(RHS)) {
    const unsigned ShiftOpc = Is32Bit ? AArch64::ANDSWrs : AArch64::ANDSXrs;
    return emitInstr(ShiftOpc, {Ty}, {LHS}, MIRBuilder, ShiftFns);
  }

  // Fall back to register-register.
  const unsigned RegOpc = Is32Bit ? AArch64::ANDSWrr : AArch64::ANDSXrr;
  return emitInstr(RegOpc, {Ty}, {LHS, RHS}, MIRBuilder);
}
/// Emit a compare of the scalar registers \p LHS and \p RHS.
///
/// tryFoldIntegerCompare gets the first chance to produce the compare; if it
/// yields nothing, a SUBS into a clone of \p LHS's virtual register is
/// emitted.
///
/// \returns the instruction that performs the comparison.
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");

  auto Folded = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
  if (Folded)
    return Folded;

  // The SUBS result lands in a scratch register modelled on LHS.
  const Register Scratch = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Scratch, LHS, RHS, MIRBuilder);
}
/// Materialize the result of FP predicate \p Pred into the 32-bit register
/// \p Dst from flags that are expected to be set already.
///
/// changeFCMPPredToAArch64CC yields up to two condition codes. With a single
/// code one CSINC of WZR, WZR on the inverted code is emitted. With two
/// codes, one such CSINC per code is emitted into temporaries and the two are
/// combined with ORRWrr.
///
/// \returns the instruction defining \p Dst.
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register Zero = AArch64::WZR;

  AArch64CC::CondCode FirstCC, SecondCC;
  changeFCMPPredToAArch64CC(Pred, FirstCC, SecondCC);
  const AArch64CC::CondCode InvFirstCC =
      AArch64CC::getInvertedCondCode(FirstCC);

  if (SecondCC != AArch64CC::AL) {
    // Two condition codes: set a temporary for each and OR them together.
    const TargetRegisterClass *TmpRC = &AArch64::GPR32RegClass;
    const Register FirstTmp = MRI.createVirtualRegister(TmpRC);
    const Register SecondTmp = MRI.createVirtualRegister(TmpRC);
    const AArch64CC::CondCode InvSecondCC =
        AArch64CC::getInvertedCondCode(SecondCC);
    emitCSINC(FirstTmp, Zero, Zero, InvFirstCC, MIRBuilder);
    emitCSINC(SecondTmp, Zero, Zero, InvSecondCC, MIRBuilder);
    auto Combined =
        MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {FirstTmp, SecondTmp});
    constrainSelectedInstRegOperands(*Combined, TII, TRI, RBI);
    return &*Combined;
  }

  // A single condition code is enough.
  return emitCSINC(Dst, Zero, Zero, InvFirstCC, MIRBuilder);
}
/// Emit an FCMP of the scalar registers \p LHS and \p RHS.
///
/// Only 32- and 64-bit scalars are handled. When \p RHS is a constant +0.0
/// the immediate form is used and \p RHS is not added as an operand. If
/// \p Pred is given and is one of OEQ/ONE/UEQ/UNE, a +0.0 on the \p LHS side
/// is also accepted by swapping the two operands.
///
/// \returns the compare instruction, or nullptr for vector types and for
/// sizes other than 32 or 64 bits.
MachineInstr *
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
                                          MachineIRBuilder &MIRBuilder,
                                          Optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  const LLT OperandTy = MRI.getType(LHS);
  if (OperandTy.isVector())
    return nullptr;
  const unsigned OpSize = OperandTy.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // True when Reg is defined by an FP constant that is zero and not negative.
  const auto IsPositiveZero = [&MRI](Register Reg) {
    const ConstantFP *Imm = getConstantFPVRegVal(Reg, MRI);
    return Imm && (Imm->isZero() && !Imm->isNegative());
  };

  bool UseImmForm = IsPositiveZero(RHS);
  if (!UseImmForm && Pred) {
    switch (*Pred) {
    case CmpInst::FCMP_OEQ:
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    case CmpInst::FCMP_UNE:
      // For these predicates the operands are swapped so that a zero LHS can
      // use the immediate form as well.
      if (IsPositiveZero(LHS)) {
        UseImmForm = true;
        std::swap(LHS, RHS);
      }
      break;
    default:
      break;
    }
  }

  unsigned CmpOpc;
  if (UseImmForm)
    CmpOpc = OpSize == 64 ? AArch64::FCMPDri : AArch64::FCMPSri;
  else
    CmpOpc = OpSize == 64 ? AArch64::FCMPDrr : AArch64::FCMPSrr;

  auto Compare = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  Compare.setMIFlags(MachineInstr::NoFPExcept);
  if (!UseImmForm)
    Compare.addUse(RHS);
  constrainSelectedInstRegOperands(*Compare, TII, TRI, RBI);
  return &*Compare;
}
/// Emit the concatenation of the two 64-bit vectors \p Op1 and \p Op2.
///
/// Both operands are placed in registers of the doubled vector type's class
/// with emitScalarToVector, then element 0 of the second is inserted as
/// element 1 of the first using the 64-bit insert opcode.
///
/// \param Dst destination register; a new virtual register is created when it
///        is not provided.
/// \returns the insert instruction, or nullptr when the operand types differ,
/// the operands are not exactly 64 bits wide, or widening fails.
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  const unsigned OperandBits = Op1Ty.getSizeInBits();
  if (OperandBits >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }
  if (OperandBits != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  // From here on each operand is handled as one 64-bit element.
  const RegisterBank &OperandBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *WideRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), OperandBank);

  MachineInstr *Wide1 = emitScalarToVector(OperandBits, WideRC, Op1, MIRBuilder);
  MachineInstr *Wide2 = emitScalarToVector(OperandBits, WideRC, Op2, MIRBuilder);
  if (!Wide1 || !Wide2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Only the opcode of the pair is needed here.
  const std::pair<unsigned, unsigned> InsertInfo =
      getInsertVecEltOpInfo(OperandBank, OperandBits);
  const unsigned InsertOpc = InsertInfo.first;

  const Register Result = Dst ? *Dst : MRI.createVirtualRegister(WideRC);
  auto Insert =
      MIRBuilder.buildInstr(InsertOpc, {Result}, {Wide1->getOperand(0).getReg()})
          .addImm(1)
          .addUse(Wide2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*Insert, TII, TRI, RBI);
  return &*Insert;
}
/// Emit a CSINC writing \p Dst from \p Src1 and \p Src2 under condition
/// \p Pred.
///
/// The X-register form is used when \p Dst is 64 bits wide and the W-register
/// form otherwise. The width comes from the register class of \p Dst when it
/// has one, else from its LLT.
///
/// \returns the emitted instruction.
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const RegClassOrRegBank &DstClassOrBank = MRI.getRegClassOrRegBank(Dst);

  unsigned Size;
  if (const auto *DstRC =
          DstClassOrBank.dyn_cast<const TargetRegisterClass *>())
    Size = TRI.getRegSizeInBits(*DstRC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  assert(Size <= 64 && "Expected 64 bits or less only!");

  const unsigned Opc = Size == 64 ? AArch64::CSINCXr : AArch64::CSINCWr;
  auto Select = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  return &*Select;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode!");
case TargetOpcode::G_SADDO:
return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
case TargetOpcode::G_UADDO:
return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
case TargetOpcode::G_SSUBO:
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
case TargetOpcode::G_USUBO:
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
}
}
/// Check whether the value \p Val is the root of a tree of G_AND / G_OR nodes
/// over compares that the conjunction emitter can handle.
///
/// Every node in the tree must have exactly one non-debug use, and G_AND /
/// G_OR nodes are only followed while \p Depth is at most 6.
///
/// \param [out] CanNegate   set to whether the subtree's result can be
///        negated as part of emitting it.
/// \param [out] MustBeFirst set to whether this subtree has to be emitted
///        before its sibling.
/// \param WillNegate tells the node that its parent intends to negate it.
/// \returns true when the tree is acceptable; the out-parameters are only
///          meaningful in that case.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;

  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();

  // Leaves: a compare can always be negated and has no ordering constraint.
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }

  // Stop descending through logic nodes past the depth limit.
  if (Depth > 6)
    return false;

  const bool IsOR = Opcode == TargetOpcode::G_OR;
  if (!IsOR && Opcode != TargetOpcode::G_AND)
    return false;

  // Evaluate both children; children of a G_OR are told they will be negated.
  bool CanNegateL;
  bool MustBeFirstL;
  const Register LeftVal = ValDef->getOperand(1).getReg();
  if (!canEmitConjunction(LeftVal, CanNegateL, MustBeFirstL, IsOR, MRI,
                          Depth + 1))
    return false;

  bool CanNegateR;
  bool MustBeFirstR;
  const Register RightVal = ValDef->getOperand(2).getReg();
  if (!canEmitConjunction(RightVal, CanNegateR, MustBeFirstR, IsOR, MRI,
                          Depth + 1))
    return false;

  // Both children cannot be emitted first.
  if (MustBeFirstL && MustBeFirstR)
    return false;

  if (!IsOR) {
    assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
    CanNegate = false;
    MustBeFirst = MustBeFirstL || MustBeFirstR;
    return true;
  }

  // A G_OR needs at least one negatable child.
  if (!CanNegateL && !CanNegateR)
    return false;
  CanNegate = WillNegate && CanNegateL && CanNegateR;
  MustBeFirst = !CanNegate;
  return true;
}
/// Emit a conditional compare of \p LHS and \p RHS.
///
/// Integer predicates use CCMPWr / CCMPXr depending on the operand size; FP
/// predicates use FCCMPHrr / FCCMPSrr / FCCMPDrr. The instruction receives
/// two immediates: the NZCV value that satisfies the inverse of \p OutCC, and
/// \p Predicate.
///
/// \param CC only consulted to tell integer predicates from FP ones.
/// \returns the emitted instruction, or nullptr for an FP operand size other
/// than 16, 32 or 64 bits.
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(LHS);
  assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);

  const unsigned OpBits = OpTy.getSizeInBits();
  unsigned CCmpOpc;
  if (!CmpInst::isIntPredicate(CC)) {
    if (OpBits == 16)
      CCmpOpc = AArch64::FCCMPHrr;
    else if (OpBits == 32)
      CCmpOpc = AArch64::FCCMPSrr;
    else if (OpBits == 64)
      CCmpOpc = AArch64::FCCMPDrr;
    else
      return nullptr;
  } else if (OpBits == 32) {
    CCmpOpc = AArch64::CCMPWr;
  } else {
    CCmpOpc = AArch64::CCMPXr;
  }

  // The NZCV immediate is derived from the inverse of OutCC.
  const AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  const unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);

  auto CondCmp = MIB.buildInstr(CCmpOpc, {}, {LHS, RHS});
  CondCmp.addImm(NZCV);
  CondCmp.addImm(Predicate);
  constrainSelectedInstRegOperands(*CondCmp, TII, TRI, RBI);
  return &*CondCmp;
}
/// Recursively emit the compare sequence for the G_AND / G_OR / compare tree
/// rooted at \p Val. The tree is expected to have been accepted by
/// canEmitConjunction.
///
/// \param [out] OutCC condition code to test after the returned instruction.
/// \param Negate   request the negated result of \p Val.
/// \param CCOp     when valid, operand 0's register of a previously emitted
///                 compare; a leaf is then emitted as a conditional
///                 comparison instead of a plain compare.
/// \param Predicate condition code passed to the conditional comparison.
/// \returns the last compare instruction emitted for this subtree.
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
auto &MRI = *MIB.getMRI();
MachineInstr *ValDef = MRI.getVRegDef(Val);
unsigned Opcode = ValDef->getOpcode();
// Leaf: Val is produced directly by a compare.
if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
Register LHS = Cmp->getLHSReg();
Register RHS = Cmp->getRHSReg();
CmpInst::Predicate CC = Cmp->getCond();
// Negating a leaf only requires the inverse predicate.
if (Negate)
CC = CmpInst::getInversePredicate(CC);
if (isa<GICmp>(Cmp)) {
OutCC = changeICMPPredToAArch64CC(CC);
} else {
// FP predicates may map to a second condition code (ExtraCC). When they
// do, an extra compare is emitted first and the main compare below is
// chained onto it with ExtraCC as its predicate.
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
if (ExtraCC != AArch64CC::AL) {
MachineInstr *ExtraCmp;
if (!CCOp)
ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
else
ExtraCmp =
emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
CCOp = ExtraCmp->getOperand(0).getReg();
Predicate = ExtraCC;
}
}
// No earlier compare to chain on: emit an ordinary compare.
if (!CCOp) {
auto Dst = MRI.cloneVirtualRegister(LHS);
if (isa<GICmp>(Cmp))
return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
return emitFPCompare(Cmp->getOperand(2).getReg(),
Cmp->getOperand(3).getReg(), MIB);
}
// Otherwise chain onto the earlier compare.
return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
}
// Interior node: from here on Val is handled as a G_AND or G_OR of two
// subtrees.
assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == TargetOpcode::G_OR;
// Query both children for their negation and ordering properties.
Register LHS = ValDef->getOperand(1).getReg();
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
Register RHS = ValDef->getOperand(2).getReg();
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// The RHS subtree is emitted first below, so a child that must come first
// is moved to the RHS position.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
// Decide where negations are applied: inside each child (NegateL/NegateR),
// on the RHS condition code after emitting it (NegateAfterR), and on the
// final condition code (NegateAfterAll).
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == TargetOpcode::G_OR) {
// For G_OR the LHS child is always emitted negated. If the current LHS
// cannot be negated, the children trade places and the new RHS has its
// condition code inverted after it is emitted.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the RHS inside its subtree when possible, otherwise afterwards.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
// The final code is inverted unless the caller asked for the negated
// result.
NegateAfterAll = !Negate;
} else {
// G_AND needs no negation anywhere.
assert(Opcode == TargetOpcode::G_AND &&
"Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit the RHS subtree first, then the LHS subtree chained on RHS's compare
// with RHS's condition code as the predicate.
AArch64CC::CondCode RHSCC;
MachineInstr *CmpR =
emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
MachineInstr *CmpL = emitConjunctionRec(
LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit the compare sequence for the conjunction/disjunction tree rooted at
/// \p Val, if canEmitConjunction accepts it.
///
/// \param [out] OutCC condition code to test after the returned instruction.
/// \returns the last compare emitted, or nullptr (with nothing emitted) when
/// the tree is not acceptable.
MachineInstr *AArch64InstructionSelector::emitConjunction(
    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
  // Only the yes/no answer matters at the root; the flags are discarded.
  bool IgnoredCanNegate;
  bool IgnoredMustBeFirst;
  const bool TreeIsValid = canEmitConjunction(
      Val, IgnoredCanNegate, IgnoredMustBeFirst, false, *MIB.getMRI());
  if (!TreeIsValid)
    return nullptr;

  // Start with no negation, no earlier compare and an AL predicate.
  return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
}
/// Try to select \p SelI by emitting its condition register as a conjunction
/// and then a select on the resulting condition code.
///
/// \returns false if emitConjunction produced nothing; otherwise emits the
/// select, erases \p SelI and returns true. \p CondMI is not used here.
bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
                                                         MachineInstr &CondMI) {
  AArch64CC::CondCode SelectCC;
  if (!emitConjunction(SelI.getCondReg(), SelectCC, MIB))
    return false;

  emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), SelectCC,
             MIB);
  SelI.eraseFromParent();
  return true;
}
/// Try to fold the instruction defining the condition of \p I into the select.
///
/// The fold is only attempted when the condition register has a single
/// non-debug use, or when every other user is itself a G_SELECT. A G_ICMP or
/// G_FCMP condition is re-emitted as a compare followed by a select on the
/// matching condition code; any other defining opcode is handed to
/// tryOptSelectConjunction.
///
/// \returns true if \p I was replaced and erased.
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  const Register CondReg = CondDef->getOperand(0).getReg();

  if (!MRI.hasOneNonDBGUse(CondReg)) {
    for (const MachineInstr &User : MRI.use_nodbg_instructions(CondReg)) {
      const bool IsAllowedUser =
          &User == CondDef || User.getOpcode() == TargetOpcode::G_SELECT;
      if (!IsAllowedUser)
        return false;
    }
  }

  AArch64CC::CondCode SelectCC;
  switch (CondDef->getOpcode()) {
  case TargetOpcode::G_ICMP: {
    const auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    SelectCC = changeICMPPredToAArch64CC(Pred);
    emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                       CondDef->getOperand(1), MIB);
    break;
  }
  case TargetOpcode::G_FCMP: {
    const auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
    AArch64CC::CondCode SecondCC;
    changeFCMPPredToAArch64CC(Pred, SelectCC, SecondCC);
    // Only predicates whose second condition code comes back as AL are
    // handled here.
    if (SecondCC != AArch64CC::AL)
      return false;
    MachineInstr *FPCmp = emitFPCompare(CondDef->getOperand(2).getReg(),
                                        CondDef->getOperand(3).getReg(), MIB);
    if (!FPCmp) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
    break;
  }
  default:
    // Not a plain compare: see whether it is a foldable conjunction.
    return tryOptSelectConjunction(I, *CondDef);
  }

  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), SelectCC, MIB);
  I.eraseFromParent();
  return true;
}
/// Try to emit an integer compare of \p LHS and \p RHS as a CMN or TST.
///
/// Tried in order:
///  1. the LHS definition satisfies isCMN -> emitCMN(its operand 2, RHS);
///  2. the RHS definition satisfies isCMN -> emitCMN(LHS, its operand 2);
///  3. the predicate is not unsigned, LHS is defined by a G_AND and RHS is
///     the constant 0 -> emitTST on the two G_AND inputs.
///
/// \returns the emitted instruction, or nullptr if no fold applies.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  const auto Pred = static_cast<CmpInst::Predicate>(Predicate.getPredicate());

  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);

  if (isCMN(LHSDef, Pred, MRI))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
  if (isCMN(RHSDef, Pred, MRI))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  const bool IsCandidateAnd = !CmpInst::isUnsigned(Pred) && LHSDef &&
                              LHSDef->getOpcode() == TargetOpcode::G_AND;
  if (!IsCandidateAnd)
    return nullptr;

  // The TST form is only used when comparing against zero.
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (RHSCst && RHSCst->Value == 0)
    return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
  return nullptr;
}
/// Select a G_SHUFFLE_VECTOR as a TBL lookup whose byte-index vector is
/// loaded from the constant pool.
///
/// A destination that is not 128 bits wide is asserted to be 64 bits wide and
/// uses TBLv16i8One; a 128-bit destination uses TBLv16i8Two.
///
/// \returns false (leaving \p I in place) if either source is not a vector,
/// the constant-pool load cannot be emitted, or the sources cannot be
/// concatenated; true after \p I has been erased otherwise.
bool AArch64InstructionSelector::selectShuffleVector(
MachineInstr &I, MachineRegisterInfo &MRI) {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Register Src1Reg = I.getOperand(1).getReg();
const LLT Src1Ty = MRI.getType(Src1Reg);
Register Src2Reg = I.getOperand(2).getReg();
const LLT Src2Ty = MRI.getType(Src2Reg);
ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
LLVMContext &Ctx = MF.getFunction().getContext();
// Scalar source operands are not handled by this routine.
if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
return false;
}
// Expand each element index of the mask into BytesPerElt consecutive byte
// indices.
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
SmallVector<Constant *, 64> CstIdxs;
for (int Val : Mask) {
// Negative mask entries are treated as index 0.
Val = Val < 0 ? 0 : Val;
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
}
}
// Materialize the byte-index vector through the constant pool.
Constant *CPVal = ConstantVector::get(CstIdxs);
MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
if (!IndexLoad) {
LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
return false;
}
if (DstTy.getSizeInBits() != 128) {
assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
// Single-table path: concatenate both sources into one table register.
MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
if (!Concat) {
LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
return false;
}
// Place the loaded index vector in an FPR128 register for the TBL.
IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
IndexLoad->getOperand(0).getReg(), MIB);
auto TBL1 = MIB.buildInstr(
AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
{Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
// The result is the dsub subregister of the 128-bit TBL output.
auto Copy =
MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
.addReg(TBL1.getReg(0), 0, AArch64::dsub);
RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
I.eraseFromParent();
return true;
}
// Two-table path: both sources are combined with createQTuple and passed to
// TBLv16i8Two as one operand.
SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
auto RegSeq = createQTuple(Regs, MIB);
auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
{RegSeq, IndexLoad->getOperand(0)});
constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
/// Emit an instruction inserting \p EltReg into lane \p LaneIdx of \p SrcReg.
///
/// \p DstReg receives the result; when it is empty a fresh FPR128 virtual
/// register is created. The opcode is chosen by getInsertVecEltOpInfo from
/// \p RB and the element size. For an FPR element the scalar is first passed
/// through emitScalarToVector and inserted from lane 0 of that result;
/// otherwise \p EltReg is used directly.
///
/// \returns the constrained insert instruction.
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *VecRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create the destination first so virtual registers are numbered in the
  // same order regardless of which path is taken below.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(VecRC);
  const Register Dst = *DstReg;

  const unsigned EltBits = MRI.getType(EltReg).getSizeInBits();
  const unsigned InsertOpc = getInsertVecEltOpInfo(RB, EltBits).first;

  MachineInstr *Insert = nullptr;
  if (RB.getID() != AArch64::FPRRegBankID) {
    Insert = MIRBuilder.buildInstr(InsertOpc, {Dst}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  } else {
    MachineInstr *EltAsVec =
        emitScalarToVector(EltBits, VecRC, EltReg, MIRBuilder);
    Insert = MIRBuilder.buildInstr(InsertOpc, {Dst}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltAsVec->getOperand(0).getReg())
                 .addImm(0);
  }

  constrainSelectedInstRegOperands(*Insert, TII, TRI, RBI);
  return Insert;
}
/// Try to select an extend (G_SEXT / G_ZEXT / G_ANYEXT) of a constant-lane
/// G_EXTRACT_VECTOR_ELT as a single SMOV or UMOV.
///
/// Only 32- and 64-bit results are handled. A source vector that is not 128
/// bits wide is first widened with emitScalarToVector. For a non-signed
/// extend to 64 bits, the UMOV writes a 32-bit register that is then wrapped
/// in a SUBREG_TO_REG.
///
/// \returns true if \p MI was replaced and erased.
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  const unsigned ExtOpc = MI.getOpcode();
  switch (ExtOpc) {
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    break;
  default:
    return false;
  }
  const bool IsSigned = ExtOpc == TargetOpcode::G_SEXT;

  const Register DefReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DefReg).getSizeInBits();
  if (DstSize != 32 && DstSize != 64)
    return false;

  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       MI.getOperand(1).getReg(), MRI);
  if (!Extract)
    return false;
  int64_t Lane;
  if (!mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
    return false;

  Register VecReg = Extract->getOperand(1).getReg();
  // Type of the original (possibly narrow) source vector.
  const LLT VecTy = MRI.getType(VecReg);
  const unsigned VecBits = VecTy.getSizeInBits();
  if (VecBits != 128) {
    const MachineInstr *Widened =
        emitScalarToVector(VecBits, &AArch64::FPR128RegClass, VecReg, MIB);
    assert(Widened && "Didn't expect emitScalarToVector to fail!");
    VecReg = Widened->getOperand(0).getReg();
  }

  const unsigned EltBits = VecTy.getScalarSizeInBits();
  unsigned Opcode;
  if (DstSize == 64) {
    switch (EltBits) {
    case 32:
      Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
      break;
    case 16:
      Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
      break;
    case 8:
      Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
      break;
    default:
      llvm_unreachable("Unexpected type combo for S/UMov!");
    }
  } else {
    // DstSize == 32 here.
    switch (EltBits) {
    case 16:
      Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
      break;
    case 8:
      Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
      break;
    default:
      llvm_unreachable("Unexpected type combo for S/UMov!");
    }
  }

  MachineInstr *ExtI = nullptr;
  const bool NeedsSubregToReg = DstSize == 64 && !IsSigned;
  if (!NeedsSubregToReg) {
    ExtI = MIB.buildInstr(Opcode, {DefReg}, {VecReg}).addImm(Lane);
  } else {
    // UMOV into a 32-bit register, then form the 64-bit result from it.
    Register Narrow = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    MIB.buildInstr(Opcode, {Narrow}, {VecReg}).addImm(Lane);
    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
               .addImm(0)
               .addUse(Narrow)
               .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  }

  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}
/// Select a G_INSERT_VECTOR_ELT whose lane index is a known constant.
///
/// Elements narrower than 16 or wider than 64 bits are rejected. Vectors
/// narrower than 128 bits are widened with emitScalarToVector, the insert is
/// done into an FPR128 register, and the result is copied back out through
/// the ssub/dsub subregister. For 128-bit vectors the insert's destination is
/// rewritten to the original destination register.
///
/// \returns true if \p I was replaced and erased.
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned VecSize = DstTy.getSizeInBits();

  const Register EltReg = I.getOperand(2).getReg();
  const unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  const bool EltSizeSupported = EltSize >= 16 && EltSize <= 64;
  if (!EltSizeSupported)
    return false;

  // The lane must resolve to a constant.
  auto LaneCst =
      getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
  if (!LaneCst)
    return false;
  const unsigned LaneIdx = LaneCst->Value.getSExtValue();

  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);

  const bool NeedsWidening = VecSize < 128;
  if (NeedsWidening) {
    MachineInstr *Widened =
        emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
    if (!Widened)
      return false;
    SrcReg = Widened->getOperand(0).getReg();
  }

  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);

  if (!NeedsWidening) {
    // Already full width: have the insert define the destination directly.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  // Narrow the 128-bit result back down to the destination's size.
  const Register WideResult = InsMI->getOperand(0).getReg();
  const TargetRegisterClass *NarrowRC = getRegClassForTypeOnBank(
      DstTy, *RBI.getRegBank(WideResult, MRI, TRI));
  const bool IsNarrowFPR = NarrowRC == &AArch64::FPR32RegClass ||
                           NarrowRC == &AArch64::FPR64RegClass;
  if (!IsNarrowFPR) {
    LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
    return false;
  }

  unsigned SubReg = 0;
  if (!getSubRegForClass(NarrowRC, TRI, SubReg))
    return false;
  const bool IsSupportedSubReg =
      SubReg == AArch64::ssub || SubReg == AArch64::dsub;
  if (!IsSupportedSubReg) {
    LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                      << "\n");
    return false;
  }

  MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
      .addReg(WideResult, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *NarrowRC, MRI);
  I.eraseFromParent();
  return true;
}
/// Materialize the constant vector \p CV into \p Dst.
///
/// An all-zero constant of 128 or 64 bits is produced with MOVIv2d_ns #0 (the
/// 64-bit case copies out the dsub subregister). Everything else is loaded
/// from the constant pool and copied into \p Dst.
///
/// \returns the instruction defining \p Dst, or nullptr if the constant-pool
/// load could not be emitted.
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  const unsigned DstBits = MRI.getType(Dst).getSizeInBits();

  if (CV->isNullValue()) {
    switch (DstBits) {
    case 128: {
      auto Zero =
          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
      constrainSelectedInstRegOperands(*Zero, TII, TRI, RBI);
      return &*Zero;
    }
    case 64: {
      auto WideZero =
          MIRBuilder
              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
              .addImm(0);
      auto LowHalf = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
                         .addReg(WideZero.getReg(0), 0, AArch64::dsub);
      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
      return &*LowHalf;
    }
    default:
      // Other sizes fall through to the constant-pool path.
      break;
    }
  }

  MachineInstr *PoolLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  if (!PoolLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }

  const MachineOperand &Loaded = PoolLoad->getOperand(0);
  auto Result = MIRBuilder.buildCopy(Dst, Loaded);
  RBI.constrainGenericRegister(Dst, *MRI.getRegClass(Loaded.getReg()), MRI);
  return &*Result;
}
/// Try to select a G_BUILD_VECTOR whose sources are all G_CONSTANT or
/// G_FCONSTANT as a single constant vector via emitConstantVector.
///
/// Destinations narrower than 32 bits are not handled.
///
/// \returns true if \p I was replaced and erased.
bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  const unsigned DstSize = DstTy.getSizeInBits();
  assert(DstSize <= 128 && "Unexpected build_vec type!");
  if (DstSize < 32)
    return false;

  // Collect one IR constant per source operand; give up on the first source
  // that is not a constant.
  SmallVector<Constant *, 16> Csts;
  for (const MachineOperand &Src :
       make_range(I.operands_begin() + 1, I.operands_end())) {
    const Register SrcReg = Src.getReg();
    if (auto *IntDef = getOpcodeDef(TargetOpcode::G_CONSTANT, SrcReg, MRI)) {
      Csts.emplace_back(
          const_cast<ConstantInt *>(IntDef->getOperand(1).getCImm()));
      continue;
    }
    if (auto *FPDef = getOpcodeDef(TargetOpcode::G_FCONSTANT, SrcReg, MRI)) {
      Csts.emplace_back(
          const_cast<ConstantFP *>(FPDef->getOperand(1).getFPImm()));
      continue;
    }
    return false;
  }

  Constant *CV = ConstantVector::get(Csts);
  MachineInstr *Emitted =
      emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI);
  if (!Emitted)
    return false;
  I.eraseFromParent();
  return true;
}
/// Try to select a G_BUILD_VECTOR whose sources after the first are all
/// G_IMPLICIT_DEF as a SUBREG_TO_REG of the first element.
///
/// Requires the first element and the destination to be on the same register
/// bank, and register classes / a subregister index to be available for both.
///
/// \returns false if the pattern does not apply. Otherwise \p I is erased and
/// the result of constraining the destination register is returned.
bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const Register Dst = I.getOperand(0).getReg();
  const Register EltReg = I.getOperand(1).getReg();
  const LLT EltTy = MRI.getType(EltReg);

  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
  if (EltRB != DstRB)
    return false;

  // Every source after the first has to be undefined.
  const auto IsImplicitDef = [&MRI](const MachineOperand &Op) {
    return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI) !=
           nullptr;
  };
  if (!all_of(make_range(I.operands_begin() + 2, I.operands_end()),
              IsImplicitDef))
    return false;

  const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
  if (!EltRC)
    return false;
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
  if (!DstRC)
    return false;

  unsigned SubReg;
  if (!getSubRegForClass(EltRC, TRI, SubReg))
    return false;

  auto Widen = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
                   .addImm(0)
                   .addUse(EltReg)
                   .addImm(SubReg);
  I.eraseFromParent();
  constrainSelectedInstRegOperands(*Widen, TII, TRI, RBI);
  return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
}
/// Select a G_BUILD_VECTOR.
///
/// First tries the constant-vector and SUBREG_TO_REG special cases. Failing
/// those, the first element is moved into an FPR128 register with
/// emitScalarToVector and each remaining element is inserted with
/// emitLaneInsert. Destinations narrower than 128 bits get a subregister
/// copy from the wide result; for 128-bit destinations the last insert is
/// rewritten to define the destination register.
///
/// \returns true if \p I was replaced and erased.
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
unsigned EltSize = EltTy.getSizeInBits();
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
if (tryOptBuildVecToSubregToReg(I, MRI))
return true;
// Only 16- to 64-bit elements are handled by the insert sequence. The
// register bank of the first element (RB) is used for every lane insert.
if (EltSize < 16 || EltSize > 64)
return false; const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
// Seed the wide vector with the first element.
MachineInstr *ScalarToVec =
emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
I.getOperand(1).getReg(), MIB);
if (!ScalarToVec)
return false;
Register DstVec = ScalarToVec->getOperand(0).getReg();
unsigned DstSize = DstTy.getSizeInBits();
// PrevMI tracks the most recent insert so the 128-bit path can retarget it.
MachineInstr *PrevMI = nullptr;
// Operand i of I goes into lane i - 1; each insert feeds the next one.
for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
MIB);
DstVec = PrevMI->getOperand(0).getReg();
}
if (DstSize < 128) {
// Narrow destination: copy the ssub/dsub subregister of the wide result.
const TargetRegisterClass *RC =
getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
if (!RC)
return false;
if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
return false;
}
unsigned SubReg = 0;
if (!getSubRegForClass(RC, TRI, SubReg))
return false;
if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
<< "\n");
return false;
}
// NOTE(review): Reg is only ever stored into operand 1 of I, and I is
// erased at the end of this function; the purpose of this rewrite is not
// evident from this function alone.
Register Reg = MRI.createVirtualRegister(RC);
Register DstReg = I.getOperand(0).getReg();
MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
RegOp.setReg(Reg);
RBI.constrainGenericRegister(DstReg, *RC, MRI);
} else {
// Full-width destination: make the final insert define it directly.
assert(PrevMI && "PrevMI was null?");
PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
}
I.eraseFromParent();
return true;
}
bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
unsigned NumVecs,
MachineInstr &I) {
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
assert(Opc && "Expected an opcode?");
assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
auto &MRI = *MIB.getMRI();
LLT Ty = MRI.getType(I.getOperand(0).getReg());
unsigned Size = Ty.getSizeInBits();
assert((Size == 64 || Size == 128) &&
"Destination must be 64 bits or 128 bits?");
unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
Load.cloneMemRefs(I);
constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
Register SelectedLoadDst = Load->getOperand(0).getReg();
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
.addReg(SelectedLoadDst, 0, SubReg + Idx);
selectCopy(*Vec, TII, MRI, TRI, RBI);
}
return true;
}
/// Select the side-effecting intrinsic \p I.
///
/// Handles ldxp/ldaxp, trap/debugtrap/ubsantrap, NEON ld2/ld4/st2 and the
/// MOPS memset-tag intrinsic.
///
/// \returns false for any other intrinsic ID; otherwise emits the
/// replacement, erases \p I and returns true.
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) {
unsigned IntrinID = I.getIntrinsicID();
// Types used below to map the vector type onto a NEON opcode.
const LLT S8 = LLT::scalar(8);
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT P0 = LLT::pointer(0, 64);
switch (IntrinID) {
default:
return false;
case Intrinsic::aarch64_ldxp:
case Intrinsic::aarch64_ldaxp: {
// Operands 0 and 1 are the two results; operand 3 is used as the input.
auto NewI = MIB.buildInstr(
IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
{I.getOperand(0).getReg(), I.getOperand(1).getReg()},
{I.getOperand(3)});
NewI.cloneMemRefs(I);
constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
break;
}
// The three trap flavours are all BRK with different immediates.
case Intrinsic::trap:
MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
break;
case Intrinsic::debugtrap:
MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
case Intrinsic::ubsantrap:
// The immediate is operand 1 of the intrinsic with 'U' in bits 8 and up.
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
case Intrinsic::aarch64_neon_ld2: {
// Pick the LD2 variant from the type of the first result; the s64/p0
// case uses LD1Twov1d.
LLT Ty = MRI.getType(I.getOperand(0).getReg());
unsigned Opc = 0;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::LD2Twov8b;
else if (Ty == LLT::fixed_vector(16, S8))
Opc = AArch64::LD2Twov16b;
else if (Ty == LLT::fixed_vector(4, S16))
Opc = AArch64::LD2Twov4h;
else if (Ty == LLT::fixed_vector(8, S16))
Opc = AArch64::LD2Twov8h;
else if (Ty == LLT::fixed_vector(2, S32))
Opc = AArch64::LD2Twov2s;
else if (Ty == LLT::fixed_vector(4, S32))
Opc = AArch64::LD2Twov4s;
else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
Opc = AArch64::LD2Twov2d;
else if (Ty == S64 || Ty == P0)
Opc = AArch64::LD1Twov1d;
else
llvm_unreachable("Unexpected type for ld2!");
selectVectorLoadIntrinsic(Opc, 2, I);
break;
}
case Intrinsic::aarch64_neon_ld4: {
// Same scheme as ld2, with four result vectors; the s64/p0 case uses
// LD1Fourv1d.
LLT Ty = MRI.getType(I.getOperand(0).getReg());
unsigned Opc = 0;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::LD4Fourv8b;
else if (Ty == LLT::fixed_vector(16, S8))
Opc = AArch64::LD4Fourv16b;
else if (Ty == LLT::fixed_vector(4, S16))
Opc = AArch64::LD4Fourv4h;
else if (Ty == LLT::fixed_vector(8, S16))
Opc = AArch64::LD4Fourv8h;
else if (Ty == LLT::fixed_vector(2, S32))
Opc = AArch64::LD4Fourv2s;
else if (Ty == LLT::fixed_vector(4, S32))
Opc = AArch64::LD4Fourv4s;
else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
Opc = AArch64::LD4Fourv2d;
else if (Ty == S64 || Ty == P0)
Opc = AArch64::LD1Fourv1d;
else
llvm_unreachable("Unexpected type for ld4!");
selectVectorLoadIntrinsic(Opc, 4, I);
break;
}
case Intrinsic::aarch64_neon_st2: {
// Operands 1 and 2 are the vectors to store; operand 3 is the pointer.
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
else if (Ty == LLT::fixed_vector(16, S8))
Opc = AArch64::ST2Twov16b;
else if (Ty == LLT::fixed_vector(4, S16))
Opc = AArch64::ST2Twov4h;
else if (Ty == LLT::fixed_vector(8, S16))
Opc = AArch64::ST2Twov8h;
else if (Ty == LLT::fixed_vector(2, S32))
Opc = AArch64::ST2Twov2s;
else if (Ty == LLT::fixed_vector(4, S32))
Opc = AArch64::ST2Twov4s;
else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
Opc = AArch64::ST2Twov2d;
else if (Ty == S64 || Ty == P0)
Opc = AArch64::ST1Twov1d;
else
llvm_unreachable("Unexpected type for st2!");
// The sources are combined with createQTuple for 128-bit vectors and
// createDTuple otherwise.
SmallVector<Register, 2> Regs = {Src1, Src2};
Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
: createDTuple(Regs, MIB);
auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
Store.cloneMemRefs(I);
constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
break;
}
case Intrinsic::aarch64_mops_memset_tag: {
// Operand 0 is the destination def; operands 2, 3 and 4 are the
// destination, value and size uses.
Register DstDef = I.getOperand(0).getReg();
Register DstUse = I.getOperand(2).getReg();
Register ValUse = I.getOperand(3).getReg();
Register SizeUse = I.getOperand(4).getReg();
// The pseudo has a second def that the intrinsic does not expose, so a
// fresh s64 register is created for it.
Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
// Note the use order on the pseudo: destination, size, value.
auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
{DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
Memset.cloneMemRefs(I);
constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
break;
}
}
// Every handled case falls through to here to drop the original intrinsic.
I.eraseFromParent();
return true;
}
/// Select the intrinsic \p I.
///
/// Handles aarch64_crypto_sha1h, ptrauth_sign, frameaddress, returnaddress
/// and swift_async_context_addr.
///
/// \returns true if \p I was replaced and erased; false for any other
/// intrinsic ID or when a handled intrinsic has unsupported operands.
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
unsigned IntrinID = I.getIntrinsicID();
switch (IntrinID) {
default:
break;
case Intrinsic::aarch64_crypto_sha1h: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(2).getReg();
// Only 32-bit source and destination are handled.
if (MRI.getType(DstReg).getSizeInBits() != 32 ||
MRI.getType(SrcReg).getSizeInBits() != 32)
return false;
// SHA1Hrr is built on FPR32 registers: a source that is not on the FPR
// bank is copied into a new FPR32 register first.
if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
MIB.buildCopy({SrcReg}, {I.getOperand(2)});
RBI.constrainGenericRegister(I.getOperand(2).getReg(),
AArch64::GPR32RegClass, MRI);
}
// Likewise use a temporary FPR32 destination if the real one is not FPR.
if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
// If a temporary destination was used, copy it back to the original.
if (DstReg != I.getOperand(0).getReg()) {
MIB.buildCopy({I.getOperand(0)}, {DstReg});
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
}
I.eraseFromParent();
return true;
}
case Intrinsic::ptrauth_sign: {
Register DstReg = I.getOperand(0).getReg();
Register ValReg = I.getOperand(2).getReg();
uint64_t Key = I.getOperand(3).getImm();
Register DiscReg = I.getOperand(4).getReg();
auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
bool IsDiscZero = DiscVal && DiscVal->isNullValue();
// Key indexes the four-entry opcode rows below.
if (Key > 3)
return false;
// Row 0 holds the forms that take a discriminator operand; row 1 holds
// the forms used when the discriminator is a known zero.
unsigned Opcodes[][4] = {
{AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
{AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
unsigned Opcode = Opcodes[IsDiscZero][Key];
auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
if (!IsDiscZero) {
PAC.addUse(DiscReg);
RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
}
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
I.eraseFromParent();
return true;
}
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Depth = I.getOperand(2).getImm();
Register DstReg = I.getOperand(0).getReg();
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
// returnaddress at depth 0: use the LR live-in, created on first use and
// cached in MFReturnAddr.
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
if (!MFReturnAddr) {
MFI.setReturnAddressIsTaken(true);
MFReturnAddr = getFunctionLiveInPhysReg(
MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
}
// With PAuth, XPACI is applied to the value directly; otherwise the value
// is routed through LR around an XPACLRI.
if (STI.hasPAuth()) {
MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
} else {
MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
MIB.buildInstr(AArch64::XPACLRI);
MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
}
I.eraseFromParent();
return true;
}
MFI.setFrameAddressIsTaken(true);
// Starting from FP, load through the frame chain Depth times, each time
// with an LDRXui using immediate 0.
Register FrameAddr(AArch64::FP);
while (Depth--) {
Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
auto Ldr =
MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
FrameAddr = NextFrame;
}
if (IntrinID == Intrinsic::frameaddress)
MIB.buildCopy({DstReg}, {FrameAddr});
else {
// returnaddress at non-zero depth: load from the frame found above with
// an LDRXui using immediate 1, then apply XPACI or XPACLRI as above.
MFI.setReturnAddressIsTaken(true);
if (STI.hasPAuth()) {
Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
} else {
MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
.addImm(1);
MIB.buildInstr(AArch64::XPACLRI);
MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
}
}
I.eraseFromParent();
return true;
}
case Intrinsic::swift_async_context_addr:
// The result is built as SUBXri of FP with immediates 8 and 0.
auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
{Register(AArch64::FP)})
.addImm(8)
.addImm(0);
constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
MF->getFrameInfo().setFrameAddressIsTaken(true);
MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
I.eraseFromParent();
return true;
}
return false;
}
/// Render the immediate (32 - Imm) & 0x1f for a shift amount \p Root.
/// Returns None if \p Root is not a known immediate or is greater than 31.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  const auto ShiftImm = getImmedFromMO(Root);
  if (!ShiftImm || *ShiftImm > 31)
    return None;

  const uint64_t Enc = (32 - *ShiftImm) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// Render the immediate 31 - Imm for a shift amount \p Root.
/// Returns None if \p Root is not a known immediate or is greater than 31.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  const auto ShiftImm = getImmedFromMO(Root);
  if (!ShiftImm || *ShiftImm > 31)
    return None;

  const uint64_t Enc = 31 - *ShiftImm;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// Render the immediate (64 - Imm) & 0x3f for a shift amount \p Root.
/// Returns None if \p Root is not a known immediate or is greater than 63.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  const auto ShiftImm = getImmedFromMO(Root);
  if (!ShiftImm || *ShiftImm > 63)
    return None;

  const uint64_t Enc = (64 - *ShiftImm) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// Render the immediate 63 - Imm for a shift amount \p Root.
/// Returns None if \p Root is not a known immediate or is greater than 63.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  const auto ShiftImm = getImmedFromMO(Root);
  if (!ShiftImm || *ShiftImm > 63)
    return None;

  const uint64_t Enc = 63 - *ShiftImm;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}
/// Split \p Immed into a 12-bit value plus an LSL amount of 0 or 12.
///
/// Values below 2^12 are used as-is with shift 0. Values below 2^24 whose low
/// 12 bits are zero are shifted right by 12 and paired with shift 12.
/// Anything else yields None.
///
/// \returns two renderers: the 12-bit value, then the shifter immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt = 0;
  if ((Immed >> 12) != 0) {
    const bool FitsShiftedBy12 = (Immed & 0xfff) == 0 && (Immed >> 24) == 0;
    if (!FitsShiftedBy12)
      return None;
    ShiftAmt = 12;
    Immed >>= 12;
  }

  const unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}
/// Match \p Root as an arithmetic immediate: a known constant that
/// select12BitValueWithLeftShift accepts. Returns None otherwise.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  if (auto KnownImm = getImmedFromMO(Root))
    return select12BitValueWithLeftShift(*KnownImm);
  return None;
}
/// Match \p Root as an arithmetic immediate after negating it.
///
/// \p Root must be a register with a known non-zero constant value. The value
/// is negated in 32 bits when the register's type is 32 bits wide, and in 64
/// bits otherwise. The negated value must fit in 24 bits and is then passed
/// to select12BitValueWithLeftShift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // A register is required so its type can pick the negation width.
  if (!Root.isReg())
    return None;

  const auto KnownImm = getImmedFromMO(Root);
  if (!KnownImm)
    return None;

  uint64_t Immed = *KnownImm;
  if (Immed == 0)
    return None;

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  const bool Is32Bit = MRI.getType(Root.getReg()).getSizeInBits() == 32;
  if (Is32Bit) {
    // Two's-complement negate within 32 bits, then zero-extend.
    const uint32_t Low = static_cast<uint32_t>(Immed);
    Immed = static_cast<uint32_t>(~Low + 1u);
  } else {
    Immed = ~Immed + 1ULL;
  }

  // Reject anything with bits set above bit 23.
  if ((Immed >> 24) != 0)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}
/// Decide whether folding \p MI into its users' operands is worthwhile.
///
/// Returns true when the result of \p MI has a single non-debug use or the
/// function is optimized for size. Otherwise returns false unless the
/// subtarget reports hasLSLFast(), in which case the answer is whether every
/// non-debug user may load or store.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  const Register DefReg = MI.getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(DefReg))
    return true;
  if (MI.getParent()->getParent()->getFunction().hasOptSize())
    return true;

  if (!STI.hasLSLFast())
    return false;

  for (MachineInstr &User : MRI.use_nodbg_instructions(DefReg)) {
    if (!User.mayLoadOrStore())
      return false;
  }
  return true;
}
/// Returns true if \p Type is one of SXTB, SXTH or SXTW.
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  return Type == AArch64_AM::SXTB || Type == AArch64_AM::SXTH ||
         Type == AArch64_AM::SXTW;
}
/// Try to fold a scaled offset into a register-offset addressing mode.
///
/// The definition of \p Offset must be a G_SHL by a constant or a G_MUL by a
/// power-of-two constant (either G_MUL operand may be the constant). When
/// \p WantsExt is set, a G_ZEXT in front of that shift/multiply is looked
/// through. The resulting shift amount has to equal log2(\p SizeInBytes) and
/// be non-zero, and the fold must pass isWorthFoldingIntoExtendedReg.
///
/// With \p WantsExt, the shifted value must also come from an extend accepted
/// by getExtendTypeForInst (only SXTW is accepted for sign extension) unless
/// a G_ZEXT was already looked through, and the offset register is moved to
/// GPR32.
///
/// \param Root   operand being matched; any register move is inserted before
///               its defining instruction.
/// \param Base   base register operand.
/// \param Offset offset register operand whose definition is inspected.
/// \returns three renderers (base register, offset register, then the
/// sign-extend flag followed by a shift flag of 1), or None on mismatch.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Not a shift/multiply: the only other accepted shape is a G_ZEXT of
    // one, and only when an extend is wanted.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return None;

    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return None;
  }

  // The only shift amount that can be folded is log2 of the access size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Assume the constant is the second source operand first.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // A shift needs its amount on the right-hand side.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // For a multiply the constant may be on either side.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  if (OffsetOpc == TargetOpcode::G_MUL) {
    // Convert the multiplier into a shift amount. The check is done on the
    // full 64-bit value: narrowing it to 32 bits first would let a
    // multiplier such as 0x100000008 be mistaken for 8.
    if (ImmVal <= 0 || !isPowerOf2_64(static_cast<uint64_t>(ImmVal)))
      return None;
    ImmVal = Log2_64(static_cast<uint64_t>(ImmVal));
  }

  // The shift amount must be non-negative and fit in three bits.
  if ((ImmVal & 0x7) != ImmVal)
    return None;

  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    if (!LookedThroughZExt) {
      // The shifted value itself has to be produced by a supported extend.
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return None;

      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
      // SXTW is the only sign-extend accepted here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return None;
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // The offset register is moved to the GPR32 class.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Both immediates are added by this one renderer: the
             // sign-extend flag, then the shift flag.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}
/// Match \p Root as a G_PTR_ADD whose offset can be folded as a shifted
/// register (no extend) via selectExtendedSHL.
///
/// Returns None if \p Root is not a register, is not defined by a G_PTR_ADD,
/// or the G_PTR_ADD fails isWorthFoldingIntoExtendedReg.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &BaseOp = PtrAdd->getOperand(1);
  MachineInstr *OffsetDef =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, BaseOp, OffsetDef->getOperand(0), SizeInBytes,
                           false);
}
/// Match \p Root as a single-use G_PTR_ADD and render it as base register,
/// offset register, and two zero immediates.
///
/// Returns None if the definition of \p Root is not a G_PTR_ADD or its result
/// has more than one non-debug use.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd = MRI.getVRegDef(Root.getReg());
  const bool IsPtrAdd = PtrAdd->getOpcode() == TargetOpcode::G_PTR_ADD;
  if (!IsPtrAdd)
    return None;

  const Register PtrAddResult = PtrAdd->getOperand(0).getReg();
  if (!MRI.hasOneNonDBGUse(PtrAddResult))
    return None;

  // The renderers read the G_PTR_ADD operands when they are invoked.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(PtrAdd->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(PtrAdd->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Both trailing immediates are zero.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}
/// Match \p Root as a register-offset address for an access of
/// \p SizeInBytes.
///
/// Returns None when \p Root is not a register defined by a G_PTR_ADD, or
/// when the offset is a constant that is either a non-negative multiple of
/// the access size below (0x1000 << log2(SizeInBytes)), or one for which the
/// local PrefersADD predicate holds for the value or its negation.
/// Otherwise tries selectAddrModeShiftedExtendXReg and then falls back to
/// selectAddrModeRegisterOffset.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return None;

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return None;

  // True when Imm fits in the low 12 bits, or when it is a 12-bit value
  // shifted left by 12 that has bits set both in [12,15] and in [16,23].
  const auto PrefersADD = [](int64_t Imm) {
    if ((Imm & 0xfffffffffffff000LL) == 0x0LL)
      return true;
    if ((Imm & 0xffffffffff000fffLL) != 0x0LL)
      return false;
    return (Imm & 0xffffffffff00ffffLL) != 0x0LL &&
           (Imm & 0xffffffffffff0fffLL) != 0x0LL;
  };

  if (auto OffsetCst = getIConstantVRegValWithLookThrough(
          PtrAdd->getOperand(2).getReg(), MRI)) {
    const unsigned Scale = Log2_32(SizeInBytes);
    const int64_t ImmOff = OffsetCst->Value.getSExtValue();

    const bool IsScaledImmOffset = ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
                                   ImmOff < (0x1000 << Scale);
    if (IsScaledImmOffset)
      return None;

    if (PrefersADD(ImmOff) || PrefersADD(-ImmOff))
      return None;
  }

  if (auto ShiftedFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes))
    return ShiftedFns;
  return selectAddrModeRegisterOffset(Root);
}
/// Select a [base, extended 32-bit register offset] addressing mode for an
/// access of \p SizeInBytes bytes whose address is \p Root.
///
/// \p Root must be a register defined by a G_PTR_ADD that
/// isWorthFoldingIntoExtendedReg accepts. Two shapes are tried, in order:
///  1. whatever selectExtendedSHL matches on the offset (called with its last
///     argument set to true);
///  2. an offset defined by an instruction for which getExtendTypeForInst
///     (in load/store mode) reports a valid extend.
///
/// For case 2 the renderers add the base register, the extend's source moved
/// into GPR32, and two immediates: 1 if the extend is SXTW (0 otherwise),
/// followed by 0.
///
/// \returns None if neither shape matches.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  // Root.getReg() is only valid for register operands; reject anything else
  // up front, as the sibling addressing-mode selectors do.
  if (!Root.isReg())
    return None;

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // First preference: let the shared matcher fold the offset computation.
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, true);
  if (ExtendedShl)
    return ExtendedShl;

  // That did not match. Try folding a bare extend of the offset instead.
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // A valid extend type means OffsetInst is a foldable extend.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // The offset register has to live in GPR32; copy it there if necessary.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}
/// Select a [base, signed immediate] addressing mode whose immediate is not
/// scaled by the access size.
///
/// \p Root must be a register that isBaseWithConstantOffset accepts, and the
/// offset must be a G_CONSTANT of at most 64 bits. The match is rejected when
/// the offset is a non-negative multiple of \p Size below
/// 0x1000 << Log2_32(Size) (that case is left to the scaled form), and
/// otherwise accepted only for offsets in [-256, 255].
///
/// \returns renderers adding the base operand and the raw offset, or None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg() || !isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *AddrDef = MRI.getVRegDef(Root.getReg());
  MachineOperand &OffsetOp = AddrDef->getOperand(2);
  if (!OffsetOp.isReg())
    return None;

  // The offset has to be a G_CONSTANT carrying a ConstantInt that fits in 64
  // bits.
  MachineInstr *OffsetDef = MRI.getVRegDef(OffsetOp.getReg());
  if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  MachineOperand &CstOp = OffsetDef->getOperand(1);
  if (!CstOp.isCImm())
    return None;
  const ConstantInt *Cst = CstOp.getCImm();
  if (Cst->getBitWidth() > 64)
    return None;
  const int64_t RHSC = Cst->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  const bool FitsScaledForm = (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
                              RHSC < (0x1000 << Log2_32(Size));
  if (FitsScaledForm)
    return None;

  // Anything outside [-256, 255] cannot be used.
  if (RHSC < -256 || RHSC >= 256)
    return None;

  MachineOperand &Base = AddrDef->getOperand(1);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
  }};
}
/// Try to fold a G_ADD_LOW fed by an ADRP into the immediate operand of a
/// memory access of \p Size bytes.
///
/// The fold is rejected (None) when:
///  - \p RootDef is not a G_ADD_LOW, or its first source is not an ADRP;
///  - the ADRP's global operand has an offset that is not a multiple of
///    \p Size;
///  - the referenced global is thread-local;
///  - the global's pointer alignment is smaller than \p Size.
///
/// \returns renderers adding the ADRP's result register followed by the
/// global address (same global and offset) flagged with the classified
/// reference flags plus MO_PAGEOFF | MO_NC.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return None;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return None;

  // The offset folded into the access must be a multiple of the access size.
  auto Offset = Adrp.getOperand(1).getOffset();
  if (Offset % Size != 0)
    return None;

  // Thread-local globals are never folded here.
  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return None;

  // The global must be aligned to at least the access size.
  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return None;

  // Note: no instructions are built here, so no MachineIRBuilder is needed;
  // the renderers only add operands to the instruction being selected.
  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}
/// Select a [base, scaled unsigned immediate] addressing mode for an access of
/// \p Size bytes whose address is \p Root.
///
/// The following shapes are tried in order:
///  1. \p Root defined by G_FRAME_INDEX: frame index operand, immediate 0.
///  2. Small code model only: whatever tryFoldAddLowIntoImm matches.
///  3. isBaseWithConstantOffset(Root) with an offset that is a non-negative
///     multiple of \p Size below 0x1000 << Log2_32(Size): the base (or its
///     frame index operand when the base is a G_FRAME_INDEX) and the offset
///     shifted right by Log2_32(Size).
///  4. If selectAddrModeUnscaled would match, give up (None) so that form is
///     used instead.
///  5. Otherwise: \p Root itself with immediate 0.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  if (!Root.isReg())
    return None;

  MachineInstr *AddrDef = MRI.getVRegDef(Root.getReg());

  // A bare frame index is addressed directly with a zero offset.
  if (AddrDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(AddrDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};

  // Under the small code model, try folding an ADRP + G_ADD_LOW pair.
  if (MF.getTarget().getCodeModel() == CodeModel::Small)
    if (auto Folded = tryFoldAddLowIntoImm(*AddrDef, Size, MRI))
      return Folded;

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &BaseOp = AddrDef->getOperand(1);
    MachineInstr *BaseDef = MRI.getVRegDef(BaseOp.getReg());
    MachineInstr *CstDef = MRI.getVRegDef(AddrDef->getOperand(2).getReg());

    const int64_t RHSC = static_cast<int64_t>(
        CstDef->getOperand(1).getCImm()->getZExtValue());
    const unsigned Scale = Log2_32(Size);

    // The offset must be an in-range, non-negative multiple of the access
    // size to be usable as a scaled immediate.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      const int64_t ScaledImm = RHSC >> Scale;

      // Look through a frame-index base so the frame index itself is used.
      if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(BaseDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(ScaledImm); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(BaseOp); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(ScaledImm); },
      }};
    }
  }

  // Before falling back to the general case, defer to the unscaled form when
  // it can handle this address.
  if (selectAddrModeUnscaled(Root, Size))
    return None;

  // General case: use the root register with a zero offset.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}
/// Map the opcode of \p MI to the corresponding AArch64 shift kind.
///
/// G_SHL -> LSL, G_LSHR -> LSR, G_ASHR -> ASR, G_ROTR -> ROR; every other
/// opcode yields InvalidShiftExtend.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::G_SHL)
    return AArch64_AM::LSL;
  if (Opc == TargetOpcode::G_LSHR)
    return AArch64_AM::LSR;
  if (Opc == TargetOpcode::G_ASHR)
    return AArch64_AM::ASR;
  if (Opc == TargetOpcode::G_ROTR)
    return AArch64_AM::ROR;
  return AArch64_AM::InvalidShiftExtend;
}
/// Try to fold a shift-by-constant that defines \p Root into a shifted
/// register operand.
///
/// \p Root must be a register defined by an instruction that
/// getShiftTypeForInst recognises (rotates only when \p AllowROR is set), that
/// isWorthFoldingIntoExtendedReg accepts, and whose shift amount
/// getImmedFromMO can resolve to an immediate.
///
/// \returns renderers adding the shifted source register and the encoded
/// shifter immediate (shift amount taken modulo the source's bit width), or
/// None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return None;

  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Classify the defining instruction; bail out on anything that is not a
  // permitted shift.
  MachineInstr *ShiftDef = MRI.getVRegDef(Root.getReg());
  const AArch64_AM::ShiftExtendType Kind = getShiftTypeForInst(*ShiftDef);
  const bool Disallowed = Kind == AArch64_AM::InvalidShiftExtend ||
                          (Kind == AArch64_AM::ROR && !AllowROR);
  if (Disallowed || !isWorthFoldingIntoExtendedReg(*ShiftDef, MRI))
    return None;

  // The shift amount (operand 2) has to be an immediate.
  auto Amount = getImmedFromMO(ShiftDef->getOperand(2));
  if (!Amount)
    return None;

  // Fold in the shifted value (operand 1) and the masked shift amount.
  Register SrcReg = ShiftDef->getOperand(1).getReg();
  const unsigned BitWidth = MRI.getType(SrcReg).getSizeInBits();
  const unsigned MaskedAmount = *Amount & (BitWidth - 1);
  const unsigned Encoded = AArch64_AM::getShifterImm(Kind, MaskedAmount);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(SrcReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Encoded); }}};
}
/// Classify \p MI as an AArch64 extend operation.
///
///  - G_SEXT / G_SEXT_INREG: SXTB, SXTH or SXTW for a source width of 8, 16 or
///    32 bits (for G_SEXT_INREG the width is the immediate in operand 2).
///  - G_ZEXT / G_ANYEXT: UXTB, UXTH or UXTW for a source width of 8, 16 or 32.
///  - G_AND with a constant mask of 0xFF, 0xFFFF or 0xFFFFFFFF: UXTB, UXTH or
///    UXTW respectively.
///
/// When \p IsLoadStore is true the 8- and 16-bit variants are reported as
/// InvalidShiftExtend; only the 32-bit ones are returned. Anything else
/// yields InvalidShiftExtend.
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  // Byte/halfword extends are only reported outside of load/store context.
  auto NarrowOrInvalid = [IsLoadStore](AArch64_AM::ShiftExtendType Narrow) {
    return IsLoadStore ? AArch64_AM::InvalidShiftExtend : Narrow;
  };

  const unsigned Opc = MI.getOpcode();

  // Sign extends.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned SrcBits;
    if (Opc == TargetOpcode::G_SEXT_INREG)
      SrcBits = MI.getOperand(2).getImm();
    else
      SrcBits = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(SrcBits != 64 && "Extend from 64 bits?");
    if (SrcBits == 8)
      return NarrowOrInvalid(AArch64_AM::SXTB);
    if (SrcBits == 16)
      return NarrowOrInvalid(AArch64_AM::SXTH);
    if (SrcBits == 32)
      return AArch64_AM::SXTW;
    return AArch64_AM::InvalidShiftExtend;
  }

  // Zero / any extends.
  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned SrcBits = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(SrcBits != 64 && "Extend from 64 bits?");
    if (SrcBits == 8)
      return NarrowOrInvalid(AArch64_AM::UXTB);
    if (SrcBits == 16)
      return NarrowOrInvalid(AArch64_AM::UXTH);
    if (SrcBits == 32)
      return AArch64_AM::UXTW;
    return AArch64_AM::InvalidShiftExtend;
  }

  // The only remaining candidate is a G_AND with a low-bits mask, which acts
  // as a zero extend.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> Mask = getImmedFromMO(MI.getOperand(2));
  if (!Mask)
    return AArch64_AM::InvalidShiftExtend;

  if (*Mask == 0xFF)
    return NarrowOrInvalid(AArch64_AM::UXTB);
  if (*Mask == 0xFFFF)
    return NarrowOrInvalid(AArch64_AM::UXTH);
  if (*Mask == 0xFFFFFFFF)
    return AArch64_AM::UXTW;
  return AArch64_AM::InvalidShiftExtend;
}
/// Return a register holding \p Reg that matches the size of register class
/// \p RC.
///
/// If the type of \p Reg already has the same bit width as \p RC, \p Reg is
/// returned unchanged. Otherwise a COPY into \p RC is built with \p MIB,
/// selected immediately via selectCopy, and its result register is returned.
/// \p Reg must be a scalar.
Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const LLT RegTy = MRI.getType(Reg);
  assert(!RegTy.isVector() && "Expected scalars only!");

  // Nothing to do when the widths already agree.
  const bool AlreadyFits = RegTy.getSizeInBits() == TRI.getRegSizeInBits(RC);
  if (AlreadyFits)
    return Reg;

  // Create a copy into the requested class and select it right away.
  auto NewCopy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*NewCopy, TII, MRI, TRI, RBI);
  return NewCopy.getReg(0);
}
/// Try to fold an extend, optionally followed by a left shift of at most 4,
/// that defines \p Root into an extended-register arithmetic operand.
///
/// The definition of \p Root (looking through copies) must be accepted by
/// isWorthFoldingIntoExtendedReg and be either
///  - a G_SHL by a constant <= 4 whose shifted value is defined (through
///    copies) by an instruction getExtendTypeForInst recognises, or
///  - such an extend directly. In this case a UXTW of a 32-bit source is
///    rejected when isDef32 holds for the source's defining instruction.
///
/// \returns renderers adding the extend's source (moved into GPR32) and the
/// immediate from getArithExtendImm(extend kind, shift amount), or None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;

  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef || !isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Work out which instruction is the extend. With a shift in front of it we
  // also need a small constant shift amount.
  const bool HasShift = RootDef->getOpcode() == TargetOpcode::G_SHL;
  uint64_t ShiftVal = 0;
  MachineInstr *ExtDef = RootDef;
  if (HasShift) {
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RootDef->getOperand(2));
    if (!MaybeShiftVal || *MaybeShiftVal > 4)
      return None;
    ShiftVal = *MaybeShiftVal;

    // The extend, if any, defines the value being shifted.
    ExtDef = getDefIgnoringCopies(RootDef->getOperand(1).getReg(), MRI);
    if (!ExtDef)
      return None;
  }

  AArch64_AM::ShiftExtendType Ext = getExtendTypeForInst(*ExtDef, MRI);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;
  Register ExtReg = ExtDef->getOperand(1).getReg();

  // For a bare 32-bit UXTW, do not fold when the source's definition
  // satisfies isDef32.
  // NOTE(review): presumably because such a definition already leaves the
  // upper half zeroed, making the extend free -- confirm.
  if (!HasShift && Ext == AArch64_AM::UXTW &&
      MRI.getType(ExtReg).getSizeInBits() == 32) {
    if (isDef32(*MRI.getVRegDef(ExtReg)))
      return None;
  }

  // The extended operand has to be a GPR32; copy it over when it is not.
  MachineIRBuilder Builder(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, Builder);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}
/// Add the sign-extended value of the G_CONSTANT \p MI to \p MIB as an
/// immediate operand. \p OpIdx is expected to be -1.
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");

  // Look the constant up through the register it defines.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const Register DefReg = MI.getOperand(0).getReg();
  Optional<int64_t> Value = getIConstantVRegSExtVal(DefReg, MRI);
  assert(Value && "Expected constant value");
  MIB.addImm(*Value);
}
/// Add the zero-extended value of the G_CONSTANT \p I to \p MIB, encoded by
/// AArch64_AM::encodeLogicalImmediate for a 32-bit register. \p OpIdx is
/// expected to be -1.
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  const uint64_t Raw = I.getOperand(1).getCImm()->getZExtValue();
  MIB.addImm(AArch64_AM::encodeLogicalImmediate(Raw, 32));
}
/// Add the zero-extended value of the G_CONSTANT \p I to \p MIB, encoded by
/// AArch64_AM::encodeLogicalImmediate for a 64-bit register. \p OpIdx is
/// expected to be -1.
void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  const uint64_t Raw = I.getOperand(1).getCImm()->getZExtValue();
  MIB.addImm(AArch64_AM::encodeLogicalImmediate(Raw, 64));
}
/// Add the value of the G_FCONSTANT \p MI to \p MIB as the immediate computed
/// by AArch64_AM::getFP16Imm. \p OpIdx is expected to be -1.
void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  const APFloat &FPVal = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.addImm(AArch64_AM::getFP16Imm(FPVal));
}
/// Add the value of the G_FCONSTANT \p MI to \p MIB as the immediate computed
/// by AArch64_AM::getFP32Imm. \p OpIdx is expected to be -1.
void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  const APFloat &FPVal = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.addImm(AArch64_AM::getFP32Imm(FPVal));
}
/// Add the value of the G_FCONSTANT \p MI to \p MIB as the immediate computed
/// by AArch64_AM::getFP64Imm. \p OpIdx is expected to be -1.
void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  const APFloat &FPVal = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.addImm(AArch64_AM::getFP64Imm(FPVal));
}
/// Add the bit pattern of the G_FCONSTANT \p MI to \p MIB, encoded by
/// AArch64_AM::encodeAdvSIMDModImmType4. \p OpIdx is expected to be -1.
void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");

  // Reinterpret the floating-point constant as its raw integer bits.
  const APFloat &FPVal = MI.getOperand(1).getFPImm()->getValueAPF();
  const uint64_t RawBits = FPVal.bitcastToAPInt().getZExtValue();
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(RawBits));
}
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
return false;
assert(MI.hasOneMemOperand() &&
"Expected load/store to have only one mem op!");
return (*MI.memoperands_begin())->getSize() == NumBytes;
}
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
return false;
switch (MI.getOpcode()) {
default:
return true;
case TargetOpcode::COPY:
case TargetOpcode::G_BITCAST:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PHI:
return false;
}
}
/// Make every register operand of the G_PHI \p MI use the same register bank
/// as the PHI's destination.
///
/// For each incoming register whose bank differs from the destination's, a
/// COPY of it is inserted after its defining instruction (or at the first
/// non-PHI position of that block when the next instruction is a PHI), the
/// copy's result is assigned the destination bank, and the PHI operand is
/// rewritten to use the copy.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  const RegisterBank *DstRB = MRI.getRegBankOrNull(MI.getOperand(0).getReg());
  assert(DstRB && "Expected PHI dst to have regbank assigned");

  MachineIRBuilder MIB(MI);

  // Visit every operand after the destination.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;

    const Register OpReg = MO.getReg();
    if (MRI.getRegBankOrNull(OpReg) == DstRB)
      continue;

    // Bank mismatch: insert a cross-bank copy next to the definition.
    MachineInstr *OpDef = MRI.getVRegDef(OpReg);
    const LLT OpTy = MRI.getType(OpReg);
    MachineBasicBlock &OpDefBB = *OpDef->getParent();

    // Anything inserted must come after all PHIs of the defining block.
    MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
    if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
      InsertPt = OpDefBB.getFirstNonPHI();
    MIB.setInsertPt(OpDefBB, InsertPt);

    auto CrossBankCopy = MIB.buildCopy(OpTy, OpReg);
    const Register NewReg = CrossBankCopy.getReg(0);
    MRI.setRegBank(NewReg, *DstRB);
    MO.setReg(NewReg);
  }
}
/// Find G_PHIs in \p MF whose incoming operands mix the GPR bank with some
/// other bank and repair them with fixupPHIOpBanks.
///
/// Scanning of a PHI's operands stops at the first register operand that is
/// not a valid scalar narrower than 32 bits or that has no register bank
/// assigned; only the operands seen before that point are considered.
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Collect the PHIs first; fixing them up inserts new instructions.
  SmallVector<MachineInstr *, 32> Phis;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.push_back(&MI);

  for (MachineInstr *Phi : Phis) {
    bool SawGPR = false;
    bool SawOtherBank = false;

    for (const MachineOperand &MO : llvm::drop_begin(Phi->operands())) {
      if (!MO.isReg())
        continue;

      const Register OpReg = MO.getReg();
      const LLT OpTy = MRI.getType(OpReg);
      // Only small (< 32-bit) scalars are of interest.
      if (!OpTy.isValid() || !OpTy.isScalar() || OpTy.getSizeInBits() >= 32)
        break;

      // Without an assigned bank there is nothing to compare; stop looking.
      const RegisterBank *Bank = MRI.getRegBankOrNull(OpReg);
      if (!Bank)
        break;

      if (Bank->getID() != AArch64::GPRRegBankID)
        SawOtherBank = true;
      else
        SawGPR = true;
    }

    // Operands live in different banks: unify them now.
    if (SawGPR && SawOtherBank)
      fixupPHIOpBanks(*Phi, MRI, RBI);
  }
}
namespace llvm {

/// Allocate a new AArch64InstructionSelector for the given target machine,
/// subtarget and register bank info. The object is created with `new` and
/// returned as a raw pointer; releasing it is up to the caller.
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  InstructionSelector *Selector =
      new AArch64InstructionSelector(TM, Subtarget, RBI);
  return Selector;
}

} // end namespace llvm