#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
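/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.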
static bool matchFConstantToConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
Register DstReg = MI.getOperand(0).getReg();
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
if (DstSize != 32 && DstSize != 64)
return false;
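  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating point constants can be materialized using a fmov,
  // it makes more sense to just use a GPR.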
return all_of(MRI.use_nodbg_instructions(DstReg),
[](const MachineInstr &Use) { return Use.mayStore(); });
}
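/// Change a G_FCONSTANT into a G_CONSTANT with the same bit pattern.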
static void applyFConstantToConstant(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
MachineIRBuilder MIB(MI);
const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
MI.eraseFromParent();
}
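/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// of the G_TRUNC are known to be sign bits. In this case, we can transform
/// the G_ICMP to directly compare the wide, untruncated value with zero.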
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
GISelKnownBits *KB, Register &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
if (!ICmpInst::isEquality(Pred))
return false;
Register LHS = MI.getOperand(2).getReg();
LLT LHSTy = MRI.getType(LHS);
if (!LHSTy.isScalar())
return false;
Register RHS = MI.getOperand(3).getReg();
Register WideReg;
if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
!mi_match(RHS, MRI, m_SpecificICst(0)))
return false;
LLT WideTy = MRI.getType(WideReg);
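  // The compare against zero is only redundant if every bit dropped by the
  // G_TRUNC is known to be a copy of the sign bit.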
if (KB->computeNumSignBits(WideReg) <=
WideTy.getSizeInBits() - LHSTy.getSizeInBits())
return false;
MatchInfo = WideReg;
return true;
}
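/// Rewrite the G_ICMP found by matchICmpRedundantTrunc to compare \p WideReg
/// against a zero constant of the wide type.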
static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &Builder,
GISelChangeObserver &Observer,
Register &WideReg) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
LLT WideTy = MRI.getType(WideReg);
Builder.setInstrAndDebugLoc(MI);
auto WideZero = Builder.buildConstant(WideTy, 0);
Observer.changingInstr(MI);
MI.getOperand(2).setReg(WideReg);
MI.getOperand(3).setReg(WideZero.getReg(0));
Observer.changedInstr(MI);
return true;
}
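/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst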
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
std::pair<uint64_t, uint64_t> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
MachineFunction &MF = *MI.getMF();
auto &GlobalOp = MI.getOperand(1);
auto *GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return false;
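  // Don't allow anything that could represent offsets etc.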
if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
return false;
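  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...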
Register Dst = MI.getOperand(0).getReg();
uint64_t MinOffset = -1ull;
for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
auto Cst = getIConstantVRegValWithLookThrough(
UseInstr.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
}
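  // Require that the new offset is larger than the current one, so that this
  // combine cannot keep re-firing on its own output.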
uint64_t CurrOffset = GlobalOp.getOffset();
uint64_t NewOffset = MinOffset + CurrOffset;
if (NewOffset <= CurrOffset)
return false;
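  // Check whether folding this offset is legal. It must not go out of bounds
  // of the referenced object to avoid violating the code model, and must be
  // smaller than 2^20 so that it remains expressible in all of the object
  // formats we support.
  //
  // Since the offsets are unsigned, this check also rejects negative offsets,
  // which would otherwise be treated like very large positive ones.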
if (NewOffset >= (1 << 20))
return false;
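  // The folded offset must stay within the bounds of the referenced object.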
Type *T = GV->getValueType();
if (!T->isSized() ||
NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return false;
MatchInfo = std::make_pair(NewOffset, MinOffset);
return true;
}
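/// Fold the minimum constant offset found by matchFoldGlobalOffset into the
/// G_GLOBAL_VALUE itself, then recreate the original pointer value with a
/// G_PTR_ADD that subtracts the offset back out, so existing users of the
/// pointer stay correct.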
static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
GISelChangeObserver &Observer,
std::pair<uint64_t, uint64_t> &MatchInfo) {
uint64_t Offset, MinOffset;
std::tie(Offset, MinOffset) = MatchInfo;
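  // Rewrite the G_GLOBAL_VALUE to carry the folded offset, defining a fresh
  // vreg for its result, then rebuild the old (unoffset) pointer value for the
  // existing G_PTR_ADD users by subtracting MinOffset again.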
B.setInstrAndDebugLoc(MI);
Observer.changingInstr(MI);
auto &GlobalOp = MI.getOperand(1);
auto *GV = GlobalOp.getGlobal();
GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
Register Dst = MI.getOperand(0).getReg();
Register NewGVDst = MRI.cloneVirtualRegister(Dst);
MI.getOperand(0).setReg(NewGVDst);
Observer.changedInstr(MI);
B.buildPtrAdd(
Dst, NewGVDst,
B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
return true;
}
static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
CombinerHelper &Helper,
GISelChangeObserver &Observer) {
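  // Try to simplify a G_UADDO with 8- or 16-bit operands to a wide G_ADD and a
  // TBNZ if the result is only used in the no-overflow case. This is
  // restricted to cases where we know the high bits of the operands are 0. If
  // there's an overflow, then the 9th or 17th bit must be set, which can be
  // checked using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %op0 = G_TRUNC %z0
  //   %z1 = G_ASSERT_ZEXT _
  //   %op1 = G_TRUNC %z1
  //   %val, %cond = G_UADDO %op0, %op1
  //   G_BRCOND %cond, %error.bb
  //
  // error.bb:
  //   (no successors and no uses of %val)
  //
  // To:
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %z1 = G_ASSERT_ZEXT _
  //   %add = G_ADD %z0, %z1
  //   %val = G_TRUNC %add
  //   %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  //   %cond = G_ICMP NE, %bit, 0
  //   G_BRCOND %cond, %error.bb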
auto &MRI = *B.getMRI();
MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
Register Op0Wide;
Register Op1Wide;
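  // Both operands of the G_UADDO must be truncations of wider values.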
if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
!mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
return false;
LLT WideTy0 = MRI.getType(Op0Wide);
LLT WideTy1 = MRI.getType(Op1Wide);
Register ResVal = MI.getOperand(0).getReg();
LLT OpTy = MRI.getType(ResVal);
MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
unsigned OpTySize = OpTy.getScalarSizeInBits();
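  // The wide values must be defined by G_ASSERT_ZEXTs that guarantee zeros in
  // all bits above the narrow operand width.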
if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
OpTySize != Op0WideDef->getOperand(2).getImm() ||
OpTySize != Op1WideDef->getOperand(2).getImm())
return false;
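  // Only handle 8- and 16-bit operands whose wide sources share one scalar
  // type that is strictly wider than the operands.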
if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
OpTySize >= WideTy0.getScalarSizeInBits() ||
(OpTySize != 8 && OpTySize != 16))
return false;
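  // The overflow flag must have exactly one non-debug user: a G_BRCOND.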
Register ResStatus = MI.getOperand(1).getReg();
if (!MRI.hasOneNonDBGUse(ResStatus))
return false;
MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
return false;
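  // The branch must be in the same block as the G_UADDO, and its target (the
  // overflow handler) must have no successors.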
MachineBasicBlock *CurrentMBB = MI.getParent();
MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
return false;
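  // The add result must only be used on the no-overflow path: bail if it is
  // read in the current block or in the overflow block.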
if (any_of(MRI.use_nodbg_instructions(ResVal),
[&MI, FailMBB, CurrentMBB](MachineInstr &I) {
return &MI != &I &&
(I.getParent() == FailMBB || I.getParent() == CurrentMBB);
}))
return false;
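  // Remove the G_UADDO, positioning the builder on the instruction after it
  // first.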
B.setInstrAndDebugLoc(*MI.getNextNode());
MI.eraseFromParent();
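  // Emit the wide add on the untruncated operands.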
Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
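  // Emit a check of the 9th or 17th bit (the carry-out of the narrow add) and
  // update the branch user; this later folds to a TBNZ.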
Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
B.buildAnd(
CondBit, AddDst,
B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
B.buildConstant(LLT::scalar(32), 0));
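  // Narrow the result back down for the remaining users; all of them are on
  // the no-overflow path, where the high bits are known zero.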
B.buildZExtOrTrunc(ResVal, AddDst);
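  // Users that merely zero-extend the narrow result can read the wide add
  // directly, since its high bits are zero when no overflow occurred.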
for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
Register WideReg;
if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
auto OldR = U.getParent()->getOperand(0).getReg();
Observer.erasingInstr(*U.getParent());
U.getParent()->eraseFromParent();
Helper.replaceRegWith(MRI, OldR, AddDst);
}
}
return true;
}
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
public:
AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
: Helper(Helper) {}
};
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
public:
AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
if (!GeneratedRuleCfg.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
if (Generated.tryCombineAll(Observer, MI, B))
return true;
unsigned Opc = MI.getOpcode();
switch (Opc) {
case TargetOpcode::G_CONCAT_VECTORS:
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_UADDO:
return tryToSimplifyUADDO(MI, B, Helper, Observer);
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET: {
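    // At -O0, cap the length we're willing to inline at 32 bytes; otherwise
    // let the usual heuristics decide.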
unsigned MaxLen = EnableOpt ? 0 : 32;
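    // Try to inline memcpy-family calls if optimizations are enabled.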
if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
return true;
if (Opc == TargetOpcode::G_MEMSET)
return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
return false;
}
}
return false;
}
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
AArch64PreLegalizerCombiner();
  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace
void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
getSelectionDAGFallbackAnalysisUsage(AU);
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
: MachineFunctionPass(ID) {
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
auto &TPC = getAnalysis<TargetPassConfig>();
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());
const Function &F = MF.getFunction();
bool EnableOpt =
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
F.hasMinSize(), KB, MDT);
Combiner C(PCInfo, &TPC);
return C.combineMachineInstrs(MF, CSEInfo);
}
char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
"Combine AArch64 machine instrs before legalization",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
"Combine AArch64 machine instrs before legalization", false,
false)
namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm