#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-mi-peephole-opt"
namespace {
struct AArch64MIPeepholeOpt : public MachineFunctionPass {
static char ID;
AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
}
const AArch64InstrInfo *TII;
const AArch64RegisterInfo *TRI;
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
using OpcodePair = std::pair<unsigned, unsigned>;
template <typename T>
using SplitAndOpcFunc =
std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
using BuildMIFunc =
std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
Register, Register, Register)>;
template <typename T>
bool splitTwoPartImm(MachineInstr &MI,
SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI);
template <typename T>
bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
template <typename T>
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI);
bool visitORR(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return "AArch64 MI Peephole Optimization pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char AArch64MIPeepholeOpt::ID = 0;
}
INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
"AArch64 MI Peephole Optimization", false, false)
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
T UImm = static_cast<T>(Imm);
if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
return false;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
if (Insn.size() == 1)
return false;
unsigned LowestBitSet = countTrailingZeros(UImm);
unsigned HighestBitSet = Log2_64(UImm);
T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
(static_cast<T>(1) << LowestBitSet);
T NewImm2 = UImm | ~NewImm1;
if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
return false;
Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
return true;
}
template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
unsigned Opc, MachineInstr &MI) {
return splitTwoPartImm<T>(
MI,
[Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
return std::make_pair(Opc, Opc);
return None;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0);
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1);
});
}
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
if (MI.getOperand(3).getImm() != 0)
return false;
if (MI.getOperand(1).getReg() != AArch64::WZR)
return false;
MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!SrcMI)
return false;
if (SrcMI->getOpcode() == TargetOpcode::COPY &&
SrcMI->getOperand(1).getReg().isVirtual()) {
const TargetRegisterClass *RC =
MRI->getRegClass(SrcMI->getOperand(1).getReg());
if (RC != &AArch64::FPR32RegClass &&
((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
return false;
Register CpySrc = SrcMI->getOperand(1).getReg();
if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), CpySrc)
.add(SrcMI->getOperand(1));
}
BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
.addReg(CpySrc);
SrcMI->eraseFromParent();
}
else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
return false;
Register DefReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(2).getReg();
MRI->replaceRegWith(DefReg, SrcReg);
MRI->clearKillFlags(SrcReg);
LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
MI.eraseFromParent();
return true;
}
template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
(Imm & ~static_cast<T>(0xffffff)) != 0)
return false;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
if (Insn.size() == 1)
return false;
Imm0 = (Imm >> 12) & 0xfff;
Imm1 = Imm & 0xfff;
return true;
}
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
return splitTwoPartImm<T>(
MI,
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
T &Imm1) -> Optional<OpcodePair> {
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
return std::make_pair(PosOpc, PosOpc);
if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
return std::make_pair(NegOpc, NegOpc);
return None;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
});
}
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
return splitTwoPartImm<T>(
MI,
[PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
OpcodePair OP;
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
OP = PosOpcs;
else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
OP = NegOpcs;
else
return None;
MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
return None;
return OP;
},
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
unsigned Imm1, Register SrcReg, Register NewTmpReg,
Register NewDstReg) {
DebugLoc DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
.addReg(SrcReg)
.addImm(Imm0)
.addImm(12);
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
.addReg(NewTmpReg)
.addImm(Imm1)
.addImm(0);
});
}
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI) {
MachineBasicBlock *MBB = MI.getParent();
MachineLoop *L = MLI->getLoopFor(MBB);
if (L && !L->isLoopInvariant(MI))
return false;
MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!MovMI)
return false;
SubregToRegMI = nullptr;
if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
SubregToRegMI = MovMI;
MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
if (!MovMI)
return false;
}
if (MovMI->getOpcode() != AArch64::MOVi32imm &&
MovMI->getOpcode() != AArch64::MOVi64imm)
return false;
if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
return false;
if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
return false;
return true;
}
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
MachineInstr &MI,
SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
unsigned RegSize = sizeof(T) * 8;
assert((RegSize == 32 || RegSize == 64) &&
"Invalid RegSize for legal immediate peephole optimization");
MachineInstr *MovMI, *SubregToRegMI;
if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
return false;
T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
if (SubregToRegMI)
Imm &= 0xFFFFFFFF;
OpcodePair Opcode;
if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
Opcode = *R;
else
return false;
MachineFunction *MF = MI.getMF();
const TargetRegisterClass *FirstInstrDstRC =
TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
const TargetRegisterClass *FirstInstrOperandRC =
TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
const TargetRegisterClass *SecondInstrDstRC =
(Opcode.first == Opcode.second)
? FirstInstrDstRC
: TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
const TargetRegisterClass *SecondInstrOperandRC =
(Opcode.first == Opcode.second)
? FirstInstrOperandRC
: TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
Register NewDstReg = DstReg.isVirtual()
? MRI->createVirtualRegister(SecondInstrDstRC)
: DstReg;
MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
if (DstReg != NewDstReg)
MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
if (DstReg != NewDstReg) {
MRI->replaceRegWith(DstReg, NewDstReg);
MI.getOperand(0).setReg(DstReg);
}
MI.eraseFromParent();
if (SubregToRegMI)
SubregToRegMI->eraseFromParent();
MovMI->eraseFromParent();
return true;
}
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
TRI = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
assert(MRI->isSSA() && "Expected to be run on SSA form!");
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::ANDWrr:
Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
break;
case AArch64::ANDXrr:
Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
break;
case AArch64::ORRWrs:
Changed = visitORR(MI);
break;
case AArch64::ADDWrr:
Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
break;
case AArch64::SUBWrr:
Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
break;
case AArch64::ADDXrr:
Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
break;
case AArch64::SUBXrr:
Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
break;
case AArch64::ADDSWrr:
Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
{AArch64::SUBWri, AArch64::SUBSWri},
MI);
break;
case AArch64::SUBSWrr:
Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
{AArch64::ADDWri, AArch64::ADDSWri},
MI);
break;
case AArch64::ADDSXrr:
Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
{AArch64::SUBXri, AArch64::SUBSXri},
MI);
break;
case AArch64::SUBSXrr:
Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
{AArch64::ADDXri, AArch64::ADDSXri},
MI);
break;
}
}
}
return Changed;
}
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
return new AArch64MIPeepholeOpt();
}