#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "arm-mve-vpt-opts"
// Hidden command-line switch, enabled by default. When it is false,
// MergeLoopEnd returns immediately without changing anything, and
// LowerWhileLoopStart takes its revert path instead of building a
// t2WhileLoopStartLR.
static cl::opt<bool>
MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
cl::desc("Enable merging Loop End and Dec instructions."),
cl::init(true));
// Hidden command-line switch, enabled by default. When it is true,
// ConvertTailPredLoop rewrites the register operand located two slots after
// the first VPT predicate operand of every collected instruction so that it
// refers to the loop PHI's result.
static cl::opt<bool>
SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
cl::desc("Enable setting lr as a predicate in tail predication regions."),
cl::init(true));
namespace {
// Machine function pass that first runs three per-loop rewrites
// (LowerWhileLoopStart, MergeLoopEnd, ConvertTailPredLoop) and then five
// per-basic-block rewrites; see runOnMachineFunction for the exact order.
class MVETPAndVPTOptimisations : public MachineFunctionPass {
public:
// Pass identifier handed to the MachineFunctionPass constructor.
static char ID;
// Both pointers are assigned at the top of runOnMachineFunction (after the
// subtarget check) and are not initialised by the constructor.
const Thumb2InstrInfo *TII;
MachineRegisterInfo *MRI;
MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
// Entry point; returns true when any of the rewrites reported a change.
bool runOnMachineFunction(MachineFunction &Fn) override;
// Requires, and declares as preserved, MachineLoopInfo and
// MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override {
return "ARM MVE TailPred and VPT Optimisation Pass";
}
private:
// Per-loop rewrites. Each returns true if it changed the function.
bool LowerWhileLoopStart(MachineLoop *ML);
bool MergeLoopEnd(MachineLoop *ML);
bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
// Inserts an MVE_VPNOT of Target before Instr and makes User read its result;
// returns the new VPNOT.
MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
MachineInstr &Instr,
MachineOperand &User,
Register Target);
// Per-block rewrites. Each returns true if it changed the block (see the
// individual definitions for what exactly is reported).
bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
bool ConvertVPSEL(MachineBasicBlock &MBB);
bool HintDoLoopStartReg(MachineBasicBlock &MBB);
// Walks backwards from PreHeader looking for calls / loop starts; returns the
// instruction that should be treated as the loop start afterwards.
MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
MachineInstr *LoopStart);
};
char MVETPAndVPTOptimisations::ID = 0;
}
// Pass registration: the pass is registered under DEBUG_TYPE
// ("arm-mve-vpt-opts") and lists MachineLoopInfo and MachineDominatorTree as
// dependencies, matching what getAnalysisUsage requires.
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
"ARM MVE TailPred and VPT Optimisations pass", false,
false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
"ARM MVE TailPred and VPT Optimisations pass", false, false)
/// Follows a chain of COPY instructions back to its origin.
///
/// Starting at \p MI, as long as the current instruction is a COPY whose source
/// (operand 1) is a virtual register, steps to the instruction defining that
/// source. Returns the first instruction that is not such a COPY, which may be
/// null if \p MI was null or a definition lookup returned null.
static MachineInstr *LookThroughCOPY(MachineInstr *MI,
                                     MachineRegisterInfo *MRI) {
  for (;;) {
    if (!MI || MI->getOpcode() != TargetOpcode::COPY)
      return MI;
    Register Source = MI->getOperand(1).getReg();
    // A COPY from a non-virtual register ends the walk at the COPY itself.
    if (!Source.isVirtual())
      return MI;
    MI = MRI->getVRegDef(Source);
  }
}
/// Locates the instructions that form the low-overhead loop skeleton of \p ML.
///
/// On success returns true and sets:
///  - \p LoopEnd   to the first latch terminator that is a t2LoopEnd or
///                 t2LoopEndDec branching to the loop header;
///  - \p LoopDec   to LoopEnd itself when that is a t2LoopEndDec, otherwise to
///                 the t2LoopDec that defines LoopEnd's operand 0;
///  - \p LoopPhi   to the two-input PHI that defines LoopDec's operand 1 and
///                 has the latch as one of its incoming blocks;
///  - \p LoopStart to the t2DoLoopStart, t2WhileLoopSetup or
///                 t2WhileLoopStartLR defining the PHI's non-latch input.
/// Definitions are found through LookThroughCOPY. On failure returns false;
/// the output parameters may then be partially written.
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
                               MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
                               MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
  MachineBasicBlock *Header = ML->getHeader();
  MachineBasicBlock *Latch = ML->getLoopLatch();
  if (!Header || !Latch) {
    LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
    return false;
  }

  // Resolves the (copy-stripped) defining instruction of a register operand.
  auto DefOf = [MRI](const MachineOperand &MO) {
    return LookThroughCOPY(MRI->getVRegDef(MO.getReg()), MRI);
  };

  // The branch target sits at a different operand index for the two loop-end
  // opcodes.
  LoopEnd = nullptr;
  for (MachineInstr &Term : Latch->terminators()) {
    unsigned TargetIdx;
    switch (Term.getOpcode()) {
    case ARM::t2LoopEnd:
      TargetIdx = 1;
      break;
    case ARM::t2LoopEndDec:
      TargetIdx = 2;
      break;
    default:
      continue;
    }
    if (Term.getOperand(TargetIdx).getMBB() != Header)
      continue;
    LoopEnd = &Term;
    break;
  }
  if (!LoopEnd) {
    LLVM_DEBUG(dbgs() << " no LoopEnd\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);

  if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) {
    // The merged form acts as its own decrement.
    LoopDec = LoopEnd;
  } else {
    LoopDec = DefOf(LoopEnd->getOperand(0));
    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
      LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
      return false;
    }
  }
  LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);

  // PHI layout: def, (value, block), (value, block) -> five operands.
  LoopPhi = DefOf(LoopDec->getOperand(1));
  if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
      LoopPhi->getNumOperands() != 5) {
    LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
    return false;
  }
  const bool LatchIsFirstInput = LoopPhi->getOperand(2).getMBB() == Latch;
  if (!LatchIsFirstInput && LoopPhi->getOperand(4).getMBB() != Latch) {
    LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);

  // The start value is whichever PHI input does not come from the latch.
  LoopStart = DefOf(LoopPhi->getOperand(LatchIsFirstInput ? 3 : 1));
  bool StartIsKnown = false;
  if (LoopStart) {
    switch (LoopStart->getOpcode()) {
    case ARM::t2DoLoopStart:
    case ARM::t2WhileLoopSetup:
    case ARM::t2WhileLoopStartLR:
      StartIsKnown = true;
      break;
    default:
      break;
    }
  }
  if (!StartIsKnown) {
    LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);

  return true;
}
/// Replaces a t2WhileLoopSetup with ordinary instructions.
///
/// A t2SUBri (immediate 0, always-executed, defining CPSR) carrying the
/// setup's operands 0 and 1 is inserted in front of \p MI. If the block of
/// \p MI has a t2WhileLoopStart terminator, the first such terminator is
/// replaced by a t2Bcc on EQ reading CPSR. Finally \p MI is erased, so the
/// caller must not use it afterwards.
///
/// \param MI  the t2WhileLoopSetup to revert (asserted).
/// \param TII instruction info used to build the replacement instructions.
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
  MachineBasicBlock *MBB = MI->getParent();
  assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
         "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");

  // Subs: same destination and source as the setup, subtracting zero while
  // defining CPSR so the branch below can test the result.
  MachineInstrBuilder MIB =
      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
  MIB.add(MI->getOperand(0));
  MIB.add(MI->getOperand(1));
  MIB.addImm(0);
  MIB.addImm(ARMCC::AL);
  MIB.addReg(ARM::NoRegister);
  MIB.addReg(ARM::CPSR, RegState::Define);

  // Attempt to find a t2WhileLoopStart and revert it to a t2Bcc.
  for (MachineInstr &I : MBB->terminators()) {
    if (I.getOpcode() != ARM::t2WhileLoopStart)
      continue;
    MachineInstrBuilder Branch =
        BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
    // The branch target lives on the t2WhileLoopStart (its operand 1, the
    // same operand LowerWhileLoopStart forwards as the target of the
    // t2WhileLoopStartLR). Operand 1 of the setup instruction is the count
    // register and must not be used as a branch destination.
    Branch.add(I.getOperand(1));
    Branch.addImm(ARMCC::EQ);
    Branch.addReg(ARM::CPSR);
    I.eraseFromParent();
    break;
  }

  MI->eraseFromParent();
}
/// Combines a t2WhileLoopSetup with the t2WhileLoopStart that uses its result
/// into a single t2WhileLoopStartLR, or reverts the loop when that is not
/// possible.
///
/// Nothing is done (returns false) unless findLoopComponents succeeds and the
/// loop start it finds is a t2WhileLoopSetup. Otherwise returns true after
/// either:
///  - reverting the setup, the loop decrement and the loop end, when merging is
///    disabled via MergeEndDec or no t2WhileLoopStart uses the setup's result;
///  - or inserting a t2WhileLoopStartLR (defining the setup's result register,
///    reading the setup's operand 1 and branching to the start's operand 1) in
///    front of the t2WhileLoopStart and erasing both original instructions.
bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
  LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
                    << ML->getHeader()->getName() << "\n");

  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;

  if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
    return false;

  Register LR = LoopStart->getOperand(0).getReg();
  auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
    return MI.getOpcode() == ARM::t2WhileLoopStart;
  });
  if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
    // RevertWhileLoopSetup erases LoopStart, so it must not be touched again.
    // The decrement and end helpers have to be given the loop's own
    // decrement / end instructions (as MergeLoopEnd does), not the setup.
    RevertWhileLoopSetup(LoopStart, TII);
    RevertLoopDec(LoopDec, TII);
    RevertLoopEnd(LoopEnd, TII);
    return true;
  }

  MachineInstrBuilder MI =
      BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
              TII->get(ARM::t2WhileLoopStartLR), LR)
          .add(LoopStart->getOperand(1))
          .add(WLSIt->getOperand(1));
  (void)MI;
  LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());

  WLSIt->eraseFromParent();
  LoopStart->eraseFromParent();
  return true;
}
/// Returns true for instructions that force a loop to be reverted by the
/// callers in this file: calls, and anything isLoopStart() accepts.
static bool IsInvalidTPInstruction(MachineInstr &MI) {
  if (MI.isCall())
    return true;
  return isLoopStart(MI);
}
/// Searches the blocks reachable backwards from \p PreHeader for an
/// instruction rejected by IsInvalidTPInstruction.
///
/// The block containing \p LoopStart is treated as already visited, so the
/// walk never scans it or goes past it. If an offending instruction is found, a
/// t2DoLoopStart carrying \p LoopStart's operands 0 and 1 is inserted before
/// the first terminator of \p PreHeader, \p LoopStart is reverted through
/// RevertWhileLoopStartLR, and the new t2DoLoopStart is returned. Otherwise
/// \p LoopStart is returned unchanged.
MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
    MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
  SmallPtrSet<MachineBasicBlock *, 4> Seen;
  Seen.insert(LoopStart->getParent());

  SmallVector<MachineBasicBlock *> Pending;
  Pending.push_back(PreHeader);

  while (!Pending.empty()) {
    MachineBasicBlock *Block = Pending.pop_back_val();
    if (!Seen.insert(Block).second)
      continue;

    for (MachineInstr &MI : *Block) {
      if (IsInvalidTPInstruction(MI)) {
        LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);

        // Recreate the loop start as a t2DoLoopStart at the end of the
        // preheader.
        MachineInstrBuilder NewStart =
            BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
                    LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
        NewStart.add(LoopStart->getOperand(0));
        NewStart.add(LoopStart->getOperand(1));

        // Operand 1 is now also read by the new instruction, so the old use
        // must not claim to kill it.
        LoopStart->getOperand(1).setIsKill(false);

        RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
        return NewStart;
      }
    }

    Pending.append(Block->pred_begin(), Block->pred_end());
  }
  return LoopStart;
}
/// Fuses the loop decrement and loop end of \p ML into one t2LoopEndDec.
///
/// Returns false without changes when MergeEndDec is off, when
/// findLoopComponents fails, or when unexpected users of the loop registers
/// are found in a loop that does not start with a t2WhileLoopStartLR.
/// Returns true when the loop was either merged or reverted. A revert happens
/// when the loop contains an instruction rejected by IsInvalidTPInstruction,
/// or when the user check fails for a t2WhileLoopStartLR loop.
bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
  if (!MergeEndDec)
    return false;

  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
                    << "\n");

  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
    return false;

  // For while-loops with a preheader, also look at the blocks leading up to
  // the loop; this may swap LoopStart for a freshly built t2DoLoopStart.
  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR)
    if (MachineBasicBlock *PreHeader = ML->getLoopPreheader())
      LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);

  // Undoes the whole low-overhead loop: start, decrement and end, in that
  // order.
  auto RevertWholeLoop = [&]() {
    if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
      RevertDoLoopStart(LoopStart, TII);
    else
      RevertWhileLoopStartLR(LoopStart, TII);
    RevertLoopDec(LoopDec, TII);
    RevertLoopEnd(LoopEnd, TII);
  };

  for (MachineBasicBlock *Block : ML->blocks()) {
    for (MachineInstr &Inst : *Block) {
      if (!IsInvalidTPInstruction(Inst))
        continue;
      LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << Inst);
      RevertWholeLoop();
      return true;
    }
  }

  Register PhiReg = LoopPhi->getOperand(0).getReg();
  Register DecReg = LoopDec->getOperand(0).getReg();
  Register StartReg = LoopStart->getOperand(0).getReg();

  // Verifies that, looking through COPYs into virtual registers, the only
  // users of a register are the expected ones. Every COPY walked through is
  // remembered so it can be deleted once the merge goes ahead.
  SmallVector<MachineInstr *, 4> Copies;
  auto CheckUsers = [&Copies](Register BaseReg,
                              ArrayRef<MachineInstr *> ExpectedUsers,
                              MachineRegisterInfo *MRI) {
    SmallVector<Register, 4> ToVisit;
    ToVisit.push_back(BaseReg);
    while (!ToVisit.empty()) {
      Register Current = ToVisit.pop_back_val();
      for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Current)) {
        if (llvm::is_contained(ExpectedUsers, &UseMI))
          continue;
        const bool IsVirtualCopy =
            UseMI.getOpcode() == TargetOpcode::COPY &&
            UseMI.getOperand(0).getReg().isVirtual();
        if (!IsVirtualCopy) {
          LLVM_DEBUG(dbgs() << "Extra users of register found: " << UseMI);
          return false;
        }
        ToVisit.push_back(UseMI.getOperand(0).getReg());
        Copies.push_back(&UseMI);
      }
    }
    return true;
  };

  const bool UsersAreExpected = CheckUsers(PhiReg, {LoopDec}, MRI) &&
                                CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) &&
                                CheckUsers(StartReg, {LoopPhi}, MRI);
  if (!UsersAreExpected) {
    // A t2WhileLoopStartLR is reverted together with its decrement and end
    // rather than being left behind unmerged.
    if (LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)
      return false;
    RevertWholeLoop();
    return true;
  }

  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);

  // Point the PHI inputs directly at the start and decrement registers,
  // bypassing any COPYs that are about to be removed.
  const bool LatchIsFirstInput =
      LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch();
  LoopPhi->getOperand(LatchIsFirstInput ? 3 : 1).setReg(StartReg);
  LoopPhi->getOperand(LatchIsFirstInput ? 1 : 3).setReg(DecReg);

  MachineBasicBlock *EndBlock = LoopEnd->getParent();
  SmallVector<MachineOperand, 4> Cond;
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  if (!TII->analyzeBranch(*EndBlock, TBB, FBB, Cond) && !FBB) {
    // The block was analysable and has no explicit false destination: append
    // an unconditional branch to the block that follows it in layout.
    MachineFunction::iterator MBBI = ++EndBlock->getIterator();
    BuildMI(EndBlock, DebugLoc(), TII->get(ARM::t2B))
        .addMBB(&*MBBI)
        .add(predOps(ARMCC::AL));
  }

  // Replace the decrement and the end with a single instruction.
  MachineInstrBuilder MI =
      BuildMI(*EndBlock, *LoopEnd, LoopEnd->getDebugLoc(),
              TII->get(ARM::t2LoopEndDec), DecReg)
          .addReg(PhiReg)
          .add(LoopEnd->getOperand(1));
  (void)MI;
  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());

  LoopDec->eraseFromParent();
  LoopEnd->eraseFromParent();
  for (MachineInstr *Copy : Copies)
    Copy->eraseFromParent();
  return true;
}
// Replaces the loop start of ML with its "TP" variant (t2DoLoopStartTP or
// t2WhileLoopStartTP), which additionally carries the count register feeding
// the loop's VCTP instructions. Returns true if the replacement was made and
// false if any precondition below fails (in which case nothing is changed).
bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
MachineDominatorTree *DT) {
LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
<< ML->getHeader()->getName() << "\n");
MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
return false;
// LoopDec == LoopEnd only when the loop end is a t2LoopEndDec (see
// findLoopComponents), i.e. the decrement and end have already been merged.
if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
return false;
// Collect the VCTPs, and separately every other instruction in the loop that
// has a VPT predicate operand.
SmallVector<MachineInstr *, 4> VCTPs;
SmallVector<MachineInstr *, 4> MVEInstrs;
for (MachineBasicBlock *BB : ML->blocks()) {
for (MachineInstr &MI : *BB)
if (isVCTP(&MI))
VCTPs.push_back(&MI);
else if (findFirstVPTPredOperandIdx(MI) != -1)
MVEInstrs.push_back(&MI);
}
if (VCTPs.empty()) {
LLVM_DEBUG(dbgs() << " no VCTPs\n");
return false;
}
// Every VCTP must have the same opcode and the same operand-0 register as the
// first one found.
MachineInstr *FirstVCTP = *VCTPs.begin();
for (MachineInstr *VCTP : VCTPs) {
LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
return false;
}
}
// The VCTP's operand 1 must be a virtual register defined (through COPYs) by
// a two-input PHI with one input coming from the latch; the count register is
// then the PHI's other (non-latch) input.
Register CountReg = FirstVCTP->getOperand(1).getReg();
if (!CountReg.isVirtual()) {
LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
return false;
}
MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
Phi->getNumOperands() != 5 ||
(Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
return false;
}
CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
? Phi->getOperand(3).getReg()
: Phi->getOperand(1).getReg();
// The new start is inserted before the first terminator of the block that
// holds the current start. Give up if some use of the start's result is not
// dominated by that insertion point, or lies in a block the loop header does
// not dominate.
MachineBasicBlock *MBB = LoopStart->getParent();
MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
for (MachineInstr &Use :
MRI->use_instructions(LoopStart->getOperand(0).getReg()))
if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
!DT->dominates(ML->getHeader(), Use.getParent())) {
LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
return false;
}
// Build the TP variant: original operands 0 and 1, then the count register,
// and for the while form the original operand 2 as well.
unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
? ARM::t2DoLoopStartTP
: ARM::t2WhileLoopStartTP;
MachineInstrBuilder MI =
BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
.add(LoopStart->getOperand(0))
.add(LoopStart->getOperand(1))
.addReg(CountReg);
if (NewOpc == ARM::t2WhileLoopStartTP)
MI.add(LoopStart->getOperand(2));
LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
<< *MI.getInstr());
MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
LoopStart->eraseFromParent();
// Optionally make every collected predicated instruction reference the loop
// PHI's result in the operand two slots after its first VPT predicate
// operand.
if (SetLRPredicate) {
Register LR = LoopPhi->getOperand(0).getReg();
for (MachineInstr *MI : MVEInstrs) {
int Idx = findFirstVPTPredOperandIdx(*MI);
MI->getOperand(Idx + 2).setReg(LR);
}
}
return true;
}
/// Returns true if \p Opcode is one for which VCMPOpcodeToVPT yields a
/// non-zero result.
static bool IsVCMP(unsigned Opcode) {
  if (VCMPOpcodeToVPT(Opcode) == 0)
    return false;
  return true;
}
/// Returns false for the opcodes listed below, for which IsVPNOTEquivalent
/// must not try the swapped-operand comparison; returns true for every other
/// opcode.
static bool CanHaveSwappedOperands(unsigned Opcode) {
  switch (Opcode) {
  case ARM::MVE_VCMPf32:
  case ARM::MVE_VCMPf16:
  case ARM::MVE_VCMPf32r:
  case ARM::MVE_VCMPf16r:
  case ARM::MVE_VCMPi8r:
  case ARM::MVE_VCMPi16r:
  case ARM::MVE_VCMPi32r:
  case ARM::MVE_VCMPu8r:
  case ARM::MVE_VCMPu16r:
  case ARM::MVE_VCMPu32r:
  case ARM::MVE_VCMPs8r:
  case ARM::MVE_VCMPs16r:
  case ARM::MVE_VCMPs32r:
    return false;
  default:
    break;
  }
  return true;
}
/// Returns the condition code stored as the immediate in operand 3 of
/// \p Instr, which must be a VCMP (asserted).
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
  const auto RawCode = Instr.getOperand(3).getImm();
  return ARMCC::CondCodes(RawCode);
}
/// Decides whether \p Cond computes the logical negation of \p Prev, so that
/// it could be replaced by a VPNOT of \p Prev's result.
///
/// Both instructions must be VCMPs (asserted) with the same opcode. They are
/// equivalent when \p Prev has the opposite condition of \p Cond on identical
/// operands 1 and 2, or, for opcodes accepted by CanHaveSwappedOperands, when
/// \p Prev has the swapped form of that opposite condition with operands 1 and
/// 2 exchanged.
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));

  const unsigned Opc = Cond.getOpcode();
  if (Opc != Prev.getOpcode())
    return false;

  MachineOperand &CondLHS = Cond.getOperand(1);
  MachineOperand &CondRHS = Cond.getOperand(2);
  MachineOperand &PrevLHS = Prev.getOperand(1);
  MachineOperand &PrevRHS = Prev.getOperand(2);
  const ARMCC::CondCodes PrevCode = GetCondCode(Prev);

  // Same operand order, opposite condition.
  const ARMCC::CondCodes Opposite =
      ARMCC::getOppositeCondition(GetCondCode(Cond));
  if (Opposite == PrevCode && CondLHS.isIdenticalTo(PrevLHS) &&
      CondRHS.isIdenticalTo(PrevRHS))
    return true;

  // Exchanged operand order, where the opcode permits it.
  if (!CanHaveSwappedOperands(Opc))
    return false;

  const ARMCC::CondCodes OppositeSwapped =
      ARMCC::getSwappedCondition(Opposite);
  return OppositeSwapped == PrevCode && CondLHS.isIdenticalTo(PrevRHS) &&
         CondRHS.isIdenticalTo(PrevLHS);
}
/// Returns true if operand 0 of \p Instr is a virtual register whose register
/// class is VCCR. Only operand 0 is inspected; instructions without operands,
/// with a non-register operand 0, or with a register that has no class
/// assigned yield false.
static bool IsWritingToVCCR(MachineInstr &Instr) {
  if (Instr.getNumOperands() == 0)
    return false;

  MachineOperand &FirstOp = Instr.getOperand(0);
  if (!FirstOp.isReg() || !FirstOp.getReg().isVirtual())
    return false;

  const TargetRegisterClass *RC =
      Instr.getMF()->getRegInfo().getRegClassOrNull(FirstOp.getReg());
  if (!RC)
    return false;
  return RC->getID() == ARM::VCCRRegClassID;
}
/// Inserts an unpredicated MVE_VPNOT of \p Target immediately before \p Instr
/// and redirects \p User to the VPNOT's result.
///
/// The result is a fresh virtual register of the same class as \p Target.
/// \p User's kill flag is cleared. Returns the newly created VPNOT.
MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
    MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
    Register Target) {
  const TargetRegisterClass *TargetRC = MRI->getRegClass(Target);
  Register Negated = MRI->createVirtualRegister(TargetRC);

  MachineInstrBuilder VPNOT =
      BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT));
  VPNOT.addDef(Negated);
  VPNOT.addReg(Target);
  addUnpredicatedMveVpredNOp(VPNOT);

  // From here on the user reads the negated value.
  User.setReg(Negated);
  User.setIsKill(false);

  LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
             VPNOT.getInstr()->dump());

  return *VPNOT.getInstr();
}
// Given an unpredicated VPNOT at Iter, scans forward to its first user and, if
// an instruction using Reg was seen on the way, moves the VPNOT so that it
// sits directly before that first user.
//
// Returns true if a user of the VPNOT's result was found in the rest of the
// block (whether or not the VPNOT had to be moved), false otherwise.
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Iter,
Register Reg) {
assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
"The VPNOT cannot be predicated");
MachineInstr &VPNOT = *Iter;
Register VPNOTResult = VPNOT.getOperand(0).getReg();
Register VPNOTOperand = VPNOT.getOperand(1).getReg();
// MustMove: an instruction using Reg was seen before the first user.
// HasUser: a user of the VPNOT's result was found.
bool MustMove = false, HasUser = false;
// Most recent operand seen so far that uses VPNOTOperand with its kill flag
// set (the scan starts at the VPNOT itself).
MachineOperand *VPNOTOperandKiller = nullptr;
for (; Iter != MBB.end(); ++Iter) {
if (MachineOperand *MO =
Iter->findRegisterUseOperand(VPNOTOperand, true)) {
VPNOTOperandKiller = MO;
}
// An instruction using Reg: remember that the VPNOT has to be moved past it.
if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
MustMove = true;
continue;
}
if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
continue;
// First user of the VPNOT's result.
HasUser = true;
if (!MustMove)
break;
LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
Iter->dump());
MBB.splice(Iter, &MBB, VPNOT.getIterator());
// The VPNOT now reads its operand later than before, so a kill flag recorded
// on an earlier use must be dropped.
if (VPNOTOperandKiller)
VPNOTOperandKiller->setIsKill(false);
break;
}
return HasUser;
}
// Within MBB, looks for a VCCR value followed by an unpredicated VPNOT of it,
// and then rewrites later uses so that each one reads the most recently
// produced VPNOT result (inserting further VPNOTs, or removing redundant
// ones) instead of reaching back to an older value.
//
// Returns true if any operand was rewritten, any VPNOT was inserted, or any
// VPNOT was queued for removal. VPNOT moves performed by
// MoveVPNOTBeforeFirstUser are not reflected in the return value.
bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
// VPNOTs made redundant below; erased only after the scan so that iterators
// stay valid.
SmallVector<MachineInstr *, 4> DeadInstructions;
bool Modified = false;
while (Iter != End) {
Register VCCRValue, OppositeVCCRValue;
// Phase 1: find an unpredicated VCCR definition (VCCRValue) followed by an
// unpredicated VPNOT that uses it and has at least one user
// (OppositeVCCRValue).
for (; Iter != End; ++Iter) {
if (!IsWritingToVCCR(*Iter) ||
getVPTInstrPredicate(*Iter) != ARMVCC::None)
continue;
Register Dst = Iter->getOperand(0).getReg();
if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
// A VPNOT without users is skipped; VCCRValue is left unchanged.
if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
continue;
OppositeVCCRValue = Dst;
++Iter;
break;
}
// Any other unpredicated VCCR definition becomes the new candidate.
VCCRValue = Dst;
}
// Phase 1 ran off the end of the block without finding a pair.
if (Iter == End)
break;
assert(VCCRValue && OppositeVCCRValue &&
"VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
"stopped before the end of the block!");
assert(VCCRValue != OppositeVCCRValue &&
"VCCRValue should not be equal to OppositeVCCRValue!");
// Register holding the result of the latest VPNOT in the chain; uses of
// OppositeVCCRValue are redirected to it below.
Register LastVPNOTResult = OppositeVCCRValue;
// Phase 2: rewrite the instructions that follow.
for (; Iter != End; ++Iter) {
bool IsInteresting = false;
if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
IsInteresting = true;
if (Iter->getOpcode() == ARM::MVE_VPNOT) {
// A VPNOT of VCCRValue: its result is replaced everywhere by LastVPNOTResult
// and the instruction is queued for removal.
Register Result = Iter->getOperand(0).getReg();
MRI->replaceRegWith(Result, LastVPNOTResult);
DeadInstructions.push_back(&*Iter);
Modified = true;
LLVM_DEBUG(dbgs()
<< "Replacing all uses of '" << printReg(Result)
<< "' with '" << printReg(LastVPNOTResult) << "'\n");
} else {
// Any other user of VCCRValue: insert a VPNOT of LastVPNOTResult in front of
// it and make it read that instead. The roles of the two tracked registers
// then swap.
MachineInstr &VPNOT =
ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
Modified = true;
LastVPNOTResult = VPNOT.getOperand(0).getReg();
std::swap(VCCRValue, OppositeVCCRValue);
LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
<< "' with '" << printReg(LastVPNOTResult)
<< "' in instr: " << *Iter);
}
} else {
// A user of OppositeVCCRValue is redirected to LastVPNOTResult when the two
// differ; its kill flag is cleared in either case.
if (MachineOperand *MO =
Iter->findRegisterUseOperand(OppositeVCCRValue)) {
IsInteresting = true;
if (LastVPNOTResult != OppositeVCCRValue) {
LLVM_DEBUG(dbgs() << "Replacing usage of '"
<< printReg(OppositeVCCRValue) << "' with '"
<< printReg(LastVPNOTResult) << " for instr: ";
Iter->dump());
MO->setReg(LastVPNOTResult);
Modified = true;
}
MO->setIsKill(false);
}
// An existing unpredicated VPNOT of LastVPNOTResult / OppositeVCCRValue is
// handled as if it had been inserted here: swap the roles and track its
// result.
if (Iter->getOpcode() == ARM::MVE_VPNOT &&
getVPTInstrPredicate(*Iter) == ARMVCC::None) {
Register VPNOTOperand = Iter->getOperand(1).getReg();
if (VPNOTOperand == LastVPNOTResult ||
VPNOTOperand == OppositeVCCRValue) {
IsInteresting = true;
std::swap(VCCRValue, OppositeVCCRValue);
LastVPNOTResult = Iter->getOperand(0).getReg();
}
}
}
// An unrelated VCCR definition ends phase 2; the outer loop resumes phase 1
// from this instruction.
if (!IsInteresting && IsWritingToVCCR(*Iter))
break;
}
}
for (MachineInstr *DeadInstruction : DeadInstructions)
DeadInstruction->eraseFromParent();
return Modified;
}
/// Replaces an unpredicated VCMP by a VPNOT of an earlier VCMP's result when
/// IsVPNOTEquivalent says the two compute opposite conditions.
///
/// The remembered earlier VCMP is forgotten when an unpredicated non-VCMP
/// instruction writing VCCR is met, and after each replacement. Predicated
/// instructions are skipped. Returns true if at least one VCMP was replaced.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> ReplacedVCMPs;

  // Last unpredicated VCMP seen that was not itself replaced.
  MachineInstr *LastVCMP = nullptr;
  // Most recent operand found using LastVCMP's result with its kill flag set.
  MachineOperand *LastVCMPKiller = nullptr;

  for (MachineInstr &MI : MBB.instrs()) {
    if (LastVCMP)
      if (MachineOperand *KillingUse = MI.findRegisterUseOperand(
              LastVCMP->getOperand(0).getReg(), true))
        LastVCMPKiller = KillingUse;

    if (getVPTInstrPredicate(MI) != ARMVCC::None)
      continue;

    if (!IsVCMP(MI.getOpcode())) {
      if (IsWritingToVCCR(MI))
        LastVCMP = nullptr;
      continue;
    }

    const bool Replaceable = LastVCMP && IsVPNOTEquivalent(MI, *LastVCMP);
    if (!Replaceable) {
      LastVCMP = &MI;
      continue;
    }

    // Build a VPNOT of the earlier result that defines this VCMP's result.
    Register EarlierResult = LastVCMP->getOperand(0).getReg();
    MachineInstrBuilder VPNOT =
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(MI.getOperand(0))
            .addReg(EarlierResult);
    addUnpredicatedMveVpredNOp(VPNOT);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               VPNOT.getInstr()->dump(); dbgs() << " Removed VCMP: ";
               MI.dump());

    // The earlier result is now read here as well, so a recorded killing use
    // must lose its flag.
    if (LastVCMPKiller)
      LastVCMPKiller->setIsKill(false);

    ReplacedVCMPs.push_back(&MI);
    LastVCMP = nullptr;
    LastVCMPKiller = nullptr;
  }

  for (MachineInstr *Dead : ReplacedVCMPs)
    Dead->eraseFromParent();

  return !ReplacedVCMPs.empty();
}
/// Shares constant predicate masks between predicated instructions of \p MBB.
///
/// For every instruction with a VPT predicate operand whose predicate register
/// is a virtual register defined by a COPY from a non-VCCR virtual register
/// that is itself defined by a t2MOVi / t2MOVi16:
///  - if the immediate equals the previously seen one, the instruction is made
///    to use the previous predicate register;
///  - if it equals the 16-bit complement of the previous one, an unpredicated
///    MVE_VPNOT of the previous predicate register is inserted and used.
/// COPYs (and their immediate moves) left without users are erased at the end.
/// Any predicated instruction not matching the pattern resets the tracking.
///
/// \param DT unused by this function.
/// \returns true if any operand was rewritten or any instruction was inserted
///          or erased.
bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) {
  // Last constant mask seen and the register currently holding it. A zero
  // LastVPTReg means "nothing tracked".
  unsigned LastVPTImm = 0;
  Register LastVPTReg = 0;
  SmallSet<MachineInstr *, 4> DeadInstructions;
  bool Modified = false;

  // Returns the immediate of the t2MOVi/t2MOVi16 defining the register, or -1U
  // if the register is not defined that way.
  auto getImm = [&](Register GPR) -> unsigned {
    MachineInstr *Def = MRI->getVRegDef(GPR);
    if (Def && (Def->getOpcode() == ARM::t2MOVi ||
                Def->getOpcode() == ARM::t2MOVi16))
      return Def->getOperand(1).getImm();
    return -1U;
  };

  // Queues the COPY (and, when the COPY was its only user, the immediate move
  // feeding it) for deletion once the predicate register has no users left.
  auto markDeadIfUnused = [&](Register VPR, MachineInstr *Copy, Register GPR) {
    if (!MRI->use_empty(VPR))
      return;
    DeadInstructions.insert(Copy);
    if (MRI->hasOneUse(GPR))
      DeadInstructions.insert(MRI->getVRegDef(GPR));
  };

  for (MachineInstr &Instr : MBB.instrs()) {
    // Look for predicated instructions.
    int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
    if (PIdx == -1)
      continue;
    Register VPR = Instr.getOperand(PIdx + 1).getReg();
    if (!VPR.isVirtual())
      continue;

    // The predicate must come from a COPY of a non-VCCR virtual register.
    MachineInstr *Copy = MRI->getVRegDef(VPR);
    if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
        !Copy->getOperand(1).getReg().isVirtual() ||
        MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
      LastVPTReg = 0;
      continue;
    }
    Register GPR = Copy->getOperand(1).getReg();

    unsigned Imm = getImm(GPR);
    if (Imm == -1U) {
      LastVPTReg = 0;
      continue;
    }

    unsigned NotImm = ~Imm & 0xffff;
    if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
      // Same mask as before: reuse the earlier register. Its live range now
      // reaches this instruction, so stale kill flags must be dropped.
      MRI->clearKillFlags(LastVPTReg);
      Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
      Modified = true;
      markDeadIfUnused(VPR, Copy, GPR);
      LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
      // Keep tracking the register that is actually live; the replaced one
      // may just have had its defining COPY queued for deletion.
      VPR = LastVPTReg;
    } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
      // Complement of the previous mask: derive it with a VPNOT of the earlier
      // register, which is therefore read again here.
      MRI->clearKillFlags(LastVPTReg);
      Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
      auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
                           TII->get(ARM::MVE_VPNOT), NewVPR)
                       .addReg(LastVPTReg);
      addUnpredicatedMveVpredNOp(VPNot);

      Instr.getOperand(PIdx + 1).setReg(NewVPR);
      Modified = true;
      markDeadIfUnused(VPR, Copy, GPR);
      LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
                        << Instr);
      VPR = NewVPR;
    }

    LastVPTImm = Imm;
    LastVPTReg = VPR;
  }

  for (MachineInstr *DI : DeadInstructions)
    DI->eraseFromParent();

  return Modified;
}
/// Replaces every MVE_VPSEL that follows a VCTP in \p MBB by an MVE_VORR
/// predicated with ARMVCC::Then.
///
/// The VORR takes the VPSEL's operand 0 as result, operand 1 as both sources,
/// operands 4 and 5 after the predicate code, and operand 2 as its final
/// operand. VPSELs located before the first VCTP of the block are left alone.
/// Returns true if any VPSEL was replaced.
bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 4> ReplacedVPSELs;
  bool SeenVCTP = false;

  for (MachineInstr &MI : MBB.instrs()) {
    if (isVCTP(&MI)) {
      SeenVCTP = true;
      continue;
    }

    const bool IsCandidate = SeenVCTP && MI.getOpcode() == ARM::MVE_VPSEL;
    if (!IsCandidate)
      continue;

    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1))
            .addImm(ARMVCC::Then)
            .add(MI.getOperand(4))
            .add(MI.getOperand(5))
            .add(MI.getOperand(2));
    // Only referenced from the debug output below.
    (void)MIBuilder;
    LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
               dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
    ReplacedVPSELs.push_back(&MI);
  }

  for (MachineInstr *Dead : ReplacedVPSELs)
    Dead->eraseFromParent();

  return !ReplacedVPSELs.empty();
}
/// Sets an ARMRI::RegLR register-allocation hint on the register in operand 1
/// of every t2DoLoopStart in \p MBB. Returns true if any hint was set.
bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
  bool Changed = false;
  for (MachineInstr &MI : MBB.instrs()) {
    if (MI.getOpcode() == ARM::t2DoLoopStart) {
      MachineRegisterInfo &RegInfo = MI.getMF()->getRegInfo();
      RegInfo.setRegAllocationHint(MI.getOperand(1).getReg(), ARMRI::RegLR, 0);
      Changed = true;
    }
  }
  return Changed;
}
/// Pass entry point.
///
/// Does nothing (returns false) unless the subtarget is Thumb2 with LOB.
/// Otherwise caches TII and MRI, runs the three loop rewrites on every loop in
/// preorder, then the five block rewrites on every basic block, and returns
/// true if any of them reported a change.
bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
  const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
  const bool Supported = STI.isThumb2() && STI.hasLOB();
  if (!Supported)
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  MRI = &Fn.getRegInfo();
  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();

  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
                    << "********** Function: " << Fn.getName() << '\n');

  bool Changed = false;

  // Loop-level rewrites; each one is run unconditionally, in this order.
  for (MachineLoop *Loop : MLI->getBase().getLoopsInPreorder()) {
    Changed |= LowerWhileLoopStart(Loop);
    Changed |= MergeLoopEnd(Loop);
    Changed |= ConvertTailPredLoop(Loop, DT);
  }

  // Block-level rewrites; again every one runs for every block.
  for (MachineBasicBlock &Block : Fn) {
    Changed |= HintDoLoopStartReg(Block);
    Changed |= ReplaceConstByVPNOTs(Block, DT);
    Changed |= ReplaceVCMPsByVPNOTs(Block);
    Changed |= ReduceOldVCCRValueUses(Block);
    Changed |= ConvertVPSEL(Block);
  }

  LLVM_DEBUG(dbgs() << "**************************************\n");
  return Changed;
}
/// Factory: returns a newly allocated MVETPAndVPTOptimisations pass.
FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
  FunctionPass *Pass = new MVETPAndVPTOptimisations();
  return Pass;
}