#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME \
"RISCV atomic pseudo instruction expansion pass"
namespace {
class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
const RISCVInstrInfo *TII;
static char ID;
RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
}
private:
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandAtomicBinOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI);
bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI);
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
};
char RISCVExpandAtomicPseudo::ID = 0;
bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
bool Modified = false;
for (auto &MBB : MF)
Modified |= expandMBB(MBB);
return Modified;
}
bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
Modified |= expandMI(MBB, MBBI, NMBBI);
MBBI = NMBBI;
}
return Modified;
}
bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
switch (MBBI->getOpcode()) {
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
case RISCV::PseudoAtomicLoadNand64:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
case RISCV::PseudoMaskedAtomicLoadAdd32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
case RISCV::PseudoMaskedAtomicLoadSub32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
case RISCV::PseudoMaskedAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
NextMBBI);
case RISCV::PseudoMaskedAtomicLoadMax32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
NextMBBI);
case RISCV::PseudoMaskedAtomicLoadMin32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
NextMBBI);
case RISCV::PseudoMaskedAtomicLoadUMax32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
NextMBBI);
case RISCV::PseudoMaskedAtomicLoadUMin32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
NextMBBI);
case RISCV::PseudoCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
case RISCV::PseudoCmpXchg64:
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case RISCV::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
}
return false;
}
static unsigned getLRForRMW32(AtomicOrdering Ordering) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_W;
case AtomicOrdering::Acquire:
return RISCV::LR_W_AQ;
case AtomicOrdering::Release:
return RISCV::LR_W;
case AtomicOrdering::AcquireRelease:
return RISCV::LR_W_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_W_AQ_RL;
}
}
static unsigned getSCForRMW32(AtomicOrdering Ordering) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::SC_W;
case AtomicOrdering::Acquire:
return RISCV::SC_W;
case AtomicOrdering::Release:
return RISCV::SC_W_RL;
case AtomicOrdering::AcquireRelease:
return RISCV::SC_W_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_W_AQ_RL;
}
}
static unsigned getLRForRMW64(AtomicOrdering Ordering) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::LR_D;
case AtomicOrdering::Acquire:
return RISCV::LR_D_AQ;
case AtomicOrdering::Release:
return RISCV::LR_D;
case AtomicOrdering::AcquireRelease:
return RISCV::LR_D_AQ;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::LR_D_AQ_RL;
}
}
static unsigned getSCForRMW64(AtomicOrdering Ordering) {
switch (Ordering) {
default:
llvm_unreachable("Unexpected AtomicOrdering");
case AtomicOrdering::Monotonic:
return RISCV::SC_D;
case AtomicOrdering::Acquire:
return RISCV::SC_D;
case AtomicOrdering::Release:
return RISCV::SC_D_RL;
case AtomicOrdering::AcquireRelease:
return RISCV::SC_D_RL;
case AtomicOrdering::SequentiallyConsistent:
return RISCV::SC_D_AQ_RL;
}
}
static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
if (Width == 32)
return getLRForRMW32(Ordering);
if (Width == 64)
return getLRForRMW64(Ordering);
llvm_unreachable("Unexpected LR width\n");
}
static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
if (Width == 32)
return getSCForRMW32(Ordering);
if (Width == 64)
return getSCForRMW64(Ordering);
llvm_unreachable("Unexpected SC width\n");
}
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
AtomicRMWInst::BinOp BinOp, int Width) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
Register IncrReg = MI.getOperand(3).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Nand:
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
.addReg(IncrReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
.addReg(ScratchReg)
.addImm(-1);
break;
}
BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
.addMBB(LoopMBB);
}
static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
MachineBasicBlock *MBB, Register DestReg,
Register OldValReg, Register NewValReg,
Register MaskReg, Register ScratchReg) {
assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
.addReg(OldValReg)
.addReg(NewValReg);
BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(ScratchReg)
.addReg(MaskReg);
BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
.addReg(OldValReg)
.addReg(ScratchReg);
}
static void doMaskedAtomicBinOpExpansion(
const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
Register IncrReg = MI.getOperand(3).getReg();
Register MaskReg = MI.getOperand(4).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
.addReg(IncrReg)
.addImm(0);
break;
case AtomicRMWInst::Add:
BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
.addReg(DestReg)
.addReg(IncrReg);
break;
case AtomicRMWInst::Sub:
BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
.addReg(DestReg)
.addReg(IncrReg);
break;
case AtomicRMWInst::Nand:
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
.addReg(IncrReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
.addReg(ScratchReg)
.addImm(-1);
break;
}
insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
ScratchReg);
BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
.addMBB(LoopMBB);
}
bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB.getParent();
auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopMBB);
MF->insert(++LoopMBB->getIterator(), DoneMBB);
LoopMBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(DoneMBB);
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopMBB);
if (!IsMasked)
doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
else
doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
Width);
NextMBBI = MBB.end();
MI.eraseFromParent();
LivePhysRegs LiveRegs;
computeAndAddLiveIns(LiveRegs, *LoopMBB);
computeAndAddLiveIns(LiveRegs, *DoneMBB);
return true;
}
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
MachineBasicBlock *MBB, Register ValReg,
Register ShamtReg) {
BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
.addReg(ValReg)
.addReg(ShamtReg);
BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
.addReg(ValReg)
.addReg(ShamtReg);
}
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI) {
assert(IsMasked == true &&
"Should only need to expand masked atomic max/min");
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB.getParent();
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopHeadMBB);
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
LoopHeadMBB->addSuccessor(LoopTailMBB);
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
LoopTailMBB->addSuccessor(LoopHeadMBB);
LoopTailMBB->addSuccessor(DoneMBB);
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopHeadMBB);
Register DestReg = MI.getOperand(0).getReg();
Register Scratch1Reg = MI.getOperand(1).getReg();
Register Scratch2Reg = MI.getOperand(2).getReg();
Register AddrReg = MI.getOperand(3).getReg();
Register IncrReg = MI.getOperand(4).getReg();
Register MaskReg = MI.getOperand(5).getReg();
bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
.addReg(DestReg)
.addReg(MaskReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
.addReg(DestReg)
.addImm(0);
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Max: {
insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
.addReg(Scratch2Reg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
}
case AtomicRMWInst::Min: {
insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
.addReg(IncrReg)
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
}
case AtomicRMWInst::UMax:
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
.addReg(Scratch2Reg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::UMin:
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
.addReg(IncrReg)
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
}
insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
MaskReg, Scratch1Reg);
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
.addReg(AddrReg)
.addReg(Scratch1Reg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(Scratch1Reg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
LivePhysRegs LiveRegs;
computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
computeAndAddLiveIns(LiveRegs, *DoneMBB);
return true;
}
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB.getParent();
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++MBB.getIterator(), LoopHeadMBB);
MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
LoopHeadMBB->addSuccessor(LoopTailMBB);
LoopHeadMBB->addSuccessor(DoneMBB);
LoopTailMBB->addSuccessor(DoneMBB);
LoopTailMBB->addSuccessor(LoopHeadMBB);
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
DoneMBB->transferSuccessors(&MBB);
MBB.addSuccessor(LoopHeadMBB);
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register AddrReg = MI.getOperand(2).getReg();
Register CmpValReg = MI.getOperand(3).getReg();
Register NewValReg = MI.getOperand(4).getReg();
AtomicOrdering Ordering =
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
if (!IsMasked) {
BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(DestReg)
.addReg(CmpValReg)
.addMBB(DoneMBB);
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(NewValReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
} else {
Register MaskReg = MI.getOperand(5).getReg();
BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
.addReg(MaskReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(CmpValReg)
.addMBB(DoneMBB);
insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
MaskReg, ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
.addReg(ScratchReg)
.addReg(RISCV::X0)
.addMBB(LoopHeadMBB);
}
NextMBBI = MBB.end();
MI.eraseFromParent();
LivePhysRegs LiveRegs;
computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
computeAndAddLiveIns(LiveRegs, *DoneMBB);
return true;
}
}
INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
namespace llvm {
FunctionPass *createRISCVExpandAtomicPseudoPass() {
return new RISCVExpandAtomicPseudo();
}
}