#include "AMDGPUCustomBehaviour.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
namespace llvm {
namespace mca {
/// Hook run on every lowered instruction. Only the s_waitcnt family needs
/// extra handling: their operands are copied onto the mca::Instruction (see
/// processWaitCnt) so the wait encoding can be inspected later.
void AMDGPUInstrPostProcess::postProcessInstruction(
    std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
  switch (MCI.getOpcode()) {
  default:
    // Anything other than an s_waitcnt variant is left untouched.
    break;
  case AMDGPU::S_WAITCNT:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    processWaitCnt(Inst, MCI);
    break;
  }
}
void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
const MCInst &MCI) {
for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
MCAOperand Op;
const MCOperand &MCOp = MCI.getOperand(Idx);
if (MCOp.isReg()) {
Op = MCAOperand::createReg(MCOp.getReg());
} else if (MCOp.isImm()) {
Op = MCAOperand::createImm(MCOp.getImm());
}
Op.setIndex(Idx);
Inst->addOperand(Op);
}
}
/// Constructor: forwards to the generic CustomBehaviour and precomputes,
/// per source instruction, which wait counters it touches so that
/// checkCustomHazard() can evaluate s_waitcnt stalls cheaply at dispatch.
AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                                             const mca::SourceMgr &SrcMgr,
                                             const MCInstrInfo &MCII)
    : CustomBehaviour(STI, SrcMgr, MCII) {
  // Fills InstrWaitCntInfo, indexed parallel to SrcMgr's instructions.
  generateWaitCntInfo();
}
/// Returns the number of cycles instruction \p IR must stall before it can
/// be dispatched, or 0 for no stall. Only the s_waitcnt family creates
/// custom hazards in this model; everything else dispatches immediately.
///
/// Fix: removed the trailing `return 0;` after the switch — it was
/// unreachable because the `default:` case already returns 0 and every
/// other case returns from handleWaitCnt. Also normalized the case-label
/// formatting to one label per line, matching the rest of the file.
unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
                                                  const InstRef &IR) {
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();
  switch (Opcode) {
  default:
    return 0;
  case AMDGPU::S_WAITCNT:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    return handleWaitCnt(IssuedInst, IR);
  }
}
/// Models the stall caused by an s_waitcnt. Counts, per hardware counter,
/// how many currently issued instructions contribute to it, compares the
/// totals with the limits this s_waitcnt encodes, and returns the fewest
/// cycles until an over-limit counter's soonest contributor retires
/// (0 if no counter is over its limit).
unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
                                              const InstRef &IR) {
  // Limits requested by this s_waitcnt. Start at the maximum values ("wait
  // for nothing"); computeWaitCnt lowers the ones the instruction encodes.
  // NOTE(review): these maxima (63/7/31/63) look subtarget-specific --
  // confirm they are right for all ISA versions this runs on.
  unsigned Vmcnt = 63;
  unsigned Expcnt = 7;
  unsigned Lgkmcnt = 31;
  unsigned Vscnt = 63;
  // In-flight contributors per counter.
  unsigned CurrVmcnt = 0;
  unsigned CurrExpcnt = 0;
  unsigned CurrLgkmcnt = 0;
  unsigned CurrVscnt = 0;
  // Fewest cycles until a contributor retires; ~0U means none seen yet.
  unsigned CyclesToWaitVm = ~0U;
  unsigned CyclesToWaitExp = ~0U;
  unsigned CyclesToWaitLgkm = ~0U;
  unsigned CyclesToWaitVs = ~0U;
  computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
  for (const InstRef &PrevIR : IssuedInst) {
    const Instruction &PrevInst = *PrevIR.getInstruction();
    // Source indices keep growing across iterations of the input; wrap them
    // back into InstrWaitCntInfo's range.
    const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
    const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
    const int CyclesLeft = PrevInst.getCyclesLeft();
    assert(CyclesLeft != UNKNOWN_CYCLES &&
           "We should know how many cycles are left for this instruction");
    // Bump the counter and fold CyclesLeft into its running minimum.
    auto Account = [CyclesLeft](bool Contributes, unsigned &Count,
                                unsigned &MinCycles) {
      if (!Contributes)
        return;
      ++Count;
      if (static_cast<unsigned>(CyclesLeft) < MinCycles)
        MinCycles = static_cast<unsigned>(CyclesLeft);
    };
    Account(PrevInstWaitInfo.VmCnt, CurrVmcnt, CyclesToWaitVm);
    Account(PrevInstWaitInfo.ExpCnt, CurrExpcnt, CyclesToWaitExp);
    Account(PrevInstWaitInfo.LgkmCnt, CurrLgkmcnt, CyclesToWaitLgkm);
    Account(PrevInstWaitInfo.VsCnt, CurrVscnt, CyclesToWaitVs);
  }
  // A counter stalls only when more contributors are outstanding than the
  // s_waitcnt permits; among stalling counters take the soonest retirement.
  unsigned CyclesToWait = ~0U;
  auto Consider = [&CyclesToWait](unsigned Curr, unsigned Limit,
                                  unsigned Cycles) {
    if (Curr > Limit && Cycles < CyclesToWait)
      CyclesToWait = Cycles;
  };
  Consider(CurrVmcnt, Vmcnt, CyclesToWaitVm);
  Consider(CurrExpcnt, Expcnt, CyclesToWaitExp);
  Consider(CurrLgkmcnt, Lgkmcnt, CyclesToWaitLgkm);
  Consider(CurrVscnt, Vscnt, CyclesToWaitVs);
  // Nothing over its limit: the s_waitcnt does not stall.
  return CyclesToWait == ~0U ? 0 : CyclesToWait;
}
/// Decodes the counter limits encoded by the s_waitcnt variant \p IR into
/// the caller-provided references. Counters the instruction does not
/// mention are left untouched.
void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
                                           unsigned &Expcnt, unsigned &Lgkmcnt,
                                           unsigned &Vscnt) {
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  const Instruction &Inst = *IR.getInstruction();
  const unsigned Opcode = Inst.getOpcode();
  switch (Opcode) {
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
    // Single-counter variants carry (register, immediate) operands; only
    // the immediate is modeled, and a non-null register draws a warning.
    const MCAOperand *OpReg = Inst.getOperand(0);
    const MCAOperand *OpImm = Inst.getOperand(1);
    assert(OpReg && OpReg->isReg() && "First operand should be a register.");
    assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
    if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
      WithColor::warning() << "The register component of "
                           << MCII.getName(Opcode) << " will be completely "
                           << "ignored. So the wait may not be accurate.\n";
    }
    if (Opcode == AMDGPU::S_WAITCNT_EXPCNT_gfx10)
      Expcnt = OpImm->getImm();
    else if (Opcode == AMDGPU::S_WAITCNT_LGKMCNT_gfx10)
      Lgkmcnt = OpImm->getImm();
    else if (Opcode == AMDGPU::S_WAITCNT_VMCNT_gfx10)
      Vmcnt = OpImm->getImm();
    else // S_WAITCNT_VSCNT_gfx10
      Vscnt = OpImm->getImm();
    return;
  }
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi: {
    // Combined form: one immediate packs vmcnt/expcnt/lgkmcnt; the exact
    // bit layout depends on the ISA version, so let decodeWaitcnt unpack it.
    const unsigned WaitCnt = Inst.getOperand(0)->getImm();
    AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
    return;
  }
  }
}
/// Precomputes, for every source instruction, which of the four wait
/// counters (vmcnt / expcnt / lgkmcnt / vscnt) it contributes to, based on
/// the instruction's TSFlags and the subtarget's features. The table is
/// consulted by handleWaitCnt() whenever an s_waitcnt is issued.
void AMDGPUCustomBehaviour::generateWaitCntInfo() {
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  InstrWaitCntInfo.resize(SrcMgr.size());
  for (const auto &Elem : llvm::enumerate(SrcMgr.getInstructions())) {
    const std::unique_ptr<Instruction> &Inst = Elem.value();
    auto &Info = InstrWaitCntInfo[Elem.index()];
    const unsigned Opcode = Inst->getOpcode();
    const MCInstrDesc &MCID = MCII.get(Opcode);
    if ((MCID.TSFlags & SIInstrFlags::DS) &&
        (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
      // DS (LDS/GDS) operations count against lgkmcnt; the GDS ones also
      // count against expcnt.
      Info.LgkmCnt = true;
      if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
        Info.ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
      Info.LgkmCnt = true;
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        // No separate store counter on this subtarget: everything is vmcnt.
        Info.VmCnt = true;
      else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
        // Loads (including atomics that return a value) use vmcnt.
        Info.VmCnt = true;
      else
        // Stores and no-return atomics use vscnt.
        Info.VsCnt = true;
    } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        Info.VmCnt = true;
      else if ((MCID.mayLoad() &&
                !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
               ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
                !MCID.mayStore()))
        Info.VmCnt = true;
      else if (MCID.mayStore())
        Info.VsCnt = true;
      // NOTE(review): on ISA major versions below 7, VMEM stores and
      // returning atomics apparently also raise expcnt -- mirrors the
      // original condition; confirm against the ISA manual.
      if (IV.Major < 7 &&
          (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
        Info.ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
      // Scalar memory reads count against lgkmcnt.
      Info.LgkmCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::EXP) {
      // Export instructions count against expcnt.
      Info.ExpCnt = true;
    } else {
      // A handful of scalar opcodes also count against lgkmcnt.
      switch (Opcode) {
      case AMDGPU::S_SENDMSG:
      case AMDGPU::S_SENDMSGHALT:
      case AMDGPU::S_MEMTIME:
      case AMDGPU::S_MEMREALTIME:
        Info.LgkmCnt = true;
        break;
      }
    }
  }
}
/// True when the instruction is a VMEM operation, i.e. any of the buffer
/// (MUBUF/MTBUF) or image (MIMG) instruction classes.
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
  const uint64_t VmemMask =
      SIInstrFlags::MUBUF | SIInstrFlags::MTBUF | SIInstrFlags::MIMG;
  return (MCID.TSFlags & VmemMask) != 0;
}
/// Returns true if the named operand exists on this instruction and holds a
/// non-zero immediate (i.e. the modifier is present and set).
bool AMDGPUCustomBehaviour::hasModifiersSet(
    const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
  const int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
  if (Idx < 0)
    return false; // Instruction has no such operand.
  const MCAOperand *Op = Inst->getOperand(Idx);
  return Op != nullptr && Op->isImm() && Op->getImm() != 0;
}
/// True for DS opcodes that always operate on GDS (no modifier needed).
bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
  switch (Opcode) {
  case AMDGPU::DS_ORDERED_COUNT:
  case AMDGPU::DS_GWS_INIT:
  case AMDGPU::DS_GWS_SEMA_V:
  case AMDGPU::DS_GWS_SEMA_BR:
  case AMDGPU::DS_GWS_SEMA_P:
  case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
  case AMDGPU::DS_GWS_BARRIER:
    return true;
  default:
    return false;
  }
}
} }
using namespace llvm;
using namespace mca;
/// Factory registered with the TargetRegistry to build the AMDGPU
/// CustomBehaviour for llvm-mca. The caller owns the returned allocation.
static CustomBehaviour *
createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                            const mca::SourceMgr &SrcMgr,
                            const MCInstrInfo &MCII) {
  return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
}
/// Factory registered with the TargetRegistry to build the AMDGPU
/// InstrPostProcess for llvm-mca. The caller owns the returned allocation.
static InstrPostProcess *
createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
                             const MCInstrInfo &MCII) {
  return new AMDGPUInstrPostProcess(STI, MCII);
}
/// Entry point called by llvm-mca's target initialization: hooks the
/// CustomBehaviour and InstrPostProcess factories above into the registry
/// for both AMDGPU targets.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
  auto RegisterFor = [](Target &T) {
    TargetRegistry::RegisterCustomBehaviour(T, createAMDGPUCustomBehaviour);
    TargetRegistry::RegisterInstrPostProcess(T, createAMDGPUInstrPostProcess);
  };
  RegisterFor(getTheAMDGPUTarget());
  RegisterFor(getTheGCNTarget());
}