#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-regbank-combiner"
using namespace llvm;
using namespace MIPatternMatch;
// Bundles the instruction builder, function-level target state and the
// match/apply routines implemented in this file: rewriting min/max pairs into
// a med3 instruction and rewriting min/max or fmed3 into a clamp instruction.
class AMDGPURegBankCombinerHelper {
protected:
// Builder used by the apply routines to emit the replacement instructions.
MachineIRBuilder &B;
// The members below are all derived from the builder in the constructor.
// Their declaration order matters: each one is initialized from members
// declared before it (MF from B, Subtarget from MF, RBI/TRI/TII from
// Subtarget).
MachineFunction &MF;
MachineRegisterInfo &MRI;
const GCNSubtarget &Subtarget;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
const SIInstrInfo &TII;
// Generic combiner helper handed in by the caller; stored here but not
// referenced by the member functions defined in this file.
CombinerHelper &Helper;
public:
// Captures the builder and derives function, register and subtarget state
// from it.
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
Subtarget(MF.getSubtarget<GCNSubtarget>()),
RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
TII(*Subtarget.getInstrInfo()), Helper(Helper){};
// Returns true if Reg is assigned to the VGPR register bank.
bool isVgprRegBank(Register Reg);
// Returns Reg itself if it is in the VGPR bank, otherwise a VGPR-bank copy
// of it (reusing an existing copy or building a new one).
Register getAsVgpr(Register Reg);
// Opcode triple for one min/max family: the min opcode, the max opcode and
// the med3 opcode that a min/max pair of that family is rewritten to.
struct MinMaxMedOpc {
unsigned Min, Max, Med;
};
// Result of a successful med3 match: the opcode of the med3 instruction to
// build and its three source registers.
struct Med3MatchInfo {
unsigned Opc;
Register Val0, Val1, Val2;
};
// Maps a min or max opcode to its {Min, Max, Med} triple; any other opcode
// is treated as unreachable.
MinMaxMedOpc getMinMaxPair(unsigned Opc);
// Matches MI against min(max(Val, K0), K1) or max(min(Val, K1), K0), using
// m_Cst as the constant matcher, and binds Val, K0 and K1 on success.
template <class m_Cst, typename CstTy>
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
Register &Val, CstTy &K0, CstTy &K1);
// Match routines: return true and fill the out-parameter when MI can be
// rewritten; MI itself is left untouched.
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
// Apply routines: build the replacement instruction in front of MI, reusing
// MI's result operand and flags, then erase MI.
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
void applyClamp(MachineInstr &MI, Register &Reg);
private:
// Floating-point mode defaults read from the function's
// SIMachineFunctionInfo, plus accessors for its IEEE and DX10Clamp fields.
AMDGPU::SIModeRegisterDefaults getMode();
bool getIEEE();
bool getDX10Clamp();
// Opcode predicates: G_FMINNUM_IEEE and G_FCONSTANT respectively.
bool isFminnumIeee(const MachineInstr &MI);
bool isFCst(MachineInstr *MI);
// True if K0 and K1 are G_FCONSTANTs holding exactly 0.0 and 1.0, in either
// order.
bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
};
/// Returns true if the register bank assigned to \p Reg is the VGPR bank.
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
  const auto *Bank = RBI.getRegBank(Reg, MRI, TRI);
  return Bank->getID() == AMDGPU::VGPRRegBankID;
}
/// Returns a register in the VGPR bank that holds the value of \p Reg.
///
/// If \p Reg is already assigned to the VGPR bank it is returned unchanged.
/// Otherwise an existing COPY of \p Reg whose result is in the VGPR bank is
/// reused if one is found among the users of \p Reg. Failing that, a new COPY
/// is built with the helper's builder and its result is assigned to the VGPR
/// bank.
Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
  if (isVgprRegBank(Reg))
    return Reg;

  // Search for an existing copy of Reg to a VGPR.
  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
    // Test the opcode before reading operand 0. An arbitrary user of Reg is
    // not guaranteed to have a register as its first operand (for example an
    // intrinsic call without results), and getReg() must only be called on
    // register operands.
    if (Use.getOpcode() != AMDGPU::COPY)
      continue;

    Register Def = Use.getOperand(0).getReg();
    if (isVgprRegBank(Def))
      return Def;
  }

  // No reusable copy: build one and place its result in the VGPR bank.
  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
  return VgprReg;
}
/// Maps a min or max opcode to the {Min, Max, Med} opcode triple of its
/// family. Both the min and the max opcode of a family yield the same triple.
/// Any other opcode is unreachable.
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
  switch (Opc) {
  // Signed integer family.
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
    return MinMaxMedOpc{AMDGPU::G_SMIN, AMDGPU::G_SMAX,
                        AMDGPU::G_AMDGPU_SMED3};
  // Unsigned integer family.
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
    return MinMaxMedOpc{AMDGPU::G_UMIN, AMDGPU::G_UMAX,
                        AMDGPU::G_AMDGPU_UMED3};
  // Floating-point families; both map to the same med3 opcode.
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
    return MinMaxMedOpc{AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM,
                        AMDGPU::G_AMDGPU_FMED3};
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
    return MinMaxMedOpc{AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
                        AMDGPU::G_AMDGPU_FMED3};
  default:
    break;
  }
  llvm_unreachable("Unsupported opcode");
}
// Matches MI against either of two shapes built from the opcodes in MMMOpc:
//   min(max(Val, K0), K1)
//   max(min(Val, K1), K0)
// Every binary operation is matched commutatively, so the operands may appear
// in either order. m_Cst is the matcher type used for the two constants and
// CstTy is the type it binds into. On success Val, K0 and K1 are bound; this
// function does not compare K0 with K1, callers do that themselves.
template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
MachineRegisterInfo &MRI,
MinMaxMedOpc MMMOpc, Register &Val,
CstTy &K0, CstTy &K1) {
return mi_match(
MI, MRI,
m_any_of(
// min(max(Val, K0), K1)
m_CommutativeBinOp(
MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
m_Cst(K1)),
// max(min(Val, K1), K0)
m_CommutativeBinOp(
MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
m_Cst(K0))));
}
/// Matches an integer min/max pair with two constant bounds that can be
/// replaced by a single med3 instruction.
///
/// Requires the result to be in the VGPR bank and to be either s32, or s16 on
/// a subtarget that reports hasMed3_16(). The lower bound K0 must not exceed
/// the upper bound K1 (signed or unsigned comparison, depending on the
/// opcode family). On success \p MatchInfo receives the med3 opcode, the
/// value register and the registers of both constants.
bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  const Register DstReg = MI.getOperand(0).getReg();
  if (!isVgprRegBank(DstReg))
    return false;

  const LLT DstTy = MRI.getType(DstReg);
  const bool IsS32 = DstTy == LLT::scalar(32);
  const bool IsSupportedS16 =
      DstTy == LLT::scalar(16) && Subtarget.hasMed3_16();
  if (!IsS32 && !IsSupportedS16)
    return false;

  const MinMaxMedOpc Opcodes = getMinMaxPair(MI.getOpcode());
  Register Src;
  Optional<ValueAndVReg> LowCst, HighCst;
  if (!matchMed<GCstAndRegMatch>(MI, MRI, Opcodes, Src, LowCst, HighCst))
    return false;

  // Reject the pattern when the bounds are inverted (K0 > K1).
  const auto &Low = LowCst->Value;
  const auto &High = HighCst->Value;
  const bool IsSignedMed = Opcodes.Med == AMDGPU::G_AMDGPU_SMED3;
  const bool IsUnsignedMed = Opcodes.Med == AMDGPU::G_AMDGPU_UMED3;
  if ((IsSignedMed && Low.sgt(High)) || (IsUnsignedMed && Low.ugt(High)))
    return false;

  MatchInfo = {Opcodes.Med, Src, LowCst->VReg, HighCst->VReg};
  return true;
}
/// Matches a floating-point min/max pair with two constant bounds that can be
/// replaced by a single med3 instruction.
///
/// Requires the result to be s32, or s16 on a subtarget that reports
/// hasMed3_16(), and K0 <= K1. The rewrite is accepted only if either IEEE
/// mode is on and \p MI is G_FMINNUM_IEEE, or the result is known never to be
/// NaN. In addition, each constant must either have more than one non-debug
/// use or be an inline constant. On success \p MatchInfo receives the med3
/// opcode, the value register and the registers of both constants.
bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  const Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsS32 = DstTy == LLT::scalar(32);
  const bool IsSupportedS16 =
      DstTy == LLT::scalar(16) && Subtarget.hasMed3_16();
  if (!IsS32 && !IsSupportedS16)
    return false;

  const auto Opcodes = getMinMaxPair(MI.getOpcode());
  Register Src;
  Optional<FPValueAndVReg> LowCst, HighCst;
  if (!matchMed<GFCstAndRegMatch>(MI, MRI, Opcodes, Src, LowCst, HighCst))
    return false;

  // Reject the pattern when the bounds are inverted (K0 > K1).
  if (LowCst->Value > HighCst->Value)
    return false;

  const bool NaNBehaviorOk =
      (getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(DstReg, MRI);
  if (!NaNBehaviorOk)
    return false;

  // A constant is acceptable if it is shared with other users or can be
  // encoded as an inline constant.
  auto IsAcceptableCst = [&](const FPValueAndVReg &Cst) {
    return !MRI.hasOneNonDBGUse(Cst.VReg) || TII.isInlineConstant(Cst.Value);
  };
  if (!IsAcceptableCst(*LowCst) || !IsAcceptableCst(*HighCst))
    return false;

  MatchInfo = {Opcodes.Med, Src, LowCst->VReg, HighCst->VReg};
  return true;
}
/// Matches a floating-point min/max pair whose constant bounds are exactly
/// 0.0 and 1.0 (scalar or splat constants), so it can be replaced by a clamp.
///
/// The rewrite is accepted if IEEE and DX10Clamp modes are both on, \p MI is
/// G_FMINNUM_IEEE and the clamped value is known never to be a signaling NaN;
/// or, failing that, if the result of \p MI is known never to be NaN. On
/// success \p Reg receives the value being clamped.
bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
                                                       Register &Reg) {
  const auto Opcodes = getMinMaxPair(MI.getOpcode());
  Register Src;
  Optional<FPValueAndVReg> LowCst, HighCst;
  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, Opcodes, Src, LowCst,
                                        HighCst))
    return false;

  // Only the [0.0, 1.0] range maps onto clamp.
  const bool IsZeroToOne =
      LowCst->Value.isExactlyValue(0.0) && HighCst->Value.isExactlyValue(1.0);
  if (!IsZeroToOne)
    return false;

  const bool IeeeClampOk = getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
                           isKnownNeverSNaN(Src, MRI);
  if (!IeeeClampOk && !isKnownNeverNaN(MI.getOperand(0).getReg(), MRI))
    return false;

  Reg = Src;
  return true;
}
/// Matches an amdgcn_fmed3 intrinsic call with one non-constant source and
/// the constants 0.0 and 1.0 (in either order), so it can be replaced by a
/// clamp of the non-constant source.
///
/// The rewrite is accepted if the result is known never to be NaN; or if IEEE
/// and DX10Clamp modes are both on and either the value is known never to be
/// a signaling NaN or the intrinsic's last source operand is the constant
/// 0.0. On success \p Reg receives the value being clamped.
bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
                                                     Register &Reg) {
  if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
    return false;

  // Operands 2..4 are the three sources of the intrinsic call.
  MachineInstr *First = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
  MachineInstr *Second = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
  MachineInstr *Third = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);

  // Three conditional swaps that move constants towards the end, so that a
  // single non-constant source ends up in First.
  if (isFCst(First) && !isFCst(Second))
    std::swap(First, Second);
  if (isFCst(Second) && !isFCst(Third))
    std::swap(Second, Third);
  if (isFCst(First) && !isFCst(Second))
    std::swap(First, Second);

  if (!isClampZeroToOne(Second, Third))
    return false;

  const Register Src = First->getOperand(0).getReg();

  // Known-not-NaN results can always be clamped.
  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
    Reg = Src;
    return true;
  }

  if (!getIEEE() || !getDX10Clamp())
    return false;

  // Checks the last source operand as written on MI, not the reordered
  // Third pointer above.
  auto LastSourceIsZero = [&]() {
    MachineInstr *Last = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
    if (Last->getOpcode() != TargetOpcode::G_FCONSTANT)
      return false;
    return Last->getOperand(1).getFPImm()->isExactlyValue(0.0);
  };

  if (!isKnownNeverSNaN(Src, MRI) && !LastSourceIsZero())
    return false;

  Reg = Src;
  return true;
}
/// Replaces \p MI with a G_AMDGPU_CLAMP of \p Reg that writes MI's result
/// operand and carries MI's flags, then erases \p MI.
void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
  // Insert at MI's position and inherit its debug location.
  B.setInstrAndDebugLoc(MI);

  const unsigned Flags = MI.getFlags();
  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg}, Flags);

  MI.eraseFromParent();
}
/// Replaces \p MI with the med3 instruction described by \p MatchInfo. All
/// three sources are first brought into the VGPR bank (which may build
/// copies), the new instruction writes MI's result operand and carries MI's
/// flags, and \p MI is erased afterwards.
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                            Med3MatchInfo &MatchInfo) {
  // Insert at MI's position and inherit its debug location.
  B.setInstrAndDebugLoc(MI);

  // Resolve the sources in operand order so any copies are emitted in the
  // same order as the operands appear.
  const Register Src0 = getAsVgpr(MatchInfo.Val0);
  const Register Src1 = getAsVgpr(MatchInfo.Val1);
  const Register Src2 = getAsVgpr(MatchInfo.Val2);

  const unsigned Flags = MI.getFlags();
  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, {Src0, Src1, Src2}, Flags);

  MI.eraseFromParent();
}
/// Returns the mode register defaults recorded in the current function's
/// SIMachineFunctionInfo.
AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
  const auto *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  return FuncInfo->getMode();
}
bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
/// Returns true if \p MI is a G_FMINNUM_IEEE instruction.
bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  return Opc == AMDGPU::G_FMINNUM_IEEE;
}
/// Returns true if \p MI is a G_FCONSTANT instruction. \p MI must not be
/// null; it is dereferenced unconditionally.
bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
  const unsigned Opc = MI->getOpcode();
  return Opc == AMDGPU::G_FCONSTANT;
}
/// Returns true if \p K0 and \p K1 are both G_FCONSTANT instructions and
/// their values are exactly 0.0 and 1.0, in either order.
bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
                                                   MachineInstr *K1) {
  if (!isFCst(K0) || !isFCst(K1))
    return false;

  const ConstantFP *FirstImm = K0->getOperand(1).getFPImm();
  const ConstantFP *SecondImm = K1->getOperand(1).getFPImm();

  if (FirstImm->isExactlyValue(0.0) && SecondImm->isExactlyValue(1.0))
    return true;
  return FirstImm->isExactlyValue(1.0) && SecondImm->isExactlyValue(0.0);
}
// Holds references to the generic CombinerHelper and the AMDGPU-specific
// helper. Members are protected, so they are reachable from derived classes;
// presumably the TableGen-generated combiner derives from this class (confirm
// against AMDGPUGenRegBankGICombiner.inc).
class AMDGPURegBankCombinerHelperState {
protected:
CombinerHelper &Helper;
AMDGPURegBankCombinerHelper &RegBankHelper;
public:
// Stores both references; neither object is owned, so both must outlive this
// state object.
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
AMDGPURegBankCombinerHelper &RegBankHelper)
: Helper(Helper), RegBankHelper(RegBankHelper) {}
};
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
namespace {
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
// CombinerInfo implementation for this pass. It carries the analyses handed
// to the CombinerHelper and the rule configuration for the generated
// combiner.
class AMDGPURegBankCombinerInfo final : public CombinerInfo {
// Analyses forwarded to CombinerHelper in combine(). MDT may be null: the
// pass passes nullptr when it was created with IsOptNone set.
GISelKnownBits *KB;
MachineDominatorTree *MDT;
public:
// Rule configuration consumed by the generated combiner; it is populated
// from the command line in the constructor.
AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
// Forwards the optimization settings and legalizer info to CombinerInfo and
// parses the rule-selection command line option, aborting with a fatal error
// if it names an invalid rule.
AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
const AMDGPULegalizerInfo *LI,
GISelKnownBits *KB, MachineDominatorTree *MDT)
// NOTE(review): the two leading literals are presumably CombinerInfo's
// AllowIllegalOps (false) and ShouldLegalizeIllegal (true) parameters;
// confirm against CombinerInfo.h.
: CombinerInfo( false, true,
LI, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
if (!GeneratedRuleCfg.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
// Tries every generated combine rule on MI; returns true if one applied.
bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
/// Builds the generic and AMDGPU-specific helpers for this invocation and
/// runs all generated combine rules on \p MI. Returns true if any rule
/// applied.
bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
                                        MachineInstr &MI,
                                        MachineIRBuilder &B) const {
  CombinerHelper GenericHelper(Observer, B, KB, MDT);
  AMDGPURegBankCombinerHelper TargetHelper(B, GenericHelper);
  AMDGPUGenRegBankCombinerHelper GeneratedCombiner(GeneratedRuleCfg,
                                                   GenericHelper, TargetHelper);

  return GeneratedCombiner.tryCombineAll(Observer, MI, B);
}
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
// Machine function pass that runs the AMDGPU register-bank combiner over a
// function.
class AMDGPURegBankCombiner : public MachineFunctionPass {
public:
// Pass identification; passed to the MachineFunctionPass base constructor.
static char ID;
// IsOptNone controls whether the dominator tree analysis is requested and
// used (it is skipped when IsOptNone is true).
AMDGPURegBankCombiner(bool IsOptNone = false);
StringRef getPassName() const override {
return "AMDGPURegBankCombiner";
}
// Runs the combiner on MF; returns whatever Combiner::combineMachineInstrs
// returns, or false if instruction selection already failed for MF.
bool runOnMachineFunction(MachineFunction &MF) override;
// Declares the analyses this pass requires and preserves.
void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
bool IsOptNone;
};
}
// Declares the analyses used by this pass: TargetPassConfig and
// GISelKnownBitsAnalysis are always required, and MachineDominatorTree is
// required only when the pass was not created with IsOptNone. Known bits and
// the dominator tree are declared preserved, and the CFG is not modified.
void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
getSelectionDAGFallbackAnalysisUsage(AU);
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
// The dominator tree is only requested when optimizing; this mirrors
// runOnMachineFunction, which passes a null MDT when IsOptNone is set.
if (!IsOptNone) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
MachineFunctionPass::getAnalysisUsage(AU);
}
/// Constructs the pass, remembers whether it runs in opt-none mode, and
/// initializes the pass in the global pass registry.
AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAMDGPURegBankCombinerPass(Registry);
}
/// Runs the register-bank combiner over \p MF.
///
/// Does nothing and returns false if instruction selection has already
/// failed for \p MF. Otherwise gathers the required analyses, builds the
/// combiner info and returns the result of Combiner::combineMachineInstrs.
bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
  const MachineFunctionProperties &Props = MF.getProperties();
  if (Props.hasProperty(MachineFunctionProperties::Property::FailedISel))
    return false;

  TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();

  // Optimizations are enabled unless the target is at opt level None or the
  // function is to be skipped.
  const bool OptEnabled =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(Fn);

  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  const auto *Legalizer =
      static_cast<const AMDGPULegalizerInfo *>(Subtarget.getLegalizerInfo());

  GISelKnownBits *KnownBits = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  // The dominator tree is only available when it was requested in
  // getAnalysisUsage, i.e. when not running in opt-none mode.
  MachineDominatorTree *DomTree = nullptr;
  if (!IsOptNone)
    DomTree = &getAnalysis<MachineDominatorTree>();

  AMDGPURegBankCombinerInfo Info(OptEnabled, Fn.hasOptSize(), Fn.hasMinSize(),
                                 Legalizer, KnownBits, DomTree);
  Combiner C(Info, &PassConfig);
  return C.combineMachineInstrs(MF, nullptr);
}
// Definition of the pass ID declared in the class; the constructor passes it
// to the MachineFunctionPass base.
char AMDGPURegBankCombiner::ID = 0;
// Pass registration under the DEBUG_TYPE name, with TargetPassConfig and
// GISelKnownBitsAnalysis declared as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
"Combine AMDGPU machine instrs after regbankselect",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
"Combine AMDGPU machine instrs after regbankselect", false,
false)
namespace llvm {

/// Factory for the AMDGPU register-bank combiner pass. The caller receives a
/// newly allocated pass object; \p IsOptNone is forwarded to its constructor.
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
  FunctionPass *Pass = new AMDGPURegBankCombiner(IsOptNone);
  return Pass;
}

} // end namespace llvm