#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;
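
/// \returns true if an fneg of \p MI's result can be folded away by rewriting
/// \p MI itself (negating its operands and/or inverting its opcode).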
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
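
/// \returns true if \p MI must already use the VOP3 encoding, either because
/// it has too many operands for a compact encoding or because it produces a
/// 64-bit result; folding a source modifier into such an instruction cannot
/// grow it.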
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() >
             (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
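
/// \returns true if \p MI can accept source modifiers (e.g. neg) on its
/// floating-point inputs.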
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MI.getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}
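
/// \returns true if folding an fneg into every user of \p MI's result is
/// profitable: all users must accept source modifiers, and at most
/// \p CostThreshold of them may be forced into a larger VOP3 encoding.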
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}
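
/// \returns true if signed zeros may be ignored for \p MI, either via the
/// global NoSignedZerosFPMath option or the instruction's nsz fast-math flag.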
static bool mayIgnoreSignedZero(MachineInstr &MI) {
  const TargetOptions &Options = MI.getMF()->getTarget().Options;
  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
}
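
/// \returns true if \p APF is the half, single, or double precision bit
/// pattern of 1/(2*pi), which some subtargets provide as an inline immediate.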
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}
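
/// \returns true when negating the constant operand \p Reg would be more
/// expensive than keeping it: +0.0 and (with hasInv2PiInlineImm) 1/(2*pi)
/// are inline immediates, while their negated forms are not.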
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  Optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}
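
/// Map a min opcode to its max counterpart and vice versa, for rewrites of
/// the form -max(a, b) --> min(-a, -b).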
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}
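
/// Match an fneg whose source instruction can absorb the negation, either by
/// negating its own operands or by switching to the inverse min/max opcode;
/// \p MatchInfo is set to that source instruction.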
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  if (MRI.hasOneNonDBGUse(Src)) {
    // If the fneg can instead be folded for free into all of its users as a
    // source modifier, keep it where it is.
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    // The source value has other users. Give up if the negate folds into the
    // source instruction but either the fneg's users could all take it as a
    // source modifier anyway, or the source's remaining users could not
    // absorb the compensating fneg that the apply step would insert.
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // A free constant operand may no longer be free once negated.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}
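
/// Rewrite \p MatchInfo in place so that it produces the negated value, then
/// delete the now-redundant fneg \p MI. If the original value still has other
/// users, a fresh G_FNEG of the negated result is emitted to recreate it.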
void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) {
  // Negate one operand: strip an existing fneg if present, otherwise build a
  // new one.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Negate exactly one of two operands, preferring whichever is already an
  // fneg so the two negations cancel.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Rewrite MatchInfo so that it computes the negated result directly.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    // -(a + b) --> (-a) + (-b), and similarly for fsub.
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    // -(a * b) --> (-a) * b or a * (-b).
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    // -max(a, b) --> min(-a, -b), and vice versa.
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    // -(a * b + c) --> (-a) * b + (-c) or a * (-b) + (-c).
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    // Odd unary ops: negating the input negates the output.
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC: {
    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now produces the negated value, so the fneg's users can read
    // its result directly.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // MatchInfoDst has other users that still expect the original value, so
    // redefine it as an fneg of the (now negated) result.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}