#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
static bool readsThreadIndex(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default: return false;
case Intrinsic::nvvm_read_ptx_sreg_tid_x:
case Intrinsic::nvvm_read_ptx_sreg_tid_y:
case Intrinsic::nvvm_read_ptx_sreg_tid_z:
return true;
}
}
// Returns true if \p II is a read of the laneid special register.
static bool readsLaneId(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::nvvm_read_ptx_sreg_laneid:
    return true;
  default:
    return false;
  }
}
// Returns true if \p II is one of the NVVM atomic intrinsics listed below
// (the 32-bit load_inc/load_dec forms and the generic-address-space
// cta/sys-scoped forms).
static bool isNVVMAtomic(const IntrinsicInst *II) {
  bool IsAtomic = false;
  switch (II->getIntrinsicID()) {
  // Scoped generic atomics, grouped by operation.
  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:

  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys:

  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:

  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:

  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:

  // Unscoped 32-bit increment / decrement.
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    IsAtomic = true;
    break;

  default:
    break;
  }
  return IsAtomic;
}
bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
if (const Argument *Arg = dyn_cast<Argument>(V))
return !isKernelFunction(*Arg->getParent());
if (const Instruction *I = dyn_cast<Instruction>(V)) {
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
unsigned AS = LI->getPointerAddressSpace();
return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
}
if (I->isAtomic())
return true;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (readsThreadIndex(II) || readsLaneId(II))
return true;
if (isNVVMAtomic(II))
return true;
}
if (isa<CallInst>(I))
return true;
}
return false;
}
// Tries to replace the NVVM intrinsic call \p II with equivalent
// target-generic IR: a generic intrinsic call, a cast, a floating-point binary
// operator, or (for the reciprocal intrinsics) the idiom `1 / x`.
//
// Returns the newly created replacement instruction, or nullptr when \p II is
// not one of the handled intrinsics or when the enclosing function's denormal
// mode does not match what the intrinsic requires. \p IC is not used by this
// function.
static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
// What the replacement requires of the enclosing function's denormal output
// mode: no requirement, flushing must be enabled, or flushing must be
// disabled. The check further below treats any output mode other than IEEE as
// "flushing enabled".
enum FtzRequirementTy {
FTZ_Any, FTZ_MustBeOn, FTZ_MustBeOff, };
// Replacements that are not a plain intrinsic / cast / binary-op mapping.
enum SpecialCase {
SPC_Reciprocal,
};
// Describes how to rewrite one NVVM intrinsic. Every non-default constructor
// sets exactly one of the four Optionals; the default constructor leaves all
// of them empty, which means "no simplification".
struct SimplifyAction {
Optional<Intrinsic::ID> IID;
Optional<Instruction::CastOps> CastOp;
Optional<Instruction::BinaryOps> BinaryOp;
Optional<SpecialCase> Special;
FtzRequirementTy FtzRequirement = FTZ_Any;
// Selects which function attribute is consulted for the denormal mode:
// "denormal-fp-math" when true, "denormal-fp-math-f32" otherwise.
bool IsHalfTy = false;
SimplifyAction() = default;
SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
bool IsHalfTy = false)
: IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}
// Cast replacements carry no FTZ requirement (FtzRequirement stays FTZ_Any).
SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
: BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
: Special(Special), FtzRequirement(FtzReq) {}
};
// Look up the action for this intrinsic. In the table below, `_d` variants
// use FTZ_Any, `_ftz_` variants use FTZ_MustBeOn, and the remaining `_f` /
// f16 variants use FTZ_MustBeOff (nvvm_sqrt_f is the one exception).
const SimplifyAction Action = [II]() -> SimplifyAction {
switch (II->getIntrinsicID()) {
// NVVM intrinsics that map to a target-generic intrinsic.
case Intrinsic::nvvm_ceil_d:
return {Intrinsic::ceil, FTZ_Any};
case Intrinsic::nvvm_ceil_f:
return {Intrinsic::ceil, FTZ_MustBeOff};
case Intrinsic::nvvm_ceil_ftz_f:
return {Intrinsic::ceil, FTZ_MustBeOn};
case Intrinsic::nvvm_fabs_d:
return {Intrinsic::fabs, FTZ_Any};
case Intrinsic::nvvm_fabs_f:
return {Intrinsic::fabs, FTZ_MustBeOff};
case Intrinsic::nvvm_fabs_ftz_f:
return {Intrinsic::fabs, FTZ_MustBeOn};
case Intrinsic::nvvm_floor_d:
return {Intrinsic::floor, FTZ_Any};
case Intrinsic::nvvm_floor_f:
return {Intrinsic::floor, FTZ_MustBeOff};
case Intrinsic::nvvm_floor_ftz_f:
return {Intrinsic::floor, FTZ_MustBeOn};
case Intrinsic::nvvm_fma_rn_d:
return {Intrinsic::fma, FTZ_Any};
case Intrinsic::nvvm_fma_rn_f:
return {Intrinsic::fma, FTZ_MustBeOff};
case Intrinsic::nvvm_fma_rn_ftz_f:
return {Intrinsic::fma, FTZ_MustBeOn};
// The f16 / f16x2 variants pass IsHalfTy = true.
case Intrinsic::nvvm_fma_rn_f16:
return {Intrinsic::fma, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fma_rn_ftz_f16:
return {Intrinsic::fma, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fma_rn_f16x2:
return {Intrinsic::fma, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fma_rn_ftz_f16x2:
return {Intrinsic::fma, FTZ_MustBeOn, true};
// fmax: the plain variants map to maxnum, the `_nan_` variants to maximum.
case Intrinsic::nvvm_fmax_d:
return {Intrinsic::maxnum, FTZ_Any};
case Intrinsic::nvvm_fmax_f:
return {Intrinsic::maxnum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmax_ftz_f:
return {Intrinsic::maxnum, FTZ_MustBeOn};
case Intrinsic::nvvm_fmax_nan_f:
return {Intrinsic::maximum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmax_ftz_nan_f:
return {Intrinsic::maximum, FTZ_MustBeOn};
case Intrinsic::nvvm_fmax_f16:
return {Intrinsic::maxnum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmax_ftz_f16:
return {Intrinsic::maxnum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmax_f16x2:
return {Intrinsic::maxnum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmax_ftz_f16x2:
return {Intrinsic::maxnum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmax_nan_f16:
return {Intrinsic::maximum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmax_ftz_nan_f16:
return {Intrinsic::maximum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmax_nan_f16x2:
return {Intrinsic::maximum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
return {Intrinsic::maximum, FTZ_MustBeOn, true};
// fmin: the plain variants map to minnum, the `_nan_` variants to minimum.
case Intrinsic::nvvm_fmin_d:
return {Intrinsic::minnum, FTZ_Any};
case Intrinsic::nvvm_fmin_f:
return {Intrinsic::minnum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmin_ftz_f:
return {Intrinsic::minnum, FTZ_MustBeOn};
case Intrinsic::nvvm_fmin_nan_f:
return {Intrinsic::minimum, FTZ_MustBeOff};
case Intrinsic::nvvm_fmin_ftz_nan_f:
return {Intrinsic::minimum, FTZ_MustBeOn};
case Intrinsic::nvvm_fmin_f16:
return {Intrinsic::minnum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmin_ftz_f16:
return {Intrinsic::minnum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmin_f16x2:
return {Intrinsic::minnum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmin_ftz_f16x2:
return {Intrinsic::minnum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmin_nan_f16:
return {Intrinsic::minimum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmin_ftz_nan_f16:
return {Intrinsic::minimum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmin_nan_f16x2:
return {Intrinsic::minimum, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
return {Intrinsic::minimum, FTZ_MustBeOn, true};
case Intrinsic::nvvm_round_d:
return {Intrinsic::round, FTZ_Any};
case Intrinsic::nvvm_round_f:
return {Intrinsic::round, FTZ_MustBeOff};
case Intrinsic::nvvm_round_ftz_f:
return {Intrinsic::round, FTZ_MustBeOn};
case Intrinsic::nvvm_sqrt_rn_d:
return {Intrinsic::sqrt, FTZ_Any};
// NOTE(review): unlike the other `_f` intrinsics in this table, nvvm_sqrt_f
// carries no FTZ requirement. Presumably it follows the denormal mode of the
// enclosing function rather than forcing one -- confirm against the NVVM
// intrinsic documentation.
case Intrinsic::nvvm_sqrt_f:
return {Intrinsic::sqrt, FTZ_Any};
case Intrinsic::nvvm_sqrt_rn_f:
return {Intrinsic::sqrt, FTZ_MustBeOff};
case Intrinsic::nvvm_sqrt_rn_ftz_f:
return {Intrinsic::sqrt, FTZ_MustBeOn};
case Intrinsic::nvvm_trunc_d:
return {Intrinsic::trunc, FTZ_Any};
case Intrinsic::nvvm_trunc_f:
return {Intrinsic::trunc, FTZ_MustBeOff};
case Intrinsic::nvvm_trunc_ftz_f:
return {Intrinsic::trunc, FTZ_MustBeOn};
// NVVM conversion intrinsics that map to IR cast instructions. Only the
// `_rz` variants are listed here.
case Intrinsic::nvvm_d2i_rz:
case Intrinsic::nvvm_f2i_rz:
case Intrinsic::nvvm_d2ll_rz:
case Intrinsic::nvvm_f2ll_rz:
return {Instruction::FPToSI};
case Intrinsic::nvvm_d2ui_rz:
case Intrinsic::nvvm_f2ui_rz:
case Intrinsic::nvvm_d2ull_rz:
case Intrinsic::nvvm_f2ull_rz:
return {Instruction::FPToUI};
case Intrinsic::nvvm_i2d_rz:
case Intrinsic::nvvm_i2f_rz:
case Intrinsic::nvvm_ll2d_rz:
case Intrinsic::nvvm_ll2f_rz:
return {Instruction::SIToFP};
case Intrinsic::nvvm_ui2d_rz:
case Intrinsic::nvvm_ui2f_rz:
case Intrinsic::nvvm_ull2d_rz:
case Intrinsic::nvvm_ull2f_rz:
return {Instruction::UIToFP};
// NVVM intrinsics that map to IR floating-point binary operators.
case Intrinsic::nvvm_add_rn_d:
return {Instruction::FAdd, FTZ_Any};
case Intrinsic::nvvm_add_rn_f:
return {Instruction::FAdd, FTZ_MustBeOff};
case Intrinsic::nvvm_add_rn_ftz_f:
return {Instruction::FAdd, FTZ_MustBeOn};
case Intrinsic::nvvm_mul_rn_d:
return {Instruction::FMul, FTZ_Any};
case Intrinsic::nvvm_mul_rn_f:
return {Instruction::FMul, FTZ_MustBeOff};
case Intrinsic::nvvm_mul_rn_ftz_f:
return {Instruction::FMul, FTZ_MustBeOn};
case Intrinsic::nvvm_div_rn_d:
return {Instruction::FDiv, FTZ_Any};
case Intrinsic::nvvm_div_rn_f:
return {Instruction::FDiv, FTZ_MustBeOff};
case Intrinsic::nvvm_div_rn_ftz_f:
return {Instruction::FDiv, FTZ_MustBeOn};
// Reciprocal intrinsics need special handling; see the final switch below.
case Intrinsic::nvvm_rcp_rn_d:
return {SPC_Reciprocal, FTZ_Any};
case Intrinsic::nvvm_rcp_rn_f:
return {SPC_Reciprocal, FTZ_MustBeOff};
case Intrinsic::nvvm_rcp_rn_ftz_f:
return {SPC_Reciprocal, FTZ_MustBeOn};
// Everything else yields the empty action, i.e. no simplification.
default:
return {};
}
}();
// Bail out when the action has an FTZ requirement that the enclosing
// function's denormal attribute does not satisfy. The empty (default) action
// has FTZ_Any, so unhandled intrinsics skip the attribute lookup entirely.
if (Action.FtzRequirement != FTZ_Any) {
const char *AttrName =
Action.IsHalfTy ? "denormal-fp-math" : "denormal-fp-math-f32";
StringRef Attr =
II->getFunction()->getFnAttribute(AttrName).getValueAsString();
DenormalMode Mode = parseDenormalFPAttribute(Attr);
// Any output mode other than IEEE counts as flushing being enabled.
bool FtzEnabled = Mode.Output != DenormalMode::IEEE;
if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
return nullptr;
}
// Replace with a call to the target-generic intrinsic, forwarding all
// arguments. The generic intrinsic is overloaded on a single type, taken from
// the type of the first argument.
if (Action.IID) {
SmallVector<Value *, 4> Args(II->args());
Type *Tys[] = {II->getArgOperand(0)->getType()};
return CallInst::Create(
Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
}
// Replace with a binary operator on the first two arguments.
if (Action.BinaryOp)
return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
II->getArgOperand(1), II->getName());
// Replace with a cast of the first argument to the intrinsic's result type.
if (Action.CastOp)
return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
II->getName());
// None of the Optionals is set: nothing to simplify.
if (!Action.Special)
return nullptr;
switch (*Action.Special) {
case SPC_Reciprocal:
// Build `1 / x` as an fdiv whose numerator is the constant 1 of the
// argument's type.
return BinaryOperator::Create(
Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
II->getArgOperand(0), II->getName());
}
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
// InstCombine hook for NVPTX: forwards to simplifyNvvmIntrinsic and returns
// its replacement instruction, or None when it produced nothing.
Optional<Instruction *>
NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Instruction *Replacement = simplifyNvvmIntrinsic(&II, IC);
  if (!Replacement)
    return None;
  return Replacement;
}
// Cost of an arithmetic instruction with opcode \p Opcode on type \p Ty.
//
// ADD, MUL, XOR, OR and AND whose legalized type is i64 cost twice the type
// legalization cost. Every other case is delegated to the base
// implementation. \p Args and \p CxtI are not forwarded to the base.
InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
    TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  // Legalize the type first, exactly once, regardless of the opcode.
  std::pair<InstructionCost, MVT> Legalized =
      TLI->getTypeLegalizationCost(DL, Ty);

  const int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  const bool IsDoubledOnI64 = ISDOpcode == ISD::ADD || ISDOpcode == ISD::MUL ||
                              ISDOpcode == ISD::XOR || ISDOpcode == ISD::OR ||
                              ISDOpcode == ISD::AND;

  if (IsDoubledOnI64 && Legalized.second.SimpleTy == MVT::i64)
    return 2 * Legalized.first;

  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
                                       Opd2Info, Opd1PropInfo, Opd2PropInfo);
}
// Starts from the base unrolling preferences, then enables runtime and
// partial unrolling and sets the partial-unroll threshold to a quarter of the
// full threshold.
void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  BaseT::getUnrollingPreferences(L, SE, UP, ORE);

  UP.Runtime = true;
  UP.Partial = true;
  UP.PartialThreshold = UP.Threshold / 4;
}
// Uses the base peeling preferences unchanged; no NVPTX-specific adjustments
// are made here.
void NVPTXTTIImpl::getPeelingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}