#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
namespace llvm {
// Forward declarations. Within this header these types only appear in
// function declarations (by reference, by pointer, or as a return type), so
// their full definitions are not required here.
enum AMDGPUDwarfFlavour : unsigned;
class Function;
class Instruction;
class MachineFunction;
class TargetMachine;
class AMDGPUSubtarget {
public:
enum Generation {
INVALID = 0,
R600 = 1,
R700 = 2,
EVERGREEN = 3,
NORTHERN_ISLANDS = 4,
SOUTHERN_ISLANDS = 5,
SEA_ISLANDS = 6,
VOLCANIC_ISLANDS = 7,
GFX9 = 8,
GFX10 = 9,
GFX11 = 10
};
private:
Triple TargetTriple;
protected:
bool GCN3Encoding = false;
bool Has16BitInsts = false;
bool HasTrue16BitInsts = false;
bool HasMadMixInsts = false;
bool HasMadMacF32Insts = false;
bool HasDsSrc2Insts = false;
bool HasSDWA = false;
bool HasVOP3PInsts = false;
bool HasMulI24 = true;
bool HasMulU24 = true;
bool HasSMulHi = false;
bool HasInv2PiInlineImm = false;
bool HasFminFmaxLegacy = true;
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
public:
AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
const Function &F);
std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
return getWavesPerEU(F, FlatWorkGroupSizes);
}
std::pair<unsigned, unsigned>
getWavesPerEU(const Function &F,
std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
bool isAmdPalOS() const {
return TargetTriple.getOS() == Triple::AMDPAL;
}
bool isMesa3DOS() const {
return TargetTriple.getOS() == Triple::Mesa3D;
}
bool isMesaKernel(const Function &F) const;
bool isAmdHsaOrMesa(const Function &F) const {
return isAmdHsaOS() || isMesaKernel(F);
}
bool isGCN() const {
return TargetTriple.getArch() == Triple::amdgcn;
}
bool isGCN3Encoding() const {
return GCN3Encoding;
}
bool has16BitInsts() const {
return Has16BitInsts;
}
bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
bool hasMadMixInsts() const {
return HasMadMixInsts;
}
bool hasMadMacF32Insts() const {
return HasMadMacF32Insts || !isGCN();
}
bool hasDsSrc2Insts() const {
return HasDsSrc2Insts;
}
bool hasSDWA() const {
return HasSDWA;
}
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
bool hasMulI24() const {
return HasMulI24;
}
bool hasMulU24() const {
return HasMulU24;
}
bool hasSMulHi() const {
return HasSMulHi;
}
bool hasInv2PiInlineImm() const {
return HasInv2PiInlineImm;
}
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
bool hasTrigReducedRange() const {
return HasTrigReducedRange;
}
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
unsigned getWavefrontSize() const {
return 1 << WavefrontSizeLog2;
}
unsigned getWavefrontSizeLog2() const {
return WavefrontSizeLog2;
}
unsigned getLocalMemorySize() const {
return LocalMemorySize;
}
Align getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? Align(8) : Align(4);
}
unsigned getExplicitKernelArgOffset(const Function &F) const {
switch (TargetTriple.getOS()) {
case Triple::AMDHSA:
case Triple::AMDPAL:
case Triple::Mesa3D:
return 0;
case Triple::UnknownOS:
default:
return 36;
}
llvm_unreachable("invalid triple OS");
}
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
virtual unsigned getMinFlatWorkGroupSize() const = 0;
virtual unsigned getMaxFlatWorkGroupSize() const = 0;
virtual unsigned
getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
bool makeLIDRangeMetadata(Instruction *I) const;
unsigned getImplicitArgNumBytes(const Function &F) const;
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
virtual ~AMDGPUSubtarget() = default;
};
}
#endif