#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
namespace llvm {
class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
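// Tail-predication modes for MVE low-overhead loops: disabled, enabled
// (optionally excluding reduction loops), or forced on.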
namespace TailPredication {
enum Mode {
Disabled = 0,
EnabledNoReductions,
Enabled,
ForceEnabledNoReductions,
ForceEnabled
};
} // namespace TailPredication
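// Controls whether memory transfer intrinsics (memcpy/memmove/memset) may be
// lowered to MVE tail-predicated loops.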
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop
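/// ARM-specific TargetTransformInfo implementation. Answers cost-model and
/// legality queries using the subtarget's details, deferring to
/// BasicTTIImplBase for anything not specialised here.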
class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
using BaseT = BasicTTIImplBase<ARMTTIImpl>;
using TTI = TargetTransformInfo;
friend BaseT;
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
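  /// Subtarget features that are permitted to differ between caller and
  /// callee when checking inline compatibility; any feature not listed here
  /// must match exactly (see areInlineCompatible).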
const FeatureBitset InlineFeaturesAllowed = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
};
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
public:
explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
bool enableInterleavedAccessVectorization() { return true; }
TTI::AddressingModeKind
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
bool isFPVectorizationPotentiallyUnsafe() {
return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
}
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const;
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty);
using BaseT::getIntImmCost;
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst = nullptr);
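  /// Register counts: 16 (NEON) or 8 (MVE) 128-bit Q registers for the vector
  /// class; 8 low GPRs for Thumb1 and 13 GPRs otherwise for the scalar class.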
unsigned getNumberOfRegisters(unsigned ClassID) const {
bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 16;
if (ST->hasMVEIntegerOps())
return 8;
return 0;
}
if (ST->isThumb1Only())
return 8;
return 13;
}
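  /// Scalar registers are 32 bits wide; NEON and MVE both use 128-bit Q
  /// registers. Scalable vectors are not supported.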
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
switch (K) {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(32);
case TargetTransformInfo::RGK_FixedWidthVector:
if (ST->hasNEON())
return TypeSize::getFixed(128);
if (ST->hasMVEIntegerOps())
return TypeSize::getFixed(128);
return TypeSize::getFixed(0);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(0);
}
llvm_unreachable("Unsupported register kind");
}
unsigned getMaxInterleaveFactor(unsigned VF) {
return ST->getMaxInterleaveFactor();
}
bool isProfitableLSRChainElement(Instruction *I);
bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
return isLegalMaskedLoad(DataTy, Alignment);
}
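  // Gathers/scatters that can be lowered to MVE intrinsics are handled by a
  // dedicated lowering pass; anything that remains should be scalarized.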
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
return true;
}
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
return forceScalarizeMaskedGather(VTy, Alignment);
}
bool isLegalMaskedGather(Type *Ty, Align Alignment);
bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
return isLegalMaskedGather(Ty, Alignment);
}
InstructionCost getMemcpyCost(const Instruction *I);
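  /// Number of memory operations a memcpy/memmove/memset intrinsic is
  /// expected to expand to, used when costing such calls.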
int getNumMemOps(const IntrinsicInst *I) const;
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index);
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
TTI::TargetCostKind CostKind);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
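  // Hardware low-overhead loop and tail-predication related queries.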
bool maybeLoweredToCall(Instruction &I);
bool isLoweredToCall(const Function *F);
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
LoopVectorizationLegality *LVL);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
PredicationStyle emitGetActiveLaneMask() const;
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
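  /// Under ROPI/RWPI, constants that need a dynamic relocation cannot be
  /// placed in a read-only lookup table.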
bool shouldBuildLookupTablesForConstant(Constant *C) const {
if (ST->isROPI() || ST->isRWPI())
return !C->needsDynamicRelocation();
return true;
}
};
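/// Return true if the shuffle mask M reverses the elements within each
/// BlockSize-bit block of the vector type VT, i.e. it is a
/// VREV16/VREV32/VREV64 mask.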
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz != 8 && EltSz != 16 && EltSz != 32)
return false;
unsigned BlockElts = M[0] + 1;
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    // Undef lanes can match anything.
    if (M[i] < 0)
      continue;
    // Each lane must come from the mirrored position within its block.
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
return true;
}
} // namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H