#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#define DEBUG_TYPE "amdgpu-propagate-attributes"
using namespace llvm;
// Table of subtarget feature key/value entries. It is only declared here; the
// definition lives outside this file (presumably in the TableGen-generated
// subtarget description -- TODO confirm). getFeatureString() walks it to turn
// feature bits into "+name"/"-name" strings.
namespace llvm {
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
namespace {
// Subtarget features that are propagated from a caller to its callees: the
// three wavefront-size features. All other feature bits are ignored when
// comparing function properties.
static constexpr const FeatureBitset TargetFeatures = {
AMDGPU::FeatureWavefrontSize16,
AMDGPU::FeatureWavefrontSize32,
AMDGPU::FeatureWavefrontSize64
};
// Function attributes, looked up by name, that are propagated alongside the
// features above.
static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
class AMDGPUPropagateAttributes {
class FnProperties {
private:
explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
public:
explicit FnProperties(const TargetMachine &TM, const Function &F) {
Features = TM.getSubtargetImpl(F)->getFeatureBits();
for (unsigned I = 0; I < NumAttr; ++I)
if (F.hasFnAttribute(AttributeNames[I]))
Attributes[I] = F.getFnAttribute(AttributeNames[I]);
}
bool operator == (const FnProperties &Other) const {
if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
return false;
for (unsigned I = 0; I < NumAttr; ++I)
if (Attributes[I] != Other.Attributes[I])
return false;
return true;
}
FnProperties adjustToCaller(const FnProperties &CallerProps) const {
FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
for (unsigned I = 0; I < NumAttr; ++I)
New.Attributes[I] = CallerProps.Attributes[I];
return New;
}
FeatureBitset Features;
Optional<Attribute> Attributes[NumAttr];
};
class Clone {
public:
Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
Properties(Props), OrigF(OrigF), NewF(NewF) {}
FnProperties Properties;
Function *OrigF;
Function *NewF;
};
const TargetMachine *TM;
bool AllowClone;
SmallSet<Function *, 32> Roots;
SmallVector<Clone, 32> Clones;
Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);
Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
std::string getFeatureString(const FeatureBitset &Features) const;
bool process();
public:
AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
TM(TM), AllowClone(AllowClone) {}
bool process(Function &F);
bool process(Module &M);
};
class AMDGPUPropagateAttributesEarly : public FunctionPass {
const TargetMachine *TM;
public:
static char ID;
AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
FunctionPass(ID), TM(TM) {
initializeAMDGPUPropagateAttributesEarlyPass(
*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
};
class AMDGPUPropagateAttributesLate : public ModulePass {
const TargetMachine *TM;
public:
static char ID;
AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
ModulePass(ID), TM(TM) {
initializeAMDGPUPropagateAttributesLatePass(
*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
};
}
// Storage for the static ID members declared in the two legacy pass classes;
// each is handed to the FunctionPass/ModulePass base-class constructor.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;
// Legacy pass registration for the early (per-function) pass. The arguments
// are the pass class, its argument string, a description, and two boolean
// flags that are both false here.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
"amdgpu-propagate-attributes-early",
"Early propagate attributes from kernels to functions",
false, false)
// Legacy pass registration for the late (per-module) pass; same argument
// layout as above.
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
"amdgpu-propagate-attributes-late",
"Late propagate attributes from kernels to functions",
false, false)
/// Searches the recorded clones for one that was made from \p OrigF and whose
/// recorded properties compare equal to \p PropsNeeded.
///
/// \returns the matching clone, or nullptr when no such clone exists.
Function *
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
                                        Function *OrigF) {
  for (const Clone &Candidate : Clones) {
    if (Candidate.OrigF != OrigF)
      continue;
    if (PropsNeeded == Candidate.Properties)
      return Candidate.NewF;
  }
  return nullptr;
}
/// Registers every function of \p M that has a kernel calling convention as a
/// propagation root, then runs the propagation.
///
/// \returns false when the module has no kernels; otherwise whatever the
/// propagation reports.
bool AMDGPUPropagateAttributes::process(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!AMDGPU::isKernel(Fn.getCallingConv()))
      continue;
    Roots.insert(&Fn);
  }

  if (Roots.empty())
    return false;
  return process();
}
/// Adds \p F to the root set and runs the propagation.
///
/// \returns the result of the propagation.
bool AMDGPUPropagateAttributes::process(Function &F) {
  Function *Root = &F;
  Roots.insert(Root);
  return process();
}
// Propagates the tracked features and attributes from the functions in Roots
// to the functions they call directly, repeating until a round discovers no
// new roots. Requires Roots to be non-empty. Returns true when a function was
// rewritten in place or a call site was queued for redirection to a clone.
// Roots and Clones are cleared before returning.
bool AMDGPUPropagateAttributes::process() {
bool Changed = false;
// Functions that became roots during the current round.
SmallSet<Function *, 32> NewRoots;
// Originals that had at least one call site redirected to a clone.
SmallSet<Function *, 32> Replaced;
assert(!Roots.empty());
// The module is taken from an arbitrary root.
Module &M = *(*Roots.begin())->getParent();
do {
// Roots found in the previous round take part in this one.
Roots.insert(NewRoots.begin(), NewRoots.end());
NewRoots.clear();
for (auto &F : M.functions()) {
if (F.isDeclaration())
continue;
const FnProperties CalleeProps(*TM, F);
// Call sites of F to redirect, paired with their new callee.
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
// Call instructions already handled for this F.
SmallSet<CallBase *, 32> Visited;
for (User *U : F.users()) {
Instruction *I = dyn_cast<Instruction>(U);
if (!I)
continue;
CallBase *CI = dyn_cast<CallBase>(I);
// Only calls whose called operand is F itself are considered; other uses of
// F by a call instruction (for example as an argument) are skipped.
if (!CI || CI->getCalledOperand() != &F)
continue;
Function *Caller = CI->getCaller();
if (!Caller || !Visited.insert(CI).second)
continue;
// Propagate only from callers that are roots, old or newly found.
if (!Roots.count(Caller) && !NewRoots.count(Caller))
continue;
const FnProperties CallerProps(*TM, *Caller);
// Already matching: leave F untouched, but make it a root so that its own
// callees get examined.
if (CalleeProps == CallerProps) {
if (!Roots.count(&F))
NewRoots.insert(&F);
continue;
}
// Reuse a clone of F created earlier for the same caller properties.
Function *NewF = findFunction(CallerProps, &F);
if (!NewF) {
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
// Cloning disabled: rewrite F in place using this caller's properties and
// stop looking at F's remaining callers.
setFeatures(F, NewProps.Features);
setAttributes(F, NewProps.Attributes);
NewRoots.insert(&F);
Changed = true;
break;
}
NewF = cloneWithProperties(F, NewProps);
Clones.push_back(Clone(CallerProps, &F, NewF));
NewRoots.insert(NewF);
}
ToReplace.push_back(std::make_pair(CI, NewF));
Replaced.insert(&F);
Changed = true;
}
// Redirect the collected call sites only after the walk over F's users has
// finished.
while (!ToReplace.empty()) {
auto R = ToReplace.pop_back_val();
R.first->setCalledFunction(R.second);
}
}
} while (!NewRoots.empty());
// Delete originals that no longer have any use.
// NOTE(review): only use_empty() is checked here, linkage is not consulted --
// confirm that externally visible functions cannot end up in Replaced.
for (Function *F : Replaced) {
if (F->use_empty())
F->eraseFromParent();
}
Roots.clear();
Clones.clear();
return Changed;
}
/// Clones \p F, applies the features and attributes of \p NewProps to the
/// clone and gives the clone default visibility and internal linkage.
///
/// When \p F is named and has local linkage the two functions swap names, so
/// the clone ends up with the original's name.
///
/// \returns the newly created clone.
Function *
AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
                                               const FnProperties &NewProps) {
  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

  ValueToValueMapTy VMap;
  Function *const ClonedFn = CloneFunction(&F, VMap);

  setFeatures(*ClonedFn, NewProps.Features);
  setAttributes(*ClonedFn, NewProps.Attributes);
  ClonedFn->setVisibility(GlobalValue::DefaultVisibility);
  ClonedFn->setLinkage(GlobalValue::InternalLinkage);

  // Unnamed or non-local originals keep their name.
  if (!F.hasName() || !F.hasLocalLinkage())
    return ClonedFn;

  // Remember the clone's name before it takes over the original's.
  const std::string CloneName = std::string(ClonedFn->getName());
  ClonedFn->takeName(&F);
  F.setName(CloneName);
  return ClonedFn;
}
/// Replaces the "target-features" attribute of \p F with the feature string
/// built from \p NewFeatures.
void AMDGPUPropagateAttributes::setFeatures(Function &F,
                                            const FeatureBitset &NewFeatures) {
  // The debug line shows only the propagated subset of the features.
  LLVM_DEBUG(dbgs() << "Set features "
                    << getFeatureString(NewFeatures & TargetFeatures)
                    << " on " << F.getName() << '\n');

  const std::string FeatureStr = getFeatureString(NewFeatures);
  F.removeFnAttr("target-features");
  F.addFnAttr("target-features", FeatureStr);
}
/// Makes the tracked attributes of \p F match \p NewAttrs: each attribute in
/// AttributeNames is removed and then re-added when \p NewAttrs holds a value
/// for it. \p NewAttrs is indexed in the same order as AttributeNames.
void AMDGPUPropagateAttributes::setAttributes(Function &F,
    const ArrayRef<Optional<Attribute>> NewAttrs) {
  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");

  for (unsigned Idx = 0; Idx != NumAttr; ++Idx) {
    F.removeFnAttr(AttributeNames[Idx]);

    // No value to propagate: the attribute stays removed.
    if (!NewAttrs[Idx])
      continue;

    LLVM_DEBUG(dbgs() << '\t' << NewAttrs[Idx]->getAsString() << '\n');
    F.addFnAttr(*NewAttrs[Idx]);
  }
}
std::string
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
{
std::string Ret;
for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
if (Features[KV.Value])
Ret += (StringRef("+") + KV.Key + ",").str();
else if (TargetFeatures[KV.Value])
Ret += (StringRef("-") + KV.Key + ",").str();
}
Ret.pop_back(); return Ret;
}
/// Runs the in-place (non-cloning) propagation with \p F as the root.
///
/// When no TargetMachine was supplied at construction it is taken from
/// TargetPassConfig and cached; without either the pass does nothing.
/// Functions without a kernel calling convention are left alone.
///
/// \returns true when the propagation changed something.
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
  if (TM == nullptr) {
    if (auto *Config = getAnalysisIfAvailable<TargetPassConfig>())
      TM = &Config->getTM<TargetMachine>();
    else
      return false;
  }

  const bool IsKernel = AMDGPU::isKernel(F.getCallingConv());
  return IsKernel && AMDGPUPropagateAttributes(TM, false).process(F);
}
/// Runs the cloning propagation over \p M.
///
/// When no TargetMachine was supplied at construction it is taken from
/// TargetPassConfig and cached; without either the pass does nothing.
///
/// \returns true when the propagation changed something.
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
  if (TM == nullptr) {
    if (auto *Config = getAnalysisIfAvailable<TargetPassConfig>())
      TM = &Config->getTM<TargetMachine>();
    else
      return false;
  }

  AMDGPUPropagateAttributes Propagator(TM, true);
  return Propagator.process(M);
}
/// Creates the legacy early (per-function) propagation pass for \p TM. The
/// returned pass is heap-allocated.
FunctionPass
*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
  auto *EarlyPass = new AMDGPUPropagateAttributesEarly(TM);
  return EarlyPass;
}
/// Creates the legacy late (per-module) propagation pass for \p TM. The
/// returned pass is heap-allocated.
ModulePass
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
  auto *LatePass = new AMDGPUPropagateAttributesLate(TM);
  return LatePass;
}
/// New pass-manager entry point of the early pass: runs the in-place
/// (non-cloning) propagation with \p F as the root when \p F has an entry
/// function calling convention.
///
/// NOTE(review): this filters with isEntryFunctionCC() whereas the legacy
/// runOnFunction() and process(Module &) filter with isKernel() -- confirm
/// whether the difference is intended.
///
/// \returns PreservedAnalyses::none() when something changed, otherwise
/// PreservedAnalyses::all().
PreservedAnalyses
AMDGPUPropagateAttributesEarlyPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  const bool IsEntry = AMDGPU::isEntryFunctionCC(F.getCallingConv());
  if (IsEntry && AMDGPUPropagateAttributes(&TM, false).process(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
/// New pass-manager entry point of the late pass: runs the cloning
/// propagation over \p M.
///
/// \returns PreservedAnalyses::none() when something changed, otherwise
/// PreservedAnalyses::all().
PreservedAnalyses
AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
  AMDGPUPropagateAttributes Propagator(&TM, true);
  if (Propagator.process(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}