#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
// Everything below refers to LLVM entities without the llvm:: qualifier.
using namespace llvm;
// Category name picked up by the LLVM_DEBUG and STATISTIC macros used in this
// file.
#define DEBUG_TYPE "argpromotion"
// Incremented in doPromotion once per argument that is replaced by its
// loaded parts.
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
// Incremented in doPromotion once per to-be-promoted argument that has no uses
// and is therefore dropped from the new signature.
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
namespace {
/// Describes one piece of a pointer argument that is accessed at a fixed byte
/// offset and can therefore be passed by value instead.
struct ArgPart {
// Type of the value that is loaded from / stored to this piece.
Type *Ty;
// Alignment of the accesses to this piece. findArgParts keeps the largest
// alignment seen at the offset; doPromotion uses it for the caller-side load
// and for the callee-side alloca and store.
Align Alignment;
// The instruction that first recorded this piece, provided it was found while
// scanning the guaranteed-to-execute prefix of the entry block; null
// otherwise. doPromotion copies selected metadata from it onto the load it
// creates in the caller.
Instruction *MustExecInstr;
};
// A piece together with its byte offset from the argument pointer (first).
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
}
/// Returns a pointer to \p ResElemTy, in the address space of \p Ptr, that
/// addresses the location \p Offset bytes past \p Ptr.
///
/// With typed (non-opaque) pointers a GEP expressed through the pointee type
/// is attempted first: the byte offset is decomposed into GEP indices and, if
/// nothing is left over, trailing zero indices are appended to drill down
/// towards \p ResElemTy. When that is not possible, or the pointer is opaque,
/// the offset is applied with an i8 GEP. The result is always bitcast to the
/// requested pointer type, except when the input pointer already is exactly
/// what was asked for.
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  Type *const SrcPtrTy = Ptr->getType();
  const unsigned AddrSpace = SrcPtrTy->getPointerAddressSpace();
  const APInt ByteOffset(DL.getIndexTypeSizeInBits(SrcPtrTy), Offset);
  const bool IsZeroOffset = ByteOffset == 0;

  // Shared tail of every non-trivial exit: cast to "ResElemTy*".
  auto CastToResult = [&](Value *V) -> Value * {
    return IRB.CreateBitCast(V, ResElemTy->getPointerTo(AddrSpace));
  };

  if (!SrcPtrTy->isOpaquePointerTy()) {
    Type *const PointeeTy = SrcPtrTy->getNonOpaquePointerElementType();

    // Nothing to compute: same address, same pointee type.
    if (IsZeroOffset && PointeeTy == ResElemTy)
      return Ptr;

    if (PointeeTy->isSized()) {
      // Both of these are updated in place by getGEPIndicesForOffset.
      APInt Leftover = ByteOffset;
      Type *WalkTy = PointeeTy;
      SmallVector<APInt> RawIndices =
          DL.getGEPIndicesForOffset(WalkTy, Leftover);

      if (Leftover == 0) {
        // Descend through first elements until the requested type is reached
        // or there is nothing left to descend into.
        while (WalkTy != ResElemTy) {
          Type *InnerTy =
              GetElementPtrInst::getTypeAtIndex(WalkTy, static_cast<uint64_t>(0));
          if (InnerTy == nullptr)
            break;

          // Struct indices are 32 bits wide; everything else uses the index
          // width of the pointer.
          unsigned ZeroWidth = ByteOffset.getBitWidth();
          if (isa<StructType>(WalkTy))
            ZeroWidth = 32;
          RawIndices.push_back(APInt::getZero(ZeroWidth));
          WalkTy = InnerTy;
        }

        SmallVector<Value *> GEPOperands;
        for (const APInt &Raw : RawIndices)
          GEPOperands.push_back(IRB.getInt(Raw));

        if (!IsZeroOffset || WalkTy == ResElemTy)
          return CastToResult(IRB.CreateGEP(PointeeTy, Ptr, GEPOperands));
      }
    }
  }

  // Generic path: step over raw bytes.
  Value *Base = Ptr;
  if (!IsZeroOffset) {
    Value *BytePtr = IRB.CreateBitCast(Base, IRB.getInt8PtrTy(AddrSpace));
    Base = IRB.CreateGEP(IRB.getInt8Ty(), BytePtr, IRB.getInt(ByteOffset));
  }
  return CastToResult(Base);
}
/// Replaces \p F by a new function in which every argument listed in
/// \p ArgsToPromote is either dropped (when it has no uses) or replaced by one
/// by-value parameter per recorded part.
///
/// All call sites of \p F are rewritten to load the parts from the pointer
/// they used to pass and to call the new function; the body of \p F is then
/// moved into the new function and the accesses through the old pointer
/// argument are redirected to fresh allocas, which are finally promoted to
/// registers.
///
/// \param F the function to replace. On return it has no remaining users and
///        its basic blocks have been moved away; it is not erased here.
/// \param FAM used to obtain the dominator tree and assumption cache of the
///        new function for PromoteMemToReg.
/// \param ArgsToPromote maps each argument to promote to its (offset, part)
///        list.
/// \returns the new function, which has taken over the name of \p F.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
&ArgsToPromote) {
FunctionType *FTy = F->getFunctionType();
// Parameter types of the new function and, in lock-step, their attributes.
std::vector<Type *> Params;
SmallVector<AttributeSet, 8> ArgAttrVec;
AttributeList PAL = F->getAttributes();
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++ArgNo) {
if (!ArgsToPromote.count(&*I)) {
// Argument is not promoted: keep type and attributes as they are.
Params.push_back(I->getType());
ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
} else if (I->use_empty()) {
// Promoted but unused: contributes no parameter at all.
++NumArgumentsDead;
} else {
// Promoted and used: one parameter per part, without any attributes.
const auto &ArgParts = ArgsToPromote.find(&*I)->second;
for (const auto &Pair : ArgParts) {
Params.push_back(Pair.second.Ty);
ArgAttrVec.push_back(AttributeSet());
}
++NumArgumentsPromoted;
}
}
Type *RetTy = FTy->getReturnType();
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
// The new function is created detached; it is inserted into the module
// further down, right before F.
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
F->getName());
NF->copyAttributesFrom(F);
NF->copyMetadata(F, 0);
// NF now carries the metadata copied from F, so detach the subprogram from F.
// NOTE(review): presumably a subprogram must not be attached to two
// functions at once -- confirm.
F->setSubprogram(nullptr);
LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
// Widest vector parameter (known minimum size in bits) of the new signature;
// used to update the min-legal-vector-width attribute of NF and its callers.
uint64_t LargestVectorWidth = 0;
for (auto *I : Params)
if (auto *VT = dyn_cast<llvm::VectorType>(I))
LargestVectorWidth = std::max(
LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
// Function and return attributes are taken from F unchanged; parameter
// attributes come from the list assembled above.
NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
PAL.getRetAttrs(), ArgAttrVec));
AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
// ArgAttrVec is reused below for the attributes of each rewritten call.
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
// Rewrite every call site. Each iteration erases one user of F, so the loop
// ends once all calls have been redirected to NF.
SmallVector<Value *, 16> Args;
const DataLayout &DL = F->getParent()->getDataLayout();
while (!F->use_empty()) {
CallBase &CB = cast<CallBase>(*F->user_back());
assert(CB.getCalledFunction() == F);
const AttributeList &CallPAL = CB.getAttributes();
// New instructions are inserted immediately before the old call.
IRBuilder<NoFolder> IRB(&CB);
auto *AI = CB.arg_begin();
ArgNo = 0;
// Walk formal parameters of F and actual call operands in parallel.
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
++I, ++AI, ++ArgNo) {
if (!ArgsToPromote.count(&*I)) {
Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo)); // unmodified argument: forwarded as is
} else if (!I->use_empty()) {
// Promoted argument: load every part in the caller and pass the values.
// Promoted arguments without uses are skipped entirely.
Value *V = *AI;
const auto &ArgParts = ArgsToPromote.find(&*I)->second;
for (const auto &Pair : ArgParts) {
LoadInst *LI = IRB.CreateAlignedLoad(
Pair.second.Ty,
createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
Pair.second.Alignment, V->getName() + ".val");
// If a guaranteed-to-execute access was recorded for this part, carry its
// AA metadata and the listed metadata kinds over to the new load.
if (Pair.second.MustExecInstr) {
LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
LI->copyMetadata(*Pair.second.MustExecInstr,
{LLVMContext::MD_range, LLVMContext::MD_nonnull,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
LLVMContext::MD_align, LLVMContext::MD_noundef,
LLVMContext::MD_nontemporal});
}
Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
}
}
}
// Forward any call operands beyond the formal parameters unchanged.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
CB.getOperandBundlesAsDefs(OpBundles);
// Create the replacement call/invoke right before the old one.
CallBase *NewCS = nullptr;
if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, OpBundles, "", &CB);
} else {
auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
NewCS = NewCall;
}
NewCS->setCallingConv(CB.getCallingConv());
NewCS->setAttributes(AttributeList::get(F->getContext(),
CallPAL.getFnAttrs(),
CallPAL.getRetAttrs(), ArgAttrVec));
// Only profile and debug-location metadata are carried over to the new call.
NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
// Reset the scratch vectors for the next call site.
Args.clear();
ArgAttrVec.clear();
AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
LargestVectorWidth);
if (!CB.use_empty()) {
CB.replaceAllUsesWith(NewCS);
NewCS->takeName(&CB);
}
// Removing the old call drops one use of F.
CB.eraseFromParent();
}
// Move the whole body of F into NF; F is left without basic blocks.
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
// Allocas created for promoted parts, promoted to registers at the end.
SmallVector<AllocaInst *, 4> Allocas;
// I2 walks the parameters of NF while the loop walks those of F.
Function::arg_iterator I2 = NF->arg_begin();
for (Argument &Arg : F->args()) {
if (!ArgsToPromote.count(&Arg)) {
// Unmodified argument: hand uses and name over to the new parameter.
Arg.replaceAllUsesWith(&*I2);
I2->takeName(&Arg);
++I2;
continue;
}
// Runs whenever this iteration is left (including via the `continue`
// below): whatever still refers to the old argument is pointed at undef.
// NOTE(review): going by the name this targets leftover metadata uses --
// confirm.
auto RauwUndefMetadata = make_scope_exit(
[&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
// Dead promoted argument: no parameter was created for it, nothing to do.
if (Arg.use_empty())
continue;
assert(Arg.getType()->isPointerTy() &&
"Only arguments with a pointer type are promotable");
// Insert before the first instruction of the entry block of NF.
IRBuilder<NoFolder> IRB(&NF->begin()->front());
// For each part: name the new parameter, create an alloca of the part type
// and store the incoming value into it. The allocas are keyed by offset so
// the old accesses can be redirected to them.
SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
int64_t Offset = Pair.first;
const ArgPart &Part = Pair.second;
Argument *NewArg = I2++;
NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
AllocaInst *NewAlloca = IRB.CreateAlloca(
Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
NewAlloca->setAlignment(Pair.second.Alignment);
IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
OffsetToAlloca.insert({Offset, NewAlloca});
}
// Maps a pointer derived from Arg by constant offsets to the alloca that was
// created for that offset.
auto GetAlloca = [&](Value *Ptr) {
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
true);
assert(Ptr == &Arg && "Not constant offset from arg?");
return OffsetToAlloca.lookup(Offset.getSExtValue());
};
// Visit all transitive users of Arg: bitcasts and GEPs are queued for
// deletion and looked through, loads and stores get their pointer operand
// replaced by the matching alloca. Any other user is unexpected here.
SmallVector<Value *, 16> Worklist;
SmallVector<Instruction *, 16> DeadInsts;
append_range(Worklist, Arg.users());
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
DeadInsts.push_back(cast<Instruction>(V));
append_range(Worklist, V->users());
continue;
}
if (auto *LI = dyn_cast<LoadInst>(V)) {
Value *Ptr = LI->getPointerOperand();
LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
continue;
}
if (auto *SI = dyn_cast<StoreInst>(V)) {
assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
Value *Ptr = SI->getPointerOperand();
SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
continue;
}
llvm_unreachable("Unexpected user");
}
// The address computations are no longer needed. Replace remaining uses with
// poison first so each instruction can be erased regardless of order.
for (Instruction *I : DeadInsts) {
I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
for (const auto &Pair : OffsetToAlloca) {
assert(isAllocaPromotable(Pair.second) &&
"By design, only promotable allocas should be produced.");
Allocas.push_back(Pair.second);
}
}
LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
<< " alloca(s) are promotable by Mem2Reg\n");
// Turn the temporary allocas into SSA values.
if (!Allocas.empty()) {
auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
PromoteMemToReg(Allocas, DT, &AC);
}
return NF;
}
/// Checks that \p Arg is known to point to at least \p NeededDerefBytes
/// dereferenceable bytes aligned to \p NeededAlign.
///
/// The property is accepted either when it can be shown for the argument
/// itself, or when it holds for the matching operand of every user of the
/// enclosing function. Every user is expected to be a call to that function.
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Cheapest answer first: the argument itself may already guarantee it.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Otherwise every single call site has to pass a suitable pointer.
  for (User *CalleeUser : Callee->users()) {
    CallBase &Call = cast<CallBase>(*CalleeUser);
    Value *Passed = Call.getArgOperand(Arg->getArgNo());
    if (!isDereferenceableAndAlignedPointer(Passed, NeededAlign, Bytes, DL))
      return false;
  }
  return true;
}
/// Decides whether the pointer argument \p Arg can be replaced by the values
/// accessed through it and collects those accesses as parts.
///
/// Only simple loads at constant offsets from \p Arg are accepted, reached
/// through bitcasts and GEPs with all-constant indices. Stores to the pointer
/// are additionally accepted when the argument has a byval type and an
/// explicit parameter alignment.
///
/// \param Arg the pointer argument to analyse.
/// \param DL data layout used for offsets and type sizes.
/// \param AAR alias analysis used to check that loaded memory is not modified
///        between function entry and each load.
/// \param MaxElements maximum number of distinct offsets allowed; 0 disables
///        the limit.
/// \param IsRecursive when true, parts of pointer type are rejected.
/// \param ArgPartsVec output; when true is returned and parts were found it
///        holds them sorted by offset. It may also have been appended to when
///        false is returned.
/// \returns true if the argument can be promoted; this includes arguments
///          without uses and arguments for which no part was recorded.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
unsigned MaxElements, bool IsRecursive,
SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
// An unused argument is trivially fine; ArgPartsVec stays empty.
if (Arg->use_empty())
return true;
// Parts found so far, keyed by byte offset from Arg.
SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
// Requirements that accesses NOT guaranteed to execute place on the pointer
// passed by callers; checked after all uses have been visited.
Align NeededAlign(1);
uint64_t NeededDerefBytes = 0;
// Stores are only tolerated for byval arguments with an explicit alignment.
bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
// Processes one load or store I accessing a value of type Ty.
// Returns None if the pointer operand is not Arg plus a constant offset,
// false if the access prevents promotion, and true once it has been recorded
// in ArgParts.
auto HandleEndUser = [&](auto *I, Type *Ty,
bool GuaranteedToExecute) -> Optional<bool> {
// isSimple() is false for volatile or atomic accesses.
if (!I->isSimple())
return false;
Value *Ptr = I->getPointerOperand();
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
true);
if (Ptr != Arg)
return None;
// The offset is converted to int64_t below, so it has to fit.
if (Offset.getSignificantBits() >= 64)
return false;
TypeSize Size = DL.getTypeStoreSize(Ty);
// A fixed size is required (getFixedValue() is used further down).
if (Size.isScalable())
return false;
// NOTE(review): presumably guards against promoting a pointer part over and
// over again in recursive functions -- confirm.
if (IsRecursive && Ty->isPointerTy())
return false;
int64_t Off = Offset.getSExtValue();
// Insert a part for this offset unless one exists already; in that case the
// existing entry is returned unchanged.
auto Pair = ArgParts.try_emplace(
Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
ArgPart &Part = Pair.first->second;
bool OffsetNotSeenBefore = Pair.second;
if (MaxElements > 0 && ArgParts.size() > MaxElements) {
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
<< "more than " << MaxElements << " parts\n");
return false;
}
// All accesses at one offset must use the same type.
if (Part.Ty != Ty) {
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
<< "accessed as both " << *Part.Ty << " and " << *Ty
<< " at offset " << Off << "\n");
return false;
}
// A conditional access that introduces a new offset, or needs a higher
// alignment than recorded so far, adds to what callers must guarantee.
if (!GuaranteedToExecute &&
(OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
if (Off < 0)
return false;
// The access has to be aligned relative to the base pointer as well.
if (!isAligned(I->getAlign(), Off))
return false;
NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
NeededAlign = std::max(NeededAlign, I->getAlign());
}
Part.Alignment = std::max(Part.Alignment, I->getAlign());
return true;
};
// First pass: loads and stores in the entry block up to the first instruction
// that is not guaranteed to transfer execution to its successor. These are
// treated as guaranteed to execute. Accesses not based on Arg (None) are
// ignored.
for (Instruction &I : Arg->getParent()->getEntryBlock()) {
Optional<bool> Res{};
if (LoadInst *LI = dyn_cast<LoadInst>(&I))
Res = HandleEndUser(LI, LI->getType(), true);
else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
true);
if (Res && !*Res)
return false;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
// Second pass: every transitive use of Arg. Loads are remembered for the
// alias check at the end.
SmallVector<const Use *, 16> Worklist;
SmallPtrSet<const Use *, 16> Visited;
SmallVector<LoadInst *, 16> Loads;
// Queues all uses of V that have not been queued before.
auto AppendUses = [&](const Value *V) {
for (const Use &U : V->uses())
if (Visited.insert(&U).second)
Worklist.push_back(&U);
};
AppendUses(Arg);
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
Value *V = U->getUser();
// Look through bitcasts.
if (isa<BitCastInst>(V)) {
AppendUses(V);
continue;
}
// Look through GEPs, but only if all their indices are constant.
if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
if (!GEP->hasAllConstantIndices())
return false;
AppendUses(V);
continue;
}
if (auto *LI = dyn_cast<LoadInst>(V)) {
if (!*HandleEndUser(LI, LI->getType(), false))
return false;
Loads.push_back(LI);
continue;
}
// A store only qualifies if the use is its pointer operand; storing the
// pointer value itself falls through to the unknown-user case.
auto *SI = dyn_cast<StoreInst>(V);
if (AreStoresAllowed && SI &&
U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
false))
return false;
continue;
}
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
<< "unknown user " << *V << "\n");
return false;
}
// Conditional accesses require every caller to pass a sufficiently
// dereferenceable and aligned pointer.
if (NeededDerefBytes || NeededAlign > 1) {
if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
NeededDerefBytes)) {
LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
<< "not dereferenceable or aligned\n");
return false;
}
}
// No part recorded: nothing to pass, ArgPartsVec stays as it was.
if (ArgParts.empty())
return true;
append_range(ArgPartsVec, ArgParts);
sort(ArgPartsVec, llvm::less_first());
// Reject overlapping parts: walking in offset order, each part has to start
// at or after the end (offset + store size) of the previous one.
int64_t Offset = ArgPartsVec[0].first;
for (const auto &Pair : ArgPartsVec) {
if (Pair.first < Offset)
return false;
Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
}
// When stores are allowed the modification check below is skipped.
if (AreStoresAllowed)
return true;
// Make sure nothing may modify the loaded memory between function entry and
// any of the loads. TranspBlocks is the visited set of the inverse
// depth-first walks and is shared by all loads.
df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
for (LoadInst *Load : Loads) {
BasicBlock *BB = Load->getParent();
MemoryLocation Loc = MemoryLocation::get(Load);
// Within the block of the load: from its first instruction up to the load.
if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
return false;
// All blocks from which the block of the load can be reached.
for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
if (AAR.canBasicBlockModify(*TranspBB, Loc))
return false;
}
}
return true;
}
/// Returns true if, for every use of \p F, the user is a call and the target
/// reports \p Types as ABI compatible between the calling function and the
/// called function of that call. A use by anything other than a call makes
/// the result false.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  for (const Use &FnUse : F.uses()) {
    CallBase *Call = dyn_cast<CallBase>(FnUse.getUser());
    if (Call == nullptr)
      return false;

    const Function *Caller = Call->getCaller();
    const Function *Callee = Call->getCalledFunction();
    if (!TTI.areTypesABICompatible(Caller, Callee, Types))
      return false;
  }
  return true;
}
/// Tries to promote the pointer arguments of \p F to by-value arguments.
///
/// \param F candidate function.
/// \param FAM provides alias analysis and target information for \p F and is
///        forwarded to doPromotion.
/// \param MaxElements forwarded to findArgParts as the limit on the number of
///        parts per argument.
/// \param IsRecursive initial recursion flag; it is additionally set when
///        \p F turns out to call itself.
/// \returns the function created by doPromotion, or nullptr if \p F is not
///          eligible or none of its pointer arguments can be promoted.
///
/// Note that an sret attribute on a pointer argument is replaced by noalias on
/// the function and on all of its call sites before the argument is analysed,
/// i.e. even if no promotion happens afterwards.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Functions whose signature must not be touched: naked, externally visible,
  // variadic, or taking an inalloca argument.
  const bool SignatureIsOffLimits =
      F->hasFnAttribute(Attribute::Naked) || !F->hasLocalLinkage() ||
      F->isVarArg() ||
      F->getAttributes().hasAttrSomewhere(Attribute::InAlloca);
  if (SignatureIsOffLimits)
    return nullptr;

  // Only pointer-typed arguments are candidates.
  SmallVector<Argument *, 16> Candidates;
  for (Argument &A : F->args()) {
    if (!A.getType()->isPointerTy())
      continue;
    Candidates.push_back(&A);
  }
  if (Candidates.empty())
    return nullptr;

  // Every use has to be a direct, type-matching, non-musttail call. While at
  // it, notice calls of F from within F.
  for (Use &FnUse : F->uses()) {
    auto *Call = dyn_cast<CallBase>(FnUse.getUser());
    const bool IsPlainDirectCall =
        Call != nullptr && Call->isCallee(&FnUse) &&
        Call->getFunctionType() == F->getFunctionType();
    if (!IsPlainDirectCall)
      return nullptr;
    if (Call->isMustTailCall())
      return nullptr;
    if (Call->getFunction() == F)
      IsRecursive = true;
  }

  // F itself must not end any block in a musttail call either.
  const bool HasMustTailCall = llvm::any_of(*F, [](BasicBlock &Block) {
    return Block.getTerminatingMustTailCall() != nullptr;
  });
  if (HasMustTailCall)
    return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  for (Argument *Candidate : Candidates) {
    // Swap sret for noalias on the function and on every call site.
    if (Candidate->hasStructRetAttr()) {
      const unsigned Idx = Candidate->getArgNo();
      F->removeParamAttr(Idx, Attribute::StructRet);
      F->addParamAttr(Idx, Attribute::NoAlias);
      for (Use &FnUse : F->uses()) {
        CallBase &Call = cast<CallBase>(*FnUse.getUser());
        Call.removeParamAttr(Idx, Attribute::StructRet);
        Call.addParamAttr(Idx, Attribute::NoAlias);
      }
    }

    SmallVector<OffsetAndArgPart, 4> Parts;
    if (!findArgParts(Candidate, DL, AAR, MaxElements, IsRecursive, Parts))
      continue;

    // The part types become parameter types, so they have to be acceptable
    // at every call site.
    SmallVector<Type *, 4> PartTypes;
    for (const OffsetAndArgPart &Entry : Parts)
      PartTypes.push_back(Entry.second.Ty);
    if (!areTypesABICompatible(PartTypes, *F, TTI))
      continue;

    ArgsToPromote.insert({Candidate, std::move(Parts)});
  }

  if (ArgsToPromote.empty())
    return nullptr;
  return doPromotion(F, FAM, ArgsToPromote);
}
/// Runs argument promotion over every function of the SCC \p C, repeating
/// until a full sweep promotes nothing.
///
/// Each function that gets promoted is swapped for its replacement in the
/// call graph node, its cached analyses are cleared and it is erased; the
/// functions containing calls to the replacement have everything but their
/// CFG analyses invalidated.
///
/// \returns PreservedAnalyses::all() if nothing changed; otherwise a set that
///          preserves the function analysis manager proxy and all function
///          analyses (the affected ones were invalidated by hand above).
PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool EverChanged = false;

  for (bool SweepChanged = true; SweepChanged;) {
    SweepChanged = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    // An SCC with more than one node is treated as recursive up front.
    const bool SCCIsRecursive = C.size() > 1;

    for (LazyCallGraph::Node &Node : C) {
      Function &Original = Node.getFunction();
      if (Function *Promoted =
              promoteArguments(&Original, FAM, MaxElements, SCCIsRecursive)) {
        SweepChanged = true;

        // Swap the function behind the node, then get rid of the old one.
        C.getOuterRefSCC().replaceNodeFunction(Node, *Promoted);
        FAM.clear(Original, Original.getName());
        Original.eraseFromParent();

        // Callers had their call sites rewritten; keep only CFG analyses.
        PreservedAnalyses CallerPA;
        CallerPA.preserveSet<CFGAnalyses>();
        for (auto *PromotedUser : Promoted->users()) {
          auto *CallerFn = cast<CallBase>(PromotedUser)->getFunction();
          FAM.invalidate(*CallerFn, CallerPA);
        }
      }
    }

    EverChanged |= SweepChanged;
  }

  if (!EverChanged)
    return PreservedAnalyses::all();

  PreservedAnalyses Result;
  Result.preserve<FunctionAnalysisManagerCGSCCProxy>();
  Result.preserveSet<AllAnalysesOn<Function>>();
  return Result;
}