#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
// Everything below refers to LLVM entities without the llvm:: qualifier.
using namespace llvm;
// Pass tag; it is the pass-name argument of the OptimizationRemark objects
// emitted further down in this file.
#define DEBUG_TYPE "tailcallelim"
// Counter bumped once per recursive call that eliminateCall rewrites into a
// branch.
STATISTIC(NumEliminated, "Number of tail calls removed");
// Counter bumped once per return folded into an unconditional-branch
// predecessor by processBlock.
STATISTIC(NumRetDuped, "Number of return duplicated");
// Counter bumped once per accumulator PHI created by insertAccumulator.
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
/// Returns true when no instruction in \p F is a non-static alloca.
///
/// Functions without any alloca qualify trivially. The result is used by
/// TailRecursionEliminator::eliminate to decide whether the recursion-to-loop
/// rewrite may be attempted at all.
static bool canTRE(Function &F) {
  for (Instruction &Inst : instructions(F)) {
    if (auto *Alloca = dyn_cast<AllocaInst>(&Inst))
      if (!Alloca->isStaticAlloca())
        return false;
  }
  return true;
}
namespace {
struct AllocaDerivedValueTracker {
void walk(Value *Root) {
SmallVector<Use *, 32> Worklist;
SmallPtrSet<Use *, 32> Visited;
auto AddUsesToWorklist = [&](Value *V) {
for (auto &U : V->uses()) {
if (!Visited.insert(&U).second)
continue;
Worklist.push_back(&U);
}
};
AddUsesToWorklist(Root);
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
auto &CB = cast<CallBase>(*I);
if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U)))
continue;
bool IsNocapture =
CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U));
callUsesLocalStack(CB, IsNocapture);
if (IsNocapture) {
continue;
}
break;
}
case Instruction::Load: {
continue;
}
case Instruction::Store: {
if (U->getOperandNo() == 0)
EscapePoints.insert(I);
continue; }
case Instruction::BitCast:
case Instruction::GetElementPtr:
case Instruction::PHI:
case Instruction::Select:
case Instruction::AddrSpaceCast:
break;
default:
EscapePoints.insert(I);
break;
}
AddUsesToWorklist(I);
}
}
void callUsesLocalStack(CallBase &CB, bool IsNocapture) {
AllocaUsers.insert(&CB);
if (IsNocapture)
return;
if (!CB.onlyReadsMemory())
EscapePoints.insert(&CB);
}
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
}
/// Marks call instructions in \p F with the `tail` flag where this analysis
/// can show it is safe.
///
/// Two independent mechanisms are used:
///   1. A call that does not access memory and whose operands are all
///      constants or non-byval formal arguments of \p F is marked
///      immediately (a remark is emitted through \p ORE).
///   2. Any other call is deferred and marked at the end if no escape point
///      recorded by AllocaDerivedValueTracker can reach it and the call is
///      not itself a recorded user of the local stack.
///
/// \param F   Function whose calls are examined and possibly modified.
/// \param ORE Remark emitter; dereferenced unconditionally on path (1).
/// \returns true if at least one call was newly marked as a tail call.
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
  if (F.callsFunctionThatReturnsTwice())
    return false;

  // Collect every use of the local stack: byval arguments and allocas.
  AllocaDerivedValueTracker Tracker;
  for (Argument &Arg : F.args()) {
    if (Arg.hasByValAttr())
      Tracker.walk(&Arg);
  }
  for (auto &BB : F) {
    for (auto &I : BB)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
        Tracker.walk(AI);
  }

  bool Modified = false;

  // The enumerator order matters: the propagation below relies on
  // UNVISITED < UNESCAPED < ESCAPED (see `State < Escaped`), and a
  // default-constructed map entry must compare equal to UNVISITED.
  enum VisitType {
    UNVISITED,
    UNESCAPED,
    ESCAPED
  };
  DenseMap<BasicBlock *, VisitType> Visited;

  // Blocks are processed from two worklists; escaped blocks take priority so
  // that a block reachable in the escaped state is handled in that state.
  SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;

  // Calls that look safe so far; re-validated once the traversal has finished.
  SmallVector<CallInst *, 32> DeferredTails;

  BasicBlock *BB = &F.getEntryBlock();
  VisitType Escaped = UNESCAPED;
  do {
    for (auto &I : *BB) {
      // From the first escape point onwards the rest of this block (and all
      // of its successors) are treated as escaped.
      if (Tracker.EscapePoints.count(&I))
        Escaped = ESCAPED;

      CallInst *CI = dyn_cast<CallInst>(&I);
      // Skip non-calls, calls that are already marked, debug intrinsics and
      // pseudo probes.
      if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
          isa<PseudoProbeInst>(&I))
        continue;

      // Calls marked notail, or carrying operand bundles other than the two
      // listed here, are never marked.
      bool IsNoTail =
          CI->isNoTailCall() || CI->hasOperandBundlesOtherThan(
            {LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_ptrauth});

      if (!IsNoTail && CI->doesNotAccessMemory()) {
        // The callee does not access memory, so the call is safe as long as
        // every operand is computed outside of this function's frame: a
        // constant, or a formal argument that is not byval. This runs
        // regardless of the escape state.
        bool SafeToTail = true;
        for (auto &Arg : CI->args()) {
          // Inspect the operand VALUE. `Arg` is a Use whose user is always
          // `CI` itself, so testing `Arg.getUser()` could never match a
          // Constant or an Argument and rejected every call with operands.
          Value *ArgVal = Arg.get();
          if (isa<Constant>(ArgVal))
            continue;
          if (Argument *A = dyn_cast<Argument>(ArgVal))
            if (!A->hasByValAttr())
              continue;
          SafeToTail = false;
          break;
        }
        if (SafeToTail) {
          using namespace ore;
          ORE->emit([&]() {
            return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
                   << "marked as tail call candidate (readnone)";
          });
          CI->setTailCall();
          Modified = true;
          continue;
        }
      }

      if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
        DeferredTails.push_back(CI);
    }

    // Propagate the current state to successors; a block is (re)queued only
    // when its recorded state gets strictly worse.
    for (auto *SuccBB : successors(BB)) {
      auto &State = Visited[SuccBB];
      if (State < Escaped) {
        State = Escaped;
        if (State == ESCAPED)
          WorklistEscaped.push_back(SuccBB);
        else
          WorklistUnescaped.push_back(SuccBB);
      }
    }

    if (!WorklistEscaped.empty()) {
      BB = WorklistEscaped.pop_back_val();
      Escaped = ESCAPED;
    } else {
      BB = nullptr;
      while (!WorklistUnescaped.empty()) {
        auto *NextBB = WorklistUnescaped.pop_back_val();
        // Skip entries that were upgraded to ESCAPED after being queued.
        if (Visited[NextBB] == UNESCAPED) {
          BB = NextBB;
          Escaped = UNESCAPED;
          break;
        }
      }
    }
  } while (BB);

  for (CallInst *CI : DeferredTails) {
    // A block that only turned ESCAPED part-way through still has state
    // UNESCAPED here; calls after its escape point were never deferred, so
    // checking the block-entry state is sufficient.
    if (Visited[CI->getParent()] != ESCAPED) {
      LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
      CI->setTailCall();
      Modified = true;
    }
  }

  return Modified;
}
/// Decides whether instruction \p I, located after call \p CI, could be
/// executed before the call instead.
///
/// Debug intrinsics and lifetime.end markers whose pointer operand resolves to
/// an alloca are always accepted. Anything with side effects is rejected. A
/// load following a call with side effects is accepted only if \p AA reports
/// that the call does not modify the loaded location and the load is safe to
/// execute unconditionally. Finally, \p I must not use \p CI as an operand.
static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
  if (isa<DbgInfoIntrinsic>(I))
    return true;

  if (const auto *Intrin = dyn_cast<IntrinsicInst>(I)) {
    const bool IsLifetimeEnd =
        Intrin->getIntrinsicID() == Intrinsic::lifetime_end;
    if (IsLifetimeEnd && llvm::findAllocaForValue(Intrin->getArgOperand(1)))
      return true;
  }

  if (I->mayHaveSideEffects())
    return false;

  auto *Load = dyn_cast<LoadInst>(I);
  if (Load && CI->mayHaveSideEffects()) {
    // The call may write memory: make sure it cannot clobber what we load.
    if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(Load))))
      return false;

    // Hoisting must not introduce a trap.
    const DataLayout &Layout = Load->getModule()->getDataLayout();
    if (!isSafeToLoadUnconditionally(Load->getPointerOperand(),
                                     Load->getType(), Load->getAlign(), Layout,
                                     Load))
      return false;
  }

  // The instruction must not consume the call's result.
  return !is_contained(I->operands(), CI);
}
/// Returns true if \p I can serve as the accumulation step of an
/// accumulator-style recursion around call \p CI: it must be associative and
/// commutative, have exactly one of its two operands equal to \p CI, and its
/// single use must be a return instruction.
static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
  if (!(I->isAssociative() && I->isCommutative()))
    return false;

  assert(I->getNumOperands() == 2 &&
         "Associative/commutative operations should have 2 args!");

  // Exactly one operand may be the call result: reject "both" and "neither".
  const bool LHSIsCall = I->getOperand(0) == CI;
  const bool RHSIsCall = I->getOperand(1) == CI;
  if (LHSIsCall == RHSIsCall)
    return false;

  // The only permitted consumer is a single return.
  return I->hasOneUse() && isa<ReturnInst>(I->user_back());
}
/// Advances \p I past any debug-info intrinsics and returns the instruction it
/// ends up on. There is no end-of-block check, so the caller must guarantee
/// that a non-debug instruction follows.
static Instruction *firstNonDbg(BasicBlock::iterator I) {
  for (; isa<DbgInfoIntrinsic>(I); ++I) {
    // Nothing to do: just skipping debug intrinsics.
  }
  return &*I;
}
namespace {
// Holds the state needed to turn self-recursive tail calls of one function
// into branches back to a loop header. All members are private; the only
// public entry point is the static eliminate().
class TailRecursionEliminator {
// Function being transformed.
Function &F;
// Queried in findTRECandidate (isLoweredToCall).
const TargetTransformInfo *TTI;
// Passed to canMoveAboveCall for mod/ref queries.
AliasAnalysis *AA;
// Receives the "tailcall-recursion" remark in eliminateCall.
OptimizationRemarkEmitter *ORE;
// Used to keep dominator information up to date after CFG edits.
DomTreeUpdater &DTU;
// The block recursive calls branch back to; null until the first call is
// eliminated (set by createTailRecurseLoopHeader).
BasicBlock *HeaderBB = nullptr;
// One PHI per formal argument, in argument order, created in HeaderBB.
SmallVector<PHINode *, 8> ArgumentPHIs;
// PHI carrying the return value chosen so far; only created for non-void
// functions.
PHINode *RetPN = nullptr;
// i1 PHI that is true once RetPN holds a chosen return value.
PHINode *RetKnownPN = nullptr;
// Selects inserted to pick between RetPN and a block's own return value.
SmallVector<SelectInst *, 8> RetSelects;
// Accumulator PHI; null unless accumulator recursion was found.
PHINode *AccPN = nullptr;
// The accumulation instruction recorded by insertAccumulator.
Instruction *AccumulatorRecursionInstr = nullptr;
// Private constructor: instances are only created inside eliminate().
TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
DomTreeUpdater &DTU)
: F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
// Finds a self-recursive call marked `tail` in BB, or returns null.
CallInst *findTRECandidate(BasicBlock *BB);
// Creates a new entry block plus the argument/return PHIs in the old entry.
void createTailRecurseLoopHeader(CallInst *CI);
// Creates the accumulator PHI for AccRecInstr.
void insertAccumulator(Instruction *AccRecInstr);
// Rewrites CI and the following return into a branch to HeaderBB; returns
// false if the instructions between them prevent that.
bool eliminateCall(CallInst *CI);
// Simplifies leftover PHIs and fixes up the remaining returns.
void cleanupAndFinalize();
// Tries to eliminate a recursive call that ends BB; returns true if the
// function was changed.
bool processBlock(BasicBlock &BB);
// Helpers for byval operands of the recursive call (operand index OpndIdx).
void copyByValueOperandIntoLocalTemp(CallInst *CI, int OpndIdx);
void copyLocalTempOfByValueOperandIntoArguments(CallInst *CI, int OpndIdx);
public:
// Runs tail marking and then tail recursion elimination on F; returns true
// if F was modified.
static bool eliminate(Function &F, const TargetTransformInfo *TTI,
AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
DomTreeUpdater &DTU);
};
}
/// Searches \p BB backwards from its terminator for a direct call to the
/// function being transformed and returns it if it is marked as a tail call.
///
/// Returns null when the block holds nothing but its terminator, when no such
/// call exists, when the call found is not marked `tail`, or in the special
/// case where the entry block consists solely of a call forwarding all formal
/// arguments unchanged followed by the terminator and TTI reports that the
/// callee is not lowered to a call.
CallInst *TailRecursionEliminator::findTRECandidate(BasicBlock *BB) {
  Instruction *Term = BB->getTerminator();

  // There must be at least one instruction ahead of the terminator.
  if (&BB->front() == Term)
    return nullptr;

  // Walk towards the start of the block looking for a self-call.
  CallInst *Candidate = nullptr;
  for (BasicBlock::iterator Cursor(Term);; --Cursor) {
    Candidate = dyn_cast<CallInst>(Cursor);
    if (Candidate && Candidate->getCalledFunction() == &F)
      break;
    if (Cursor == BB->begin())
      return nullptr;
  }

  assert((!Candidate->isTailCall() || !Candidate->isNoTailCall()) &&
         "Incompatible call site attributes(Tail,NoTail)");
  if (!Candidate->isTailCall())
    return nullptr;

  // Entry block of the shape "call; terminator" (debug intrinsics aside)
  // whose callee TTI does not lower to a call.
  const bool IsBareForwardingShape =
      BB == &F.getEntryBlock() &&
      firstNonDbg(BB->front().getIterator()) == Candidate &&
      firstNonDbg(std::next(BB->begin())) == Term &&
      Candidate->getCalledFunction() &&
      !TTI->isLoweredToCall(Candidate->getCalledFunction());
  if (IsBareForwardingShape) {
    // Reject only if every actual operand is the matching formal argument.
    auto ActualIt = Candidate->arg_begin(), ActualEnd = Candidate->arg_end();
    Function::arg_iterator FormalIt = F.arg_begin(), FormalEnd = F.arg_end();
    for (; ActualIt != ActualEnd && FormalIt != FormalEnd;
         ++ActualIt, ++FormalIt)
      if (*ActualIt != &*FormalIt)
        break;
    if (ActualIt == ActualEnd && FormalIt == FormalEnd)
      return nullptr;
  }

  return Candidate;
}
// Prepares the function for the first eliminated call: the current entry
// block becomes the loop header ("tailrecurse"), a fresh block that only
// branches to it is created and takes over the old entry's name, and PHI
// nodes for the arguments and (for non-void functions) the return value are
// created in the header. CI is only used as the source of the debug location
// for the new branch.
void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
HeaderBB = &F.getEntryBlock();
// Create the new block in F, passing HeaderBB as the insert-before position.
BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB);
NewEntry->takeName(HeaderBB);
HeaderBB->setName("tailrecurse");
BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
BI->setDebugLoc(CI->getDebugLoc());
// Move every alloca with a constant array size from the header into
// NewEntry, ahead of the branch. The iterator is advanced (OEBI++) before the
// instruction is moved so the walk over HeaderBB stays valid.
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
NEBI = NewEntry->begin();
OEBI != E;)
if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
if (isa<ConstantInt>(AI->getArraySize()))
AI->moveBefore(&*NEBI);
// All PHIs below are inserted before what is now the header's first
// instruction.
Instruction *InsertPos = &HeaderBB->front();
// One PHI per formal argument. All existing uses of the argument are
// redirected to the PHI first; only afterwards is the argument itself added
// as the value incoming from NewEntry (doing it the other way round would
// rewrite that incoming value as well).
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
PHINode *PN =
PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos);
I->replaceAllUsesWith(PN); PN->addIncoming(&*I, NewEntry);
ArgumentPHIs.push_back(PN);
}
// For non-void functions, create the PHIs tracking the return value: on
// entry the value is poison and "known" is false.
Type *RetType = F.getReturnType();
if (!RetType->isVoidTy()) {
Type *BoolType = Type::getInt1Ty(F.getContext());
RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos);
RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos);
RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
}
// A new block and edge were introduced above; recompute the dominator
// information for the whole function rather than updating incrementally.
DTU.recalculate(*NewEntry->getParent());
}
/// Creates the accumulator PHI at the top of the loop header and remembers
/// \p AccRecInstr as the accumulation instruction.
///
/// For each current predecessor of the header, the PHI receives the identity
/// constant of the accumulation opcode if the predecessor is the function's
/// entry block, and the PHI itself otherwise. One extra incoming slot is
/// reserved beyond the current predecessor count.
void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
  assert(!AccPN && "Trying to insert multiple accumulators");

  AccumulatorRecursionInstr = AccRecInstr;

  const auto NumIncoming =
      std::distance(pred_begin(HeaderBB), pred_end(HeaderBB)) + 1;
  AccPN = PHINode::Create(F.getReturnType(), NumIncoming, "accumulator.tr",
                          &HeaderBB->front());

  for (BasicBlock *Pred : predecessors(HeaderBB)) {
    Value *Incoming = AccPN;
    if (Pred == &F.getEntryBlock())
      Incoming = ConstantExpr::getBinOpIdentity(AccRecInstr->getOpcode(),
                                                AccRecInstr->getType());
    AccPN->addIncoming(Incoming, Pred);
  }

  ++NumAccumAdded;
}
/// Replaces the byval operand \p OpndIdx of \p CI with a fresh alloca placed
/// at the start of the entry block, after emitting a memcpy in front of the
/// call that copies the operand's pointee into that alloca.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
                                                              int OpndIdx) {
  Type *ByValTy = CI->getParamByValType(OpndIdx);
  assert(ByValTy);
  const DataLayout &Layout = F.getParent()->getDataLayout();

  // Fall back to alignment 1 when the call site does not specify one.
  Align OpndAlign(CI->getParamAlign(OpndIdx).valueOrOne());

  Value *Source = CI->getArgOperand(OpndIdx);
  Instruction *EntryFront = &*F.getEntryBlock().begin();
  Value *Temp =
      new AllocaInst(ByValTy, Layout.getAllocaAddrSpace(), nullptr, OpndAlign,
                     Source->getName(), EntryFront);

  IRBuilder<> Builder(CI);
  Value *NumBytes = Builder.getInt64(Layout.getTypeAllocSize(ByValTy));
  Builder.CreateMemCpy(Temp, OpndAlign, Source, OpndAlign, NumBytes);

  CI->setArgOperand(OpndIdx, Temp);
}
/// Emits, in front of \p CI, a memcpy from the call's operand \p OpndIdx into
/// the function's own formal argument with the same index.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
    CallInst *CI, int OpndIdx) {
  Type *ByValTy = CI->getParamByValType(OpndIdx);
  assert(ByValTy);
  const DataLayout &Layout = F.getParent()->getDataLayout();

  // Fall back to alignment 1 when the call site does not specify one.
  Align OpndAlign(CI->getParamAlign(OpndIdx).valueOrOne());

  IRBuilder<> Builder(CI);
  Value *NumBytes = Builder.getInt64(Layout.getTypeAllocSize(ByValTy));
  Value *Destination = F.getArg(OpndIdx);
  Value *Source = CI->getArgOperand(OpndIdx);
  Builder.CreateMemCpy(Destination, OpndAlign, Source, OpndAlign, NumBytes);
}
// Replaces the recursive call CI and the return that terminates its block
// with an unconditional branch to the loop header, feeding the call's
// operands into the argument PHIs. Returns false, leaving the IR untouched,
// if an instruction between the call and the return blocks the rewrite;
// returns true once the rewrite is done.
// Precondition: the terminator of CI's block is a ReturnInst (cast<> asserts).
bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
ReturnInst *Ret = cast<ReturnInst>(CI->getParent()->getTerminator());
// Examine everything strictly between the call and the return. Each such
// instruction must either pass canMoveAboveCall or be an accumulation
// instruction; the latter is only accepted if no accumulator PHI exists yet.
Instruction *AccRecInstr = nullptr;
BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
if (canMoveAboveCall(&*BBI, CI, AA))
continue;
if (AccPN || !canTransformAccumulatorRecursion(&*BBI, CI))
return false;
AccRecInstr = &*BBI;
}
BasicBlock *BB = Ret->getParent();
using namespace ore;
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI)
<< "transforming tail recursion into loop";
});
// The loop header and its PHIs are created lazily on the first elimination.
if (!HeaderBB)
createTailRecurseLoopHeader(CI);
// First pass over the operands: copy every byval operand into its own
// temporary. This is completed for all operands before the second pass
// starts writing into the function's argument storage.
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I))
copyByValueOperandIntoLocalTemp(CI, I);
}
// Second pass: add this block's incoming value to each argument PHI. For
// byval operands the temporary is copied into the formal argument and the
// argument itself is the incoming value.
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I)) {
copyLocalTempOfByValueOperandIntoArguments(CI, I);
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
} else
ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB);
}
if (AccRecInstr) {
insertAccumulator(AccRecInstr);
// Replace whichever operand was the call result with the accumulator PHI:
// the comparison yields index 0 if operand 0 is CI, otherwise index 1.
AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
}
// RetPN exists only for non-void functions.
if (RetPN) {
if (Ret->getReturnValue() == CI || AccRecInstr) {
// The return value comes from the recursion itself: carry the tracked
// return value and its "known" flag around the loop unchanged.
RetPN->addIncoming(RetPN, BB);
RetKnownPN->addIncoming(RetKnownPN, BB);
} else {
// This block returns a value of its own: keep the already tracked value if
// one is known, otherwise take this block's value, and mark it as known.
SelectInst *SI = SelectInst::Create(
RetKnownPN, RetPN, Ret->getReturnValue(), "current.ret.tr", Ret);
RetSelects.push_back(SI);
RetPN->addIncoming(SI, BB);
RetKnownPN->addIncoming(ConstantInt::getTrue(RetKnownPN->getType()), BB);
}
// With an accumulator present, this block contributes either its updated
// accumulation or the unchanged accumulator.
if (AccPN)
AccPN->addIncoming(AccRecInstr ? AccRecInstr : AccPN, BB);
}
// Insert the back edge before the return, then erase the return and the call.
BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret);
NewBI->setDebugLoc(CI->getDebugLoc());
BB->getInstList().erase(Ret); BB->getInstList().erase(CI); DTU.applyUpdates({{DominatorTree::Insert, BB, HeaderBB}});
++NumEliminated;
return true;
}
// Final fix-ups after all blocks have been processed: removes argument PHIs
// that simplify to an existing value, and rewrites the remaining returns so
// that they honour the tracked return value and/or the accumulator.
void TailRecursionEliminator::cleanupAndFinalize() {
// Drop argument PHIs that simplifyInstruction can fold to another value.
for (PHINode *PN : ArgumentPHIs) {
if (Value *PNV = simplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
}
// RetPN is only set for non-void functions once a loop header exists.
if (RetPN) {
if (RetSelects.empty()) {
// No select was ever created, so the return-value PHIs are removed.
RetPN->dropAllReferences();
RetPN->eraseFromParent();
RetKnownPN->dropAllReferences();
RetKnownPN->eraseFromParent();
if (AccPN) {
// In front of every remaining return, insert a clone of the accumulation
// instruction that combines the accumulator with the returned value, and
// return the clone instead.
Instruction *AccRecInstr = AccumulatorRecursionInstr;
for (BasicBlock &BB : F) {
ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI)
continue;
Instruction *AccRecInstrNew = AccRecInstr->clone();
AccRecInstrNew->setName("accumulator.ret.tr");
// The comparison selects the operand slot that does NOT hold AccPN:
// index 1 if operand 0 is AccPN, otherwise index 0.
AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
RI->getOperand(0));
AccRecInstrNew->insertBefore(RI);
RI->setOperand(0, AccRecInstrNew);
}
}
} else {
// At least one select exists: every remaining return must yield the tracked
// return value when it is known, and its own value otherwise.
for (BasicBlock &BB : F) {
ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI)
continue;
SelectInst *SI = SelectInst::Create(
RetKnownPN, RetPN, RI->getOperand(0), "current.ret.tr", RI);
RetSelects.push_back(SI);
RI->setOperand(0, SI);
}
if (AccPN) {
// Apply the accumulation to the false operand of every select in
// RetSelects (those created in eliminateCall and those created just above).
Instruction *AccRecInstr = AccumulatorRecursionInstr;
for (SelectInst *SI : RetSelects) {
Instruction *AccRecInstrNew = AccRecInstr->clone();
AccRecInstrNew->setName("accumulator.ret.tr");
// As above: write into the operand slot that does not hold AccPN.
AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
SI->getFalseValue());
AccRecInstrNew->insertBefore(SI);
SI->setFalseValue(AccRecInstrNew);
}
}
}
}
}
/// Attempts tail recursion elimination for \p BB.
///
/// Two block shapes are handled: a block ending in a return, and a block
/// ending in an unconditional branch to a successor whose first non-PHI,
/// non-debug instruction is a return. In the latter case the return is first
/// folded into \p BB.
///
/// \returns true if the function was modified. In the branch case this is
/// true as soon as the return has been folded, even if eliminateCall
/// subsequently fails.
bool TailRecursionEliminator::processBlock(BasicBlock &BB) {
  Instruction *Term = BB.getTerminator();

  // Simple shape: the block already ends in a return.
  if (isa<ReturnInst>(Term)) {
    CallInst *Call = findTRECandidate(&BB);
    if (!Call)
      return false;
    return eliminateCall(Call);
  }

  auto *Branch = dyn_cast<BranchInst>(Term);
  if (!Branch || Branch->isConditional())
    return false;

  BasicBlock *Succ = Branch->getSuccessor(0);
  auto *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));
  if (!Ret)
    return false;

  CallInst *Call = findTRECandidate(&BB);
  if (!Call)
    return false;

  LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
                    << "INTO UNCOND BRANCH PRED: " << BB);
  FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
  ++NumRetDuped;

  // Remove the successor if folding took away its last predecessor; this
  // happens before eliminateCall erases the call.
  if (pred_empty(Succ))
    DTU.deleteBB(Succ);

  eliminateCall(Call);
  return true;
}
/// Entry point shared by both pass-manager wrappers.
///
/// Does nothing when \p F carries a true "disable-tail-calls" attribute.
/// Otherwise marks tail calls, and then, unless \p F is variadic or fails
/// canTRE, rewrites eligible self-recursive tail calls into loops.
///
/// \returns true if \p F was modified by either step.
bool TailRecursionEliminator::eliminate(Function &F,
                                        const TargetTransformInfo *TTI,
                                        AliasAnalysis *AA,
                                        OptimizationRemarkEmitter *ORE,
                                        DomTreeUpdater &DTU) {
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  bool Changed = markTails(F, ORE);

  // Marking is all that is done for variadic functions and for functions
  // rejected by canTRE.
  if (F.getFunctionType()->isVarArg())
    return Changed;
  if (!canTRE(F))
    return Changed;

  TailRecursionEliminator Eliminator(F, TTI, AA, ORE, DTU);
  for (BasicBlock &Block : F) {
    if (Eliminator.processBlock(Block))
      Changed = true;
  }
  Eliminator.cleanupAndFinalize();

  return Changed;
}
namespace {
struct TailCallElim : public FunctionPass {
static char ID; TailCallElim() : FunctionPass(ID) {
initializeTailCallElimPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
return TailRecursionEliminator::eliminate(
F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
}
};
}
// Definition of the pass's static ID member, which the constructor hands to
// FunctionPass.
char TailCallElim::ID = 0;
// Register the legacy pass under the command-line name "tailcallelim" and
// declare the passes listed between BEGIN and END as its dependencies.
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",
false, false)
/// Factory for the legacy pass. The pass object is heap-allocated and returned
/// as a raw pointer; no deallocation happens here.
FunctionPass *llvm::createTailCallEliminationPass() {
  FunctionPass *NewPass = new TailCallElim();
  return NewPass;
}
/// New pass-manager entry point.
///
/// Fetches the required analyses for \p F, picks up the dominator and
/// post-dominator trees only if they are already cached, and runs the
/// transformation. Reports everything as preserved when nothing changed;
/// otherwise only the dominator and post-dominator tree analyses.
PreservedAnalyses TailCallElimPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  // Either tree may be null if it has not been computed.
  auto *DomTree = AM.getCachedResult<DominatorTreeAnalysis>(F);
  auto *PostDomTree = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
  DomTreeUpdater DTU(DomTree, PostDomTree,
                     DomTreeUpdater::UpdateStrategy::Eager);

  if (!TailRecursionEliminator::eliminate(F, &TTI, &AA, &ORE, DTU))
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserve<DominatorTreeAnalysis>();
  Preserved.preserve<PostDominatorTreeAnalysis>();
  return Preserved;
}