#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/AntiDepBreaker.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
cl::desc("Enable scheduling after register allocation"),
cl::init(false), cl::Hidden);
static cl::opt<std::string>
EnableAntiDepBreaking("break-anti-dependencies",
cl::desc("Break post-RA scheduling anti-dependencies: "
"\"critical\", \"all\", or \"none\""),
cl::init("none"), cl::Hidden);
static cl::opt<int>
DebugDiv("postra-sched-debugdiv",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
static cl::opt<int>
DebugMod("postra-sched-debugmod",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
AntiDepBreaker::~AntiDepBreaker() = default;
namespace {
class PostRAScheduler : public MachineFunctionPass {
const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
public:
static char ID;
PostRAScheduler() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
bool runOnMachineFunction(MachineFunction &Fn) override;
private:
bool enablePostRAScheduler(
const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
class SchedulePostRATDList : public ScheduleDAGInstrs {
LatencyPriorityQueue AvailableQueue;
std::vector<SUnit*> PendingQueue;
ScheduleHazardRecognizer *HazardRec;
AntiDepBreaker *AntiDepBreak;
AliasAnalysis *AA;
std::vector<SUnit*> Sequence;
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
unsigned EndIndex = 0;
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
const RegisterClassInfo &,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
~SchedulePostRATDList() override;
void startBlock(MachineBasicBlock *BB) override;
void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) override;
void exitRegion() override;
void schedule() override;
void EmitSchedule();
void Observe(MachineInstr &MI, unsigned Count);
void finishBlock() override;
private:
void postprocessDAG();
void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
void ListScheduleTopDown();
void dumpSchedule() const;
void emitNoop(unsigned CurCycle);
};
}
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
: ScheduleDAGInstrs(MF, &MLI), AA(AA) {
const InstrItineraryData *InstrItins =
MF.getSubtarget().getInstrItineraryData();
HazardRec =
MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
InstrItins, this);
MF.getSubtarget().getPostRAMutations(Mutations);
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
"Live-ins must be accurate for anti-dependency breaking");
AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL)
? createAggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs)
: ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL)
? createCriticalAntiDepBreaker(MF, RCI)
: nullptr));
}
SchedulePostRATDList::~SchedulePostRATDList() {
delete HazardRec;
delete AntiDepBreak;
}
void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) {
ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
Sequence.clear();
}
void SchedulePostRATDList::exitRegion() {
LLVM_DEBUG({
dbgs() << "*** Final schedule ***\n";
dumpSchedule();
dbgs() << '\n';
});
ScheduleDAGInstrs::exitRegion();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (const SUnit *SU : Sequence) {
if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
}
}
#endif
bool PostRAScheduler::enablePostRAScheduler(
const TargetSubtargetInfo &ST,
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
Mode = ST.getAntiDepBreakMode();
ST.getCriticalPathRCs(CriticalPathRCs);
if (EnablePostRAScheduler.getPosition() > 0)
return EnablePostRAScheduler;
return ST.enablePostRAScheduler() &&
OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
TII = Fn.getSubtarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
RegClassInfo.runOnMachineFunction(Fn);
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
AntiDepMode, CriticalPathRCs))
return false;
if (EnableAntiDepBreaking.getPosition() > 0) {
AntiDepMode = (EnableAntiDepBreaking == "all")
? TargetSubtargetInfo::ANTIDEP_ALL
: ((EnableAntiDepBreaking == "critical")
? TargetSubtargetInfo::ANTIDEP_CRITICAL
: TargetSubtargetInfo::ANTIDEP_NONE);
}
LLVM_DEBUG(dbgs() << "PostRAScheduler\n");
SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
for (auto &MBB : Fn) {
#ifndef NDEBUG
if (DebugDiv > 0) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
<< printMBBReference(MBB) << " ***\n";
}
#endif
Scheduler.startBlock(&MBB);
MachineBasicBlock::iterator Current = MBB.end();
unsigned Count = MBB.size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
MachineInstr &MI = *std::prev(I);
--Count;
if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = &MI;
CurrentCount = Count;
Scheduler.Observe(MI, CurrentCount);
}
I = MI;
if (MI.isBundle())
Count -= MI.getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB.begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
Scheduler.finishBlock();
Scheduler.fixupKills(MBB);
}
return true;
}
void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
ScheduleDAGInstrs::startBlock(BB);
HazardRec->Reset();
if (AntiDepBreak)
AntiDepBreak->StartBlock(BB);
}
void SchedulePostRATDList::schedule() {
buildSchedGraph(AA);
if (AntiDepBreak) {
unsigned Broken =
AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
EndIndex, DbgValues);
if (Broken != 0) {
ScheduleDAG::clearDAG();
buildSchedGraph(AA);
NumFixedAnti += Broken;
}
}
postprocessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
LLVM_DEBUG(dump());
AvailableQueue.initNodes(SUnits);
ListScheduleTopDown();
AvailableQueue.releaseState();
}
void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
if (AntiDepBreak)
AntiDepBreak->Observe(MI, Count, EndIndex);
}
void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak)
AntiDepBreak->FinishBlock();
ScheduleDAGInstrs::finishBlock();
}
void SchedulePostRATDList::postprocessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
if (SuccEdge->isWeak()) {
--SuccSU->WeakPredsLeft;
return;
}
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
#endif
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
PendingQueue.push_back(SuccSU);
}
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
ReleaseSucc(SU, &*I);
}
}
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
LLVM_DEBUG(dumpNode(*SU));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() &&
"Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
ReleaseSuccessors(SU);
SU->isScheduled = true;
AvailableQueue.scheduledNode(SU);
}
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
Sequence.push_back(nullptr); ++NumNoops;
}
void SchedulePostRATDList::ListScheduleTopDown() {
unsigned CurCycle = 0;
HazardRec->Reset();
ReleaseSuccessors(&EntrySU);
for (SUnit &SUnit : SUnits) {
if (!SUnit.NumPredsLeft && !SUnit.isAvailable) {
AvailableQueue.push(&SUnit);
SUnit.isAvailable = true;
}
}
bool CycleHasInsts = false;
std::vector<SUnit*> NotReady;
Sequence.reserve(SUnits.size());
while (!AvailableQueue.empty() || !PendingQueue.empty()) {
unsigned MinDepth = ~0u;
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
if (PendingQueue[i]->getDepth() <= CurCycle) {
AvailableQueue.push(PendingQueue[i]);
PendingQueue[i]->isAvailable = true;
PendingQueue[i] = PendingQueue.back();
PendingQueue.pop_back();
--i; --e;
} else if (PendingQueue[i]->getDepth() < MinDepth)
MinDepth = PendingQueue[i]->getDepth();
}
LLVM_DEBUG(dbgs() << "\n*** Examining Available\n";
AvailableQueue.dump(this));
SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
bool HasNoopHazards = false;
while (!AvailableQueue.empty()) {
SUnit *CurSUnit = AvailableQueue.pop();
ScheduleHazardRecognizer::HazardType HT =
HazardRec->getHazardType(CurSUnit, 0);
if (HT == ScheduleHazardRecognizer::NoHazard) {
if (HazardRec->ShouldPreferAnother(CurSUnit)) {
if (!NotPreferredSUnit) {
NotPreferredSUnit = CurSUnit;
continue;
}
} else {
FoundSUnit = CurSUnit;
break;
}
}
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
NotReady.push_back(CurSUnit);
}
if (NotPreferredSUnit) {
if (!FoundSUnit) {
LLVM_DEBUG(
dbgs() << "*** Will schedule a non-preferred instruction...\n");
FoundSUnit = NotPreferredSUnit;
} else {
AvailableQueue.push(NotPreferredSUnit);
}
NotPreferredSUnit = nullptr;
}
if (!NotReady.empty()) {
AvailableQueue.push_all(NotReady);
NotReady.clear();
}
if (FoundSUnit) {
unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
for (unsigned i = 0; i != NumPreNoops; ++i)
emitNoop(CurCycle);
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
if (HazardRec->atIssueLimit()) {
LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle
<< '\n');
HazardRec->AdvanceCycle();
++CurCycle;
CycleHasInsts = false;
}
} else {
if (CycleHasInsts) {
LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
} else if (!HasNoopHazards) {
LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
HazardRec->AdvanceCycle();
++NumStalls;
} else {
emitNoop(CurCycle);
}
++CurCycle;
CycleHasInsts = false;
}
}
#ifndef NDEBUG
unsigned ScheduledNodes = VerifyScheduledDAG(false);
unsigned Noops = llvm::count(Sequence, nullptr);
assert(Sequence.size() - Noops == ScheduledNodes &&
"The number of nodes scheduled doesn't match the expected number!");
#endif }
void SchedulePostRATDList::EmitSchedule() {
RegionBegin = RegionEnd;
if (FirstDbgValue)
BB->splice(RegionEnd, BB, FirstDbgValue);
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
BB->splice(RegionEnd, BB, SU->getInstr());
else
TII->insertNoop(*BB, RegionEnd);
if (i == 0)
RegionBegin = std::prev(RegionEnd);
}
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
MachineInstr *DbgValue = P.first;
MachineBasicBlock::iterator OrigPrivMI = P.second;
BB->splice(++OrigPrivMI, BB, DbgValue);
}
DbgValues.clear();
FirstDbgValue = nullptr;
}