#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Category tag picked up by the STATISTIC counters and LLVM_DEBUG output
// in this file.
#define DEBUG_TYPE "pre-RA-sched"
// Bumped once per successful memory-operand unfold in CopyAndMoveSuccessors.
STATISTIC(NumUnfolds, "Number of nodes unfolded");
// Bumped once per SUnit cloned in CopyAndMoveSuccessors.
STATISTIC(NumDups, "Number of duplicated nodes");
// Bumped once per copy pair created by InsertCopiesAndMoveSuccs.
STATISTIC(NumPRCopies, "Number of physical copies");
// Registers the "fast" scheduler; createFastDAGScheduler is the factory
// that builds a ScheduleDAGFast.
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
createFastDAGScheduler);
// Registers the "linearize" scheduler; createDAGLinearizer is the factory
// that builds a ScheduleDAGLinearize.
static RegisterScheduler
linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
createDAGLinearizer);
namespace {
/// FastPriorityQueue - Minimal worklist of SUnits. Despite the name no
/// priority ordering is applied: pop() returns the most recently pushed
/// node.
struct FastPriorityQueue {
  SmallVector<SUnit *, 16> Queue;

  /// Returns true when no node is waiting.
  bool empty() const { return Queue.empty(); }

  /// Appends \p U to the back of the worklist.
  void push(SUnit *U) { Queue.push_back(U); }

  /// Removes and returns the last pushed node, or nullptr when the
  /// worklist is empty.
  SUnit *pop() {
    return Queue.empty() ? nullptr : Queue.pop_back_val();
  }
};
/// ScheduleDAGFast - Bottom-up list scheduler built on ScheduleDAGSDNodes.
/// It tracks which physical registers are currently "live" (their defining
/// node has a scheduled user but has not been scheduled itself) so that
/// nodes which would clobber them can be delayed.
class ScheduleDAGFast : public ScheduleDAGSDNodes {
private:
  /// Nodes that have been released and are candidates for scheduling.
  FastPriorityQueue AvailableQueue;

  /// Number of registers currently recorded as live in LiveRegDefs.
  /// Initialized in-class so it never holds an indeterminate value, even
  /// before Schedule() resets it.
  unsigned NumLiveRegs = 0;

  /// Indexed by register number: the SUnit defining the register while it
  /// is live, nullptr otherwise.
  std::vector<SUnit*> LiveRegDefs;

  /// Indexed by register number: the cycle at which the register was marked
  /// live; reset to 0 when the register is released.
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGFast(MachineFunction &mf)
    : ScheduleDAGSDNodes(mf) {}

  /// Sizes the live-register tables, builds the scheduling graph and runs
  /// the bottom-up list scheduler.
  void Schedule() override;

  /// AddPred - Adds the predecessor edge \p D to \p SU.
  void AddPred(SUnit *SU, const SDep &D) {
    SU->addPred(D);
  }

  /// RemovePred - Removes the predecessor edge \p D from \p SU.
  void RemovePred(SUnit *SU, const SDep &D) {
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
  void ListScheduleBottomUp();

  /// forceUnitLatencies - This scheduler always asks for unit latencies.
  bool forceUnitLatencies() const override { return true; }
};
}
/// Schedule - Entry point of the fast scheduler: size the live-register
/// tables, build the scheduling graph, then run the bottom-up list
/// scheduling loop.
void ScheduleDAGFast::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");

  // Both tracking tables get one slot per target register.
  const unsigned RegCount = TRI->getNumRegs();
  NumLiveRegs = 0;
  LiveRegDefs.resize(RegCount, nullptr);
  LiveRegCycles.resize(RegCount, 0);

  BuildSchedGraph(nullptr);
  LLVM_DEBUG(dump());

  ListScheduleBottomUp();
}
/// ReleasePred - Account for one more scheduled successor of the node at
/// the far end of \p PredEdge. Once its outstanding-successor count hits
/// zero the node is marked available and queued, unless it is the special
/// EntrySU.
void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *const Producer = PredEdge->getSUnit();

#ifndef NDEBUG
  // Releasing a node with no outstanding successors means the bookkeeping
  // is broken; report and abort.
  if (Producer->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*Producer);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif

  --Producer->NumSuccsLeft;
  if (Producer->NumSuccsLeft != 0)
    return;
  if (Producer == &EntrySU)
    return;

  Producer->isAvailable = true;
  AvailableQueue.push(Producer);
}
/// ReleasePredecessors - Release every predecessor of \p SU. For each
/// assigned-register dependency whose register is not yet tracked, record
/// the predecessor as the live definition and remember \p CurCycle.
void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  for (SDep &Edge : SU->Preds) {
    ReleasePred(SU, &Edge);

    if (!Edge.isAssignedRegDep())
      continue;

    // Only the first user seen for a register marks it live.
    const unsigned Reg = Edge.getReg();
    if (LiveRegDefs[Reg])
      continue;

    ++NumLiveRegs;
    LiveRegDefs[Reg] = Edge.getSUnit();
    LiveRegCycles[Reg] = CurCycle;
  }
}
/// ScheduleNodeBottomUp - Append \p SU to the schedule at \p CurCycle,
/// release its predecessors, and clear the live-register entries for the
/// registers it defines.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // A register defined by SU stops being live once SU itself is placed.
  // The entry is cleared only when the recorded cycle matches the height of
  // the successor on this edge.
  for (SDep &Edge : SU->Succs) {
    if (!Edge.isAssignedRegDep())
      continue;

    const unsigned Reg = Edge.getReg();
    if (LiveRegCycles[Reg] != Edge.getSUnit()->getHeight())
      continue;

    assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
    assert(LiveRegDefs[Reg] == SU &&
           "Physical register dependency violated?");
    --NumLiveRegs;
    LiveRegDefs[Reg] = nullptr;
    LiveRegCycles[Reg] = 0;
  }

  SU->isScheduled = true;
}
/// CopyAndMoveSuccessors - Try to break a live physical register dependency
/// by duplicating \p SU. If the node produces a chain result (MVT::Other),
/// the target is first asked to unfold its memory operand into a separate
/// load plus operation; the operation is then the node that gets cloned.
/// Scheduled successors of the original are moved over to the clone.
///
/// \returns the new SUnit, or nullptr when \p SU has no node, is glued, has
/// a glue result or glue operand, or cannot be unfolded.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
  // Check for a missing node before dereferencing it.
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (N->getGluedNode())
    return nullptr;

  // Glue results make the node uncopyable; a chain result means we must
  // try to unfold the memory operand first.
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    // NewNodes[0] is the load, NewNodes[1] the operation that consumes it.
    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    // Redirect users of the folded node: its first NumVals results map to
    // the new operation, its last result to result 1 of the load.
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    SUnit *UnfoldedSU = newSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(UnfoldedSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        UnfoldedSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      UnfoldedSU->isCommutable = true;

    // The load may already have an SUnit (its node id is set); in that case
    // reuse it and do not rewire any edges onto it.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = newSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
    }

    // Partition the original node's edges: control edges, edges whose
    // producer feeds the load, and everything else.
    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SDep &Pred : SU->Preds) {
      if (Pred.isCtrl())
        ChainPred = Pred;
      else if (Pred.getSUnit()->getNode() &&
               Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(Pred);
      else
        NodePreds.push_back(Pred);
    }
    for (SDep &Succ : SU->Succs) {
      if (Succ.isCtrl())
        ChainSuccs.push_back(Succ);
      else
        NodeSuccs.push_back(Succ);
    }

    // Move the edges off the original node onto the load / new operation.
    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (const SDep &Pred : LoadPreds) {
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (const SDep &Pred : NodePreds) {
      RemovePred(SU, Pred);
      AddPred(UnfoldedSU, Pred);
    }
    for (SDep D : NodeSuccs) {
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(UnfoldedSU);
      AddPred(SuccDep, D);
    }
    for (SDep D : ChainSuccs) {
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      // The new operation must come after the new load.
      SDep D(LoadSU, SDep::Barrier);
      D.setLatency(LoadSU->Latency);
      AddPred(UnfoldedSU, D);
    }

    ++NumUnfolds;

    // With no unscheduled successors left the unfolded node can be used
    // directly; no clone is needed.
    if (UnfoldedSU->NumSuccsLeft == 0) {
      UnfoldedSU->isAvailable = true;
      return UnfoldedSU;
    }
    SU = UnfoldedSU;
  }

  LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
  SUnit *NewSU = Clone(SU);

  // The clone gets the same non-artificial predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only already-scheduled successors are moved to the clone. Removal from
  // the original is deferred so SU->Succs is not modified while iterating.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (const std::pair<SUnit *, SDep> &Del : DelDeps)
    RemovePred(Del.first, Del.second);

  ++NumDups;
  return NewSU;
}
/// InsertCopiesAndMoveSuccs - Create a pair of node-less copy SUnits for
/// \p Reg: one copying from \p SrcRC to \p DestRC and one copying back.
/// Scheduled successors of \p SU are given a dependency on the copy-back
/// unit, and the chain SU -> copy-from -> copy-to is wired up with data
/// edges. Both units are appended to \p Copies (copy-from first).
void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *const OutCopy = newSUnit(static_cast<SDNode *>(nullptr));
  OutCopy->CopySrcRC = SrcRC;
  OutCopy->CopyDstRC = DestRC;

  SUnit *const BackCopy = newSUnit(static_cast<SDNode *>(nullptr));
  BackCopy->CopySrcRC = DestRC;
  BackCopy->CopyDstRC = SrcRC;

  // Re-point scheduled, non-artificial successors at the copy-back unit.
  // Removals are queued so SU->Succs is not edited during the walk.
  // NOTE(review): unlike CopyAndMoveSuccessors, the queued edge is the raw
  // successor edge (its endpoint is not reset to SU) - confirm RemovePred
  // matches it as intended.
  SmallVector<std::pair<SUnit *, SDep>, 4> Pending;
  for (SDep &Edge : SU->Succs) {
    if (Edge.isArtificial())
      continue;
    SUnit *const User = Edge.getSUnit();
    if (!User->isScheduled)
      continue;

    SDep Moved = Edge;
    Moved.setSUnit(BackCopy);
    AddPred(User, Moved);
    Pending.push_back(std::make_pair(User, Edge));
  }
  for (const std::pair<SUnit *, SDep> &Entry : Pending)
    RemovePred(Entry.first, Entry.second);

  // SU --(Reg)--> copy-from
  SDep IntoCopy(SU, SDep::Data, Reg);
  IntoCopy.setLatency(SU->Latency);
  AddPred(OutCopy, IntoCopy);

  // copy-from --> copy-to
  SDep IntoBack(OutCopy, SDep::Data, 0);
  IntoBack.setLatency(OutCopy->Latency);
  AddPred(BackCopy, IntoBack);

  Copies.push_back(OutCopy);
  Copies.push_back(BackCopy);

  ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the value type of the result of \p N
/// that corresponds to physical register \p Reg. For CopyFromReg this is
/// result 1; for a machine node it is the result at index
/// (number of explicit defs + position of \p Reg in the implicit-def list).
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  if (N->getOpcode() == ISD::CopyFromReg)
    return N->getSimpleValueType(1);

  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");

  // Implicit-def results follow the explicit ones; walk the
  // zero-terminated list until Reg is found.
  unsigned ResultIdx = MCID.getNumDefs();
  const MCPhysReg *ImpDef = MCID.getImplicitDefs();
  while (*ImpDef && Reg != *ImpDef) {
    ++ResultIdx;
    ++ImpDef;
  }
  return N->getSimpleValueType(ResultIdx);
}
/// CheckForLiveRegDef - For \p Reg and every register aliasing it, check
/// whether a live definition exists that is neither \p SU nor (when
/// \p Node is given) a unit wrapping \p Node. Each such register is
/// appended to \p LRegs the first time it is seen; \p RegAdded is used for
/// de-duplication.
///
/// \returns true if at least one register was appended by this call.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit *> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI,
                               const SDNode *Node = nullptr) {
  bool FoundNew = false;
  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
    SUnit *const LiveDef = LiveRegDefs[*AI];

    // Not live, or live because of this very definition: no interference.
    const bool Harmless = !LiveDef || LiveDef == SU ||
                          (Node && LiveDef->getNode() == Node);
    if (Harmless)
      continue;

    if (!RegAdded.insert(*AI).second)
      continue;
    LRegs.push_back(*AI);
    FoundNew = true;
  }
  return FoundNew;
}
/// DelayForLiveRegsBottomUp - Collect into \p LRegs the live physical
/// registers that scheduling \p SU now would interfere with. Checked are
/// SU's assigned-register predecessors and, for SU's node and every node
/// glued to it: inline-asm def/early-clobber/clobber operands, a CopyToReg
/// destination, and the implicit defs of machine nodes.
///
/// \returns true when \p LRegs is non-empty on exit; false immediately if
/// no register is live.
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
                                              SmallVectorImpl<unsigned> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;

  for (SDep &Edge : SU->Preds) {
    if (!Edge.isAssignedRegDep())
      continue;
    CheckForLiveRegDef(Edge.getSUnit(), Edge.getReg(), LiveRegDefs,
                       RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    const unsigned Opc = Node->getOpcode();

    if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR) {
      // A trailing glue operand is not part of the operand groups.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;

      // Operands come in groups: one flag word followed by the number of
      // register operands it announces.
      unsigned OpIdx = InlineAsm::Op_FirstOperand;
      while (OpIdx != NumOps) {
        const unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(OpIdx))->getZExtValue();
        unsigned RegsLeft = InlineAsm::getNumOperandRegisters(Flags);
        ++OpIdx; // Step past the flag word.

        const bool WritesRegs = InlineAsm::isRegDefKind(Flags) ||
                                InlineAsm::isRegDefEarlyClobberKind(Flags) ||
                                InlineAsm::isClobberKind(Flags);
        if (!WritesRegs) {
          OpIdx += RegsLeft;
          continue;
        }

        for (; RegsLeft != 0; --RegsLeft, ++OpIdx) {
          unsigned Reg =
            cast<RegisterSDNode>(Node->getOperand(OpIdx))->getReg();
          if (Register::isPhysicalRegister(Reg))
            CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
        }
      }
      continue;
    }

    if (Opc == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      if (Reg.isPhysical()) {
        // A live def coming from the copied value's own node is tolerated.
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
      }
    }

    if (!Node->isMachineOpcode())
      continue;
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef)
      CheckForLiveRegDef(SU, *ImpDef, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
void ScheduleDAGFast::ListScheduleBottomUp() {
unsigned CurCycle = 0;
ReleasePredecessors(&ExitSU, CurCycle);
if (!SUnits.empty()) {
SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
RootSU->isAvailable = true;
AvailableQueue.push(RootSU);
}
SmallVector<SUnit*, 4> NotReady;
DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue.empty()) {
bool Delayed = false;
LRegsMap.clear();
SUnit *CurSU = AvailableQueue.pop();
while (CurSU) {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
Delayed = true;
LRegsMap.insert(std::make_pair(CurSU, LRegs));
CurSU->isPending = true; NotReady.push_back(CurSU);
CurSU = AvailableQueue.pop();
}
if (Delayed && !CurSU) {
if (!CurSU) {
SUnit *TrySU = NotReady[0];
SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
SUnit *NewDef = nullptr;
if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
if (!DestRC && !NewDef)
report_fatal_error("Can't handle live physical "
"register dependency!");
}
if (!NewDef) {
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
if (!CurSU) {
llvm_unreachable("Unable to resolve live physical register dependencies!");
}
}
for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
NotReady[i]->isPending = false;
if (NotReady[i]->isAvailable)
AvailableQueue.push(NotReady[i]);
}
NotReady.clear();
if (CurSU)
ScheduleNodeBottomUp(CurSU, CurCycle);
++CurCycle;
}
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
VerifyScheduledSequence(true);
#endif
}
namespace {
// ScheduleDAGLinearize - Scheduler registered as "linearize". It records
// the SDNodes in a linear order (Schedule) and later emits them in the
// reverse of that order (EmitSchedule).
class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
public:
ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
// Computes use counts for all DAG nodes and fills Sequence starting from
// the DAG root.
void Schedule() override;
// Emits the nodes of Sequence, back to front, and returns the block the
// emitter ended in; InsertPos is updated to the emitter's final position.
MachineBasicBlock *
EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
private:
// Nodes in the order ScheduleNode visited them.
std::vector<SDNode*> Sequence;
// Maps a node with a used glue result to the node returned by
// findGluedUser() for it.
DenseMap<SDNode*, SDNode*> GluedMap;
// Appends N to Sequence and recursively schedules operands whose
// remaining use count (kept in the node id) drops to zero.
void ScheduleNode(SDNode *N);
};
}
/// ScheduleNode - Append \p N to Sequence, then walk its operands from last
/// to first. A trailing glue operand is scheduled immediately; every other
/// operand has its pending-use count (stored in the node id) decremented
/// and is scheduled once the count reaches zero. \p N must have a node id
/// of 0 on entry.
void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
  if (N->getNodeId() != 0)
    llvm_unreachable(nullptr);

  // EntryToken and passive non-machine nodes are not recorded.
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))
      return;
  }

  LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
  LLVM_DEBUG(N->dump(DAG));
  Sequence.push_back(N);

  const unsigned NumOps = N->getNumOperands();
  SDNode *GluedOpN = nullptr;
  for (unsigned Remaining = NumOps; Remaining != 0; --Remaining) {
    const SDValue &Op = N->getOperand(Remaining-1);
    SDNode *OpN = Op.getNode();

    // Only the last operand can be the glue operand; place it right after
    // N in Sequence regardless of its count.
    const bool IsLastOperand = Remaining == NumOps;
    if (IsLastOperand && Op.getValueType() == MVT::Glue) {
      GluedOpN = OpN;
      assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
      OpN->setNodeId(0);
      ScheduleNode(OpN);
      continue;
    }

    // Further references to the glue operand were handled above.
    if (OpN == GluedOpN)
      continue;

    // A use of a glue producer is charged to its recorded glued user,
    // unless that user is N itself.
    DenseMap<SDNode*, SDNode*>::iterator Mapped = GluedMap.find(OpN);
    if (Mapped != GluedMap.end() && Mapped->second != N)
      OpN = Mapped->second;

    unsigned Degree = OpN->getNodeId();
    assert(Degree > 0 && "Predecessor over-released!");
    --Degree;
    OpN->setNodeId(Degree);
    if (Degree == 0)
      ScheduleNode(OpN);
  }
}
/// findGluedUser - Follow getGluedUser() links starting at \p N and return
/// the last node of that chain (\p N itself if it has no glued user).
static SDNode *findGluedUser(SDNode *N) {
  for (;;) {
    SDNode *Next = N->getGluedUser();
    if (!Next)
      return N;
    N = Next;
  }
}
void ScheduleDAGLinearize::Schedule() {
LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
SmallVector<SDNode*, 8> Glues;
unsigned DAGSize = 0;
for (SDNode &Node : DAG->allnodes()) {
SDNode *N = &Node;
unsigned Degree = N->use_size();
N->setNodeId(Degree);
unsigned NumVals = N->getNumValues();
if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
N->hasAnyUseOfValue(NumVals-1)) {
SDNode *User = findGluedUser(N);
if (User) {
Glues.push_back(N);
GluedMap.insert(std::make_pair(N, User));
}
}
if (N->isMachineOpcode() ||
(N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
++DAGSize;
}
for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
SDNode *Glue = Glues[i];
SDNode *GUser = GluedMap[Glue];
unsigned Degree = Glue->getNodeId();
unsigned UDegree = GUser->getNodeId();
SDNode *ImmGUser = Glue->getGluedUser();
for (const SDNode *U : Glue->uses())
if (U == ImmGUser)
--Degree;
GUser->setNodeId(UDegree + Degree);
Glue->setNodeId(1);
}
Sequence.reserve(DAGSize);
ScheduleNode(DAG->getRoot().getNode());
}
/// EmitSchedule - Emit the nodes recorded in Sequence, last-recorded first,
/// through an InstrEmitter. After each node, any of its debug values that
/// have not been emitted yet are inserted at the emitter's current
/// position. On return \p InsertPos holds the emitter's final insert
/// position.
///
/// \returns the block the emitter is positioned in after emission.
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos,
                       DAG->getUseInstrRefDebugInfo());
  DenseMap<SDValue, Register> VRBaseMap;

  LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });

  // Block used for debug-value insertion; captured once before emission.
  MachineBasicBlock *EmitBB = Emitter.getBlock();
  for (auto It = Sequence.rbegin(), End = Sequence.rend(); It != End; ++It) {
    SDNode *N = *It;
    LLVM_DEBUG(N->dump(DAG));
    Emitter.EmitNode(N, false, false, VRBaseMap);

    if (!N->getHasDebugValue())
      continue;

    MachineBasicBlock::iterator DbgPos = Emitter.getInsertPos();
    for (auto *DV : DAG->GetDbgValues(N)) {
      if (DV->isEmitted())
        continue;
      if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
        EmitBB->insert(DbgPos, DbgMI);
    }
  }

  LLVM_DEBUG(dbgs() << '\n');

  InsertPos = Emitter.getInsertPos();
  return Emitter.getBlock();
}
/// createFastDAGScheduler - Factory for the "fast" scheduler: returns a
/// newly allocated ScheduleDAGFast for the function held by \p IS. The
/// optimization level argument is ignored.
llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  MachineFunction &Fn = *IS->MF;
  return new ScheduleDAGFast(Fn);
}
/// createDAGLinearizer - Factory for the "linearize" scheduler: returns a
/// newly allocated ScheduleDAGLinearize for the function held by \p IS.
/// The optimization level argument is ignored.
llvm::ScheduleDAGSDNodes *
llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
  MachineFunction &Fn = *IS->MF;
  return new ScheduleDAGLinearize(Fn);
}