#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "frame-info"
static cl::opt<bool> EnableSpillVGPRToAGPR(
"amdgpu-spill-vgpr-to-agpr",
cl::desc("Enable spilling VGPRs to AGPRs"),
cl::ReallyHidden,
cl::init(true));
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
LivePhysRegs &LiveRegs,
const TargetRegisterClass &RC,
bool Unused = false) {
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
if (Unused) {
for (MCRegister Reg : RC) {
if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
return Reg;
}
} else {
for (MCRegister Reg : RC) {
if (LiveRegs.available(MRI, Reg))
return Reg;
}
}
return MCRegister();
}
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
LivePhysRegs &LiveRegs,
Register &TempSGPR,
Optional<int> &FrameIndex,
bool IsFP) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
TargetStackID::SGPRSpill);
if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
llvm_unreachable("allocate SGPR spill should have worked");
FrameIndex = NewFI;
LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
<< '\n');
return;
}
TempSGPR = findScratchNonCalleeSaveRegister(
MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
if (!TempSGPR) {
int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
TargetStackID::SGPRSpill);
if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
FrameIndex = NewFI;
LLVM_DEBUG(
auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
<< printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
} else {
MF.getFrameInfo().RemoveStackObject(NewFI);
FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
<< (IsFP ? "FP" : "BP") << '\n');
}
} else {
LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
<< printReg(TempSGPR, TRI) << '\n');
}
}
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
LiveRegs.removeReg(SpillReg);
}
static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
}
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, const SIInstrInfo *TII,
Register TargetReg) {
MachineFunction *MF = MBB.getParent();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
if (MFI->getGITPtrHigh() != 0xffffffff) {
BuildMI(MBB, I, DL, SMovB32, TargetHi)
.addImm(MFI->getGITPtrHigh())
.addReg(TargetReg, RegState::ImplicitDefine);
} else {
const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
BuildMI(MBB, I, DL, GetPC64, TargetReg);
}
Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
MF->getRegInfo().addLiveIn(GitPtrLo);
MBB.addLiveIn(GitPtrLo);
BuildMI(MBB, I, DL, SMovB32, TargetLo)
.addReg(GitPtrLo);
}
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Register FlatScrInitLo;
Register FlatScrInitHi;
if (ST.isAmdPalOS()) {
LivePhysRegs LiveRegs;
LiveRegs.init(*TRI);
LiveRegs.addLiveIns(MBB);
MachineRegisterInfo &MRI = MF.getRegInfo();
Register FlatScrInit = AMDGPU::NoRegister;
ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
AllSGPR64s = AllSGPR64s.slice(
std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR64s) {
if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break;
}
}
assert(FlatScrInit && "Failed to find free register for scratch init");
FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
buildGitPtr(MBB, I, DL, TII, FlatScrInit);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
auto *MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable,
8, Align(4));
unsigned Offset =
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
.addReg(FlatScrInit)
.addImm(EncodedOffset) .addImm(0) .addMemOperand(MMO);
const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0xffff);
And->getOperand(3).setIsDead(); } else {
Register FlatScratchInitReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
assert(FlatScratchInitReg);
MachineRegisterInfo &MRI = MF.getRegInfo();
MRI.addLiveIn(FlatScratchInitReg);
MBB.addLiveIn(FlatScratchInitReg);
FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
}
if (ST.flatScratchIsPointer()) {
if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0);
Addc->getOperand(3).setIsDead();
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitLo).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitHi).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
return;
}
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitHi)
.addImm(0);
Addc->getOperand(3).setIsDead();
return;
}
assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitHi, RegState::Kill);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitLo, RegState::Kill)
.addImm(8);
LShr->getOperand(3).setIsDead(true); }
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I))
return false;
}
return true;
}
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
MachineFunction &MF) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
assert(MFI->isEntryFunction());
Register ScratchRsrcReg = MFI->getScratchRSrcReg();
if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
allStackObjectsAreDead(MF.getFrameInfo())))
return Register();
if (ST.hasSGPRInitBug() ||
ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
return ScratchRsrcReg;
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR128s) {
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
}
}
return ScratchRsrcReg;
}
static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
assert(MFI->isEntryFunction());
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
Register ScratchRsrcReg;
if (!ST.enableFlatScratch())
ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
if (ScratchRsrcReg) {
for (MachineBasicBlock &OtherBB : MF) {
if (&OtherBB != &MBB) {
OtherBB.addLiveIn(ScratchRsrcReg);
}
}
}
Register PreloadedScratchRsrcReg;
if (ST.isAmdHsaOrMesa(F)) {
PreloadedScratchRsrcReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
MRI.addLiveIn(PreloadedScratchRsrcReg);
MBB.addLiveIn(PreloadedScratchRsrcReg);
}
}
DebugLoc DL;
MachineBasicBlock::iterator I = MBB.begin();
Register ScratchWaveOffsetReg;
if (PreloadedScratchWaveOffsetReg &&
TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
AllSGPRs = AllSGPRs.slice(
std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPRs) {
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
ScratchWaveOffsetReg = Reg;
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
.addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
break;
}
}
} else {
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
}
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
assert(SPReg != AMDGPU::SP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
.addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
}
if (hasFP(MF)) {
Register FPReg = MFI->getFrameOffsetReg();
assert(FPReg != AMDGPU::FP_REG);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
}
bool NeedsFlatScratchInit =
MFI->hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
}
if (NeedsFlatScratchInit) {
emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
}
if (ScratchRsrcReg) {
emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
PreloadedScratchRsrcReg,
ScratchRsrcReg, ScratchWaveOffsetReg);
}
}
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, Register PreloadedScratchRsrcReg,
Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const Function &Fn = MF.getFunction();
if (ST.isAmdPalOS()) {
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
buildGitPtr(MBB, I, DL, TII, Rsrc01);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable,
16, Align(4));
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
.addImm(EncodedOffset) .addImm(0) .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
if (ST.isWave32()) {
const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
.addImm(21)
.addReg(Rsrc03);
}
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
assert(!ST.isAmdHsaOrMesa(Fn));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
if (MFI->hasImplicitBufferPtr()) {
Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
BuildMI(MBB, I, DL, Mov64, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else {
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
auto MMO = MF.getMachineMemOperand(
PtrInfo,
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable,
8, Align(4));
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) .addImm(0) .addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
}
} else {
Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
.addImm(Rsrc23 & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, SMovB32, Rsrc3)
.addImm(Rsrc23 >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
} else if (ST.isAmdHsaOrMesa(Fn)) {
assert(PreloadedScratchRsrcReg);
if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
.addReg(PreloadedScratchRsrcReg, RegState::Kill);
}
}
Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
.addReg(ScratchRsrcSub0)
.addReg(ScratchWaveOffsetReg)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
.addReg(ScratchRsrcSub1)
.addImm(0)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
Addc->getOperand(3).setIsDead(); }
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
switch (ID) {
case TargetStackID::Default:
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
return true;
case TargetStackID::ScalableVector:
case TargetStackID::WasmLocal:
return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
}
static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo *FuncInfo,
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsProlog) {
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
if (IsProlog) {
LiveRegs.addLiveIns(MBB);
} else {
LiveRegs.addLiveOuts(MBB);
LiveRegs.stepBackward(*MBBI);
}
}
}
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool IsProlog) {
Register ScratchExecCopy;
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
DebugLoc DL;
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
ScratchExecCopy = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, *TRI.getWaveMaskRegClass());
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");
LiveRegs.addReg(ScratchExecCopy);
const unsigned OrSaveExec =
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
.addImm(-1);
SaveExec->getOperand(3).setIsDead();
return ScratchExecCopy;
}
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
if (FuncInfo->isEntryFunction()) {
emitEntryFunctionPrologue(MF, MBB);
return;
}
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
LivePhysRegs LiveRegs;
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL;
bool HasFP = false;
bool HasBP = false;
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = NumBytes;
Register ScratchExecCopy;
Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI)
continue;
if (!ScratchExecCopy)
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
true);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
*Reg.FI);
}
for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
.addReg(ScratchExecCopy, RegState::Kill);
LiveRegs.addReg(ScratchExecCopy);
}
auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
assert(!MFI.isDeadObjectIndex(FI));
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, true);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(Reg);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
FI);
};
auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
assert(!MFI.isDeadObjectIndex(FI));
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
.addReg(Reg)
.addImm(Spill[0].Lane)
.addReg(Spill[0].VGPR, RegState::Undef);
};
if (FPSaveIndex) {
if (spilledToMemory(MF, *FPSaveIndex))
SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
else
SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
}
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
FuncInfo->SGPRForFPSaveRestoreCopy)
.addReg(FramePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
if (BPSaveIndex) {
if (spilledToMemory(MF, *BPSaveIndex))
SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
else
SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
}
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
FuncInfo->SGPRForBPSaveRestoreCopy)
.addReg(BasePtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
SmallVector<MCPhysReg, 2> TempSGPRs;
if (FuncInfo->SGPRForFPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
if (FuncInfo->SGPRForBPSaveRestoreCopy)
TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
if (!TempSGPRs.empty()) {
for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : TempSGPRs)
MBB.addLiveIn(Reg);
MBB.sortUniqueLiveIns();
}
if (!LiveRegs.empty()) {
LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
}
if (TRI.hasStackRealignment(MF)) {
HasFP = true;
const unsigned Alignment = MFI.getMaxAlign().value();
RoundedSize += Alignment;
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
}
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
.addReg(StackPtrReg)
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
.addReg(FramePtrReg, RegState::Kill)
.addImm(-Alignment * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
And->getOperand(3).setIsDead(); FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
.addReg(StackPtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
if ((HasBP = TRI.hasBasePointer(MF))) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
.addReg(StackPtrReg)
.setMIFlag(MachineInstr::FrameSetup);
}
if (HasFP && RoundedSize != 0) {
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
Add->getOperand(3).setIsDead(); }
assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere");
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
!FuncInfo->FramePointerSaveIndex) ||
EnableSpillVGPRToAGPR) &&
"Saved FP but didn't need it");
assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
FuncInfo->BasePointerSaveIndex)) &&
"Needed to save BP but didn't save it anywhere");
assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
!FuncInfo->BasePointerSaveIndex)) &&
"Saved BP but didn't need it");
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
if (FuncInfo->isEntryFunction())
return;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
LivePhysRegs LiveRegs;
DebugLoc DL;
const MachineFrameInfo &MFI = MF.getFrameInfo();
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = FuncInfo->isStackRealigned()
? NumBytes + MFI.getMaxAlign().value()
: NumBytes;
const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
const Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
if (RoundedSize != 0 && hasFP(MF)) {
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
.setMIFlag(MachineInstr::FrameDestroy);
Add->getOperand(3).setIsDead(); }
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
.addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
.setMIFlag(MachineInstr::FrameDestroy);
}
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
.addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
.setMIFlag(MachineInstr::FrameDestroy);
}
auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, false);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
FI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
.addReg(TmpVGPR, RegState::Kill);
};
auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
.addReg(Spill[0].VGPR)
.addImm(Spill[0].Lane);
};
if (FPSaveIndex) {
const int FramePtrFI = *FPSaveIndex;
assert(!MFI.isDeadObjectIndex(FramePtrFI));
if (spilledToMemory(MF, FramePtrFI))
RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
else
RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
}
if (BPSaveIndex) {
const int BasePtrFI = *BPSaveIndex;
assert(!MFI.isDeadObjectIndex(BasePtrFI));
if (spilledToMemory(MF, BasePtrFI))
RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
else
RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
}
Register ScratchExecCopy;
for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI)
continue;
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
Reg.VGPR, *Reg.FI);
}
for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
.addReg(ScratchExecCopy, RegState::Kill);
}
}
#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I) &&
MFI.getStackID(I) == TargetStackID::SGPRSpill &&
(I != FuncInfo->FramePointerSaveIndex &&
I != FuncInfo->BasePointerSaveIndex)) {
return false;
}
}
return true;
}
#endif
StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
Register &FrameReg) const {
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
FrameReg = RI->getFrameRegister(MF);
return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
if (!FuncInfo->isEntryFunction()) {
FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
}
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
&& EnableSpillVGPRToAGPR;
if (SpillVGPRToAGPR) {
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
bool SeenDbgInstr = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
int FrameIndex;
if (MI.isDebugInstr())
SeenDbgInstr = true;
if (TII->isVGPRSpill(MI)) {
unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vaddr);
int FI = MI.getOperand(FIOp).getIndex();
Register VReg =
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
RS->enterBasicBlock(MBB);
TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
SpillFIs.set(FI);
continue;
}
} else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
TII->isLoadFromStackSlot(MI, FrameIndex))
if (!MFI.isFixedObjectIndex(FrameIndex))
NonVGPRSpillFIs.set(FrameIndex);
}
}
for (unsigned FI : SpillFIs.set_bits())
if (!NonVGPRSpillFIs.test(FI))
FuncInfo->setVGPRToAGPRSpillDead(FI);
for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
MBB.addLiveIn(Reg);
for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
MBB.addLiveIn(Reg);
MBB.sortUniqueLiveIns();
if (!SpillFIs.empty() && SeenDbgInstr) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false );
}
}
}
}
}
bool HaveSGPRToVMemSpill =
FuncInfo->removeDeadFrameIndices(MFI, true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
if (!allStackObjectsAreDead(MFI)) {
assert(RS && "RegScavenger required if spilling");
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
if (HaveSGPRToVMemSpill &&
allocateScavengingFrameIndexesNearIncomingSP(MF)) {
RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
}
}
}
void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
Register UnusedLowVGPR =
TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
TRI->getHWRegIndex(VGPRForAGPRCopy))) {
FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
MRI.freezeReservedRegs(MF);
}
}
}
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedVGPRs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (MFI->isEntryFunction())
return;
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
if (!ST.hasGFX90AInsts())
SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
const bool WillHaveFP =
FrameInfo.hasCalls() &&
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
for (auto SSpill : MFI->getSGPRSpillVGPRs())
SavedVGPRs.reset(SSpill.VGPR);
LivePhysRegs LiveRegs;
LiveRegs.init(*TRI);
if (WillHaveFP || hasFP(MF)) {
assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
"Re-reserving spill slot for FP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
MFI->FramePointerSaveIndex, true);
}
if (TRI->hasBasePointer(MF)) {
if (MFI->SGPRForFPSaveRestoreCopy)
LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
assert(!MFI->SGPRForBPSaveRestoreCopy &&
!MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
MFI->BasePointerSaveIndex, false);
}
}
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
if (MFI->isEntryFunction())
return;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
SavedRegs.reset(MFI->getStackPtrOffsetReg());
const BitVector AllSavedRegs = SavedRegs;
SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const bool WillHaveFP =
FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
if (WillHaveFP || hasFP(MF))
SavedRegs.reset(MFI->getFrameOffsetReg());
const MachineRegisterInfo &MRI = MF.getRegInfo();
Register RetAddrReg = TRI->getReturnAddressReg(MF);
if (!MFI->isEntryFunction() &&
(FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
}
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
if (CSI.empty())
return true;
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
!FuncInfo->SGPRForBPSaveRestoreCopy)
return false;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *RI = ST.getRegisterInfo();
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg = RI->getBaseRegister();
unsigned NumModifiedRegs = 0;
if (FuncInfo->SGPRForFPSaveRestoreCopy)
NumModifiedRegs++;
if (FuncInfo->SGPRForBPSaveRestoreCopy)
NumModifiedRegs++;
for (auto &CS : CSI) {
if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
if (--NumModifiedRegs)
break;
} else if (CS.getReg() == BasePtrReg &&
FuncInfo->SGPRForBPSaveRestoreCopy) {
CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
if (--NumModifiedRegs)
break;
}
}
return false;
}
bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
const MachineFunction &MF) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t EstStackSize = MFI.estimateStackSize(MF);
uint64_t MaxOffset = EstStackSize - 1;
if (ST.enableFlatScratch()) {
const SIInstrInfo *TII = ST.getInstrInfo();
if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch))
return false;
} else {
if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
return false;
}
return true;
}
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
int64_t Amount = I->getOperand(0).getImm();
if (Amount == 0)
return MBB.erase(I);
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
Amount = alignTo(Amount, getStackAlign());
assert(isUInt<32>(Amount) && "exceeded stack address space size");
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Register SPReg = MFI->getStackPtrOffsetReg();
Amount *= getScratchScaleFactor(ST);
if (IsDestroy)
Amount = -Amount;
auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
.addReg(SPReg)
.addImm(Amount);
Add->getOperand(3).setIsDead(); } else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
}
return MBB.erase(I);
}
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasCalls() &&
!MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
return MFI.getStackSize() != 0;
}
return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
MF) ||
MF.getTarget().Options.DisableFramePointerElim(MF);
}
bool SIFrameLowering::requiresStackPointerReference(
const MachineFunction &MF) const {
assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
"only expected to call this for entry points");
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasCalls())
return true;
return frameTriviallyRequiresSP(MFI);
}