#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
// Hidden boolean command-line option, false by default. SICacheControl's
// constructor reads it once and stores the negation in InsertCacheInv, which
// the insertAcquire implementations check before emitting anything.
static cl::opt<bool> AmdgcnSkipCacheInvalidations(
"amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
cl::desc("Use this to skip inserting cache invalidating instructions."));
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Bitmask describing which kinds of memory operation (load and/or store) a
/// request refers to. Values may be combined with bitwise operators.
enum class SIMemOp {
NONE = 0u,
LOAD = 1u << 0,
STORE = 1u << 1,
// STORE is named as the largest individual flag for the bitmask machinery.
LLVM_MARK_AS_BITMASK_ENUM( STORE)
};
/// Where new instructions are inserted relative to the instruction an iterator
/// refers to. The insert* helpers advance the iterator before building when
/// AFTER is requested and step it back afterwards.
enum class Position {
BEFORE,
AFTER
};
/// Synchronization scopes used by this pass. The enumerator order is
/// significant: SIMemOpInfo's constructor clamps a scope with std::min, so
/// later enumerators compare greater than earlier ones.
enum class SIAtomicScope {
NONE,
SINGLETHREAD,
WAVEFRONT,
WORKGROUP,
AGENT,
SYSTEM
};
/// Bitmask of the address spaces an instruction may access or an ordering may
/// cover. Individual spaces occupy one bit each; the remaining enumerators are
/// named combinations of those bits.
enum class SIAtomicAddrSpace {
NONE = 0u,
GLOBAL = 1u << 0,
LDS = 1u << 1,
SCRATCH = 1u << 2,
GDS = 1u << 3,
OTHER = 1u << 4,
// What toSIAtomicAddrSpace returns for a flat address.
FLAT = GLOBAL | LDS | SCRATCH,
// Every individual space except OTHER.
ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
// Every individual space.
ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
// ALL is passed as the largest value for the bitmask machinery.
LLVM_MARK_AS_BITMASK_ENUM( ALL)
};
/// Memory-model summary of one machine instruction: its atomic orderings,
/// synchronization scope, the address spaces it accesses and orders, and its
/// volatile / non-temporal flags. Only SIMemOpAccess (a friend) can construct
/// instances; everyone else reads them through the accessors.
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;

  /// The defaults describe the most conservative operation: sequentially
  /// consistent, system scope, ordering every atomic address space.
  ///
  /// For a non-atomic \p Ordering the scope, ordering address space,
  /// cross-address-space flag and failure ordering must all be "none"
  /// (asserted). For an atomic \p Ordering the stored scope and
  /// cross-address-space flag may be narrowed, see below.
  SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
              SIAtomicScope Scope = SIAtomicScope::SYSTEM,
              SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
              SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
              bool IsCrossAddressSpaceOrdering = true,
              AtomicOrdering FailureOrdering =
                  AtomicOrdering::SequentiallyConsistent,
              bool IsVolatile = false,
              bool IsNonTemporal = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering),
        Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
        InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile),
        IsNonTemporal(IsNonTemporal) {
    const bool NonAtomic = Ordering == AtomicOrdering::NotAtomic;
    if (NonAtomic) {
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // An ordering over exactly the one address space the instruction uses
    // cannot be a cross-address-space ordering.
    if (isPowerOf2_32(uint32_t(InstrAddrSpace)) &&
        OrderingAddrSpace == InstrAddrSpace)
      this->IsCrossAddressSpaceOrdering = false;

    // Clamp the scope to the widest one permitted by the set of address
    // spaces the instruction accesses. SYSTEM is the largest enumerator, so
    // taking the minimum against it leaves the scope untouched when no
    // restriction applies.
    const SIAtomicAddrSpace ScratchOnly = SIAtomicAddrSpace::SCRATCH;
    const SIAtomicAddrSpace ScratchLDS = ScratchOnly | SIAtomicAddrSpace::LDS;
    const SIAtomicAddrSpace ScratchLDSGDS = ScratchLDS | SIAtomicAddrSpace::GDS;

    SIAtomicScope WidestAllowed = SIAtomicScope::SYSTEM;
    if ((InstrAddrSpace & ~ScratchOnly) == SIAtomicAddrSpace::NONE)
      WidestAllowed = SIAtomicScope::SINGLETHREAD;
    else if ((InstrAddrSpace & ~ScratchLDS) == SIAtomicAddrSpace::NONE)
      WidestAllowed = SIAtomicScope::WORKGROUP;
    else if ((InstrAddrSpace & ~ScratchLDSGDS) == SIAtomicAddrSpace::NONE)
      WidestAllowed = SIAtomicScope::AGENT;

    this->Scope = std::min(Scope, WidestAllowed);
  }

public:
  /// \returns The synchronization scope (after any clamping).
  SIAtomicScope getScope() const { return Scope; }

  /// \returns The (success) atomic ordering.
  AtomicOrdering getOrdering() const { return Ordering; }

  /// \returns The failure atomic ordering.
  AtomicOrdering getFailureOrdering() const { return FailureOrdering; }

  /// \returns The address spaces the instruction itself accesses.
  SIAtomicAddrSpace getInstrAddrSpace() const { return InstrAddrSpace; }

  /// \returns The address spaces that must be ordered.
  SIAtomicAddrSpace getOrderingAddrSpace() const { return OrderingAddrSpace; }

  /// \returns True if ordering spans more than one address space.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the operation was marked volatile.
  bool isVolatile() const { return IsVolatile; }

  /// \returns True if the operation was marked non-temporal.
  bool isNonTemporal() const { return IsNonTemporal; }

  /// \returns True if the ordering is anything other than NotAtomic.
  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
};
/// Builds SIMemOpInfo descriptions for machine instructions, translating LLVM
/// synchronization scope IDs and address space numbers into the SI enums.
class SIMemOpAccess final {
private:
// Module-level info used to look up the target's sync scope IDs; assigned in
// the constructor.
AMDGPUMachineModuleInfo *MMI = nullptr;
// Emits an "unsupported" diagnostic with text Msg at MI's debug location.
void reportUnsupported(const MachineBasicBlock::iterator &MI,
const char *Msg) const;
// Maps a sync scope ID to (scope, address spaces ordered, is cross address
// space ordering); returns None for an unrecognized ID.
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;
// Maps an LLVM address space number to its SIAtomicAddrSpace bit(s).
SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
// Merges the properties of all memory operands of MI; requires at least one.
Optional<SIMemOpInfo> constructFromMIWithMMO(
const MachineBasicBlock::iterator &MI) const;
public:
SIMemOpAccess(MachineFunction &MF);
// Each query below returns None when MI is not of the requested kind.
// Load: MI may load and may not store.
Optional<SIMemOpInfo> getLoadInfo(
const MachineBasicBlock::iterator &MI) const;
// Store: MI may store and may not load.
Optional<SIMemOpInfo> getStoreInfo(
const MachineBasicBlock::iterator &MI) const;
// Fence: MI's opcode is ATOMIC_FENCE.
Optional<SIMemOpInfo> getAtomicFenceInfo(
const MachineBasicBlock::iterator &MI) const;
// Cmpxchg / RMW: MI may both load and store.
Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
const MachineBasicBlock::iterator &MI) const;
};
/// Abstract interface for the per-subtarget code that sets cache-policy bits
/// and inserts wait / invalidate / write-back instructions. Every method
/// returns true if it changed the code.
class SICacheControl {
protected:
// Subtarget this object was created for.
const GCNSubtarget &ST;
// Instruction info taken from ST in the constructor.
const SIInstrInfo *TII = nullptr;
// ISA version derived from ST's CPU name in the constructor.
IsaVersion IV;
// False when the amdgcn-skip-cache-invalidations option is set.
bool InsertCacheInv;
SICacheControl(const GCNSubtarget &ST);
// ORs Bit into MI's cpol operand; returns false if MI has no such operand.
bool enableNamedBit(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Bit) const;
public:
// Factory selecting the concrete subclass for ST.
static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
// Adjusts cache-policy bits of a load MI for the given scope and address
// spaces.
virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const = 0;
// Same for a store MI.
virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const = 0;
// Same for an MI that both loads and stores.
virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const = 0;
// Applies volatile and/or non-temporal handling to a load or a store MI;
// Op says which of the two MI is.
virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace,
SIMemOp Op, bool IsVolatile,
bool IsNonTemporal) const = 0;
// Inserts any wait instruction needed before or after MI (per Pos).
virtual bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const = 0;
// Inserts any instructions needed for an acquire before or after MI.
virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const = 0;
// Inserts any instructions needed for a release before or after MI.
virtual bool insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
Position Pos) const = 0;
virtual ~SICacheControl() = default;
};
/// Concrete cache control that implements every SICacheControl hook; create()
/// selects it for generations up to and including SOUTHERN_ISLANDS, and the
/// other cache-control classes derive from it.
class SIGfx6CacheControl : public SICacheControl {
protected:
// Sets the GLC bit on MI's cpol operand; returns false if MI has none.
bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit(MI, AMDGPU::CPol::GLC);
}
// Sets the SLC bit on MI's cpol operand; returns false if MI has none.
bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit(MI, AMDGPU::CPol::SLC);
}
public:
SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile,
bool IsNonTemporal) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const override;
bool insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
};
/// Same as SIGfx6CacheControl except for insertAcquire, the only hook it
/// overrides.
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const override;
};
/// Cache control that create() selects for subtargets with GFX90A
/// instructions (and without GFX940 instructions). Overrides every hook of
/// the GFX7 implementation.
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile,
bool IsNonTemporal) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const override;
bool insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
};
/// Cache control that create() selects for subtargets with GFX940
/// instructions. Derives from the GFX90A implementation, adds helpers for the
/// SC0, SC1 and NT cache-policy bits, and overrides every hook except
/// insertWait.
class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
  /// Sets the SC0 bit on \p MI's cpol operand.
  /// \returns False if \p MI has no cpol operand, true otherwise.
  bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC0);
  }

  /// Sets the SC1 bit on \p MI's cpol operand.
  /// \returns False if \p MI has no cpol operand, true otherwise.
  bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SC1);
  }

  /// Sets the NT bit on \p MI's cpol operand.
  /// \returns False if \p MI has no cpol operand, true otherwise.
  bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::NT);
  }

public:
  // Note: no trailing ';' after the constructor body; an empty declaration
  // there triggers -Wextra-semi.
  SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
/// Cache control that create() selects for generations from GFX10 up to, but
/// not including, GFX11. Derives from the GFX7 implementation, adds a helper
/// for the DLC bit, and overrides the hooks declared below.
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:
// Sets the DLC bit on MI's cpol operand; returns false if MI has none.
bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit(MI, AMDGPU::CPol::DLC);
}
public:
SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile,
bool IsNonTemporal) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
SIMemOp Op,
bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const override;
};
/// Cache control that create() falls back to when no earlier case matches
/// (generation GFX11 or later). Derives from the GFX10 implementation and
/// overrides only the two hooks declared below.
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const override;
bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile,
bool IsNonTemporal) const override;
};
/// The machine function pass itself. Holds the subtarget-specific cache
/// control object and a list of instruction iterators collected while
/// running.
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
// Subtarget-specific cache control implementation.
std::unique_ptr<SICacheControl> CC = nullptr;
// NOTE(review): presumably the atomic pseudo instructions queued for removal
// by removeAtomicPseudoMIs() -- confirm against the definitions.
std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
// Forwards to SIInstrInfo::isAtomicRet.
bool isAtomicRet(const MachineInstr &MI) const {
return SIInstrInfo::isAtomicRet(MI);
}
bool removeAtomicPseudoMIs();
// The expand* helpers take the memory-model summary of MI and return a bool.
// NOTE(review): presumably true when the code was changed, matching the
// SICacheControl hooks -- confirm against the definitions.
bool expandLoad(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
bool expandStore(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
bool expandAtomicFence(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
public:
static char ID;
SIMemoryLegalizer() : MachineFunctionPass(ID) {}
// Declares that the pass preserves the CFG, then defers to the base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override {
return PASS_NAME;
}
bool runOnMachineFunction(MachineFunction &MF) override;
};
}
/// Reports \p Msg as an "unsupported" diagnostic against the IR function that
/// contains \p MI, using \p MI's debug location.
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &F = MI->getParent()->getParent()->getFunction();
  F.getContext().diagnose(
      DiagnosticInfoUnsupported(F, Msg, MI->getDebugLoc()));
}
/// Translates the synchronization scope ID \p SSID into a tuple of
/// (SI scope, address spaces ordered, is-cross-address-space-ordering).
///
/// The plain scopes order all of SIAtomicAddrSpace::ATOMIC and report cross
/// address space ordering. The "one address space" scopes order only the
/// atomic address spaces present in \p InstrAddrSpace and report no cross
/// address space ordering.
///
/// \returns None if \p SSID is not one of the recognized scopes.
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  // Scopes that order every atomic address space.
  const SIAtomicAddrSpace EveryAtomic = SIAtomicAddrSpace::ATOMIC;
  if (SSID == SyncScope::System)
    return std::make_tuple(SIAtomicScope::SYSTEM, EveryAtomic, true);
  if (SSID == MMI->getAgentSSID())
    return std::make_tuple(SIAtomicScope::AGENT, EveryAtomic, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP, EveryAtomic, true);
  if (SSID == MMI->getWavefrontSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT, EveryAtomic, true);
  if (SSID == SyncScope::SingleThread)
    return std::make_tuple(SIAtomicScope::SINGLETHREAD, EveryAtomic, true);

  // "One address space" scopes: restricted to what the instruction accesses.
  const SIAtomicAddrSpace InstrAtomic =
      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace;
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SYSTEM, InstrAtomic, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::AGENT, InstrAtomic, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WORKGROUP, InstrAtomic, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::WAVEFRONT, InstrAtomic, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::make_tuple(SIAtomicScope::SINGLETHREAD, InstrAtomic, false);

  return None;
}
/// Maps the LLVM address space number \p AS to the matching
/// SIAtomicAddrSpace value. Flat maps to the FLAT combination; anything not
/// listed maps to OTHER.
SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  switch (AS) {
  case AMDGPUAS::FLAT_ADDRESS:
    return SIAtomicAddrSpace::FLAT;
  case AMDGPUAS::GLOBAL_ADDRESS:
    return SIAtomicAddrSpace::GLOBAL;
  case AMDGPUAS::LOCAL_ADDRESS:
    return SIAtomicAddrSpace::LDS;
  case AMDGPUAS::PRIVATE_ADDRESS:
    return SIAtomicAddrSpace::SCRATCH;
  case AMDGPUAS::REGION_ADDRESS:
    return SIAtomicAddrSpace::GDS;
  default:
    return SIAtomicAddrSpace::OTHER;
  }
}
/// Captures the AMDGPU machine module info reachable from \p MF so that
/// target synchronization scope IDs can be looked up later.
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF)
    : MMI(&MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>()) {}
/// Builds a SIMemOpInfo for \p MI by merging all of its memory operands.
///
/// Non-temporal holds only if every operand is non-temporal; volatile holds
/// if any operand is volatile; the instruction address space is the union of
/// the operands' address spaces. Orderings of atomic operands are merged and
/// their sync scopes are combined via MMI->isSyncScopeInclusion.
///
/// \pre \p MI has at least one memory operand.
/// \returns None (after reporting a diagnostic) if the sync scopes are not
/// inclusive, the merged scope is not recognized, or the address spaces are
/// not supported for an atomic operation.
Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID MergedSSID = SyncScope::SingleThread;
  AtomicOrdering MergedOrdering = AtomicOrdering::NotAtomic;
  AtomicOrdering MergedFailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace AccessedSpaces = SIAtomicAddrSpace::NONE;
  bool EveryOperandNonTemporal = true;
  bool AnyOperandVolatile = false;

  for (const auto &MMO : MI->memoperands()) {
    EveryOperandNonTemporal &= MMO->isNonTemporal();
    AnyOperandVolatile |= MMO->isVolatile();
    AccessedSpaces |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());

    const AtomicOrdering OperandOrdering = MMO->getSuccessOrdering();
    if (OperandOrdering == AtomicOrdering::NotAtomic)
      continue;

    const auto &Inclusion =
        MMI->isSyncScopeInclusion(MergedSSID, MMO->getSyncScopeID());
    if (!Inclusion) {
      reportUnsupported(MI,
        "Unsupported non-inclusive atomic synchronization scope");
      return None;
    }

    // Keep the current scope if the query answered true, otherwise adopt the
    // operand's scope.
    if (!*Inclusion)
      MergedSSID = MMO->getSyncScopeID();

    MergedOrdering = getMergedAtomicOrdering(MergedOrdering, OperandOrdering);
    assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
           MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
    MergedFailureOrdering = getMergedAtomicOrdering(MergedFailureOrdering,
                                                    MMO->getFailureOrdering());
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;

  if (MergedOrdering != AtomicOrdering::NotAtomic) {
    const auto Translated = toSIAtomicScope(MergedSSID, AccessedSpaces);
    if (!Translated) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return None;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *Translated;

    const bool OrdersNothing = OrderingAddrSpace == SIAtomicAddrSpace::NONE;
    const bool OrdersNonAtomicSpace =
        (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace;
    const bool AccessesNoAtomicSpace =
        (AccessedSpaces & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE;
    if (OrdersNothing || OrdersNonAtomicSpace || AccessesNoAtomicSpace) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return None;
    }
  }

  return SIMemOpInfo(MergedOrdering, Scope, OrderingAddrSpace, AccessedSpaces,
                     IsCrossAddressSpaceOrdering, MergedFailureOrdering,
                     AnyOperandVolatile, EveryOperandNonTemporal);
}
/// \returns Memory-model info for \p MI if it may load and may not store,
/// None otherwise. Without memory operands the default-constructed (most
/// conservative) SIMemOpInfo is returned.
Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!MI->mayLoad() || MI->mayStore())
    return None;

  if (MI->getNumMemOperands() > 0)
    return constructFromMIWithMMO(MI);

  return SIMemOpInfo();
}
/// \returns Memory-model info for \p MI if it may store and may not load,
/// None otherwise. Without memory operands the default-constructed (most
/// conservative) SIMemOpInfo is returned.
Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->mayLoad() || !MI->mayStore())
    return None;

  if (MI->getNumMemOperands() > 0)
    return constructFromMIWithMMO(MI);

  return SIMemOpInfo();
}
/// \returns Memory-model info for \p MI if it is an ATOMIC_FENCE, None
/// otherwise. Operand 0 supplies the ordering and operand 1 the sync scope
/// ID. The fence is treated as accessing all atomic address spaces and has no
/// failure ordering. None is also returned, after a diagnostic, when the
/// scope or the resulting ordering address space is unsupported.
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return None;

  const AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
  const SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());

  const auto Translated =
      toSIAtomicScope(FenceSSID, SIAtomicAddrSpace::ATOMIC);
  if (!Translated) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return None;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *Translated;

  const bool OrdersNothing = OrderingAddrSpace == SIAtomicAddrSpace::NONE;
  const bool OrdersNonAtomicSpace =
      (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace;
  if (OrdersNothing || OrdersNonAtomicSpace) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return None;
  }

  return SIMemOpInfo(FenceOrdering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}
/// \returns Memory-model info for \p MI if it may both load and store, None
/// otherwise. Without memory operands the default-constructed (most
/// conservative) SIMemOpInfo is returned.
Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!MI->mayLoad() || !MI->mayStore())
    return None;

  if (MI->getNumMemOperands() > 0)
    return constructFromMIWithMMO(MI);

  return SIMemOpInfo();
}
/// Caches the subtarget, its instruction info and ISA version, and records
/// whether cache invalidations should be inserted (the inverse of the
/// amdgcn-skip-cache-invalidations option). Initializers are listed in member
/// declaration order.
SICacheControl::SICacheControl(const GCNSubtarget &ST)
    : ST(ST), TII(ST.getInstrInfo()), IV(getIsaVersion(ST.getCPU())),
      InsertCacheInv(!AmdgcnSkipCacheInvalidations) {}
/// ORs \p Bit into the immediate of \p MI's cpol operand.
/// \returns False, leaving \p MI untouched, when \p MI has no cpol operand;
/// true otherwise (even if the bit was already set).
bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
                                    AMDGPU::CPol::CPol Bit) const {
  if (MachineOperand *PolicyOp =
          TII->getNamedOperand(*MI, AMDGPU::OpName::cpol)) {
    PolicyOp->setImm(PolicyOp->getImm() | Bit);
    return true;
  }
  return false;
}
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
if (ST.hasGFX940Insts())
return std::make_unique<SIGfx940CacheControl>(ST);
if (ST.hasGFX90AInsts())
return std::make_unique<SIGfx90ACacheControl>(ST);
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return std::make_unique<SIGfx6CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)
return std::make_unique<SIGfx7CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX11)
return std::make_unique<SIGfx10CacheControl>(ST);
return std::make_unique<SIGfx11CacheControl>(ST);
}
/// Sets the GLC bit on the load \p MI when \p AddrSpace includes global
/// memory and \p Scope is system or agent. Narrower scopes change nothing.
/// \returns True if the bit could be set on \p MI.
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Modified = false;
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Modified = enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Nothing to do for these scopes.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Modified;
}
/// Store variant of the cache bypass hook. This implementation never modifies
/// \p MI; \p Scope and \p AddrSpace are not consulted.
/// \returns Always false.
bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(!MI->mayLoad() && MI->mayStore());
  return false;
}
/// Read-modify-write variant of the cache bypass hook. This implementation
/// never modifies \p MI; \p Scope and \p AddrSpace are not consulted.
/// \returns Always false.
bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && MI->mayStore());
  return false;
}
/// Applies volatile or non-temporal handling to \p MI, which must be either a
/// load or a store (not both). Volatile takes precedence over non-temporal.
///
/// Volatile: a load gets the GLC bit; then a system-scope wait without cross
/// address space ordering is requested after \p MI.
/// Non-temporal: both the GLC and SLC bits are set.
///
/// \returns True if \p MI was modified or an instruction was inserted.
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  if (IsVolatile) {
    // Only loads get GLC; the wait is requested for loads and stores alike.
    const bool BitSet = Op == SIMemOp::LOAD && enableGLCBit(MI);
    const bool WaitInserted = insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace,
                                         Op, false, Position::AFTER);
    return BitSet || WaitInserted;
  }

  if (IsNonTemporal) {
    const bool GLCSet = enableGLCBit(MI);
    const bool SLCSet = enableSLCBit(MI);
    return GLCSet || SLCSet;
  }

  return false;
}
/// Inserts an S_WAITCNT before or after \p MI (per \p Pos) when the scope and
/// address spaces require waiting on the vmcnt and/or lgkmcnt counters.
///
/// - Global or scratch at system/agent scope: wait for vmcnt to reach 0.
/// - LDS at system/agent/workgroup scope, or GDS at system/agent scope: wait
///   for lgkmcnt to reach 0, but only if \p IsCrossAddrSpaceOrdering.
///
/// Counters that need no wait are encoded with their full bit mask. \p Op is
/// not consulted by this implementation. \p MI is restored to its original
/// position before returning.
///
/// \returns True if an S_WAITCNT was inserted.
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  // Read the block and debug location before MI is possibly advanced.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool WaitVM = false;
  bool WaitLGKM = false;

  const SIAtomicAddrSpace GlobalOrScratch =
      SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH;
  if ((AddrSpace & GlobalOrScratch) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitVM = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      WaitLGKM |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitLGKM |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  const bool NeedsWait = WaitVM || WaitLGKM;
  if (NeedsWait) {
    const unsigned VmField = WaitVM ? 0 : getVmcntBitMask(IV);
    const unsigned ExpField = getExpcntBitMask(IV);
    const unsigned LgkmField = WaitLGKM ? 0 : getLgkmcntBitMask(IV);
    const unsigned Encoded =
        AMDGPU::encodeWaitcnt(IV, VmField, ExpField, LgkmField);
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::S_WAITCNT)).addImm(Encoded);
  }

  if (InsertAfter)
    --MI;

  return NeedsWait;
}
/// Inserts a BUFFER_WBINVL1 before or after \p MI (per \p Pos) when
/// \p AddrSpace includes global memory and \p Scope is system or agent.
/// Does nothing when cache invalidations are disabled. \p MI is restored to
/// its original position before returning.
///
/// \returns True if an instruction was inserted.
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // Read the block and debug location before MI is possibly advanced.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_WBINVL1));
      Inserted = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    --MI;

  return Inserted;
}
/// A release is implemented purely as a wait covering both loads and stores;
/// the call is dispatched virtually, so a subclass's insertWait is used.
/// \returns True if a wait instruction was inserted.
bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  const SIMemOp LoadsAndStores = SIMemOp::LOAD | SIMemOp::STORE;
  return insertWait(MI, Scope, AddrSpace, LoadsAndStores,
                    IsCrossAddrSpaceOrdering, Pos);
}
/// Same shape as the GFX6 acquire, but the invalidate opcode depends on the
/// OS: BUFFER_WBINVL1 on AMDPAL or Mesa3D, BUFFER_WBINVL1_VOL otherwise. The
/// instruction is inserted only for global memory at system or agent scope,
/// and only when cache invalidations are enabled. \p MI is restored to its
/// original position before returning.
///
/// \returns True if an instruction was inserted.
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // Read the block and debug location before MI is possibly advanced.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  const GCNSubtarget &STM = Block.getParent()->getSubtarget<GCNSubtarget>();
  unsigned InvOpcode = AMDGPU::BUFFER_WBINVL1_VOL;
  if (STM.isAmdPalOS() || STM.isMesa3DOS())
    InvOpcode = AMDGPU::BUFFER_WBINVL1;

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(Block, MI, Loc, TII->get(InvOpcode));
      Inserted = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    --MI;

  return Inserted;
}
/// Sets the GLC bit on the load \p MI when \p AddrSpace includes global
/// memory and \p Scope is system or agent, or is workgroup while threadgroup
/// split mode is enabled on the subtarget.
/// \returns True if the bit could be set on \p MI.
bool SIGfx90ACacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Modified = false;
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      Modified = enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      // Only needed in threadgroup split mode.
      if (ST.isTgSplitEnabled())
        Modified = enableGLCBit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Modified;
}
/// Store variant of the cache bypass hook. No scope requires a change to
/// \p MI here; the switch only validates \p Scope when global memory is
/// involved.
/// \returns Always false.
bool SIGfx90ACacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(!MI->mayLoad() && MI->mayStore());

  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Nothing to change for any supported scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return false;
}
/// Read-modify-write variant of the cache bypass hook. No scope requires a
/// change to \p MI here; the switch only validates \p Scope when global
/// memory is involved.
/// \returns Always false.
bool SIGfx90ACacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && MI->mayStore());

  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Nothing to change for any supported scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return false;
}
/// Applies volatile or non-temporal handling to \p MI, which must be either a
/// load or a store (not both). Volatile takes precedence over non-temporal.
///
/// Volatile: a load gets the GLC bit; then a system-scope wait without cross
/// address space ordering is requested after \p MI.
/// Non-temporal: both the GLC and SLC bits are set.
///
/// \returns True if \p MI was modified or an instruction was inserted.
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  assert(MI->mayLoad() ^ MI->mayStore());
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  if (IsVolatile) {
    // Only loads get GLC; the wait is requested for loads and stores alike.
    const bool BitSet = Op == SIMemOp::LOAD && enableGLCBit(MI);
    const bool WaitInserted = insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace,
                                         Op, false, Position::AFTER);
    return BitSet || WaitInserted;
  }

  if (IsNonTemporal) {
    const bool GLCSet = enableGLCBit(MI);
    const bool SLCSet = enableSLCBit(MI);
    return GLCSet || SLCSet;
  }

  return false;
}
/// Adjusts the request for threadgroup split mode and then defers to the
/// inherited insertWait.
///
/// With threadgroup split enabled, a workgroup-scope request that involves
/// global, scratch or GDS memory is widened to agent scope, and LDS is always
/// removed from the set of address spaces to wait on.
///
/// \returns True if a wait instruction was inserted.
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace,
                                      SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos) const {
  if (ST.isTgSplitEnabled()) {
    const SIAtomicAddrSpace GlobalScratchGDS = SIAtomicAddrSpace::GLOBAL |
                                               SIAtomicAddrSpace::SCRATCH |
                                               SIAtomicAddrSpace::GDS;
    const bool TouchesAny =
        (AddrSpace & GlobalScratchGDS) != SIAtomicAddrSpace::NONE;
    if (TouchesAny && Scope == SIAtomicScope::WORKGROUP)
      Scope = SIAtomicScope::AGENT;

    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }

  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos);
}
/// Inserts acquire-side invalidations before or after \p MI (per \p Pos).
///
/// For global memory at system scope a BUFFER_INVL2 is inserted first. At
/// workgroup scope with threadgroup split enabled, the scope is widened to
/// agent. The (possibly widened) request is then always forwarded to the GFX7
/// implementation. Does nothing when cache invalidations are disabled.
///
/// \returns True if any instruction was inserted.
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // Read the block and debug location before MI is possibly advanced.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_INVL2));
      Inserted = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // Treated as agent scope by the base implementation below.
      if (ST.isTgSplitEnabled())
        Scope = SIAtomicScope::AGENT;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    --MI;

  const bool BaseInserted =
      SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);
  return Inserted || BaseInserted;
}
/// Inserts release-side instructions before or after \p MI (per \p Pos).
///
/// For global memory at system scope a BUFFER_WBL2 with the SC0 and SC1 bits
/// set is inserted first. The request is then always forwarded to the
/// inherited insertRelease.
///
/// \returns True if any instruction was inserted.
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  // Read the block and debug location before MI is possibly advanced.
  MachineBasicBlock &Block = *MI->getParent();
  DebugLoc Loc = MI->getDebugLoc();

  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  bool Inserted = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_WBL2))
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      Inserted = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    --MI;

  const bool BaseInserted = SIGfx7CacheControl::insertRelease(
      MI, Scope, AddrSpace, IsCrossAddrSpaceOrdering, Pos);
  return Inserted || BaseInserted;
}
/// Encodes \p Scope in the SC bits of the load \p MI when \p AddrSpace
/// includes global memory: system sets SC0 and SC1, agent sets SC1, workgroup
/// sets SC0, narrower scopes set nothing.
/// \returns True if any bit could be set on \p MI.
bool SIGfx940CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Modified = false;
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM: {
      const bool SC0Set = enableSC0Bit(MI);
      const bool SC1Set = enableSC1Bit(MI);
      Modified = SC0Set || SC1Set;
      break;
    }
    case SIAtomicScope::AGENT:
      Modified = enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      Modified = enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Modified;
}
/// Encodes \p Scope in the SC bits of the store \p MI when \p AddrSpace
/// includes global memory: system sets SC0 and SC1, agent sets SC1, workgroup
/// sets SC0, narrower scopes set nothing.
/// \returns True if any bit could be set on \p MI.
bool SIGfx940CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Modified = false;
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM: {
      const bool SC0Set = enableSC0Bit(MI);
      const bool SC1Set = enableSC1Bit(MI);
      Modified = SC0Set || SC1Set;
      break;
    }
    case SIAtomicScope::AGENT:
      Modified = enableSC1Bit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
      Modified = enableSC0Bit(MI);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Modified;
}
/// For a read-modify-write \p MI touching global memory, sets the SC1 bit at
/// system scope and leaves \p MI unchanged for every narrower scope.
/// \returns True if the bit could be set on \p MI.
bool SIGfx940CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && MI->mayStore());

  bool Modified = false;
  const bool TouchesGlobal =
      (AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
  if (TouchesGlobal) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Modified = enableSC1Bit(MI);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // SC1 stays as it is for these scopes.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  return Modified;
}
/// Apply the volatile or nontemporal treatment to the plain load or store
/// \p MI. Volatile takes precedence: when \p IsVolatile is set the
/// nontemporal handling is skipped entirely.
///
/// \returns true if any invoked helper reported a change.
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  // Exactly one of load/store is expected, and Op must name that single kind.
  assert(MI->mayLoad() ^ MI->mayStore());
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  if (IsVolatile) {
    // Set both SC bits, then insert a SYSTEM-scope wait after the
    // instruction with cross-address-space ordering not requested.
    const bool SetSC0 = enableSC0Bit(MI);
    const bool SetSC1 = enableSC1Bit(MI);
    const bool Waited =
        insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op,
                   /*IsCrossAddrSpaceOrdering=*/false, Position::AFTER);
    return SetSC0 || SetSC1 || Waited;
  }

  // Nontemporal accesses only get the NT bit.
  return IsNonTemporal && enableNTBit(MI);
}
/// Insert a BUFFER_INV before or after \p MI (per \p Pos) when \p Scope and
/// \p AddrSpace call for one. The SC bits passed as the immediate depend on
/// the scope; WORKGROUP scope only emits the instruction when
/// ST.isTgSplitEnabled() is true.
///
/// With Position::AFTER, \p MI is advanced, the instruction is inserted in
/// front of the new position, and \p MI is stepped back once, so it ends up
/// on the inserted instruction when one was emitted.
///
/// \returns true if an instruction was inserted. Always false when
/// InsertCacheInv is off.
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // Capture block and debug location before MI is moved.
  MachineBasicBlock &Block = *MI->getParent();
  const DebugLoc Loc = MI->getDebugLoc();
  const bool InsertAfter = Pos == Position::AFTER;

  // Work out whether an invalidate is needed and with which SC bits.
  bool NeedInv = false;
  unsigned ScopeBits = 0;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      NeedInv = true;
      ScopeBits = AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1;
      break;
    case SIAtomicScope::AGENT:
      NeedInv = true;
      ScopeBits = AMDGPU::CPol::SC1;
      break;
    case SIAtomicScope::WORKGROUP:
      if (ST.isTgSplitEnabled()) {
        NeedInv = true;
        ScopeBits = AMDGPU::CPol::SC0;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    ++MI;

  if (NeedInv)
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_INV)).addImm(ScopeBits);

  if (InsertAfter)
    --MI;

  return NeedInv;
}
/// Insert a BUFFER_WBL2 before or after \p MI (per \p Pos) for SYSTEM and
/// AGENT scope when \p AddrSpace includes GLOBAL, then always call
/// insertWait() covering both loads and stores.
///
/// \returns true if a BUFFER_WBL2 was inserted or insertWait() reported a
/// change.
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  // Capture block and debug location before MI is moved.
  MachineBasicBlock &Block = *MI->getParent();
  const DebugLoc Loc = MI->getDebugLoc();
  const bool InsertAfter = Pos == Position::AFTER;

  // Decide whether a write-back is required and which SC bits it carries.
  bool NeedWriteback = false;
  unsigned ScopeBits = 0;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      NeedWriteback = true;
      ScopeBits = AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1;
      break;
    case SIAtomicScope::AGENT:
      NeedWriteback = true;
      ScopeBits = AMDGPU::CPol::SC1;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No BUFFER_WBL2 is emitted for these scopes.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    ++MI;

  if (NeedWriteback)
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_WBL2)).addImm(ScopeBits);

  if (InsertAfter)
    --MI;

  // The wait is requested after the optional write-back, at the same Pos.
  const bool Waited =
      insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                 IsCrossAddrSpaceOrdering, Pos);
  return NeedWriteback || Waited;
}
/// Set the GLC and/or DLC bits on the load \p MI as selected by \p Scope.
/// Nothing is touched unless \p AddrSpace includes the GLOBAL address space.
/// WORKGROUP scope sets GLC only when ST.isCuModeEnabled() is false.
///
/// \returns true if any invoked bit-enabling helper reported a change.
bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  // Decide which bits are wanted first; they are applied afterwards.
  bool WantGLC = false;
  bool WantDLC = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WantGLC = true;
      WantDLC = true;
      break;
    case SIAtomicScope::WORKGROUP:
      WantGLC = !ST.isCuModeEnabled();
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // GLC is always applied before DLC.
  bool Modified = false;
  if (WantGLC)
    Modified |= enableGLCBit(MI);
  if (WantDLC)
    Modified |= enableDLCBit(MI);
  return Modified;
}
/// Apply the volatile or nontemporal treatment to the plain load or store
/// \p MI. Volatile takes precedence over nontemporal.
///
/// Volatile: loads get GLC and DLC; both loads and stores are followed by a
/// SYSTEM-scope insertWait(). Nontemporal: stores get GLC; both loads and
/// stores get SLC.
///
/// \returns true if any invoked helper reported a change.
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  // Exactly one of load/store is expected, and Op must name that single kind.
  assert(MI->mayLoad() ^ MI->mayStore());
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  // Neither attribute present: leave the instruction as it is.
  if (!IsVolatile && !IsNonTemporal)
    return false;

  bool Modified = false;
  if (IsVolatile) {
    if (Op == SIMemOp::LOAD) {
      Modified |= enableGLCBit(MI);
      Modified |= enableDLCBit(MI);
    }
    // Cross-address-space ordering is not requested for the wait.
    Modified |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op,
                           /*IsCrossAddrSpaceOrdering=*/false,
                           Position::AFTER);
  } else {
    // Nontemporal only.
    if (Op == SIMemOp::STORE)
      Modified |= enableGLCBit(MI);
    Modified |= enableSLCBit(MI);
  }
  return Modified;
}
/// Insert the wait instructions required before or after \p MI (per \p Pos)
/// for the given \p Scope, \p AddrSpace and memory operation kinds \p Op.
///
/// Three counters are considered: vmcnt (loads) and vscnt (stores) for the
/// GLOBAL/SCRATCH address spaces, and lgkmcnt for LDS/GDS when
/// \p IsCrossAddrSpaceOrdering is set. vmcnt and lgkmcnt share a single
/// S_WAITCNT; vscnt gets a separate S_WAITCNT_VSCNT emitted after it.
///
/// \returns true if at least one instruction was inserted.
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  // Capture block and debug location before MI is moved.
  MachineBasicBlock &Block = *MI->getParent();
  const DebugLoc Loc = MI->getDebugLoc();
  const bool InsertAfter = Pos == Position::AFTER;
  if (InsertAfter)
    ++MI;

  const bool CoversLoads = (Op & SIMemOp::LOAD) != SIMemOp::NONE;
  const bool CoversStores = (Op & SIMemOp::STORE) != SIMemOp::NONE;
  const auto Touches = [AddrSpace](SIAtomicAddrSpace Spaces) {
    return (AddrSpace & Spaces) != SIAtomicAddrSpace::NONE;
  };

  bool WaitVM = false;
  bool WaitVS = false;
  bool WaitLGKM = false;

  // Vector memory counters: global and scratch accesses.
  if (Touches(SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitVM = CoversLoads;
      WaitVS = CoversStores;
      break;
    case SIAtomicScope::WORKGROUP:
      // Only waited on when CU mode is disabled.
      if (!ST.isCuModeEnabled()) {
        WaitVM = CoversLoads;
        WaitVS = CoversStores;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // LDS: waited on from WORKGROUP scope upwards, and only when
  // cross-address-space ordering was requested.
  if (Touches(SIAtomicAddrSpace::LDS)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      WaitLGKM |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // GDS: as for LDS, but only for AGENT and SYSTEM scope.
  if (Touches(SIAtomicAddrSpace::GDS)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      WaitLGKM |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  bool Inserted = false;

  if (WaitVM || WaitLGKM) {
    // A counter that is not waited on is encoded with its full bit mask;
    // a waited-on counter is encoded as 0. expcnt always gets its mask.
    const unsigned VmCount = WaitVM ? 0 : getVmcntBitMask(IV);
    const unsigned LgkmCount = WaitLGKM ? 0 : getLgkmcntBitMask(IV);
    const unsigned Encoded =
        AMDGPU::encodeWaitcnt(IV, VmCount, getExpcntBitMask(IV), LgkmCount);
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::S_WAITCNT)).addImm(Encoded);
    Inserted = true;
  }

  if (WaitVS) {
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::S_WAITCNT_VSCNT))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Inserted = true;
  }

  if (InsertAfter)
    --MI;

  return Inserted;
}
/// Insert BUFFER_GL0_INV and/or BUFFER_GL1_INV before or after \p MI (per
/// \p Pos) when \p AddrSpace includes GLOBAL. SYSTEM and AGENT scope emit
/// both, GL0 first; WORKGROUP scope emits only BUFFER_GL0_INV, and only when
/// ST.isCuModeEnabled() is false.
///
/// \returns true if an instruction was inserted. Always false when
/// InsertCacheInv is off.
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  // Capture block and debug location before MI is moved.
  MachineBasicBlock &Block = *MI->getParent();
  const DebugLoc Loc = MI->getDebugLoc();
  const bool InsertAfter = Pos == Position::AFTER;

  // Decide which invalidates are required.
  bool InvalidateGL0 = false;
  bool InvalidateGL1 = false;
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      InvalidateGL0 = true;
      InvalidateGL1 = true;
      break;
    case SIAtomicScope::WORKGROUP:
      InvalidateGL0 = !ST.isCuModeEnabled();
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (InsertAfter)
    ++MI;

  if (InvalidateGL0)
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_GL0_INV));
  if (InvalidateGL1)
    BuildMI(Block, MI, Loc, TII->get(AMDGPU::BUFFER_GL1_INV));

  if (InsertAfter)
    --MI;

  return InvalidateGL0 || InvalidateGL1;
}
/// Set the GLC bit on the load \p MI as selected by \p Scope. Nothing is
/// touched unless \p AddrSpace includes the GLOBAL address space. WORKGROUP
/// scope sets GLC only when ST.isCuModeEnabled() is false.
///
/// \returns the result of enableGLCBit() when it was invoked, else false.
bool SIGfx11CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  switch (Scope) {
  case SIAtomicScope::SYSTEM:
  case SIAtomicScope::AGENT:
    return enableGLCBit(MI);
  case SIAtomicScope::WORKGROUP:
    // Short-circuit keeps enableGLCBit() from running in CU mode.
    return !ST.isCuModeEnabled() && enableGLCBit(MI);
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }
}
/// Apply the volatile or nontemporal treatment to the plain load or store
/// \p MI. Volatile takes precedence over nontemporal.
///
/// Volatile: loads get GLC; loads and stores get DLC and are followed by a
/// SYSTEM-scope insertWait(). Nontemporal: stores get GLC; loads and stores
/// get SLC and then DLC.
///
/// \returns true if any invoked helper reported a change.
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  // Exactly one of load/store is expected, and Op must name that single kind.
  assert(MI->mayLoad() ^ MI->mayStore());
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  // Neither attribute present: leave the instruction as it is.
  if (!IsVolatile && !IsNonTemporal)
    return false;

  bool Modified = false;
  if (IsVolatile) {
    if (Op == SIMemOp::LOAD)
      Modified |= enableGLCBit(MI);
    // DLC is set for loads and stores alike.
    Modified |= enableDLCBit(MI);
    // Cross-address-space ordering is not requested for the wait.
    Modified |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op,
                           /*IsCrossAddrSpaceOrdering=*/false,
                           Position::AFTER);
  } else {
    // Nontemporal only.
    if (Op == SIMemOp::STORE)
      Modified |= enableGLCBit(MI);
    Modified |= enableSLCBit(MI);
    Modified |= enableDLCBit(MI);
  }
  return Modified;
}
/// Erase every instruction recorded in AtomicPseudoMIs from its parent and
/// empty the list.
///
/// \returns true if the list held at least one instruction.
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  const bool HadPseudos = !AtomicPseudoMIs.empty();

  // With an empty list both the loop and the clear() are no-ops.
  for (auto &Pseudo : AtomicPseudoMIs)
    Pseudo->eraseFromParent();
  AtomicPseudoMIs.clear();

  return HadPseudos;
}
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI) {
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
MOI.getOrderingAddrSpace());
}
if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
Changed |= CC->insertWait(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
SIMemOp::LOAD | SIMemOp::STORE,
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
Changed |= CC->insertWait(MI, MOI.getScope(),
MOI.getInstrAddrSpace(),
SIMemOp::LOAD,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
Changed |= CC->insertAcquire(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
Position::AFTER);
}
return Changed;
}
Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
SIMemOp::LOAD, MOI.isVolatile(),
MOI.isNonTemporal());
return Changed;
}
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI) {
assert(!MI->mayLoad() && MI->mayStore());
bool Changed = false;
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
MOI.getOrderingAddrSpace());
}
if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
Changed |= CC->insertRelease(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
return Changed;
}
Changed |= CC->enableVolatileAndOrNonTemporal(
MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
MOI.isNonTemporal());
return Changed;
}
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI) {
assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
AtomicPseudoMIs.push_back(MI);
bool Changed = false;
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
Changed |= CC->insertRelease(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
Changed |= CC->insertAcquire(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
Position::BEFORE);
return Changed;
}
return Changed;
}
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI) {
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
MOI.getInstrAddrSpace());
}
if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
Changed |= CC->insertRelease(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
Changed |= CC->insertWait(MI, MOI.getScope(),
MOI.getInstrAddrSpace(),
isAtomicRet(*MI) ? SIMemOp::LOAD :
SIMemOp::STORE,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
Changed |= CC->insertAcquire(MI, MOI.getScope(),
MOI.getOrderingAddrSpace(),
Position::AFTER);
}
return Changed;
}
return Changed;
}
/// Run the memory legalizer over \p MF.
///
/// Every instruction of every block is visited. A bundle that may load or
/// store is dissolved first. Instructions whose descriptor carries the
/// SIInstrFlags::maybeAtomic flag are then classified through SIMemOpAccess
/// and passed to the matching expand* routine. Finally the queued atomic
/// pseudo instructions are erased.
///
/// \returns true if the function was modified in any way, including the case
/// where the only modification was dissolving a bundle.
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Dissolve load/store bundles so their members are visited one by one.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        // II is advanced by the loop initializer below and so ends up on the
        // first instruction following the bundle header.
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          // Clear the internal-read flag on every register operand of the
          // instruction that was just taken out of the bundle.
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();

        // Erasing the header and unbundling its members modifies the
        // function even if none of them needs expansion below, so it must be
        // reflected in the return value.
        Changed = true;
      }

      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // The first classifier that yields a value selects the expansion.
      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(MOI.value(), MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI))
        Changed |= expandStore(MOI.value(), MI);
      else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(MOI.value(), MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(MOI.value(), MI);
    }
  }

  // The fence pseudos queued by expandAtomicFence() are erased only after
  // the walk over all blocks has finished.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
// Register the pass using the DEBUG_TYPE string as its argument and the
// PASS_NAME string as its description (both are #defined at the top of this
// file).
// NOTE(review): the two trailing `false` flags are presumably the macro's
// "CFG only" and "is analysis" parameters — confirm against PassSupport.h.
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
// Definition of the pass's static identifier, initialized to 0.
char SIMemoryLegalizer::ID = 0;
// Reference in namespace llvm bound to the identifier defined above.
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
/// Allocate a new SIMemoryLegalizer with `new` and return it as a
/// FunctionPass pointer.
FunctionPass *llvm::createSIMemoryLegalizerPass() {
  FunctionPass *Legalizer = new SIMemoryLegalizer();
  return Legalizer;
}