# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s --- | define amdgpu_kernel void @no_sched_barrier(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_2(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_4(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_8(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_16(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_64(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_128(i32 addrspace(3)* noalias %out, i32 addrspace(3)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_256(i32 addrspace(3)* noalias %out, i32 addrspace(3)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_512(i32 addrspace(3)* noalias %out, i32 addrspace(3)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_masks_8_12(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void @sched_barrier_mask_4_bundle(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } define amdgpu_kernel void 
@sched_barrier_mask_0_bundle(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) { ret void } !0 = distinct !{!0} !1 = !{!1, !0} ... --- name: no_sched_barrier tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: no_sched_barrier ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_NOP 0 %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into 
%ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ... # MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_0 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_0 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_NOP 0 SCHED_BARRIER 0 %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 
0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ... # MASK = 0x0000 0001: ALL, non-memory, non-side-effect producing instructions may be # scheduled across SCHED_BARRIER, i.e. allow ALU instructions to pass. --- name: sched_barrier_mask_1 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_1 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: SCHED_BARRIER 1 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_NOP 0 SCHED_BARRIER 1 %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope 
!0, addrspace 1) %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ... # MASK = 0x0000 0002: VALU instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_2 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_2 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: SCHED_BARRIER 2 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]] %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit 
$exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %3, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %1, implicit $exec %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %1, implicit $exec S_NOP 0 SCHED_BARRIER 2 %6:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %7:vgpr_32 = nsw V_MUL_LO_U32_e64 %6, %6, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %7, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0, implicit %4, implicit %5 ... # MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_4 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_4 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], 
[[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 4 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]] %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:areg_128 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec S_NOP 0 %7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, 
%3, %2, 0, 0, 0, implicit $mode, implicit $exec %8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec %9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec %10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec %11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec SCHED_BARRIER 4 %12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0, implicit %5, implicit %6, implicit %11 ... # MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_8 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_8 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = 
V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 8 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]] %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:areg_128 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec %6:vgpr_32 = nsw 
V_MUL_LO_U32_e64 %3, %1, implicit $exec S_NOP 0 %7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %2, 0, 0, 0, implicit $mode, implicit $exec %8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec %9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec %10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec %11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec SCHED_BARRIER 8 %12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0, implicit %5, implicit %6, implicit %11 ... # MASK = 0x0000 0010: ALL VMEM instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_16 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_16 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 16 ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: 
(store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_NOP 0 SCHED_BARRIER 16 %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ... # MASK = 0x0000 0020: VMEM read instructions may be scheduled across SCHED_BARRIER. 
# Test shape: two load -> V_MUL_LO_U32 -> store chains (offsets 0 and 512)
# separated by S_NOP 0 and SCHED_BARRIER 32.
# Expected schedule per the CHECK lines: only the offset-512 GLOBAL_LOAD is
# hoisted above the barrier; its V_MUL_LO_U32 and GLOBAL_STORE stay below it,
# and the offset-0 chain plus S_NOP 0 stay above it.
---
name: sched_barrier_mask_32
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: sched_barrier_mask_32
    ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    ; CHECK-NEXT: SCHED_BARRIER 32
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
    ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
    GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    S_NOP 0
    SCHED_BARRIER 32
    %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
    GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    S_ENDPGM 0
...
# MASK = 0x0000 0040: VMEM write instructions may be scheduled across SCHED_BARRIER.
# Expected schedule per the CHECK lines: the offset-0 GLOBAL_STORE sinks below
# SCHED_BARRIER 64 (it is emitted just before the offset-512 store). The
# offset-512 GLOBAL_LOAD and its V_MUL_LO_U32 stay below the barrier, and the
# offset-0 load, its V_MUL_LO_U32 and S_NOP 0 stay above it.
---
name: sched_barrier_mask_64
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: sched_barrier_mask_64
    ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
    ; CHECK-NEXT: SCHED_BARRIER 64
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
    ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec
    GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    S_NOP 0
    SCHED_BARRIER 64
    %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
    %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec
    GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
    S_ENDPGM 0
...

# MASK = 0x0000 0080: ALL DS instructions may be scheduled across SCHED_BARRIER.
# Expected schedule per the CHECK lines: both DS_READ_U16_gfx9 instructions end
# up above SCHED_BARRIER 128 and both DS_WRITE_B32 instructions end up below it,
# i.e. the second read is hoisted and the first write is sunk across the
# barrier. Each V_MUL_LO_U32 and S_NOP 0 stays on its original side. The unused
# %0 IMPLICIT_DEF is expected as `dead %0` below the barrier.
---
name: sched_barrier_mask_128
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: sched_barrier_mask_128
    ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
    ; CHECK-NEXT: SCHED_BARRIER 128
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
    ; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
    DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_NOP 0
    SCHED_BARRIER 128
    %4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
    DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_ENDPGM 0
...
# MASK = 0x0000 0100: ALL DS read instructions may be scheduled across SCHED_BARRIER.
# Expected schedule per the CHECK lines: the second DS_READ_U16_gfx9 is hoisted
# above SCHED_BARRIER 256. The first DS_WRITE_B32 and S_NOP 0 stay above the
# barrier, and the second V_MUL_LO_U32 and DS_WRITE_B32 stay below it. The
# unused %0 IMPLICIT_DEF is expected as `dead %0` below the barrier.
---
name: sched_barrier_mask_256
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: sched_barrier_mask_256
    ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: SCHED_BARRIER 256
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
    ; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
    DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_NOP 0
    SCHED_BARRIER 256
    %4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
    DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_ENDPGM 0
...
# MASK = 0x0000 0200: ALL DS write instructions may be scheduled across SCHED_BARRIER.
# Expected schedule per the CHECK lines: the first DS_WRITE_B32 sinks below
# SCHED_BARRIER 512 (it is emitted just before the second write). The second
# DS_READ_U16_gfx9 and its V_MUL_LO_U32 stay below the barrier, and the first
# read, its V_MUL_LO_U32 and S_NOP 0 stay above it. The unused %0 IMPLICIT_DEF
# is expected as `dead %0` below the barrier.
---
name: sched_barrier_mask_512
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: name: sched_barrier_mask_512
    ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
    ; CHECK-NEXT: SCHED_BARRIER 512
    ; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
    ; CHECK-NEXT: dead %0:sreg_64 = IMPLICIT_DEF
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_1]], [[V_MUL_LO_U32_e64_]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %3:vgpr_32 = nsw V_MUL_LO_U32_e64 %2, %2, implicit $exec
    DS_WRITE_B32 %3, %1, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_NOP 0
    SCHED_BARRIER 512
    %4:vgpr_32 = DS_READ_U16_gfx9 %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
    %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %4, %4, implicit $exec
    DS_WRITE_B32 %5, %3, 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
    S_ENDPGM 0
...
# MASK = 0x0000 0008: MFMA instructions may be scheduled across SCHED_BARRIER. # MASK = 0x0000 000C: MFMA and SALU may be scheduled across SCHED_BARRIER. # Check that S_NOP can be moved before the first SCHED_BARRIER but not the second. --- name: sched_barrier_masks_8_12 tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_masks_8_12 ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: SCHED_BARRIER 12 ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 8 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR 
[[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]] %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:areg_128 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) %5:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %1, implicit $exec SCHED_BARRIER 12 %7:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %2, 0, 0, 0, implicit $mode, implicit $exec %8:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %7, 0, 0, 0, implicit $mode, implicit $exec %9:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %8, 0, 0, 0, implicit $mode, implicit $exec %10:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %9, 0, 0, 0, implicit $mode, implicit $exec %11:areg_128 = V_MFMA_F32_4X4X1F32_e64 %1, %3, %10, 0, 0, 0, implicit $mode, implicit $exec S_NOP 0 SCHED_BARRIER 8 S_NOP 0 %12:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 
0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %13:vgpr_32 = nsw V_MUL_LO_U32_e64 %12, %12, implicit $exec GLOBAL_STORE_DWORD_SADDR %1, %13, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0, implicit %5, implicit %6, implicit %11 ... # MASK = 0x0000 0004: SALU instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_4_bundle tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_4_bundle ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: BUNDLE [[GLOBAL_LOAD_DWORD_SADDR]], implicit [[GLOBAL_LOAD_DWORD_SADDR1]] { ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: } ; CHECK-NEXT: SCHED_BARRIER 4 ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %5:vgpr_32 = 
GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec SCHED_BARRIER 4 BUNDLE implicit %3, %5 { S_NOP 0 S_NOP 0 } GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ... # MASK = 0x0000 0000: No instructions may be scheduled across SCHED_BARRIER. --- name: sched_barrier_mask_0_bundle tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: sched_barrier_mask_0_bundle ; CHECK: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: BUNDLE [[GLOBAL_LOAD_DWORD_SADDR1]], implicit [[GLOBAL_LOAD_DWORD_SADDR]] { ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: } ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) 
; CHECK-NEXT: S_ENDPGM 0 %0:sreg_64 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1) %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec SCHED_BARRIER 0 BUNDLE implicit %3, %5 { S_NOP 0 S_NOP 0 } GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1) S_ENDPGM 0 ...