Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s

define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
  ; GCN-LABEL: name: mmo_offsets0
  ; GCN: bb.0.bb.0:
  ; GCN-NEXT:   liveins: $sgpr0, $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
  ; GCN-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4)
  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 96, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY2]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY3]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY4]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY5]], 176, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY6]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY7]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY8]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY9]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224
  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY10]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4)
  ; GCN-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY11]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY13]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY14]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240
  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY15]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY16]], [[S_LOAD_DWORDX4_IMM]], [[COPY17]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY18]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY19]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256
  ; GCN-NEXT:   [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY20]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY22:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY21]], [[S_LOAD_DWORDX4_IMM]], [[COPY22]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY23]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136
  ; GCN-NEXT:   [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY24]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272
  ; GCN-NEXT:   [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY25]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY27:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[COPY27]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288
  ; GCN-NEXT:   [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY30]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY32:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[COPY32]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
  ; GCN-NEXT:   [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
  ; GCN-NEXT:   [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY34]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4)
  ; GCN-NEXT:   [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304
  ; GCN-NEXT:   [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY35]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GCN-NEXT:   [[COPY37:%[0-9]+]]:sreg_32 = COPY [[COPY]]
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[COPY37]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4)
  ; GCN-NEXT:   S_ENDPGM 0
bb.0:
  %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0
  %buffer0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 16, i1 false, i1 false) #0
  %buffer1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0
  %buffer2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 16, i1 false, i1 false) #0
  %buffer3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 16, i1 false, i1 false) #0

  ; Insert inline asm to keep the different instruction types from being mixed.  This makes the output easier to read.
  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer0, <4 x i32> %tmp0, i32 0, i32 32, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer2, <4 x i32> %tmp0, i32 1, i32 32, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %buffer3, <4 x i32> %tmp0, i32 %arg1, i32 32, i1 false, i1 false) #1

  call void asm sideeffect "", "" ()

  %buffer_format0 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 48, i1 false, i1 false) #0
  %buffer_format1 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #0
  %buffer_format2 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 48, i1 false, i1 false) #0
  %buffer_format3 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 48, i1 false, i1 false) #0

  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format0, <4 x i32> %tmp0, i32 0, i32 64, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format2, <4 x i32> %tmp0, i32 1, i32 64, i1 false, i1 false) #1
  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 64, i1 false, i1 false) #1

  call void asm sideeffect "", "" ()

  %atomic_add0 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 80, i1 false) #2
  %atomic_add1 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
  %atomic_add2 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 80, i1 false) #2
  %atomic_add3 = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 80, i1 false) #2

  call void asm sideeffect "", "" ()

  %atomic_cmpswap0 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 96, i1 false) #2
  %atomic_cmpswap1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
  %atomic_cmpswap2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 96, i1 false) #2
  %atomic_cmpswap3 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 96, i1 false) #2

  call void asm sideeffect "", "" ()

  %fadd1 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 112, i1 false) #2
  %fadd2 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 0, i32 %arg1, i1 false) #2
  %fadd3 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 1, i32 112, i1 false) #2
  %fadd4 = call float @llvm.amdgcn.buffer.atomic.fadd.f32(float 1.0, <4 x i32> %tmp0, i32 %arg1, i32 112, i1 false) #2

  call void asm sideeffect "", "" ()

  ; rsrc, offset, soffset, cachepolicy
  %raw_buffer0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 0, i32 0) #0
  %raw_buffer1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 64, i32 64, i32 0) #0
  %raw_buffer2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 128, i32 0) #0
  %raw_buffer3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 128, i32 0) #0
  %raw_buffer4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %tmp0, i32 128, i32 %arg1, i32 0) #0

  call void asm sideeffect "", "" ()

  %raw_buffer_format0 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 0, i32 0) #0
  %raw_buffer_format1 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 72, i32 72, i32 0) #0
  %raw_buffer_format2 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 144, i32 0) #0
  %raw_buffer_format3 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 144, i32 0) #0
  %raw_buffer_format4 = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 144, i32 %arg1, i32 0) #0

  call void asm sideeffect "", "" ()

  %raw_atomic_add0 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 0, i32 0) #2
  %raw_atomic_add1 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 80, i32 80, i32 0) #2
  %raw_atomic_add2 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 160, i32 0) #2
  %raw_atomic_add3 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 160, i32 0) #2
  %raw_atomic_add4 = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 160, i32 %arg1, i32 0) #2

  call void asm sideeffect "", "" ()

  %raw_atomic_cmpswap0 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 0, i32 0) #2
  %raw_atomic_cmpswap1 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 88, i32 88, i32 0) #2
  %raw_atomic_cmpswap2 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 176, i32 0) #2
  %raw_atomic_cmpswap3 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 176, i32 0) #2
  %raw_atomic_cmpswap4 = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 176, i32 %arg1, i32 0) #2

  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer0, <4 x i32> %tmp0, i32 192, i32 0, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer1, <4 x i32> %tmp0, i32 96, i32 96, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer2, <4 x i32> %tmp0, i32 0, i32 192, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer3, <4 x i32> %tmp0, i32 %arg1, i32 192, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %raw_buffer4, <4 x i32> %tmp0, i32 192, i32 %arg1, i32 0) #2

  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format0, <4 x i32> %tmp0, i32 208, i32 0, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format1, <4 x i32> %tmp0, i32 104, i32 104, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format2, <4 x i32> %tmp0, i32 0, i32 208, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format3, <4 x i32> %tmp0, i32 %arg1, i32 208, i32 0) #2
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %raw_buffer_format4, <4 x i32> %tmp0, i32 208, i32 %arg1, i32 0) #2

  call void asm sideeffect "", "" ()

  ; rsrc, vindex, offset, soffset, cachepolicy
  %struct_buffer0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 0, i32 0) #0
  %struct_buffer1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 112, i32 112, i32 0) #0
  %struct_buffer2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 224, i32 0) #0
  %struct_buffer3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 224, i32 0) #0
  %struct_buffer4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 0, i32 224, i32 %arg1, i32 0) #0
  %struct_buffer5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 1, i32 224, i32 0, i32 0) #0
  %struct_buffer6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 224, i32 0, i32 0) #0

  call void asm sideeffect "", "" ()

  %struct_buffer_format0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 0, i32 0) #0
  %struct_buffer_format1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 120, i32 120, i32 0) #0
  %struct_buffer_format2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 0, i32 240, i32 0) #0
  %struct_buffer_format3 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 %arg1, i32 240, i32 0) #0
  %struct_buffer_format4 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 0, i32 240, i32 %arg1, i32 0) #0
  %struct_buffer_format5 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 1, i32 240, i32 0, i32 0) #0
  %struct_buffer_format6 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp0, i32 %arg1, i32 240, i32 0, i32 0) #0

  call void asm sideeffect "", "" ()

  %struct_atomic_add0 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 0, i32 0) #2
  %struct_atomic_add1 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 128, i32 128, i32 0) #2
  %struct_atomic_add2 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 256, i32 0) #2
  %struct_atomic_add3 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 256, i32 0) #2
  %struct_atomic_add4 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 0, i32 256, i32 %arg1, i32 0) #2
  %struct_atomic_add5 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 1, i32 256, i32 0, i32 0) #2
  %struct_atomic_add6 = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 256, i32 0, i32 0) #2

  call void asm sideeffect "", "" ()

  %struct_atomic_cmpswap0 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 0, i32 0) #2
  %struct_atomic_cmpswap1 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 136, i32 136, i32 0) #2
  %struct_atomic_cmpswap2 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 0, i32 272, i32 0) #2
  %struct_atomic_cmpswap3 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 272, i32 0) #2
  %struct_atomic_cmpswap4 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 0, i32 272, i32 %arg1, i32 0) #2
  %struct_atomic_cmpswap5 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 1, i32 272, i32 0, i32 0) #2
  %struct_atomic_cmpswap6 = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %arg1, i32 %arg1, <4 x i32> %tmp0, i32 %arg1, i32 272, i32 0, i32 0) #2

  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer0, <4 x i32> %tmp0, i32 0, i32 288, i32 0, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer1, <4 x i32> %tmp0, i32 0, i32 144, i32 144, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer2, <4 x i32> %tmp0, i32 0, i32 0, i32 288, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 288, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer4, <4 x i32> %tmp0, i32 0, i32 288, i32 %arg1, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer5, <4 x i32> %tmp0, i32 1, i32 288, i32 0, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %struct_buffer6, <4 x i32> %tmp0, i32 %arg1, i32 288, i32 0, i32 0) #2

  call void asm sideeffect "", "" ()

  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format0, <4 x i32> %tmp0, i32 0, i32 304, i32 0, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format1, <4 x i32> %tmp0, i32 0, i32 152, i32 152, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format2, <4 x i32> %tmp0, i32 0, i32 0, i32 304, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format3, <4 x i32> %tmp0, i32 0, i32 %arg1, i32 304, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format4, <4 x i32> %tmp0, i32 0, i32 304, i32 %arg1, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format5, <4 x i32> %tmp0, i32 1, i32 304, i32 0, i32 0) #2
  call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %struct_buffer_format6, <4 x i32> %tmp0, i32 %arg1, i32 304, i32 0, i32 0) #2

  ret void
}

declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #2
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #2
declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #2
declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2

declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #2
declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #2
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2

attributes #0 = { nounwind readonly }
attributes #1 = { nounwind writeonly }
attributes #2 = { nounwind }

!0 = !{}