; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s

; Returns the constant i1 value 1 from an amdgpu_gfx function.
; On every tested target the expected output moves the literal 1 into v0 and
; returns with s_setpc_b64 s[30:31]. The gfx1010 and gfx1100 runs share one set
; of checks, which additionally expects an s_waitcnt_vscnt at function entry.
define amdgpu_gfx i1 @return_i1() #0 {
; GFX9-LABEL: return_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i1:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i1 1
}

; Calls @return_i1 with the amdgpu_gfx calling convention and discards the
; returned i1. The expected output on each tested target has the same shape:
;   * v1 is stored to memory at s32 with all lanes enabled (s_or_saveexec with
;     -1) on entry and loaded back before returning (the folded spill/reload);
;   * s33, s30 and s31 are written into lanes 2, 0 and 1 of v1 before the call
;     and read back afterwards;
;   * the callee address is loaded through a gotpcrel32 relocation and the call
;     is made with s_swappc_b64.
; Per-target differences visible in the checks: s32 is adjusted by 0x400 on
; gfx900, 0x200 on gfx1010 and 16 on gfx1100; gfx1100 uses scratch_store_b32 /
; scratch_load_b32 and s[0:1] where the other targets use buffer_store_dword /
; buffer_load_dword and s[34:35].
define amdgpu_gfx void @call_i1() #0 {
; GFX9-LABEL: call_i1:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v1, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i1:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v1, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i1:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_i1@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_i1@gotpcrel32@hi+12
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v1, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx i1 @return_i1()
  ret void
}

; Returns the constant i16 value 10 from an amdgpu_gfx function.
; On every tested target the expected output moves the literal 10 into v0 and
; returns with s_setpc_b64 s[30:31]. The gfx1010 and gfx1100 runs share one set
; of checks, which additionally expects an s_waitcnt_vscnt at function entry.
define amdgpu_gfx i16 @return_i16() #0 {
; GFX9-LABEL: return_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 10
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_i16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 10
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret i16 10
}

; Calls @return_i16 with the amdgpu_gfx calling convention and discards the
; returned i16. The expected output on each tested target has the same shape:
;   * v1 is stored to memory at s32 with all lanes enabled (s_or_saveexec with
;     -1) on entry and loaded back before returning (the folded spill/reload);
;   * s33, s30 and s31 are written into lanes 2, 0 and 1 of v1 before the call
;     and read back afterwards;
;   * the callee address is loaded through a gotpcrel32 relocation and the call
;     is made with s_swappc_b64.
; Per-target differences visible in the checks: s32 is adjusted by 0x400 on
; gfx900, 0x200 on gfx1010 and 16 on gfx1100; gfx1100 uses scratch_store_b32 /
; scratch_load_b32 and s[0:1] where the other targets use buffer_store_dword /
; buffer_load_dword and s[34:35].
define amdgpu_gfx void @call_i16() #0 {
; GFX9-LABEL: call_i16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v1, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_i16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v1, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_i16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_i16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_i16@gotpcrel32@hi+12
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v1, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx i16 @return_i16()
  ret void
}

; Returns the constant vector <i16 1, i16 2> from an amdgpu_gfx function.
; On every tested target the expected output materializes both elements with a
; single move of 0x20001 into v0 (element 0 in the low half, element 1 in the
; high half) and returns with s_setpc_b64 s[30:31]. The gfx1010 and gfx1100
; runs share one set of checks, which additionally expects an s_waitcnt_vscnt
; at function entry.
define amdgpu_gfx <2 x i16> @return_2xi16() #0 {
; GFX9-LABEL: return_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: return_2xi16:
; GFX10PLUS:       ; %bb.0: ; %entry
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <2 x i16> <i16 1, i16 2>
}

; Calls @return_2xi16 with the amdgpu_gfx calling convention and discards the
; returned <2 x i16>. The expected output on each tested target has the same
; shape:
;   * v1 is stored to memory at s32 with all lanes enabled (s_or_saveexec with
;     -1) on entry and loaded back before returning (the folded spill/reload);
;   * s33, s30 and s31 are written into lanes 2, 0 and 1 of v1 before the call
;     and read back afterwards;
;   * the callee address is loaded through a gotpcrel32 relocation and the call
;     is made with s_swappc_b64.
; Per-target differences visible in the checks: s32 is adjusted by 0x400 on
; gfx900, 0x200 on gfx1010 and 16 on gfx1100; gfx1100 uses scratch_store_b32 /
; scratch_load_b32 and s[0:1] where the other targets use buffer_store_dword /
; buffer_load_dword and s[34:35].
define amdgpu_gfx void @call_2xi16() #0 {
; GFX9-LABEL: call_2xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v1, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v1, s30, 0
; GFX9-NEXT:    v_writelane_b32 v1, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v1, 1
; GFX9-NEXT:    v_readlane_b32 s30, v1, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v1, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_2xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v1, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v1, s30, 0
; GFX10-NEXT:    v_writelane_b32 v1, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v1, 1
; GFX10-NEXT:    v_readlane_b32 s30, v1, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v1, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_2xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v1, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v1, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_2xi16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_2xi16@gotpcrel32@hi+12
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v1, s30, 0
; GFX11-NEXT:    v_writelane_b32 v1, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v1, 1
; GFX11-NEXT:    v_readlane_b32 s30, v1, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v1, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <2 x i16> @return_2xi16()
  ret void
}

; Returns the constant vector <i16 1, i16 2, i16 3> from an amdgpu_gfx function.
; The expected output places 0x20001 (elements 0 and 1) in v0 and 3 (element 2)
; in v1, then returns with s_setpc_b64 s[30:31]. gfx1010 and gfx1100 are checked
; separately here because their output differs: gfx1100 fuses both moves into a
; single v_dual_mov_b32, while gfx900 and gfx1010 use two v_mov_b32 instructions.
define amdgpu_gfx <3 x i16> @return_3xi16() #0 {
; GFX9-LABEL: return_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT:    v_mov_b32_e32 v1, 3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT:    v_mov_b32_e32 v1, 3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <3 x i16> <i16 1, i16 2, i16 3>
}

; Calls @return_3xi16 with the amdgpu_gfx calling convention and discards the
; returned <3 x i16>. The expected output on each tested target has the same
; shape:
;   * v2 is stored to memory at s32 with all lanes enabled (s_or_saveexec with
;     -1) on entry and loaded back before returning (the folded spill/reload);
;   * s33, s30 and s31 are written into lanes 2, 0 and 1 of v2 before the call
;     and read back afterwards;
;   * the callee address is loaded through a gotpcrel32 relocation and the call
;     is made with s_swappc_b64.
; NOTE(review): v2 is used here rather than v1 presumably because the callee
; returns its result in v0 and v1 -- confirm against the calling convention.
; Per-target differences visible in the checks: s32 is adjusted by 0x400 on
; gfx900, 0x200 on gfx1010 and 16 on gfx1100; gfx1100 uses scratch_store_b32 /
; scratch_load_b32 and s[0:1] where the other targets use buffer_store_dword /
; buffer_load_dword and s[34:35].
define amdgpu_gfx void @call_3xi16() #0 {
; GFX9-LABEL: call_3xi16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v2, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v2, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_3xi16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v2, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v2, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_3xi16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v2, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_3xi16@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_3xi16@gotpcrel32@hi+12
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v2, s30, 0
; GFX11-NEXT:    v_writelane_b32 v2, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v2, 1
; GFX11-NEXT:    v_readlane_b32 s30, v2, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v2, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  call amdgpu_gfx <3 x i16> @return_3xi16()
  ret void
}

; Check that return values that do not fit in registers do not crash

define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX9-LABEL: return_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: return_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1096
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1092
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1088
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1084
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1080
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1076
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1072
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1068
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1064
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1060
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1056
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1052
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1048
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1044
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1040
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1036
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1032
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1028
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1024
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1020
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1016
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1012
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1008
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1004
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:1000
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:996
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:992
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:988
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:984
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:980
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:976
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:972
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:968
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:964
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:960
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:956
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:952
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:948
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:944
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:940
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:936
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:932
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:928
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:924
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:920
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:916
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:912
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:908
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:904
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:900
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:896
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:892
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:888
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:884
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:880
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:876
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:872
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:868
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:864
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:860
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:856
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:852
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:848
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:844
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:840
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:836
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:832
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:828
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:824
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:820
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:816
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:812
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:808
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:804
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:800
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:796
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:792
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:788
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:784
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:780
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:776
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:772
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:768
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:764
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:760
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:756
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:752
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:748
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:744
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:740
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:736
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:732
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:728
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:724
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:720
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:716
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:712
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:708
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:704
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:700
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:696
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:692
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:688
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:684
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:680
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:676
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:672
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:668
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:664
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:660
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:656
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:652
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:648
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:644
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:640
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:636
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:632
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:628
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:624
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:620
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:616
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:612
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:608
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:604
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:600
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:596
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:592
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:588
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:584
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:580
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:576
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:572
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:568
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:564
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:560
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:556
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:552
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:548
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:544
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:540
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:536
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:532
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:528
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:524
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:520
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:516
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:512
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:508
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:504
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:500
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:496
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:492
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:488
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:484
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:480
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:476
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:472
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:468
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:464
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:460
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:456
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:452
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:448
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:444
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:440
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:436
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:432
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:428
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:424
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:420
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:416
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:412
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:408
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:404
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:400
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:396
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:392
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:388
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:384
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:380
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:376
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:372
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:368
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:364
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:360
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:356
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:352
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:348
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:344
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:340
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:336
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:332
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:328
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:324
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:320
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:316
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:312
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:308
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:304
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:300
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:296
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:292
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:288
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:284
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:280
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:276
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:272
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:268
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:264
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:260
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:256
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:252
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:248
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:244
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:236
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:232
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:228
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:224
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:220
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:216
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:212
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:208
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:204
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:200
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:196
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:192
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:188
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:184
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:180
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:176
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:172
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:168
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:164
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:160
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:156
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:152
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:148
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:144
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:140
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:136
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:132
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:128
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:124
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:120
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:116
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:108
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:104
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:100
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:96
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:92
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:88
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:84
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:80
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:76
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:72
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:68
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:64
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:60
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:56
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:52
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:48
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:44
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:40
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:36
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:32
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:28
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:24
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:20
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: return_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX11-NEXT:    v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    s_clause 0x3e
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2032
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2016
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:2000
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1984
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1968
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1952
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1936
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1920
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1904
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1888
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1872
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1856
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1840
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1824
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1808
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1792
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1776
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1760
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1744
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1728
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1712
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1696
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1680
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1664
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1648
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1632
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1616
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1600
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1584
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1568
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1552
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1536
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1520
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1504
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1488
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1472
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1456
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1440
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1424
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1408
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1392
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1376
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1360
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1344
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1328
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1312
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1296
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1280
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1264
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1248
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1232
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1216
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1200
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1184
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1168
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1152
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1136
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1120
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1104
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1088
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1072
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1056
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1040
; GFX11-NEXT:    s_clause 0x3e
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1024
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:1008
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:992
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:976
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:960
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:944
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:928
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:912
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:896
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:880
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:864
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:848
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:832
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:816
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:800
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:784
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:768
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:752
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:736
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:720
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:704
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:688
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:672
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:656
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:640
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:624
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:608
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:592
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:576
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:560
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:544
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:528
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:512
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:496
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:480
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:464
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:448
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:432
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:416
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:400
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:384
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:368
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:352
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:336
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:320
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:304
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:288
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:272
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:256
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:240
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:224
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:208
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:192
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:176
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:160
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:144
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:128
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:96
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:80
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:64
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:48
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:32
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:16
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ret <512 x i32> zeroinitializer
}

; Caller-side test for a <512 x i32> return value under the amdgpu_gfx calling
; convention: the function calls return_512xi32 and discards the result.
;
; What the autogenerated assertions below pin down for each of the three RUN
; targets (gfx900, gfx1010, gfx1100):
;   * the callee address is loaded via a gotpcrel32 relocation and the call is
;     made with s_swappc_b64;
;   * s33 is derived from s32 by adding a rounding constant and masking the
;     low bits, and s32 is bumped before the call and restored after it;
;   * v0 is set from s33 just before the call (shifted right by 6 on gfx900,
;     by 5 on gfx1010, copied unchanged on gfx1100). return_512xi32 stores its
;     result through v0, so this is presumably the address of the stack area
;     that receives the returned vector -- TODO confirm against the ABI docs;
;   * s30, s31 and the old s33 are kept in lanes 0..2 of a VGPR that is itself
;     spilled and reloaded around the body with all lanes enabled.
;
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
define amdgpu_gfx void @call_512xi32() #0 {
; GFX9-LABEL: call_512xi32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v2, s33, 2
; GFX9-NEXT:    s_add_i32 s33, s32, 0x1ffc0
; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffe0000
; GFX9-NEXT:    s_add_i32 s32, s32, 0x60000
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX9-NEXT:    v_writelane_b32 v2, s30, 0
; GFX9-NEXT:    v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT:    v_writelane_b32 v2, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v2, 1
; GFX9-NEXT:    v_readlane_b32 s30, v2, 0
; GFX9-NEXT:    s_add_i32 s32, s32, 0xfffa0000
; GFX9-NEXT:    v_readlane_b32 s33, v2, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_512xi32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v2, s33, 2
; GFX10-NEXT:    s_add_i32 s33, s32, 0xffe0
; GFX10-NEXT:    s_add_i32 s32, s32, 0x30000
; GFX10-NEXT:    s_and_b32 s33, s33, 0xffff0000
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v2, s30, 0
; GFX10-NEXT:    s_load_dwordx2 s[34:35], s[34:35], 0x0
; GFX10-NEXT:    v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT:    v_writelane_b32 v2, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v2, 1
; GFX10-NEXT:    v_readlane_b32 s30, v2, 0
; GFX10-NEXT:    s_add_i32 s32, s32, 0xfffd0000
; GFX10-NEXT:    v_readlane_b32 s33, v2, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_512xi32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v5, s32 offset:2048 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v5, s33, 2
; GFX11-NEXT:    s_add_i32 s33, s32, 0x7ff
; GFX11-NEXT:    s_addk_i32 s32, 0x1800
; GFX11-NEXT:    s_and_b32 s33, s33, 0xfffff800
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, return_512xi32@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, return_512xi32@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v5, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, s33
; GFX11-NEXT:    v_writelane_b32 v5, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v5, 1
; GFX11-NEXT:    v_readlane_b32 s30, v5, 0
; GFX11-NEXT:    s_addk_i32 s32, 0xe800
; GFX11-NEXT:    v_readlane_b32 s33, v5, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v5, off, s32 offset:2048 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The returned vector is intentionally unused; only the call sequence and
  ; the surrounding stack setup are under test.
  call amdgpu_gfx <512 x i32> @return_512xi32()
  ret void
}

; Attribute group referenced as `#0` by the function definitions in this file;
; it marks them nounwind.
attributes #0 = { nounwind }