; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX940 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL declare void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr, i32 %size, i32 %offset, i32 %aux) define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { ; GFX900-LABEL: global_load_lds_dword_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 ; GFX900-NEXT: s_mov_b32 m0, s0 ; GFX900-NEXT: s_nop 0 ; GFX900-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: global_load_lds_dword_vaddr: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 ; GFX90A-NEXT: s_mov_b32 m0, s0 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_load_lds_dword_vaddr: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: v_readfirstlane_b32 s0, v2 ; GFX940-NEXT: s_mov_b32 m0, s0 ; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 ; GFX940-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_vaddr: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: s_mov_b32 m0, s0 ; GFX10-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX10-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: global_load_lds_dword_vaddr: ; GFX900-GISEL: ; %bb.0: ; %main_body ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2 ; GFX900-GISEL-NEXT: s_nop 4 ; GFX900-GISEL-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX900-GISEL-NEXT: s_endpgm main_body: call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 16, i32 1) ret void } define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr) { ; GFX900-LABEL: global_load_lds_dword_saddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 ; GFX900-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-NEXT: s_mov_b32 m0, s2 ; GFX900-NEXT: s_nop 0 ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: global_load_lds_dword_saddr: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0 ; GFX90A-NEXT: v_mov_b32_e32 v1, 0 ; GFX90A-NEXT: s_mov_b32 m0, s2 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_load_lds_dword_saddr: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: v_readfirstlane_b32 s2, v0 ; GFX940-NEXT: v_mov_b32_e32 v1, 0 ; GFX940-NEXT: s_mov_b32 m0, s2 ; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt ; GFX940-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_saddr: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_mov_b32 m0, s2 ; GFX10-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds ; GFX10-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr: ; GFX900-GISEL: ; %bb.0: ; %main_body ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-GISEL-NEXT: s_nop 3 ; GFX900-GISEL-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds ; GFX900-GISEL-NEXT: s_endpgm main_body: call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 32, i32 2) ret void } define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr, i32 %voffset) { ; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 ; GFX900-NEXT: s_mov_b32 m0, s2 ; GFX900-NEXT: s_nop 0 ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:48 lds ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0 ; GFX90A-NEXT: s_mov_b32 m0, s2 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:48 scc lds ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: v_readfirstlane_b32 s2, v0 ; GFX940-NEXT: s_mov_b32 m0, s2 ; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 ; GFX940-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 ; GFX10-NEXT: s_mov_b32 m0, s2 ; GFX10-NEXT: global_load_dword v1, s[0:1] offset:48 lds ; GFX10-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX900-GISEL: ; %bb.0: ; %main_body ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v0 ; GFX900-GISEL-NEXT: s_nop 4 ; GFX900-GISEL-NEXT: global_load_dword v1, s[0:1] offset:48 lds ; GFX900-GISEL-NEXT: s_endpgm main_body: %voffset.64 = zext i32 %voffset to i64 %gep = getelementptr i8, i8 addrspace(1)* %gptr, i64 %voffset.64 call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gep, i8 addrspace(3)* %lptr, i32 4, i32 48, i32 16) ret void } define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { ; GFX900-LABEL: global_load_lds_ushort_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 ; GFX900-NEXT: s_mov_b32 m0, s0 ; GFX900-NEXT: s_nop 0 ; GFX900-NEXT: global_load_ushort v[0:1], off lds ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: global_load_lds_ushort_vaddr: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 ; GFX90A-NEXT: s_mov_b32 m0, s0 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_ushort v[0:1], off lds ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_load_lds_ushort_vaddr: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: v_readfirstlane_b32 s0, v2 ; GFX940-NEXT: s_mov_b32 m0, s0 ; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: global_load_lds_ushort v[0:1], off ; GFX940-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_ushort_vaddr: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: s_mov_b32 m0, s0 ; GFX10-NEXT: global_load_ushort v[0:1], off dlc lds ; GFX10-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: global_load_lds_ushort_vaddr: ; GFX900-GISEL: ; %bb.0: ; %main_body ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2 ; GFX900-GISEL-NEXT: s_nop 4 ; GFX900-GISEL-NEXT: global_load_ushort v[0:1], off lds ; GFX900-GISEL-NEXT: s_endpgm main_body: call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 2, i32 0, i32 4) ret void } define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { ; GFX900-LABEL: global_load_lds_ubyte_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 ; GFX900-NEXT: s_mov_b32 m0, s0 ; GFX900-NEXT: s_nop 0 ; GFX900-NEXT: global_load_ubyte v[0:1], off lds ; GFX900-NEXT: s_endpgm ; ; GFX90A-LABEL: global_load_lds_ubyte_vaddr: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 ; GFX90A-NEXT: s_mov_b32 m0, s0 ; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: global_load_ubyte v[0:1], off lds ; GFX90A-NEXT: s_endpgm ; ; GFX940-LABEL: global_load_lds_ubyte_vaddr: ; GFX940: ; %bb.0: ; %main_body ; GFX940-NEXT: v_readfirstlane_b32 s0, v2 ; GFX940-NEXT: s_mov_b32 m0, s0 ; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: global_load_lds_ubyte v[0:1], off ; GFX940-NEXT: s_endpgm ; ; GFX10-LABEL: global_load_lds_ubyte_vaddr: ; GFX10: ; %bb.0: ; %main_body ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 ; GFX10-NEXT: s_mov_b32 m0, s0 ; GFX10-NEXT: global_load_ubyte v[0:1], off lds ; GFX10-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: global_load_lds_ubyte_vaddr: ; GFX900-GISEL: ; %bb.0: ; %main_body ; GFX900-GISEL-NEXT: v_readfirstlane_b32 m0, v2 ; GFX900-GISEL-NEXT: s_nop 4 ; GFX900-GISEL-NEXT: global_load_ubyte v[0:1], off lds ; GFX900-GISEL-NEXT: s_endpgm main_body: call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 1, i32 0, i32 0) ret void }