; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s

; Overview: every kernel below is compiled with GlobalISel for two targets
; (gfx803, gfx906) crossed with three --amdhsa-code-object-version values
; (3, 4, 5), giving six FileCheck prefixes. The expected assembly for each
; prefix is autogenerated; regenerate it with the script named on the first
; line of this file rather than editing the expectations by hand.

; Casts one addrspace(5) pointer and one addrspace(3) pointer to flat pointers
; and volatile-stores 1 and 2 through them.
; What the expectations show: for gfx803 the upper dword of each flat pointer
; (s3 / s5) is fetched with s_load_dword from s[4:5] -- offsets 0x44 and 0x40
; for code object versions 3 and 4, offsets 0xc8 and 0xcc for version 5. For
; gfx906 it is instead read with s_getreg_b32 from HW_REG_SH_MEM_BASES and
; shifted left by 16. A source pointer equal to -1 selects 0 as the result.
; NOTE(review): s[4:5] presumably holds the queue pointer for versions 3/4 and
; the kernarg segment pointer for version 5 -- confirm against the ABI docs.
define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
; GFX8V3-LABEL: addrspacecast:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT: s_load_dword s3, s[4:5], 0x44
; GFX8V3-NEXT: s_load_dword s5, s[4:5], 0x40
; GFX8V3-NEXT: v_mov_b32_e32 v2, 1
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_mov_b32 s2, s0
; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX8V3-NEXT: s_mov_b32 s4, s1
; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V3-NEXT: v_mov_b32_e32 v0, s2
; GFX8V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX8V3-NEXT: v_mov_b32_e32 v1, s3
; GFX8V3-NEXT: flat_store_dword v[0:1], v2
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: v_mov_b32_e32 v2, 2
; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
; GFX8V3-NEXT: flat_store_dword v[0:1], v2
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT: s_load_dword s3, s[4:5], 0x44
; GFX8V4-NEXT: s_load_dword s5, s[4:5], 0x40
; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_mov_b32 s2, s0
; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX8V4-NEXT: s_mov_b32 s4, s1
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V4-NEXT: v_mov_b32_e32 v0, s2
; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s3
; GFX8V4-NEXT: flat_store_dword v[0:1], v2
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v2, 2
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_store_dword v[0:1], v2
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: addrspacecast:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT: s_load_dword s3, s[4:5], 0xc8
; GFX8V5-NEXT: s_load_dword s5, s[4:5], 0xcc
; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_mov_b32 s2, s0
; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX8V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX8V5-NEXT: s_mov_b32 s4, s1
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX8V5-NEXT: v_mov_b32_e32 v0, s2
; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s3
; GFX8V5-NEXT: flat_store_dword v[0:1], v2
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v2, 2
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_store_dword v[0:1], v2
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: addrspacecast:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT: s_lshl_b32 s3, s2, 16
; GFX9V3-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT: v_mov_b32_e32 v2, 1
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_mov_b32 s2, s0
; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V3-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V3-NEXT: s_mov_b32 s4, s1
; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V3-NEXT: v_mov_b32_e32 v0, s2
; GFX9V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V3-NEXT: v_mov_b32_e32 v1, s3
; GFX9V3-NEXT: flat_store_dword v[0:1], v2
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: v_mov_b32_e32 v2, 2
; GFX9V3-NEXT: v_mov_b32_e32 v1, s1
; GFX9V3-NEXT: flat_store_dword v[0:1], v2
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT: s_lshl_b32 s3, s2, 16
; GFX9V4-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_mov_b32 s2, s0
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V4-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V4-NEXT: s_mov_b32 s4, s1
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: v_mov_b32_e32 v2, 2
; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: addrspacecast:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT: s_lshl_b32 s3, s2, 16
; GFX9V5-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_mov_b32 s2, s0
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX9V5-NEXT: s_lshl_b32 s5, s4, 16
; GFX9V5-NEXT: s_mov_b32 s4, s1
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: v_mov_b32_e32 v2, 2
; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}

; Calls llvm.amdgcn.is.shared on a flat pointer argument, zero-extends the i1
; result to i32 and volatile-stores it to an undef addrspace(1) pointer.
; What the expectations show: the upper dword of %ptr (s1) is compared for
; equality with a reference value. For gfx803 that value is loaded from
; s[4:5] at 0x40 (versions 3/4) or 0xcc (version 5); for gfx906 it is bits
; 16..31 of HW_REG_SH_MEM_BASES shifted left by 16.
; NOTE(review): the store address is undef in the IR, which is presumably why
; the expected store uses v[0:1] without any preceding setup -- confirm.
define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_shared:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: flat_store_dword v[0:1], v0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_shared:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xcc
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_shared:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_shared:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Same shape as the is.shared kernel, but calls llvm.amdgcn.is.private.
; What the expectations show: the reference value compared against the upper
; dword of %ptr comes from s[4:5] at 0x44 (gfx803, versions 3/4) or 0xc8
; (gfx803, version 5), and from bits 0..15 of HW_REG_SH_MEM_BASES shifted left
; by 16 on gfx906. These are the same sources that the addrspacecast kernel
; uses for the addrspace(5) pointer.
define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_is_private:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: flat_store_dword v[0:1], v0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: flat_store_dword v[0:1], v0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_is_private:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xc8
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: flat_store_dword v[0:1], v0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_is_private:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V3-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V3-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V4-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V4-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_is_private:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9V5-NEXT: s_lshl_b32 s0, s0, 16
; GFX9V5-NEXT: s_cmp_eq_u32 s1, s0
; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Calls llvm.trap and then hits unreachable.
; What the expectations show: every configuration ends in "s_trap 2". Before
; the trap, gfx803 with versions 3/4 and gfx906 with version 3 copy s[4:5]
; into s[0:1]; gfx803 with version 5 instead loads s[0:1] from s[4:5] at 0xc8;
; gfx906 with versions 4 and 5 emit the trap alone.
; NOTE(review): the value placed in s[0:1] is presumably the queue pointer
; handed to the trap handler -- confirm against the trap handler ABI.
define amdgpu_kernel void @llvm_trap() {
; GFX8V3-LABEL: llvm_trap:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX8V3-NEXT: s_trap 2
;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX8V4-NEXT: s_trap 2
;
; GFX8V5-LABEL: llvm_trap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xc8
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_trap 2
;
; GFX9V3-LABEL: llvm_trap:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
; GFX9V3-NEXT: s_trap 2
;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 2
;
; GFX9V5-LABEL: llvm_trap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 2
  call void @llvm.trap()
  unreachable
}

; Calls llvm.debugtrap and then hits unreachable.
; What the expectations show: all six configurations emit a lone "s_trap 3"
; with no register setup in front of it.
define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V3-LABEL: llvm_debugtrap:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: s_trap 3
;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_trap 3
;
; GFX8V5-LABEL: llvm_debugtrap:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_trap 3
;
; GFX9V3-LABEL: llvm_debugtrap:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: s_trap 3
;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 3
;
; GFX9V5-LABEL: llvm_debugtrap:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: s_trap 3
  call void @llvm.debugtrap()
  unreachable
}

; Obtains the queue, implicitarg and dispatch pointers plus the dispatch id
; through their intrinsics, volatile-loads one byte through each of the three
; pointers, and volatile-stores the dispatch id to %ptr.
; What the expectations show for versions 3/4: the three byte loads address
; s[6:7], s[8:9] + 8 and s[4:5]; %ptr is loaded from s[8:9] at 0x0; the
; stored 64-bit value comes from s[10:11].
; What the expectations show for version 5: the register numbers shift down
; (s[6:7] + 8, s[4:5], %ptr from s[6:7], value from s[8:9]).
; NOTE(review): in the version-5 expectations the first byte load addresses
; v[0:1] with no preceding setup of those registers. Presumably the queue
; pointer is not materialized from an SGPR pair in that configuration --
; confirm this is the intended lowering rather than a missed case.
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) {
; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V3: ; %bb.0:
; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
; GFX8V3-NEXT: s_add_u32 s0, s8, 8
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v0, s10
; GFX8V3-NEXT: v_mov_b32_e32 v1, s11
; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V3-NEXT: v_mov_b32_e32 v3, s1
; GFX8V3-NEXT: v_mov_b32_e32 v2, s0
; GFX8V3-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8V3-NEXT: s_waitcnt vmcnt(0)
; GFX8V3-NEXT: s_endpgm
;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
; GFX8V4-NEXT: s_add_u32 s0, s8, 8
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_addc_u32 s1, s9, 0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v0, s10
; GFX8V4-NEXT: v_mov_b32_e32 v1, s11
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V4-NEXT: v_mov_b32_e32 v3, s1
; GFX8V4-NEXT: v_mov_b32_e32 v2, s0
; GFX8V4-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
; GFX8V4-NEXT: s_endpgm
;
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_add_u32 s0, s6, 8
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_addc_u32 s1, s7, 0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v0, s8
; GFX8V5-NEXT: v_mov_b32_e32 v1, s9
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: v_mov_b32_e32 v3, s1
; GFX8V5-NEXT: v_mov_b32_e32 v2, s0
; GFX8V5-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V3: ; %bb.0:
; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V3-NEXT: s_waitcnt vmcnt(0)
; GFX9V3-NEXT: s_endpgm
;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: v_mov_b32_e32 v0, s10
; GFX9V4-NEXT: v_mov_b32_e32 v1, s11
; GFX9V4-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX9V4-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V4-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
; GFX9V4-NEXT: s_endpgm
;
; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V5: ; %bb.0:
; GFX9V5-NEXT: v_mov_b32_e32 v2, 0
; GFX9V5-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: v_mov_b32_e32 v0, s8
; GFX9V5-NEXT: v_mov_b32_e32 v1, s9
; GFX9V5-NEXT: ; kill: killed $sgpr4_sgpr5
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
; GFX9V5-NEXT: s_endpgm
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}

; Declarations of the intrinsics called by the kernels above.
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()