; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Objects used by the first two kernels: one [4 x i32] array in address
; space 2 and two [4 x i32] arrays in address space 3 that differ only in
; their requested alignment (128 vs. 256).
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
@lds1 = internal addrspace(3) global [4 x i32] undef, align 256

; These two objects should be allocated at the same constant offsets
; from the base.
;
; Both atomics address element 3 of a [4 x i32] array, so each one is
; expected at byte offset 12; the address-space-2 access is the one
; expected to carry the "gds" modifier, with m0 set to 16 beforehand.
; NOTE(review): attribute group #1 is used here but has no definition in
; the visible part of this file -- confirm that is intentional.
define amdgpu_kernel void @alloc_lds_gds(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
  %gep.lds = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds, i32 5 acq_rel
  ret void
}

; The LDS alignment shouldn't change offset of GDS.
; Expected output for this kernel: the address-space-2 atomic stays at
; offset 12 with m0 = 16, while the two address-space-3 atomics are
; expected at offsets 140 (@lds0) and 12 (@lds1). Those values are
; consistent with @lds1 (align 256) sitting at 0 and @lds0 (align 128)
; at 128, each accessed at element 3 (+12 bytes).
; NOTE(review): attribute group #1 is used here but has no definition in
; the visible part of this file -- confirm that is intentional.
define amdgpu_kernel void @alloc_lds_gds_align(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:140
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
  %gep.lds0 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds0, i32 5 acq_rel
  %gep.lds1 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds1, i32 0, i32 3
  %val2 = atomicrmw add i32 addrspace(3)* %gep.lds1, i32 5 acq_rel
  ret void
}

; Two address-space-2 arrays of the same size whose only difference is the
; requested alignment (8 vs. 32).
@gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8
@gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32

; Accesses element 3 of both arrays. The expected offsets, 28 for
; @gds_align8 and 12 for @gds_align32, are consistent with @gds_align32
; at base 0 and @gds_align8 at base 16; m0 is expected to be 32.
define amdgpu_kernel void @gds_global_align(i32 addrspace(1)* %out) {
; GCN-LABEL: gds_global_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 32
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:28 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; Same body as @gds_global_align, but with attribute group #0
; ("amdgpu-gds-size"="1024"). Every expected constant equals the
; @gds_global_align value plus 1024: m0 = 0x420 (1056), offsets 1052 and
; 1036.
define amdgpu_kernel void @gds_global_align_plus_attr(i32 addrspace(1)* %out) #0 {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_movk_i32 m0, 0x420
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1052 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1036 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; A single byte in address space 2, plus an external zero-length array.
; NOTE(review): @gds.external is declared in addrspace(3) despite its name
; and is not referenced by any function in the visible part of this file
; -- confirm whether addrspace(2) was intended.
@small.gds = internal addrspace(2) global i8 undef, align 1
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4

; Hands the address of @small.gds to inline asm (expected as 0x400 in s1)
; and performs the atomic through the %gds.arg kernel argument rather than
; a global. Expected m0 is 0x401 (1025) -- presumably the 1024 bytes from
; attribute group #0 plus the one byte of @small.gds; confirm.
define amdgpu_kernel void @gds_extern_align(i32 addrspace(1)* %out, [4 x i32] addrspace(2)* %gds.arg) #0 {
; GCN-LABEL: gds_extern_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x8
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    s_movk_i32 m0, 0x401
; GCN-NEXT:    s_movk_i32 s1, 0x400
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s1
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  call void asm sideeffect "; use $0","s"(i8 addrspace(2)* @small.gds)
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* %gds.arg, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  ret void
}

attributes #0 = { "amdgpu-gds-size"="1024" }