; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; Test the localizer did something and we don't materialize all ; constants in SGPRs in the entry block. define amdgpu_kernel void @localize_constants(i1 %cond) { ; GFX9-LABEL: localize_constants: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0 ; GFX9-NEXT: s_mov_b32 s0, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_xor_b32 s1, s1, -1 ; GFX9-NEXT: s_and_b32 s1, s1, 1 ; GFX9-NEXT: s_cmp_lg_u32 s1, 0 ; GFX9-NEXT: s_cbranch_scc0 .LBB0_2 ; GFX9-NEXT: ; %bb.1: ; %bb1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB0_2: ; %Flow ; GFX9-NEXT: s_xor_b32 s0, s0, -1 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB0_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c8 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3e8 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1c7 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 0x5be6 ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB0_4: ; %bb2 ; GFX9-NEXT: s_endpgm entry: br i1 %cond, label %bb0, label %bb1 bb0: store volatile i32 123, i32 addrspace(1)* undef store volatile i32 456, i32 addrspace(1)* undef store volatile i32 999, i32 addrspace(1)* undef store volatile i32 1000, i32 addrspace(1)* undef store volatile i32 455, i32 addrspace(1)* undef store volatile i32 23526, i32 addrspace(1)* undef br label %bb2 bb1: store volatile i32 23526, i32 addrspace(1)* undef store volatile i32 455, i32 addrspace(1)* undef store volatile i32 1000, i32 addrspace(1)* undef store volatile i32 456, i32 addrspace(1)* undef store volatile i32 999, i32 addrspace(1)* undef store volatile i32 123, i32 addrspace(1)* undef br label %bb2 bb2: ret void } ; FIXME: These aren't localized because thesee were legalized before ; the localizer, and are no longer G_GLOBAL_VALUE. @gv0 = addrspace(1) global i32 undef, align 4 @gv1 = addrspace(1) global i32 undef, align 4 @gv2 = addrspace(1) global i32 undef, align 4 @gv3 = addrspace(1) global i32 undef, align 4 define amdgpu_kernel void @localize_globals(i1 %cond) { ; GFX9-LABEL: localize_globals: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0 ; GFX9-NEXT: s_mov_b32 s0, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_xor_b32 s1, s1, -1 ; GFX9-NEXT: s_and_b32 s1, s1, 1 ; GFX9-NEXT: s_cmp_lg_u32 s1, 0 ; GFX9-NEXT: s_cbranch_scc0 .LBB1_2 ; GFX9-NEXT: ; %bb.1: ; %bb1 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, gv2@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, gv3@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, gv3@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB1_2: ; %Flow ; GFX9-NEXT: s_xor_b32 s0, s0, -1 ; GFX9-NEXT: s_and_b32 s0, s0, 1 ; GFX9-NEXT: s_cmp_lg_u32 s0, 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB1_4 ; GFX9-NEXT: ; %bb.3: ; %bb0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, gv0@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_getpc_b64 s[2:3] ; GFX9-NEXT: s_add_u32 s2, s2, gv1@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v0, v0, s[0:1] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v0, v1, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB1_4: ; %bb2 ; GFX9-NEXT: s_endpgm entry: br i1 %cond, label %bb0, label %bb1 bb0: store volatile i32 0, i32 addrspace(1)* @gv0 store volatile i32 1, i32 addrspace(1)* @gv1 br label %bb2 bb1: store volatile i32 0, i32 addrspace(1)* @gv2 store volatile i32 1, i32 addrspace(1)* @gv3 br label %bb2 bb2: ret void } @static.gv0 = internal addrspace(1) global i32 undef, align 4 @static.gv1 = internal addrspace(1) global i32 undef, align 4 @static.gv2 = internal addrspace(1) global i32 undef, align 4 @static.gv3 = internal addrspace(1) global i32 undef, align 4 define void @localize_internal_globals(i1 %cond) { ; GFX9-LABEL: localize_internal_globals: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GFX9-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[6:7] ; GFX9-NEXT: s_cbranch_execnz .LBB2_3 ; GFX9-NEXT: ; %bb.1: ; %Flow ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] ; GFX9-NEXT: s_cbranch_execnz .LBB2_4 ; GFX9-NEXT: .LBB2_2: ; %bb2 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX9-NEXT: .LBB2_3: ; %bb1 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, static.gv2@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv2@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: global_store_dword v0, v0, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, static.gv3@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv3@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] ; GFX9-NEXT: s_cbranch_execz .LBB2_2 ; GFX9-NEXT: .LBB2_4: ; %bb0 ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, static.gv0@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv0@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: global_store_dword v0, v0, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_getpc_b64 s[6:7] ; GFX9-NEXT: s_add_u32 s6, s6, static.gv1@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s7, s7, static.gv1@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v1, 1 ; GFX9-NEXT: global_store_dword v0, v1, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: br i1 %cond, label %bb0, label %bb1 bb0: store volatile i32 0, i32 addrspace(1)* @static.gv0 store volatile i32 1, i32 addrspace(1)* @static.gv1 br label %bb2 bb1: store volatile i32 0, i32 addrspace(1)* @static.gv2 store volatile i32 1, i32 addrspace(1)* @static.gv3 br label %bb2 bb2: ret void } ; This would crash from using the wrong insert point define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { ; GFX9-LABEL: sink_null_insert_pt: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[16:17] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: %load0 = load volatile i32, i32 addrspace(1)* null, align 4 br label %bb1 bb1: call void null() ret void }