; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s ; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s declare amdgpu_gfx float @extern_func(float) #0 declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0 @funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4 define amdgpu_gfx float @no_stack(float %arg0) #0 { %add = fadd float %arg0, 1.0 ret float %add } define amdgpu_gfx float @simple_stack(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %add = fadd float %arg0, %val ret float %add } define amdgpu_gfx float @multiple_stack(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %add = fadd float %arg0, %val %stack2 = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack2 %val2 = load volatile float, float addrspace(5)* %stack2 %add2 = fadd float %add, %val2 ret float %add2 } define amdgpu_gfx float @dynamic_stack(float %arg0) #0 { bb0: %cmp = fcmp ogt float %arg0, 0.0 br i1 %cmp, label %bb1, label %bb2 bb1: %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %add = fadd float %arg0, %val br label %bb2 bb2: %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ] ret float %res } define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 { bb0: br label %bb1 bb1: %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ] %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %add = fadd float %arg0, %val %cmp = icmp sgt i32 %ctr, 0 %newctr = sub i32 %ctr, 1 br i1 %cmp, label %bb1, label %bb2 bb2: ret float %add } define amdgpu_gfx float @no_stack_call(float %arg0) #0 { %res = call amdgpu_gfx float @simple_stack(float %arg0) ret float %res } define amdgpu_gfx float @simple_stack_call(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %res = call amdgpu_gfx float @simple_stack(float %arg0) %add = fadd float %res, %val ret float %add } define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 { %res = call amdgpu_gfx float @extern_func(float %arg0) ret float %res } define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %res = call amdgpu_gfx float @extern_func(float %arg0) %add = fadd float %res, %val ret float %add } define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 { %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0) ret float %res } define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 { %fptr = load void()*, void()* addrspace(4)* @funcptr call amdgpu_gfx void %fptr() ret float %arg0 } define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %fptr = load void()*, void()* addrspace(4)* @funcptr call amdgpu_gfx void %fptr() %add = fadd float %arg0, %val ret float %add } define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 { %stack = alloca float, i32 4, align 4, addrspace(5) store volatile float 2.0, float addrspace(5)* %stack %val = load volatile float, float addrspace(5)* %stack %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0) %add = fadd float %res, %val ret float %add } @lds = internal addrspace(3) global [64 x float] undef define amdgpu_gfx float @simple_lds(float %arg0) #0 { %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0 %val = load float, float addrspace(3)* %lds_ptr ret float %val } define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 { %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0 %val = load float, float addrspace(3)* %lds_ptr %res = call amdgpu_gfx float @simple_lds_recurse(float %val) ret float %res } attributes #0 = { nounwind } ; GCN: amdpal.pipelines: ; GCN-NEXT: - .registers: ; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}} ; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x2{{$}} ; GISEL-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: dynamic_stack_loop: ; GCN-NEXT: .lds_size: 0{{$}} ; SDAG-NEXT: .sgpr_count: 0x25{{$}} ; GISEL-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x3{{$}} ; GISEL-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: multiple_stack: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: no_stack_extern_call_many_args: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: no_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_lds: ; GCN-NEXT: .lds_size: 0x100{{$}} ; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: simple_lds_recurse: ; GCN-NEXT: .lds_size: 0x100{{$}} ; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x29{{$}} ; GCN-NEXT: simple_stack: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}} ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_stack_recurse: ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2a{{$}} ; GCN-NEXT: ...