; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
;
; Test stack clash protection probing for static allocas.
;
; Every function uses attribute group #0 ("probe-stack"="inline-asm", defined
; at the end of the file). In the expected output, each `cg %r0, <off>(%r15)`
; that follows a decrement of %r15 is a probe of the newly allocated area.

; Small: one probe.
; 100 x i32 = 400 bytes of locals. The assertions expect the whole 560-byte
; frame to be allocated by a single aghi, followed by a single probe.
define i32 @fun0() #0 {
; CHECK-LABEL: fun0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r15, -560
; CHECK-NEXT:    .cfi_def_cfa_offset 720
; CHECK-NEXT:    cg %r0, 552(%r15)
; CHECK-NEXT:    mvhi 552(%r15), 1
; CHECK-NEXT:    l %r2, 160(%r15)
; CHECK-NEXT:    aghi %r15, 560
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 100
  ; Element 98 is expected at 552(%r15), element 0 at 160(%r15).
  %b = getelementptr inbounds i32, i32* %a, i64 98
  ; NOTE(review): the accesses are volatile, presumably so that neither they
  ; nor the alloca are optimized away at -O3.
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium: two probes.
; 2000 x i32 = 8000 bytes of locals. The assertions expect two decrements
; (4096 + 4080 = 8176 bytes in total), each followed by its own probe.
define i32 @fun1() #0 {
; CHECK-LABEL: fun1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4256
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    aghi %r15, -4080
; CHECK-NEXT:    .cfi_def_cfa_offset 8336
; CHECK-NEXT:    cg %r0, 4072(%r15)
; CHECK-NEXT:    mvhi 976(%r15), 1
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    aghi %r15, 8176
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 2000
  ; Element 200 is expected at 976(%r15), element 0 at 176(%r15).
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 1, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Large: Use a loop to allocate and probe in steps.
; 18000 x i32 = 72000 bytes of locals. The assertions expect %r0 to hold the
; loop bound (%r15 - 69632, i.e. 17 steps of 4096); the loop decrements %r15 by
; 4096 and probes until %r15 reaches that bound. A final decrement of 2544
; bytes with its own probe covers the remainder (69632 + 2544 = 72176).
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    agfi %r0, -69632
; CHECK-NEXT:    .cfi_def_cfa_offset 69792
; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB2_1
; CHECK-NEXT:  # %bb.2:
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    aghi %r15, -2544
; CHECK-NEXT:    .cfi_def_cfa_offset 72336
; CHECK-NEXT:    cg %r0, 2536(%r15)
; CHECK-NEXT:    lhi %r0, 1
; CHECK-NEXT:    mvhi 568(%r15), 1
; CHECK-NEXT:    sty %r0, 28968(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    agfi %r15, 72176
; CHECK-NEXT:    br %r14

  %a = alloca i32, i64 18000
  ; Two stores at different distances from the stack pointer: element 98 is
  ; expected at 568(%r15) via mvhi, element 7198 at 28968(%r15) via lhi + sty.
  %b0 = getelementptr inbounds i32, i32* %a, i64 98
  %b1 = getelementptr inbounds i32, i32* %a, i64 7198
  store volatile i32 1, i32* %b0
  store volatile i32 1, i32* %b1
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Ends evenly on the step so no remainder needed.
; The expected frame is 28672 bytes = 7 x 4096, so the assertions contain only
; the probing loop and no trailing decrement/probe pair.
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -28672
; CHECK-NEXT:    .cfi_def_cfa_offset 28832
; CHECK-NEXT:  .LBB3_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB3_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 28672
; CHECK-NEXT:    br %r14
entry:
  ; %stack only contributes to the frame size; it is never read or written.
  %stack = alloca [7122 x i32], align 4
  %i = alloca i32, align 4
  ; NOTE(review): both bitcasts below are unused; they look like leftovers
  ; from a reduced test case.
  %0 = bitcast [7122 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Loop with bigger step.
; "stack-probe-size"="8192" overrides the step used by the other functions.
; The assertions expect the loop to decrement by 8192 up to a bound of 24576
; (3 x 8192), then a remainder of 7608 bytes with its own probe
; (24576 + 7608 = 32184).
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -24576
; CHECK-NEXT:    .cfi_def_cfa_offset 24736
; CHECK-NEXT:  .LBB4_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -8192
; CHECK-NEXT:    cg %r0, 8184(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB4_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    aghi %r15, -7608
; CHECK-NEXT:    .cfi_def_cfa_offset 32344
; CHECK-NEXT:    cg %r0, 7600(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 32184
; CHECK-NEXT:    br %r14
entry:
  ; %stack only contributes to the frame size; it is never read or written.
  %stack = alloca [8000 x i32], align 4
  %i = alloca i32, align 4
  ; NOTE(review): both bitcasts below are unused; they look like leftovers
  ; from a reduced test case.
  %0 = bitcast [8000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; The probe size is rounded down to a multiple of the stack alignment.
; The requested size of 4100 is expected to yield a step of 4096: the
; assertions show a 4096-byte decrement followed by an 88-byte remainder
; (4096 + 88 = 4184), each with its own probe and without a loop.
define void @fun5() #0 "stack-probe-size"="4100" {
; CHECK-LABEL: fun5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    .cfi_def_cfa_offset 4256
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    aghi %r15, -88
; CHECK-NEXT:    .cfi_def_cfa_offset 4344
; CHECK-NEXT:    cg %r0, 80(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 4184
; CHECK-NEXT:    br %r14
entry:
  ; %stack only contributes to the frame size; it is never read or written.
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  ; NOTE(review): both bitcasts below are unused; they look like leftovers
  ; from a reduced test case.
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; The minimum probe size is the stack alignment.
; The requested probe size of 5 is expected to be raised to 8: the assertions
; show a loop that decrements %r15 by 8 and probes at 0(%r15) until the whole
; 4184-byte frame has been allocated, with no remainder step.
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -4184
; CHECK-NEXT:    .cfi_def_cfa_offset 4344
; CHECK-NEXT:  .LBB6_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -8
; CHECK-NEXT:    cg %r0, 0(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB6_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 4184
; CHECK-NEXT:    br %r14
entry:
  ; %stack only contributes to the frame size; it is never read or written.
  %stack = alloca [1000 x i32], align 4
  %i = alloca i32, align 4
  ; NOTE(review): both bitcasts below are unused; they look like leftovers
  ; from a reduced test case.
  %0 = bitcast [1000 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; Small with a natural probe (STMG) - needs no extra probe.
; The call to @foo makes the function save %r14/%r15 with stmg before the
; frame is allocated. The assertions expect the 3976-byte decrement to be
; followed directly by the call, with no `cg` probe in between.
define i32 @fun7() #0 {
; CHECK-LABEL: fun7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -3976
; CHECK-NEXT:    .cfi_def_cfa_offset 4136
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    st %r2, 568(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    lmg %r14, %r15, 4088(%r15)
; CHECK-NEXT:    br %r14

  %v = call i32 @foo()
  ; 950 x i32 = 3800 bytes of locals.
  %a = alloca i32, i64 950
  %b = getelementptr inbounds i32, i32* %a, i64 98
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Medium with an STMG - still needs probing.
; Same shape as @fun7 but with 952 instead of 950 elements, which makes the
; expected frame 3984 bytes instead of 3976. With this slightly larger frame
; the assertions expect an explicit `cg` probe after the decrement even though
; the stmg is still present.
define i32 @fun8() #0 {
; CHECK-LABEL: fun8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -3984
; CHECK-NEXT:    .cfi_def_cfa_offset 4144
; CHECK-NEXT:    cg %r0, 3976(%r15)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    st %r2, 976(%r15)
; CHECK-NEXT:    l %r2, 176(%r15)
; CHECK-NEXT:    lmg %r14, %r15, 4096(%r15)
; CHECK-NEXT:    br %r14

  %v = call i32 @foo()
  ; 952 x i32 = 3808 bytes of locals.
  %a = alloca i32, i64 952
  %b = getelementptr inbounds i32, i32* %a, i64 200
  store volatile i32 %v, i32* %b
  %c = load volatile i32, i32* %a
  ret i32 %c
}

; Loop probing combined with the "backchain" attribute.
; Same frame as @fun3 (28672 bytes = 7 x 4096, loop only). The assertions
; additionally expect the incoming stack pointer to be copied to %r1 before
; the loop and stored to 0(%r15) only after the loop has finished allocating
; the whole frame.
define void @fun9() #0 "backchain" {
; CHECK-LABEL: fun9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lgr %r1, %r15
; CHECK-NEXT:    lgr %r0, %r15
; CHECK-NEXT:    .cfi_def_cfa_register %r0
; CHECK-NEXT:    aghi %r0, -28672
; CHECK-NEXT:    .cfi_def_cfa_offset 28832
; CHECK-NEXT:  .LBB9_1: # %entry
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    aghi %r15, -4096
; CHECK-NEXT:    cg %r0, 4088(%r15)
; CHECK-NEXT:    clgrjh %r15, %r0, .LBB9_1
; CHECK-NEXT:  # %bb.2: # %entry
; CHECK-NEXT:    .cfi_def_cfa_register %r15
; CHECK-NEXT:    stg %r1, 0(%r15)
; CHECK-NEXT:    mvhi 180(%r15), 0
; CHECK-NEXT:    l %r0, 180(%r15)
; CHECK-NEXT:    aghi %r15, 28672
; CHECK-NEXT:    br %r14
entry:
  ; %stack only contributes to the frame size; it is never read or written.
  %stack = alloca [7122 x i32], align 4
  %i = alloca i32, align 4
  ; NOTE(review): both bitcasts below are unused; they look like leftovers
  ; from a reduced test case.
  %0 = bitcast [7122 x i32]* %stack to i8*
  %i.0.i.0..sroa_cast = bitcast i32* %i to i8*
  store volatile i32 0, i32* %i, align 4
  %i.0.i.0.6 = load volatile i32, i32* %i, align 4
  ret void
}

; External callee used by @fun7 and @fun8.
declare i32 @foo()

; Attribute group shared by every function in this file; it sets the
; "probe-stack" attribute to "inline-asm".
attributes #0 = { "probe-stack"="inline-asm" }