; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7k-apple-watchos | FileCheck %s

; Checks how calls to and from swifttailcc functions are lowered for
; thumbv7k-apple-watchos: where outgoing stack arguments are stored, how SP is
; adjusted around the call, and whether the call is emitted as a branch (b.w)
; or as a branch-and-link (bl).
;
; The run line above only enables FileCheck's default prefix, so the directives
; spelled with the COMMON prefix (fromC_totail, fromC_totail_noreservedframe
; and fromtail_toC) are currently not verified.
; NOTE(review): confirm them against real llc output before switching them to
; the active prefix.

declare swifttailcc void @callee_stack0()
declare swifttailcc void @callee_stack4([4 x i32], i32)
declare swifttailcc void @callee_stack20([4 x i32], [5 x i32])
declare extern_weak swifttailcc void @callee_weak()

; No stack arguments on either side: expect a plain tail branch with no SP
; adjustment.
define swifttailcc void @caller_to0_from0() nounwind {
; CHECK-LABEL: _caller_to0_from0:

  tail call swifttailcc void @callee_stack0()
  ret void
; CHECK-NOT: add
; CHECK-NOT: sub
; CHECK: b.w _callee_stack0
}

; The caller received stack arguments but the callee takes none: expect SP to
; be moved up by 16 immediately before the tail branch.
define swifttailcc void @caller_to0_from4([4 x i32], i32) {
; CHECK-LABEL: _caller_to0_from4:

  tail call swifttailcc void @callee_stack0()
  ret void

; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack0
}

define swifttailcc void @caller_to4_from0() {
; Key point is that the "42" should go #16 below incoming stack
; pointer (we didn't have arg space to reuse).
  tail call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK-LABEL: _caller_to4_from0:
; CHECK: sub sp, #16
; CHECK: movs [[TMP:r[0-9]+]], #42
; CHECK: str [[TMP]], [sp]
; CHECK-NOT: add sp
; CHECK: b.w _callee_stack4
}

define swifttailcc void @caller_to4_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to4_from4:
; CHECK-NOT: sub sp
; Key point is that the outgoing stack argument should go where "%a" was: at
; SP on entry.
  tail call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{r[0-9]+}}, [sp]
; CHECK-NOT: add sp
; CHECK-NEXT: b.w _callee_stack4
}

define swifttailcc void @caller_to20_from4([4 x i32], i32 %a) {
; CHECK-LABEL: _caller_to20_from4:
; CHECK: sub sp, #16

; Important point is that the call reuses the "dead" argument space
; above %a on the stack. If it tries to go below incoming-SP then the
; _callee will not deallocate the space, even in swifttailcc.
  tail call swifttailcc void @callee_stack20([4 x i32] undef, [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5])

; CHECK: str {{.*}}, [sp]
; CHECK: str {{.*}}, [sp, #4]
; CHECK: str {{.*}}, [sp, #8]
; CHECK: str {{.*}}, [sp, #12]
; CHECK: str {{.*}}, [sp, #16]
; CHECK-NOT: add sp
; CHECK-NOT: sub sp
; CHECK: b.w _callee_stack20
  ret void
}

define swifttailcc void @caller_to4_from24([4 x i32], i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: _caller_to4_from24:

; Key point is that the outgoing stack argument should go #16 above SP on
; entry.
  tail call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void

; CHECK: str {{.*}}, [sp, #16]
; CHECK: add sp, #16
; CHECK-NEXT: b.w _callee_stack4
}

define swifttailcc void @caller_to20_from20([4 x i32], [5 x i32] %a) {
; CHECK-LABEL: _caller_to20_from20:
; CHECK-NOT: add sp,
; CHECK-NOT: sub sp,

; Here we want to make sure that all the loads of %a happen before the stores:
; otherwise part of %a will be wrongly clobbered before it has been read.
  tail call swifttailcc void @callee_stack20([4 x i32] undef, [5 x i32] %a)
  ret void

; If these ever get interleaved make sure aliasing slots don't clobber each
; other.
; CHECK: ldrd {{.*}}, {{.*}}, [sp, #12]
; CHECK: ldm.w sp,
; CHECK: stm.w
; CHECK: strd
; CHECK-NEXT: b.w _callee_stack20
}

; With "disable-tail-calls"="true" the call must be emitted as a real call (bl)
; even though it is marked "tail".
; NOTE(review): "ret" is not an ARM/Thumb mnemonic, so the last pattern below
; presumably only matches as a substring of a later symbol name; confirm the
; real return sequence and tighten it.
define swifttailcc void @disable_tail_calls() nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: disable_tail_calls:

  tail call swifttailcc void @callee_stack0()
  ret void

; CHECK: bl _callee_stack0
; CHECK: ret
}

; A swifttailcc function that simply returns is expected to move SP up by 16
; (past its own stack argument area) before the bx lr.
define swifttailcc void @normal_ret_with_stack([4 x i32], i32 %a) {
; CHECK: _normal_ret_with_stack:
; CHECK: add sp, #16
; CHECK: bx lr
  ret void
}

declare { [2 x float] } @get_vec2()

; A function with the default calling convention calling a swifttailcc callee
; twice (directives in this function use the inactive COMMON prefix).
define void @fromC_totail() {
; COMMON-LABEL: fromC_totail:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; We must reset the stack to where it was before the call by undoing its extra
; stack pop.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; COMMON: sub sp, #16

  call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

; Same as fromC_totail, but with a variable-sized alloca in the caller
; (directives in this function use the inactive COMMON prefix).
define void @fromC_totail_noreservedframe(i32 %len) {
; COMMON-LABEL: fromC_totail_noreservedframe:
; COMMON: sub.w sp, sp, r{{.*}}

; COMMON: movs [[TMP:r[0-9]+]], #42
; Note stack is subtracted here to allocate space for arg
; COMMON: sub.w sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; And here.
; COMMON: sub sp, #16
; COMMON: str [[TMP]], [sp]
; COMMON: bl _callee_stack4
; But not restored here because callee_stack4 did that for us.
; COMMON-NOT: sub sp,

; Variable sized allocation prevents reserving frame at start of function so
; each call must allocate any stack space it needs.
  %var = alloca i32, i32 %len
  call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  call swifttailcc void @callee_stack4([4 x i32] undef, i32 42)
  ret void
}

declare void @Ccallee_stack4([4 x i32], i32)

; A swifttailcc function calling a callee with the default calling convention
; twice (directives in this function use the inactive COMMON prefix).
define swifttailcc void @fromtail_toC() {
; COMMON-LABEL: fromtail_toC:
; COMMON: push {r4, lr}
; COMMON: sub sp, #8

; COMMON-NOT: sub sp,
; COMMON: movs [[TMP:r[0-9]+]], #42
; COMMON: str [[TMP]], [sp]
; COMMON: bl _Ccallee_stack4
; C callees will return with the stack exactly where we left it, so we mustn't
; try to fix anything.
; COMMON-NOT: add sp,
; COMMON-NOT: sub sp,
; COMMON: str [[TMP]], [sp]{{$}}
; COMMON: bl _Ccallee_stack4
; COMMON-NOT: sub sp,

  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  call void @Ccallee_stack4([4 x i32] undef, i32 42)
  ret void
}

declare swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)

; A swiftcc caller forwarding its swiftself argument to a swifttailcc callee
; that also takes a swiftasync context; the pattern expects a push that
; mentions r10.
define swiftcc i8* @CallSwiftSelf(i8* swiftself %closure, i8* %context) {
; CHECK-LABEL: CallSwiftSelf:
; CHECK: push{{.*}}r10

  %res = call swifttailcc i8* @SwiftSelf(i8 * swiftasync %context, i8* swiftself %closure)
  ret i8* %res
}