; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s ; There are two things we want to test here: ; (1) We can tail call musttail calls. ; (2) We spill and reload all of the arguments around a normal call. declare i32 @musttail_variadic_callee(i32, ...) define i32 @test_musttail_variadic(i32 %arg0, ...) { ; CHECK-LABEL: test_musttail_variadic: ; CHECK: ; %bb.0: ; CHECK-NEXT: b _musttail_variadic_callee %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...) ret i32 %r } declare [2 x i64] @musttail_variadic_aggret_callee(i32 %arg0, ...) define [2 x i64] @test_musttail_variadic_aggret(i32 %arg0, ...) { ; CHECK-LABEL: test_musttail_variadic_aggret: ; CHECK: ; %bb.0: ; CHECK-NEXT: b _musttail_variadic_aggret_callee %r = musttail call [2 x i64] (i32, ...) @musttail_variadic_aggret_callee(i32 %arg0, ...) ret [2 x i64] %r } ; Test musttailing with a normal call in the block. Test that we spill and ; restore, as a normal call will clobber all argument registers. @asdf = internal constant [4 x i8] c"asdf" declare void @puts(i8*) define i32 @test_musttail_variadic_spill(i32 %arg0, ...) { ; CHECK-LABEL: test_musttail_variadic_spill: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #224 ; CHECK-NEXT: .cfi_def_cfa_offset 224 ; CHECK-NEXT: stp x28, x27, [sp, #128] ; 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #144] ; 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #160] ; 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #176] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #192] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #208] ; 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w19, -24 ; CHECK-NEXT: .cfi_offset w20, -32 ; CHECK-NEXT: .cfi_offset w21, -40 ; CHECK-NEXT: .cfi_offset w22, -48 ; CHECK-NEXT: .cfi_offset w23, -56 ; CHECK-NEXT: .cfi_offset w24, -64 ; CHECK-NEXT: .cfi_offset w25, -72 ; CHECK-NEXT: .cfi_offset w26, -80 ; CHECK-NEXT: .cfi_offset w27, -88 ; CHECK-NEXT: .cfi_offset w28, -96 ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x0, _asdf@PAGE ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 ; CHECK-NEXT: mov x23, x4 ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov w0, w19 ; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: mov x2, x21 ; CHECK-NEXT: mov x3, x22 ; CHECK-NEXT: mov x4, x23 ; CHECK-NEXT: mov x5, x24 ; CHECK-NEXT: mov x6, x25 ; CHECK-NEXT: mov x7, x26 ; CHECK-NEXT: mov x8, x27 ; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #208] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #176] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #160] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #144] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x28, x27, [sp, #128] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #224 ; CHECK-NEXT: b _musttail_variadic_callee ; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0)) %r = musttail call i32 (i32, ...) @musttail_variadic_callee(i32 %arg0, ...) ret i32 %r } ; Test musttailing with a varargs call in the block. Test that we spill and ; reload all arguments in the variadic argument pack. declare void @llvm.va_start(i8*) nounwind declare void(i8*, ...)* @get_f(i8* %this) define void @f_thunk(i8* %this, ...) { ; CHECK-LABEL: f_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #256 ; CHECK-NEXT: .cfi_def_cfa_offset 256 ; CHECK-NEXT: stp x28, x27, [sp, #160] ; 16-byte Folded Spill ; CHECK-NEXT: stp x26, x25, [sp, #176] ; 16-byte Folded Spill ; CHECK-NEXT: stp x24, x23, [sp, #192] ; 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #208] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #224] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #240] ; 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w19, -24 ; CHECK-NEXT: .cfi_offset w20, -32 ; CHECK-NEXT: .cfi_offset w21, -40 ; CHECK-NEXT: .cfi_offset w22, -48 ; CHECK-NEXT: .cfi_offset w23, -56 ; CHECK-NEXT: .cfi_offset w24, -64 ; CHECK-NEXT: .cfi_offset w25, -72 ; CHECK-NEXT: .cfi_offset w26, -80 ; CHECK-NEXT: .cfi_offset w27, -88 ; CHECK-NEXT: .cfi_offset w28, -96 ; CHECK-NEXT: add x9, sp, #128 ; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 ; CHECK-NEXT: mov x23, x4 ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: bl _get_f ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: mov x1, x20 ; CHECK-NEXT: mov x2, x21 ; CHECK-NEXT: mov x3, x22 ; CHECK-NEXT: mov x4, x23 ; CHECK-NEXT: mov x5, x24 ; CHECK-NEXT: mov x6, x25 ; CHECK-NEXT: mov x7, x26 ; CHECK-NEXT: mov x8, x27 ; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload ; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #240] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #224] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #208] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #176] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x28, x27, [sp, #160] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #256 ; CHECK-NEXT: br x9 %ap = alloca [4 x i8*], align 16 %ap_i8 = bitcast [4 x i8*]* %ap to i8* call void @llvm.va_start(i8* %ap_i8) %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this) musttail call void (i8*, ...) %fptr(i8* %this, ...) ret void } ; We don't need any spills and reloads here, but we should still emit the ; copies in call lowering. define void @g_thunk(i8* %fptr_i8, ...) { ; CHECK-LABEL: g_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: br x0 %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)* musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...) ret void } ; Test that this works with multiple exits and basic blocks. %struct.Foo = type { i1, i8*, i8* } @g = external global i32 define void @h_thunk(%struct.Foo* %this, ...) { ; CHECK-LABEL: h_thunk: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldrb w9, [x0] ; CHECK-NEXT: tbz w9, #0, LBB5_2 ; CHECK-NEXT: ; %bb.1: ; %then ; CHECK-NEXT: ldr x9, [x0, #8] ; CHECK-NEXT: br x9 ; CHECK-NEXT: LBB5_2: ; %else ; CHECK-NEXT: Lloh2: ; CHECK-NEXT: adrp x10, _g@GOTPAGE ; CHECK-NEXT: ldr x9, [x0, #16] ; CHECK-NEXT: mov w11, #42 ; CHECK-NEXT: Lloh3: ; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF] ; CHECK-NEXT: Lloh4: ; CHECK-NEXT: str w11, [x10] ; CHECK-NEXT: br x9 ; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4 %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0 %cond = load i1, i1* %cond_p br i1 %cond, label %then, label %else then: %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1 %a_i8 = load i8*, i8** %a_p %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)* musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...) ret void else: %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2 %b_i8 = load i8*, i8** %b_p %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)* store i32 42, i32* @g musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...) ret void }