; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; TLS wrapper functions used to be modeled incorrectly; once that was fixed,
; shrink-wrapping alone could no longer produce the expected lowering. To get
; it back, x86-64 applies tricks similar to the ones used for the AArch64 fast
; TLS calling convention (r255821).
; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0

%struct.S = type { i8 }

; Thread-local state shared by the wrappers below.
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@__dso_handle = external global i8
@__tls_guard = internal thread_local unnamed_addr global i1 false
@sum1 = internal thread_local global i32 0, align 4

declare void @_ZN1SC1Ev(ptr)
declare void @_ZN1SD1Ev(ptr)
declare i32 @_tlv_atexit(ptr, ptr, ptr)

; Under cxx_fast_tlscc every GPR except rdi, rax and rsp must be preserved.
; The optimized runs expect that none of them is pushed or popped, while the
; -O0 run expects explicit pushq/popq pairs around the calls.
; CHECK-LABEL: _ZTW2sg
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK: jne
; CHECK: callq
; CHECK: tlv_atexit
; CHECK: callq
; CHECK-NOT: popq %rbx
; CHECK-NOT: popq %rcx
; CHECK-NOT: popq %rdx
; CHECK-NOT: popq %rsi
; CHECK-NOT: popq %r8
; CHECK-NOT: popq %r9
; CHECK-NOT: popq %r10
; CHECK-NOT: popq %r11

; CHECK-O0-LABEL: _ZTW2sg
; CHECK-O0: pushq %r11
; CHECK-O0: pushq %r10
; CHECK-O0: pushq %r9
; CHECK-O0: pushq %r8
; CHECK-O0: pushq %rsi
; CHECK-O0: pushq %rdx
; CHECK-O0: pushq %rcx
; CHECK-O0: callq
; CHECK-O0: jne
; CHECK-O0: callq
; CHECK-O0: tlv_atexit
; CHECK-O0: callq
; CHECK-O0: popq %rcx
; CHECK-O0: popq %rdx
; CHECK-O0: popq %rsi
; CHECK-O0: popq %r8
; CHECK-O0: popq %r9
; CHECK-O0: popq %r10
; CHECK-O0: popq %r11

; Wrapper for @sg. If @__tls_guard is still false it sets the guard, calls
; @_ZN1SC1Ev on @sg and registers @_ZN1SD1Ev through @_tlv_atexit; in every
; case it returns the address of @sg.
define cxx_fast_tlscc nonnull ptr @_ZTW2sg() nounwind {
  %guard.set = load i1, ptr @__tls_guard, align 1
  br i1 %guard.set, label %done, label %do.init

do.init:
  ; Mark the guard before running the initializer, as the original does.
  store i1 true, ptr @__tls_guard, align 1
  tail call void @_ZN1SC1Ev(ptr nonnull @sg) #2
  %atexit.rc = tail call i32 @_tlv_atexit(ptr nonnull @_ZN1SD1Ev, ptr nonnull @sg, ptr nonnull @__dso_handle) #2
  br label %done

done:
  ret ptr @sg
}

; Wrapper that only returns the address of @sum1. No CSR may be pushed in any
; run, and at -O0 the listed registers must not be the source of a movq
; either.
; CHECK-LABEL: _ZTW4sum1
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK-O0-LABEL: _ZTW4sum1
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0-NOT: pushq %rcx
; CHECK-O0-NOT: pushq %rbx
; CHECK-O0-NOT: movq %r11
; CHECK-O0-NOT: movq %r10
; CHECK-O0-NOT: movq %r9
; CHECK-O0-NOT: movq %r8
; CHECK-O0-NOT: movq %rsi
; CHECK-O0-NOT: movq %rdx
; CHECK-O0-NOT: movq %rcx
; CHECK-O0-NOT: movq %rbx
; CHECK-O0: callq
define cxx_fast_tlscc nonnull ptr @_ZTW4sum1() nounwind {
  ret ptr @sum1
}

; Make sure that at -O0 RBP is not overwritten: once the frame pointer has
; been set up, no register may be stored to (%rbp).
; CHECK-O0-LABEL: _ZTW4sum2
; CHECK-O0: pushq %rbp
; CHECK-O0: movq %rsp, %rbp
; CHECK-O0-NOT: movq %r{{.*}}, (%rbp)
define cxx_fast_tlscc ptr @_ZTW4sum2() #0 {
  ret ptr @sum1
}

; Make sure that at -O0 we do not generate spills/reloads of the CSRs.
; CHECK-O0-LABEL: tls_test2
; tls_test2 is a cxx_fast_tlscc function that calls another cxx_fast_tlscc
; function, so at -O0 none of the registers below may be pushed before, or
; popped after, the call to tls_helper.
; CHECK-O0-NOT: pushq %r11
; CHECK-O0-NOT: pushq %r10
; CHECK-O0-NOT: pushq %r9
; CHECK-O0-NOT: pushq %r8
; CHECK-O0-NOT: pushq %rsi
; CHECK-O0-NOT: pushq %rdx
; CHECK-O0: callq {{.*}}tls_helper
; CHECK-O0-NOT: popq %rdx
; CHECK-O0-NOT: popq %rsi
; CHECK-O0-NOT: popq %r8
; CHECK-O0-NOT: popq %r9
; CHECK-O0-NOT: popq %r10
; CHECK-O0-NOT: popq %r11
; CHECK-O0: ret

%class.C = type { i32 }
@tC = internal thread_local global %class.C zeroinitializer, align 4

declare cxx_fast_tlscc void @tls_helper()

define cxx_fast_tlscc ptr @tls_test2() #1 {
  call cxx_fast_tlscc void @tls_helper()
  ret ptr @tC
}

; Make sure we do not allow a tail call when the caller and the callee have
; different calling conventions: the call to _tlv_atexit (default C
; convention) from a cxx_fast_tlscc function must stay a real callq.
declare ptr @_ZN1CD1Ev(ptr readnone returned %this)

; The label matches the full Darwin symbol definition. A bare "tls_test"
; would also match inside the earlier "tls_test2" output and anchor this
; check block in the wrong function.
; CHECK-LABEL: _tls_test:
; CHECK: callq {{.*}}tlv_atexit
define cxx_fast_tlscc void @tls_test() {
entry:
  store i32 0, ptr @tC, align 4
  %0 = tail call i32 @_tlv_atexit(ptr @_ZN1CD1Ev, ptr @tC, ptr nonnull @__dso_handle) #1
  ret void
}

@ssp_var = internal thread_local global i8 0, align 1

; Same no-push expectation as for the other wrappers, this time with the
; sspreq attribute (attribute group #2) on the function.
; CHECK-LABEL: test_ssp
; CHECK-NOT: pushq %r11
; CHECK-NOT: pushq %r10
; CHECK-NOT: pushq %r9
; CHECK-NOT: pushq %r8
; CHECK-NOT: pushq %rsi
; CHECK-NOT: pushq %rdx
; CHECK-NOT: pushq %rcx
; CHECK-NOT: pushq %rbx
; CHECK: callq
define cxx_fast_tlscc nonnull ptr @test_ssp() #2 {
  ret ptr @ssp_var
}

attributes #0 = { nounwind "frame-pointer"="all" }
attributes #1 = { nounwind }
attributes #2 = { nounwind sspreq }