; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE ; Check generated fp16 fused MAC and MLS. define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fusedMACTest2: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fusedMACTest2: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vldr.16 s2, [r2] ; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2 ; DONT-FUSE-NEXT: vstr.16 s0, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %1 = fmul half %f1, %f2 %2 = fadd half %1, %f3 store half %2, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fusedMACTest4: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r2] ; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vldr.16 s4, [r0] ; CHECK-NEXT: vfms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fusedMACTest4: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r2] ; DONT-FUSE-NEXT: vldr.16 s2, [r1] ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s0, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %1 = fmul half %f2, %f3 %2 = fsub half %f1, %1 store half %2, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fusedMACTest6: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fusedMACTest6: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vldr.16 s2, [r2] ; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2 ; DONT-FUSE-NEXT: vstr.16 s0, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %1 = fmul half %f1, %f2 %2 = fsub half -0.0, %1 %3 = fsub half %2, %f3 store half %3, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fusedMACTest8: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fusedMACTest8: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vldr.16 s2, [r2] ; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2 ; DONT-FUSE-NEXT: vstr.16 s0, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %1 = fmul half %f1, %f2 %2 = fsub half %1, %f3 store half %2, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp { ; CHECK-LABEL: test_fma_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: test_fma_f16: ; DONT-FUSE: @ %bb.0: @ %entry ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr entry: %a = load half, half *%aa, align 2 %b = load half, half *%bb, align 2 %c = load half, half *%cc, align 2 %tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone store half %tmp1, half *%aa, align 2 ret void } define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp { ; CHECK-LABEL: test_fnms_f16: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: test_fnms_f16: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %a = load half, half *%aa, align 2 %b = load half, half *%bb, align 2 %c = load half, half *%cc, align 2 %tmp2 = fsub half -0.0, %c %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone store half %tmp3, half *%aa, align 2 ret void } define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind { ; CHECK-LABEL: test_fma_const_fold: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: test_fma_const_fold: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s0, [r0] ; DONT-FUSE-NEXT: bx lr %a = load half, half *%aa, align 2 %b = load half, half *%bb, align 2 %ret = call half @llvm.fma.f16(half %a, half 1.0, half %b) store half %ret, half *%aa, align 2 ret void } define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind { ; CHECK-LABEL: test_fma_canonicalize: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r0] ; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vmov.f16 s4, #2.000000e+00 ; CHECK-NEXT: vfma.f16 s2, s0, s4 ; CHECK-NEXT: vstr.16 s2, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: test_fma_canonicalize: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r0] ; DONT-FUSE-NEXT: vldr.16 s2, [r1] ; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00 ; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4 ; DONT-FUSE-NEXT: vstr.16 s2, [r0] ; DONT-FUSE-NEXT: bx lr %a = load half, half *%aa, align 2 %b = load half, half *%bb, align 2 %ret = call half @llvm.fma.f16(half 2.0, half %a, half %b) store half %ret, half *%aa, align 2 ret void } define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fms1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fms1: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %s = fsub half -0.0, %f1 %ret = call half @llvm.fma.f16(half %s, half %f2, half %f3) store half %ret, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fms2: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fms2: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %s = fsub half -0.0, %f1 %ret = call half @llvm.fma.f16(half %f2, half %s, half %f3) store half %ret, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnma1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnma1: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3) %n1 = fsub half -0.0, %fma store half %n1, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnma2: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnma2: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %n1 = fsub half -0.0, %f1 %n3 = fsub half -0.0, %f3 %ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3) store half %ret, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnms1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnms1: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %n3 = fsub half -0.0, %f3 %ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3) store half %ret, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnms2: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnms2: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %n1 = fsub half -0.0, %f1 %fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3) %n = fsub half -0.0, %fma store half %n, half *%a1, align 2 ret void } define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnms3: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r0] ; CHECK-NEXT: vldr.16 s2, [r1] ; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vfnms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnms3: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r0] ; DONT-FUSE-NEXT: vldr.16 s2, [r1] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] ; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 %f2 = load half, half *%a2, align 2 %f3 = load half, half *%a3, align 2 %n2 = fsub half -0.0, %f2 %fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3) %n1 = fsub half -0.0, %fma store half %n1, half *%a1, align 2 ret void } declare half @llvm.fma.f16(half, half, half) nounwind readnone