; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK-64

define i32 @test_f16_oeq_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oeq_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovnel %ebx, %ebp
; SSE2-NEXT:    cmovpl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oeq_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovnel %esi, %eax
; AVX-NEXT:    cmovpl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oeq_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovnel %eax, %ecx
; CHECK-32-NEXT:    cmovpl %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oeq_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovnel %esi, %eax
; CHECK-64-NEXT:    cmovpl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_ogt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_ogt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbel %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_ogt_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbel %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_ogt_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmoval %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_ogt_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbel %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_oge_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_oge_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    ucomiss (%rsp), %xmm0 # 4-byte Folded Reload
; SSE2-NEXT:    cmovbl %ebx, %ebp
; SSE2-NEXT:    movl %ebp, %eax
; SSE2-NEXT:    addq $8, %rsp
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; AVX-LABEL: test_f16_oge_q:
; AVX:       # %bb.0:
; AVX-NEXT:    movl %edi, %eax
; AVX-NEXT:    vpextrw $0, %xmm0, %ecx
; AVX-NEXT:    vpextrw $0, %xmm1, %edx
; AVX-NEXT:    movzwl %dx, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX-NEXT:    movzwl %cx, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vcvtph2ps %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm0, %xmm1
; AVX-NEXT:    cmovbl %esi, %eax
; AVX-NEXT:    retq
;
; CHECK-32-LABEL: test_f16_oge_q:
; CHECK-32:       # %bb.0:
; CHECK-32-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    vucomish {{[0-9]+}}(%esp), %xmm0
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; CHECK-32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; CHECK-32-NEXT:    cmovael %eax, %ecx
; CHECK-32-NEXT:    movl (%ecx), %eax
; CHECK-32-NEXT:    retl
;
; CHECK-64-LABEL: test_f16_oge_q:
; CHECK-64:       # %bb.0:
; CHECK-64-NEXT:    movl %edi, %eax
; CHECK-64-NEXT:    vucomish %xmm1, %xmm0
; CHECK-64-NEXT:    cmovbl %esi, %eax
; CHECK-64-NEXT:    retq
  %cond = call i1 @llvm.experimental.constrained.fcmp.f16(
                                               half %f1, half %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select i1 %cond, i32 %a, i32 %b
  ret i32 %res
}

define i32 @test_f16_olt_q(i32 %a, i32 %b, half %f1, half %f2) #0 {
; SSE2-LABEL: test_f16_olt_q:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; SSE2-NEXT:    movl %esi, %ebx
; SSE2-NEXT:    movl %edi, %ebp
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
; SSE2-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; SSE2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    callq __extendhfsf2@PLT
; SSE2-NEXT:    movss (%rsp), %xmm1 #
4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: ucomiss %xmm0, %xmm1 ; SSE2-NEXT: cmovbel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_olt_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_olt_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmoval %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_olt_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovbel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"olt", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ole_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ole_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: ucomiss %xmm0, %xmm1 ; SSE2-NEXT: cmovbl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ole_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ole_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovael %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ole_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovbl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ole", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_one_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_one_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_one_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_one_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_one_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"one", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ord_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ord_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovpl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ord_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovpl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ord_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ord_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovpl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ord", metadata !"fpexcept.strict") #0 %res = 
select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ueq_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ueq_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ueq_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ueq_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ueq_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ugt_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ugt_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: ucomiss %xmm0, %xmm1 ; SSE2-NEXT: cmovael %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ugt_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovael %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ugt_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; 
CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ugt_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovael %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ugt", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_uge_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_uge_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: ucomiss %xmm0, %xmm1 ; SSE2-NEXT: cmoval %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_uge_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmoval %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_uge_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_uge_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmoval %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"uge", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ult_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ult_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovael %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ult_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovael %esi, %eax ; 
AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ult_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ult_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovael %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ult", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ule_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ule_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmoval %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ule_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmoval %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ule_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ule_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmoval %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"ule", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_une_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_une_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnel %ebp, %ebx ; SSE2-NEXT: cmovpl %ebp, %ebx ; SSE2-NEXT: movl %ebx, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_une_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %esi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx 
; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnel %edi, %eax ; AVX-NEXT: cmovpl %edi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_une_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnel %eax, %ecx ; CHECK-32-NEXT: cmovpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_une_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %esi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnel %edi, %eax ; CHECK-64-NEXT: cmovpl %edi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"une", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_uno_q(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_uno_q: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnpl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_uno_q: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnpl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_uno_q: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_uno_q: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnpl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.f16( half %f1, half %f2, metadata !"uno", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_oeq_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_oeq_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = 
mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnel %ebx, %ebp ; SSE2-NEXT: cmovpl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_oeq_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnel %esi, %eax ; AVX-NEXT: cmovpl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_oeq_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnel %eax, %ecx ; CHECK-32-NEXT: cmovpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_oeq_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnel %esi, %eax ; CHECK-64-NEXT: cmovpl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"oeq", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ogt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ogt_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovbel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ogt_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ogt_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmoval %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ogt_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovbel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ogt", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_oge_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_oge_s: ; SSE2: # %bb.0: ; SSE2-NEXT: 
pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovbl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_oge_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_oge_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovael %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_oge_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovbl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"oge", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_olt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_olt_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: comiss %xmm0, %xmm1 ; SSE2-NEXT: cmovbel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_olt_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_olt_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmoval %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_olt_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovbel %esi, %eax ; 
CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"olt", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ole_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ole_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: comiss %xmm0, %xmm1 ; SSE2-NEXT: cmovbl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ole_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovbl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ole_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovael %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ole_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovbl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ole", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_one_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_one_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_one_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_one_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal 
{{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_one_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"one", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ord_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ord_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovpl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ord_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovpl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ord_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ord_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovpl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ord", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ueq_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ueq_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnel %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ueq_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; 
AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnel %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ueq_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ueq_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnel %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ugt_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ugt_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: comiss %xmm0, %xmm1 ; SSE2-NEXT: cmovael %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ugt_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovael %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ugt_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ugt_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmovael %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ugt", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_uge_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_uge_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: comiss %xmm0, %xmm1 ; SSE2-NEXT: cmoval %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; 
SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_uge_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: vpextrw $0, %xmm0, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmoval %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_uge_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_uge_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm0, %xmm1 ; CHECK-64-NEXT: cmoval %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"uge", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ult_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ult_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovael %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ult_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovael %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ult_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ult_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovael %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ult", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_ule_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_ule_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmoval %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_ule_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmoval %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_ule_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovbel %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_ule_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmoval %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"ule", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_une_s(i32 %a, i32 %b, half %f1, half %f2) #0 { ; SSE2-LABEL: test_f16_une_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnel %ebp, %ebx ; SSE2-NEXT: cmovpl %ebp, %ebx ; SSE2-NEXT: movl %ebx, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_une_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %esi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnel %edi, %eax ; AVX-NEXT: cmovpl %edi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_une_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovnel %eax, %ecx ; CHECK-32-NEXT: cmovpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_une_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %esi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnel %edi, %eax ; CHECK-64-NEXT: cmovpl %edi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"une", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define i32 @test_f16_uno_s(i32 %a, i32 %b, half %f1, half %f2) 
#0 { ; SSE2-LABEL: test_f16_uno_s: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp ; SSE2-NEXT: pushq %rbx ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movl %esi, %ebx ; SSE2-NEXT: movl %edi, %ebp ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: comiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: cmovnpl %ebx, %ebp ; SSE2-NEXT: movl %ebp, %eax ; SSE2-NEXT: addq $8, %rsp ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; ; AVX-LABEL: test_f16_uno_s: ; AVX: # %bb.0: ; AVX-NEXT: movl %edi, %eax ; AVX-NEXT: vpextrw $0, %xmm0, %ecx ; AVX-NEXT: vpextrw $0, %xmm1, %edx ; AVX-NEXT: movzwl %dx, %edx ; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: cmovnpl %esi, %eax ; AVX-NEXT: retq ; ; CHECK-32-LABEL: test_f16_uno_s: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vcomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %eax ; CHECK-32-NEXT: leal {{[0-9]+}}(%esp), %ecx ; CHECK-32-NEXT: cmovpl %eax, %ecx ; CHECK-32-NEXT: movl (%ecx), %eax ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: test_f16_uno_s: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: movl %edi, %eax ; CHECK-64-NEXT: vcomish %xmm1, %xmm0 ; CHECK-64-NEXT: cmovnpl %esi, %eax ; CHECK-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.f16( half %f1, half %f2, metadata !"uno", metadata !"fpexcept.strict") #0 %res = select i1 %cond, i32 %a, i32 %b ret i32 %res } define void @foo(half %0, half %1) #0 { ; SSE2-LABEL: foo: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill ; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT ; SSE2-NEXT: ucomiss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: jbe .LBB28_1 ; SSE2-NEXT: # %bb.2: ; SSE2-NEXT: popq %rax ; SSE2-NEXT: jmp bar@PLT # TAILCALL ; SSE2-NEXT: .LBB28_1: ; SSE2-NEXT: popq %rax ; SSE2-NEXT: retq ; ; AVX-LABEL: foo: ; AVX: # %bb.0: ; AVX-NEXT: vpextrw $0, %xmm0, %eax ; AVX-NEXT: vpextrw $0, %xmm1, %ecx ; AVX-NEXT: movzwl %cx, %ecx ; AVX-NEXT: vmovd %ecx, %xmm0 ; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX-NEXT: movzwl %ax, %eax ; AVX-NEXT: vmovd %eax, %xmm1 ; AVX-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: jbe .LBB28_1 ; AVX-NEXT: # %bb.2: ; AVX-NEXT: jmp bar@PLT # TAILCALL ; AVX-NEXT: .LBB28_1: ; AVX-NEXT: retq ; ; CHECK-32-LABEL: foo: ; CHECK-32: # %bb.0: ; CHECK-32-NEXT: vmovsh {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: vucomish {{[0-9]+}}(%esp), %xmm0 ; CHECK-32-NEXT: jbe .LBB28_1 ; CHECK-32-NEXT: # %bb.2: ; CHECK-32-NEXT: jmp bar@PLT # TAILCALL ; CHECK-32-NEXT: .LBB28_1: ; CHECK-32-NEXT: retl ; ; CHECK-64-LABEL: foo: ; CHECK-64: # %bb.0: ; CHECK-64-NEXT: vucomish %xmm1, %xmm0 ; CHECK-64-NEXT: jbe .LBB28_1 ; CHECK-64-NEXT: # %bb.2: ; CHECK-64-NEXT: jmp bar@PLT # TAILCALL ; CHECK-64-NEXT: .LBB28_1: ; CHECK-64-NEXT: retq %3 = call i1 @llvm.experimental.constrained.fcmp.f16( half %0, half %1, metadata 
!"ogt", metadata !"fpexcept.strict") #0 br i1 %3, label %4, label %5 4: ; preds = %2 tail call void @bar() #0 br label %5 5: ; preds = %4, %2 ret void } declare void @bar() attributes #0 = { strictfp nounwind } declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata) declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata)