; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c -O3 | FileCheck %s --check-prefixes=AVX,F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

declare half @llvm.experimental.constrained.sitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i1(i1, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i8(i8, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata)

define half @sitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: sitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andb $1, %dil
; SSE2-NEXT:    negb %dil
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andb $1, %dil
; AVX-NEXT:    negb %dil
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    negb %al
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andb $1, %dil
; X64-NEXT:    negb %dil
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: sitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movsbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movsbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: sitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movswl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movswl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: sitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i32tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @sitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: sitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    cvtsi2ss %rdi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sitofp_i64tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: sitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: sitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i1tof16(i1 %x) #0 {
; SSE2-LABEL: uitofp_i1tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    andl $1, %edi
; SSE2-NEXT:    cvtsi2ss %edi, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i1tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    andl $1, %edi
; AVX-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i1tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i1tof16:
; X64:       # %bb.0:
; X64-NEXT:    andl $1, %edi
; X64-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i8tof16(i8 %x) #0 {
; SSE2-LABEL: uitofp_i8tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzbl %dil, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i8tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl %dil, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i8tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i8tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i16tof16(i16 %x) #0 {
; SSE2-LABEL: uitofp_i16tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movzwl %di, %eax
; SSE2-NEXT:    cvtsi2ss %eax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: uitofp_i16tof16:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl %di, %eax
; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: uitofp_i16tof16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i16tof16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i32tof16(i32 %x) #0 {
; SSE2-LABEL: uitofp_i32tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    movl %edi, %eax
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i32tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movl %edi, %eax
; F16C-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i32tof16:
; X86:       # %bb.0:
; X86-NEXT:    vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i32tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

define half @uitofp_i64tof16(i64 %x) #0 {
; SSE2-LABEL: uitofp_i64tof16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %rax
; SSE2-NEXT:    shrq %rax
; SSE2-NEXT:    movl %edi, %ecx
; SSE2-NEXT:    andl $1, %ecx
; SSE2-NEXT:    orq %rax, %rcx
; SSE2-NEXT:    testq %rdi, %rdi
; SSE2-NEXT:    cmovnsq %rdi, %rcx
; SSE2-NEXT:    cvtsi2ss %rcx, %xmm0
; SSE2-NEXT:    jns .LBB9_2
; SSE2-NEXT:  # %bb.1:
; SSE2-NEXT:    addss %xmm0, %xmm0
; SSE2-NEXT:  .LBB9_2:
; SSE2-NEXT:    pushq %rax
; SSE2-NEXT:    callq __truncsfhf2@PLT
; SSE2-NEXT:    popq %rax
; SSE2-NEXT:    retq
;
; F16C-LABEL: uitofp_i64tof16:
; F16C:       # %bb.0:
; F16C-NEXT:    movq %rdi, %rax
; F16C-NEXT:    shrq %rax
; F16C-NEXT:    movl %edi, %ecx
; F16C-NEXT:    andl $1, %ecx
; F16C-NEXT:    orq %rax, %rcx
; F16C-NEXT:    testq %rdi, %rdi
; F16C-NEXT:    cmovnsq %rdi, %rcx
; F16C-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT:    jns .LBB9_2
; F16C-NEXT:  # %bb.1:
; F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; F16C-NEXT:  .LBB9_2:
; F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT:    vmovd %xmm0, %eax
; F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; F16C-NEXT:    retq
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86-LABEL: uitofp_i64tof16:
; X86:       # %bb.0:
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: uitofp_i64tof16:
; X64:       # %bb.0:
; X64-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT:    retq
  %result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret half %result
}

attributes #0 = { strictfp nounwind }