; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-64

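; Declarations of the constrained (strict) FP-to-integer conversion
; intrinsics exercised below. Each takes the source vector plus an
; !"fpexcept.strict" metadata operand, so the lowering must preserve FP
; exception behavior: conversions may not be speculated, widened onto
; garbage lanes, or folded in ways that could raise spurious exceptions.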
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f32(<4 x float>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f32(<4 x float>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float>, metadata)

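; v2f64 -> v2i64: 32-bit targets have no 64-bit cvttsd2si, so the strict
; lowering spills each element and converts through x87. Without SSE3
; that means saving the FP control word and forcing round-toward-zero
; (orl $3072 sets RC=11) around fistpll; the AVX and later runs can use
; SSE3's truncating fisttpll and skip the control-word juggling. 64-bit
; targets convert each element with scalar cvttsd2si, and AVX512DQ
; provides the packed vcvttpd2qq.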
define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fldl (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i64> %ret
}

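; Unsigned v2f64 -> v2i64: without AVX512, inputs >= 2^63 overflow the
; signed conversion, so each element is compared against 2^63 (the
; constant loaded into xmm1/xmm3), that bias is conditionally subtracted
; before the signed convert, and the comparison result (setae) is
; shifted into bit 63 and XORed back in to recover the unsigned value.
; AVX512 targets use vcvttsd2usi / vcvttpd2uqq directly.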
define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    movapd %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB1_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorpd %xmm2, %xmm2
; SSE-32-NEXT:  .LBB1_2:
; SSE-32-NEXT:    movapd %xmm0, %xmm3
; SSE-32-NEXT:    subsd %xmm2, %xmm3
; SSE-32-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB1_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorpd %xmm1, %xmm1
; SSE-32-NEXT:  .LBB1_4:
; SSE-32-NEXT:    subsd %xmm1, %xmm0
; SSE-32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-64-NEXT:    comisd %xmm3, %xmm0
; SSE-64-NEXT:    xorpd %xmm2, %xmm2
; SSE-64-NEXT:    xorpd %xmm1, %xmm1
; SSE-64-NEXT:    jb .LBB1_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movapd %xmm3, %xmm1
; SSE-64-NEXT:  .LBB1_2:
; SSE-64-NEXT:    movapd %xmm0, %xmm4
; SSE-64-NEXT:    subsd %xmm1, %xmm4
; SSE-64-NEXT:    cvttsd2si %xmm4, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    comisd %xmm3, %xmm0
; SSE-64-NEXT:    jb .LBB1_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    movapd %xmm3, %xmm2
; SSE-64-NEXT:  .LBB1_4:
; SSE-64-NEXT:    subsd %xmm2, %xmm0
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
; AVX-32-NEXT:    jae .LBB1_2
; AVX-32-NEXT:  # %bb.1:
; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX-32-NEXT:  .LBB1_2:
; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX-32-NEXT:    vmovsd %xmm2, (%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %al
; AVX-32-NEXT:    movzbl %al, %eax
; AVX-32-NEXT:    shll $31, %eax
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    vcomisd %xmm1, %xmm0
; AVX-32-NEXT:    jae .LBB1_4
; AVX-32-NEXT:  # %bb.3:
; AVX-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:  .LBB1_4:
; AVX-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %cl
; AVX-32-NEXT:    movzbl %cl, %ecx
; AVX-32-NEXT:    shll $31, %ecx
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
; AVX-64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-64-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX-64-NEXT:    jb .LBB1_2
; AVX-64-NEXT:  # %bb.1:
; AVX-64-NEXT:    vmovapd %xmm1, %xmm3
; AVX-64-NEXT:  .LBB1_2:
; AVX-64-NEXT:    vsubsd %xmm3, %xmm0, %xmm3
; AVX-64-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm3
; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
; AVX-64-NEXT:    jb .LBB1_4
; AVX-64-NEXT:  # %bb.3:
; AVX-64-NEXT:    vmovapd %xmm1, %xmm2
; AVX-64-NEXT:  .LBB1_4:
; AVX-64-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-32-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX512F-32-NEXT:    xorl %eax, %eax
; AVX512F-32-NEXT:    vcomisd %xmm2, %xmm1
; AVX512F-32-NEXT:    setae %al
; AVX512F-32-NEXT:    kmovw %eax, %k1
; AVX512F-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512F-32-NEXT:    vsubsd %xmm3, %xmm1, %xmm1
; AVX512F-32-NEXT:    vmovsd %xmm1, (%esp)
; AVX512F-32-NEXT:    xorl %ecx, %ecx
; AVX512F-32-NEXT:    vcomisd %xmm2, %xmm0
; AVX512F-32-NEXT:    setae %cl
; AVX512F-32-NEXT:    kmovw %ecx, %k1
; AVX512F-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512F-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fldl (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    shll $31, %eax
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    shll $31, %ecx
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-32-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomisd %xmm2, %xmm1
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm3, %xmm1, %xmm1
; AVX512VL-32-NEXT:    vmovsd %xmm1, (%esp)
; AVX512VL-32-NEXT:    xorl %ecx, %ecx
; AVX512VL-32-NEXT:    vcomisd %xmm2, %xmm0
; AVX512VL-32-NEXT:    setae %cl
; AVX512VL-32-NEXT:    kmovw %ecx, %k1
; AVX512VL-32-NEXT:    vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    shll $31, %eax
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttsd2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i64> %ret
}

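; v2f32 -> v2i64: same structure as the f64 case, via cvttss2si, flds,
; or vcvttps2qq. Note the AVX512DQ run first executes vmovq to zero the
; upper two float lanes: the conversion is widened to a full register,
; and under strict semantics garbage in those lanes must not be allowed
; to raise spurious FP exceptions.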
define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    vmovd %xmm0, (%esp)
; AVX512F-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    flds (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    vmovd %xmm0, (%esp)
; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i64> %ret
}

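; Same signed conversion, but the two floats are the low half of a
; 128-bit load. The 64-bit AVX/AVX512F runs fold the loads straight
; into scalar vcvttss2si from (%rdi) and 4(%rdi), and AVX512VLDQ folds
; the whole load into a single vcvttps2qq.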
define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(ptr %x) strictfp {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movl 8(%ebp), %eax
; SSE-32-NEXT:    movaps (%eax), %xmm0
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movaps (%rdi), %xmm1
; SSE-64-NEXT:    cvttss2si %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE-64-NEXT:    cvttss2si %xmm1, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    movl 8(%ebp), %eax
; AVX-32-NEXT:    vmovaps (%eax), %xmm0
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttss2si 4(%rdi), %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vcvttss2si (%rdi), %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    movl 8(%ebp), %eax
; AVX512F-32-NEXT:    vmovdqa (%eax), %xmm0
; AVX512F-32-NEXT:    vmovd %xmm0, (%esp)
; AVX512F-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    flds (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttss2si 4(%rdi), %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vcvttss2si (%rdi), %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    movl 8(%ebp), %eax
; AVX512VL-32-NEXT:    vmovdqa (%eax), %xmm0
; AVX512VL-32-NEXT:    vmovd %xmm0, (%esp)
; AVX512VL-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttss2si 4(%rdi), %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vcvttss2si (%rdi), %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512DQ-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-32-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-64-NEXT:    vcvttps2qq %ymm0, %zmm0
; AVX512DQ-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-64-NEXT:    vzeroupper
; AVX512DQ-64-NEXT:    retq
;
; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512VLDQ-32:       # %bb.0:
; AVX512VLDQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512VLDQ-32-NEXT:    vcvttps2qq (%eax), %xmm0
; AVX512VLDQ-32-NEXT:    retl
;
; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
; AVX512VLDQ-64:       # %bb.0:
; AVX512VLDQ-64-NEXT:    vcvttps2qq (%rdi), %xmm0
; AVX512VLDQ-64-NEXT:    retq
  %a = load <4 x float>, ptr %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
  ret <2 x i64> %c
}

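; Unsigned v2f32 -> v2i64: the same compare-against-2^63 / subtract /
; XOR-sign-bit pattern as the f64 case, using comiss, subss, and
; cvttss2si, or vcvttss2usi / vcvttps2uqq on AVX512.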
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    movaps %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB4_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorps %xmm2, %xmm2
; SSE-32-NEXT:  .LBB4_2:
; SSE-32-NEXT:    movaps %xmm0, %xmm3
; SSE-32-NEXT:    subss %xmm2, %xmm3
; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB4_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorps %xmm1, %xmm1
; SSE-32-NEXT:  .LBB4_4:
; SSE-32-NEXT:    subss %xmm1, %xmm0
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-64-NEXT:    comiss %xmm3, %xmm0
; SSE-64-NEXT:    xorps %xmm2, %xmm2
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    jb .LBB4_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movaps %xmm3, %xmm1
; SSE-64-NEXT:  .LBB4_2:
; SSE-64-NEXT:    movaps %xmm0, %xmm4
; SSE-64-NEXT:    subss %xmm1, %xmm4
; SSE-64-NEXT:    cvttss2si %xmm4, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-64-NEXT:    comiss %xmm3, %xmm0
; SSE-64-NEXT:    jb .LBB4_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    movaps %xmm3, %xmm2
; SSE-64-NEXT:  .LBB4_4:
; SSE-64-NEXT:    subss %xmm2, %xmm0
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
; AVX-32-NEXT:    jae .LBB4_2
; AVX-32-NEXT:  # %bb.1:
; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT:  .LBB4_2:
; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %al
; AVX-32-NEXT:    movzbl %al, %eax
; AVX-32-NEXT:    shll $31, %eax
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
; AVX-32-NEXT:    jae .LBB4_4
; AVX-32-NEXT:  # %bb.3:
; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:  .LBB4_4:
; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %cl
; AVX-32-NEXT:    movzbl %cl, %ecx
; AVX-32-NEXT:    shll $31, %ecx
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-64-NEXT:    jb .LBB4_2
; AVX-64-NEXT:  # %bb.1:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm3
; AVX-64-NEXT:  .LBB4_2:
; AVX-64-NEXT:    vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm3
; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
; AVX-64-NEXT:    jb .LBB4_4
; AVX-64-NEXT:  # %bb.3:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
; AVX-64-NEXT:  .LBB4_4:
; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    xorl %eax, %eax
; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
; AVX512F-32-NEXT:    setae %al
; AVX512F-32-NEXT:    kmovw %eax, %k1
; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    xorl %ecx, %ecx
; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm0
; AVX512F-32-NEXT:    setae %cl
; AVX512F-32-NEXT:    kmovw %ecx, %k1
; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512F-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT:    vmovss %xmm0, (%esp)
; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    flds (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    shll $31, %eax
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    shll $31, %ecx
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %ecx, %ecx
; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm0
; AVX512VL-32-NEXT:    setae %cl
; AVX512VL-32-NEXT:    kmovw %ecx, %k1
; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovss %xmm0, (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    shll $31, %eax
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i64> %ret
}

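; Unsigned conversion of the low two floats of a loaded <4 x float>.
; On the AVX runs the scalar elements are reloaded individually with
; vmovss so the 2^63 bias trick can be applied per lane, while the
; 64-bit AVX512F run folds the loads into vcvttss2usi.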
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(ptr %x) strictfp {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movl 8(%ebp), %eax
; SSE-32-NEXT:    movaps (%eax), %xmm0
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    movaps %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB5_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorps %xmm2, %xmm2
; SSE-32-NEXT:  .LBB5_2:
; SSE-32-NEXT:    movaps %xmm0, %xmm3
; SSE-32-NEXT:    subss %xmm2, %xmm3
; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB5_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorps %xmm1, %xmm1
; SSE-32-NEXT:  .LBB5_4:
; SSE-32-NEXT:    subss %xmm1, %xmm0
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movaps (%rdi), %xmm1
; SSE-64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-64-NEXT:    comiss %xmm3, %xmm1
; SSE-64-NEXT:    xorps %xmm2, %xmm2
; SSE-64-NEXT:    xorps %xmm0, %xmm0
; SSE-64-NEXT:    jb .LBB5_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movaps %xmm3, %xmm0
; SSE-64-NEXT:  .LBB5_2:
; SSE-64-NEXT:    movaps %xmm1, %xmm4
; SSE-64-NEXT:    subss %xmm0, %xmm4
; SSE-64-NEXT:    cvttss2si %xmm4, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm0
; SSE-64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE-64-NEXT:    comiss %xmm3, %xmm1
; SSE-64-NEXT:    jb .LBB5_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    movaps %xmm3, %xmm2
; SSE-64-NEXT:  .LBB5_4:
; SSE-64-NEXT:    subss %xmm2, %xmm1
; SSE-64-NEXT:    cvttss2si %xmm1, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm1
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    movl 8(%ebp), %eax
; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
; AVX-32-NEXT:    jae .LBB5_2
; AVX-32-NEXT:  # %bb.1:
; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT:  .LBB5_2:
; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %al
; AVX-32-NEXT:    movzbl %al, %eax
; AVX-32-NEXT:    shll $31, %eax
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
; AVX-32-NEXT:    jae .LBB5_4
; AVX-32-NEXT:  # %bb.3:
; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:  .LBB5_4:
; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %cl
; AVX-32-NEXT:    movzbl %cl, %ecx
; AVX-32-NEXT:    shll $31, %ecx
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-64-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-64-NEXT:    vcomiss %xmm1, %xmm3
; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; AVX-64-NEXT:    jb .LBB5_2
; AVX-64-NEXT:  # %bb.1:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm4
; AVX-64-NEXT:  .LBB5_2:
; AVX-64-NEXT:    vsubss %xmm4, %xmm3, %xmm3
; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm3
; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
; AVX-64-NEXT:    jb .LBB5_4
; AVX-64-NEXT:  # %bb.3:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
; AVX-64-NEXT:  .LBB5_4:
; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    movl 8(%ebp), %eax
; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    xorl %eax, %eax
; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm1
; AVX512F-32-NEXT:    setae %al
; AVX512F-32-NEXT:    kmovw %eax, %k1
; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512F-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
; AVX512F-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    xorl %ecx, %ecx
; AVX512F-32-NEXT:    vcomiss %xmm2, %xmm0
; AVX512F-32-NEXT:    setae %cl
; AVX512F-32-NEXT:    kmovw %ecx, %k1
; AVX512F-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512F-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX512F-32-NEXT:    vmovss %xmm0, (%esp)
; AVX512F-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    flds (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    shll $31, %eax
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    shll $31, %ecx
; AVX512F-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttss2usi 4(%rdi), %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vcvttss2usi (%rdi), %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    movl 8(%ebp), %eax
; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    xorl %eax, %eax
; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm1
; AVX512VL-32-NEXT:    setae %al
; AVX512VL-32-NEXT:    kmovw %eax, %k1
; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm3 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm3, %xmm1, %xmm1
; AVX512VL-32-NEXT:    vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    xorl %ecx, %ecx
; AVX512VL-32-NEXT:    vcomiss %xmm2, %xmm0
; AVX512VL-32-NEXT:    setae %cl
; AVX512VL-32-NEXT:    kmovw %ecx, %k1
; AVX512VL-32-NEXT:    vmovss %xmm2, %xmm2, %xmm1 {%k1} {z}
; AVX512VL-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vmovss %xmm0, (%esp)
; AVX512VL-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    flds (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    shll $31, %eax
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT:    shll $31, %ecx
; AVX512VL-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttss2usi 4(%rdi), %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vcvttss2usi (%rdi), %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512DQ-32:       # %bb.0:
; AVX512DQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512DQ-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-32-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-32-NEXT:    vzeroupper
; AVX512DQ-32-NEXT:    retl
;
; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512DQ-64:       # %bb.0:
; AVX512DQ-64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512DQ-64-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-64-NEXT:    vzeroupper
; AVX512DQ-64-NEXT:    retq
;
; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512VLDQ-32:       # %bb.0:
; AVX512VLDQ-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512VLDQ-32-NEXT:    vcvttps2uqq (%eax), %xmm0
; AVX512VLDQ-32-NEXT:    retl
;
; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
; AVX512VLDQ-64:       # %bb.0:
; AVX512VLDQ-64-NEXT:    vcvttps2uqq (%rdi), %xmm0
; AVX512VLDQ-64-NEXT:    retq
  %a = load <4 x float>, ptr %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
  ret <2 x i64> %c
}

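; Signed v2f64 -> v2i32 is the simple case: cvttpd2dq truncates both doubles
; in one instruction and zeroes the upper half of the destination, so every
; configuration below lowers to a single convert.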
define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i32> %ret
}

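; Unsigned converts have no pre-AVX512 vector instruction, so the lowering
; varies by target. On x86-64 a u32 fits in the signed i64 range, so a plain
; cvttsd2si into a 64-bit register plus truncation suffices. 32-bit SSE uses
; the classic bias trick: if x >= 2^31, convert x - 2^31 and XOR the sign bit
; back in (the setae/shll $31/xorl sequence), i.e.
; (u32)x == (i32)(x - 2^31) ^ 0x80000000 for x >= 2^31. 32-bit AVX instead
; truncates to i64 through x87 fisttpll (AVX implies SSE3) and keeps the low
; dword, while AVX512F/AVX512VL have vcvttpd2udq natively.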
define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-32-NEXT:    comisd %xmm3, %xmm0
; SSE-32-NEXT:    xorpd %xmm2, %xmm2
; SSE-32-NEXT:    xorpd %xmm1, %xmm1
; SSE-32-NEXT:    jb .LBB7_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    movapd %xmm3, %xmm1
; SSE-32-NEXT:  .LBB7_2:
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    movapd %xmm0, %xmm4
; SSE-32-NEXT:    subsd %xmm1, %xmm4
; SSE-32-NEXT:    cvttsd2si %xmm4, %ecx
; SSE-32-NEXT:    xorl %eax, %ecx
; SSE-32-NEXT:    movd %ecx, %xmm1
; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT:    comisd %xmm3, %xmm0
; SSE-32-NEXT:    jb .LBB7_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    movapd %xmm3, %xmm2
; SSE-32-NEXT:  .LBB7_4:
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    subsd %xmm2, %xmm0
; SSE-32-NEXT:    cvttsd2si %xmm0, %ecx
; SSE-32-NEXT:    xorl %eax, %ecx
; SSE-32-NEXT:    movd %ecx, %xmm0
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-32-NEXT:    movdqa %xmm1, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movd %eax, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-64-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-64-NEXT:    vmovd %ecx, %xmm0
; AVX-64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i32> %ret
}

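; Only the low two lanes of the <2 x float> argument are defined, so the
; lowering first zeroes the upper lanes (movq xmm0 = xmm0[0],zero),
; presumably so cvttps2dq cannot raise spurious FP exceptions on undefined
; elements under strict semantics.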
define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i32> %ret
}

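; The f32 counterpart of the unsigned case above: 64-bit targets widen
; through cvttss2si into a 64-bit register, 32-bit SSE biases by 2^31,
; 32-bit AVX goes through x87 fisttpll, and AVX512 uses vcvttps2udq on the
; lane-zeroed input.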
define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-32-NEXT:    comiss %xmm3, %xmm0
; SSE-32-NEXT:    xorps %xmm2, %xmm2
; SSE-32-NEXT:    xorps %xmm1, %xmm1
; SSE-32-NEXT:    jb .LBB9_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    movaps %xmm3, %xmm1
; SSE-32-NEXT:  .LBB9_2:
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    movaps %xmm0, %xmm4
; SSE-32-NEXT:    subss %xmm1, %xmm4
; SSE-32-NEXT:    cvttss2si %xmm4, %ecx
; SSE-32-NEXT:    xorl %eax, %ecx
; SSE-32-NEXT:    movd %ecx, %xmm1
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    comiss %xmm3, %xmm0
; SSE-32-NEXT:    jb .LBB9_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    movaps %xmm3, %xmm2
; SSE-32-NEXT:  .LBB9_4:
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    subss %xmm2, %xmm0
; SSE-32-NEXT:    cvttss2si %xmm0, %ecx
; SSE-32-NEXT:    xorl %eax, %ecx
; SSE-32-NEXT:    movd %ecx, %xmm0
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-32-NEXT:    movdqa %xmm1, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movd %eax, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movd %eax, %xmm0
; SSE-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-64-NEXT:    vcvttss2si %xmm1, %rax
; AVX-64-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-64-NEXT:    vmovd %ecx, %xmm0
; AVX-64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i32> %ret
}

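; i16 results are produced by converting to i32 first and then narrowing.
; packssdw is safe here because an in-range i16 passes through signed
; saturation unchanged, and out-of-range inputs are poison for fptosi anyway.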
define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i16> %ret
}

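; For u16 the whole result range [0, 65535] fits in a signed i32, so the same
; cvttpd2dq works; the narrowing step only has to keep the low half of each
; dword, via pshuflw on SSE or vpackusdw on the AVX targets.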
define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i16> %ret
}

define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i16> %ret
}

define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i16> %ret
}

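; i8 narrows the i32 convert in two pack steps (packssdw + packsswb) on
; targets without AVX512VL; with VL a single vpmovdb truncate does it.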
define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    packsswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    packsswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i8> %ret
}

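; Likewise for u8: the i32 convert is followed by two unsigned packs
; (packuswb twice on SSE, vpackusdw + vpackuswb on AVX), or one vpmovdb
; truncate with AVX512VL.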
define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i8> %ret
}

define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    packsswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    packsswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i8> %ret
}

define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i8> %ret
}

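; The SSE2-only 32-bit path cannot use fisttp (an SSE3 instruction), so it
; toggles the x87 control word instead: fnstcw, OR in 0xC00 to set RC =
; round-toward-zero, fldcw, fistpll, then restore. The AVX512 variants
; materialise the result in a mask register and expand it to vector lanes
; with vpternlogq or vpmovm2q.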
define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i1> %ret
}

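; Unsigned i1 from f64 combines both expansions: compare against the 2^63
; bias constant, convert the possibly-reduced value, and XOR the carried-out
; bit back in, with the 32-bit SSE2 path additionally doing the x87
; control-word toggle around fistpll.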
define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    movapd %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB19_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorpd %xmm2, %xmm2
; SSE-32-NEXT:  .LBB19_2:
; SSE-32-NEXT:    movapd %xmm0, %xmm3
; SSE-32-NEXT:    subsd %xmm2, %xmm3
; SSE-32-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB19_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorpd %xmm1, %xmm1
; SSE-32-NEXT:  .LBB19_4:
; SSE-32-NEXT:    subsd %xmm1, %xmm0
; SSE-32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-64-NEXT:    comisd %xmm3, %xmm0
; SSE-64-NEXT:    xorpd %xmm2, %xmm2
; SSE-64-NEXT:    xorpd %xmm1, %xmm1
; SSE-64-NEXT:    jb .LBB19_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movapd %xmm3, %xmm1
; SSE-64-NEXT:  .LBB19_2:
; SSE-64-NEXT:    movapd %xmm0, %xmm4
; SSE-64-NEXT:    subsd %xmm1, %xmm4
; SSE-64-NEXT:    cvttsd2si %xmm4, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    comisd %xmm3, %xmm0
; SSE-64-NEXT:    jb .LBB19_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    movapd %xmm3, %xmm2
; SSE-64-NEXT:  .LBB19_4:
; SSE-64-NEXT:    subsd %xmm2, %xmm0
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-32-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT:    vcomisd %xmm1, %xmm2
; AVX-32-NEXT:    vmovapd %xmm1, %xmm3
; AVX-32-NEXT:    jae .LBB19_2
; AVX-32-NEXT:  # %bb.1:
; AVX-32-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX-32-NEXT:  .LBB19_2:
; AVX-32-NEXT:    vsubsd %xmm3, %xmm2, %xmm2
; AVX-32-NEXT:    vmovsd %xmm2, (%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %al
; AVX-32-NEXT:    movzbl %al, %eax
; AVX-32-NEXT:    shll $31, %eax
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    vcomisd %xmm1, %xmm0
; AVX-32-NEXT:    jae .LBB19_4
; AVX-32-NEXT:  # %bb.3:
; AVX-32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:  .LBB19_4:
; AVX-32-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-32-NEXT:    vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %cl
; AVX-32-NEXT:    movzbl %cl, %ecx
; AVX-32-NEXT:    shll $31, %ecx
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
; AVX-64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-64-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; AVX-64-NEXT:    jb .LBB19_2
; AVX-64-NEXT:  # %bb.1:
; AVX-64-NEXT:    vmovapd %xmm1, %xmm3
; AVX-64-NEXT:  .LBB19_2:
; AVX-64-NEXT:    vsubsd %xmm3, %xmm0, %xmm3
; AVX-64-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm3
; AVX-64-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT:    vcomisd %xmm1, %xmm0
; AVX-64-NEXT:    jb .LBB19_4
; AVX-64-NEXT:  # %bb.3:
; AVX-64-NEXT:    vmovapd %xmm1, %xmm2
; AVX-64-NEXT:  .LBB19_4:
; AVX-64-NEXT:    vsubsd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2udq %xmm0, %xmm0
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i1> %ret
}

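; With AVX512 the two scalar results are assembled directly in a k-register
; (kmovw/kshift/kor) and then expanded with vpternlogq, vmovdqa64 or
; vpmovm2q; pre-AVX512 targets fall back to scalar i64 conversions.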
define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    kmovw %eax, %k0
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    kshiftlw $1, %k1, %k1
; AVX512F-NEXT:    korw %k1, %k0, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VL-NEXT:    andl $1, %eax
; AVX512VL-NEXT:    kmovw %eax, %k0
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VL-NEXT:    kmovw %eax, %k1
; AVX512VL-NEXT:    kshiftlw $1, %k1, %k1
; AVX512VL-NEXT:    korw %k1, %k0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
; AVX512DQ-NEXT:    kmovw %eax, %k0
; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT:    vcvttss2si %xmm0, %eax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT:    kshiftrb $7, %k1, %k1
; AVX512DQ-NEXT:    korw %k0, %k1, %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT:    kmovw %eax, %k0
; AVX512VLDQ-NEXT:    kshiftlb $1, %k0, %k0
; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VLDQ-NEXT:    kmovw %eax, %k1
; AVX512VLDQ-NEXT:    kshiftlb $7, %k1, %k1
; AVX512VLDQ-NEXT:    kshiftrb $7, %k1, %k1
; AVX512VLDQ-NEXT:    korw %k0, %k1, %k0
; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i1> %ret
}

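; The unsigned f32-to-i1 variant mirrors the f64 one: bias-and-xor scalar
; converts (through x87 fistpll with the control-word toggle on 32-bit SSE2),
; or a k-register build on AVX512. The AVX512 paths can use the signed
; vcvttss2si, presumably because fptoui to i1 is only defined for inputs that
; truncate to 0 or 1, where the signed and unsigned converts agree.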
define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    movaps %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB21_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorps %xmm2, %xmm2
; SSE-32-NEXT:  .LBB21_2:
; SSE-32-NEXT:    movaps %xmm0, %xmm3
; SSE-32-NEXT:    subss %xmm2, %xmm3
; SSE-32-NEXT:    movss %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT:    comiss %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB21_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorps %xmm1, %xmm1
; SSE-32-NEXT:  .LBB21_4:
; SSE-32-NEXT:    subss %xmm1, %xmm0
; SSE-32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    flds {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-64-NEXT:    comiss %xmm3, %xmm0
; SSE-64-NEXT:    xorps %xmm2, %xmm2
; SSE-64-NEXT:    xorps %xmm1, %xmm1
; SSE-64-NEXT:    jb .LBB21_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movaps %xmm3, %xmm1
; SSE-64-NEXT:  .LBB21_2:
; SSE-64-NEXT:    movaps %xmm0, %xmm4
; SSE-64-NEXT:    subss %xmm1, %xmm4
; SSE-64-NEXT:    cvttss2si %xmm4, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm1
; SSE-64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-64-NEXT:    comiss %xmm3, %xmm0
; SSE-64-NEXT:    jb .LBB21_4
; SSE-64-NEXT:  # %bb.3:
; SSE-64-NEXT:    movaps %xmm3, %xmm2
; SSE-64-NEXT:  .LBB21_4:
; SSE-64-NEXT:    subss %xmm2, %xmm0
; SSE-64-NEXT:    cvttss2si %xmm0, %rax
; SSE-64-NEXT:    setae %cl
; SSE-64-NEXT:    movzbl %cl, %ecx
; SSE-64-NEXT:    shlq $63, %rcx
; SSE-64-NEXT:    xorq %rax, %rcx
; SSE-64-NEXT:    movq %rcx, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vcomiss %xmm1, %xmm2
; AVX-32-NEXT:    vmovaps %xmm1, %xmm3
; AVX-32-NEXT:    jae .LBB21_2
; AVX-32-NEXT:  # %bb.1:
; AVX-32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-32-NEXT:  .LBB21_2:
; AVX-32-NEXT:    vsubss %xmm3, %xmm2, %xmm2
; AVX-32-NEXT:    vmovss %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    flds {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %al
; AVX-32-NEXT:    movzbl %al, %eax
; AVX-32-NEXT:    shll $31, %eax
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT:    vcomiss %xmm1, %xmm0
; AVX-32-NEXT:    jae .LBB21_4
; AVX-32-NEXT:  # %bb.3:
; AVX-32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-32-NEXT:  .LBB21_4:
; AVX-32-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-32-NEXT:    vmovss %xmm0, (%esp)
; AVX-32-NEXT:    flds (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    setae %cl
; AVX-32-NEXT:    movzbl %cl, %ecx
; AVX-32-NEXT:    shll $31, %ecx
; AVX-32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
; AVX-64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-64-NEXT:    jb .LBB21_2
; AVX-64-NEXT:  # %bb.1:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm3
; AVX-64-NEXT:  .LBB21_2:
; AVX-64-NEXT:    vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT:    vcvttss2si %xmm3, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm3
; AVX-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT:    vcomiss %xmm1, %xmm0
; AVX-64-NEXT:    jb .LBB21_4
; AVX-64-NEXT:  # %bb.3:
; AVX-64-NEXT:    vmovaps %xmm1, %xmm2
; AVX-64-NEXT:  .LBB21_4:
; AVX-64-NEXT:    vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT:    vcvttss2si %xmm0, %rax
; AVX-64-NEXT:    setae %cl
; AVX-64-NEXT:    movzbl %cl, %ecx
; AVX-64-NEXT:    shlq $63, %rcx
; AVX-64-NEXT:    xorq %rax, %rcx
; AVX-64-NEXT:    vmovq %rcx, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    kmovw %eax, %k0
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT:    vcvttss2si %xmm0, %eax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    kshiftlw $1, %k1, %k1
; AVX512F-NEXT:    korw %k1, %k0, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VL-NEXT:    andl $1, %eax
; AVX512VL-NEXT:    kmovw %eax, %k0
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VL-NEXT:    kmovw %eax, %k1
; AVX512VL-NEXT:    kshiftlw $1, %k1, %k1
; AVX512VL-NEXT:    korw %k1, %k0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512DQ-NEXT:    vcvttss2si %xmm1, %eax
; AVX512DQ-NEXT:    kmovw %eax, %k0
; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT:    vcvttss2si %xmm0, %eax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT:    kshiftrb $7, %k1, %k1
; AVX512DQ-NEXT:    korw %k0, %k1, %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VLDQ-NEXT:    vcvttss2si %xmm1, %eax
; AVX512VLDQ-NEXT:    kmovw %eax, %k0
; AVX512VLDQ-NEXT:    kshiftlb $1, %k0, %k0
; AVX512VLDQ-NEXT:    vcvttss2si %xmm0, %eax
; AVX512VLDQ-NEXT:    kmovw %eax, %k1
; AVX512VLDQ-NEXT:    kshiftlb $7, %k1, %k1
; AVX512VLDQ-NEXT:    kshiftrb $7, %k1, %k1
; AVX512VLDQ-NEXT:    korw %k0, %k1, %k0
; AVX512VLDQ-NEXT:    vpmovm2q %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i1> %ret
}
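; NOTE: The v2i1 fptoui lowering above has to scalarize on pre-AVX512 targets:
; each lane is compared (comiss) against what is presumably the 2^63 boundary
; constant, conditionally reduced by it, converted with cvttss2si (64-bit) or
; an x87 fistp/fisttp sequence (32-bit), and the comparison result (setae) is
; shifted up to bit 63 and XORed into the converted value. The AVX512 variants
; instead build a k-mask from two scalar cvttss2si results.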

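; NOTE: Signed v4f32 -> v4i32 is the easy case: every configuration has a
; packed truncating convert (cvttps2dq / vcvttps2dq), so the strict lowering
; is a single instruction with no scalarization.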
define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

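; NOTE: There is no packed float -> u32 convert before AVX512. The SSE and AVX
; lowerings compare each lane against 2.14748365E+9 (2^31), conditionally
; subtract it, do a signed cvttps2dq, and XOR the sign bit back into lanes that
; were >= 2^31. AVX512F and AVX512DQ widen to zmm to reach vcvttps2udq; the VL
; variants use it on xmm directly.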
define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-32-NEXT:    movaps %xmm0, %xmm2
; SSE-32-NEXT:    cmpltps %xmm1, %xmm2
; SSE-32-NEXT:    movaps %xmm2, %xmm3
; SSE-32-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; SSE-32-NEXT:    andnps %xmm1, %xmm2
; SSE-32-NEXT:    subps %xmm2, %xmm0
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    xorps %xmm3, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-64-NEXT:    movaps %xmm0, %xmm2
; SSE-64-NEXT:    cmpltps %xmm1, %xmm2
; SSE-64-NEXT:    movaps %xmm2, %xmm3
; SSE-64-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-64-NEXT:    andnps %xmm1, %xmm2
; SSE-64-NEXT:    subps %xmm2, %xmm0
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    xorps %xmm3, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vmovaps {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm4
; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm1, %xmm1
; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm4, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

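; NOTE: For v4i8 the i32 convert is followed by a narrowing step: targets
; without AVX512VL use the saturating packssdw + packsswb pair, while the VL
; variants truncate in a single vpmovdb.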
define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    packsswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    packsswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

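; NOTE: The unsigned v4i8 case does not need the 2^31 fixup used for v4i32
; above: any value that fits in a u8 is already within signed i32 range, so a
; plain cvttps2dq plus pack/truncate narrowing suffices.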
define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

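; NOTE: v4i1 results are kept as a full-width v4i32 vector. SSE and AVX stop
; after cvttps2dq since only bit 0 of each lane is defined; the AVX512
; configurations additionally round-trip through a k-mask (vptestmd or
; vpmovd2m, then vpternlogd / vmovdqa32 / vpmovm2d) to sign-extend the mask
; back to vector form.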
define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

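; NOTE: Same shape as the signed v4i1 case above, except the AVX512 variants
; first isolate bit 0 with vpslld $31 before forming the k-mask, since for an
; unsigned i1 only the low bit of the truncated integer is meaningful.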
define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

attributes #0 = { strictfp }