; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
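; The trailing i32 rounding argument on these intrinsics uses the standard X86
; encoding: 4 = _MM_FROUND_CUR_DIRECTION, 8 = {rn-sae} (plain {sae} for
; SAE-only operations), 9 = {rd-sae}, 10 = {ru-sae}, 11 = {rz-sae}.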

declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32)
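
; Comparison predicate 9 (NGE) with the default environment lowers to
; vcmpngesh; the result is read back from the mask register with kmovw.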

define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_x86_avx512fp16_ucomi_sh_lt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpngesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4)
  ret i32 %res
}

declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone

define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  ret <32 x half> %1
}
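
; With fast-math flags, a1/sqrt(a0) is lowered to a vrsqrtph estimate plus a
; multiply instead of a full square root and divide.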

define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_512_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}
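
; "vec-sqrth:1" requests one extra refinement step for the estimate, hence the
; vfmadd213ph/vmulph Newton-Raphson sequence in the checks below.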

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm2
; CHECK-NEXT:    vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT:    vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT:    vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}
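
; A select on the mask folds into merge masking {%k1}; a select against
; zeroinitializer folds into zero masking {%k1} {z}.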

define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}

declare <32 x half> @llvm.sqrt.v32f16(<32 x half>)

define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  ret <32 x half> %1
}

define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone

define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4)
  ret <8 x half> %res
}

define half @test_sqrt_sh2(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  %2 = fdiv fast half %a1, %1
  ret half %2
}

define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  ret half %1
}

declare half @llvm.sqrt.f16(half)

define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_sqrt_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32)
declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1)
  ret <32 x half> %res
}

define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_rsqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rsqrt_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_maskz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask)
  ret <8 x half> %res
}
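
; vfpclass: the immediate is a bitmask of FP classes to test for; routing the
; second compare through {%k1} ANDs the two mask results.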

declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32)

define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassph $2, %zmm0, %k1
; CHECK-NEXT:    vfpclassph $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4)
  %res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2)
  %1 = and <32 x i1> %res1, %res
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, %xmm0, %k1
; CHECK-NEXT:    vfpclasssh $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, (%rdi), %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, ptr %x0ptr
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1)
  ret i8 %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
; CHECK-LABEL: test_rcp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrcpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask)
  ret <32 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <8 x half> @test_rcp_sh(<8 x half> %a0) {
; CHECK-LABEL: test_rcp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rcp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}
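
; The reduce/rndscale/getexp/getmant/scalef tests share one pattern: a
; merge-masked op with default rounding plus an unmasked {sae} (or embedded
; rounding) op, combined with vaddph so both encodings are checked.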

declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half>@test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreduceph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half>@test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreducesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vreducesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half>@test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscaleph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vrndscaleph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half>@test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscalesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrndscalesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32)

define <32 x half>@test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetexpph {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half>@test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half>@test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetmantph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32)

define <8 x half>@test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32)

define <32 x half>@test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vscalefph {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %x3 to <32 x i1>
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half>@test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half>@test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}
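
; Each scalar arithmetic test chains four calls: unmasked, merge-masked into
; %src, zero-masked, and one with the second operand folded from memory.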

declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vaddsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vsubsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmulsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vdivsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_min_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vminsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vminsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_max_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmaxsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmaxsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half>, <8 x half>, i32, i8, i32)

define i8 @test_int_x86_avx512_mask_cmp_sh(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmpunordsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4)
  ret i8 %res2
}
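
; Exercise cmp.sh across predicates 2-5 (le, unord, neq, nlt), with and
; without a mask and with {sae}.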


define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh_all:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmplesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %ecx
; CHECK-NEXT:    vcmpunordsh {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %edx
; CHECK-NEXT:    vcmpneqsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %esi
; CHECK-NEXT:    vcmpnltsh {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    andb %dl, %al
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8)

  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}
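
; Integer <-> half conversions. The sitofp/uitofp "round" intrinsics carry an
; explicit rounding operand; generic sitofp/uitofp IR is checked alongside.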

declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = sitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = uitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
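
; Scalar half -> integer conversions: each test pairs the default environment
; with an embedded-rounding or {sae} variant and sums the results.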

declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}


declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
  %res = add i32 %res1, %res2
  ret i32 %res
}


declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
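
; Cast-with-freeze tests: freezing poison lets the upper elements take any
; value, so codegen can simply reuse the input register for the widened
; vector.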


define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %res
}


define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph128_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <32 x half> %res
}


define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <16 x half> poison
  %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x half> %res
}