; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s define signext i16 @test_mm_cvtsi128_si16(<2 x i64> %A) local_unnamed_addr #0 { ; CHECK-LABEL: test_mm_cvtsi128_si16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmovw %xmm0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq entry: %0 = bitcast <2 x i64> %A to <8 x i16> %vecext.i = extractelement <8 x i16> %0, i32 0 ret i16 %vecext.i } define <2 x i64> @test_mm_cvtsi16_si128(i16 signext %A) local_unnamed_addr #0 { ; CHECK-LABEL: test_mm_cvtsi16_si128: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmovw %edi, %xmm0 ; CHECK-NEXT: retq entry: %vecinit7.i = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %A, i32 0 %0 = bitcast <8 x i16> %vecinit7.i to <2 x i64> ret <2 x i64> %0 } define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %mask = bitcast i8 %x2 to <8 x i1> %res0 = sitofp <8 x i32> %x0 to <8 x half> %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_256_z(<8 x i32> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_256_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtdq2ph %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %mask = bitcast i8 %x2 to <8 x i1> %res0 = sitofp <8 x i32> %x0 to <8 x half> %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer ret <8 x half> %res } define <8 x half> @sint_to_fp_8i32_to_8f16(<8 x i32> %x) { ; CHECK-LABEL: sint_to_fp_8i32_to_8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vcvtdq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = sitofp <8 x i32> %x to <8 x half> ret <8 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_dq2ph_128_z(<4 x i32> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_128_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtdq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <4 x half> @sint_to_fp_4i32_to_4f16(<4 x i32> %x) { ; CHECK-LABEL: sint_to_fp_4i32_to_4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = sitofp <4 x i32> %x to <4 x half> ret <4 x half> %res } define <2 x half> @sint_to_fp_2i32_to_2f16(<2 x i32> %x) { ; CHECK-LABEL: sint_to_fp_2i32_to_2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtdq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = sitofp <2 x i32> %x to <2 x half> ret <2 x half> %res } define <4 x i32> @fp_to_sint_4f16_to_4i32(<4 x half> %x) { ; CHECK-LABEL: fp_to_sint_4f16_to_4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vcvttph2dq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptosi <4 x half> %x to <4 x i32> ret <4 x i32> %res } define <2 x i32> @fp_to_sint_2f16_to_2i32(<2 x half> %x) { ; CHECK-LABEL: fp_to_sint_2f16_to_2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptosi <2 x half> %x to <2 x i32> ret <2 x i32> %res } define <2 x i16> @fp_to_sint_2f16_to_2i16(<2 x half> %x) { ; CHECK-LABEL: fp_to_sint_2f16_to_2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2w %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptosi <2 x half> %x to <2 x i16> ret <2 x i16> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256(<8 x i32> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %mask = bitcast i8 %x2 to <8 x i1> %res0 = uitofp <8 x i32> %x0 to <8 x half> %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1 ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_256_z(<8 x i32> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_256_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %mask = bitcast i8 %x2 to <8 x i1> %res0 = uitofp <8 x i32> %x0 to <8 x half> %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer ret <8 x half> %res } define <8 x half> @uint_to_fp_8i32_to_8f16(<8 x i32> %x) { ; CHECK-LABEL: uint_to_fp_8i32_to_8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtudq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = uitofp <8 x i32> %x to <8 x half> ret <8 x half> %res } define <8 x i32> @fp_to_uint_8f16_to_8i32(<8 x half> %x) { ; CHECK-LABEL: fp_to_uint_8f16_to_8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = fptoui <8 x half> %x to <8 x 
i32> ret <8 x i32> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_nomask(<4 x i32> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_udq2ph_128_z(<4 x i32> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_128_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtudq2ph.128(<4 x i32> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <4 x half> @uint_to_fp_4i32_to_4f16(<4 x i32> %x) { ; CHECK-LABEL: uint_to_fp_4i32_to_4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = uitofp <4 x i32> %x to <4 x half> ret <4 x half> %res } define <2 x half> @uint_to_fp_2i32_to_2f16(<2 x i32> %x) { ; CHECK-LABEL: uint_to_fp_2i32_to_2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtudq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = uitofp <2 x i32> %x to <2 x half> ret <2 x half> %res } define <4 x i32> @fp_to_uint_4f16_to_4i32(<4 x half> %x) { ; CHECK-LABEL: fp_to_uint_4f16_to_4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptoui <4 x half> %x to <4 x i32> ret <4 x 
i32> %res } define <2 x i32> @fp_to_uint_2f16_to_2i32(<2 x half> %x) { ; CHECK-LABEL: fp_to_uint_2f16_to_2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptoui <2 x half> %x to <2 x i32> ret <2 x i32> %res } define <2 x i16> @fp_to_uint_2f16_to_2i16(<2 x half> %x) { ; CHECK-LABEL: fp_to_uint_2f16_to_2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2uw %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptoui <2 x half> %x to <2 x i16> ret <2 x i16> %res } declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half>, <4 x i32>, i8) define <4 x i32> @test_int_x86_avx512_cvt_ph2dq_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2dq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) ret <4 x i32> %res } declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half>, <8 x i32>, i8) define <8 x i32> @test_int_x86_avx512_cvt_ph2dq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> 
@llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2dq_256(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2dq %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) ret <8 x i32> %res } declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8) define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2udq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq 
%res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) ret <4 x i32> %res } declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8) define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2udq %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) ret <8 x i32> %res } declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8) define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2dq %xmm0, 
%xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2dq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) ret <4 x i32> %res } declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8) define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2dq %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) ret <8 x i32> %res } declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8) define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) { ; CHECK-LABEL: 
test_int_x86_avx512_cvtt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) ret <4 x i32> %res } define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2udq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2) ret <4 x i32> %res } declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8) define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) ret <8 x i32> %res } define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) { ; 
CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2udq %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2) ret <8 x i32> %res } declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8) define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2) ret <4 x double> %res } define <4 x double> @test_int_x86_avx512_mask_cvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_256_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2pd %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1) ret <4 x double> %res } declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8) define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2) ret <2 x double> %res } define <2 x double> @test_int_x86_avx512_mask_cvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2pd_128_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtph2pd %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> 
@llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1) ret <2 x double> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtpd2ph %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_256_load: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vcvtpd2phy (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %x0 = load <4 x double>, ptr %px0, align 32 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtpd2ph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ph_128_load: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %esi, %k1 ; CHECK-NEXT: vcvtpd2phx (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %x0 = load <2 x double>, ptr %px0, align 16 %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> 
%x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_256_z(<4 x i64> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_256_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <4 x half> @sint_to_fp_4i64_to_4f16(<4 x i64> %x) { ; CHECK-LABEL: sint_to_fp_4i64_to_4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtqq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = sitofp <4 x i64> %x to <4 x half> ret <4 x half> %res } define <4 x i64> @fp_to_sint_4f16_to_4i64(<4 x half> %x) { ; CHECK-LABEL: fp_to_sint_4f16_to_4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = fptosi <4 x half> %x to <4 x i64> ret <4 x i64> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64>, <8 x half>, i8) define <8 x half> 
@test_int_x86_avx512_mask_cvt_qq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_128_z(<2 x i64> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_128_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <2 x half> @sint_to_fp_2i64_to_2f16(<2 x i64> %x) { ; CHECK-LABEL: sint_to_fp_2i64_to_2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = sitofp <2 x i64> %x to <2 x half> ret <2 x half> %res } define <2 x i64> @fp_to_sint_2f16_to_2i64(<2 x half> %x) { ; CHECK-LABEL: fp_to_sint_2f16_to_2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptosi <2 x half> %x to <2 x i64> ret <2 x i64> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; 
CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask(<4 x i64> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_256_z(<4 x i64> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_256_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.256(<4 x i64> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <4 x half> @uint_to_fp_4i64_to_4f16(<4 x i64> %x) { ; CHECK-LABEL: uint_to_fp_4i64_to_4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtuqq2ph %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = uitofp <4 x i64> %x to <4 x half> ret <4 x half> %res } define <4 x i64> @fp_to_uint_4f16_to_4i64(<4 x half> %x) { ; CHECK-LABEL: fp_to_uint_4f16_to_4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = fptoui <4 x half> %x to <4 x i64> ret <4 x i64> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64>, <8 x half>, i8) define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x 
half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask(<2 x i64> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_nomask: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> %x1, i8 -1) ret <8 x half> %res } define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_128_z(<2 x i64> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_128_z: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtuqq2ph.128(<2 x i64> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <2 x half> @uint_to_fp_2i64_to_2f16(<2 x i64> %x) { ; CHECK-LABEL: uint_to_fp_2i64_to_2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvtuqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = uitofp <2 x i64> %x to <2 x half> ret <2 x half> %res } define <2 x i64> @fp_to_uint_2f16_to_2i64(<2 x half> %x) { ; CHECK-LABEL: fp_to_uint_2f16_to_2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = fptoui <2 x half> %x to <2 x i64> ret <2 x i64> %res } declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half>, <2 x i64>, i8) define <2 x i64> @test_int_x86_avx512_cvtt_ph2qq_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> undef, i8 -1) ret <2 x i64> %res } define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x 
half> %x0, <2 x i64> %x1, i8 %x2) ret <2 x i64> %res } define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2qq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2) ret <2 x i64> %res } declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half>, <4 x i64>, i8) define <4 x i64> @test_int_x86_avx512_cvtt_ph2qq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2qq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> undef, i8 -1) ret <4 x i64> %res } define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2qq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2qq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) ret <4 x i64> %res } define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2qq_256(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2qq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2qq %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2qq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2) ret <4 x i64> %res } declare <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half>, <2 x i64>, i8) define <2 x i64> @test_int_x86_avx512_cvtt_ph2uqq_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> 
; vcvttph2uqq (truncating f16 -> u64) tests: 128-bit mask/maskz variants and the 256-bit set (plain/mask/maskz). Autogenerated CHECK lines.
@llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> undef, i8 -1) ret <2 x i64> %res } define <2 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> %x1, i8 %x2) ret <2 x i64> %res } define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2uqq %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <2 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.128(<8 x half> %x0, <2 x i64> zeroinitializer, i8 %x2) ret <2 x i64> %res } declare <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half>, <4 x i64>, i8) define <4 x i64> @test_int_x86_avx512_cvtt_ph2uqq_256(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2uqq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> undef, i8 -1) ret <4 x i64> %res } define <4 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> %x1, i8 %x2) ret <4 x i64> %res } define <4 x i64> @test_int_x86_avx512_maskz_cvtt_ph2uqq_256(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2uqq_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vcvttph2uqq %xmm0, %ymm0 
; llvm.sqrt lowering tests: tail of the maskz vcvttph2uqq.256 test, then v8f16 sqrt via vsqrtph -- plain, fast-math (lowered to vrsqrtph+vmulph for the a1/sqrt(a0) pattern), fast sqrt alone (stays vsqrtph), masked and zero-masked select forms; start of the v16f16 sqrt tests.
{%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.256(<8 x half> %x0, <4 x i64> zeroinitializer, i8 %x2) ret <4 x i64> %res } declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) declare <16 x half> @llvm.sqrt.v16f16(<16 x half>) define <8 x half> @test_sqrt_ph_128(<8 x half> %a0) { ; CHECK-LABEL: test_sqrt_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) ret <8 x half> %1 } define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) { ; CHECK-LABEL: test_sqrt_ph_128_fast: ; CHECK: # %bb.0: ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 ; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) %2 = fdiv fast <8 x half> %a1, %1 ret <8 x half> %2 } define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) { ; CHECK-LABEL: test_sqrt_ph_128_fast2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) ret <8 x half> %1 } define <8 x half> @test_mask_sqrt_ph_128(<8 x half> %a0, <8 x half> %passthru, i8 %mask) { ; CHECK-LABEL: test_mask_sqrt_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vsqrtph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) %2 = bitcast i8 %mask to <8 x i1> %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> %passthru ret <8 x half> %3 } define <8 x half> @test_maskz_sqrt_ph_128(<8 x half> %a0, i8 %mask) { ; CHECK-LABEL: test_maskz_sqrt_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vsqrtph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %1 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0) %2 = bitcast i8 %mask to <8 x i1> %3 = select <8 x i1> %2, <8 x half> %1, <8 x half> zeroinitializer ret <8 x half> %3 } define <16 x half> @test_sqrt_ph_256(<16 x half> %a0) { ; 
; v16f16 sqrt tests (plain, fast-math rsqrt+mul, masked, zero-masked) followed by the vrsqrtph intrinsic tests (unmasked 128-bit, start of 256-bit). Autogenerated CHECK lines.
CHECK-LABEL: test_sqrt_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vsqrtph %ymm0, %ymm0 ; CHECK-NEXT: retq %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) ret <16 x half> %1 } define <16 x half> @test_sqrt_ph_256_fast(<16 x half> %a0, <16 x half> %a1) { ; CHECK-LABEL: test_sqrt_ph_256_fast: ; CHECK: # %bb.0: ; CHECK-NEXT: vrsqrtph %ymm0, %ymm0 ; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq %1 = call fast <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) %2 = fdiv fast <16 x half> %a1, %1 ret <16 x half> %2 } define <16 x half> @test_mask_sqrt_ph_256(<16 x half> %a0, <16 x half> %passthru, i16 %mask) { ; CHECK-LABEL: test_mask_sqrt_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vsqrtph %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) %2 = bitcast i16 %mask to <16 x i1> %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> %passthru ret <16 x half> %3 } define <16 x half> @test_maskz_sqrt_ph_256(<16 x half> %a0, i16 %mask) { ; CHECK-LABEL: test_maskz_sqrt_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vsqrtph %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %1 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %a0) %2 = bitcast i16 %mask to <16 x i1> %3 = select <16 x i1> %2, <16 x half> %1, <16 x half> zeroinitializer ret <16 x half> %3 } declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half>, <8 x half>, i8) declare <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half>, <16 x half>, i16) define <8 x half> @test_rsqrt_ph_128(<8 x half> %a0) { ; CHECK-LABEL: test_rsqrt_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128(<8 x half> %a0, <8 x half> zeroinitializer, i8 -1) ret <8 x half> %res } define <16 x half> @test_rsqrt_ph_256(<16 x half> %a0) { ; CHECK-LABEL: test_rsqrt_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vrsqrtph 
; Tail of the 256-bit vrsqrtph test, masked vrcpph intrinsic tests (128/256), and vreduceph tests combining a masked imm-8 call with an unmasked imm-4 call via fadd. Autogenerated CHECK lines.
%ymm0, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256(<16 x half> %a0, <16 x half> zeroinitializer, i16 -1) ret <16 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half>, <8 x half>, i8) declare <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half>, <16 x half>, i16) define <8 x half> @test_rcp_ph_128(<8 x half> %a0, <8 x half> %a1, i8 %mask) { ; CHECK-LABEL: test_rcp_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vrcpph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128(<8 x half> %a0, <8 x half> %a1, i8 %mask) ret <8 x half> %res } define <16 x half> @test_rcp_ph_256(<16 x half> %a0, <16 x half> %a1, i16 %mask) { ; CHECK-LABEL: test_rcp_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vrcpph %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256(<16 x half> %a0, <16 x half> %a1, i16 %mask) ret <16 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half>, i32, <8 x half>, i8) declare <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half>, i32, <16 x half>, i16) define <8 x half>@test_int_x86_avx512_mask_reduce_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vreduceph $8, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vreduceph $4, %xmm0, %xmm0 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) %res2 = fadd <8 x half> %res, %res1 ret <8 x half> %res2 } define <16 x half>@test_int_x86_avx512_mask_reduce_ph_256(<16 x half> %x0, 
; 256-bit vreduceph test, then vfpclassph tests: two calls (imm 4 and imm 2) whose <N x i1> results are ANDed -- codegen folds the AND into a {%k1}-masked second vfpclassph; start of the getexp declarations. Autogenerated CHECK lines.
<16 x half> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vreduceph $8, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vreduceph $4, %ymm0, %ymm0 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.reduce.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) %res2 = fadd <16 x half> %res, %res1 ret <16 x half> %res2 } declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32) declare <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half>, i32) define i8 @test_int_x86_avx512_fpclass_ph_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassph $2, %xmm0, %k1 ; CHECK-NEXT: vfpclassph $4, %xmm0, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %res = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 4) %res1 = call <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half> %x0, i32 2) %1 = and <8 x i1> %res1, %res %2 = bitcast <8 x i1> %1 to i8 ret i8 %2 } define i16 @test_int_x86_avx512_fpclass_ph_256(<16 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassph $2, %ymm0, %k1 ; CHECK-NEXT: vfpclassph $4, %ymm0, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 4) %res1 = call <16 x i1> @llvm.x86.avx512fp16.fpclass.ph.256(<16 x half> %x0, i32 2) %1 = and <16 x i1> %res1, %res %2 = bitcast <16 x i1> %1 to i16 ret i16 %2 } declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half>, <8 x half>, i8) declare <16 x half> 
; vgetexpph intrinsic tests: 128-bit plain/mask/maskz and 256-bit plain/mask variants. Autogenerated CHECK lines.
@llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half>, <16 x half>, i16) define <8 x half>@test_int_x86_avx512_getexp_ph_128(<8 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vgetexpph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 -1) ret <8 x half> %res } define <8 x half>@test_int_x86_avx512_mask_getexp_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetexpph %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> %x1, i8 %x2) ret <8 x half> %res } define <8 x half>@test_int_x86_avx512_maskz_getexp_ph_128(<8 x half> %x0, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetexpph %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.ph.128(<8 x half> %x0, <8 x half> zeroinitializer, i8 %x2) ret <8 x half> %res } define <16 x half>@test_int_x86_avx512_getexp_ph_256(<16 x half> %x0) { ; CHECK-LABEL: test_int_x86_avx512_getexp_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vgetexpph %ymm0, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 -1) ret <16 x half> %res } define <16 x half>@test_int_x86_avx512_mask_getexp_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetexpph %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> %x1, i16 %x2) ret <16 x half> %res } define <16 x 
; 256-bit maskz getexp test, then vgetmantph tests (masked imm-8 call plus unmasked imm-4 call, summed with fadd) for 128 and 256 bits; start of the rndscale declarations. Autogenerated CHECK lines.
half>@test_int_x86_avx512_maskz_getexp_ph_256(<16 x half> %x0, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_maskz_getexp_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetexpph %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.getexp.ph.256(<16 x half> %x0, <16 x half> zeroinitializer, i16 %x2) ret <16 x half> %res } declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half>, i32, <8 x half>, i8) declare <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half>, i32, <16 x half>, i16) define <8 x half>@test_int_x86_avx512_mask_getmant_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetmantph $8, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vgetmantph $4, %xmm0, %xmm0 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) %res2 = fadd <8 x half> %res, %res1 ret <8 x half> %res2 } define <16 x half>@test_int_x86_avx512_mask_getmant_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vgetmantph $8, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vgetmantph $4, %ymm0, %ymm0 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.getmant.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) %res2 = fadd <16 x half> %res, %res1 ret <16 x half> %res2 } declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half>, i32, <8 x half>, i8) declare <16 x half> 
; vrndscaleph tests (masked imm-8 call plus unmasked imm-4 call, summed) for 128 and 256 bits, then the vscalefph declarations and the unmasked 128-bit scalef test; start of the masked 128-bit scalef test. Autogenerated CHECK lines.
@llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half>, i32, <16 x half>, i16) define <8 x half>@test_int_x86_avx512_mask_rndscale_ph_128(<8 x half> %x0, <8 x half> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vrndscaleph $8, %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vrndscaleph $4, %xmm0, %xmm0 ; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 8, <8 x half> %x2, i8 %x3) %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128(<8 x half> %x0, i32 4, <8 x half> %x2, i8 -1) %res2 = fadd <8 x half> %res, %res1 ret <8 x half> %res2 } define <16 x half>@test_int_x86_avx512_mask_rndscale_ph_256(<16 x half> %x0, <16 x half> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vrndscaleph $8, %ymm0, %ymm1 {%k1} ; CHECK-NEXT: vrndscaleph $4, %ymm0, %ymm0 ; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 8, <16 x half> %x2, i16 %x3) %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256(<16 x half> %x0, i32 4, <16 x half> %x2, i16 -1) %res2 = fadd <16 x half> %res, %res1 ret <16 x half> %res2 } declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half>, <8 x half>, <8 x half>, i8) declare <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half>, <16 x half>, <16 x half>, i16) define <8 x half>@test_int_x86_avx512_scalef_ph_128(<8 x half> %x0, <8 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 -1) ret <8 x half> %res } define <8 x 
; vscalefph tests: 128-bit mask/maskz, 256-bit plain/mask, and the label of the 256-bit maskz test (body continues on the next line). The %mask bitcasts are unused in the IR bodies -- kept as generated. Autogenerated CHECK lines.
half>@test_int_x86_avx512_mask_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm2 {%k1} ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq %mask = bitcast i8 %x3 to <8 x i1> %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, i8 %x3) ret <8 x half> %res } define <8 x half>@test_int_x86_avx512_maskz_scalef_ph_128(<8 x half> %x0, <8 x half> %x1, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_128: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vscalefph %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %mask = bitcast i8 %x3 to <8 x i1> %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128(<8 x half> %x0, <8 x half> %x1, <8 x half> zeroinitializer, i8 %x3) ret <8 x half> %res } define <16 x half>@test_int_x86_avx512_scalef_ph_256(<16 x half> %x0, <16 x half> %x1) { ; CHECK-LABEL: test_int_x86_avx512_scalef_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 -1) ret <16 x half> %res } define <16 x half>@test_int_x86_avx512_mask_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_256: ; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm2 {%k1} ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq %mask = bitcast i16 %x3 to <16 x i1> %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> %x2, i16 %x3) ret <16 x half> %res } define <16 x half>@test_int_x86_avx512_maskz_scalef_ph_256(<16 x half> %x0, <16 x half> %x1, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_maskz_scalef_ph_256: 
; Tail of the 256-bit maskz vscalefph test: zero-masked vscalefph via a zeroinitializer passthru. Autogenerated CHECK lines.
; CHECK: # %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 ; CHECK-NEXT: vscalefph %ymm1, %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %mask = bitcast i16 %x3 to <16 x i1> %res = call <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256(<16 x half> %x0, <16 x half> %x1, <16 x half> zeroinitializer, i16 %x3) ret <16 x half> %res }