; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fadd <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vaddph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fadd <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fadd <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fadd <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fadd <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fadd <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <16 x half> %x1, %x2
  ret <16 x half> %res
}

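; As in the add tests above, the masked variants below express predication in
; IR as a bitcast of the integer mask to <N x i1> followed by a select against
; the passthrough value (or zeroinitializer for the maskz forms). The checks
; expect this pattern to fold into the {%k1} / {%k1} {z} forms of the
; instruction, with the explicit load folded into a memory operand.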
define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vsubph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fsub <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fsub <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fsub <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsubph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fsub <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fsub <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fmul <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmulph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fmul <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fmul <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fmul <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmulph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fmul <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fmul <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %ymm1, %ymm1
; CHECK-NEXT:    vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv fast <16 x half> %x1, %x2
  ret <16 x half> %res
}

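; The *_div_ph_*_fast tests (256-bit above, 128-bit below) check that an fdiv
; carrying the 'fast' flag is lowered to the vrcpph reciprocal approximation
; followed by vmulph instead of a full vdivph.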
define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vdivph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fdiv <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fdiv <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %xmm1, %xmm1
; CHECK-NEXT:    vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv fast <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vdivph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fdiv <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fdiv <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret <16 x half> %res1
}

define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret <16 x half> %res1
}

define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret <8 x half> %res1
}

define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret <8 x half> %res1
}

declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)

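; The *_2 variants below call the llvm.x86.avx512fp16.{max,min}.ph intrinsics
; directly rather than using the fcmp+select idiom above; both forms are
; expected to select vmaxph/vminph.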
define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret <8 x half> %res0
}

define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret <16 x half> %res0
}

declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret <8 x half> %res0
}

define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret <16 x half> %res0
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

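; For the pd2ph conversions the destination is always an xmm register, so the
; *_load tests expect the size-disambiguated memory-form mnemonics:
; vcvtpd2phy for a 256-bit memory source and vcvtpd2phx for a 128-bit one.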
define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, ptr %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

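; The vcvttph2dq/vcvttph2udq tests below cover the truncating (round-toward-zero)
; conversions, while the vcvtph2udq tests above use the current rounding mode.
; In the 256-bit cases, eight half elements are read from an xmm source and
; widened into a ymm integer result.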
declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}