; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
; Tests codegen for AVX512-FP16 word<->half conversions (vcvt[u]w2ph, vcvt[t]ph2[u]w)
; at 256- and 128-bit widths, covering masked, broadcast, no-mask, zero-masked and
; memory-operand forms, plus odd-sized integer->half conversions.

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
  %res0 = sitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = sitofp <16 x i16> %arg0 to <16 x half>
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = sitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x i16>, ptr %arg0
  %res0 = sitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <16 x half>, ptr %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256(<16 x i16> %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_b(ptr %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <16 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <16 x i16> %scalar_in_vector, <16 x i16> undef, <16 x i32> zeroinitializer
  %res0 = uitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask(<16 x i16> %arg0, <16 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = uitofp <16 x i16> %arg0 to <16 x half>
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_z(<16 x i16> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = uitofp <16 x i16> %arg0 to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_256_load(ptr %arg0, <16 x half> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x i16>, ptr %arg0
  %res0 = uitofp <16 x i16> %val to <16 x half>
  %res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %arg1
  ret <16 x half> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <16 x half>, ptr %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <16 x half>, ptr %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half>, <16 x i16>, i16)

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_b(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi){1to16}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <16 x half> undef, half %scalar, i32 0
  %val = shufflevector <16 x half> %scalar_in_vector, <16 x half> undef, <16 x i32> zeroinitializer
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask(<16 x half> %arg0, <16 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> %arg1, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_z(<16 x half> %arg0, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %arg0, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_256_load(ptr %arg0, <16 x i16> %arg1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <16 x half>, ptr %arg0
  %res = call <16 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.256(<16 x half> %val, <16 x i16> %arg1, i16 %mask)
  ret <16 x i16> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
  %res0 = sitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <8 x i16> %arg0 to <8 x half>
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = sitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtw2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtw2ph (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x i16>, ptr %arg0
  %res0 = sitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2w %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2w_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2w (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <8 x half>, ptr %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128(<8 x i16> %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_b(ptr %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %scalar = load i16, ptr %arg0
  %scalar_in_vector = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %val = shufflevector <8 x i16> %scalar_in_vector, <8 x i16> undef, <8 x i32> zeroinitializer
  %res0 = uitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask(<8 x i16> %arg0, <8 x half> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <8 x i16> %arg0 to <8 x half>
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_z(<8 x i16> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = uitofp <8 x i16> %arg0 to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvtuw2ph_128_load(ptr %arg0, <8 x half> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtuw2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtuw2ph (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x i16>, ptr %arg0
  %res0 = uitofp <8 x i16> %val to <8 x half>
  %res = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %arg1
  ret <8 x half> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvtph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvtph2uw_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2uw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <8 x half>, ptr %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvtph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2w %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2w_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2w_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2w (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <8 x half>, ptr %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2w.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half>, <8 x i16>, i8)

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_b(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi){1to8}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %scalar = load half, ptr %arg0
  %scalar_in_vector = insertelement <8 x half> undef, half %scalar, i32 0
  %val = shufflevector <8 x half> %scalar_in_vector, <8 x half> undef, <8 x i32> zeroinitializer
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask(<8 x half> %arg0, <8 x i16> %arg1) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> %arg1, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_z(<8 x half> %arg0, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uw %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %arg0, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_int_x86_avx512fp16_mask_cvttph2uw_128_load(ptr %arg0, <8 x i16> %arg1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvttph2uw_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvttph2uw (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %val = load <8 x half>, ptr %arg0
  %res = call <8 x i16> @llvm.x86.avx512fp16.mask.vcvttph2uw.128(<8 x half> %val, <8 x i16> %arg1, i8 %mask)
  ret <8 x i16> %res
}

define <4 x half> @test_u16tofp4(<4 x i16> %arg0) {
; CHECK-LABEL: test_u16tofp4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <4 x i16> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_s16tofp2(<2 x i16> %arg0) {
; CHECK-LABEL: test_s16tofp2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <2 x i16> %arg0 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @test_u8tofp4(<4 x i8> %arg0) {
; CHECK-LABEL: test_u8tofp4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <4 x i8> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_s8tofp2(<2 x i8> %arg0) {
; CHECK-LABEL: test_s8tofp2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
; CHECK-NEXT:    vcvtw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <2 x i8> %arg0 to <2 x half>
  ret <2 x half> %res
}

define <2 x half> @test_u1tofp2(<2 x i1> %arg0) {
; CHECK-LABEL: test_u1tofp2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vcvtuw2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <2 x i1> %arg0 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @test_s17tofp4(<4 x i17> %arg0) {
; CHECK-LABEL: test_s17tofp4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $15, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $15, %xmm0, %xmm0
; CHECK-NEXT:    vcvtdq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = sitofp <4 x i17> %arg0 to <4 x half>
  ret <4 x half> %res
}

define <2 x half> @test_u33tofp2(<2 x i33> %arg0) {
; CHECK-LABEL: test_u33tofp2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; CHECK-NEXT:    vcvtuqq2ph %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = uitofp <2 x i33> %arg0 to <2 x half>
  ret <2 x half> %res
}