; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s

declare i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half>, <8 x half>, i32, i32)

define i32 @test_x86_avx512fp16_ucomi_sh_lt(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_x86_avx512fp16_ucomi_sh_lt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpngesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512fp16.vcomi.sh(<8 x half> %a0, <8 x half> %a1, i32 9, i32 4)
  ret i32 %res
}

declare <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half>, i32) nounwind readnone

define <32 x half> @test_sqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  ret <32 x half> %1
}

define <32 x half> @test_sqrt_ph_512_fast(<32 x half> %a0, <32 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_512_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrt" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0, <32 x half> %a1) "reciprocal-estimates"="vec-sqrth:1" {
; CHECK-LABEL: test_sqrt_ph_512_fast_estimate_attribute_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm2
; CHECK-NEXT:    vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT:    vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT:    vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vmulph %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = fdiv fast <32 x half> %a1, %1
  ret <32 x half> %2
}

define <32 x half> @test_mask_sqrt_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}

declare <32 x half> @llvm.sqrt.v32f16(<32 x half>)
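
; Note: the trailing i32 operand on the rounding intrinsics below follows the
; usual X86 _MM_FROUND encoding: 4 = MXCSR/current direction, 8 = {rn-sae}
; (or plain {sae} on SAE-only operations), 9 = {rd-sae}, 10 = {ru-sae},
; 11 = {rz-sae}.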

define <32 x half> @test_sqrt_round_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  ret <32 x half> %1
}

define <32 x half> @test_mask_sqrt_round_ph_512(<32 x half> %a0, <32 x half> %passthru, i32 %mask) {
; CHECK-LABEL: test_mask_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> %passthru
  ret <32 x half> %3
}

define <32 x half> @test_maskz_sqrt_round_ph_512(<32 x half> %a0, i32 %mask) {
; CHECK-LABEL: test_maskz_sqrt_round_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtph {rz-sae}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %1 = call <32 x half> @llvm.x86.avx512fp16.sqrt.ph.512(<32 x half> %a0, i32 11)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x half> %1, <32 x half> zeroinitializer
  ret <32 x half> %3
}
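
; Scalar (*sh) intrinsics operate on element 0 only; the remaining elements of
; the result are copied from the first source vector, and the low mask bit
; selects between the computed value and the passthru operand.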

declare <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32) nounwind readnone

define <8 x half> @test_sqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 4)
  ret <8 x half> %res
}

define half @test_sqrt_sh2(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  %2 = fdiv fast half %a1, %1
  ret half %2
}

define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call fast half @llvm.sqrt.f16(half %a0)
  ret half %1
}

declare half @llvm.sqrt.f16(half)

define <8 x half> @test_sqrt_sh_r(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask, i32 10)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_nomask(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_sqrt_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_sqrt_sh_z(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsqrtsh {ru-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.sqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 %mask, i32 10)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half>, <32 x half>, i32)
declare <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <32 x half> @test_rsqrt_ph_512(<32 x half> %a0) {
; CHECK-LABEL: test_rsqrt_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtph %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512(<32 x half> %a0, <32 x half> zeroinitializer, i32 -1)
  ret <32 x half> %res
}

define <8 x half> @test_rsqrt_sh(<8 x half> %a0, <8 x half> %a1, <8 x half> %a2) {
; CHECK-LABEL: test_rsqrt_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> %a2, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rsqrt_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_maskz(<8 x half> %a0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_maskz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 %mask)
  ret <8 x half> %res
}

define <8 x half> @test_rsqrt_sh_mask(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask) {
; CHECK-LABEL: test_rsqrt_sh_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrsqrtsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh(<8 x half> %a0, <8 x half> %b0, <8 x half> %c0, i8 %mask)
  ret <8 x half> %res
}
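
; vfpclass immediates are a bitmask of categories (per the VFPCLASS encoding):
; 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal,
; 0x40 finite negative, 0x80 SNaN. The tests below use $2 (+0) and $4 (-0).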

declare <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half>, i32)

define i32 @test_int_x86_avx512_fpclass_ph_512(<32 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassph $2, %zmm0, %k1
; CHECK-NEXT:    vfpclassph $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 4)
  %res1 = call <32 x i1> @llvm.x86.avx512fp16.fpclass.ph.512(<32 x half> %x0, i32 2)
  %1 = and <32 x i1> %res1, %res
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

declare i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sh(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, %xmm0, %k1
; CHECK-NEXT:    vfpclasssh $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 2, i8 -1)
  %res1 = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 %res)
  ret i8 %res1
}

define i8 @test_int_x86_avx512_mask_fpclass_sh_load(ptr %x0ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasssh $4, (%rdi), %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, ptr %x0ptr
  %res = call i8 @llvm.x86.avx512fp16.mask.fpclass.sh(<8 x half> %x0, i32 4, i8 -1)
  ret i8 %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_rcp_ph_512(<32 x half> %a0, <32 x half> %a1, i32 %mask) {
; CHECK-LABEL: test_rcp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrcpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512(<32 x half> %a0, <32 x half> %a1, i32 %mask)
  ret <32 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half>, <8 x half>, <8 x half>, i8)

define <8 x half> @test_rcp_sh(<8 x half> %a0) {
; CHECK-LABEL: test_rcp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a0, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_rcp_sh_load(<8 x half> %a0, ptr %a1ptr) {
; CHECK-LABEL: test_rcp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a1 = load <8 x half>, ptr %a1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh(<8 x half> %a0, <8 x half> %a1, <8 x half> zeroinitializer, i8 -1)
  ret <8 x half> %res
}
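
; vreduceph/vrndscaleph take an imm8 control in addition to the i32
; rounding/SAE operand; the unmasked calls below pass i32 8 to exercise the
; {sae} form.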

declare <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_reduce_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreduceph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.reduce.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half> @test_int_x86_avx512_mask_reduce_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vreducesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_reduce_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vreducesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.reduce.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_rndscale_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscaleph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vrndscaleph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)

define <8 x half> @test_int_x86_avx512_mask_rndscale_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vrndscalesh $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_rndscale_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrndscalesh $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 4, i32 8)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half>, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_getexp_ph_512(<32 x half> %x0, <32 x half> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpph %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetexpph {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> %x1, i32 %x2, i32 4)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.getexp.ph.512(<32 x half> %x0, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_getexp_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetexpsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getexp_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getexp_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_getexp_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetexpsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getexp.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half>, i32, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_getmant_ph_512(<32 x half> %x0, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantph $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vgetmantph $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 8, <32 x half> %x2, i32 %x3, i32 4)
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.getmant.ph.512(<32 x half> %x0, i32 4, <32 x half> %x2, i32 -1, i32 8)
  %res2 = fadd <32 x half> %res, %res1
  ret <32 x half> %res2
}
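
; For vgetmantsh, $11 (0b1011) decodes per the VGETMANT imm8 layout
; (normalization interval in bits 1:0, sign control in bits 3:2): normalize
; to [3/4, 3/2) and return a qNaN for negative inputs.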

declare <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half>, <8 x half>, i32, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_getmant_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getmant_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> %x3, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_getmant_sh_z(<8 x half> %x0, <8 x half> %x1, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sh_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgetmantsh $11, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.getmant.sh(<8 x half> %x0, <8 x half> %x1, i32 11, <8 x half> zeroinitializer, i8 %x4, i32 4)
  ret <8 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half>, <32 x half>, <32 x half>, i32, i32)

define <32 x half> @test_int_x86_avx512_mask_scalef_ph_512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefph {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vscalefph {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddph %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %x3 to <32 x i1>
  %res1 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> %x2, i32 %x3, i32 11)
  %res2 = call <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512(<32 x half> %x0, <32 x half> %x1, <32 x half> zeroinitializer, i32 -1, i32 8)
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_scalef_sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vscalefsh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 %x4, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_scalef_sh_nomask(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> %x3, i8 -1, i32 8)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_scalef_sh_load(<8 x half> %x0, ptr %x1ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sh_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vscalefsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x1 = load <8 x half>, ptr %x1ptr
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.scalef.sh(<8 x half> %x0, <8 x half> %x1, <8 x half> undef, i8 -1, i32 4)
  ret <8 x half> %res
}
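
; Each of the masked arithmetic tests below chains one intrinsic result into
; the next so that a single function covers the unmasked, merge-masked,
; zero-masked, and memory-operand forms.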

declare <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vaddsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vsubsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmulsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vdivsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_min_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vminsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vminsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vminsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half>, <8 x half>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_max_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmaxsh %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmaxsh %xmm1, %xmm3, %xmm0 {%k1} {z}
; CHECK-NEXT:    vmaxsh (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %val.half = load half, ptr %ptr
  %val = insertelement <8 x half> undef, half %val.half, i32 0
  %res0 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %x1, <8 x half> %x2, <8 x half> zeroinitializer, i8 -1, i32 4)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res0, <8 x half> %x2, <8 x half> %src, i8 %mask, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res1, <8 x half> %x2, <8 x half> zeroinitializer, i8 %mask, i32 4)
  %res3 = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> %res2, <8 x half> %val, <8 x half> %src, i8 %mask, i32 4)
  ret <8 x half> %res3
}

declare i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half>, <8 x half>, i32, i8, i32)

define i8 @test_int_x86_avx512_mask_cmp_sh(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmpunordsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 %x3, i32 4)
  ret i8 %res2
}

define i8 @test_int_x86_avx512_mask_cmp_sh_all(<8 x half> %x0, <8 x half> %x1, i8 %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sh_all:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcmplesh %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %ecx
; CHECK-NEXT:    vcmpunordsh {sae}, %xmm1, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %edx
; CHECK-NEXT:    vcmpneqsh %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %esi
; CHECK-NEXT:    vcmpnltsh {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    andb %dl, %al
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %res1 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512fp16.mask.cmp.sh(<8 x half> %x0, <8 x half> %x1, i32 5, i8 %x3, i32 8)
  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}
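
; i32 <-> f16 conversions change element width, so 16 x i32 in a zmm register
; converts to 16 x half in a ymm register in the tests below.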

declare <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_dq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.sitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @sint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: sint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = sitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32>, i32)

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_r(<16 x i32> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 10)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> %x1
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_nomask(<16 x i32> %x0, <16 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_udq2ph_512_z(<16 x i32> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i16 %x2 to <16 x i1>
  %res0 = call <16 x half> @llvm.x86.avx512.uitofp.round.v16f16.v16i32(<16 x i32> %x0, i32 4)
  %res = select <16 x i1> %mask, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res
}

define <16 x half> @uint_to_fp_16i32_to_16f16(<16 x i32> %x) {
; CHECK-LABEL: uint_to_fp_16i32_to_16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = uitofp <16 x i32> %x to <16 x half>
  ret <16 x half> %res
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2dq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2dq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2udq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 10)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
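
; The truncating (vcvtt*) forms always round toward zero, so they only accept
; {sae} (i32 8) rather than a rounding-mode override.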

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2dq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half>, <16 x i32>, i16, i32)

define <16 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_512(<16 x half> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2udq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.512(<16 x half> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
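
; 8 x i64 -> 8 x half narrows a zmm source to an xmm result, hence the
; vzeroupper before returning in the tests below.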

declare <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_qq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.sitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64>, i32)

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_r(<8 x i64> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph {ru-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 10)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> %x1
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask(<8 x i64> %x0, <8 x half> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_uqq2ph_512_z(<8 x i64> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ph_512_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %mask = bitcast i8 %x2 to <8 x i1>
  %res0 = call <8 x half> @llvm.x86.avx512.uitofp.round.v8f16.v8i64(<8 x i64> %x0, i32 4)
  %res = select <8 x i1> %mask, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2qq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2qq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2qq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2qq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2qq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2uqq {ru-sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtph2uqq {rn-sae}, %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 10)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvtph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ph2uqq_512(<8 x half> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2uqq_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2uqq {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttph2uqq %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 %x2, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512fp16.mask.vcvttph2uqq.512(<8 x half> %x0, <8 x i64> %x1, i8 -1, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
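
; Scalar conversions: vcvtsh2si/vcvtsh2usi read element 0 of the source
; vector; the i32 operand again selects the rounding mode or {sae}.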

declare i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2si {rz-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2si32(<8 x half> %arg0, i32 11)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2si {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2si64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2si32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2si32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2si64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2si64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2si64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvtsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvtsh2usi {rd-sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvtsh2usi32(<8 x half> %arg0, i32 9)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvtsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvtsh2usi {ru-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvtsh2usi64(<8 x half> %arg0, i32 10)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half>, i32)

define i32 @test_x86_avx512fp16_vcvttsh2usi32(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res1 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 4)
  %res2 = call i32 @llvm.x86.avx512fp16.vcvttsh2usi32(<8 x half> %arg0, i32 8)
  %res = add i32 %res1, %res2
  ret i32 %res
}

declare i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half>, i32)

define i64 @test_x86_avx512fp16_vcvttsh2usi64(<8 x half> %arg0) {
; CHECK-LABEL: test_x86_avx512fp16_vcvttsh2usi64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttsh2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsh2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res1 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 4)
  %res2 = call i64 @llvm.x86.avx512fp16.vcvttsh2usi64(<8 x half> %arg0, i32 8)
  %res = add i64 %res1, %res2
  ret i64 %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtsi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtsi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtsi2sh %rdi, {rn-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtsi642sh(<8 x half> %arg0, i64 %arg1, i32 8)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half>, i32, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi2sh(<8 x half> %arg0, i32 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi2sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %edi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %edi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi2sh(<8 x half> %arg0, i32 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half>, i64, i32)

define <8 x half> @test_x86_avx512fp16_vcvtusi642sh(<8 x half> %arg0, i64 %arg1) {
; CHECK-LABEL: test_x86_avx512fp16_vcvtusi642sh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtusi2sh %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vcvtusi2sh %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res1 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 4)
  %res2 = call <8 x half> @llvm.x86.avx512fp16.vcvtusi642sh(<8 x half> %arg0, i64 %arg1, i32 9)
  %res = fadd <8 x half> %res1, %res2
  ret <8 x half> %res
}
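
; The cast+freeze tests widen a vector with frozen poison in the upper
; elements, which lets the backend fill those lanes from the input register
; instead of zeroing them.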

define <16 x half> @test_mm256_castph128_ph256_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %res
}

define <32 x half> @test_mm512_castph128_ph512_freeze(<8 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph128_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <8 x half> poison
  %res = shufflevector <8 x half> %a0, <8 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <32 x half> %res
}

define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind {
; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %a1 = freeze <16 x half> poison
  %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x half> %res
}