; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -o - %s | FileCheck %s declare <4 x i1> @llvm.arm.mve.vctp64(i32) declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>) declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>) declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>) declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>) declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>) declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>) declare <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>) define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) { ; CHECK-LABEL: @test_vctp64q( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16 ; CHECK-NEXT: ret i16 [[TMP4]] ; entry: %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a) %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0) %2 = trunc i32 %1 to i16 ret i16 %2 } define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) { ; CHECK-LABEL: @test_vctp64q_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP1]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16 ; CHECK-NEXT: ret i16 [[TMP7]] ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a) %3 = and <4 x i1> %1, %2 %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3) %5 = trunc i32 %4 to i16 ret i16 %5 } define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { ; CHECK-LABEL: @test_vmullbq_int_m_s32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP4]] ; entry: %0 = zext i16 %p to i32 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, <4 x i1> %1, <2 x i64> %inactive) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { ; CHECK-LABEL: @test_vqdmullbq_m_s32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP4]] ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) { ; CHECK-LABEL: @test_vldrdq_gather_base_z_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i1> [[TMP3]]) ; CHECK-NEXT: ret <2 x i64> [[TMP4]] ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) { ; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1 ; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[ADDR]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0 ; CHECK-NEXT: ret <2 x i64> [[TMP7]] ; entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2) %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1 store <2 x i64> %4, <2 x i64>* %addr, align 8 %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0 ret <2 x i64> %5 } define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) { ; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v2i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]]) ; CHECK-NEXT: ret <2 x i64> [[TMP4]] ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1) ret <2 x i64> %2 } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: @test_vstrdq_scatter_base_p_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP3]]) ; CHECK-NEXT: ret void ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1) ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]]) ; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[ADDR]], align 8 ; CHECK-NEXT: ret void ; entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2) store <2 x i64> %3, <2 x i64>* %addr, align 8 ret void } define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v2i1(i64* [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]]) ; CHECK-NEXT: ret void ; entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1) ret void } define <8 x i16> @test_vcx1q_m(<2 x i64> %inactive, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx1q_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], i32 1111, <2 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[TMP5]] ; entry: %0 = zext i16 %p to i32 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = tail call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, i32 1111, <4 x i1> %1) %3 = bitcast <2 x i64> %2 to <8 x i16> ret <8 x i16> %3 } define <16 x i8> @test_vcx1qa_m(<2 x i64> %acc, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx1qa_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], i32 1112, <2 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> ; CHECK-NEXT: ret <16 x i8> [[TMP5]] ; entry: %0 = zext i16 %p to i32 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) %2 = tail call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, i32 1112, <4 x i1> %1) %3 = bitcast <2 x i64> %2 to <16 x i8> ret <16 x i8> %3 } define <4 x i32> @test_vcx2q_m(<2 x i64> %inactive, <4 x float> %n, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx2q_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], i32 111, <2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP6]] ; entry: %0 = bitcast <4 x float> %n to <16 x i8> %1 = zext i16 %p to i32 %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = tail call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2) %4 = bitcast <2 x i64> %3 to <4 x i32> ret <4 x i32> %4 } define <4 x float> @test_vcx2qa_m(<2 x i64> %acc, <8 x half> %n, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx2qa_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], <16 x i8> [[TMP0]], i32 112, <2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[TMP6]] ; entry: %0 = bitcast <8 x half> %n to <16 x i8> %1 = zext i16 %p to i32 %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = tail call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, <16 x i8> %0, i32 112, <4 x i1> %2) %4 = bitcast <2 x i64> %3 to <4 x float> ret <4 x float> %4 } define <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx3q_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[M:%.*]], i32 11, <2 x i1> [[TMP4]]) ; CHECK-NEXT: ret <2 x i64> [[TMP5]] ; entry: %0 = bitcast <4 x float> %n to <16 x i8> %1 = zext i16 %p to i32 %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) %3 = tail call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2) ret <2 x i64> %3 } define <8 x half> @test_vcx3qa_m(<2 x i64> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) { ; CHECK-LABEL: @test_vcx3qa_m( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[M:%.*]] to <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, <2 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <8 x half> ; CHECK-NEXT: ret <8 x half> [[TMP7]] ; entry: %0 = bitcast <8 x half> %n to <16 x i8> %1 = bitcast <4 x i32> %m to <16 x i8> %2 = zext i16 %p to i32 %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) %4 = tail call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3) %5 = bitcast <2 x i64> %4 to <8 x half> ret <8 x half> %5 }