// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --force-update
// REQUIRES: webassembly-registered-target, asserts
// FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline.
// RUN: %clang -Xclang -no-opaque-pointers %s -O2 -emit-llvm -S -o - -target wasm32-unknown-unknown -msimd128 -Wall -Weverything -Wno-missing-prototypes -fno-lax-vector-conversions -Werror | FileCheck %s
// CHECK-LABEL: @test_v128_load(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_v128_load8_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_v128_load16_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i16*
// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_v128_load32_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
//
v128_t
// CHECK-LABEL: @test_v128_load64_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i64 0
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_load8x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u16x8_load8x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <8 x i8>*
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i32x4_load16x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_load16x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i16>*
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, <4 x i16>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_load32x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u64x2_load32x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to <2 x i32>*
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, <2 x i32>* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_v128_load32_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i32*
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i64 0
// CHECK-NEXT: ret <4 x i32> [[VECINIT4_I]]
//
v128_t
// CHECK-LABEL: @test_v128_load64_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V1_I:%.*]] = bitcast i8* [[MEM:%.*]] to i64*
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[__V1_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i64 0
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_v128_load8_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i64 15
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_v128_load16_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i64 7
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_v128_load32_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t
// CHECK-LABEL: @test_v128_load64_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i64 1
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_v128_store(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__V_I:%.*]] = bitcast i8* [[MEM:%.*]] to <4 x i32>*
// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[__V_I]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @test_v128_store8_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT: store i8 [[VECEXT_I]], i8* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @test_v128_store16_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT: store i16 [[VECEXT_I]], i16* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @test_v128_store32_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i64 3
// CHECK-NEXT: store i32 [[VECEXT_I]], i32* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @test_v128_store64_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT: store i64 [[VECEXT_I]], i64* [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @test_i8x16_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u8x16_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
// CHECK-NEXT: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
// CHECK-NEXT: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
// CHECK-NEXT: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
// CHECK-NEXT: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
// CHECK-NEXT: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
// CHECK-NEXT: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
// CHECK-NEXT: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i16x8_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u16x8_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i32x4_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f32x4_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i64 1
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i64 2
// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i64 3
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f64x2_make(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[C0:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i64 1
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i8x16_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t
// CHECK-LABEL: @test_u8x16_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t
// CHECK-LABEL: @test_i16x8_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t
// CHECK-LABEL: @test_u16x8_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t
// CHECK-LABEL: @test_i32x4_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t
// CHECK-LABEL: @test_u32x4_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t
// CHECK-LABEL: @test_i64x2_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t
// CHECK-LABEL: @test_u64x2_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t
// CHECK-LABEL: @test_f32x4_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
//
v128_t
// CHECK-LABEL: @test_f64x2_const(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
//
v128_t
// CHECK-LABEL: @test_i8x16_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
//
v128_t
// CHECK-LABEL: @test_u8x16_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
//
v128_t
// CHECK-LABEL: @test_i16x8_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
//
v128_t
// CHECK-LABEL: @test_u16x8_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
//
v128_t
// CHECK-LABEL: @test_i32x4_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
//
v128_t
// CHECK-LABEL: @test_u32x4_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
//
v128_t
// CHECK-LABEL: @test_i64x2_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t
// CHECK-LABEL: @test_u64x2_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t
// CHECK-LABEL: @test_f32x4_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 1109917696, i32 1109917696, i32 1109917696, i32 1109917696>
//
v128_t
// CHECK-LABEL: @test_f64x2_const_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret <4 x i32> <i32 0, i32 1078263808, i32 0, i32 1078263808>
//
v128_t
// CHECK-LABEL: @test_i8x16_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u8x16_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i8x16_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT: ret i8 [[VECEXT_I]]
//
int8_t
// CHECK-LABEL: @test_u8x16_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT: ret i8 [[VECEXT_I]]
//
uint8_t
// CHECK-LABEL: @test_i8x16_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u8x16_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u16x8_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT: ret i16 [[VECEXT_I]]
//
int16_t
// CHECK-LABEL: @test_u16x8_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT: ret i16 [[VECEXT_I]]
//
uint16_t
// CHECK-LABEL: @test_i16x8_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u16x8_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i32x4_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[VECINIT3_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
// CHECK-NEXT: ret i32 [[VECEXT_I]]
//
int32_t
// CHECK-LABEL: @test_u32x4_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
// CHECK-NEXT: ret i32 [[VECEXT_I]]
//
uint32_t
// CHECK-LABEL: @test_i32x4_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
// CHECK-NEXT: ret <4 x i32> [[VECINS_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i64x2_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT: ret i64 [[VECEXT_I]]
//
int64_t
// CHECK-LABEL: @test_u64x2_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT: ret i64 [[VECEXT_I]]
//
uint64_t
// CHECK-LABEL: @test_i64x2_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u64x2_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f32x4_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i64 0
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f32x4_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT: ret float [[VECEXT_I]]
//
float
// CHECK-LABEL: @test_f32x4_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i64 3
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f64x2_splat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i64 0
// CHECK-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f64x2_extract_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i64 1
// CHECK-NEXT: ret double [[VECEXT_I]]
//
double
// CHECK-LABEL: @test_f64x2_replace_lane(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i64 1
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i8x16_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u8x16_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u8x16_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u8x16_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u8x16_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i32x4_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f32x4_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f32x4_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f32x4_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f32x4_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f32x4_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[SEXT_I]]
//
v128_t
// CHECK-LABEL: @test_f64x2_eq(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_ne(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_lt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_gt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_le(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_ge(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_v128_not(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[NEG_I:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK-NEXT: ret <4 x i32> [[NEG_I]]
//
v128_t
// CHECK-LABEL: @test_v128_and(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[AND_I]]
//
v128_t
// CHECK-LABEL: @test_v128_or(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[OR_I]]
//
v128_t
// CHECK-LABEL: @test_v128_xor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[XOR_I]]
//
v128_t
// CHECK-LABEL: @test_v128_andnot(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[NEG_I:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK-NEXT: [[AND_I:%.*]] = and <4 x i32> [[NEG_I]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[AND_I]]
//
v128_t
// CHECK-LABEL: @test_v128_any_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool
// CHECK-LABEL: @test_v128_bitselect(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i8x16_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i8x16_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool
// CHECK-LABEL: @test_i8x16_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
uint32_t
// CHECK-LABEL: @test_i8x16_popcnt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_shl(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> undef, i8 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_add_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_add_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i8x16_sub_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_sub_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_avgr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool
// CHECK-LABEL: @test_i16x8_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
uint32_t
// CHECK-LABEL: @test_i16x8_shl(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> undef, i16 [[TMP1]], i64 0
// CHECK-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_add_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_add_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_sub_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_sub_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_mul(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u16x8_avgr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i32x4_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false)
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i32x4_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool
// CHECK-LABEL: @test_i32x4_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]])
// CHECK-NEXT: ret i32 [[TMP0]]
//
uint32_t
// CHECK-LABEL: @test_i32x4_shl(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHL_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: ret <4 x i32> [[SHR_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[ADD_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: ret <4 x i32> [[SUB_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_mul(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u32x4_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i32x4_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u32x4_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i32x4_dot_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false)
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i64x2_all_true(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT: ret i1 [[TOBOOL_I]]
//
bool
// CHECK-LABEL: @test_i64x2_bitmask(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT: ret i32 [[TMP1]]
//
uint32_t
// CHECK-LABEL: @test_i64x2_shl(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i64x2_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u64x2_shr(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[B:%.*]] to i64
// CHECK-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[CONV_I]], i64 0
// CHECK-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i64x2_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i64x2_mul(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f32x4_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_mul(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_div(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f32x4_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f32x4_pmin(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f32x4_pmax(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f64x2_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_neg(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f64x2_sqrt(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_ceil(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_floor(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_trunc(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_nearest(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_add(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_sub(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_mul(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_div(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f64x2_min(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f64x2_max(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f64x2_pmin(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_f64x2_pmax(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f32x4_convert_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f32x4_convert_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f64x2_convert_low_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_f64x2_convert_low_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_f64x2_promote_low_f32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i8x16_shuffle(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_shuffle(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i32x4_shuffle(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i64x2_shuffle(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_swizzle(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i8x16_narrow_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_u8x16_narrow_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t
// CHECK-LABEL: @test_i16x8_narrow_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u16x8_narrow_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extend_low_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extend_high_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u16x8_extend_low_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u16x8_extend_high_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extend_low_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extend_high_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_extend_low_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_extend_high_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[CONV_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_extend_low_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i64x2_extend_high_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_extend_low_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_extend_high_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]])
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t
// CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t
// CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: ret <4 x i32> [[MUL_I]]
//
v128_t
// CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
// CHECK-NEXT: [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
// CHECK-NEXT: [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
// CHECK-NEXT: [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I3_I]], [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
v128_t
// CHECK-LABEL: @test_i16x8_q15mulr_sat(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
v128_t