// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
// RUN: %clang_cc1 -no-opaque-pointers -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
//
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
//
int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
  return vqtbl1_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8
// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
//
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
//
int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
  return vqtbl2_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8
// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
//
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
//
int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
  return vqtbl3_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8
// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
//
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
//
int8x8_t test_vqtbl4_s8(int8x16x4_t a, uint8x8_t b) {
  return vqtbl4_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
//
int8x16_t test_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
  return vqtbl1q_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
//
int8x16_t test_vqtbl2q_s8(int8x16x2_t a, uint8x16_t b) {
  return vqtbl2q_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
//
int8x16_t test_vqtbl3q_s8(int8x16x3_t a, uint8x16_t b) {
  return vqtbl3q_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
//
int8x16_t test_vqtbl4q_s8(int8x16x4_t a, uint8x16_t b) {
  return vqtbl4q_s8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X2_T]], %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
//
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X3_T]], %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X8X4_T]], %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
  return vqtbx1_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
  return vqtbx2_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
  return vqtbx3_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, uint8x8_t c) {
  return vqtbx4_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
  return vqtbx1q_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X2_T]], %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, uint8x16_t c) {
  return vqtbx2q_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X3_T]], %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_INT8X16X4_T]], %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_s8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
//
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
//
uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
  return vqtbl1_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8
// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
//
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
//
uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
  return vqtbl2_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8
// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
//
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
//
uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
  return vqtbl3_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8
// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
//
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}
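
// Editorial sketch, not part of the checked test (assumes <arm_neon.h> is in
// scope; the helper name is hypothetical): the IR above shows vtbl4_u8 being
// lowered to a single tbl2 over the four d-register halves packed into two
// q-registers. Written directly with intrinsics, that lowering looks like:
static inline uint8x8_t vtbl4_u8_model(uint8x8x4_t a, uint8x8_t b) {
  uint8x16x2_t packed;
  packed.val[0] = vcombine_u8(a.val[0], a.val[1]);
  packed.val[1] = vcombine_u8(a.val[2], a.val[3]);
  return vqtbl2_u8(packed, b);  // indices >= 32 produce 0, as for vtbl4_u8
}
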
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
//
uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
  return vqtbl4_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
//
uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
  return vqtbl1q_u8(a, b);
}
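
// Editorial usage sketch (hypothetical helper, not a checked function;
// assumes <arm_neon.h> is in scope): vqtbl1q_u8 is a 16-entry byte lookup,
// so it can map nibbles to ASCII hex digits in a single instruction.
static inline uint8x16_t nibbles_to_hex_sketch(uint8x16_t nibbles) {
  static const uint8_t digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
  return vqtbl1q_u8(vld1q_u8(digits), nibbles);
}
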
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
//
uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
  return vqtbl2q_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
//
uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
  return vqtbl3q_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
//
uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
  return vqtbl4q_u8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}
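
// Editorial model of the vtbx1 lowering verified above (sketch only, the
// helper name is hypothetical): there is no tbx form for a 64-bit table, so
// the compiler emits a tbl1 plus a compare/select, keeping the byte from a
// wherever the index is out of range (>= 8), as the icmp/sext/and/or
// sequence above shows.
static inline uint8x8_t vtbx1_u8_model(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  uint8x8_t from_table = vtbl1_u8(b, c);            // 0 where c >= 8
  uint8x8_t keep_a     = vcge_u8(c, vdup_n_u8(8));  // all-ones where c >= 8
  return vbsl_u8(keep_a, a, from_table);            // a vs. table result
}
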
// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X2_T]], %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
//
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X3_T]], %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}
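
// Editorial model of the vtbx3 lowering verified above (sketch only,
// hypothetical helper name): the three 64-bit table parts are packed into
// two q-registers (the third padded with zeroes), looked up with tbl2, and
// lanes whose index is >= 24 fall back to the corresponding byte of a.
static inline uint8x8_t vtbx3_u8_model(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  uint8x16x2_t packed;
  packed.val[0] = vcombine_u8(b.val[0], b.val[1]);
  packed.val[1] = vcombine_u8(b.val[2], vdup_n_u8(0));
  uint8x8_t from_table = vqtbl2_u8(packed, c);
  uint8x8_t keep_a     = vcge_u8(c, vdup_n_u8(24));
  return vbsl_u8(keep_a, a, from_table);
}
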
// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X8X4_T]], %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
  return vqtbx1_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
  return vqtbx2_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
  return vqtbx3_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
  return vqtbx4_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vqtbx1q_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X2_T]], %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
  return vqtbx2q_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X3_T]], %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_u8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_UINT8X16X4_T]], %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_u8(a, b, c);
}
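
// Editorial usage sketch (hypothetical helper, not a checked function): the
// tbx forms leave lanes with out-of-range indices untouched, so chaining
// vqtbx4q_u8 after vqtbl4q_u8 extends the lookup to a 128-byte table.
static inline uint8x16_t lookup128_sketch(uint8x16x4_t lo, uint8x16x4_t hi,
                                          uint8x16_t idx) {
  uint8x16_t r = vqtbl4q_u8(lo, idx);                        // bytes 0..63
  return vqtbx4q_u8(r, hi, vsubq_u8(idx, vdupq_n_u8(64)));   // bytes 64..127
}
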
// CHECK-LABEL: define {{[^@]+}}@test_vtbl1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]]
//
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}
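
// Editorial note (sketch, hypothetical helper): the poly8 table intrinsics
// below lower to the same tbl/tbx calls as the uint8 ones; only the C-level
// element type differs. Round-tripping through vreinterpret makes that
// equivalence explicit:
static inline poly8x8_t vtbl1_p8_via_u8_sketch(poly8x8_t a, uint8x8_t b) {
  return vreinterpret_p8_u8(vtbl1_u8(vreinterpret_u8_p8(a), b));
}
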
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]]
//
poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
  return vqtbl1_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8
// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[A_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]]
//
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]]
//
poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
  return vqtbl2_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8
// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[A_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]]
//
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]]
//
poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
  return vqtbl3_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8
// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[A_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]]
//
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[B]])
// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]]
//
poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
  return vqtbl4_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl1q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]]
//
poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
  return vqtbl1q_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8
// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[A_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]]
//
poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
  return vqtbl2q_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8
// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[A_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]]
//
poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
  return vqtbl3q_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8
// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[A_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[B]])
// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]]
//
poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
  return vqtbl4q_p8(a, b);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[B_COERCE]], [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X2_T]], %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]]
//
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}
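// vtbx3 packs the first two table registers into one 16-byte vector and
// zero-pads the third, then calls tbl2. As with vtbx1, a compare-and-select
// against 24 (the number of real table bytes) restores the destination lanes
// for out-of-range indices.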
// CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[B_COERCE]], [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X3_T]], %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
// CHECK-NEXT: [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK-NEXT: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
// CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK-NEXT: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret <8 x i8> [[VTBX_I]]
//
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}
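// vtbx4 packs its four 8-byte registers into two full 16-byte tables and maps
// directly onto tbx2, so no select sequence is required.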
// CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[B_COERCE]], [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X8X4_T]], %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]]
//
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}
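// The vqtbx intrinsics already take 128-bit table registers, so they lower
// one-to-one onto llvm.aarch64.neon.tbx1..tbx4 with no repacking of the table
// data; only the usual struct-coercion loads appear for the multi-register
// forms.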
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]]
//
poly8x8_t test_vqtbx1_p8(poly8x8_t a, poly8x16_t b, uint8x8_t c) {
  return vqtbx1_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]]
//
poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
  return vqtbx2_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]]
//
poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
  return vqtbx3_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[C]])
// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]]
//
poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
  return vqtbx4_p8(a, b, c);
}
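// The q-suffixed vqtbx forms operate on 128-bit destination and index vectors
// and therefore select the .v16i8 flavour of the tbx intrinsics.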
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx1q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX1_I]]
//
poly8x16_t test_vqtbx1q_p8(poly8x16_t a, poly8x16_t b, uint8x16_t c) {
  return vqtbx1q_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[B_COERCE]], [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X2_T]], %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]]
//
poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
  return vqtbx2q_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[B_COERCE]], [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X3_T]], %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]]
//
poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
  return vqtbx3q_p8(a, b, c);
}
// CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[B_COERCE]], [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK-NEXT: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK-NEXT: [[VAL_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK-NEXT: [[VAL1_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK-NEXT: [[VAL3_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK-NEXT: [[VAL5_I:%.*]] = getelementptr inbounds [[STRUCT_POLY8X16X4_T]], %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[C]])
// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]]
//
poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
  return vqtbx4q_p8(a, b, c);
}