// Part of a compiler project using LLVM: Clang IR-generation test for the PowerPC __vector_quad and __vector_pair types.
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr10 \
// RUN:   -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr9 \
// RUN:   -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -triple powerpc64le-linux-unknown -target-cpu pwr8 \
// RUN:   -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: @test1(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR1_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[PTR2_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store <512 x i1>* [[PTR1:%.*]], <512 x i1>** [[PTR1_ADDR]], align 8
// CHECK-NEXT:    store <512 x i1>* [[PTR2:%.*]], <512 x i1>** [[PTR2_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR1_ADDR]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP0]], i64 2
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[ADD_PTR]], align 64
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[PTR2_ADDR]], align 8
// CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 1
// CHECK-NEXT:    store <512 x i1> [[TMP1]], <512 x i1>* [[ADD_PTR1]], align 64
// CHECK-NEXT:    ret void
//
// Copies the __vector_quad at ptr1 + 2 into ptr2 + 1 via explicit pointer
// arithmetic. The expected IR is a <512 x i1> load/store pair with 64-byte
// alignment, each behind an inbounds GEP (indices 2 and 1 respectively).
void test1(__vector_quad *ptr1, __vector_quad *ptr2) {
  *(ptr2 + 1) = *(ptr1 + 2);
}

// CHECK-LABEL: @test2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR1_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[PTR2_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store <256 x i1>* [[PTR1:%.*]], <256 x i1>** [[PTR1_ADDR]], align 8
// CHECK-NEXT:    store <256 x i1>* [[PTR2:%.*]], <256 x i1>** [[PTR2_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR1_ADDR]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP0]], i64 2
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[ADD_PTR]], align 32
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[PTR2_ADDR]], align 8
// CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 1
// CHECK-NEXT:    store <256 x i1> [[TMP1]], <256 x i1>* [[ADD_PTR1]], align 32
// CHECK-NEXT:    ret void
//
// Same shape as test1, but for __vector_pair: copies the element at ptr1 + 2
// into ptr2 + 1. The expected IR uses <256 x i1> with 32-byte alignment.
void test2(__vector_pair *ptr1, __vector_pair *ptr2) {
  *(ptr2 + 1) = *(ptr1 + 2);
}

// Alias for __vector_quad; the tests that use it expect the same <512 x i1>
// lowering as the builtin type name.
typedef __vector_quad vq_t;
// CHECK-LABEL: @testVQTypedef(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[INP_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[OUTP_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQIN:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[VQOUT:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8
// CHECK-NEXT:    store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQIN]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP3]], <512 x i1>** [[VQOUT]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQIN]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load <512 x i1>, <512 x i1>* [[TMP4]], align 64
// CHECK-NEXT:    [[TMP6:%.*]] = load <512 x i1>*, <512 x i1>** [[VQOUT]], align 8
// CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64
// CHECK-NEXT:    ret void
//
// Reinterprets both int pointers as vq_t pointers and copies one value from
// inp to outp. The expected IR bitcasts i32* to <512 x i1>* for each local and
// performs a single 64-byte-aligned <512 x i1> load and store.
void testVQTypedef(int *inp, int *outp) {
  vq_t *vqin = (vq_t *)inp;
  vq_t *vqout = (vq_t *)outp;
  *vqout = *vqin;
}

// CHECK-LABEL: @testVQArg3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-NEXT:    [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
// CHECK-NEXT:    ret void
//
// Takes a __vector_quad by plain pointer and copies *vq into the storage that
// ptr designates (ptr is cast to __vector_quad *). The parameter is expected
// to appear in the IR as <512 x i1>*.
void testVQArg3(__vector_quad *vq, int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  *vqp = *vq;
}

// CHECK-LABEL: @testVQArg4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-NEXT:    [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
// CHECK-NEXT:    ret void
//
// Variant of testVQArg3 where the parameter is a const pointer to a const
// __vector_quad. The expected IR is the same as for testVQArg3: the
// qualifiers do not show up in the <512 x i1>* parameter type.
void testVQArg4(const __vector_quad *const vq, int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  *vqp = *vq;
}

// CHECK-LABEL: @testVQArg5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VQA_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store <512 x i1>* [[VQA:%.*]], <512 x i1>** [[VQA_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQA_ADDR]], align 8
// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 0
// CHECK-NEXT:    [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[ARRAYIDX]], align 64
// CHECK-NEXT:    [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
// CHECK-NEXT:    ret void
//
// Takes the __vector_quad argument with array syntax. The expected IR shows
// the parameter as a <512 x i1>* and the vqa[0] access as an inbounds GEP with
// index 0 followed by a 64-byte-aligned load.
void testVQArg5(__vector_quad vqa[], int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  *vqp = vqa[0];
}

// CHECK-LABEL: @testVQArg7(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VQ_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store <512 x i1>* [[VQ:%.*]], <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQ_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-NEXT:    [[TMP4:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    store <512 x i1> [[TMP3]], <512 x i1>* [[TMP4]], align 64
// CHECK-NEXT:    ret void
//
// Variant of testVQArg3 where the parameter type is spelled through the vq_t
// typedef with a const qualifier. The expected IR still uses <512 x i1>*.
void testVQArg7(const vq_t *vq, int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  *vqp = *vq;
}

// CHECK-LABEL: @testVQRet2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <512 x i1>* [[ADD_PTR]]
//
// Returns a __vector_quad pointer two elements past ptr (after casting ptr to
// __vector_quad *). The expected IR returns a <512 x i1>* produced by an
// inbounds GEP with index 2.
__vector_quad *testVQRet2(int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  return vqp + 2;
}

// CHECK-LABEL: @testVQRet3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <512 x i1>* [[ADD_PTR]]
//
// Variant of testVQRet2 with a const-qualified pointee in the return type.
// The expected IR return type remains <512 x i1>*.
const __vector_quad *testVQRet3(int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  return vqp + 2;
}

// CHECK-LABEL: @testVQRet5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <512 x i1>, <512 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <512 x i1>* [[ADD_PTR]]
//
// Variant of testVQRet2 whose return type is spelled as a pointer to const
// vq_t. The expected IR return type remains <512 x i1>*.
const vq_t *testVQRet5(int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  return vqp + 2;
}

// CHECK-LABEL: @testVQSizeofAlignof(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT:    [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-NEXT:    [[SIZET:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[ALIGNT:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[SIZEV:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[ALIGNV:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-NEXT:    store <512 x i1> [[TMP3]], <512 x i1>* [[VQ]], align 64
// CHECK-NEXT:    store i32 64, i32* [[SIZET]], align 4
// CHECK-NEXT:    store i32 64, i32* [[ALIGNT]], align 4
// CHECK-NEXT:    store i32 64, i32* [[SIZEV]], align 4
// CHECK-NEXT:    store i32 64, i32* [[ALIGNV]], align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4
// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]]
// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[SIZEV]], align 4
// CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]]
// CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4
// CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]]
// CHECK-NEXT:    ret i32 [[ADD2]]
//
// Exercises sizeof and __alignof__ on both the __vector_quad type and a local
// variable of that type, returning the sum of the four results. The expected
// IR allocates the local as <512 x i1> with 64-byte alignment and stores the
// constant 64 for each of the four queries.
int testVQSizeofAlignof(int *ptr) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  // Local copy so that sizeof/__alignof__ can be applied to an object.
  __vector_quad vq = *vqp;
  unsigned sizet = sizeof(__vector_quad);
  unsigned alignt = __alignof__(__vector_quad);
  unsigned sizev = sizeof(vq);
  unsigned alignv = __alignof__(vq);
  return sizet + alignt + sizev + alignv;
}

// Alias for __vector_pair; the tests that use it expect the same <256 x i1>
// lowering as the builtin type name.
typedef __vector_pair vp_t;
// CHECK-LABEL: @testVPTypedef(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[INP_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[OUTP_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPIN:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[VPOUT:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store i32* [[INP:%.*]], i32** [[INP_ADDR]], align 8
// CHECK-NEXT:    store i32* [[OUTP:%.*]], i32** [[OUTP_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[INP_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPIN]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[OUTP_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP3]], <256 x i1>** [[VPOUT]], align 8
// CHECK-NEXT:    [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPIN]], align 8
// CHECK-NEXT:    [[TMP5:%.*]] = load <256 x i1>, <256 x i1>* [[TMP4]], align 32
// CHECK-NEXT:    [[TMP6:%.*]] = load <256 x i1>*, <256 x i1>** [[VPOUT]], align 8
// CHECK-NEXT:    store <256 x i1> [[TMP5]], <256 x i1>* [[TMP6]], align 32
// CHECK-NEXT:    ret void
//
// Reinterprets both int pointers as vp_t pointers and copies one value from
// inp to outp. The expected IR bitcasts i32* to <256 x i1>* for each local and
// performs a single 32-byte-aligned <256 x i1> load and store.
void testVPTypedef(int *inp, int *outp) {
  vp_t *vpin = (vp_t *)inp;
  vp_t *vpout = (vp_t *)outp;
  *vpout = *vpin;
}

// CHECK-LABEL: @testVPArg3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-NEXT:    [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
// CHECK-NEXT:    ret void
//
// Takes a __vector_pair by plain pointer and copies *vp into the storage that
// ptr designates (ptr is cast to __vector_pair *). The parameter is expected
// to appear in the IR as <256 x i1>*.
void testVPArg3(__vector_pair *vp, int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  *vpp = *vp;
}

// CHECK-LABEL: @testVPArg4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-NEXT:    [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
// CHECK-NEXT:    ret void
//
// Variant of testVPArg3 where the parameter is a const pointer to a const
// __vector_pair. The expected IR is the same as for testVPArg3: the
// qualifiers do not show up in the <256 x i1>* parameter type.
void testVPArg4(const __vector_pair *const vp, int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  *vpp = *vp;
}

// CHECK-LABEL: @testVPArg5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VPA_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store <256 x i1>* [[VPA:%.*]], <256 x i1>** [[VPA_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPA_ADDR]], align 8
// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 0
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[ARRAYIDX]], align 32
// CHECK-NEXT:    [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
// CHECK-NEXT:    ret void
//
// Takes the __vector_pair argument with array syntax. The expected IR shows
// the parameter as a <256 x i1>* and the vpa[0] access as an inbounds GEP with
// index 0 followed by a 32-byte-aligned load.
void testVPArg5(__vector_pair vpa[], int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  *vpp = vpa[0];
}

// CHECK-LABEL: @testVPArg7(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VP_ADDR:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store <256 x i1>* [[VP:%.*]], <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VP_ADDR]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-NEXT:    [[TMP4:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    store <256 x i1> [[TMP3]], <256 x i1>* [[TMP4]], align 32
// CHECK-NEXT:    ret void
//
// Variant of testVPArg3 where the parameter type is spelled through the vp_t
// typedef with a const qualifier. The expected IR still uses <256 x i1>*.
void testVPArg7(const vp_t *vp, int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  *vpp = *vp;
}

// CHECK-LABEL: @testVPRet2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <256 x i1>* [[ADD_PTR]]
//
// Returns a __vector_pair pointer two elements past ptr (after casting ptr to
// __vector_pair *). The expected IR returns a <256 x i1>* produced by an
// inbounds GEP with index 2.
__vector_pair *testVPRet2(int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  return vpp + 2;
}

// CHECK-LABEL: @testVPRet3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <256 x i1>* [[ADD_PTR]]
//
// Variant of testVPRet2 with a const-qualified pointee in the return type.
// The expected IR return type remains <256 x i1>*.
const __vector_pair *testVPRet3(int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  return vpp + 2;
}

// CHECK-LABEL: @testVPRet5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds <256 x i1>, <256 x i1>* [[TMP2]], i64 2
// CHECK-NEXT:    ret <256 x i1>* [[ADD_PTR]]
//
// Variant of testVPRet2 whose return type is spelled as a pointer to const
// vp_t. The expected IR return type remains <256 x i1>*.
const vp_t *testVPRet5(int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  return vpp + 2;
}

// CHECK-LABEL: @testVPSizeofAlignof(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT:    [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT:    [[VP:%.*]] = alloca <256 x i1>, align 32
// CHECK-NEXT:    [[SIZET:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[ALIGNT:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[SIZEV:%.*]] = alloca i32, align 4
// CHECK-NEXT:    [[ALIGNV:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-NEXT:    store <256 x i1> [[TMP3]], <256 x i1>* [[VP]], align 32
// CHECK-NEXT:    store i32 32, i32* [[SIZET]], align 4
// CHECK-NEXT:    store i32 32, i32* [[ALIGNT]], align 4
// CHECK-NEXT:    store i32 32, i32* [[SIZEV]], align 4
// CHECK-NEXT:    store i32 32, i32* [[ALIGNV]], align 4
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[SIZET]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ALIGNT]], align 4
// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP4]], [[TMP5]]
// CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[SIZEV]], align 4
// CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ADD]], [[TMP6]]
// CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ALIGNV]], align 4
// CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP7]]
// CHECK-NEXT:    ret i32 [[ADD2]]
//
// Exercises sizeof and __alignof__ on both the __vector_pair type and a local
// variable of that type, returning the sum of the four results. The expected
// IR allocates the local as <256 x i1> with 32-byte alignment and stores the
// constant 32 for each of the four queries.
int testVPSizeofAlignof(int *ptr) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  // Local copy so that sizeof/__alignof__ can be applied to an object.
  __vector_pair vp = *vpp;
  unsigned sizet = sizeof(__vector_pair);
  unsigned alignt = __alignof__(__vector_pair);
  unsigned sizev = sizeof(vp);
  unsigned alignv = __alignof__(vp);
  return sizet + alignt + sizev + alignv;
}