// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
// RUN: -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
// RUN: -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-BE
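
// __vector_quad local variables: a copy in through a cast pointer,
// accumulator initialization via __builtin_mma_xxsetaccz and
// __builtin_mma_xvi4ger8, and a copy back out.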
// CHECK-LABEL: @testVQLocal(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
// CHECK-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
// CHECK-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64
// CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
// CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64
// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
// CHECK-NEXT: store <512 x i1> [[TMP7]], <512 x i1>* [[VQ3]], align 64
// CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64
// CHECK-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVQLocal(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-BE-NEXT: [[VQP:%.*]] = alloca <512 x i1>*, align 8
// CHECK-BE-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
// CHECK-BE-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
// CHECK-BE-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <512 x i1>*
// CHECK-BE-NEXT: store <512 x i1>* [[TMP1]], <512 x i1>** [[VQP]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, <512 x i1>* [[TMP2]], align 64
// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], <512 x i1>* [[VQ1]], align 64
// CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[VQ2]], align 64
// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], <512 x i1>* [[VQ3]], align 64
// CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, <512 x i1>* [[VQ3]], align 64
// CHECK-BE-NEXT: [[TMP9:%.*]] = load <512 x i1>*, <512 x i1>** [[VQP]], align 8
// CHECK-BE-NEXT: store <512 x i1> [[TMP8]], <512 x i1>* [[TMP9]], align 64
// CHECK-BE-NEXT: ret void
//
void testVQLocal(int *ptr, vector unsigned char vc) {
  __vector_quad *vqp = (__vector_quad *)ptr;
  __vector_quad vq1 = *vqp;
  __vector_quad vq2;
  __builtin_mma_xxsetaccz(&vq2);
  __vector_quad vq3;
  __builtin_mma_xvi4ger8(&vq3, vc, vc);
  *vqp = vq3;
}
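
// __vector_pair local variables: a copy in through a cast pointer, pair
// formation via __builtin_vsx_assemble_pair and __builtin_vsx_build_pair
// (the latter swaps its operands on little endian), use of a pair as the
// xvf64ger input operand, and a copy back out.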
// CHECK-LABEL: @testVPLocal(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
// CHECK-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
// CHECK-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
// CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
// CHECK-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64
// CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]])
// CHECK-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64
// CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
// CHECK-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64
// CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
// CHECK-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-NEXT: store <256 x i1> [[TMP13]], <256 x i1>* [[TMP14]], align 32
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVPLocal(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca i32*, align 8
// CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
// CHECK-BE-NEXT: [[VPP:%.*]] = alloca <256 x i1>*, align 8
// CHECK-BE-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
// CHECK-BE-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
// CHECK-BE-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
// CHECK-BE-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
// CHECK-BE-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PTR_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <256 x i1>*
// CHECK-BE-NEXT: store <256 x i1>* [[TMP1]], <256 x i1>** [[VPP]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32
// CHECK-BE-NEXT: store <256 x i1> [[TMP3]], <256 x i1>* [[VP1]], align 32
// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
// CHECK-BE-NEXT: store <256 x i1> [[TMP6]], <256 x i1>* [[VP2]], align 64
// CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
// CHECK-BE-NEXT: store <256 x i1> [[TMP9]], <256 x i1>* [[VP2]], align 64
// CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
// CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, <16 x i8>* [[VC_ADDR]], align 16
// CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
// CHECK-BE-NEXT: store <512 x i1> [[TMP12]], <512 x i1>* [[VQ]], align 64
// CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, <256 x i1>* [[VP3]], align 32
// CHECK-BE-NEXT: [[TMP14:%.*]] = load <256 x i1>*, <256 x i1>** [[VPP]], align 8
// CHECK-BE-NEXT: store <256 x i1> [[TMP13]], <256 x i1>* [[TMP14]], align 32
// CHECK-BE-NEXT: ret void
//
void testVPLocal(int *ptr, vector unsigned char vc) {
  __vector_pair *vpp = (__vector_pair *)ptr;
  __vector_pair vp1 = *vpp;
  __vector_pair vp2;
  __builtin_vsx_assemble_pair(&vp2, vc, vc);
  __builtin_vsx_build_pair(&vp2, vc, vc);
  __vector_pair vp3;
  __vector_quad vq;
  __builtin_mma_xvf64ger(&vq, vp3, vc);
  *vpp = vp3;
}
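
// Disassembling an accumulator through a restrict-qualified pointer into an
// array of four 16-byte vectors.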
// CHECK-LABEL: @testRestrictQualifiedPointer2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
// CHECK-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testRestrictQualifiedPointer2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
// CHECK-BE-NEXT: store <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
// CHECK-BE-NEXT: [[TMP0:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
// CHECK-BE-NEXT: ret void
//
void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
  vector float arr[4];
  __builtin_mma_disassemble_acc(arr, acc);
}
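
// Same as above, but the accumulator pointer itself is volatile-qualified,
// so the loads of the pointer (not of the accumulator) are volatile.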
// CHECK-LABEL: @testVolatileQualifiedPointer2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
// CHECK-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
// CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
// CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
// CHECK-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
// CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
// CHECK-NEXT: ret void
//
// CHECK-BE-LABEL: @testVolatileQualifiedPointer2(
// CHECK-BE-NEXT: entry:
// CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca <512 x i1>*, align 8
// CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
// CHECK-BE-NEXT: store volatile <512 x i1>* [[ACC:%.*]], <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[ARR]], i64 0, i64 0
// CHECK-BE-NEXT: [[TMP0:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP1:%.*]] = load volatile <512 x i1>*, <512 x i1>** [[ACC_ADDR]], align 8
// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, <512 x i1>* [[TMP1]], align 64
// CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <4 x float>* [[ARRAYDECAY]] to <16 x i8>*
// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 0
// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP6]], align 16
// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 1
// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP8]], align 16
// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 2
// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], <16 x i8>* [[TMP10]], align 16
// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP4]], i32 3
// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP12]], align 16
// CHECK-BE-NEXT: ret void
//
void testVolatileQualifiedPointer2(__vector_quad *volatile acc) {
  vector float arr[4];
  __builtin_mma_disassemble_acc(arr, acc);
}