// RUN: %clang_cc1 -no-opaque-pointers -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s
// RUN: %clang_cc1 -no-opaque-pointers -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
// Element-type aliases shared by the arithmetic tests in this file.
// NOTE(review): the names read like <rows>x<cols> matrix types, and the
// FileCheck patterns further down operate on vectors such as <25 x double>,
// <6 x float>, <27 x i32> and <8 x i64>, yet as written here these are plain
// scalar aliases with no matrix_type attribute. Confirm that nothing was
// dropped from these declarations.
typedef double dx5x5_t ;
typedef float fx2x3_t ;
typedef int ix9x3_t ;
typedef unsigned long long ullx4x2_t ;
// Floating point matrix/scalar additions.
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
// Integer matrix/scalar additions
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
void
// Tests for matrix multiplication.
void
void
// Integer aliases declared ahead of the matrix-by-matrix multiplication test.
// NOTE(review): the names suggest 3x9 and 9x9 shapes, which would agree with
// the <27 x i32> operands and <81 x i32> result expected just below, but no
// dimensions are visible in these declarations. Confirm against the full file.
typedef int ix3x9_t ;
typedef int ix9x9_t ;
// CHECK-LABEL: @multiply_matrix_matrix_int(
// CHECK: [[B:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
// CHECK-NEXT: [[C:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
// CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
// CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
// CHECK-NEXT: store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
// CHECK: ret void
//
void
// CHECK-LABEL: @multiply_double_matrix_scalar_float(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_double_matrix_scalar_float(
// CHECK: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_double_matrix_scalar_double(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_double_matrix_scalar_double(
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
void
// CHECK-LABEL: @multiply_float_matrix_scalar_double(
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_float_matrix_scalar_double(
// CHECK: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
void
// CHECK-LABEL: @multiply_int_matrix_scalar_short(
// CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[VECSPLAT]], [[MAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_int_matrix_scalar_short(
// CHECK: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_int_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
void
// CHECK-LABEL: @multiply_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
void
// CHECK-LABEL: @multiply_int_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
// CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @multiply_compound_int_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [27 x i32], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [27 x i32]* [[A_ADDR]] to <27 x i32>*
// CHECK-NEXT: store <27 x i32> [[A:%.*]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_double_matrix_scalar_float(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load float, float* %s.addr, align 4
// CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_double_matrix_scalar_double(
// CHECK: [[A:%.*]] = load <25 x double>, <25 x double>* {{.*}}, align 8
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <25 x double> poison, double [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <25 x double> [[VECINSERT]], <25 x double> poison, <25 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <25 x double> [[A]], [[VECSPLAT]]
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* {{.*}}, align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_float_matrix_scalar_double(
// CHECK: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load double, double* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_int_matrix_scalar_short(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i16, i16* %s.addr, align 2
// CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_EXT]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_int_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <27 x i32> poison, i32 [[S_TRUNC]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <27 x i32> [[VECINSERT]], <27 x i32> poison, <27 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = sdiv <27 x i32> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <27 x i32> [[RES]], <27 x i32>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_ull_matrix_scalar_ull(
// CHECK: [[MAT:%.*]] = load <8 x i64>, <8 x i64>* [[MAT_ADDR:%.*]], align 8
// CHECK-NEXT: [[S:%.*]] = load i64, i64* %s.addr, align 8
// CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i32 0
// CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
// CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]]
// CHECK-NEXT: store <8 x i64> [[RES]], <8 x i64>* [[MAT_ADDR]], align 8
// CHECK-NEXT: ret void
//
void
// CHECK-LABEL: @divide_float_matrix_constant(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [6 x float], align 4
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x float]* [[A_ADDR]] to <6 x float>*
// CHECK-NEXT: store <6 x float> [[A:%.*]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
// CHECK-NEXT: store <6 x float> [[RES]], <6 x float>* [[MAT_ADDR]], align 4
// CHECK-NEXT: ret void
//
void
// Tests for the matrix type operators.
// These repeat the dx5x5_t and fx2x3_t aliases already declared near the top
// of the file, with identical underlying types.
// NOTE(review): presumably left over from merging separate test files; confirm
// whether the repetition is intentional.
typedef double dx5x5_t ;
typedef float fx2x3_t ;
// Check that we can use matrix index expression on different floating point
// matrixes and indices.
void
void
void
void
void
// Check that we can use matrix index expressions on integer matrixes.
// (ix9x3_t repeats the int alias of the same name declared earlier in the file.)
typedef int ix9x3_t ;
void
// Check that we can use matrix index expressions on FP and integer
// matrixes.
// (ix9x3_t repeats the int alias of the same name declared earlier in the file.)
typedef int ix9x3_t ;
void
// Check that we can use overloaded matrix index expressions on matrixes with
// matching dimensions, but different element types.
// Two aliases with different element types (double and float).
// NOTE(review): the names suggest both are 3x3, but no dimensions are visible
// in these declarations. Confirm against the full file.
typedef double dx3x3_t ;
typedef float fx3x3_t ;
void
void
double
double
int
// Alias of double. NOTE(review): the name suggests a 3x2 shape, but no
// dimensions are visible in this declaration. Confirm against the full file.
typedef double dx3x2_t ;
double
double
void
void
;
void
void