; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=256 -passes='print<cost-model>' -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefix=FP-REDUCE define void @reduce_fadd_half() { ; FP-REDUCE-LABEL: 'reduce_fadd_half' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) %V2 = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) %V4 = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) %V8 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) %V16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) ret void } define void @reduce_fadd_float() { ; FP-REDUCE-LABEL: 'reduce_fadd_float' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) %V2 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) %V4 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) %V8 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) %V16 = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) ret void } define void @reduce_fadd_double() { ; FP-REDUCE-LABEL: 'reduce_fadd_double' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) %V2 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) %V4 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) %V8 = call fast double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) %V16 = call fast double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) ret void } define void @reduce_oredered_fadd_half() { ; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half' ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) ; FP-REDUCE-LABEL: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) %V8 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) %V16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) ret void } define void @reduce_oredered_fadd_float() { ; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) %V8 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) %V16 = call float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> undef) %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) ret void } define void @reduce_oredered_fadd_double() { ; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.000000e+00, <16 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) %V8 = call double @llvm.vector.reduce.fadd.v8f64(double 0.0, <8 x double> undef) %V16 = call double @llvm.vector.reduce.fadd.v16f64(double 0.0, <16 x double> undef) %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) ret void } declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>) declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>)