Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s  -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK-NEON-RECIP
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-RECIP
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-RECIP
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-RECIP
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-RECIP
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp < %s | FileCheck %s --check-prefix=CHECK-MVE-SIZE
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-MAIN-SIZE
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.base-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8M-BASE-SIZE
; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK-V8R-SIZE

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

define i32 @load_extends() {
; CHECK-NEON-RECIP-LABEL: 'load_extends'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_extends'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1322 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 298 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 330 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_extends'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_extends'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_extends'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_extends'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_extends'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_extends'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r3 = zext i8 %loadi8 to i32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i8 %loadi8 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r5 = zext i8 %loadi8 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r6 = sext i16 %loadi16 to i32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %r7 = zext i16 %loadi16 to i32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r8 = sext i16 %loadi16 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %loadi8 = load i8, i8* undef
  %loadi16 = load i16, i16* undef
  %loadi32 = load i32, i32* undef
  %loadv2i8 = load <2 x i8>, <2 x i8>* undef
  %loadv4i8 = load <4 x i8>, <4 x i8>* undef
  %loadv8i8 = load <8 x i8>, <8 x i8>* undef
  %loadv16i8 = load <16 x i8>, <16 x i8>* undef
  %loadv2i16 = load <2 x i16>, <2 x i16>* undef
  %loadv4i16 = load <4 x i16>, <4 x i16>* undef
  %loadv8i16 = load <8 x i16>, <8 x i16>* undef
  %loadv2i32 = load <2 x i32>, <2 x i32>* undef
  %loadv4i32 = load <4 x i32>, <4 x i32>* undef

  %r0 = sext i8 %loadi8 to i16
  %r1 = zext i8 %loadi8 to i16
  %r2 = sext i8 %loadi8 to i32
  %r3 = zext i8 %loadi8 to i32
  %r4 = sext i8 %loadi8 to i64
  %r5 = zext i8 %loadi8 to i64
  %r6 = sext i16 %loadi16 to i32
  %r7 = zext i16 %loadi16 to i32
  %r8 = sext i16 %loadi16 to i64
  %r9 = zext i16 %loadi16 to i64
  %r10 = sext i32 %loadi32 to i64
  %r11 = zext i32 %loadi32 to i64

  %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
  %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
  %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
  %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
  %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
  %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
  %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
  %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
  %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
  %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
  %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
  %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
  %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
  %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
  %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
  %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
  %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
  %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
  %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
  %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
  %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
  %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
  %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
  %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>

  %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
  %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
  %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
  %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
  %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
  %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
  %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
  %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
  %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
  %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
  %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
  %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>

  %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
  %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
  %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
  %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>

  ret i32 undef
}

define i32 @store_trunc() {
; CHECK-NEON-RECIP-LABEL: 'store_trunc'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'store_trunc'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'store_trunc'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'store_trunc'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'store_trunc'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'store_trunc'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'store_trunc'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'store_trunc'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i832 = trunc i32 undef to i8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i864 = trunc i64 undef to i8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1632 = trunc i32 undef to i16
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i1664 = trunc i64 undef to i16
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i3264 = trunc i64 undef to i32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %i816 = trunc i16 undef to i8
  %i832 = trunc i32 undef to i8
  %i864 = trunc i64 undef to i8
  %i1632 = trunc i32 undef to i16
  %i1664 = trunc i64 undef to i16
  %i3264 = trunc i64 undef to i32

  %v2816 = trunc <2 x i16> undef to <2 x i8>
  %v2832 = trunc <2 x i32> undef to <2 x i8>
  %v2864 = trunc <2 x i64> undef to <2 x i8>
  %v4816 = trunc <4 x i16> undef to <4 x i8>
  %v4832 = trunc <4 x i32> undef to <4 x i8>
  %v4864 = trunc <4 x i64> undef to <4 x i8>
  %v8816 = trunc <8 x i16> undef to <8 x i8>
  %v8832 = trunc <8 x i32> undef to <8 x i8>
  %v8864 = trunc <8 x i64> undef to <8 x i8>
  %v16816 = trunc <16 x i16> undef to <16 x i8>
  %v16832 = trunc <16 x i32> undef to <16 x i8>
  %v16864 = trunc <16 x i64> undef to <16 x i8>

  %v21632 = trunc <2 x i32> undef to <2 x i16>
  %v21664 = trunc <2 x i64> undef to <2 x i16>
  %v41632 = trunc <4 x i32> undef to <4 x i16>
  %v41664 = trunc <4 x i64> undef to <4 x i16>
  %v81632 = trunc <8 x i32> undef to <8 x i16>
  %v81664 = trunc <8 x i64> undef to <8 x i16>

  %v23264 = trunc <2 x i64> undef to <2 x i32>
  %v43264 = trunc <4 x i64> undef to <4 x i32>

  store i8 %i816, i8* undef
  store i8 %i832, i8* undef
  store i8 %i864, i8* undef
  store i16 %i1632, i16* undef
  store i16 %i1664, i16* undef
  store i32 %i3264, i32* undef
  store <2 x i8> %v2816, <2 x i8>* undef
  store <2 x i8> %v2832, <2 x i8>* undef
  store <2 x i8> %v2864, <2 x i8>* undef
  store <4 x i8> %v4816, <4 x i8>* undef
  store <4 x i8> %v4832, <4 x i8>* undef
  store <4 x i8> %v4864, <4 x i8>* undef
  store <8 x i8> %v8816, <8 x i8>* undef
  store <8 x i8> %v8832, <8 x i8>* undef
  store <8 x i8> %v8864, <8 x i8>* undef
  store <16 x i8> %v16816, <16 x i8>* undef
  store <16 x i8> %v16832, <16 x i8>* undef
  store <16 x i8> %v16864, <16 x i8>* undef
  store <2 x i16> %v21632, <2 x i16>* undef
  store <2 x i16> %v21664, <2 x i16>* undef
  store <4 x i16> %v41632, <4 x i16>* undef
  store <4 x i16> %v41664, <4 x i16>* undef
  store <8 x i16> %v81632, <8 x i16>* undef
  store <8 x i16> %v81664, <8 x i16>* undef
  store <2 x i32> %v23264, <2 x i32>* undef
  store <4 x i32> %v43264, <4 x i32>* undef

  ret i32 undef
}

define i32 @load_fpextends() {
; CHECK-NEON-RECIP-LABEL: 'load_fpextends'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_fpextends'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fpextends'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_fpextends'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_fpextends'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_fpextends'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fpextends'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_fpextends'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_fpextends'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %loadf16 = load half, half* undef
  %loadf32 = load float, float* undef
  %loadv2f16 = load <2 x half>, <2 x half>* undef
  %loadv4f16 = load <4 x half>, <4 x half>* undef
  %loadv8f16 = load <8 x half>, <8 x half>* undef
  %loadv16f16 = load <16 x half>, <16 x half>* undef
  %loadv2f32 = load <2 x float>, <2 x float>* undef
  %loadv4f32 = load <4 x float>, <4 x float>* undef
  %loadv8f32 = load <8 x float>, <8 x float>* undef

  %r1 = fpext half %loadf16 to float
  %r2 = fpext half %loadf16 to double
  %r3 = fpext float %loadf32 to double

  %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
  %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
  %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
  %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
  %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
  %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
  %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
  %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
  %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
  %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
  %v11 = fpext <8 x float> %loadv8f32 to <8 x double>

  %loadv4f16ou = load <4 x half>, <4 x half>* undef
  %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>

  ret i32 undef
}

define i32 @load_fptrunc() {
; CHECK-NEON-RECIP-LABEL: 'load_fptrunc'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i3264 = fptrunc double undef to float
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_fptrunc'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_fptrunc'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %i1664 = fptrunc double undef to half
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i3264 = fptrunc double undef to float
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %i1632 = fptrunc float undef to half
  %i1664 = fptrunc double undef to half
  %i3264 = fptrunc double undef to float

  %v21632 = fptrunc <2 x float> undef to <2 x half>
  %v21664 = fptrunc <2 x double> undef to <2 x half>
  %v41632 = fptrunc <4 x float> undef to <4 x half>
  %v41664 = fptrunc <4 x double> undef to <4 x half>
  %v81632 = fptrunc <8 x float> undef to <8 x half>
  %v81664 = fptrunc <8 x double> undef to <8 x half>

  %v23264 = fptrunc <2 x double> undef to <2 x float>
  %v43264 = fptrunc <4 x double> undef to <4 x float>

  store half %i1632, half* undef
  store half %i1664, half* undef
  store float %i3264, float* undef
  store <2 x half> %v21632, <2 x half>* undef
  store <2 x half> %v21664, <2 x half>* undef
  store <4 x half> %v41632, <4 x half>* undef
  store <4 x half> %v41664, <4 x half>* undef
  store <8 x half> %v81632, <8 x half>* undef
  store <8 x half> %v81664, <8 x half>* undef
  store <2 x float> %v23264, <2 x float>* undef
  store <4 x float> %v43264, <4 x float>* undef

  ret i32 undef
}

define i32 @maskedload_extends() {
; CHECK-NEON-RECIP-LABEL: 'maskedload_extends'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_extends'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_extends'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_extends'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_extends'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_extends'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_extends'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_extends'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %loadv2i8 = call <2 x i8>  @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
  %loadv4i8 = call <4 x i8>  @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
  %loadv8i8 = call <8 x i8>  @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
  %loadv16i8 = call <16 x i8>  @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
  %loadv2i16 = call <2 x i16>  @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
  %loadv4i16 = call <4 x i16>  @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
  %loadv8i16 = call <8 x i16>  @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
  %loadv2i32 = call <2 x i32>  @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
  %loadv4i32 = call <4 x i32>  @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)

  %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
  %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
  %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
  %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
  %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
  %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
  %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
  %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
  %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
  %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
  %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
  %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
  %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
  %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
  %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
  %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
  %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
  %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
  %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
  %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
  %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
  %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
  %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
  %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>

  %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
  %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
  %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
  %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
  %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
  %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
  %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
  %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
  %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
  %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
  %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
  %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>

  %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
  %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
  %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
  %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>

  ret i32 undef
}

define i32 @maskedstore_trunc() {
; CHECK-NEON-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2832 = trunc <2 x i32> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %v2816 = trunc <2 x i16> undef to <2 x i8>
  %v2832 = trunc <2 x i32> undef to <2 x i8>
  %v2864 = trunc <2 x i64> undef to <2 x i8>
  %v4816 = trunc <4 x i16> undef to <4 x i8>
  %v4832 = trunc <4 x i32> undef to <4 x i8>
  %v4864 = trunc <4 x i64> undef to <4 x i8>
  %v8816 = trunc <8 x i16> undef to <8 x i8>
  %v8832 = trunc <8 x i32> undef to <8 x i8>
  %v8864 = trunc <8 x i64> undef to <8 x i8>
  %v16816 = trunc <16 x i16> undef to <16 x i8>
  %v16832 = trunc <16 x i32> undef to <16 x i8>
  %v16864 = trunc <16 x i64> undef to <16 x i8>

  %v21632 = trunc <2 x i32> undef to <2 x i16>
  %v21664 = trunc <2 x i64> undef to <2 x i16>
  %v41632 = trunc <4 x i32> undef to <4 x i16>
  %v41664 = trunc <4 x i64> undef to <4 x i16>
  %v81632 = trunc <8 x i32> undef to <8 x i16>
  %v81664 = trunc <8 x i64> undef to <8 x i16>

  %v23264 = trunc <2 x i64> undef to <2 x i32>
  %v43264 = trunc <4 x i64> undef to <4 x i32>

  call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
  call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
  call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
  call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
  call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)

  ret i32 undef
}

define i32 @maskedload_fpextends() {
; CHECK-NEON-RECIP-LABEL: 'maskedload_fpextends'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_fpextends'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fpextends'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fpextends'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_fpextends'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_fpextends'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_fpextends'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fpextends'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_fpextends'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %loadv2f16 = call <2 x half>  @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
  %loadv4f16 = call <4 x half>  @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
  %loadv8f16 = call <8 x half>  @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
  %loadv16f16 = call <16 x half>  @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
  %loadv2f32 = call <2 x float>  @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
  %loadv4f32 = call <4 x float>  @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
  %loadv8f32 = call <8 x float>  @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)

  %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
  %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
  %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
  %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
  %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
  %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
  %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
  %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
  %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
  %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
  %v11 = fpext <8 x float> %loadv8f32 to <8 x double>

  %loadv4f16ou = load <4 x half>, <4 x half>* undef
  %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>

  ret i32 undef
}

define i32 @maskedload_fptrunc() {
; CHECK-NEON-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %v21632 = fptrunc <2 x float> undef to <2 x half>
  %v21664 = fptrunc <2 x double> undef to <2 x half>
  %v41632 = fptrunc <4 x float> undef to <4 x half>
  %v41664 = fptrunc <4 x double> undef to <4 x half>
  %v81632 = fptrunc <8 x float> undef to <8 x half>
  %v81664 = fptrunc <8 x double> undef to <8 x half>

  %v23264 = fptrunc <2 x double> undef to <2 x float>
  %v43264 = fptrunc <4 x double> undef to <4 x float>

  call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
  call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
  call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
  call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)

  ret i32 undef
}

declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)
declare <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>*, i32, <2 x i1>, <2 x i16>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare void @llvm.masked.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, i32 immarg, <2 x i1>)
declare void @llvm.masked.store.v2i16.p0v2i16(<2 x i16>, <2 x i16>*, i32 immarg, <2 x i1>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32 immarg, <2 x i1>)
declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32 immarg, <16 x i1>)
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32 immarg, <16 x i1>)

declare <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>*, i32, <2 x i1>, <2 x half>)
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>*, i32, <16 x i1>, <16 x half>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare void @llvm.masked.store.v2f16.p0v2f16(<2 x half>, <2 x half>*, i32 immarg, <2 x i1>)
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32 immarg, <2 x i1>)
declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v16f16.p0v16f16(<16 x half>, <16 x half>*, i32 immarg, <16 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32 immarg, <16 x i1>)