Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-SIZE
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-SIZE
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size  -mtriple=thumbv8.1m.main -mattr=+mve  | FileCheck %s --check-prefix=CHECK-MVE-SIZE
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=code-size  -mtriple=armv8a -mattr=+neon  | FileCheck %s --check-prefix=CHECK-NEON-SIZE
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-LATENCY
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-LATENCY
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=latency    -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-LATENCY
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.base   | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8m.main   | FileCheck %s --check-prefix=CHECK-V8M-THROUGHPUT
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-THROUGHPUT
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-THROUGHPUT
; RUN: opt < %s -passes='print<cost-model>' 2>&1 -disable-output -cost-kind=throughput -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-THROUGHPUT

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

define i32 @simple_loop_cost(i32 %N) {
; CHECK-T1-SIZE-LABEL: 'simple_loop_cost'
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-SIZE-LABEL: 'simple_loop_cost'
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-SIZE-LABEL: 'simple_loop_cost'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-NEON-SIZE-LABEL: 'simple_loop_cost'
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-LATENCY-LABEL: 'simple_loop_cost'
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-LATENCY-LABEL: 'simple_loop_cost'
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-LATENCY-LABEL: 'simple_loop_cost'
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-THROUGHPUT-LABEL: 'simple_loop_cost'
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-THROUGHPUT-LABEL: 'simple_loop_cost'
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_loop_cost'
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-THROUGHPUT-LABEL: 'simple_loop_cost'
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
; CHECK-NEON-THROUGHPUT-LABEL: 'simple_loop_cost'
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
entry:
  %zero = icmp eq i32 %N, 0
  br i1 %zero, label %exit, label %preheader

preheader:
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %iv.next = add nuw i32 %iv, 1
  %cmp = icmp ne i32 %iv.next, %N
  br i1 %cmp, label %loop, label %exit

exit:
  %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ret i32 %res
}

define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
; CHECK-T1-SIZE-LABEL: 'simple_mul_loop'
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-SIZE-LABEL: 'simple_mul_loop'
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-SIZE-LABEL: 'simple_mul_loop'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-NEON-SIZE-LABEL: 'simple_mul_loop'
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-LATENCY-LABEL: 'simple_mul_loop'
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-LATENCY-LABEL: 'simple_mul_loop'
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_loop'
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_loop'
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_loop'
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_loop'
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_loop'
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_loop'
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
entry:
  %zero = icmp eq i32 %N, 0
  br i1 %zero, label %exit, label %preheader

preheader:
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %addr.a = getelementptr i32, i32* %A, i32 %iv
  %load = load i32, i32* %addr.a
  %mul = mul i32 %load, %load
  %addr.b = getelementptr i32, i32* %B, i32 %iv
  store i32 %mul, i32* %addr.b
  %iv.next = add nuw i32 %iv, 1
  %cmp = icmp ne i32 %iv.next, %N
  br i1 %cmp, label %loop, label %exit

exit:
  %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ret i32 %res
}

define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
; CHECK-T1-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-NEON-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-LATENCY-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-T1-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-V8_1M-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
;
; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
;
entry:
  %zero = icmp eq i32 %N, 0
  br i1 %zero, label %exit, label %preheader

preheader:
  br label %loop

loop:
  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
  %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
  %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
  %load = load i16, i16* %addr.a
  %sext = sext i16 %load to i32
  %mul = mul i32 %sext, 7
  store i32 %mul, i32* %addr.b
  %iv.next = add nuw i32 %iv, 1
  %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
  %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
  %cmp = icmp ne i32 %iv.next, %N
  br i1 %cmp, label %loop, label %exit

exit:
  %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ret i32 %res
}