; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV

; Declaration of the RISC-V vector vmacc intrinsic specialised for
; <vscale x 64 x i8> operands. It takes three scalable vectors followed by
; two i64 operands and returns a scalable vector of the same type.
; NOTE(review): the two trailing i64 operands are presumably the vector length
; and the policy; the only call in this file passes 1024 and 0 for them.
; Confirm against the intrinsic definition.
declare <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  i64,
  i64);

; Callee side of the test: receives three <vscale x 64 x i8> arguments and
; forwards them to the vmacc intrinsic.
; The expected assembly below shows the first two arguments used directly from
; v8 and v16, while the third is loaded with vl8r.v from the address in a0,
; i.e. it arrives indirectly through memory. The constant 1024 is materialised
; with li and fed to vsetvli as the requested vector length.
define <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0, <vscale x 64 x i8> %arg1, <vscale x 64 x i8> %arg2) {
; RV64IV-LABEL: callee:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    li a0, 1024
; RV64IV-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
; RV64IV-NEXT:    vmacc.vv v8, v16, v24
; RV64IV-NEXT:    ret
  ; Single intrinsic call; its result is returned unchanged.
  ; NOTE(review): the final i64 0 presumably selects the tu/mu policy seen in
  ; the expected vsetvli above; confirm against the intrinsic definition.
  %ret = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
                                  <vscale x 64 x i8> %arg0,
                                  <vscale x 64 x i8> %arg1,
                                  <vscale x 64 x i8> %arg2, i64 1024, i64 0)
  ret <vscale x 64 x i8> %ret
}

; Caller side of the test: loads three <vscale x 64 x i8> values from stack
; slots and passes them to @callee, returning its result.
; The expected assembly below shows:
;   - an 80-byte fixed frame with ra and s0 saved and s0 used as frame pointer;
;   - a further vlenb * 32 bytes subtracted from sp, then sp aligned down to a
;     64-byte boundary (andi sp, sp, -64);
;   - the three locals read with vl8r.v from sp-relative offsets of
;     vlenb * 24, vlenb * 16 and vlenb * 8 (each plus 64);
;   - the third argument stored with vs8r.v to sp + 64 and that address placed
;     in a0 for the call, while the first two stay in v8 and v16;
;   - sp restored from s0 in the epilogue before reloading ra and s0.
define <vscale x 64 x i8> @caller() {
; RV64IV-LABEL: caller:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    addi sp, sp, -80
; RV64IV-NEXT:    .cfi_def_cfa_offset 80
; RV64IV-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    .cfi_offset ra, -8
; RV64IV-NEXT:    .cfi_offset s0, -16
; RV64IV-NEXT:    addi s0, sp, 80
; RV64IV-NEXT:    .cfi_def_cfa s0, 0
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    andi sp, sp, -64
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 24
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v8, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 4
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v16, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    addi a1, sp, 64
; RV64IV-NEXT:    addi a0, sp, 64
; RV64IV-NEXT:    vs8r.v v24, (a1)
; RV64IV-NEXT:    call callee@plt
; RV64IV-NEXT:    addi sp, s0, -80
; RV64IV-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    addi sp, sp, 80
; RV64IV-NEXT:    ret
  ; Three scalable-vector stack slots; they are never stored to in this
  ; function, so the loaded contents are whatever the slots happen to hold.
  %local0 = alloca <vscale x 64 x i8>
  %local1 = alloca <vscale x 64 x i8>
  %local2 = alloca <vscale x 64 x i8>
  ; Loads are marked volatile, so each one appears as a vl8r.v in the
  ; expected assembly above.
  %arg0 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local0
  %arg1 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local1
  %arg2 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local2
  ; Call under test: all three loaded vectors are passed to @callee.
  %ret = call <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0,
                                         <vscale x 64 x i8> %arg1,
                                         <vscale x 64 x i8> %arg2)
  ret <vscale x 64 x i8> %ret
}