Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s

; All these tests create a vector tuple, insert z5 into one of the elements,
; and finally extracts that element from the wide vector to return it.  These
; checks ensure that z5 is always the value that is returned.

;
; Insert into two element tuples
;

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  {     z5     , tuple2.res1 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple2.res0, tuple2.res1 }
; insert z5:   { tuple2.res0,     z5      }
; extract z5:                     ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:       { tuple2.res0, tuple2.res1 }
; insert z5:   { tuple2.res0,     z5      }
; extract z0:         ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into three element tuples
;

; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:   {     z5     , tuple3.res0, tuple3.res2 }
; extract z5:        ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
; extract z5:                     ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
; extract z5:                                  ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
; extract z2:                                  ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) {
; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z3.d
; CHECK-NEXT:    ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into four element tuples
;

; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
; extract z5:        ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
; extract z5:                     ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
; extract z5:                                  ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z5:                                               ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z2:                                               ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) {
; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, z3.d
; CHECK-NEXT:    ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
  ret <vscale x 4 x i32> %ext
}

declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)

declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)

declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)