; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; Code generation for the llvm.aarch64.sve.dup.x.* intrinsics. The assertions
; expect the unpredicated DUP instruction, printed through its MOV / FMOV
; alias, for two operand kinds:
;   * a scalar register operand
;   * an immediate operand
;

; ---------------------------------------------------------------------------
; Integer elements: i8, i16, i32, i64.
; Each element type is exercised once with the incoming scalar argument and
; once with the constant 16.
; ---------------------------------------------------------------------------

define <vscale x 16 x i8> @dup_i8(i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
  ret <vscale x 16 x i8> %dup
}

define <vscale x 16 x i8> @dup_imm_i8() {
; CHECK-LABEL: dup_imm_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, #16 // =0x10
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
  ret <vscale x 16 x i8> %dup
}

define <vscale x 8 x i16> @dup_i16(i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
  ret <vscale x 8 x i16> %dup
}

; NOTE(review): %b is unused in this and the following *_imm_* functions; only
; the constant operand reaches the intrinsic.
define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
; CHECK-LABEL: dup_imm_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, #16 // =0x10
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  ret <vscale x 8 x i16> %dup
}

define <vscale x 4 x i32> @dup_i32(i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  ret <vscale x 4 x i32> %dup
}

define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
; CHECK-LABEL: dup_imm_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, #16 // =0x10
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
  ret <vscale x 4 x i32> %dup
}

define <vscale x 2 x i64> @dup_i64(i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
  ret <vscale x 2 x i64> %dup
}

define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
; CHECK-LABEL: dup_imm_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, #16 // =0x10
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
  ret <vscale x 2 x i64> %dup
}

; ---------------------------------------------------------------------------
; Floating-point elements: half, bfloat, float, double.
; Register operands arrive in h0/s0/d0 and are expected to be broadcast with
; the element form of MOV; the constant 16.0 is expected to become an FMOV
; immediate.
; ---------------------------------------------------------------------------

define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
  ret <vscale x 8 x half> %dup
}

; Uses attribute group #0 (defined at the end of the file) to enable +bf16.
define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
  ret <vscale x 8 x bfloat> %dup
}

define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.h, #16.00000000
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
  ret <vscale x 8 x half> %dup
}

define <vscale x 4 x float> @dup_f32(float %b) {
; CHECK-LABEL: dup_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
  ret <vscale x 4 x float> %dup
}

define <vscale x 4 x float> @dup_imm_f32(float %b) {
; CHECK-LABEL: dup_imm_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.s, #16.00000000
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
  ret <vscale x 4 x float> %dup
}

define <vscale x 2 x double> @dup_f64(double %b) {
; CHECK-LABEL: dup_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
  ret <vscale x 2 x double> %dup
}

define <vscale x 2 x double> @dup_imm_f64(double %b) {
; CHECK-LABEL: dup_imm_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.d, #16.00000000
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
  ret <vscale x 2 x double> %dup
}

; ---------------------------------------------------------------------------
; Floating-point constant 42.0.
; Unlike 16.0 above, the assertions expect the IEEE bit pattern to be placed
; in a general-purpose register first (1109917696 = 0x42280000 for float,
; 4631107791820423168 = 0x4045000000000000 for double) and then broadcast
; with MOV.
; NOTE(review): presumably because 42.0 cannot be encoded as an FMOV
; immediate - confirm against the Arm instruction reference.
; ---------------------------------------------------------------------------

define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
; CHECK-LABEL: dup_fmov_imm_f32_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ret
  %dup = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
  ret <vscale x 2 x float> %dup
}

define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
; CHECK-LABEL: dup_fmov_imm_f32_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ret
  %dup = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
  ret <vscale x 4 x float> %dup
}

define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
; CHECK-LABEL: dup_fmov_imm_f64_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #4631107791820423168
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ret
  %dup = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
  ret <vscale x 2 x double> %dup
}

; Intrinsic declarations, one per result vector type used above.
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }