; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512
;
; NOTE(review): the SSE prefix is run with -mattr=+sse2 (the same as SSE2), and
;       the AVX, AVX2 and AVX512 prefixes are all run with -mattr=+sse3 (the
;       same as SSE3), so these configurations do not exercise distinct feature
;       sets. Presumably +avx, +avx2 and +avx512f were intended - confirm, and
;       regenerate the assertions with the script named above if the flags are
;       changed.

; This test checks that the cost of a splat-load shuffle is correctly detected.
; If there is a combined load+broadcast instruction, like `movddup`, it should
; return 0.
;
; TODO: AVX `vbroadcast*` seems to support more types than the
;       2xdouble type of `movddup`:
;       - `vbroadcastss` supports 4xfloat, 8xfloat
;       - `vbroadcastsd` supports 4xdouble

; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
;       proper broadcast pattern from a scalar load (like the one that follows),
;       we use a vector load as the shuffle's operand to trigger the pattern.
; ; %load = load double, double *%ptr ; %insert = insertelement <2 x double> poison, double %load, i32 0 ; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer define void @shuffle_load() { ; SSE-LABEL: 'shuffle_load' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x 
i16>, ptr undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; SSE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE2-LABEL: 'shuffle_load' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr 
undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE3-LABEL: 'shuffle_load' ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x 
i16>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, 
align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; 
SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 
'shuffle_load' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x 
i32>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; AVX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; 
AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'shuffle_load' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = 
load <4 x i32>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, 
align 8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr 
undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'shuffle_load' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer ; 
AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer ; 
AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x 
i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %ld_2xi8 = load <2 x i8>, ptr undef %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer %ld_4xi8 = load 
<4 x i8>, ptr undef %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer %ld_8xi8 = load <8 x i8>, ptr undef %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer %ld_16xi8 = load <16 x i8>, ptr undef %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer %ld_32xi8 = load <32 x i8>, ptr undef %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer %ld_64xi8 = load <64 x i8>, ptr undef %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer %ld_2xi16 = load <2 x i16>, ptr undef %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer %ld_4xi16 = load <4 x i16>, ptr undef %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer %ld_8xi16 = load <8 x i16>, ptr undef %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer %ld_16xi16 = load <16 x i16>, ptr undef %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer %ld_32xi16 = load <32 x i16>, ptr undef %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer %ld_2xi32 = load <2 x i32>, ptr undef %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer %ld_4xi32 = load <4 x i32>, ptr undef %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer %ld_8xi32 = load <8 x i32>, ptr undef %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer %ld_16xi32 = load <16 x i32>, ptr undef %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer %ld_2xi64 = load <2 x i64>, ptr undef %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer %ld_4xi64 = load <4 x i64>, ptr undef %sf_4xi64 = shufflevector <4 x i64> 
%ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer %ld_8xi64 = load <8 x i64>, ptr undef %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer %ld_2xf16 = load <2 x half>, ptr undef %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer %ld_4xf16 = load <4 x half>, ptr undef %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer %ld_8xf16 = load <8 x half>, ptr undef %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer %ld_16xf16 = load <16 x half>, ptr undef %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer %ld_32xf16 = load <32 x half>, ptr undef %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer %ld_2xf32 = load <2 x float>, ptr undef %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer %ld_4xf32 = load <4 x float>, ptr undef %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer %ld_8xf32 = load <8 x float>, ptr undef %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer %ld_16xf32 = load <16 x float>, ptr undef %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer %ld_2xf64 = load <2 x double>, ptr undef %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer %ld_4xf64 = load <4 x double>, ptr undef %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer %ld_8xf64 = load <8 x double>, ptr undef %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer ret void }