; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s

; @test: scalar input for the SLP vectorizer. For each k in 0..7 the body
; computes s[k] = p[addr[2k+1]] + p[addr[2k]], i.e. sixteen index loads from
; %addr, sixteen indexed loads from %p, eight adds and eight consecutive i32
; stores to %s. All three pointer arguments are marked noalias.
;
; The autogenerated assertions below expect that pattern to become: two
; 8-lane masked gathers of the indices (even offsets 0..14 and odd offsets
; 1..15 from %addr), two 8-lane masked gathers from %p using those index
; vectors, one <8 x i32> add, and a single <8 x i32> store through %s.
;
; NOTE(review): every index load claims align 8, including those at odd i32
; offsets from %addr; the expected index gathers carry alignment 8 as well.
; Confirm that alignment is intentional before changing it.
define void @test(i32* noalias %p, i32* noalias %addr, i32* noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32*> poison, i32* [[ADDR:%.*]], i32 0
; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x i32*> [[TMP0]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE1]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32*> poison, i32* [[P:%.*]], i32 0
; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x i32*> [[TMP4]], <8 x i32*> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE2]], <8 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP5]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, <8 x i32*> [[SHUFFLE2]], <8 x i32> [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> [[TMP8]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
; CHECK-NEXT:    store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Lane 0: s[0] = p[addr[1]] + p[addr[0]]
  %idx1 = load i32, i32 *%addr, align 8
  %arrayidx = getelementptr inbounds i32, i32* %p, i32 %idx1
  %i = load i32, i32* %arrayidx, align 4
  %gep2 = getelementptr inbounds i32, i32* %addr, i32 1
  %idx2 = load i32, i32 *%gep2, align 8
  %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 %idx2
  %i1 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %i1, %i
  %arrayidx2 = getelementptr inbounds i32, i32* %s, i32 0
  store i32 %add, i32* %arrayidx2, align 4
  ; Lane 1: s[1] = p[addr[3]] + p[addr[2]]
  %gep3 = getelementptr inbounds i32, i32* %addr, i32 2
  %idx3 = load i32, i32 *%gep3, align 8
  %arrayidx4 = getelementptr inbounds i32, i32* %p, i32 %idx3
  %i2 = load i32, i32* %arrayidx4, align 4
  %gep4 = getelementptr inbounds i32, i32* %addr, i32 3
  %idx4 = load i32, i32 *%gep4, align 8
  %arrayidx6 = getelementptr inbounds i32, i32* %p, i32 %idx4
  %i3 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 %i3, %i2
  %arrayidx9 = getelementptr inbounds i32, i32* %s, i32 1
  store i32 %add7, i32* %arrayidx9, align 4
  ; Lane 2: s[2] = p[addr[5]] + p[addr[4]]
  %gep5 = getelementptr inbounds i32, i32* %addr, i32 4
  %idx5 = load i32, i32 *%gep5, align 8
  %arrayidx11 = getelementptr inbounds i32, i32* %p, i32 %idx5
  %i4 = load i32, i32* %arrayidx11, align 4
  %gep6 = getelementptr inbounds i32, i32* %addr, i32 5
  %idx6 = load i32, i32 *%gep6, align 8
  %arrayidx13 = getelementptr inbounds i32, i32* %p, i32 %idx6
  %i5 = load i32, i32* %arrayidx13, align 4
  %add14 = add nsw i32 %i5, %i4
  %arrayidx16 = getelementptr inbounds i32, i32* %s, i32 2
  store i32 %add14, i32* %arrayidx16, align 4
  ; Lane 3: s[3] = p[addr[7]] + p[addr[6]]
  %gep7 = getelementptr inbounds i32, i32* %addr, i32 6
  %idx7 = load i32, i32 *%gep7, align 8
  %arrayidx18 = getelementptr inbounds i32, i32* %p, i32 %idx7
  %i6 = load i32, i32* %arrayidx18, align 4
  %gep8 = getelementptr inbounds i32, i32* %addr, i32 7
  %idx8 = load i32, i32 *%gep8, align 8
  %arrayidx20 = getelementptr inbounds i32, i32* %p, i32 %idx8
  %i7 = load i32, i32* %arrayidx20, align 4
  %add21 = add nsw i32 %i7, %i6
  %arrayidx23 = getelementptr inbounds i32, i32* %s, i32 3
  store i32 %add21, i32* %arrayidx23, align 4
  ; Lane 4: s[4] = p[addr[9]] + p[addr[8]]
  %gep9 = getelementptr inbounds i32, i32* %addr, i32 8
  %idx9 = load i32, i32 *%gep9, align 8
  %arrayidx25 = getelementptr inbounds i32, i32* %p, i32 %idx9
  %i8 = load i32, i32* %arrayidx25, align 4
  %gep10 = getelementptr inbounds i32, i32* %addr, i32 9
  %idx10 = load i32, i32 *%gep10, align 8
  %arrayidx27 = getelementptr inbounds i32, i32* %p, i32 %idx10
  %i9 = load i32, i32* %arrayidx27, align 4
  %add28 = add nsw i32 %i9, %i8
  %arrayidx30 = getelementptr inbounds i32, i32* %s, i32 4
  store i32 %add28, i32* %arrayidx30, align 4
  ; Lane 5: s[5] = p[addr[11]] + p[addr[10]]
  %gep11 = getelementptr inbounds i32, i32* %addr, i32 10
  %idx11 = load i32, i32 *%gep11, align 8
  %arrayidx32 = getelementptr inbounds i32, i32* %p, i32 %idx11
  %i10 = load i32, i32* %arrayidx32, align 4
  %gep12 = getelementptr inbounds i32, i32* %addr, i32 11
  %idx12 = load i32, i32 *%gep12, align 8
  %arrayidx34 = getelementptr inbounds i32, i32* %p, i32 %idx12
  %i11 = load i32, i32* %arrayidx34, align 4
  %add35 = add nsw i32 %i11, %i10
  %arrayidx37 = getelementptr inbounds i32, i32* %s, i32 5
  store i32 %add35, i32* %arrayidx37, align 4
  ; Lane 6: s[6] = p[addr[13]] + p[addr[12]]
  %gep13 = getelementptr inbounds i32, i32* %addr, i32 12
  %idx13 = load i32, i32 *%gep13, align 8
  %arrayidx39 = getelementptr inbounds i32, i32* %p, i32 %idx13
  %i12 = load i32, i32* %arrayidx39, align 4
  %gep14 = getelementptr inbounds i32, i32* %addr, i32 13
  %idx14 = load i32, i32 *%gep14, align 8
  %arrayidx41 = getelementptr inbounds i32, i32* %p, i32 %idx14
  %i13 = load i32, i32* %arrayidx41, align 4
  %add42 = add nsw i32 %i13, %i12
  %arrayidx44 = getelementptr inbounds i32, i32* %s, i32 6
  store i32 %add42, i32* %arrayidx44, align 4
  ; Lane 7: s[7] = p[addr[15]] + p[addr[14]]
  %gep15 = getelementptr inbounds i32, i32* %addr, i32 14
  %idx15 = load i32, i32 *%gep15, align 8
  %arrayidx46 = getelementptr inbounds i32, i32* %p, i32 %idx15
  %i14 = load i32, i32* %arrayidx46, align 4
  %gep16 = getelementptr inbounds i32, i32* %addr, i32 15
  %idx16 = load i32, i32 *%gep16, align 8
  %arrayidx48 = getelementptr inbounds i32, i32* %p, i32 %idx16
  %i15 = load i32, i32* %arrayidx48, align 4
  %add49 = add nsw i32 %i15, %i14
  %arrayidx51 = getelementptr inbounds i32, i32* %s, i32 7
  store i32 %add49, i32* %arrayidx51, align 4
  ret void
}