; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O2>' -enable-matrix -S %s | FileCheck %s

target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"
target triple = "systemz"

@ARR = internal global [100 x i32] zeroinitializer, align 4

; This test uses 'sub' instructions for gep offsets to allow
; codegen (LSR) to create optimal asm. If 'sub' is canonicalized
; to 'xor', then the backend needs to be able to see through
; that transform to produce optimal asm.
;
; Input shape: @foo first calls @populate on @ARR, then runs two nested
; counted loops. The outer loop runs %j.0 over 1, 2, 3 and the inner loop
; runs %i.0 over 0..31; each inner iteration adds %j.0 * ARR[99 - %i.0] to
; the running sum, which is returned. The parameter %a is not used by the
; body.
;
; Expected shape after the O2 pipeline (as recorded in the autogenerated
; assertions inside the function): three consecutive inner loops, one per
; outer iteration (plain add, 'shl 1', 'mul 3'), each unrolled by 8. Within
; each unrolled body the offsets are 'sub 0, iv', 'xor iv, -1', and then
; 'sub -2, iv' through 'sub -7, iv'; that is, only the "-1 - iv" offset is
; expected in its 'xor' form.

define dso_local zeroext i32 @foo(ptr noundef %a) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @populate(ptr noundef nonnull @ARR) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
; CHECK:       for.body4:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT:    [[SUM_11:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7:%.*]], [[FOR_BODY4]] ]
; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i64 0, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 4, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], [[SUM_11]]
; CHECK-NEXT:    [[IDX_NEG_19:%.*]] = xor i64 [[INDVARS_IV]], -1
; CHECK-NEXT:    [[ADD_PTR_110:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_19]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_110]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_111:%.*]] = add i32 [[TMP1]], [[ADD]]
; CHECK-NEXT:    [[IDX_NEG_216:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_217:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_216]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ADD_PTR_217]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_218:%.*]] = add i32 [[TMP2]], [[ADD_111]]
; CHECK-NEXT:    [[IDX_NEG_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_3]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ADD_PTR_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[TMP3]], [[ADD_218]]
; CHECK-NEXT:    [[IDX_NEG_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_4]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ADD_PTR_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[TMP4]], [[ADD_3]]
; CHECK-NEXT:    [[IDX_NEG_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_5]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ADD_PTR_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[TMP5]], [[ADD_4]]
; CHECK-NEXT:    [[IDX_NEG_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_6]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[TMP6]], [[ADD_5]]
; CHECK-NEXT:    [[IDX_NEG_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV]]
; CHECK-NEXT:    [[ADD_PTR_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_7]]
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ADD_PTR_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[ADD_7]] = add i32 [[TMP7]], [[ADD_6]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
; CHECK-NEXT:    [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], 32
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_7]], label [[FOR_BODY4_1:%.*]], label [[FOR_BODY4]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.body4.1:
; CHECK-NEXT:    [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1_7:%.*]], [[FOR_BODY4_1]] ], [ 0, [[FOR_BODY4]] ]
; CHECK-NEXT:    [[SUM_11_1:%.*]] = phi i32 [ [[ADD_1_7:%.*]], [[FOR_BODY4_1]] ], [ [[ADD_7]], [[FOR_BODY4]] ]
; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i64 0, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ADD_PTR_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1:%.*]] = shl i32 [[TMP8]], 1
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_1]], [[SUM_11_1]]
; CHECK-NEXT:    [[IDX_NEG_1_1:%.*]] = xor i64 [[INDVARS_IV_1]], -1
; CHECK-NEXT:    [[ADD_PTR_1_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_1]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ADD_PTR_1_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_1:%.*]] = shl i32 [[TMP9]], 1
; CHECK-NEXT:    [[ADD_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD_1]]
; CHECK-NEXT:    [[IDX_NEG_1_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_2]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ADD_PTR_1_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_2:%.*]] = shl i32 [[TMP10]], 1
; CHECK-NEXT:    [[ADD_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD_1_1]]
; CHECK-NEXT:    [[IDX_NEG_1_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_3]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ADD_PTR_1_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_3:%.*]] = shl i32 [[TMP11]], 1
; CHECK-NEXT:    [[ADD_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD_1_2]]
; CHECK-NEXT:    [[IDX_NEG_1_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_4]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ADD_PTR_1_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_4:%.*]] = shl i32 [[TMP12]], 1
; CHECK-NEXT:    [[ADD_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD_1_3]]
; CHECK-NEXT:    [[IDX_NEG_1_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_5]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ADD_PTR_1_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_5:%.*]] = shl i32 [[TMP13]], 1
; CHECK-NEXT:    [[ADD_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD_1_4]]
; CHECK-NEXT:    [[IDX_NEG_1_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_6]]
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ADD_PTR_1_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_6:%.*]] = shl i32 [[TMP14]], 1
; CHECK-NEXT:    [[ADD_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD_1_5]]
; CHECK-NEXT:    [[IDX_NEG_1_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_1]]
; CHECK-NEXT:    [[ADD_PTR_1_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_1_7]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ADD_PTR_1_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_1_7:%.*]] = shl i32 [[TMP15]], 1
; CHECK-NEXT:    [[ADD_1_7]] = add i32 [[MUL_1_7]], [[ADD_1_6]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_1_7]] = add nuw nsw i64 [[INDVARS_IV_1]], 8
; CHECK-NEXT:    [[EXITCOND_1_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1_7]], 32
; CHECK-NEXT:    br i1 [[EXITCOND_1_NOT_7]], label [[FOR_BODY4_2:%.*]], label [[FOR_BODY4_1]], !llvm.loop [[LOOP7]]
; CHECK:       for.body4.2:
; CHECK-NEXT:    [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2_7:%.*]], [[FOR_BODY4_2]] ], [ 0, [[FOR_BODY4_1]] ]
; CHECK-NEXT:    [[SUM_11_2:%.*]] = phi i32 [ [[ADD_2_7:%.*]], [[FOR_BODY4_2]] ], [ [[ADD_1_7]], [[FOR_BODY4_1]] ]
; CHECK-NEXT:    [[IDX_NEG_2:%.*]] = sub nsw i64 0, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ADD_PTR_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 [[TMP16]], 3
; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], [[SUM_11_2]]
; CHECK-NEXT:    [[IDX_NEG_2_1:%.*]] = xor i64 [[INDVARS_IV_2]], -1
; CHECK-NEXT:    [[ADD_PTR_2_1:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_1]]
; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ADD_PTR_2_1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul i32 [[TMP17]], 3
; CHECK-NEXT:    [[ADD_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD_2]]
; CHECK-NEXT:    [[IDX_NEG_2_2:%.*]] = sub nuw nsw i64 -2, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_2:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_2]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ADD_PTR_2_2]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul i32 [[TMP18]], 3
; CHECK-NEXT:    [[ADD_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD_2_1]]
; CHECK-NEXT:    [[IDX_NEG_2_3:%.*]] = sub nuw nsw i64 -3, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_3:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_3]]
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ADD_PTR_2_3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul i32 [[TMP19]], 3
; CHECK-NEXT:    [[ADD_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD_2_2]]
; CHECK-NEXT:    [[IDX_NEG_2_4:%.*]] = sub nuw nsw i64 -4, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_4:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_4]]
; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ADD_PTR_2_4]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul i32 [[TMP20]], 3
; CHECK-NEXT:    [[ADD_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD_2_3]]
; CHECK-NEXT:    [[IDX_NEG_2_5:%.*]] = sub nuw nsw i64 -5, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_5:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_5]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ADD_PTR_2_5]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul i32 [[TMP21]], 3
; CHECK-NEXT:    [[ADD_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD_2_4]]
; CHECK-NEXT:    [[IDX_NEG_2_6:%.*]] = sub nuw nsw i64 -6, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_6:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_6]]
; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ADD_PTR_2_6]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul i32 [[TMP22]], 3
; CHECK-NEXT:    [[ADD_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD_2_5]]
; CHECK-NEXT:    [[IDX_NEG_2_7:%.*]] = sub nuw nsw i64 -7, [[INDVARS_IV_2]]
; CHECK-NEXT:    [[ADD_PTR_2_7:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 [[IDX_NEG_2_7]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ADD_PTR_2_7]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul i32 [[TMP23]], 3
; CHECK-NEXT:    [[ADD_2_7]] = add i32 [[MUL_2_7]], [[ADD_2_6]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT_2_7]] = add nuw nsw i64 [[INDVARS_IV_2]], 8
; CHECK-NEXT:    [[EXITCOND_2_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2_7]], 32
; CHECK-NEXT:    br i1 [[EXITCOND_2_NOT_7]], label [[FOR_INC5_2:%.*]], label [[FOR_BODY4_2]], !llvm.loop [[LOOP7]]
; CHECK:       for.inc5.2:
; CHECK-NEXT:    ret i32 [[ADD_2_7]]
;
entry:
  ; Hand the global array to the external @populate before any element is read.
  call void @populate(ptr noundef @ARR)
  br label %for.cond

; Outer loop header: %j.0 starts at 1 and the loop continues while %j.0 < 4.
; %sum.0 carries the running sum across outer iterations.
for.cond:                                         ; preds = %for.inc5, %entry
  %j.0 = phi i32 [ 1, %entry ], [ %inc6, %for.inc5 ]
  %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.inc5 ]
  %cmp = icmp slt i32 %j.0, 4
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond
  br label %for.end7

for.body:                                         ; preds = %for.cond
  br label %for.cond1

; Inner loop header: %i.0 starts at 0 and the loop continues while %i.0 < 32.
; %sum.1 carries the running sum across inner iterations.
for.cond1:                                        ; preds = %for.inc, %for.body
  %sum.1 = phi i32 [ %sum.0, %for.body ], [ %add, %for.inc ]
  %i.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
  %cmp2 = icmp slt i32 %i.0, 32
  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3

for.cond.cleanup3:                                ; preds = %for.cond1
  br label %for.end

; Inner loop body: load ARR[99 - %i.0] and accumulate %j.0 times that value.
for.body4:                                        ; preds = %for.cond1
  %idx.ext = sext i32 %i.0 to i64
  ; The negated index is written as an explicit 'sub 0, x'; this is the
  ; instruction whose sub/xor form the test is about.
  %idx.neg = sub i64 0, %idx.ext
  ; Base pointer is &ARR[99]; the negative offset walks backwards from it.
  %add.ptr = getelementptr inbounds i32, ptr getelementptr inbounds ([100 x i32], ptr @ARR, i64 0, i64 99), i64 %idx.neg
  %0 = load i32, ptr %add.ptr, align 4, !tbaa !3
  %mul = mul i32 %j.0, %0
  %add = add i32 %sum.1, %mul
  br label %for.inc

; Inner loop latch: advance %i.0 by 1.
for.inc:                                          ; preds = %for.body4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond1, !llvm.loop !7

for.end:                                          ; preds = %for.cond.cleanup3
  br label %for.inc5

; Outer loop latch: advance %j.0 by 1.
for.inc5:                                         ; preds = %for.end
  %inc6 = add nsw i32 %j.0, 1
  br label %for.cond, !llvm.loop !9

; Function exit: return the sum accumulated over both loops.
for.end7:                                         ; preds = %for.cond.cleanup
  ret i32 %sum.0
}

; External routine that receives @ARR; its definition is not part of this file.
declare dso_local void @populate(ptr noundef) #1

; #0 is attached to @foo and #1 to @populate.
; NOTE(review): #2 does not appear to be referenced by anything in this input
; file - confirm whether it is still needed.
attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="z10" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="z10" }
attributes #2 = { argmemonly nocallback nofree nosync nounwind willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0bfef0669075f229fd325d8c8521c9adfb453f83)"}
; !3..!6: TBAA access tag chain ("int" -> "omnipotent char" -> root) used by the load in for.body4.
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
; !7 is the loop ID on the inner-loop latch and !9 the one on the outer-loop
; latch; both carry the mustprogress property !8.
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}