; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck --check-prefixes=CHECK,LIMIT-DEFAULT %s ; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -vector-combine-max-scan-instrs=2 -S %s | FileCheck --check-prefixes=CHECK,LIMIT2 %s define i32 @load_extract_idx_0(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_0( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } ; If the original load had a smaller alignment than the scalar type, the ; smaller alignment should be used. define i32 @load_extract_idx_0_small_alignment(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_0_small_alignment( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 2 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x, align 2 %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } define i32 @load_extract_idx_1(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_1( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 1 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 1 ret i32 %r } define i32 @load_extract_idx_2(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_2( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } define i32 @load_extract_idx_3(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_3( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 3 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 3 ret i32 %r } ; Out-of-bounds index for extractelement, should not be converted to narrow ; load, because it would introduce a dereference of a poison pointer. define i32 @load_extract_idx_4(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_idx_4( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 4 ret i32 %r } define i32 @load_extract_idx_var_i64(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX:%.*]] ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } declare void @maythrow() readnone define i32 @load_extract_idx_var_i64_known_valid_by_assume(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX]] ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %cmp = icmp ult i64 %idx, 4 call void @llvm.assume(i1 %cmp) %lv = load <4 x i32>, <4 x i32>* %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } declare i1 @cond() define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(<4 x i32>* %x, i64 %idx, i1 %c.1) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX]] ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[P]] ; entry: %cmp = icmp ult i64 %idx, 4 call void @llvm.assume(i1 %cmp) br i1 %c.1, label %loop, label %exit loop: %lv = load <4 x i32>, <4 x i32>* %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx %c.2 = call i1 @cond() br i1 %c.2, label %loop, label %exit exit: %p = phi i32 [ %r, %loop ], [ 0, %entry ] ret i32 %p } define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(<4 x i32>* %x, i64 %idx, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ASSUME_CHECK:%.*]], label [[LOOP:%.*]] ; CHECK: assume_check: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ASSUME_CHECK]] ] ; CHECK-NEXT: ret i32 0 ; entry: br i1 %c.1, label %assume_check, label %loop assume_check: %cmp = icmp ult i64 %idx, 4 call void @llvm.assume(i1 %cmp) br i1 %c.2, label %loop, label %exit loop: %lv = load <4 x i32>, <4 x i32>* %x call void @maythrow() %r = extractelement <4 x i32> %lv, i64 %idx %c.3 = call i1 @cond() br i1 %c.3, label %loop, label %exit exit: %p = phi i32 [ %r, %loop ], [ 0, %assume_check ] ret i32 0 } define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume_after_load( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %cmp = icmp ult i64 %idx, 4 %lv = load <4 x i32>, <4 x i32>* %x call void @maythrow() call void @llvm.assume(i1 %cmp) %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %cmp = icmp ult i64 %idx, 5 call void @llvm.assume(i1 %cmp) %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx ret i32 %r } declare void @llvm.assume(i1) define i32 @load_extract_idx_var_i64_known_valid_by_and(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 3 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(<4 x i32>* %x, i64 noundef %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 3 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i64_not_known_valid_by_and(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = and i64 %idx, 4 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i64_known_valid_by_urem(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 4 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(<4 x i32>* %x, i64 noundef %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 4 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(<4 x i32>* %x, i64 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_urem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] ; CHECK-NEXT: ret i32 [[R]] ; entry: %idx.clamped = urem i64 %idx, 5 %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i64 %idx.clamped ret i32 %r } define i32 @load_extract_idx_var_i32(<4 x i32>* %x, i32 %idx) { ; CHECK-LABEL: @load_extract_idx_var_i32( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 [[IDX:%.*]] ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 %idx ret i32 %r } declare void @clobber() define i32 @load_extract_clobber_call_before(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_clobber_call_before( ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; call void @clobber() %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } define i32 @load_extract_clobber_call_between(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_clobber_call_between( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x call void @clobber() %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } define i32 @load_extract_clobber_call_after(<4 x i32>* %x) { ; CHECK-LABEL: @load_extract_clobber_call_after( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 ; CHECK-NEXT: call void @clobber() ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 2 call void @clobber() ret i32 %r } define i32 @load_extract_clobber_store_before(<4 x i32>* %x, i8* %y) { ; CHECK-LABEL: @load_extract_clobber_store_before( ; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 ; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 ; CHECK-NEXT: ret i32 [[R]] ; store i8 0, i8* %y %lv = load <4 x i32>, <4 x i32>* %x %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } define i32 @load_extract_clobber_store_between(<4 x i32>* %x, i8* %y) { ; CHECK-LABEL: @load_extract_clobber_store_between( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: ret i32 [[R]] ; %lv = load <4 x i32>, <4 x i32>* %x store i8 0, i8* %y %r = extractelement <4 x i32> %lv, i32 2 ret i32 %r } define i32 @load_extract_clobber_store_between_limit(<4 x i32>* %x, i8* %y, <8 x i32> %z) { ; CHECK-LABEL: @load_extract_clobber_store_between_limit( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; CHECK-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] ; CHECK-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] ; CHECK-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] ; CHECK-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] ; CHECK-NEXT: store i8 0, i8* [[Y:%.*]], align 1 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; CHECK-NEXT: ret i32 [[ADD_4]] ; %lv = load <4 x i32>, <4 x i32>* %x %z.0 = extractelement <8 x i32> %z, i32 0 %z.1 = extractelement <8 x i32> %z, i32 1 %add.0 = add i32 %z.0, %z.1 %z.2 = extractelement <8 x i32> %z, i32 2 %add.1 = add i32 %add.0, %z.2 %z.3 = extractelement <8 x i32> %z, i32 3 %add.2 = add i32 %add.1, %z.3 %z.4 = extractelement <8 x i32> %z, i32 4 %add.3 = add i32 %add.2, %z.4 store i8 0, i8* %y %r = extractelement <4 x i32> %lv, i32 2 %add.4 = add i32 %add.3, %r ret i32 %add.4 } define i32 @load_extract_clobber_store_after_limit(<4 x i32>* %x, i8* %y, <8 x i32> %z) { ; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit( ; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; LIMIT-DEFAULT-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 ; LIMIT-DEFAULT-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] ; LIMIT-DEFAULT-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 ; LIMIT-DEFAULT-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] ; LIMIT-DEFAULT-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 ; LIMIT-DEFAULT-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] ; LIMIT-DEFAULT-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; LIMIT-DEFAULT-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] ; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i32 0, i32 2 ; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, i32* [[TMP1]], align 8 ; LIMIT-DEFAULT-NEXT: store i8 0, i8* [[Y:%.*]], align 1 ; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]] ; ; LIMIT2-LABEL: @load_extract_clobber_store_after_limit( ; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z:%.*]], i32 0 ; LIMIT2-NEXT: [[Z_1:%.*]] = extractelement <8 x i32> [[Z]], i32 1 ; LIMIT2-NEXT: [[ADD_0:%.*]] = add i32 [[Z_0]], [[Z_1]] ; LIMIT2-NEXT: [[Z_2:%.*]] = extractelement <8 x i32> [[Z]], i32 2 ; LIMIT2-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[Z_2]] ; LIMIT2-NEXT: [[Z_3:%.*]] = extractelement <8 x i32> [[Z]], i32 3 ; LIMIT2-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[Z_3]] ; LIMIT2-NEXT: [[Z_4:%.*]] = extractelement <8 x i32> [[Z]], i32 4 ; LIMIT2-NEXT: [[ADD_3:%.*]] = add i32 [[ADD_2]], [[Z_4]] ; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 ; LIMIT2-NEXT: store i8 0, i8* [[Y:%.*]], align 1 ; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[ADD_3]], [[R]] ; LIMIT2-NEXT: ret i32 [[ADD_4]] ; %lv = load <4 x i32>, <4 x i32>* %x %z.0 = extractelement <8 x i32> %z, i32 0 %z.1 = extractelement <8 x i32> %z, i32 1 %add.0 = add i32 %z.0, %z.1 %z.2 = extractelement <8 x i32> %z, i32 2 %add.1 = add i32 %add.0, %z.2 %z.3 = extractelement <8 x i32> %z, i32 3 %add.2 = add i32 %add.1, %z.3 %z.4 = extractelement <8 x i32> %z, i32 4 %add.3 = add i32 %add.2, %z.4 %r = extractelement <4 x i32> %lv, i32 2 store i8 0, i8* %y %add.4 = add i32 %add.3, %r ret i32 %add.4 } declare void @use.v4i32(<4 x i32>) define i32 @load_extract_idx_different_bbs(<4 x i32>* %x, i1 %c) { ; CHECK-LABEL: @load_extract_idx_different_bbs( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 1 ; CHECK-NEXT: ret i32 [[R]] ; CHECK: else: ; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[LV]]) ; CHECK-NEXT: ret i32 20 ; %lv = load <4 x i32>, <4 x i32>* %x br i1 %c, label %then, label %else then: %r = extractelement <4 x i32> %lv, i32 1 ret i32 %r else: call void @use.v4i32(<4 x i32> %lv) ret i32 20 } define i31 @load_with_non_power_of_2_element_type(<4 x i31>* %x) { ; CHECK-LABEL: @load_with_non_power_of_2_element_type( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i31>, <4 x i31>* [[X:%.*]], align 16 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i31> [[LV]], i32 1 ; CHECK-NEXT: ret i31 [[R]] ; %lv = load <4 x i31>, <4 x i31>* %x %r = extractelement <4 x i31> %lv, i32 1 ret i31 %r } ; Scalarizing the load for multiple constant indices may not be profitable. define i32 @load_multiple_extracts_with_constant_idx(<4 x i32>* %x) { ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]] ; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 ; CHECK-NEXT: ret i32 [[RES]] ; %lv = load <4 x i32>, <4 x i32>* %x %e.0 = extractelement <4 x i32> %lv, i32 0 %e.1 = extractelement <4 x i32> %lv, i32 1 %res = add i32 %e.0, %e.1 ret i32 %res } ; Scalarizing the load for multiple extracts is profitable in this case, ; because the vector large vector requires 2 vector registers. define i32 @load_multiple_extracts_with_constant_idx_profitable(<8 x i32>* %x) { ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[X:%.*]], i32 0, i32 0 ; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[X]], i32 0, i32 6 ; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 8 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %lv = load <8 x i32>, <8 x i32>* %x, align 16 %e.0 = extractelement <8 x i32> %lv, i32 0 %e.1 = extractelement <8 x i32> %lv, i32 6 %res = add i32 %e.0, %e.1 ret i32 %res } ; Scalarizing may or may not be profitable, depending on the target. define i32 @load_multiple_2_with_variable_indices(<4 x i32>* %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_2_with_variable_indices( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %lv = load <4 x i32>, <4 x i32>* %x %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } define i32 @load_4_extracts_with_variable_indices_short_vector(<4 x i32>* %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) { ; CHECK-LABEL: @load_4_extracts_with_variable_indices_short_vector( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, <4 x i32>* [[X:%.*]], align 16 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[E_2:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_2:%.*]] ; CHECK-NEXT: [[E_3:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_3:%.*]] ; CHECK-NEXT: [[RES_0:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: [[RES_1:%.*]] = add i32 [[RES_0]], [[E_2]] ; CHECK-NEXT: [[RES_2:%.*]] = add i32 [[RES_1]], [[E_3]] ; CHECK-NEXT: ret i32 [[RES_2]] ; %lv = load <4 x i32>, <4 x i32>* %x %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 %e.2 = extractelement <4 x i32> %lv, i64 %idx.2 %e.3 = extractelement <4 x i32> %lv, i64 %idx.3 %res.0 = add i32 %e.0, %e.1 %res.1 = add i32 %res.0, %e.2 %res.2 = add i32 %res.1, %e.3 ret i32 %res.2 } define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid( ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %cmp.idx.0 = icmp ult i64 %idx.0, 16 call void @llvm.assume(i1 %cmp.idx.0) %lv = load <16 x i32>, <16 x i32>* %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid( ; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) ; CHECK-NEXT: [[CMP_IDX_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 16 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_1]]) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X:%.*]], i32 0, i64 [[IDX_0]] ; CHECK-NEXT: [[E_0:%.*]] = load i32, i32* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[X]], i32 0, i64 [[IDX_1]] ; CHECK-NEXT: [[E_1:%.*]] = load i32, i32* [[TMP2]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %cmp.idx.0 = icmp ult i64 %idx.0, 16 call void @llvm.assume(i1 %cmp.idx.0) %cmp.idx.1 = icmp ult i64 %idx.1, 16 call void @llvm.assume(i1 %cmp.idx.1) %lv = load <16 x i32>, <16 x i32>* %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %idx.0.clamped = and i64 %idx.0, 15 %lv = load <16 x i32>, <16 x i32>* %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 %res = add i32 %e.0, %e.1 ret i32 %res } define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(<16 x i32>* %x, i64 %idx.0, i64 %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1_CLAMPED]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %idx.0.clamped = and i64 %idx.0, 15 %idx.1.clamped = and i64 %idx.1, 15 %lv = load <16 x i32>, <16 x i32>* %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped %res = add i32 %e.0, %e.1 ret i32 %res } define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(<16 x i32>* %x, i64 %idx.0, i64 noundef %idx.1) { ; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef( ; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 ; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 ; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, <16 x i32>* [[X:%.*]], align 64 ; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] ; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1_CLAMPED]] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] ; CHECK-NEXT: ret i32 [[RES]] ; %idx.0.clamped = and i64 %idx.0, 15 %idx.1.clamped = and i64 %idx.1, 15 %lv = load <16 x i32>, <16 x i32>* %x %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped %res = add i32 %e.0, %e.1 ret i32 %res } ; Test case from PR51992. define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, <16 x i8>* %src) { ; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[EXT_IDX]], 99 ; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[EXIT:%.*]] ; CHECK: then: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i8 0 ; entry: %ext.idx = extractelement <8 x i8> %in, i32 0 %ext.idx.i32 = zext i8 %ext.idx to i32 %cmp = icmp ult i8 %ext.idx, 99 br i1 %cmp, label %then, label %exit then: %load = load <16 x i8>, <16 x i8>* %src, align 16 %and = and i32 %ext.idx.i32, 15 %ext = extractelement <16 x i8> %load, i32 %and br label %exit exit: %p = phi i8 [ 0, %entry ], [ %ext, %then ] ret i8 0 }