; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -early-cse < %s | FileCheck %s ; Unequal mask check. ; Load-load: the second load can be removed if (assuming unequal masks) the ; second loaded value is a subset of the first loaded value considering the ; non-undef vector elements. In other words, if the second mask is a submask ; of the first one, and the through value of the second load is undef. ; Load-load, second mask is a submask of the first, second through is undef. ; Expect the second load to be removed. define <4 x i32> @f3(ptr %a0, <4 x i32> %a1) { ; CHECK-LABEL: @f3( ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V0]] ; CHECK-NEXT: ret <4 x i32> [[V2]] ; %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef) %v2 = add <4 x i32> %v0, %v1 ret <4 x i32> %v2 } ; Load-load, second mask is a submask of the first, second through is not undef. ; Expect the second load to remain. define <4 x i32> @f4(ptr %a0, <4 x i32> %a1) { ; CHECK-LABEL: @f4( ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]] ; CHECK-NEXT: ret <4 x i32> [[V2]] ; %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) %v2 = add <4 x i32> %v0, %v1 ret <4 x i32> %v2 } ; Load-load, second mask is not a submask of the first, second through is undef. ; Expect the second load to remain. define <4 x i32> @f5(ptr %a0, <4 x i32> %a1) { ; CHECK-LABEL: @f5( ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) ; CHECK-NEXT: [[V2:%.*]] = add <4 x i32> [[V0]], [[V1]] ; CHECK-NEXT: ret <4 x i32> [[V2]] ; %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) %v1 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) %v2 = add <4 x i32> %v0, %v1 ret <4 x i32> %v2 } ; Store-store: the first store can be removed if the first; mask is a submask ; of the second mask. ; Store-store, first mask is a submask of the second. ; Expect the first store to be removed. define void @f6(<4 x i32> %a0, ptr %a1) { ; CHECK-LABEL: @f6( ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ; CHECK-NEXT: ret void ; call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ret void } ; Store-store, first mask is not a submask of the second. ; Expect both stores to remain. define void @f7(<4 x i32> %a0, ptr %a1) { ; CHECK-LABEL: @f7( ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0]], ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) ; CHECK-NEXT: ret void ; call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) ret void } ; Load-store: the store can be removed if the store's mask is a submask of the ; load's mask. ; Load-store, second mask is a submask of the first. ; Expect the store to be removed. define <4 x i32> @f8(ptr %a0, <4 x i32> %a1) { ; CHECK-LABEL: @f8( ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[V0]] ; %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %a1) call void @llvm.masked.store.v4i32.p0(<4 x i32> %v0, ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) ret <4 x i32> %v0 } ; Load-store, second mask is not a submask of the first. ; Expect the store to remain. define <4 x i32> @f9(ptr %a0, <4 x i32> %a1) { ; CHECK-LABEL: @f9( ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A0:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V0]], ptr [[A0]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ; CHECK-NEXT: ret <4 x i32> [[V0]] ; %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a0, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a1) call void @llvm.masked.store.v4i32.p0(<4 x i32> %v0, ptr %a0, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ret <4 x i32> %v0 } ; Store-load: the load can be removed if load's mask is a submask of the ; store's mask, and the load's through value is undef. ; Store-load, load's mask is a submask of store's mask, thru is undef. ; Expect the load to be removed. define <4 x i32> @fa(<4 x i32> %a0, ptr %a1) { ; CHECK-LABEL: @fa( ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ; CHECK-NEXT: ret <4 x i32> [[A0]] ; call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> undef) ret <4 x i32> %v0 } ; Store-load, load's mask is a submask of store's mask, thru is not undef. ; Expect the load to remain. define <4 x i32> @fb(<4 x i32> %a0, ptr %a1) { ; CHECK-LABEL: @fb( ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) ; CHECK-NEXT: ret <4 x i32> [[V0]] ; call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>) %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer) ret <4 x i32> %v0 } ; Store-load, load's mask is not a submask of store's mask, thru is undef. ; Expect the load to remain. define <4 x i32> @fc(<4 x i32> %a0, ptr %a1) { ; CHECK-LABEL: @fc( ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) ; CHECK-NEXT: [[V0:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[A1]], i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) ; CHECK-NEXT: ret <4 x i32> [[V0]] ; call void @llvm.masked.store.v4i32.p0(<4 x i32> %a0, ptr %a1, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>) %v0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %a1, i32 4, <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> undef) ret <4 x i32> %v0 } declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)