[BasicAA] Handle two unknown sizes for GEPs

If we have two unknown sizes and one GEP operand and one non-GEP
operand, then we currently simply return MayAlias. The comment says
we can't do anything useful ... but we can! We can still check that
the underlying objects are different (and do so for the GEP-GEP case).

To reduce the compile-time impact, this a) checks this early, before
doing the relatively expensive GEP decomposition that will not be
used and b) doesn't do the check if the other operand is a phi or
select. In that case, the phi/select will already recurse, so this
would just do two slightly different recursive walks that arrive at
the same roots.

Compile-time is still a bit of a mixed bag: https://llvm-compile-time-tracker.com/compare.php?from=624af932a808b363a888139beca49f57313d9a3b&to=845356e14adbe651a553ed11318ddb5e79a24bcd&stat=instructions
On average this is a small improvement, but sqlite with ThinLTO has
a 0.5% regression (lencod has a 1% improvement).

The BasicAA test case checks this by using two memsets with unknown
size. However, the more interesting case where this is useful is
the LoopVectorize test case, as analysis of accesses in loops tends
to always us unknown sizes.

Differential Revision: https://reviews.llvm.org/D92401
This commit is contained in:
Nikita Popov 2020-12-01 18:06:37 +01:00
parent a5f5612263
commit 8b1c4e310c
5 changed files with 35 additions and 56 deletions

View File

@ -1098,6 +1098,22 @@ AliasResult BasicAAResult::aliasGEP(
const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
// If both accesses are unknown size, we can only check whether the
// underlying objects are different.
if (!V1Size.hasValue() && !V2Size.hasValue()) {
// If the other operand is a phi/select, let phi/select handling perform
// this check. Otherwise the same recursive walk is done twice.
if (!isa<PHINode>(V2) && !isa<SelectInst>(V2)) {
AliasResult BaseAlias =
aliasCheck(UnderlyingV1, LocationSize::beforeOrAfterPointer(),
AAMDNodes(), UnderlyingV2,
LocationSize::beforeOrAfterPointer(), AAMDNodes(), AAQI);
if (BaseAlias == NoAlias)
return NoAlias;
}
return MayAlias;
}
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
@ -1156,10 +1172,6 @@ AliasResult BasicAAResult::aliasGEP(
// instruction. If one pointer is a GEP with a non-zero index of the other
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
if (!V1Size.hasValue() && !V2Size.hasValue())
return MayAlias;
AliasResult R = aliasCheck(
UnderlyingV1, LocationSize::beforeOrAfterPointer(), AAMDNodes(),
V2, V2Size, V2AAInfo, AAQI, nullptr, UnderlyingV2);

View File

@ -152,8 +152,7 @@ loop:
}
; CHECK-LABEL: phi_and_gep_unknown_size
; CHECK: Just Mod: call void @llvm.memset.p0i8.i32(i8* %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0i8.i32(i8* %z, i8 0, i32 %size, i1 false)
; TODO: This should be NoModRef.
; CHECK: NoModRef: call void @llvm.memset.p0i8.i32(i8* %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0i8.i32(i8* %z, i8 0, i32 %size, i1 false)
define void @phi_and_gep_unknown_size(i1 %c, i8* %x, i8* %y, i8* noalias %z, i32 %size) {
entry:
br i1 %c, label %true, label %false

View File

@ -267,9 +267,8 @@ exit:
; CHECK: NoAlias: i8* %a, i8* %p.base
; CHECK: NoAlias: i8* %a, i8* %p.outer
; CHECK: NoAlias: i8* %a, i8* %p.outer.next
; CHECK: MayAlias: i8* %a, i8* %p.inner
; CHECK: NoAlias: i8* %a, i8* %p.inner
; CHECK: NoAlias: i8* %a, i8* %p.inner.next
; TODO: (a, p.inner) could be NoAlias
define void @nested_loop2(i1 %c, i1 %c2, i8* noalias %p.base) {
entry:
%a = alloca i8

View File

@ -876,62 +876,31 @@ for.body:
define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias nocapture %z) {
; CHECK-LABEL: @mult_ptr_iv(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Z:%.*]], i32 3000
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[X:%.*]], i32 3000
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[Z]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[X]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* [[X]], i32 3000
; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, i8* [[Z]], i32 3000
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi i8* [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i8* [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI4:%.*]] = phi i8* [ [[Z:%.*]], [[ENTRY]] ], [ [[PTR_IND5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI4]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2
; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef), !alias.scope !26
; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], <i8 10, i8 10, i8 10, i8 10>
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]]
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER6]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !29, !noalias !26
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12
; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12
; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP31:!llvm.loop !.*]]
; CHECK: for.body:
; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
; CHECK-NEXT: [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[X_ADDR_050]], align 1
; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 2
; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1
; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP10]], 10
; CHECK-NEXT: [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]]
; CHECK-NEXT: [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 1
; CHECK-NEXT: store i8 [[MUL]], i8* [[Z_ADDR_049]], align 1
; CHECK-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 2
; CHECK-NEXT: store i8 [[MUL1]], i8* [[INCDEC_PTR32]], align 1
; CHECK-NEXT: [[INCDEC_PTR34]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 3
; CHECK-NEXT: store i8 [[MUL2]], i8* [[INCDEC_PTR33]], align 1
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_048]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], [[LOOP32:!llvm.loop !.*]]
; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, i8* [[POINTER_PHI4]], i32 12
; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;

View File

@ -12,7 +12,7 @@
@c1 = external global i16
define void @f(i16 %a) {
define void @f(i16 %a, [1 x i32]* %p) {
br label %bb0
bb0:
@ -24,9 +24,9 @@ bb1:
bb2:
%tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ]
%tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 4
%tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* %p, i32 0, i32 4
store i32 1, i32* %tmp4
%tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 9
%tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* %p, i32 0, i32 9
store i32 0, i32* %tmp5
%tmp3 = add i16 %tmp2, 1
store i16 %tmp2, i16* @c1