[OpaquePtr][LoopAccessAnalysis] Support opaque pointers

Previously we relied on the pointee type to determine what type we need
to do runtime pointer access checks.

With opaque pointers, we can access a pointer with more than one type,
so now we keep track of all the types we're accessing a pointer's
memory with.

Also some other minor getPointerElementType() removals.

Reviewed By: #opaque-pointers, nikic

Differential Revision: https://reviews.llvm.org/D119047
This commit is contained in:
Arthur Eubanks 2022-02-07 18:16:08 -08:00
parent 022baf71ed
commit ff31020ee6
3 changed files with 244 additions and 54 deletions

View File

@ -406,8 +406,8 @@ public:
/// according to the assumptions that we've made during the analysis.
/// The method might also version the pointer stride according to \p Strides,
/// and add new predicates to \p PSE.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
unsigned ASId, const ValueToValueMap &Strides,
void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE);
/// No run-time memory checking is necessary.

View File

@ -189,8 +189,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
unsigned DepSetId, unsigned ASId,
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
bool WritePtr, unsigned DepSetId,
unsigned ASId,
const ValueToValueMap &Strides,
PredicatedScalarEvolution &PSE) {
// Get the stride replaced scev.
@ -227,8 +228,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
const SCEV *EltSizeSCEV =
SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@ -522,19 +522,19 @@ public:
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// Register a store.
void addStore(MemoryLocation &Loc) {
void addStore(MemoryLocation &Loc, Type *AccessTy) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
/// Check if we can emit a run-time no-alias check for \p Access.
@ -545,12 +545,11 @@ public:
/// we will attempt to use additional run-time checks in order to get
/// the bounds of the pointer.
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access,
MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
unsigned ASId, bool ShouldCheckStride,
bool Assume);
unsigned ASId, bool ShouldCheckStride, bool Assume);
/// Check whether we can check the pointers at runtime for
/// non-intersection.
@ -583,14 +582,15 @@ public:
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
/// Set of all accesses.
PtrAccessSet Accesses;
/// Map of all accesses. Values are the types used to access memory pointed to
/// by the pointer.
PtrAccessMap Accesses;
/// The loop being checked.
const Loop *TheLoop;
@ -652,12 +652,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
Type *AccessTy = Ptr->getType()->getPointerElementType();
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@ -689,7 +689,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
}
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access,
MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
@ -702,7 +702,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
auto *Expr = PSE.getSCEV(Ptr);
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
return false;
@ -723,11 +723,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
DepId = RunningDepId++;
bool IsWrite = Access.getInt();
RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
return true;
}
}
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
@ -788,12 +788,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
for (auto &Access : AccessInfos) {
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
RunningDepId, ASId, ShouldCheckWrap, false)) {
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
<< *Access.getPointer() << '\n');
Retries.push_back(Access);
CanDoAliasSetRT = false;
for (auto &AccessTy : Accesses[Access]) {
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
DepSetId, TheLoop, RunningDepId, ASId,
ShouldCheckWrap, false)) {
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
<< *Access.getPointer() << '\n');
Retries.push_back(Access);
CanDoAliasSetRT = false;
}
}
}
@ -815,13 +818,16 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// We know that we need these checks, so we can now be more aggressive
// and add further checks if required (overflow checks).
CanDoAliasSetRT = true;
for (auto Access : Retries)
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
TheLoop, RunningDepId, ASId,
ShouldCheckWrap, /*Assume=*/true)) {
CanDoAliasSetRT = false;
break;
for (auto Access : Retries) {
for (auto &AccessTy : Accesses[Access]) {
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
DepSetId, TheLoop, RunningDepId, ASId,
ShouldCheckWrap, /*Assume=*/true)) {
CanDoAliasSetRT = false;
break;
}
}
}
}
CanDoRT &= CanDoAliasSetRT;
@ -886,9 +892,12 @@ void AccessAnalysis::processMemAccesses() {
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
LLVM_DEBUG({
for (auto A : Accesses)
dbgs() << "\t" << *A.getPointer() << " (" <<
(A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
"read-only" : "read")) << ")\n";
dbgs() << "\t" << *A.first.getPointer() << " ("
<< (A.first.getInt()
? "write"
: (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
: "read"))
<< ")\n";
});
// The AliasSetTracker has nicely partitioned our pointers by metadata
@ -907,13 +916,13 @@ void AccessAnalysis::processMemAccesses() {
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
PtrAccessSet DeferredAccesses;
PtrAccessMap DeferredAccesses;
// Iterate over each alias set twice, once to process read/write pointers,
// and then to process read-only pointers.
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
bool UseDeferred = SetIteration > 0;
PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
for (const auto &AV : AS) {
Value *Ptr = AV.getValue();
@ -921,10 +930,10 @@ void AccessAnalysis::processMemAccesses() {
// For a single memory access in AliasSetTracker, Accesses may contain
// both read and write, and they both need to be handled for CheckDeps.
for (const auto &AC : S) {
if (AC.getPointer() != Ptr)
if (AC.first.getPointer() != Ptr)
continue;
bool IsWrite = AC.getInt();
bool IsWrite = AC.first.getInt();
// If we're using the deferred access set, then it contains only
// reads.
@ -946,7 +955,9 @@ void AccessAnalysis::processMemAccesses() {
// consecutive as "read-only" pointers (so that we check
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
if (!UseDeferred && IsReadOnlyPtr) {
DeferredAccesses.insert(Access);
// We only use the pointer keys, the types vector values don't
// matter.
DeferredAccesses.insert({Access, {}});
continue;
}
@ -1518,8 +1529,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
Type *ATy = APtr->getType()->getPointerElementType();
Type *BTy = BPtr->getType()->getPointerElementType();
Type *ATy = getLoadStoreType(InstMap[AIdx]);
Type *BTy = getLoadStoreType(InstMap[BIdx]);
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@ -1842,8 +1853,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
const TargetLibraryInfo *TLI,
DominatorTree *DT) {
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@ -1975,11 +1984,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// for read and once for write, it will only appear once (on the write
// list). This is okay, since we are going to check for conflicts between
// writes and between reads and writes, but not between reads and reads.
ValueSet Seen;
SmallSet<std::pair<Value *, Type *>, 16> Seen;
// Record uniform store addresses to identify if we have multiple stores
// to the same address.
ValueSet UniformStores;
SmallPtrSet<Value *, 16> UniformStores;
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
@ -1990,7 +1999,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
Type *AccessTy = getLoadStoreType(ST);
if (Seen.insert({Ptr, AccessTy}).second) {
++NumReadWrites;
MemoryLocation Loc = MemoryLocation::get(ST);
@ -2001,9 +2011,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
[&Accesses, Loc](Value *Ptr) {
[&Accesses, AccessTy, Loc](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
Accesses.addStore(NewLoc);
Accesses.addStore(NewLoc, AccessTy);
});
}
}
@ -2027,7 +2037,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr).second ||
Type *AccessTy = getLoadStoreType(LD);
if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
@ -2049,9 +2060,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
Loc.AATags.TBAA = nullptr;
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
[&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
[&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
Accesses.addLoad(NewLoc, IsReadOnlyPtr);
Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
});
}

View File

@ -0,0 +1,179 @@
; RUN: opt -S -disable-output --opaque-pointers -passes='print-access-info' < %s 2>&1 | FileCheck %s
; In the function below some of the accesses are done as float types and some
; are done as i32 types. When doing dependence analysis the type should not
; matter if it can be determined that they are the same size.
%int_pair = type { i32, i32 }
; CHECK-LABEL: function 'backdep_type_size_equivalence':
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8
; CHECK-EMPTY:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8 ->
; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
;; Exercises accesses through a single pointer using two element types of the
;; same store size (float and i32, both 4 bytes). With opaque pointers the
;; access type comes from the load/store instruction, not the pointee type,
;; and same-size accesses should still be analyzable as safe dependences.
define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
entry:
br label %loop
loop:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
;; Load from vec[indvars.iv].x as float
%gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
%ld.f32 = load float, ptr %gep.iv, align 8
%val = fmul fast float %ld.f32, 5.0
;; Store to vec[indvars.iv - 100].x as float
%indvars.iv.min.100 = add nsw i64 %indvars.iv, -100
%gep.iv.min.100 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.min.100, i32 0
store float %val, ptr %gep.iv.min.100, align 8
;; Store to vec[indvars.iv].x as i32, creating a backward dependency between
;; the two stores with different element types but the same element size.
%indvars.iv.i32 = trunc i64 %indvars.iv to i32
store i32 %indvars.iv.i32, ptr %gep.iv, align 8
;; Store to vec[indvars.iv].y as i32, strided accesses should be independent
;; between the two stores with different element types but the same element size.
%gep.iv.1 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 1
store i32 %indvars.iv.i32, ptr %gep.iv.1, align 8
;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case
;; of unknown dependence distance.
%indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
%gep.iv.n = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.n, i32 1
store i32 %indvars.iv.i32, ptr %gep.iv.n, align 8
;; Loop condition.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%cond = icmp eq i64 %indvars.iv.next, %n
br i1 %cond, label %exit, label %loop
exit:
ret void
}
; In the function below one of the accesses is done as i19 type, which has a
; different store size than the i32 type, even though their alloc sizes are
; equivalent. This is a negative test to ensure that they are not analyzed as
; in the tests above.
;
; CHECK-LABEL: function 'backdep_type_store_size_equivalence':
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i19 %indvars.iv.i19, ptr %gep.iv, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
;; Negative test: the i19 store has a different store size (3 bytes) than the
;; float load (4 bytes) at the same address, even though their alloc sizes
;; both round up to 4 bytes, so the dependence must stay "unknown".
define void @backdep_type_store_size_equivalence(ptr nocapture %vec, i64 %n) {
entry:
br label %loop
loop:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
;; Load from vec[indvars.iv].x as float
%gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
%ld.f32 = load float, ptr %gep.iv, align 8
;; NOTE(review): %val is not used further; presumably it only exists to keep
;; the load live for the analysis — confirm against the original test intent.
%val = fmul fast float %ld.f32, 5.0
;; Store to vec[indvars.iv].x as i19.
%indvars.iv.i19 = trunc i64 %indvars.iv to i19
store i19 %indvars.iv.i19, ptr %gep.iv, align 8
;; Loop condition.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%cond = icmp eq i64 %indvars.iv.next, %n
br i1 %cond, label %exit, label %loop
exit:
ret void
}
; In the function below some of the accesses are done as double types and some
; are done as i64 and i32 types. This is a negative test to ensure that they
; are not analyzed as in the tests above.
; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence':
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
; CHECK-EMPTY:
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8
; CHECK-EMPTY:
; CHECK-NEXT: ForwardButPreventsForwarding:
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
;; Negative test mixing double/i64 (8-byte) accesses with an i32 (4-byte)
;; store at an unknown distance (%n): the differing access sizes prevent the
;; analysis from proving independence, so the loop is reported unsafe.
define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
entry:
br label %loop
loop:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
;; Load from vec[indvars.iv] as double
%gep.iv = getelementptr i64, ptr %vec, i64 %indvars.iv
%ld.f64 = load double, ptr %gep.iv, align 8
%val = fmul fast double %ld.f64, 5.0
;; Store to vec[indvars.iv + 101] as double
%indvars.iv.101 = add nsw i64 %indvars.iv, 101
%gep.iv.101 = getelementptr i64, ptr %vec, i64 %indvars.iv.101
store double %val, ptr %gep.iv.101, align 8
;; Read from vec[indvars.iv] as i64 creating
;; a forward but prevents forwarding dependence
;; with different types but same sizes.
%ld.i64 = load i64, ptr %gep.iv, align 8
;; Different sizes: the i32 store below is 4 bytes against the 8-byte
;; accesses above, at an offset of %n elements (unknown at compile time).
%indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
%gep.iv.n = getelementptr inbounds i64, ptr %vec, i64 %indvars.iv.n
%ld.i64.i32 = trunc i64 %ld.i64 to i32
store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
;; Loop condition.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%cond = icmp eq i64 %indvars.iv.next, %n
br i1 %cond, label %exit, label %loop
exit:
ret void
}