forked from OSchip/llvm-project
[OpaquePtr][LoopAccessAnalysis] Support opaque pointers
Previously we relied on the pointee type to determine what type we need to do runtime pointer access checks. With opaque pointers, we can access a pointer with more than one type, so now we keep track of all the types we're accessing a pointer's memory with. Also some other minor getPointerElementType() removals. Reviewed By: #opaque-pointers, nikic Differential Revision: https://reviews.llvm.org/D119047
This commit is contained in:
parent
022baf71ed
commit
ff31020ee6
|
@ -406,8 +406,8 @@ public:
|
|||
/// according to the assumptions that we've made during the analysis.
|
||||
/// The method might also version the pointer stride according to \p Strides,
|
||||
/// and add new predicates to \p PSE.
|
||||
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
|
||||
unsigned ASId, const ValueToValueMap &Strides,
|
||||
void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
|
||||
unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
|
||||
PredicatedScalarEvolution &PSE);
|
||||
|
||||
/// No run-time memory checking is necessary.
|
||||
|
|
|
@ -189,8 +189,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
|
|||
///
|
||||
/// There is no conflict when the intervals are disjoint:
|
||||
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
|
||||
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
||||
unsigned DepSetId, unsigned ASId,
|
||||
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
|
||||
bool WritePtr, unsigned DepSetId,
|
||||
unsigned ASId,
|
||||
const ValueToValueMap &Strides,
|
||||
PredicatedScalarEvolution &PSE) {
|
||||
// Get the stride replaced scev.
|
||||
|
@ -227,8 +228,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
|||
// Add the size of the pointed element to ScEnd.
|
||||
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
|
||||
Type *IdxTy = DL.getIndexType(Ptr->getType());
|
||||
const SCEV *EltSizeSCEV =
|
||||
SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
|
||||
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
|
||||
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
|
||||
|
||||
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
|
||||
|
@ -522,19 +522,19 @@ public:
|
|||
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
|
||||
|
||||
/// Register a load and whether it is only read from.
|
||||
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
|
||||
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
|
||||
Value *Ptr = const_cast<Value*>(Loc.Ptr);
|
||||
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
|
||||
Accesses.insert(MemAccessInfo(Ptr, false));
|
||||
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
|
||||
if (IsReadOnly)
|
||||
ReadOnlyPtr.insert(Ptr);
|
||||
}
|
||||
|
||||
/// Register a store.
|
||||
void addStore(MemoryLocation &Loc) {
|
||||
void addStore(MemoryLocation &Loc, Type *AccessTy) {
|
||||
Value *Ptr = const_cast<Value*>(Loc.Ptr);
|
||||
AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
|
||||
Accesses.insert(MemAccessInfo(Ptr, true));
|
||||
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
|
||||
}
|
||||
|
||||
/// Check if we can emit a run-time no-alias check for \p Access.
|
||||
|
@ -545,12 +545,11 @@ public:
|
|||
/// we will attempt to use additional run-time checks in order to get
|
||||
/// the bounds of the pointer.
|
||||
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
|
||||
MemAccessInfo Access,
|
||||
MemAccessInfo Access, Type *AccessTy,
|
||||
const ValueToValueMap &Strides,
|
||||
DenseMap<Value *, unsigned> &DepSetId,
|
||||
Loop *TheLoop, unsigned &RunningDepId,
|
||||
unsigned ASId, bool ShouldCheckStride,
|
||||
bool Assume);
|
||||
unsigned ASId, bool ShouldCheckStride, bool Assume);
|
||||
|
||||
/// Check whether we can check the pointers at runtime for
|
||||
/// non-intersection.
|
||||
|
@ -583,14 +582,15 @@ public:
|
|||
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
|
||||
|
||||
private:
|
||||
typedef SetVector<MemAccessInfo> PtrAccessSet;
|
||||
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
|
||||
|
||||
/// Go over all memory access and check whether runtime pointer checks
|
||||
/// are needed and build sets of dependency check candidates.
|
||||
void processMemAccesses();
|
||||
|
||||
/// Set of all accesses.
|
||||
PtrAccessSet Accesses;
|
||||
/// Map of all accesses. Values are the types used to access memory pointed to
|
||||
/// by the pointer.
|
||||
PtrAccessMap Accesses;
|
||||
|
||||
/// The loop being checked.
|
||||
const Loop *TheLoop;
|
||||
|
@ -652,12 +652,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
|
|||
|
||||
/// Check whether a pointer address cannot wrap.
|
||||
static bool isNoWrap(PredicatedScalarEvolution &PSE,
|
||||
const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
|
||||
const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
|
||||
Loop *L) {
|
||||
const SCEV *PtrScev = PSE.getSCEV(Ptr);
|
||||
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
|
||||
return true;
|
||||
|
||||
Type *AccessTy = Ptr->getType()->getPointerElementType();
|
||||
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
|
||||
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
|
||||
return true;
|
||||
|
@ -689,7 +689,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
|
|||
}
|
||||
|
||||
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
|
||||
MemAccessInfo Access,
|
||||
MemAccessInfo Access, Type *AccessTy,
|
||||
const ValueToValueMap &StridesMap,
|
||||
DenseMap<Value *, unsigned> &DepSetId,
|
||||
Loop *TheLoop, unsigned &RunningDepId,
|
||||
|
@ -702,7 +702,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
|
|||
|
||||
// When we run after a failing dependency check we have to make sure
|
||||
// we don't have wrapping pointers.
|
||||
if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
|
||||
if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
|
||||
auto *Expr = PSE.getSCEV(Ptr);
|
||||
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
|
||||
return false;
|
||||
|
@ -723,11 +723,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
|
|||
DepId = RunningDepId++;
|
||||
|
||||
bool IsWrite = Access.getInt();
|
||||
RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
|
||||
RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
|
||||
LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
|
||||
ScalarEvolution *SE, Loop *TheLoop,
|
||||
|
@ -788,12 +788,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
|
|||
}
|
||||
|
||||
for (auto &Access : AccessInfos) {
|
||||
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
|
||||
RunningDepId, ASId, ShouldCheckWrap, false)) {
|
||||
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
|
||||
<< *Access.getPointer() << '\n');
|
||||
Retries.push_back(Access);
|
||||
CanDoAliasSetRT = false;
|
||||
for (auto &AccessTy : Accesses[Access]) {
|
||||
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
|
||||
DepSetId, TheLoop, RunningDepId, ASId,
|
||||
ShouldCheckWrap, false)) {
|
||||
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
|
||||
<< *Access.getPointer() << '\n');
|
||||
Retries.push_back(Access);
|
||||
CanDoAliasSetRT = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -815,13 +818,16 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
|
|||
// We know that we need these checks, so we can now be more aggressive
|
||||
// and add further checks if required (overflow checks).
|
||||
CanDoAliasSetRT = true;
|
||||
for (auto Access : Retries)
|
||||
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
|
||||
TheLoop, RunningDepId, ASId,
|
||||
ShouldCheckWrap, /*Assume=*/true)) {
|
||||
CanDoAliasSetRT = false;
|
||||
break;
|
||||
for (auto Access : Retries) {
|
||||
for (auto &AccessTy : Accesses[Access]) {
|
||||
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
|
||||
DepSetId, TheLoop, RunningDepId, ASId,
|
||||
ShouldCheckWrap, /*Assume=*/true)) {
|
||||
CanDoAliasSetRT = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CanDoRT &= CanDoAliasSetRT;
|
||||
|
@ -886,9 +892,12 @@ void AccessAnalysis::processMemAccesses() {
|
|||
LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
|
||||
LLVM_DEBUG({
|
||||
for (auto A : Accesses)
|
||||
dbgs() << "\t" << *A.getPointer() << " (" <<
|
||||
(A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
|
||||
"read-only" : "read")) << ")\n";
|
||||
dbgs() << "\t" << *A.first.getPointer() << " ("
|
||||
<< (A.first.getInt()
|
||||
? "write"
|
||||
: (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
|
||||
: "read"))
|
||||
<< ")\n";
|
||||
});
|
||||
|
||||
// The AliasSetTracker has nicely partitioned our pointers by metadata
|
||||
|
@ -907,13 +916,13 @@ void AccessAnalysis::processMemAccesses() {
|
|||
UnderlyingObjToAccessMap ObjToLastAccess;
|
||||
|
||||
// Set of access to check after all writes have been processed.
|
||||
PtrAccessSet DeferredAccesses;
|
||||
PtrAccessMap DeferredAccesses;
|
||||
|
||||
// Iterate over each alias set twice, once to process read/write pointers,
|
||||
// and then to process read-only pointers.
|
||||
for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
|
||||
bool UseDeferred = SetIteration > 0;
|
||||
PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
|
||||
PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
|
||||
|
||||
for (const auto &AV : AS) {
|
||||
Value *Ptr = AV.getValue();
|
||||
|
@ -921,10 +930,10 @@ void AccessAnalysis::processMemAccesses() {
|
|||
// For a single memory access in AliasSetTracker, Accesses may contain
|
||||
// both read and write, and they both need to be handled for CheckDeps.
|
||||
for (const auto &AC : S) {
|
||||
if (AC.getPointer() != Ptr)
|
||||
if (AC.first.getPointer() != Ptr)
|
||||
continue;
|
||||
|
||||
bool IsWrite = AC.getInt();
|
||||
bool IsWrite = AC.first.getInt();
|
||||
|
||||
// If we're using the deferred access set, then it contains only
|
||||
// reads.
|
||||
|
@ -946,7 +955,9 @@ void AccessAnalysis::processMemAccesses() {
|
|||
// consecutive as "read-only" pointers (so that we check
|
||||
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
|
||||
if (!UseDeferred && IsReadOnlyPtr) {
|
||||
DeferredAccesses.insert(Access);
|
||||
// We only use the pointer keys, the types vector values don't
|
||||
// matter.
|
||||
DeferredAccesses.insert({Access, {}});
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1518,8 +1529,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
|
|||
Value *BPtr = B.getPointer();
|
||||
bool AIsWrite = A.getInt();
|
||||
bool BIsWrite = B.getInt();
|
||||
Type *ATy = APtr->getType()->getPointerElementType();
|
||||
Type *BTy = BPtr->getType()->getPointerElementType();
|
||||
Type *ATy = getLoadStoreType(InstMap[AIdx]);
|
||||
Type *BTy = getLoadStoreType(InstMap[BIdx]);
|
||||
|
||||
// Two reads are independent.
|
||||
if (!AIsWrite && !BIsWrite)
|
||||
|
@ -1842,8 +1853,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
|
|||
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
||||
const TargetLibraryInfo *TLI,
|
||||
DominatorTree *DT) {
|
||||
typedef SmallPtrSet<Value*, 16> ValueSet;
|
||||
|
||||
// Holds the Load and Store instructions.
|
||||
SmallVector<LoadInst *, 16> Loads;
|
||||
SmallVector<StoreInst *, 16> Stores;
|
||||
|
@ -1975,11 +1984,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
// for read and once for write, it will only appear once (on the write
|
||||
// list). This is okay, since we are going to check for conflicts between
|
||||
// writes and between reads and writes, but not between reads and reads.
|
||||
ValueSet Seen;
|
||||
SmallSet<std::pair<Value *, Type *>, 16> Seen;
|
||||
|
||||
// Record uniform store addresses to identify if we have multiple stores
|
||||
// to the same address.
|
||||
ValueSet UniformStores;
|
||||
SmallPtrSet<Value *, 16> UniformStores;
|
||||
|
||||
for (StoreInst *ST : Stores) {
|
||||
Value *Ptr = ST->getPointerOperand();
|
||||
|
@ -1990,7 +1999,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
|
||||
// If we did *not* see this pointer before, insert it to the read-write
|
||||
// list. At this phase it is only a 'write' list.
|
||||
if (Seen.insert(Ptr).second) {
|
||||
Type *AccessTy = getLoadStoreType(ST);
|
||||
if (Seen.insert({Ptr, AccessTy}).second) {
|
||||
++NumReadWrites;
|
||||
|
||||
MemoryLocation Loc = MemoryLocation::get(ST);
|
||||
|
@ -2001,9 +2011,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
Loc.AATags.TBAA = nullptr;
|
||||
|
||||
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
|
||||
[&Accesses, Loc](Value *Ptr) {
|
||||
[&Accesses, AccessTy, Loc](Value *Ptr) {
|
||||
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
|
||||
Accesses.addStore(NewLoc);
|
||||
Accesses.addStore(NewLoc, AccessTy);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -2027,7 +2037,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
// read a few words, modify, and write a few words, and some of the
|
||||
// words may be written to the same address.
|
||||
bool IsReadOnlyPtr = false;
|
||||
if (Seen.insert(Ptr).second ||
|
||||
Type *AccessTy = getLoadStoreType(LD);
|
||||
if (Seen.insert({Ptr, AccessTy}).second ||
|
||||
!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
|
||||
++NumReads;
|
||||
IsReadOnlyPtr = true;
|
||||
|
@ -2049,9 +2060,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
|
|||
Loc.AATags.TBAA = nullptr;
|
||||
|
||||
visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
|
||||
[&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
|
||||
[&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
|
||||
MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
|
||||
Accesses.addLoad(NewLoc, IsReadOnlyPtr);
|
||||
Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,179 @@
|
|||
; RUN: opt -S -disable-output --opaque-pointers -passes='print-access-info' < %s 2>&1 | FileCheck %s
|
||||
|
||||
; In the function below some of the accesses are done as float types and some
|
||||
; are done as i32 types. When doing dependence analysis the type should not
|
||||
; matter if it can be determined that they are the same size.
|
||||
|
||||
%int_pair = type { i32, i32 }
|
||||
|
||||
; CHECK-LABEL: function 'backdep_type_size_equivalence':
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Forward:
|
||||
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Forward:
|
||||
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: BackwardVectorizable:
|
||||
; CHECK-NEXT: store float %val, ptr %gep.iv.min.100, align 8 ->
|
||||
; CHECK-NEXT: store i32 %indvars.iv.i32, ptr %gep.iv, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
|
||||
define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
|
||||
;; Load from vec[indvars.iv].x as float
|
||||
%gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
|
||||
%ld.f32 = load float, ptr %gep.iv, align 8
|
||||
%val = fmul fast float %ld.f32, 5.0
|
||||
|
||||
;; Store to vec[indvars.iv - 100].x as float
|
||||
%indvars.iv.min.100 = add nsw i64 %indvars.iv, -100
|
||||
%gep.iv.min.100 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.min.100, i32 0
|
||||
store float %val, ptr %gep.iv.min.100, align 8
|
||||
|
||||
;; Store to vec[indvars.iv].x as i32, creating a backward dependency between
|
||||
;; the two stores with different element types but the same element size.
|
||||
%indvars.iv.i32 = trunc i64 %indvars.iv to i32
|
||||
store i32 %indvars.iv.i32, ptr %gep.iv, align 8
|
||||
|
||||
;; Store to vec[indvars.iv].y as i32, strided accesses should be independent
|
||||
;; between the two stores with different element types but the same element size.
|
||||
%gep.iv.1 = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 1
|
||||
store i32 %indvars.iv.i32, ptr %gep.iv.1, align 8
|
||||
|
||||
;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case
|
||||
;; of unknown dependence distance.
|
||||
%indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
|
||||
%gep.iv.n = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv.n, i32 1
|
||||
store i32 %indvars.iv.i32, ptr %gep.iv.n, align 8
|
||||
|
||||
;; Loop condition.
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%cond = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; In the function below one of the accesses is done as i19 type, which has a
|
||||
; different store size than the i32 type, even though their alloc sizes are
|
||||
; equivalent. This is a negative test to ensure that they are not analyzed as
|
||||
; in the tests above.
|
||||
;
|
||||
; CHECK-LABEL: function 'backdep_type_store_size_equivalence':
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
|
||||
; CHECK-NEXT: Unknown data dependence.
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store i19 %indvars.iv.i19, ptr %gep.iv, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
|
||||
define void @backdep_type_store_size_equivalence(ptr nocapture %vec, i64 %n) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
|
||||
;; Load from vec[indvars.iv].x as float
|
||||
%gep.iv = getelementptr inbounds %int_pair, ptr %vec, i64 %indvars.iv, i32 0
|
||||
%ld.f32 = load float, ptr %gep.iv, align 8
|
||||
%val = fmul fast float %ld.f32, 5.0
|
||||
|
||||
;; Store to vec[indvars.iv].x as i19.
|
||||
%indvars.iv.i19 = trunc i64 %indvars.iv to i19
|
||||
store i19 %indvars.iv.i19, ptr %gep.iv, align 8
|
||||
|
||||
;; Loop condition.
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%cond = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; In the function below some of the accesses are done as double types and some
|
||||
; are done as i64 and i32 types. This is a negative test to ensure that they
|
||||
; are not analyzed as in the tests above.
|
||||
|
||||
; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence':
|
||||
; CHECK-NEXT: loop:
|
||||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop.
|
||||
; CHECK-NEXT: Unknown data dependence.
|
||||
; CHECK-NEXT: Dependences:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
|
||||
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
|
||||
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: ForwardButPreventsForwarding:
|
||||
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
|
||||
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Unknown:
|
||||
; CHECK-NEXT: store double %val, ptr %gep.iv.101, align 8 ->
|
||||
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: Run-time memory checks:
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
|
||||
define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
|
||||
|
||||
;; Load from vec[indvars.iv] as double
|
||||
%gep.iv = getelementptr i64, ptr %vec, i64 %indvars.iv
|
||||
%ld.f64 = load double, ptr %gep.iv, align 8
|
||||
%val = fmul fast double %ld.f64, 5.0
|
||||
|
||||
;; Store to vec[indvars.iv + 101] as double
|
||||
%indvars.iv.101 = add nsw i64 %indvars.iv, 101
|
||||
%gep.iv.101 = getelementptr i64, ptr %vec, i64 %indvars.iv.101
|
||||
store double %val, ptr %gep.iv.101, align 8
|
||||
|
||||
;; Read from vec[indvars.iv] as i64 creating
|
||||
;; a forward but prevents forwarding dependence
|
||||
;; with different types but same sizes.
|
||||
%ld.i64 = load i64, ptr %gep.iv, align 8
|
||||
|
||||
;; Different sizes
|
||||
%indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
|
||||
%gep.iv.n = getelementptr inbounds i64, ptr %vec, i64 %indvars.iv.n
|
||||
%ld.i64.i32 = trunc i64 %ld.i64 to i32
|
||||
store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
|
||||
|
||||
;; Loop condition.
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%cond = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %cond, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue