[LAA] Introduce enum for vectorization safety status (NFC).

This patch adds a VectorizationSafetyStatus enum, which will be extended
in a follow up patch to distinguish between 'safe with runtime checks'
and 'known unsafe' dependences.

Reviewers: anemet, anna, Ayal, hsaito

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D54892

llvm-svn: 349556
This commit is contained in:
Florian Hahn 2018-12-18 22:25:11 +00:00
parent 4e4920694c
commit 485f2826ba
3 changed files with 76 additions and 13 deletions

View File

@ -97,6 +97,17 @@ public:
/// Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
/// Type to keep track of the status of the dependence check. The order of
/// the elements is important and has to be from most permissive to least
/// permissive: statuses are combined by comparing enumerators with operator<
/// (see mergeInStatus), so a later enumerator overrides an earlier one.
enum class VectorizationSafetyStatus {
// Can vectorize safely without RT checks. All dependences are known to be
// safe.
Safe,
// Cannot vectorize due to unsafe or unknown dependences.
Unsafe,
};
/// Dependence between memory access instructions.
struct Dependence {
/// The type of the dependence.
@ -146,7 +157,7 @@ public:
Instruction *getDestination(const LoopAccessInfo &LAI) const;
/// Dependence types that don't prevent vectorization.
static bool isSafeForVectorization(DepType Type);
static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
/// Lexically forward dependence.
bool isForward() const;
@ -164,8 +175,8 @@ public:
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
RecordDependences(true) {}
ShouldRetryWithRuntimeCheck(false),
Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
/// Register the location (instructions are given increasing numbers)
/// of a write access.
@ -193,7 +204,9 @@ public:
/// No memory dependence was encountered that would inhibit
/// vectorization.
bool isSafeForVectorization() const { return SafeForVectorization; }
bool isSafeForVectorization() const {
return Status == VectorizationSafetyStatus::Safe;
}
/// The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
@ -269,9 +282,9 @@ private:
/// vectorize this loop with runtime checks.
bool ShouldRetryWithRuntimeCheck;
/// No memory dependence was encountered that would inhibit
/// vectorization.
bool SafeForVectorization;
/// Result of the dependence checks, indicating whether the checked
/// dependences are safe for vectorization or not.
VectorizationSafetyStatus Status;
//// True if Dependences reflects the dependences in the
//// loop. If false we exceeded MaxDependences and
@ -304,6 +317,10 @@ private:
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MaxSafeDepDistBytes.
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
/// Updates the current safety status with \p S. The status can only move
/// from Safe to Unsafe, never back.
void mergeInStatus(VectorizationSafetyStatus S);
};
/// Holds information about the memory runtime legality checks to verify

View File

@ -1221,18 +1221,19 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return X == PtrSCEVB;
}
bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
return true;
return VectorizationSafetyStatus::Safe;
case Unknown:
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
return false;
return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
@ -1317,6 +1318,11 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
/// Folds \p S into the accumulated safety status, keeping whichever of the
/// two values is the less permissive one.
void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  // Enumerators are declared from most to least permissive, so a greater
  // value is a stricter verdict. The stored status is never relaxed.
  const bool IsStricter = S > Status;
  if (IsStricter) {
    Status = S;
  }
}
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
@ -1652,7 +1658,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
SafeForVectorization &= Dependence::isSafeForVectorization(Type);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
@ -1669,7 +1675,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
<< "Too many dependences, stopped recording\n");
}
}
if (!RecordDependences && !SafeForVectorization)
if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
@ -1679,7 +1685,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
return SafeForVectorization;
return isSafeForVectorization();
}
SmallVector<Instruction *, 4>

View File

@ -117,6 +117,46 @@ loopexit:
ret void
}
; Check we do generate unnecessary runtime checks. They will always fail.
; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) {
; for (unsigned i = 1; i < n; i++) {
; a[i+offset] += a[i+offset2] + b;
; c[i] = c[i-1] + b;
; }
; }
;
; CHECK-LABEL: test_runtime_check2
; CHECK: <4 x float>
; Single-block loop with two groups of memory accesses:
;  - on %a: loads at index %iv + %offset and %iv + %offset2, and a store back
;    to index %iv + %offset;
;  - on %c: a load at index %iv - 1 followed by a store at index %iv.
define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) {
entry:
br label %for.body
for.body:
; Induction variable: starts at 0 and advances by 1 each iteration.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; %l1 = a[iv + offset]
%ind.sum = add i64 %iv, %offset
%arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
%l1 = load float, float* %arr.idx, align 4
; %l2 = a[iv + offset2]
%ind.sum2 = add i64 %iv, %offset2
%arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
%l2 = load float, float* %arr.idx2, align 4
; a[iv + offset] = %l1 + %b * %l2 (stored through the same pointer %l1 was
; loaded from).
%m = fmul fast float %b, %l2
%ad = fadd fast float %l1, %m
store float %ad, float* %arr.idx, align 4
; %lc = c[iv - 1]
%c.ind = add i64 %iv, -1
%c.idx = getelementptr inbounds float, float* %c, i64 %c.ind
%lc = load float, float* %c.idx, align 4
; c[iv] = %lc + 1.0, so each iteration reads the element that the previous
; iteration wrote.
%vc = fadd float %lc, 1.0
%c.idx2 = getelementptr inbounds float, float* %c, i64 %iv
store float %vc, float* %c.idx2
; Leave the loop once the incremented induction variable equals %n.
%iv.next = add nuw nsw i64 %iv, 1
%exitcond = icmp eq i64 %iv.next, %n
br i1 %exitcond, label %loopexit, label %for.body
loopexit:
ret void
}
; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
!llvm.module.flags = !{!0, !1}