forked from OSchip/llvm-project
[LAA] Introduce enum for vectorization safety status (NFC).
This patch adds a VectorizationSafetyStatus enum, which will be extended in a follow-up patch to distinguish between 'safe with runtime checks' and 'known unsafe' dependences. Reviewers: anemet, anna, Ayal, hsaito Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D54892 llvm-svn: 349556
This commit is contained in:
parent
4e4920694c
commit
485f2826ba
|
@ -97,6 +97,17 @@ public:
|
|||
/// Set of potential dependent memory accesses.
|
||||
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
|
||||
|
||||
/// Type to keep track of the status of the dependence check. The order of
|
||||
/// the elements is important and has to be from most permissive to least
|
||||
/// permissive.
|
||||
enum class VectorizationSafetyStatus {
|
||||
// Can vectorize safely without RT checks. All dependences are known to be
|
||||
// safe.
|
||||
Safe,
|
||||
// Cannot vectorize due to unsafe or unknown dependencies.
|
||||
Unsafe,
|
||||
};
|
||||
|
||||
/// Dependence between memory access instructions.
|
||||
struct Dependence {
|
||||
/// The type of the dependence.
|
||||
|
@ -146,7 +157,7 @@ public:
|
|||
Instruction *getDestination(const LoopAccessInfo &LAI) const;
|
||||
|
||||
/// Dependence types that don't prevent vectorization.
|
||||
static bool isSafeForVectorization(DepType Type);
|
||||
static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
|
||||
|
||||
/// Lexically forward dependence.
|
||||
bool isForward() const;
|
||||
|
@ -164,8 +175,8 @@ public:
|
|||
|
||||
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
|
||||
: PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U),
|
||||
ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
|
||||
RecordDependences(true) {}
|
||||
ShouldRetryWithRuntimeCheck(false),
|
||||
Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
|
||||
|
||||
/// Register the location (instructions are given increasing numbers)
|
||||
/// of a write access.
|
||||
|
@ -193,7 +204,9 @@ public:
|
|||
|
||||
/// No memory dependence was encountered that would inhibit
|
||||
/// vectorization.
|
||||
bool isSafeForVectorization() const { return SafeForVectorization; }
|
||||
bool isSafeForVectorization() const {
|
||||
return Status == VectorizationSafetyStatus::Safe;
|
||||
}
|
||||
|
||||
/// The maximum number of bytes of a vector register we can vectorize
|
||||
/// the accesses safely with.
|
||||
|
@ -269,9 +282,9 @@ private:
|
|||
/// vectorize this loop with runtime checks.
|
||||
bool ShouldRetryWithRuntimeCheck;
|
||||
|
||||
/// No memory dependence was encountered that would inhibit
|
||||
/// vectorization.
|
||||
bool SafeForVectorization;
|
||||
/// Result of the dependence checks, indicating whether the checked
|
||||
/// dependences are safe for vectorization or not.
|
||||
VectorizationSafetyStatus Status;
|
||||
|
||||
//// True if Dependences reflects the dependences in the
|
||||
//// loop. If false we exceeded MaxDependences and
|
||||
|
@ -304,6 +317,10 @@ private:
|
|||
/// \return false if we shouldn't vectorize at all or avoid larger
|
||||
/// vectorization factors by limiting MaxSafeDepDistBytes.
|
||||
bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
|
||||
|
||||
/// Updates the current safety status with \p S. We can go from Safe to
|
||||
/// Unsafe, but never back to Safe.
|
||||
void mergeInStatus(VectorizationSafetyStatus S);
|
||||
};
|
||||
|
||||
/// Holds information about the memory runtime legality checks to verify
|
||||
|
|
|
@ -1221,18 +1221,19 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
|
|||
return X == PtrSCEVB;
|
||||
}
|
||||
|
||||
bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
|
||||
MemoryDepChecker::VectorizationSafetyStatus
|
||||
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
|
||||
switch (Type) {
|
||||
case NoDep:
|
||||
case Forward:
|
||||
case BackwardVectorizable:
|
||||
return true;
|
||||
return VectorizationSafetyStatus::Safe;
|
||||
|
||||
case Unknown:
|
||||
case ForwardButPreventsForwarding:
|
||||
case Backward:
|
||||
case BackwardVectorizableButPreventsForwarding:
|
||||
return false;
|
||||
return VectorizationSafetyStatus::Unsafe;
|
||||
}
|
||||
llvm_unreachable("unexpected DepType!");
|
||||
}
|
||||
|
@ -1317,6 +1318,11 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
|
|||
return false;
|
||||
}
|
||||
|
||||
void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
|
||||
if (Status < S)
|
||||
Status = S;
|
||||
}
|
||||
|
||||
/// Given a non-constant (unknown) dependence-distance \p Dist between two
|
||||
/// memory accesses, that have the same stride whose absolute value is given
|
||||
/// in \p Stride, and that have the same type size \p TypeByteSize,
|
||||
|
@ -1652,7 +1658,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
|
|||
|
||||
Dependence::DepType Type =
|
||||
isDependent(*A.first, A.second, *B.first, B.second, Strides);
|
||||
SafeForVectorization &= Dependence::isSafeForVectorization(Type);
|
||||
mergeInStatus(Dependence::isSafeForVectorization(Type));
|
||||
|
||||
// Gather dependences unless we accumulated MaxDependences
|
||||
// dependences. In that case return as soon as we find the first
|
||||
|
@ -1669,7 +1675,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
|
|||
<< "Too many dependences, stopped recording\n");
|
||||
}
|
||||
}
|
||||
if (!RecordDependences && !SafeForVectorization)
|
||||
if (!RecordDependences && !isSafeForVectorization())
|
||||
return false;
|
||||
}
|
||||
++OI;
|
||||
|
@ -1679,7 +1685,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
|
|||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
|
||||
return SafeForVectorization;
|
||||
return isSafeForVectorization();
|
||||
}
|
||||
|
||||
SmallVector<Instruction *, 4>
|
||||
|
|
|
@ -117,6 +117,46 @@ loopexit:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Check we do generate unnecessary runtime checks. They will always fail.
|
||||
|
||||
; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) {
|
||||
; for (unsigned i = 1; i < n; i++) {
|
||||
; a[i+offset] += a[i+offset2] + b;
|
||||
; c[i] = c[i-1] + b;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; CHECK-LABEL: test_runtime_check2
|
||||
; CHECK: <4 x float>
|
||||
define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%ind.sum = add i64 %iv, %offset
|
||||
%arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
|
||||
%l1 = load float, float* %arr.idx, align 4
|
||||
%ind.sum2 = add i64 %iv, %offset2
|
||||
%arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
|
||||
%l2 = load float, float* %arr.idx2, align 4
|
||||
%m = fmul fast float %b, %l2
|
||||
%ad = fadd fast float %l1, %m
|
||||
store float %ad, float* %arr.idx, align 4
|
||||
%c.ind = add i64 %iv, -1
|
||||
%c.idx = getelementptr inbounds float, float* %c, i64 %c.ind
|
||||
%lc = load float, float* %c.idx, align 4
|
||||
%vc = fadd float %lc, 1.0
|
||||
%c.idx2 = getelementptr inbounds float, float* %c, i64 %iv
|
||||
store float %vc, float* %c.idx2
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond = icmp eq i64 %iv.next, %n
|
||||
br i1 %exitcond, label %loopexit, label %for.body
|
||||
|
||||
loopexit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
|
|
Loading…
Reference in New Issue