forked from OSchip/llvm-project
[SLP] Fix for PR32036: Vectorized horizontal reduction returning wrong
result Summary: If the same value is used several times as an extra value, SLP vectorizer takes it into account only once instead of actual number of using. For example: ``` int val = 1; for (int y = 0; y < 8; y++) { for (int x = 0; x < 8; x++) { val = val + input[y * 8 + x] + 3; } } ``` We have 2 extra rguments: `1` - initial value of horizontal reduction and `3`, which is added 8*8 times to the reduction. Before the patch we added `1` to the reduction value and added once `3`, though it must be added 64 times. Reviewers: mkuper, mzolotukhin Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30262 llvm-svn: 295949
This commit is contained in:
parent
39af790bb1
commit
68f2402c61
|
@ -304,6 +304,7 @@ public:
|
||||||
typedef SmallVector<Instruction *, 16> InstrList;
|
typedef SmallVector<Instruction *, 16> InstrList;
|
||||||
typedef SmallPtrSet<Value *, 16> ValueSet;
|
typedef SmallPtrSet<Value *, 16> ValueSet;
|
||||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||||
|
typedef MapVector<Value *, std::vector<DebugLoc>> ExtraValueToDebugLocsMap;
|
||||||
|
|
||||||
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
|
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
|
||||||
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
|
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
|
||||||
|
@ -333,7 +334,7 @@ public:
|
||||||
/// Vectorize the tree but with the list of externally used values \p
|
/// Vectorize the tree but with the list of externally used values \p
|
||||||
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
|
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
|
||||||
/// generated extractvalue instructions.
|
/// generated extractvalue instructions.
|
||||||
Value *vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues);
|
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
|
||||||
|
|
||||||
/// \returns the cost incurred by unwanted spills and fills, caused by
|
/// \returns the cost incurred by unwanted spills and fills, caused by
|
||||||
/// holding live values over call sites.
|
/// holding live values over call sites.
|
||||||
|
@ -352,7 +353,7 @@ public:
|
||||||
/// into account (anf updating it, if required) list of externally used
|
/// into account (anf updating it, if required) list of externally used
|
||||||
/// values stored in \p ExternallyUsedValues.
|
/// values stored in \p ExternallyUsedValues.
|
||||||
void buildTree(ArrayRef<Value *> Roots,
|
void buildTree(ArrayRef<Value *> Roots,
|
||||||
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
|
ExtraValueToDebugLocsMap &ExternallyUsedValues,
|
||||||
ArrayRef<Value *> UserIgnoreLst = None);
|
ArrayRef<Value *> UserIgnoreLst = None);
|
||||||
|
|
||||||
/// Clear the internal data structures that are created by 'buildTree'.
|
/// Clear the internal data structures that are created by 'buildTree'.
|
||||||
|
@ -953,11 +954,11 @@ private:
|
||||||
|
|
||||||
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||||
ArrayRef<Value *> UserIgnoreLst) {
|
ArrayRef<Value *> UserIgnoreLst) {
|
||||||
MapVector<Value *, DebugLoc> ExternallyUsedValues;
|
ExtraValueToDebugLocsMap ExternallyUsedValues;
|
||||||
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
|
buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
|
||||||
}
|
}
|
||||||
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||||
MapVector<Value *, DebugLoc> &ExternallyUsedValues,
|
ExtraValueToDebugLocsMap &ExternallyUsedValues,
|
||||||
ArrayRef<Value *> UserIgnoreLst) {
|
ArrayRef<Value *> UserIgnoreLst) {
|
||||||
deleteTree();
|
deleteTree();
|
||||||
UserIgnoreList = UserIgnoreLst;
|
UserIgnoreList = UserIgnoreLst;
|
||||||
|
@ -2801,12 +2802,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, TreeEntry *E) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *BoUpSLP::vectorizeTree() {
|
Value *BoUpSLP::vectorizeTree() {
|
||||||
MapVector<Value *, DebugLoc> ExternallyUsedValues;
|
ExtraValueToDebugLocsMap ExternallyUsedValues;
|
||||||
return vectorizeTree(ExternallyUsedValues);
|
return vectorizeTree(ExternallyUsedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value *
|
Value *
|
||||||
BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
|
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
|
|
||||||
// All blocks must be scheduled before any instructions are inserted.
|
// All blocks must be scheduled before any instructions are inserted.
|
||||||
for (auto &BSIter : BlocksSchedules) {
|
for (auto &BSIter : BlocksSchedules) {
|
||||||
|
@ -2868,7 +2869,6 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
|
||||||
assert(ExternallyUsedValues.count(Scalar) &&
|
assert(ExternallyUsedValues.count(Scalar) &&
|
||||||
"Scalar with nullptr as an external user must be registered in "
|
"Scalar with nullptr as an external user must be registered in "
|
||||||
"ExternallyUsedValues map");
|
"ExternallyUsedValues map");
|
||||||
DebugLoc DL = ExternallyUsedValues[Scalar];
|
|
||||||
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
|
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
|
||||||
Builder.SetInsertPoint(VecI->getParent(),
|
Builder.SetInsertPoint(VecI->getParent(),
|
||||||
std::next(VecI->getIterator()));
|
std::next(VecI->getIterator()));
|
||||||
|
@ -2878,8 +2878,8 @@ BoUpSLP::vectorizeTree(MapVector<Value *, DebugLoc> &ExternallyUsedValues) {
|
||||||
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
|
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
|
||||||
Ex = extend(ScalarRoot, Ex, Scalar->getType());
|
Ex = extend(ScalarRoot, Ex, Scalar->getType());
|
||||||
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
|
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
|
||||||
|
std::swap(ExternallyUsedValues[Ex], ExternallyUsedValues[Scalar]);
|
||||||
ExternallyUsedValues.erase(Scalar);
|
ExternallyUsedValues.erase(Scalar);
|
||||||
ExternallyUsedValues[Ex] = DL;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4439,9 +4439,11 @@ public:
|
||||||
Builder.setFastMathFlags(Unsafe);
|
Builder.setFastMathFlags(Unsafe);
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
|
|
||||||
MapVector<Value *, DebugLoc> ExternallyUsedValues;
|
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
|
||||||
|
// The same extra argument may be used several time, so log each attempt
|
||||||
|
// to use it.
|
||||||
for (auto &Pair : ExtraArgs)
|
for (auto &Pair : ExtraArgs)
|
||||||
ExternallyUsedValues[Pair.second] = Pair.first->getDebugLoc();
|
ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc());
|
||||||
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
|
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
|
||||||
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
|
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
|
||||||
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
|
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
|
||||||
|
@ -4489,10 +4491,14 @@ public:
|
||||||
Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
|
Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
|
||||||
}
|
}
|
||||||
for (auto &Pair : ExternallyUsedValues) {
|
for (auto &Pair : ExternallyUsedValues) {
|
||||||
Builder.SetCurrentDebugLocation(Pair.second);
|
// Add each externally used value to the final reduction.
|
||||||
|
assert(!Pair.second.empty() && "At least one DebugLoc must be added.");
|
||||||
|
for (auto &DL : Pair.second) {
|
||||||
|
Builder.SetCurrentDebugLocation(DL);
|
||||||
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
|
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
|
||||||
Pair.first, "bin.extra");
|
Pair.first, "bin.extra");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// Update users.
|
// Update users.
|
||||||
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
|
if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
|
||||||
assert(ReductionRoot && "Need a reduction operation");
|
assert(ReductionRoot && "Need a reduction operation");
|
||||||
|
|
|
@ -1473,9 +1473,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
|
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
|
||||||
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
||||||
; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
|
; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
|
||||||
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
|
; CHECK-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
|
||||||
|
; CHECK-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
|
||||||
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
|
; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
|
||||||
; CHECK-NEXT: ret float [[BIN_EXTRA6]]
|
; CHECK-NEXT: ret float [[BIN_EXTRA7]]
|
||||||
;
|
;
|
||||||
; THRESHOLD-LABEL: @extra_args_same_several_times(
|
; THRESHOLD-LABEL: @extra_args_same_several_times(
|
||||||
; THRESHOLD-NEXT: entry:
|
; THRESHOLD-NEXT: entry:
|
||||||
|
@ -1510,9 +1511,10 @@ define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a
|
||||||
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
|
; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
|
||||||
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
||||||
; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
|
; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], 5.000000e+00
|
||||||
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], [[CONV]]
|
; THRESHOLD-NEXT: [[BIN_EXTRA6:%.*]] = fadd fast float [[BIN_EXTRA5]], 5.000000e+00
|
||||||
|
; THRESHOLD-NEXT: [[BIN_EXTRA7:%.*]] = fadd fast float [[BIN_EXTRA6]], [[CONV]]
|
||||||
; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
|
; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
|
||||||
; THRESHOLD-NEXT: ret float [[BIN_EXTRA6]]
|
; THRESHOLD-NEXT: ret float [[BIN_EXTRA7]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
%mul = mul nsw i32 %b, %a
|
%mul = mul nsw i32 %b, %a
|
||||||
|
|
Loading…
Reference in New Issue