forked from OSchip/llvm-project
[SLP] Fix PR39774: Set ReductionRoot if the original instruction is vectorized.
Summary: If the original reduction root instruction was vectorized, it might be removed from the tree. This means that the insertion point may become invalidated, and vectorizing the whole reduction may then produce an incorrect result. The ReductionRoot instruction must be marked as externally used so that it cannot be removed. Otherwise, it might cause an inconsistency with the cost model, and we may end up with an overly optimistic optimization. Reviewers: RKSimon, spatel, hfinkel, mkuper Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D54955 llvm-svn: 347759
This commit is contained in:
parent
30ce962732
commit
579c2d9d64
|
@ -5453,7 +5453,7 @@ class HorizontalReduction {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Instruction *ReductionRoot = nullptr;
|
WeakTrackingVH ReductionRoot;
|
||||||
|
|
||||||
/// The operation data of the reduction operation.
|
/// The operation data of the reduction operation.
|
||||||
OperationData ReductionData;
|
OperationData ReductionData;
|
||||||
|
@ -5738,7 +5738,7 @@ public:
|
||||||
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
|
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
|
||||||
|
|
||||||
Value *VectorizedTree = nullptr;
|
Value *VectorizedTree = nullptr;
|
||||||
IRBuilder<> Builder(ReductionRoot);
|
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
|
||||||
FastMathFlags Unsafe;
|
FastMathFlags Unsafe;
|
||||||
Unsafe.setFast();
|
Unsafe.setFast();
|
||||||
Builder.setFastMathFlags(Unsafe);
|
Builder.setFastMathFlags(Unsafe);
|
||||||
|
@ -5747,8 +5747,13 @@ public:
|
||||||
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
|
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
|
||||||
// The same extra argument may be used several time, so log each attempt
|
// The same extra argument may be used several time, so log each attempt
|
||||||
// to use it.
|
// to use it.
|
||||||
for (auto &Pair : ExtraArgs)
|
for (auto &Pair : ExtraArgs) {
|
||||||
|
assert(Pair.first && "DebugLoc must be set.");
|
||||||
ExternallyUsedValues[Pair.second].push_back(Pair.first);
|
ExternallyUsedValues[Pair.second].push_back(Pair.first);
|
||||||
|
}
|
||||||
|
// The reduction root is used as the insertion point for new instructions,
|
||||||
|
// so set it as externally used to prevent it from being deleted.
|
||||||
|
ExternallyUsedValues[ReductionRoot];
|
||||||
SmallVector<Value *, 16> IgnoreList;
|
SmallVector<Value *, 16> IgnoreList;
|
||||||
for (auto &V : ReductionOps)
|
for (auto &V : ReductionOps)
|
||||||
IgnoreList.append(V.begin(), V.end());
|
IgnoreList.append(V.begin(), V.end());
|
||||||
|
@ -5800,6 +5805,7 @@ public:
|
||||||
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
|
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
|
||||||
|
|
||||||
// Emit a reduction.
|
// Emit a reduction.
|
||||||
|
Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
|
||||||
Value *ReducedSubTree =
|
Value *ReducedSubTree =
|
||||||
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
|
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
|
||||||
if (VectorizedTree) {
|
if (VectorizedTree) {
|
||||||
|
@ -5826,8 +5832,6 @@ public:
|
||||||
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
|
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
|
||||||
}
|
}
|
||||||
for (auto &Pair : ExternallyUsedValues) {
|
for (auto &Pair : ExternallyUsedValues) {
|
||||||
assert(!Pair.second.empty() &&
|
|
||||||
"At least one DebugLoc must be inserted");
|
|
||||||
// Add each externally used value to the final reduction.
|
// Add each externally used value to the final reduction.
|
||||||
for (auto *I : Pair.second) {
|
for (auto *I : Pair.second) {
|
||||||
Builder.SetCurrentDebugLocation(I->getDebugLoc());
|
Builder.SetCurrentDebugLocation(I->getDebugLoc());
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s
|
||||||
|
|
||||||
|
define void @Test(i32) {
|
||||||
|
; CHECK-LABEL: @Test(
|
||||||
|
; CHECK-NEXT: entry:
|
||||||
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||||
|
; CHECK: loop:
|
||||||
|
; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
|
||||||
|
; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
||||||
|
; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0
|
||||||
|
; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]]
|
||||||
|
; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
|
||||||
|
; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55
|
||||||
|
; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]]
|
||||||
|
; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285
|
||||||
|
; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]]
|
||||||
|
; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240
|
||||||
|
; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]]
|
||||||
|
; CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496
|
||||||
|
; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
|
||||||
|
; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555
|
||||||
|
; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
|
||||||
|
; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
|
||||||
|
; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529
|
||||||
|
; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]]
|
||||||
|
; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685
|
||||||
|
; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]]
|
||||||
|
; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910
|
||||||
|
; CHECK-NEXT: br label [[LOOP]]
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
br label %loop
|
||||||
|
|
||||||
|
loop:
|
||||||
|
%local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ]
|
||||||
|
%local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ]
|
||||||
|
%val_0 = add i32 %local_4_39.us, 0
|
||||||
|
%val_1 = and i32 %local_8_43.us, %val_0
|
||||||
|
%val_2 = and i32 %val_1, %0
|
||||||
|
%val_3 = and i32 %val_2, %0
|
||||||
|
%val_4 = and i32 %val_3, %0
|
||||||
|
%val_5 = and i32 %val_4, %0
|
||||||
|
%val_6 = add i32 %local_8_43.us, 55
|
||||||
|
%val_7 = and i32 %val_5, %val_6
|
||||||
|
%val_8 = and i32 %val_7, %0
|
||||||
|
%val_9 = and i32 %val_8, %0
|
||||||
|
%val_10 = and i32 %val_9, %0
|
||||||
|
%val_11 = add i32 %local_8_43.us, 285
|
||||||
|
%val_12 = and i32 %val_10, %val_11
|
||||||
|
%val_13 = and i32 %val_12, %0
|
||||||
|
%val_14 = and i32 %val_13, %0
|
||||||
|
%val_15 = and i32 %val_14, %0
|
||||||
|
%val_16 = and i32 %val_15, %0
|
||||||
|
%val_17 = and i32 %val_16, %0
|
||||||
|
%val_18 = add i32 %local_8_43.us, 1240
|
||||||
|
%val_19 = and i32 %val_17, %val_18
|
||||||
|
%val_20 = add i32 %local_8_43.us, 1496
|
||||||
|
%val_21 = and i32 %val_19, %val_20
|
||||||
|
%val_22 = and i32 %val_21, %0
|
||||||
|
%val_23 = and i32 %val_22, %0
|
||||||
|
%val_24 = and i32 %val_23, %0
|
||||||
|
%val_25 = and i32 %val_24, %0
|
||||||
|
%val_26 = and i32 %val_25, %0
|
||||||
|
%val_27 = and i32 %val_26, %0
|
||||||
|
%val_28 = and i32 %val_27, %0
|
||||||
|
%val_29 = and i32 %val_28, %0
|
||||||
|
%val_30 = and i32 %val_29, %0
|
||||||
|
%val_31 = and i32 %val_30, %0
|
||||||
|
%val_32 = and i32 %val_31, %0
|
||||||
|
%val_33 = and i32 %val_32, %0
|
||||||
|
%val_34 = add i32 %local_8_43.us, 8555
|
||||||
|
%val_35 = and i32 %val_33, %val_34
|
||||||
|
%val_36 = and i32 %val_35, %0
|
||||||
|
%val_37 = and i32 %val_36, %0
|
||||||
|
%val_38 = and i32 %val_37, %0
|
||||||
|
%val_39 = add i32 %local_8_43.us, 12529
|
||||||
|
%val_40 = and i32 %val_38, %val_39
|
||||||
|
%val_41 = add i32 %local_8_43.us, 13685
|
||||||
|
%val_42 = and i32 %val_40, %val_41
|
||||||
|
%val_43 = add i32 %local_8_43.us, 14910
|
||||||
|
br label %loop
|
||||||
|
}
|
Loading…
Reference in New Issue