forked from OSchip/llvm-project
[SLP] Preserve IR flags for extra args.
Summary: We should preserve IR flags for extra args. These IR flags should be taken from original scalar operations, not from the reduction operations. Reviewers: mkuper, mzolotukhin, hfinkel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D30447 llvm-svn: 296613
This commit is contained in:
parent
ef6e672d04
commit
74e5a36856
|
@ -212,14 +212,14 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
|
|||
/// Flag set: NSW, NUW, exact, and all of fast-math.
|
||||
static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
|
||||
if (auto *VecOp = dyn_cast<Instruction>(I)) {
|
||||
if (auto *Intersection = dyn_cast<Instruction>(VL[0])) {
|
||||
// Intersection is initialized to the 0th scalar,
|
||||
// so start counting from index '1'.
|
||||
if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
|
||||
// VecOp is initialized to the 0th scalar, so start counting from index
|
||||
// '1'.
|
||||
VecOp->copyIRFlags(I0);
|
||||
for (int i = 1, e = VL.size(); i < e; ++i) {
|
||||
if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
|
||||
Intersection->andIRFlags(Scalar);
|
||||
VecOp->andIRFlags(Scalar);
|
||||
}
|
||||
VecOp->copyIRFlags(Intersection);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -304,7 +304,8 @@ public:
|
|||
typedef SmallVector<Instruction *, 16> InstrList;
|
||||
typedef SmallPtrSet<Value *, 16> ValueSet;
|
||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||
typedef MapVector<Value *, SmallVector<DebugLoc, 2>> ExtraValueToDebugLocsMap;
|
||||
typedef MapVector<Value *, SmallVector<Instruction *, 2>>
|
||||
ExtraValueToDebugLocsMap;
|
||||
|
||||
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
|
||||
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
|
||||
|
@ -4430,7 +4431,7 @@ public:
|
|||
// The same extra argument may be used several times, so log each attempt
|
||||
// to use it.
|
||||
for (auto &Pair : ExtraArgs)
|
||||
ExternallyUsedValues[Pair.second].push_back(Pair.first->getDebugLoc());
|
||||
ExternallyUsedValues[Pair.second].push_back(Pair.first);
|
||||
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
|
||||
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
|
||||
V.buildTree(VL, ExternallyUsedValues, ReductionOps);
|
||||
|
@ -4481,10 +4482,11 @@ public:
|
|||
assert(!Pair.second.empty() &&
|
||||
"At least one DebugLoc must be inserted");
|
||||
// Add each externally used value to the final reduction.
|
||||
for (auto &DL : Pair.second) {
|
||||
Builder.SetCurrentDebugLocation(DL);
|
||||
for (auto *I : Pair.second) {
|
||||
Builder.SetCurrentDebugLocation(I->getDebugLoc());
|
||||
VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
|
||||
Pair.first, "bin.extra");
|
||||
propagateIRFlags(VectorizedTree, I);
|
||||
}
|
||||
}
|
||||
// Update users.
|
||||
|
|
|
@ -1679,8 +1679,8 @@ define i32 @wobble(i32 %arg, i32 %bar) {
|
|||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]]
|
||||
; CHECK-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]]
|
||||
; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
|
||||
; CHECK-NEXT: [[BIN_EXTRA3:%.*]] = add nsw i32 [[BIN_EXTRA]], [[TMP9]]
|
||||
; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef
|
||||
; CHECK-NEXT: ret i32 [[BIN_EXTRA3]]
|
||||
;
|
||||
|
@ -1707,8 +1707,8 @@ define i32 @wobble(i32 %arg, i32 %bar) {
|
|||
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]]
|
||||
; THRESHOLD-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]]
|
||||
; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
|
||||
; THRESHOLD-NEXT: [[BIN_EXTRA3:%.*]] = add nsw i32 [[BIN_EXTRA]], [[TMP9]]
|
||||
; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef
|
||||
; THRESHOLD-NEXT: ret i32 [[BIN_EXTRA3]]
|
||||
;
|
||||
|
|
|
@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
|
|||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]],
|
||||
; CHECK: [[ADD52:%.*]] = add nsw i32 [[TMP15]],
|
||||
; CHECK: ret i32 [[ADD52]]
|
||||
;
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue