forked from OSchip/llvm-project
[SLP]Fix reused extracts cost.
If an extractelement instruction is used multiple times in different tree entries (either vectorized or gathered), we need to compensate for the scalar cost of such instructions. They are completely removed if all of their users are part of the tree, but the cost must be compensated only once for each instruction. Differential Revision: https://reviews.llvm.org/D114958
This commit is contained in:
parent
78cc133c63
commit
ba74bb3a22
|
@ -4287,8 +4287,8 @@ bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
|
||||||
bool BoUpSLP::areAllUsersVectorized(Instruction *I,
|
bool BoUpSLP::areAllUsersVectorized(Instruction *I,
|
||||||
ArrayRef<Value *> VectorizedVals) const {
|
ArrayRef<Value *> VectorizedVals) const {
|
||||||
return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
|
return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
|
||||||
llvm::all_of(I->users(), [this](User *U) {
|
all_of(I->users(), [this](User *U) {
|
||||||
return ScalarToTreeEntry.count(U) > 0;
|
return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4442,9 +4442,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
// FIXME: it tries to fix a problem with MSVC buildbots.
|
// FIXME: it tries to fix a problem with MSVC buildbots.
|
||||||
TargetTransformInfo &TTIRef = *TTI;
|
TargetTransformInfo &TTIRef = *TTI;
|
||||||
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
|
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
|
||||||
VectorizedVals](InstructionCost &Cost,
|
VectorizedVals, E](InstructionCost &Cost) {
|
||||||
bool IsGather) {
|
|
||||||
DenseMap<Value *, int> ExtractVectorsTys;
|
DenseMap<Value *, int> ExtractVectorsTys;
|
||||||
|
SmallPtrSet<Value *, 4> CheckedExtracts;
|
||||||
for (auto *V : VL) {
|
for (auto *V : VL) {
|
||||||
if (isa<UndefValue>(V))
|
if (isa<UndefValue>(V))
|
||||||
continue;
|
continue;
|
||||||
|
@ -4452,7 +4452,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
// instruction itself is not going to be vectorized, consider this
|
// instruction itself is not going to be vectorized, consider this
|
||||||
// instruction as dead and remove its cost from the final cost of the
|
// instruction as dead and remove its cost from the final cost of the
|
||||||
// vectorized tree.
|
// vectorized tree.
|
||||||
if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
|
// Also, avoid adjusting the cost for extractelements with multiple uses
|
||||||
|
// in different graph entries.
|
||||||
|
const TreeEntry *VE = getTreeEntry(V);
|
||||||
|
if (!CheckedExtracts.insert(V).second ||
|
||||||
|
!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
|
||||||
|
(VE && VE != E))
|
||||||
continue;
|
continue;
|
||||||
auto *EE = cast<ExtractElementInst>(V);
|
auto *EE = cast<ExtractElementInst>(V);
|
||||||
Optional<unsigned> EEIdx = getExtractIndex(EE);
|
Optional<unsigned> EEIdx = getExtractIndex(EE);
|
||||||
|
@ -4549,11 +4554,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
}
|
}
|
||||||
return GatherCost;
|
return GatherCost;
|
||||||
}
|
}
|
||||||
if (isSplat(VL)) {
|
|
||||||
// Found the broadcasting of the single scalar, calculate the cost as the
|
|
||||||
// broadcast.
|
|
||||||
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
|
|
||||||
}
|
|
||||||
if ((E->getOpcode() == Instruction::ExtractElement ||
|
if ((E->getOpcode() == Instruction::ExtractElement ||
|
||||||
all_of(E->Scalars,
|
all_of(E->Scalars,
|
||||||
[](Value *V) {
|
[](Value *V) {
|
||||||
|
@ -4571,13 +4571,18 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
// single input vector or of 2 input vectors.
|
// single input vector or of 2 input vectors.
|
||||||
InstructionCost Cost =
|
InstructionCost Cost =
|
||||||
computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
|
computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
|
||||||
AdjustExtractsCost(Cost, /*IsGather=*/true);
|
AdjustExtractsCost(Cost);
|
||||||
if (NeedToShuffleReuses)
|
if (NeedToShuffleReuses)
|
||||||
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
|
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
|
||||||
FinalVecTy, E->ReuseShuffleIndices);
|
FinalVecTy, E->ReuseShuffleIndices);
|
||||||
return Cost;
|
return Cost;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (isSplat(VL)) {
|
||||||
|
// Found the broadcasting of the single scalar, calculate the cost as the
|
||||||
|
// broadcast.
|
||||||
|
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
|
||||||
|
}
|
||||||
InstructionCost ReuseShuffleCost = 0;
|
InstructionCost ReuseShuffleCost = 0;
|
||||||
if (NeedToShuffleReuses)
|
if (NeedToShuffleReuses)
|
||||||
ReuseShuffleCost = TTI->getShuffleCost(
|
ReuseShuffleCost = TTI->getShuffleCost(
|
||||||
|
@ -4755,7 +4760,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
|
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
AdjustExtractsCost(CommonCost, /*IsGather=*/false);
|
AdjustExtractsCost(CommonCost);
|
||||||
}
|
}
|
||||||
return CommonCost;
|
return CommonCost;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,24 +2,25 @@
|
||||||
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -pass-remarks-output=%t | FileCheck %s
|
; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux -march=core-avx2 -pass-remarks-output=%t | FileCheck %s
|
||||||
; RUN: FileCheck %s --input-file=%t --check-prefix=YAML
|
; RUN: FileCheck %s --input-file=%t --check-prefix=YAML
|
||||||
|
|
||||||
; YAML: --- !Missed
|
; YAML: --- !Passed
|
||||||
; YAML: Pass: slp-vectorizer
|
; YAML: Pass: slp-vectorizer
|
||||||
; YAML: Name: NotBeneficial
|
; YAML: Name: VectorizedList
|
||||||
; YAML: Function: multi_uses
|
; YAML: Function: multi_uses
|
||||||
; YAML: Args:
|
; YAML: Args:
|
||||||
; YAML: - String: 'List vectorization was possible but not beneficial with cost '
|
; YAML: - String: 'SLP vectorized with cost '
|
||||||
; YAML: - Cost: '0'
|
; YAML: - Cost: '-1'
|
||||||
; YAML: - String: ' >= '
|
; YAML: - String: ' and with tree size '
|
||||||
; YAML: - Treshold: '0'
|
; YAML: - TreeSize: '3'
|
||||||
|
|
||||||
define float @multi_uses(<2 x float> %x, <2 x float> %y) {
|
define float @multi_uses(<2 x float> %x, <2 x float> %y) {
|
||||||
; CHECK-LABEL: @multi_uses(
|
; CHECK-LABEL: @multi_uses(
|
||||||
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
|
|
||||||
; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
|
|
||||||
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1
|
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1
|
||||||
; CHECK-NEXT: [[X0X0:%.*]] = fmul float [[X0]], [[Y1]]
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0
|
||||||
; CHECK-NEXT: [[X1X1:%.*]] = fmul float [[X1]], [[Y1]]
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Y1]], i32 1
|
||||||
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
|
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP2]]
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
|
||||||
|
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
|
||||||
; CHECK-NEXT: ret float [[ADD]]
|
; CHECK-NEXT: ret float [[ADD]]
|
||||||
;
|
;
|
||||||
%x0 = extractelement <2 x float> %x, i32 0
|
%x0 = extractelement <2 x float> %x, i32 0
|
||||||
|
|
Loading…
Reference in New Issue