forked from OSchip/llvm-project
[CostModel][X86] Don't count 2 shuffles on the last level of a pairwise arithmetic or min/max reduction
This is split from D55452 with the correct patch this time. Pairwise reductions require two shuffles on every level but the last. On the last level the two shuffles are <1, u, u, u...> and <0, u, u, u...>, but <0, u, u, u...> will be dropped by InstCombine/DAGCombine as being an identity shuffle. Differential Revision: https://reviews.llvm.org/D55615 llvm-svn: 349072
This commit is contained in:
parent
5f1706f3aa
commit
c6bfb05762
|
@ -1435,14 +1435,24 @@ public:
|
|||
Ty = SubTy;
|
||||
++LongVectorCount;
|
||||
}
|
||||
|
||||
NumReduxLevels -= LongVectorCount;
|
||||
|
||||
// The minimal length of the vector is limited by the real length of vector
|
||||
// operations performed on the current platform. That's why several final
|
||||
// reduction operations are performed on the vectors with the same
|
||||
// architecture-dependent length.
|
||||
ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
|
||||
|
||||
// Non pairwise reductions need one shuffle per reduction level. Pairwise
|
||||
// reductions need two shuffles on every level, but the last one. On that
|
||||
// level one of the shuffles is <0, u, u, ...> which is identity.
|
||||
unsigned NumShuffles = NumReduxLevels;
|
||||
if (IsPairwise && NumReduxLevels >= 1)
|
||||
NumShuffles += NumReduxLevels - 1;
|
||||
ShuffleCost += NumShuffles *
|
||||
ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
|
||||
0, Ty);
|
||||
ArithCost += (NumReduxLevels - LongVectorCount) *
|
||||
ArithCost += NumReduxLevels *
|
||||
ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
|
||||
return ShuffleCost + ArithCost +
|
||||
ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
|
||||
|
@ -1489,15 +1499,25 @@ public:
|
|||
Ty = SubTy;
|
||||
++LongVectorCount;
|
||||
}
|
||||
|
||||
NumReduxLevels -= LongVectorCount;
|
||||
|
||||
// The minimal length of the vector is limited by the real length of vector
|
||||
// operations performed on the current platform. That's why several final
|
||||
// reduction opertions are perfomed on the vectors with the same
|
||||
// architecture-dependent length.
|
||||
ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
|
||||
|
||||
// Non pairwise reductions need one shuffle per reduction level. Pairwise
|
||||
// reductions need two shuffles on every level, but the last one. On that
|
||||
// level one of the shuffles is <0, u, u, ...> which is identity.
|
||||
unsigned NumShuffles = NumReduxLevels;
|
||||
if (IsPairwise && NumReduxLevels >= 1)
|
||||
NumShuffles += NumReduxLevels - 1;
|
||||
ShuffleCost += NumShuffles *
|
||||
ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
|
||||
0, Ty);
|
||||
MinMaxCost +=
|
||||
(NumReduxLevels - LongVectorCount) *
|
||||
NumReduxLevels *
|
||||
(ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
|
||||
ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
|
||||
nullptr));
|
||||
|
|
|
@ -107,7 +107,7 @@ define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -118,7 +118,7 @@ define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -168,7 +168,7 @@ define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -179,7 +179,7 @@ define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -228,7 +228,7 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -238,7 +238,7 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2
|
||||
;
|
||||
|
@ -669,14 +669,14 @@ define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction2double'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction2double'
|
||||
|
@ -709,7 +709,7 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction4float'
|
||||
|
@ -719,7 +719,7 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction4float'
|
||||
|
@ -761,7 +761,7 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction4double'
|
||||
|
@ -771,7 +771,7 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction4double'
|
||||
|
@ -826,7 +826,7 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction8float'
|
||||
|
@ -839,7 +839,7 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction8float'
|
||||
|
@ -900,14 +900,14 @@ define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction2i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 0, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction2i64'
|
||||
|
@ -940,7 +940,7 @@ define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction4i32'
|
||||
|
@ -950,7 +950,7 @@ define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction4i32'
|
||||
|
@ -985,35 +985,15 @@ define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) {
|
|||
}
|
||||
|
||||
define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) {
|
||||
; SSE2-LABEL: 'pairwise_reduction4i64'
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction4i64'
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction4i64'
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
|
||||
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
; SSE-LABEL: 'pairwise_reduction4i64'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r
|
||||
;
|
||||
; AVX1-LABEL: 'pairwise_reduction4i64'
|
||||
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
|
||||
|
@ -1057,7 +1037,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction8i16'
|
||||
|
@ -1070,7 +1050,7 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction8i16'
|
||||
|
@ -1124,7 +1104,7 @@ define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) {
|
|||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
|
||||
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||
;
|
||||
; SSSE3-LABEL: 'pairwise_reduction8i32'
|
||||
|
@ -1137,7 +1117,7 @@ define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) {
|
|||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0
|
||||
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r
|
||||
;
|
||||
; SSE42-LABEL: 'pairwise_reduction8i32'
|
||||
|
|
Loading…
Reference in New Issue