forked from OSchip/llvm-project
[CostModel] remove cost-kind predicate for vector reduction costs
This is similar in spirit to 01ea93d85d
(memcpy) except that
here the underlying caller assumptions were created for vectorizer
use (throughput) rather than other passes.
That meant targets could have an enormous throughput cost with no
corresponding size, latency, or blended cost increase.
The ARM costs show a small difference between throughput and
size because there's an underlying difference in cmp/sel
costs that is also predicated on cost-kind.
Paraphrasing from the previous commits:
This may not make sense for some callers, but at least now the
costs will be consistently wrong instead of mysteriously wrong.
Targets should provide better overrides if the current modeling
is not accurate.
This commit is contained in:
parent
39a0d6889d
commit
22d10b8ab4
|
@ -1202,9 +1202,6 @@ public:
|
|||
case Intrinsic::vector_reduce_fmin:
|
||||
case Intrinsic::vector_reduce_umax:
|
||||
case Intrinsic::vector_reduce_umin: {
|
||||
// FIXME: all cost kinds should default to the same thing?
|
||||
if (CostKind != TTI::TCK_RecipThroughput)
|
||||
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
|
||||
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
|
||||
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
|
||||
}
|
||||
|
|
|
@ -213,11 +213,11 @@ define void @reduce_fmax(<16 x float> %va) {
|
|||
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE-LABEL: 'reduce_fmax'
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE_LATE-LABEL: 'reduce_fmax'
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
%v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
|
|
|
@ -22,19 +22,19 @@ define i32 @reduce_i64(i32 %arg) {
|
|||
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'reduce_i64'
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
; NEON-SIZE-LABEL: 'reduce_i64'
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
|
||||
|
@ -67,23 +67,23 @@ define i32 @reduce_i32(i32 %arg) {
|
|||
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
|
||||
;
|
||||
; V8M-SIZE-LABEL: 'reduce_i32'
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
|
||||
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
; NEON-SIZE-LABEL: 'reduce_i32'
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
|
||||
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
|
||||
;
|
||||
%V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
|
||||
|
|
|
@ -213,11 +213,11 @@ define void @reduce_fmax(<16 x float> %va) {
|
|||
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE-LABEL: 'reduce_fmax'
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
; SIZE_LATE-LABEL: 'reduce_fmax'
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
%v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
|
||||
|
|
Loading…
Reference in New Issue