forked from OSchip/llvm-project
[TTI] `BasicTTIImplBase::getInterleavedMemoryOpCost()`: fix load discounting
The math here is:
  Cost of 1 load     = (cost of n loads) / n
  Cost of live loads = (num live loads) * (cost of 1 load)
  Cost of live loads = (num live loads) * ((cost of n loads) / n)
  Cost of live loads = (cost of n loads) * ((num live loads) / n)
However, all the variables here are integers, and integer division rounds down (e.g. 3 live loads out of 4 gives a scale factor of 3 / 4 = 0), whereas this calculation clearly expects floating-point semantics. Instead, multiply upfront, and then perform a round-up division.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D112302
This commit is contained in:
parent
8ae83a1baf
commit
2eaef53023
|
@ -1214,7 +1214,7 @@ public:
|
||||||
//
|
//
|
||||||
// TODO: Note that legalization can turn masked loads/stores into unmasked
|
// TODO: Note that legalization can turn masked loads/stores into unmasked
|
||||||
// (legalized) loads/stores. This can be reflected in the cost.
|
// (legalized) loads/stores. This can be reflected in the cost.
|
||||||
if (VecTySize > VecTyLTSize) {
|
if (Cost.isValid() && VecTySize > VecTyLTSize) {
|
||||||
// The number of loads of a legal type it will take to represent a load
|
// The number of loads of a legal type it will take to represent a load
|
||||||
// of the unlegalized vector type.
|
// of the unlegalized vector type.
|
||||||
unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
|
unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
|
||||||
|
@ -1231,7 +1231,8 @@ public:
|
||||||
|
|
||||||
// Scale the cost of the load by the fraction of legal instructions that
|
// Scale the cost of the load by the fraction of legal instructions that
|
||||||
// will be used.
|
// will be used.
|
||||||
Cost *= UsedInsts.count() / NumLegalInsts;
|
Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
|
||||||
|
NumLegalInsts);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then plus the cost of interleave operation.
|
// Then plus the cost of interleave operation.
|
||||||
|
|
|
@ -168,7 +168,7 @@ entry:
|
||||||
; gaps.
|
; gaps.
|
||||||
;
|
;
|
||||||
; VF_2-LABEL: Checking a loop in "i64_factor_8"
|
; VF_2-LABEL: Checking a loop in "i64_factor_8"
|
||||||
; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
|
; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
|
||||||
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
|
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
|
||||||
; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
|
; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
|
||||||
; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
|
; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
|
||||||
|
|
Loading…
Reference in New Issue