From 2eaef530232e1fbf12dec087487346dcaaf97b1c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 22 Oct 2021 14:04:38 +0300 Subject: [PATCH] [TTI] `BasicTTIImplBase::getInterleavedMemoryOpCost()`: fix load discounting The math here is: Cost of 1 load = cost of n loads / n; Cost of live loads = num live loads * Cost of 1 load; Cost of live loads = num live loads * (cost of n loads / n); Cost of live loads = cost of n loads * (num live loads / n). However, all the variables here are integers and integer division rounds down, whereas this calculation clearly expects floating-point semantics. Instead, multiply upfront and then perform a round-up division. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D112302 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 +++-- .../Transforms/LoopVectorize/AArch64/interleaved_cost.ll | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index fcc8202f3bd8..9b116a8c6554 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1214,7 +1214,7 @@ public: // // TODO: Note that legalization can turn masked loads/stores into unmasked // (legalized) loads/stores. This can be reflected in the cost. - if (VecTySize > VecTyLTSize) { + if (Cost.isValid() && VecTySize > VecTyLTSize) { // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize); @@ -1231,7 +1231,8 @@ public: // Scale the cost of the load by the fraction of legal instructions that // will be used. - Cost *= UsedInsts.count() / NumLegalInsts; + Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(), + NumLegalInsts); } // Then plus the cost of interleave operation. 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index 54ee8fc6e73f..2b28a8ecf212 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -168,7 +168,7 @@ entry: ; gaps. ; ; VF_2-LABEL: Checking a loop in "i64_factor_8" -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 +; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8