From 8ed0864fd76ded2646b33de8fc610519dd7f1eb5 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 22 May 2021 11:47:08 +0300 Subject: [PATCH] Reland [X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): use getMemoryOpCost() Now that getMemoryOpCost() correctly handles all the vector variants, we should no longer hand-roll our own version of it, but use it directly. The AVX512 variant probably needs a similar change, but there it is less obvious. This was initially landed in 69ed93a4355123a45c1d7216aea7cd53d07a361b, but was reverted in 6b95fd199d96e3ba5c28a23b17b74203522bdaa8 because the patch it depends on was reverted. --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 18 +++++------------- .../CostModel/X86/interleaved-load-i8.ll | 6 +++--- .../CostModel/X86/interleaved-store-i8.ll | 6 +++--- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d0e146cd4875..7be4742c7b40 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4724,17 +4724,9 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2( ScalarTy = Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy)); - // Calculate the number of memory operations (NumOfMemOps), required - // for load/store the VecTy. - unsigned VecTySize = DL.getTypeStoreSize(VecTy); - unsigned LegalVTSize = LegalVT.getStoreSize(); - unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; - - // Get the cost of one memory operation. - auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), - LegalVT.getVectorNumElements()); - InstructionCost MemOpCost = getMemoryOpCost( - Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); + // Get the cost of all the memory operations. + InstructionCost MemOpCosts = getMemoryOpCost( + Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); auto *VT = FixedVectorType::get(ScalarTy, VF); EVT ETy = TLI->getValueType(DL, VT); @@ -4789,13 +4781,13 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2( if (Opcode == Instruction::Load) { if (const auto *Entry = CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) - return NumOfMemOps * MemOpCost + Entry->Cost; + return MemOpCosts + Entry->Cost; } else { assert(Opcode == Instruction::Store && "Expected Store Instruction at this point"); if (const auto *Entry = CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) - return NumOfMemOps * MemOpCost + Entry->Cost; + return MemOpCosts + Entry->Cost; } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8.ll index f43d73e64652..d20745080981 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8.ll @@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: norecurse nounwind readonly uwtable define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) { ;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8 -;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8 -;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8 -;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 7 for VF 4 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: %0 = load i8 ;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: %0 = load i8 ;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: %0 = load i8 entry: diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8.ll index 0923f131c004..349f5ad21a6f 100644 --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8.ll @@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: norecurse nounwind uwtable define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr { ;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4 -;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4 -;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4 -;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 14 for VF 8 For instruction: store i8 %conv4 ;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %conv4 ;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv4 entry: