Reland [X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): use getMemoryOpCost()

Now that getMemoryOpCost() correctly handles all the vector variants,
we should no longer hand-roll our own version of it, but use it directly.

The AVX512 variant probably needs a similar change,
but how to apply it there is less obvious.

This was initially landed in 69ed93a435,
but was then reverted in 6b95fd199d
because the patch it depends on had itself been reverted.
This commit is contained in:
Roman Lebedev 2021-05-22 11:47:08 +03:00
parent 05a4e4a89c
commit 8ed0864fd7
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
3 changed files with 11 additions and 19 deletions

View File

@ -4724,17 +4724,9 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
ScalarTy =
Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
// Calculate the number of memory operations (NumOfMemOps), required
// for load/store the VecTy.
unsigned VecTySize = DL.getTypeStoreSize(VecTy);
unsigned LegalVTSize = LegalVT.getStoreSize();
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
InstructionCost MemOpCost = getMemoryOpCost(
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
// Get the cost of all the memory operations.
InstructionCost MemOpCosts = getMemoryOpCost(
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
@ -4789,13 +4781,13 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
if (Opcode == Instruction::Load) {
if (const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
return NumOfMemOps * MemOpCost + Entry->Cost;
return MemOpCosts + Entry->Cost;
} else {
assert(Opcode == Instruction::Store &&
"Expected Store Instruction at this point");
if (const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
return NumOfMemOps * MemOpCost + Entry->Cost;
return MemOpCosts + Entry->Cost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,

View File

@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readonly uwtable
define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) {
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 7 for VF 4 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: %0 = load i8
entry:

View File

@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind uwtable
define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 14 for VF 8 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv4
entry: