forked from OSchip/llvm-project
Reland [X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): use getMemoryOpCost()
Now that getMemoryOpCost() correctly handles all the vector variants, we should no longer hand-roll our own version of it, but use it directly. The AVX512 variant probably needs a similar change, but there it is less obvious. This was initially landed in 69ed93a435, but was reverted in 6b95fd199d because the patch it depends on was reverted.
This commit is contained in:
parent
05a4e4a89c
commit
8ed0864fd7
|
@@ -4724,17 +4724,9 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
|||
ScalarTy =
|
||||
Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
|
||||
|
||||
// Calculate the number of memory operations (NumOfMemOps), required
|
||||
// for load/store the VecTy.
|
||||
unsigned VecTySize = DL.getTypeStoreSize(VecTy);
|
||||
unsigned LegalVTSize = LegalVT.getStoreSize();
|
||||
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
|
||||
|
||||
// Get the cost of one memory operation.
|
||||
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
|
||||
LegalVT.getVectorNumElements());
|
||||
InstructionCost MemOpCost = getMemoryOpCost(
|
||||
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
|
||||
// Get the cost of all the memory operations.
|
||||
InstructionCost MemOpCosts = getMemoryOpCost(
|
||||
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
|
||||
|
||||
auto *VT = FixedVectorType::get(ScalarTy, VF);
|
||||
EVT ETy = TLI->getValueType(DL, VT);
|
||||
|
@@ -4789,13 +4781,13 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
|
|||
if (Opcode == Instruction::Load) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
|
||||
return NumOfMemOps * MemOpCost + Entry->Cost;
|
||||
return MemOpCosts + Entry->Cost;
|
||||
} else {
|
||||
assert(Opcode == Instruction::Store &&
|
||||
"Expected Store Instruction at this point");
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
|
||||
return NumOfMemOps * MemOpCost + Entry->Cost;
|
||||
return MemOpCosts + Entry->Cost;
|
||||
}
|
||||
|
||||
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
|
||||
|
|
|
@@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; Function Attrs: norecurse nounwind readonly uwtable
|
||||
define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) {
|
||||
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 7 for VF 4 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: %0 = load i8
|
||||
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: %0 = load i8
|
||||
entry:
|
||||
|
|
|
@@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
|
||||
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 14 for VF 8 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %conv4
|
||||
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv4
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue