forked from OSchip/llvm-project
[SLP] Pass in correct alignment when querying memory access cost
This patch fixes bug https://llvm.org/bugs/show_bug.cgi?id=27897. When querying the memory access cost, the SLP vectorizer currently always passes in an alignment value of 1 (unaligned), so it gets a very high cost for scalar memory accesses and wrongly vectorizes the memory loads in the test case. This can be fixed by simply passing in the correct alignment. llvm-svn: 271333
This commit is contained in:
parent
9acb109930
commit
b994f4cdbc
|
@ -1726,16 +1726,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
case Instruction::Load: {
|
||||
// Cost of wide load - cost of scalar loads.
|
||||
unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
|
||||
int ScalarLdCost = VecTy->getNumElements() *
|
||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
|
||||
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
|
||||
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
|
||||
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
|
||||
VecTy, alignment, 0);
|
||||
return VecLdCost - ScalarLdCost;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
// We know that we can merge the stores. Calculate the cost.
|
||||
unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
|
||||
int ScalarStCost = VecTy->getNumElements() *
|
||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
|
||||
int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
|
||||
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
|
||||
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
|
||||
VecTy, alignment, 0);
|
||||
return VecStCost - ScalarStCost;
|
||||
}
|
||||
case Instruction::Call: {
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
if not 'PowerPC' in config.root.targets:
|
||||
config.unsupported = True
|
|
@ -0,0 +1,29 @@
|
|||
; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
|
||||
|
||||
%struct.A = type { i8*, i8* }
|
||||
|
||||
define i64 @foo(%struct.A* nocapture readonly %this) {
|
||||
entry:
|
||||
%end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
|
||||
%0 = bitcast i8** %end.i to i64*
|
||||
%1 = load i64, i64* %0, align 8
|
||||
%2 = bitcast %struct.A* %this to i64*
|
||||
%3 = load i64, i64* %2, align 8
|
||||
%sub.ptr.sub.i = sub i64 %1, %3
|
||||
%cmp = icmp sgt i64 %sub.ptr.sub.i, 9
|
||||
br i1 %cmp, label %return, label %lor.lhs.false
|
||||
|
||||
lor.lhs.false:
|
||||
%4 = inttoptr i64 %3 to i8*
|
||||
%5 = inttoptr i64 %1 to i8*
|
||||
%cmp2 = icmp ugt i8* %5, %4
|
||||
%. = select i1 %cmp2, i64 2, i64 -1
|
||||
ret i64 %.
|
||||
|
||||
return:
|
||||
ret i64 2
|
||||
}
|
||||
|
||||
; CHECK: load i64
|
||||
; CHECK-NOT: load <2 x i64>
|
||||
; CHECK-NOT: extractelement
|
Loading…
Reference in New Issue