[SLP] Pass in correct alignment when querying memory access cost

This patch fixes bug https://llvm.org/bugs/show_bug.cgi?id=27897.

When querying the memory access cost, SLP currently always passes in an alignment value of 1 (unaligned), so it gets a very high cost for scalar memory accesses and wrongly vectorizes the memory loads in the test case.

It can be fixed by simply passing in the correct alignment.

llvm-svn: 271333
This commit is contained in:
Guozhi Wei 2016-05-31 20:41:19 +00:00
parent 9acb109930
commit b994f4cdbc
3 changed files with 39 additions and 4 deletions

View File

@ -1726,16 +1726,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
VecTy, alignment, 0);
return VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
int ScalarStCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
VecTy, alignment, 0);
return VecStCost - ScalarStCost;
}
case Instruction::Call: {

View File

@ -0,0 +1,2 @@
# Mark the tests covered by this lit configuration as unsupported when
# 'PowerPC' is not among the targets listed in config.root.targets.
if 'PowerPC' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,29 @@
; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
; Regression test for https://llvm.org/bugs/show_bug.cgi?id=27897.
; @foo performs two 8-byte-aligned i64 loads, one from each field of %struct.A.
; The assertions at the end of this file require the SLP vectorizer to leave
; them as scalar loads rather than emitting a single <2 x i64> load whose lanes
; are then pulled back out with extractelement.
%struct.A = type { i8*, i8* }
define i64 @foo(%struct.A* nocapture readonly %this) {
entry:
; Address of the second field of %this.
%end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
%0 = bitcast i8** %end.i to i64*
; %1 = second field, read as an i64 with 8-byte alignment.
%1 = load i64, i64* %0, align 8
%2 = bitcast %struct.A* %this to i64*
; %3 = first field, read as an i64 with 8-byte alignment.
%3 = load i64, i64* %2, align 8
; Difference (second field - first field); take the early exit when it is
; greater than 9 (signed comparison).
%sub.ptr.sub.i = sub i64 %1, %3
%cmp = icmp sgt i64 %sub.ptr.sub.i, 9
br i1 %cmp, label %return, label %lor.lhs.false
lor.lhs.false:
; Reinterpret both loaded values as pointers and compare them unsigned:
; the result is 2 when the second field is above the first, otherwise -1.
%4 = inttoptr i64 %3 to i8*
%5 = inttoptr i64 %1 to i8*
%cmp2 = icmp ugt i8* %5, %4
%. = select i1 %cmp2, i64 2, i64 -1
ret i64 %.
return:
ret i64 2
}
; Expected output: scalar i64 loads remain, and neither a vector load nor an
; extractelement appears after them.
; CHECK: load i64
; CHECK-NOT: load <2 x i64>
; CHECK-NOT: extractelement