[LAA] Make getPointersDiff() API compatible with opaque pointers

Make getPointersDiff() and sortPtrAccesses() compatible with opaque
pointers by explicitly passing in the element type instead of
determining it from the pointer element type.
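
As a rough illustration (ElemTy, PtrA, PtrB, DL and SE are hypothetical
caller-side names, not part of this patch), a caller now supplies the
accessed element type itself:

  // Old signature: the element type was read from PtrA's pointee type,
  // which is unavailable for opaque 'ptr' values.
  //   Optional<int> Dist = getPointersDiff(PtrA, PtrB, DL, SE);
  // New signature: pass the accessed element type (e.g. the loaded or
  // stored type) explicitly for both pointers.
  Optional<int> Dist = getPointersDiff(ElemTy, PtrA, ElemTy, PtrB, DL, SE);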

The SLPVectorizer result is slightly non-optimal in that unnecessary
pointer bitcasts are added.

Differential Revision: https://reviews.llvm.org/D104784
Nikita Popov 2021-06-23 15:57:38 +02:00
parent 6cc6ada143
commit 00d3f7cc3c
4 changed files with 95 additions and 25 deletions


@@ -684,7 +684,8 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
/// is a simple API that does not depend on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after all the bitcasts removal in the provided pointers.
Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
Value *PtrB, const DataLayout &DL,
ScalarEvolution &SE, bool StrictCheck = false,
bool CheckType = true);
@@ -698,7 +699,7 @@ Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
/// saves the mask for actual memory accesses in program order in
/// \p SortedIndices as <1,2,0,3>
bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices);
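
A minimal usage sketch for the updated sortPtrAccesses() declaration
(Ptrs, ElemTy, DL and SE are assumed caller-side values, not from this
patch):

  SmallVector<unsigned, 4> SortedIndices;
  // Every pointer in Ptrs is assumed to access an element of type ElemTy.
  if (sortPtrAccesses(Ptrs, ElemTy, DL, SE, SortedIndices)) {
    // The pointers can be ordered by offset; SortedIndices describes that
    // order and is left empty when the accesses are already in order, as
    // the SLPVectorizer callers below rely on.
  }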


@@ -1124,16 +1124,22 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
return Stride;
}
Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
const DataLayout &DL, ScalarEvolution &SE,
bool StrictCheck, bool CheckType) {
Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
Value *PtrB, const DataLayout &DL,
ScalarEvolution &SE, bool StrictCheck,
bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
assert(cast<PointerType>(PtrA->getType())
->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
assert(cast<PointerType>(PtrB->getType())
->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
// Make sure that A and B are different pointers.
if (PtrA == PtrB)
return 0;
// Make sure that PtrA and PtrB have the same type if required
if (CheckType && PtrA->getType() != PtrB->getType())
// Make sure that the element types are the same if required.
if (CheckType && ElemTyA != ElemTyB)
return None;
unsigned ASA = PtrA->getType()->getPointerAddressSpace();
@@ -1174,8 +1180,7 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
return None;
Val = Diff->getAPInt().getSExtValue();
}
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
int Size = DL.getTypeStoreSize(Ty);
int Size = DL.getTypeStoreSize(ElemTyA);
int Dist = Val / Size;
// Ensure that the calculated distance matches the type-based one after all
@@ -1185,8 +1190,8 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
return None;
}
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
ScalarEvolution &SE,
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices) {
assert(llvm::all_of(
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
@@ -1204,8 +1209,8 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
int Cnt = 1;
bool IsConsecutive = true;
for (auto *Ptr : VL.drop_front()) {
Optional<int> Diff =
getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
/*StrictCheck=*/true);
if (!Diff)
return false;
@@ -1238,8 +1243,10 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
Value *PtrB = getLoadStorePointerOperand(B);
if (!PtrA || !PtrB)
return false;
Optional<int> Diff =
getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
Type *ElemTyA = getLoadStoreType(A);
Type *ElemTyB = getLoadStoreType(B);
Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
/*StrictCheck=*/true, CheckType);
return Diff && *Diff == 1;
}
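
For reference, the getLoadStoreType() helper used above is an existing IR
utility; a simplified sketch of its behavior (not the verbatim
implementation) is:

  // Returns the type read by a load or written by a store.
  static Type *getAccessedElemType(Value *I) {
    if (auto *LI = dyn_cast<LoadInst>(I))
      return LI->getType();
    return cast<StoreInst>(I)->getValueOperand()->getType();
  }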


@@ -987,9 +987,9 @@ public:
if (LI1->getParent() != LI2->getParent())
return VLOperands::ScoreFail;
Optional<int> Dist =
getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
DL, SE, /*StrictCheck=*/true);
Optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
: VLOperands::ScoreFail;
}
@@ -2968,7 +2968,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
OrdersType CurrentOrder;
// Check the order of pointer operands.
if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
Value *Ptr0;
Value *PtrN;
if (CurrentOrder.empty()) {
@@ -2978,7 +2978,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Ptr0 = PointerOps[CurrentOrder.front()];
PtrN = PointerOps[CurrentOrder.back()];
}
Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
Optional<int> Diff = getPointersDiff(
ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
if (CurrentOrder.empty()) {
@@ -3243,7 +3244,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
OrdersType CurrentOrder;
// Check the order of pointer operands.
if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
Value *Ptr0;
Value *PtrN;
if (CurrentOrder.empty()) {
@@ -3253,7 +3254,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Ptr0 = PointerOps[CurrentOrder.front()];
PtrN = PointerOps[CurrentOrder.back()];
}
Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
Optional<int> Dist =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted pointer operands are consecutive.
if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
if (CurrentOrder.empty()) {
@@ -6893,9 +6895,10 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
++IterCnt;
CheckedPairs[Idx].set(K);
CheckedPairs[K].set(Idx);
Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
Stores[Idx]->getPointerOperand(), *DL,
*SE, /*StrictCheck=*/true);
Optional<int> Diff = getPointersDiff(
Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
Stores[Idx]->getValueOperand()->getType(),
Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
if (!Diff || *Diff == 0)
return false;
int Val = *Diff;
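
A worked example of the distance check above, with illustrative values:
if both stores write i32 and their pointer operands are 4 bytes apart, the
SCEV-based byte difference is 4, the store size of i32 is 4, and
getPointersDiff() therefore returns a distance of 1, i.e. adjacent seeds:

  // Hypothetical names (Ctx, PtrK, PtrIdx); both stores write i32 values.
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  Optional<int> Diff = getPointersDiff(Int32Ty, PtrK, Int32Ty, PtrIdx, *DL,
                                       *SE, /*StrictCheck=*/true);
  // With a 4-byte offset between PtrK and PtrIdx, *Diff == 1.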


@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s
define void @test(ptr %r, ptr %p, ptr %q) #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0
; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1
; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2
; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[P0]] to <4 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[Q0]] to <4 x i64>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]]
; CHECK-NEXT: ret void
;
%p0 = getelementptr inbounds i64, ptr %p, i64 0
%p1 = getelementptr inbounds i64, ptr %p, i64 1
%p2 = getelementptr inbounds i64, ptr %p, i64 2
%p3 = getelementptr inbounds i64, ptr %p, i64 3
%q0 = getelementptr inbounds i64, ptr %q, i64 0
%q1 = getelementptr inbounds i64, ptr %q, i64 1
%q2 = getelementptr inbounds i64, ptr %q, i64 2
%q3 = getelementptr inbounds i64, ptr %q, i64 3
%x0 = load i64, ptr %p0, align 2
%x1 = load i64, ptr %p1, align 2
%x2 = load i64, ptr %p2, align 2
%x3 = load i64, ptr %p3, align 2
%y0 = load i64, ptr %q0, align 2
%y1 = load i64, ptr %q1, align 2
%y2 = load i64, ptr %q2, align 2
%y3 = load i64, ptr %q3, align 2
%sub0 = sub nsw i64 %x0, %y0
%sub1 = sub nsw i64 %x1, %y1
%sub2 = sub nsw i64 %x2, %y2
%sub3 = sub nsw i64 %x3, %y3
%g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
%g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
%g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
%g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
ret void
}