forked from OSchip/llvm-project
[AMDGPU] Fix for vectorizer crash with pointers of different size
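When the vectorizer strips pointers, it can end up with pointers of two different sizes (i.e. pointers in different address spaces), which causes SCEV to crash. The fix bails out when either stripped pointer is no longer in the original address space. Differential Revision: https://reviews.llvm.org/D65480 llvm-svn: 367443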
This commit is contained in:
parent
a4ea27de92
commit
ba1e845c21
|
@ -339,11 +339,16 @@ bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
|
||||||
const APInt &PtrDelta,
|
const APInt &PtrDelta,
|
||||||
unsigned Depth) const {
|
unsigned Depth) const {
|
||||||
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
|
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
|
||||||
|
unsigned PtrAS = PtrA->getType()->getPointerAddressSpace();
|
||||||
APInt OffsetA(PtrBitWidth, 0);
|
APInt OffsetA(PtrBitWidth, 0);
|
||||||
APInt OffsetB(PtrBitWidth, 0);
|
APInt OffsetB(PtrBitWidth, 0);
|
||||||
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
|
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
|
||||||
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
|
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
|
||||||
|
|
||||||
|
if (PtrA->getType()->getPointerAddressSpace() != PtrAS ||
|
||||||
|
PtrB->getType()->getPointerAddressSpace() != PtrAS)
|
||||||
|
return false;
|
||||||
|
|
||||||
APInt OffsetDelta = OffsetB - OffsetA;
|
APInt OffsetDelta = OffsetB - OffsetA;
|
||||||
|
|
||||||
// Check if they are based on the same pointer. That makes the offsets
|
// Check if they are based on the same pointer. That makes the offsets
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S < %s | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32"
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test
|
||||||
|
; CHECK: store i32* undef, i32** %tmp9, align 8
|
||||||
|
; CHECK: store i32* undef, i32** %tmp7, align 8
|
||||||
|
; Regression test: two stores through select'ed i32** pointers, one of which
; (%tmp9) may originate from an addrspace(5) pointer. The datalayout above
; declares addrspace(5) pointers as 32-bit and default-address-space pointers
; as 64-bit. The CHECK lines expect both stores to remain present as separate
; scalar stores after the pass runs.
define amdgpu_kernel void @test() {
|
||||||
|
entry:
|
||||||
|
; Cast from addrspace(5) to the default address space; the pointer operand and
; the result have different pointer widths under the datalayout above.
%a10.ascast.i = addrspacecast i32* addrspace(5)* null to i32**
|
||||||
|
%tmp4 = icmp eq i32 undef, 0
|
||||||
|
%tmp6 = select i1 false, i32** undef, i32** undef
|
||||||
|
%tmp7 = select i1 %tmp4, i32** null, i32** %tmp6
|
||||||
|
%tmp9 = select i1 %tmp4, i32** %a10.ascast.i, i32** null
|
||||||
|
store i32* undef, i32** %tmp9, align 8
|
||||||
|
store i32* undef, i32** %tmp7, align 8
|
||||||
|
unreachable
|
||||||
|
}
|
Loading…
Reference in New Issue