forked from OSchip/llvm-project
[InstCombine][SVE] Fix visitGetElementPtrInst for scalable type.
Summary: This patch fix the following issues in InstCombiner::visitGetElementPtrInst 1. Skip for scalable type if transformation requires fixed size number of vector element. 2. Skip for scalable type if transformation relies on compile-time known type alloc size. 3. Use VectorType::getElementCount when scalable property is used to construct new VectorType. 4. Use TypeSize::getKnownMinSize when minimal size of a scalable type is valid to determine GEP 'inbounds'. 5. Explicitly call TypeSize::getFixedSize to avoid implicit type conversion to uint64_t. Reviewers: sdesmalen, efriedma, spatel, ctetreau Reviewed By: efriedma Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78081
This commit is contained in:
parent
cea112f422
commit
5c1d1a62e3
|
@ -1854,12 +1854,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
|
||||
Type *GEPType = GEP.getType();
|
||||
Type *GEPEltType = GEP.getSourceElementType();
|
||||
bool IsGEPSrcEleScalable =
|
||||
GEPEltType->isVectorTy() && cast<VectorType>(GEPEltType)->isScalable();
|
||||
if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
|
||||
return replaceInstUsesWith(GEP, V);
|
||||
|
||||
// For vector geps, use the generic demanded vector support.
|
||||
if (auto *GEPVTy = dyn_cast<VectorType>(GEP.getType())) {
|
||||
auto VWidth = GEPVTy->getNumElements();
|
||||
// Skip if GEP return type is scalable. The number of elements is unknown at
|
||||
// compile-time.
|
||||
if (GEPType->isVectorTy() && !cast<VectorType>(GEPType)->isScalable()) {
|
||||
auto VWidth = cast<VectorType>(GEPType)->getNumElements();
|
||||
APInt UndefElts(VWidth, 0);
|
||||
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
|
||||
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
|
||||
|
@ -1896,7 +1900,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
Type *NewIndexType =
|
||||
IndexTy->isVectorTy()
|
||||
? VectorType::get(NewScalarIndexTy,
|
||||
cast<VectorType>(IndexTy)->getNumElements())
|
||||
cast<VectorType>(IndexTy)->getElementCount())
|
||||
: NewScalarIndexTy;
|
||||
|
||||
// If the element type has zero size then any index over it is equivalent
|
||||
|
@ -2146,11 +2150,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
GEP.getName());
|
||||
}
|
||||
|
||||
if (GEP.getNumIndices() == 1) {
|
||||
// Skip if GEP source element type is scalable. The type alloc size is unknown
|
||||
// at compile-time.
|
||||
if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
|
||||
unsigned AS = GEP.getPointerAddressSpace();
|
||||
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
|
||||
DL.getIndexSizeInBits(AS)) {
|
||||
uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
|
||||
uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
|
||||
|
||||
bool Matched = false;
|
||||
uint64_t C;
|
||||
|
@ -2263,10 +2269,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (GEP.getNumOperands() == 2) {
|
||||
// Transform things like:
|
||||
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
|
||||
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
|
||||
} else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
|
||||
// Skip if GEP source element type is scalable. The type alloc size is
|
||||
// unknown at compile-time.
|
||||
// Transform things like: %t = getelementptr i32*
|
||||
// bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
|
||||
// x i32]* %str, i32 0, i32 %V; bitcast
|
||||
if (StrippedPtrEltTy->isArrayTy() &&
|
||||
DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
|
||||
DL.getTypeAllocSize(GEPEltType)) {
|
||||
|
@ -2290,8 +2298,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
|
||||
// Check that changing the type amounts to dividing the index by a scale
|
||||
// factor.
|
||||
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
|
||||
uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
|
||||
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
|
||||
uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
|
||||
if (ResSize && SrcSize % ResSize == 0) {
|
||||
Value *Idx = GEP.getOperand(1);
|
||||
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
|
||||
|
@ -2330,9 +2338,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
StrippedPtrEltTy->isArrayTy()) {
|
||||
// Check that changing to the array element type amounts to dividing the
|
||||
// index by a scale factor.
|
||||
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
|
||||
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
|
||||
uint64_t ArrayEltSize =
|
||||
DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
|
||||
DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
|
||||
.getFixedSize();
|
||||
if (ResSize && ArrayEltSize % ResSize == 0) {
|
||||
Value *Idx = GEP.getOperand(1);
|
||||
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
|
||||
|
@ -2480,7 +2489,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
|||
if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
|
||||
if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
|
||||
BasePtrOffset.isNonNegative()) {
|
||||
APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
|
||||
APInt AllocSize(
|
||||
IdxWidth,
|
||||
DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
|
||||
if (BasePtrOffset.ule(AllocSize)) {
|
||||
return GetElementPtrInst::CreateInBounds(
|
||||
GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
; This test is used to verify we are not crashing at Assertion `CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!".
|
||||
define <vscale x 2 x i8*> @gep_index_type_is_scalable(i8* %p) {
|
||||
; CHECK-LABEL: @gep_index_type_is_scalable(
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P:%.*]], <vscale x 2 x i64> undef
|
||||
; CHECK-NEXT: ret <vscale x 2 x i8*> [[GEP]]
|
||||
;
|
||||
%gep = getelementptr i8, i8* %p, <vscale x 2 x i64> undef
|
||||
ret <vscale x 2 x i8*> %gep
|
||||
}
|
||||
|
||||
; This test serves to verify code changes for "GEP.getNumIndices() == 1".
|
||||
define <vscale x 4 x i32>* @gep_num_of_indices_1(<vscale x 4 x i32>* %p) {
|
||||
; CHECK-LABEL: @gep_num_of_indices_1(
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32>* [[GEP]]
|
||||
;
|
||||
%gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
|
||||
ret <vscale x 4 x i32>* %gep
|
||||
}
|
||||
|
||||
; This test serves to verify code changes for "GEP.getNumOperands() == 2".
|
||||
define void @gep_bitcast(i8* %p) {
|
||||
; CHECK-LABEL: @gep_bitcast(
|
||||
; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
|
||||
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[CAST]], align 16
|
||||
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[CAST]], i64 1
|
||||
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[GEP2]], align 16
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%cast = bitcast i8* %p to <vscale x 16 x i8>*
|
||||
%gep1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 0
|
||||
store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep1
|
||||
%gep2 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 1
|
||||
store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep2
|
||||
ret void
|
||||
}
|
||||
|
||||
; These tests serve to verify code changes when underlying gep ptr is alloca.
|
||||
; This test is to verify 'inbounds' is added when it's valid to accumulate constant offset.
|
||||
define i32 @gep_alloca_inbounds_vscale_zero() {
|
||||
; CHECK-LABEL: @gep_alloca_inbounds_vscale_zero(
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
|
||||
; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 0, i64 2
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
|
||||
; CHECK-NEXT: ret i32 [[LOAD]]
|
||||
;
|
||||
%a = alloca <vscale x 4 x i32>
|
||||
%tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 0, i32 2
|
||||
%load = load i32, i32* %tmp
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
; This test is to verify 'inbounds' is not added when a constant offset can not be determined at compile-time.
|
||||
define i32 @gep_alloca_inbounds_vscale_nonzero() {
|
||||
; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
|
||||
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
|
||||
; CHECK-NEXT: [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 1, i64 2
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
|
||||
; CHECK-NEXT: ret i32 [[LOAD]]
|
||||
;
|
||||
%a = alloca <vscale x 4 x i32>
|
||||
%tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 1, i32 2
|
||||
%load = load i32, i32* %tmp
|
||||
ret i32 %load
|
||||
}
|
Loading…
Reference in New Issue