From 5c1d1a62e3754e5f3aa3f5e5e03d13acd9f973e4 Mon Sep 17 00:00:00 2001 From: Huihui Zhang Date: Tue, 14 Apr 2020 12:38:03 -0700 Subject: [PATCH] [InstCombine][SVE] Fix visitGetElementPtrInst for scalable type. Summary: This patch fix the following issues in InstCombiner::visitGetElementPtrInst 1. Skip for scalable type if transformation requires fixed size number of vector element. 2. Skip for scalable type if transformation relies on compile-time known type alloc size. 3. Use VectorType::getElementCount when scalable property is used to construct new VectorType. 4. Use TypeSize::getKnownMinSize when minimal size of a scalable type is valid to determine GEP 'inbounds'. 5. Explicitly call TypeSize::getFixedSize to avoid implicit type conversion to uint64_t. Reviewers: sdesmalen, efriedma, spatel, ctetreau Reviewed By: efriedma Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78081 --- .../InstCombine/InstructionCombining.cpp | 39 +++++++---- .../test/Transforms/InstCombine/vscale_gep.ll | 68 +++++++++++++++++++ 2 files changed, 93 insertions(+), 14 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/vscale_gep.ll diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index cd5126884dad..4d286f19f919 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1854,12 +1854,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); Type *GEPType = GEP.getType(); Type *GEPEltType = GEP.getSourceElementType(); + bool IsGEPSrcEleScalable = + GEPEltType->isVectorTy() && cast(GEPEltType)->isScalable(); if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); // For vector geps, use the generic demanded vector support. - if (auto *GEPVTy = dyn_cast(GEP.getType())) { - auto VWidth = GEPVTy->getNumElements(); + // Skip if GEP return type is scalable. The number of elements is unknown at + // compile-time. + if (GEPType->isVectorTy() && !cast(GEPType)->isScalable()) { + auto VWidth = cast(GEPType)->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask, @@ -1896,7 +1900,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Type *NewIndexType = IndexTy->isVectorTy() ? VectorType::get(NewScalarIndexTy, - cast(IndexTy)->getNumElements()) + cast(IndexTy)->getElementCount()) : NewScalarIndexTy; // If the element type has zero size then any index over it is equivalent @@ -2146,11 +2150,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GEP.getName()); } - if (GEP.getNumIndices() == 1) { + // Skip if GEP source element type is scalable. The type alloc size is unknown + // at compile-time. + if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == DL.getIndexSizeInBits(AS)) { - uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType); + uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize(); bool Matched = false; uint64_t C; @@ -2263,10 +2269,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } } - } else if (GEP.getNumOperands() == 2) { - // Transform things like: - // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V - // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast + } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) { + // Skip if GEP source element type is scalable. The type alloc size is + // unknown at compile-time. + // Transform things like: %t = getelementptr i32* + // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2 + // x i32]* %str, i32 0, i32 %V; bitcast if (StrippedPtrEltTy->isArrayTy() && DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) == DL.getTypeAllocSize(GEPEltType)) { @@ -2290,8 +2298,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. - uint64_t ResSize = DL.getTypeAllocSize(GEPEltType); - uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy); + uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize(); + uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize(); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -2330,9 +2338,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { StrippedPtrEltTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. - uint64_t ResSize = DL.getTypeAllocSize(GEPEltType); + uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize(); uint64_t ArrayEltSize = - DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()); + DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) + .getFixedSize(); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -2480,7 +2489,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { - APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType())); + APInt AllocSize( + IdxWidth, + DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize()); if (BasePtrOffset.ule(AllocSize)) { return GetElementPtrInst::CreateInBounds( GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1), diff --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll new file mode 100644 index 000000000000..65bede711c42 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +; This test is used to verify we are not crashing at Assertion `CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!". +define @gep_index_type_is_scalable(i8* %p) { +; CHECK-LABEL: @gep_index_type_is_scalable( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[P:%.*]], undef +; CHECK-NEXT: ret [[GEP]] +; + %gep = getelementptr i8, i8* %p, undef + ret %gep +} + +; This test serves to verify code changes for "GEP.getNumIndices() == 1". +define * @gep_num_of_indices_1(* %p) { +; CHECK-LABEL: @gep_num_of_indices_1( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr , * [[P:%.*]], i64 1 +; CHECK-NEXT: ret * [[GEP]] +; + %gep = getelementptr , * %p, i64 1 + ret * %gep +} + +; This test serves to verify code changes for "GEP.getNumOperands() == 2". +define void @gep_bitcast(i8* %p) { +; CHECK-LABEL: @gep_bitcast( +; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[P:%.*]] to * +; CHECK-NEXT: store zeroinitializer, * [[CAST]], align 16 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr , * [[CAST]], i64 1 +; CHECK-NEXT: store zeroinitializer, * [[GEP2]], align 16 +; CHECK-NEXT: ret void +; + %cast = bitcast i8* %p to * + %gep1 = getelementptr , * %cast, i64 0 + store zeroinitializer, * %gep1 + %gep2 = getelementptr , * %cast, i64 1 + store zeroinitializer, * %gep2 + ret void +} + +; These tests serve to verify code changes when underlying gep ptr is alloca. +; This test is to verify 'inbounds' is added when it's valid to accumulate constant offset. +define i32 @gep_alloca_inbounds_vscale_zero() { +; CHECK-LABEL: @gep_alloca_inbounds_vscale_zero( +; CHECK-NEXT: [[A:%.*]] = alloca , align 16 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds , * [[A]], i64 0, i64 2 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %a = alloca + %tmp = getelementptr , * %a, i32 0, i32 2 + %load = load i32, i32* %tmp + ret i32 %load +} + +; This test is to verify 'inbounds' is not added when a constant offset can not be determined at compile-time. +define i32 @gep_alloca_inbounds_vscale_nonzero() { +; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero( +; CHECK-NEXT: [[A:%.*]] = alloca , align 16 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr , * [[A]], i64 1, i64 2 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8 +; CHECK-NEXT: ret i32 [[LOAD]] +; + %a = alloca + %tmp = getelementptr , * %a, i32 1, i32 2 + %load = load i32, i32* %tmp + ret i32 %load +}