[InstCombine] Remove scalable vector restriction in InstCombineCasts

Differential Revision: https://reviews.llvm.org/D93389
This commit is contained in:
Jun Ma 2020-12-16 15:05:49 +08:00
parent 4c8276cdc1
commit 0138399903
6 changed files with 112 additions and 48 deletions

View File

@ -2961,8 +2961,8 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(), Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
"AddrSpaceCast must be between different address spaces", &I); "AddrSpaceCast must be between different address spaces", &I);
if (auto *SrcVTy = dyn_cast<VectorType>(SrcTy)) if (auto *SrcVTy = dyn_cast<VectorType>(SrcTy))
Assert(cast<FixedVectorType>(SrcVTy)->getNumElements() == Assert(SrcVTy->getElementCount() ==
cast<FixedVectorType>(DestTy)->getNumElements(), cast<VectorType>(DestTy)->getElementCount(),
"AddrSpaceCast vector pointer number of elements mismatch", &I); "AddrSpaceCast vector pointer number of elements mismatch", &I);
visitInstruction(I); visitInstruction(I);
} }

View File

@ -907,20 +907,21 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
Value *VecOp; Value *VecOp;
ConstantInt *Cst; ConstantInt *Cst;
if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) { if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
auto *VecOpTy = cast<FixedVectorType>(VecOp->getType()); auto *VecOpTy = cast<VectorType>(VecOp->getType());
unsigned VecNumElts = VecOpTy->getNumElements(); auto VecElts = VecOpTy->getElementCount();
// A badly fit destination size would result in an invalid cast. // A badly fit destination size would result in an invalid cast.
if (SrcWidth % DestWidth == 0) { if (SrcWidth % DestWidth == 0) {
uint64_t TruncRatio = SrcWidth / DestWidth; uint64_t TruncRatio = SrcWidth / DestWidth;
uint64_t BitCastNumElts = VecNumElts * TruncRatio; uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
uint64_t VecOpIdx = Cst->getZExtValue(); uint64_t VecOpIdx = Cst->getZExtValue();
uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1 uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
: VecOpIdx * TruncRatio; : VecOpIdx * TruncRatio;
assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() && assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
"overflow 32-bits"); "overflow 32-bits");
auto *BitCastTo = FixedVectorType::get(DestTy, BitCastNumElts); auto *BitCastTo =
VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo); Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx)); return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
} }
@ -1974,12 +1975,9 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
unsigned PtrSize = DL.getPointerSizeInBits(AS); unsigned PtrSize = DL.getPointerSizeInBits(AS);
if (TySize != PtrSize) { if (TySize != PtrSize) {
Type *IntPtrTy = DL.getIntPtrType(CI.getContext(), AS); Type *IntPtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (auto *VecTy = dyn_cast<VectorType>(Ty)) {
// Handle vectors of pointers. // Handle vectors of pointers.
// FIXME: what should happen for scalable vectors? if (auto *VecTy = dyn_cast<VectorType>(Ty))
IntPtrTy = FixedVectorType::get( IntPtrTy = VectorType::get(IntPtrTy, VecTy->getElementCount());
IntPtrTy, cast<FixedVectorType>(VecTy)->getNumElements());
}
Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy); Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
@ -2660,13 +2658,11 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
// a bitcast to a vector with the same # elts. // a bitcast to a vector with the same # elts.
Value *ShufOp0 = Shuf->getOperand(0); Value *ShufOp0 = Shuf->getOperand(0);
Value *ShufOp1 = Shuf->getOperand(1); Value *ShufOp1 = Shuf->getOperand(1);
unsigned NumShufElts = auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
cast<FixedVectorType>(Shuf->getType())->getNumElements(); auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
unsigned NumSrcVecElts =
cast<FixedVectorType>(ShufOp0->getType())->getNumElements();
if (Shuf->hasOneUse() && DestTy->isVectorTy() && if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
cast<FixedVectorType>(DestTy)->getNumElements() == NumShufElts && cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
NumShufElts == NumSrcVecElts) { ShufElts == SrcVecElts) {
BitCastInst *Tmp; BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then // If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow // evaluating the shuffle in the casted destination's type will allow
@ -2689,8 +2685,9 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
// TODO: We should match the related pattern for bitreverse. // TODO: We should match the related pattern for bitreverse.
if (DestTy->isIntegerTy() && if (DestTy->isIntegerTy() &&
DL.isLegalInteger(DestTy->getScalarSizeInBits()) && DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 && SrcTy->getScalarSizeInBits() == 8 &&
Shuf->hasOneUse() && Shuf->isReverse()) { ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
Shuf->isReverse()) {
assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op"); assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op");
Function *Bswap = Function *Bswap =
@ -2730,12 +2727,9 @@ Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
Type *DestElemTy = DestTy->getElementType(); Type *DestElemTy = DestTy->getElementType();
if (SrcTy->getElementType() != DestElemTy) { if (SrcTy->getElementType() != DestElemTy) {
Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace()); Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
// Handle vectors of pointers. // Handle vectors of pointers.
// FIXME: what should happen for scalable vectors? if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
MidTy = FixedVectorType::get(MidTy, MidTy = VectorType::get(MidTy, VT->getElementCount());
cast<FixedVectorType>(VT)->getNumElements());
}
Value *NewBitCast = Builder.CreateBitCast(Src, MidTy); Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
return new AddrSpaceCastInst(NewBitCast, CI.getType()); return new AddrSpaceCastInst(NewBitCast, CI.getType());

View File

@ -102,6 +102,16 @@ define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 ad
ret <4 x float addrspace(2)*> %y ret <4 x float addrspace(2)*> %y
} }
; Scalable-vector counterpart of combine_addrspacecast_types_vector: an
; addrspacecast that also changes the pointee type (i32 to float) is expected
; to be split into a bitcast that stays in addrspace(1), followed by an
; addrspacecast to addrspace(2). The <vscale x 4 x ...> element count is kept
; on both resulting instructions.
define <vscale x 4 x float addrspace(2)*> @combine_addrspacecast_types_scalevector(<vscale x 4 x i32 addrspace(1)*> %x) nounwind {
; CHECK-LABEL: @combine_addrspacecast_types_scalevector(
; CHECK-NEXT: bitcast <vscale x 4 x i32 addrspace(1)*> %x to <vscale x 4 x float addrspace(1)*>
; CHECK-NEXT: addrspacecast <vscale x 4 x float addrspace(1)*> %1 to <vscale x 4 x float addrspace(2)*>
; CHECK-NEXT: ret
%y = addrspacecast <vscale x 4 x i32 addrspace(1)*> %x to <vscale x 4 x float addrspace(2)*>
ret <vscale x 4 x float addrspace(2)*> %y
}
define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) { define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) {
; CHECK-LABEL: @canonicalize_addrspacecast( ; CHECK-LABEL: @canonicalize_addrspacecast(
; CHECK-NEXT: getelementptr [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0 ; CHECK-NEXT: getelementptr [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0

View File

@ -1,27 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s ; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i1 @test1(i32 *%x) nounwind { define i1 @test1(i32 *%x) nounwind {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i32* [[X:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[TMP2]]
;
entry: entry:
; CHECK: test1
; CHECK: ptrtoint i32* %x to i64
%0 = ptrtoint i32* %x to i1 %0 = ptrtoint i32* %x to i1
ret i1 %0 ret i1 %0
} }
define i32* @test2(i128 %x) nounwind { define i32* @test2(i128 %x) nounwind {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[X:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i32*
; CHECK-NEXT: ret i32* [[TMP1]]
;
entry: entry:
; CHECK: test2
; CHECK: inttoptr i64 %0 to i32*
%0 = inttoptr i128 %x to i32* %0 = inttoptr i128 %x to i32*
ret i32* %0 ret i32* %0
} }
; PR3574 ; PR3574
; CHECK: f0
; CHECK: %1 = zext i32 %a0 to i64
; CHECK: ret i64 %1
define i64 @f0(i32 %a0) nounwind { define i64 @f0(i32 %a0) nounwind {
; CHECK-LABEL: @f0(
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A0:%.*]] to i64
; CHECK-NEXT: ret i64 [[TMP1]]
;
%t0 = inttoptr i32 %a0 to i8* %t0 = inttoptr i32 %a0 to i8*
%t1 = ptrtoint i8* %t0 to i64 %t1 = ptrtoint i8* %t0 to i64
ret i64 %t1 ret i64 %t1
@ -29,32 +40,50 @@ define i64 @f0(i32 %a0) nounwind {
define <4 x i32> @test4(<4 x i8*> %arg) nounwind { define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
; CHECK-LABEL: @test4( ; CHECK-LABEL: @test4(
; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x i8*> [[ARG:%.*]] to <4 x i64>
; CHECK: trunc <4 x i64> %1 to <4 x i32> ; CHECK-NEXT: [[P1:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[P1]]
;
%p1 = ptrtoint <4 x i8*> %arg to <4 x i32> %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
ret <4 x i32> %p1 ret <4 x i32> %p1
} }
; Scalable-vector counterpart of test4: a ptrtoint from a vector of pointers
; to a narrower integer element type (i32) is expected to be rewritten as a
; ptrtoint to i64 elements followed by a trunc to i32. The i64 width matches
; the 64-bit pointer size declared in this file's target datalayout.
define <vscale x 4 x i32> @testvscale4(<vscale x 4 x i8*> %arg) nounwind {
; CHECK-LABEL: @testvscale4(
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <vscale x 4 x i8*> [[ARG:%.*]] to <vscale x 4 x i64>
; CHECK-NEXT: [[P1:%.*]] = trunc <vscale x 4 x i64> [[TMP1]] to <vscale x 4 x i32>
; CHECK-NEXT: ret <vscale x 4 x i32> [[P1]]
;
%p1 = ptrtoint <vscale x 4 x i8*> %arg to <vscale x 4 x i32>
ret <vscale x 4 x i32> %p1
}
define <4 x i128> @test5(<4 x i8*> %arg) nounwind { define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
; CHECK-LABEL: @test5( ; CHECK-LABEL: @test5(
; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x i8*> [[ARG:%.*]] to <4 x i64>
; CHECK: zext <4 x i64> %1 to <4 x i128> ; CHECK-NEXT: [[P1:%.*]] = zext <4 x i64> [[TMP1]] to <4 x i128>
; CHECK-NEXT: ret <4 x i128> [[P1]]
;
%p1 = ptrtoint <4 x i8*> %arg to <4 x i128> %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
ret <4 x i128> %p1 ret <4 x i128> %p1
} }
define <4 x i8*> @test6(<4 x i32> %arg) nounwind { define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
; CHECK-LABEL: @test6( ; CHECK-LABEL: @test6(
; CHECK: zext <4 x i32> %arg to <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[ARG:%.*]] to <4 x i64>
; CHECK: inttoptr <4 x i64> %1 to <4 x i8*> ; CHECK-NEXT: [[P1:%.*]] = inttoptr <4 x i64> [[TMP1]] to <4 x i8*>
; CHECK-NEXT: ret <4 x i8*> [[P1]]
;
%p1 = inttoptr <4 x i32> %arg to <4 x i8*> %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
ret <4 x i8*> %p1 ret <4 x i8*> %p1
} }
define <4 x i8*> @test7(<4 x i128> %arg) nounwind { define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
; CHECK-LABEL: @test7( ; CHECK-LABEL: @test7(
; CHECK: trunc <4 x i128> %arg to <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i128> [[ARG:%.*]] to <4 x i64>
; CHECK: inttoptr <4 x i64> %1 to <4 x i8*> ; CHECK-NEXT: [[P1:%.*]] = inttoptr <4 x i64> [[TMP1]] to <4 x i8*>
; CHECK-NEXT: ret <4 x i8*> [[P1]]
;
%p1 = inttoptr <4 x i128> %arg to <4 x i8*> %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
ret <4 x i8*> %p1 ret <4 x i8*> %p1
} }

View File

@ -18,6 +18,23 @@ define i32 @shrinkExtractElt_i64_to_i32_0(<3 x i64> %x) {
ret i32 %t ret i32 %t
} }
; Scalable-vector counterpart of shrinkExtractElt_i64_to_i32_0: truncating an
; extracted i64 element to i32 is expected to become a bitcast of the source
; vector to <vscale x 6 x i32> followed by an extractelement of the i32 lane.
; Lane 0 of the i64 vector corresponds to i32 lane 0 in the little-endian run
; and to i32 lane 1 in the big-endian run, as the two sets of checks show.
define i32 @vscale_shrinkExtractElt_i64_to_i32_0(<vscale x 3 x i64> %x) {
; LE-LABEL: @vscale_shrinkExtractElt_i64_to_i32_0(
; LE-NEXT: [[TMP1:%.*]] = bitcast <vscale x 3 x i64> [[X:%.*]] to <vscale x 6 x i32>
; LE-NEXT: [[T:%.*]] = extractelement <vscale x 6 x i32> [[TMP1]], i32 0
; LE-NEXT: ret i32 [[T]]
;
; BE-LABEL: @vscale_shrinkExtractElt_i64_to_i32_0(
; BE-NEXT: [[TMP1:%.*]] = bitcast <vscale x 3 x i64> [[X:%.*]] to <vscale x 6 x i32>
; BE-NEXT: [[T:%.*]] = extractelement <vscale x 6 x i32> [[TMP1]], i32 1
; BE-NEXT: ret i32 [[T]]
;
%e = extractelement <vscale x 3 x i64> %x, i32 0
%t = trunc i64 %e to i32
ret i32 %t
}
define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) { define i32 @shrinkExtractElt_i64_to_i32_1(<3 x i64> %x) {
; LE-LABEL: @shrinkExtractElt_i64_to_i32_1( ; LE-LABEL: @shrinkExtractElt_i64_to_i32_1(
; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32> ; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x i64> [[X:%.*]] to <6 x i32>

View File

@ -59,6 +59,20 @@ define float @test6(<4 x float> %X) {
ret float %r ret float %r
} }
; Scalable-vector counterpart of test6: the float -> i32 -> float bitcast pair
; wrapped around a zeroinitializer-mask shufflevector is expected to fold
; away, leaving a shufflevector that operates directly on
; <vscale x 4 x float> and feeds the extractelement.
define float @testvscale6(<vscale x 4 x float> %X) {
; CHECK-LABEL: @testvscale6(
; CHECK-NEXT: [[T:%.*]] = shufflevector <vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x float> [[T]], i32 0
; CHECK-NEXT: ret float [[R]]
;
%X1 = bitcast <vscale x 4 x float> %X to <vscale x 4 x i32>
%t = shufflevector <vscale x 4 x i32> %X1, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%t2 = bitcast <vscale x 4 x i32> %t to <vscale x 4 x float>
%r = extractelement <vscale x 4 x float> %t2, i32 0
ret float %r
}
define <4 x float> @test7(<4 x float> %x) { define <4 x float> @test7(<4 x float> %x) {
; CHECK-LABEL: @test7( ; CHECK-LABEL: @test7(
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>