[InstCombine] combine a shuffle and an extract subvector shuffle

This is part of the missing IR-level folding noted in D52912.
This should be ok as a canonicalization because the new shuffle mask can't
be any more complicated than the existing shuffle mask. If there's some 
target where the shorter vector shuffle is not legal, it should just end up 
expanding to something like the pair of shuffles that we're starting with here.

Differential Revision: https://reviews.llvm.org/D53037

llvm-svn: 344476
This commit is contained in:
Sanjay Patel 2018-10-14 15:25:06 +00:00
parent 38bbf81ade
commit 7181146c6c
2 changed files with 41 additions and 5 deletions

View File

@ -1477,6 +1477,41 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
/// Try to fold an identity-with-extract shuffle whose input is another shuffle
/// into a single shuffle of the inner shuffle's operands. The new mask is the
/// leading part of the inner shuffle's mask, truncated to the result width.
/// Returns the replacement instruction, or nullptr if the pattern is not
/// matched.
static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
  // The outer shuffle must be an identity extract with an undef 2nd operand.
  if (!Shuf.isIdentityWithExtract())
    return nullptr;
  if (!isa<UndefValue>(Shuf.getOperand(1)))
    return nullptr;

  // The extracted-from value must itself be a shuffle with a constant mask.
  Value *InnerLHS, *InnerRHS;
  Constant *InnerMask;
  if (!match(Shuf.getOperand(0),
             m_ShuffleVector(m_Value(InnerLHS), m_Value(InnerRHS),
                             m_Constant(InnerMask))))
    return nullptr;

  // A subvector is being extracted from a shuffle, so the excess elements of
  // the 1st shuffle's mask can be dropped and the extract eliminated.
  //
  // Only identity extracts are handled here: arbitrary shuffle masks are not
  // created as a target-independent transform because there is no guarantee
  // that they lower efficiently.
  //
  // An undef element in the extracting mask carries over to the new mask;
  // every other lane takes the corresponding element of the 1st mask. Example:
  //   shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
  //   shuf X, Y, <C0, undef, C2, undef>
  unsigned NumElts = Shuf.getType()->getVectorNumElements();
  assert(NumElts < InnerMask->getType()->getVectorNumElements() &&
         "Identity with extract must have less elements than its inputs");

  Constant *ExtractMask = Shuf.getMask();
  SmallVector<Constant *, 16> NewMask;
  NewMask.reserve(NumElts);
  for (unsigned Lane = 0; Lane < NumElts; ++Lane) {
    Constant *ExtractElt = ExtractMask->getAggregateElement(Lane);
    if (isa<UndefValue>(ExtractElt))
      NewMask.push_back(ExtractElt);
    else
      NewMask.push_back(InnerMask->getAggregateElement(Lane));
  }
  return new ShuffleVectorInst(InnerLHS, InnerRHS,
                               ConstantVector::get(NewMask));
}
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
@ -1499,6 +1534,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return &SVI;
}
if (Instruction *I = foldIdentityExtractShuffle(SVI))
return I;
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
unsigned LHSWidth = LHS->getType()->getVectorNumElements();

View File

@ -170,12 +170,11 @@ define <8 x i8> @test12a(<8 x i8> %t6, <8 x i8> %t2) {
ret <8 x i8> %t3
}
; TODO: The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle.
; The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle.
define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <3 x i32> <i32 0, i32 2, i32 undef>
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <3 x i8> [[SHUF]], <3 x i8> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i8> [[EXTRACT_SUBV]]
;
%shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> <i32 0, i32 2, i32 0>
@ -183,7 +182,6 @@ define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %extract_subv
}
; TODO:
; Extra uses are ok.
; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle.
; The type of the inputs does not have to match the output type.
@ -194,7 +192,7 @@ define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y
; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]])
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
%shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>