forked from OSchip/llvm-project
[DAG] Further improve the logic in DAGCombiner that folds a pair of shuffles into a single shuffle if the resulting mask is legal.
This patch teaches the DAGCombiner how to fold shuffles according to the following new rules: 1. shuffle(shuffle(x, y), undef) -> x 2. shuffle(shuffle(x, y), undef) -> y 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef) 4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef) The backend avoids to combine shuffles according to rules 3. and 4. if the resulting shuffle does not have a legal mask. This is to avoid introducing illegal shuffles that are potentially expanded into a sub-optimal sequence of target specific dag nodes during vector legalization. Added test case combine-vec-shuffle-2.ll to verify that we correctly triggers the new rules when combining shuffles. llvm-svn: 212748
This commit is contained in:
parent
7cc27649a6
commit
b2921c7ca0
|
@ -10677,17 +10677,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||
}
|
||||
|
||||
// If this shuffle node is simply a swizzle of another shuffle node,
|
||||
// and it reverses the swizzle of the previous shuffle then we can
|
||||
// optimize shuffle(shuffle(x, undef), undef) -> x.
|
||||
// then try to simplify it.
|
||||
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
|
||||
N1.getOpcode() == ISD::UNDEF) {
|
||||
|
||||
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
|
||||
|
||||
// Shuffle nodes can only reverse shuffles with a single non-undef value.
|
||||
if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
|
||||
// The incoming shuffle must be of the same type as the result of the
|
||||
// current shuffle.
|
||||
assert(OtherSV->getOperand(0).getValueType() == VT &&
|
||||
|
@ -10704,27 +10699,69 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||
Idx = OtherSV->getMaskElt(Idx);
|
||||
Mask.push_back(Idx);
|
||||
}
|
||||
|
||||
bool CommuteOperands = false;
|
||||
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
|
||||
// To be valid, the combine shuffle mask should only reference elements
|
||||
// from one of the two vectors in input to the inner shufflevector.
|
||||
bool IsValidMask = true;
|
||||
for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
|
||||
// See if the combined mask only reference undefs or elements coming
|
||||
// from the first shufflevector operand.
|
||||
IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
|
||||
|
||||
if (!IsValidMask) {
|
||||
IsValidMask = true;
|
||||
for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
|
||||
// Check that all the elements come from the second shuffle operand.
|
||||
IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
|
||||
CommuteOperands = IsValidMask;
|
||||
}
|
||||
|
||||
// Early exit if the combined shuffle mask is not valid.
|
||||
if (!IsValidMask)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// See if this pair of shuffles can be safely folded according to either
|
||||
// of the following rules:
|
||||
// shuffle(shuffle(x, y), undef) -> x
|
||||
// shuffle(shuffle(x, undef), undef) -> x
|
||||
// shuffle(shuffle(x, y), undef) -> y
|
||||
bool IsIdentityMask = true;
|
||||
unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
|
||||
for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
|
||||
// Skip Undefs.
|
||||
if (Mask[i] < 0)
|
||||
continue;
|
||||
|
||||
// The combined shuffle must map each index to itself.
|
||||
IsIdentityMask = (unsigned)Mask[i] == i;
|
||||
IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
|
||||
}
|
||||
|
||||
if (IsIdentityMask)
|
||||
// optimize shuffle(shuffle(x, undef), undef) -> x.
|
||||
|
||||
if (IsIdentityMask) {
|
||||
if (CommuteOperands)
|
||||
// optimize shuffle(shuffle(x, y), undef) -> y.
|
||||
return OtherSV->getOperand(1);
|
||||
|
||||
// optimize shuffle(shuffle(x, undef), undef) -> x
|
||||
// optimize shuffle(shuffle(x, y), undef) -> x
|
||||
return OtherSV->getOperand(0);
|
||||
}
|
||||
|
||||
// It may still be beneficial to combine the two shuffles if the
|
||||
// resulting shuffle is legal.
|
||||
// shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
|
||||
if (TLI.isShuffleMaskLegal(Mask, VT))
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
|
||||
if (TLI.isShuffleMaskLegal(Mask, VT)) {
|
||||
if (!CommuteOperands)
|
||||
// shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
|
||||
// shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
|
||||
&Mask[0]);
|
||||
|
||||
// shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
|
||||
return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
|
||||
&Mask[0]);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
|
||||
|
||||
; Check that DAGCombiner correctly folds the following pairs of shuffles
|
||||
; using the following rules:
|
||||
; 1. shuffle(shuffle(x, y), undef) -> x
|
||||
; 2. shuffle(shuffle(x, y), undef) -> y
|
||||
; 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
|
||||
; 4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y)
|
||||
;
|
||||
; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
|
||||
|
||||
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test1
|
||||
; Mask: [3,0,0,1]
|
||||
; CHECK: pshufd $67
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test2
|
||||
; Mask: [2,0,0,3]
|
||||
; CHECK: pshufd $-62
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test3
|
||||
; Mask: [2,0,0,3]
|
||||
; CHECK: pshufd $-62
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test4
|
||||
; Mask: [0,0,0,1]
|
||||
; CHECK: pshufd $64
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test5
|
||||
; Mask: [1,1]
|
||||
; CHECK: movhlps
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test6
|
||||
; Mask: [2,0,0,0]
|
||||
; CHECK: pshufd $2
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test7
|
||||
; Mask: [0,2,0,2]
|
||||
; CHECK: pshufd $-120
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test8
|
||||
; Mask: [1,0,3,0]
|
||||
; CHECK: pshufd $49
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test9
|
||||
; Mask: [1,3,0,2]
|
||||
; CHECK: pshufd $-115
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test10
|
||||
; Mask: [1,0,1,0]
|
||||
; CHECK: pshufd $17
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test11
|
||||
; Mask: [1,0,2,1]
|
||||
; CHECK: pshufd $97
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test12
|
||||
; Mask: [0,0,0,0]
|
||||
; CHECK: pshufd $0
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
|
||||
; The following pair of shuffles is folded into vector %A.
|
||||
define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test13
|
||||
; CHECK-NOT: pshufd
|
||||
; CHECK: ret
|
||||
|
||||
|
||||
; The following pair of shuffles is folded into vector %B.
|
||||
define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
|
||||
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
|
||||
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
; CHECK-LABEL: test14
|
||||
; CHECK-NOT: pshufd
|
||||
; CHECK: ret
|
||||
|
|
@ -33,7 +33,9 @@ entry:
|
|||
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
|
||||
entry:
|
||||
; CHECK-LABEL: shuf3:
|
||||
; CHECK: shufps
|
||||
; CHECK-NOT: movlhps
|
||||
; CHECK-NOT: shufps
|
||||
; CHECK: pshufd
|
||||
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
|
Loading…
Reference in New Issue