[DAGCombiner] don't group bswap with casts in logic hoisting fold

This was probably organized as it was because bswap is a unary op.
But that's where the similarity to the other opcodes ends. We should
not limit this transform to scalars, and we should not try it if
either input has other uses. This is another step towards trying to
clean this whole function up to prevent it from causing infinite loops
and memory explosions. 

Earlier commits in this series:
rL348501
rL348508
rL348518

llvm-svn: 348534
This commit is contained in:
Sanjay Patel 2018-12-06 22:10:44 +00:00
parent b0b61955a1
commit 70af85b0ac
2 changed files with 26 additions and 31 deletions
llvm
lib/CodeGen/SelectionDAG
test/CodeGen/X86

View File

@ -3718,13 +3718,13 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
// FIXME: We should check number of uses of the operands to not increase
// the instruction count for all transforms.
// Handle size-changing casts.
EVT Op0VT = N0.getOperand(0).getValueType();
switch (HandOpcode) {
case ISD::ANY_EXTEND:
case ISD::TRUNCATE:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::BSWAP:
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
@ -3732,7 +3732,7 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
// We need matching integer source types.
// Do not hoist logic op inside of a vector extend, since it may combine
// into a vsetcc.
// TODO: Should the vector check apply to truncate and bswap though?
// TODO: Should the vector check apply to truncate though?
if (VT.isVector() || Op0VT != N1.getOperand(0).getValueType())
return SDValue();
// Don't create an illegal op during or after legalization.
@ -3757,10 +3757,8 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return DAG.getNode(HandOpcode, SDLoc(N), VT, Logic);
}
// For each of OP in SHL/SRL/SRA/AND...
// fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
// fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
// fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
// For binops SHL/SRL/SRA/AND:
// logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
@ -3773,6 +3771,17 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return DAG.getNode(HandOpcode, SDLoc(N), VT, Logic, N0.getOperand(1));
}
// Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
if (HandOpcode == ISD::BSWAP) {
// If either operand has other uses, this transform is not an improvement.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, SDLoc(N0), Op0VT,
N0.getOperand(0), N1.getOperand(0));
AddToWorklist(Logic.getNode());
return DAG.getNode(HandOpcode, SDLoc(N), VT, Logic);
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization up until type legalization, before
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by

View File

@ -76,48 +76,34 @@ define <4 x i32> @test2(<4 x i32> %v) {
define <4 x i32> @or_bswap(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %p1, <4 x i32>* %p2) {
; CHECK-NOSSSE3-LABEL: or_bswap:
; CHECK-NOSSSE3: # %bb.0:
; CHECK-NOSSSE3-NEXT: pxor %xmm2, %xmm2
; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm3
; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; CHECK-NOSSSE3-NEXT: por %xmm1, %xmm0
; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm4 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm4
; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm0
; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[3,2,1,0,4,5,6,7]
; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm0
; CHECK-NOSSSE3-NEXT: por %xmm4, %xmm0
; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
; CHECK-NOSSSE3-NEXT: retq
;
; CHECK-SSSE3-LABEL: or_bswap:
; CHECK-SSSE3: # %bb.0:
; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0
; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1
; CHECK-SSSE3-NEXT: por %xmm1, %xmm0
; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-SSSE3-NEXT: retq
;
; CHECK-AVX-LABEL: or_bswap:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-AVX-NEXT: retq
;
; CHECK-WIDE-AVX-LABEL: or_bswap:
; CHECK-WIDE-AVX: # %bb.0:
; CHECK-WIDE-AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-WIDE-AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-WIDE-AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-WIDE-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-WIDE-AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; CHECK-WIDE-AVX-NEXT: retq
%xt = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %x)
%yt = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %y)