forked from OSchip/llvm-project
[X86] Fix an issue in the matching for ADDUS.
We were basically assuming only one operand of the compare could be an ADD node and using that to swap operands. But we can have a normal add followed by a saturating add. This rewrites the canonicalization to just be based on the condition code. llvm-svn: 340134
This commit is contained in:
parent
a85d7e927b
commit
803912ea57
|
@ -33111,12 +33111,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
|||
SDValue CondLHS = Cond->getOperand(0);
|
||||
SDValue CondRHS = Cond->getOperand(1);
|
||||
|
||||
// Canonicalize ADD to CondRHS to simplify the logic below.
|
||||
if (CondLHS.getOpcode() == ISD::ADD) {
|
||||
std::swap(CondLHS, CondRHS);
|
||||
CC = ISD::getSetCCSwappedOperands(CC);
|
||||
}
|
||||
|
||||
// Check if one of the arms of the VSELECT is vector with all bits set.
|
||||
// If it's on the left side invert the predicate to simplify logic below.
|
||||
SDValue Other;
|
||||
|
@ -33127,10 +33121,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
|||
Other = LHS;
|
||||
}
|
||||
|
||||
// We can test against either of the addition operands.
|
||||
if (Other.getNode() && Other.getNumOperands() == 2 &&
|
||||
(Other.getOperand(0) == CondLHS ||
|
||||
Other.getOperand(1) == CondLHS)) {
|
||||
if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
|
||||
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
|
||||
|
||||
auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
|
||||
|
@ -33138,9 +33129,17 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
|
|||
return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops);
|
||||
};
|
||||
|
||||
// Canonicalize condition operands.
|
||||
if (CC == ISD::SETUGE) {
|
||||
std::swap(CondLHS, CondRHS);
|
||||
CC = ISD::SETULE;
|
||||
}
|
||||
|
||||
// We can test against either of the addition operands.
|
||||
// x <= x+y ? x+y : ~0 --> addus x, y
|
||||
if ((CC == ISD::SETULE) &&
|
||||
Other.getOpcode() == ISD::ADD && Other == CondRHS)
|
||||
// x+y >= x ? x+y : ~0 --> addus x, y
|
||||
if (CC == ISD::SETULE && Other == CondRHS &&
|
||||
(OpLHS == CondLHS || OpRHS == CondLHS))
|
||||
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
|
||||
ADDUSBuilder);
|
||||
}
|
||||
|
|
|
@ -278,34 +278,19 @@ define <8 x i16> @add_addusw(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
|
|||
; SSE-LABEL: add_addusw:
|
||||
; SSE: ## %bb.0:
|
||||
; SSE-NEXT: paddw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xfd,0xca]
|
||||
; SSE-NEXT: paddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xfd,0xc1]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
|
||||
; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4
|
||||
; SSE-NEXT: pxor %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xef,0xca]
|
||||
; SSE-NEXT: pxor %xmm0, %xmm2 ## encoding: [0x66,0x0f,0xef,0xd0]
|
||||
; SSE-NEXT: pcmpgtw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0x65,0xca]
|
||||
; SSE-NEXT: por %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xeb,0xc1]
|
||||
; SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: add_addusw:
|
||||
; AVX2: ## %bb.0:
|
||||
; AVX2-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xfd,0xca]
|
||||
; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1]
|
||||
; AVX2-NEXT: vpminuw %xmm0, %xmm1, %xmm2 ## encoding: [0xc4,0xe2,0x71,0x3a,0xd0]
|
||||
; AVX2-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x75,0xca]
|
||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x76,0xd2]
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xef,0xca]
|
||||
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xeb,0xc0]
|
||||
; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: add_addusw:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
|
||||
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
|
||||
; SKX-NEXT: vpcmpnleuw %xmm0, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3e,0xc8,0x06]
|
||||
; SKX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
|
||||
; SKX-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
|
||||
; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%a = add <8 x i16> %y, %z
|
||||
%b = add <8 x i16> %x, %a
|
||||
|
|
Loading…
Reference in New Issue