forked from OSchip/llvm-project
[X86][SSE] Add missing X86ISD::ANDNP combines.
llvm-svn: 292767
This commit is contained in:
parent
7e1cc97513
commit
0218ce1080
|
@ -32996,6 +32996,20 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
|
|||
return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
|
||||
}
|
||||
|
||||
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
|
||||
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// ANDNP(0, x) -> x
|
||||
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
|
||||
return N->getOperand(1);
|
||||
|
||||
// ANDNP(x, 0) -> 0
|
||||
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
|
||||
return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI) {
|
||||
// BT ignores high bits in the bit index operand.
|
||||
|
@ -34062,6 +34076,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
|
||||
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
|
||||
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
|
||||
case X86ISD::ANDNP: return combineAndnp(N, DAG, Subtarget);
|
||||
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
|
||||
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
|
||||
case X86ISD::FXOR:
|
||||
|
|
|
@ -393,15 +393,7 @@ define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
|
|||
; SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
|
||||
; SSE-NEXT: pandn %xmm1, %xmm2
|
||||
; SSE-NEXT: por %xmm2, %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
|
||||
; SSE-NEXT: pand %xmm1, %xmm0
|
||||
; SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; SSE-NEXT: pand %xmm1, %xmm0
|
||||
; SSE-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: _clearupper8xi16b:
|
||||
|
|
|
@ -449,11 +449,7 @@ define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
|
|||
; SSE2-NEXT: movd %eax, %xmm2
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; SSE2-NEXT: pand %xmm1, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE2-NEXT: por %xmm1, %xmm0
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_v16i8_z123456789ABCDEz:
|
||||
|
@ -464,11 +460,7 @@ define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
|
|||
; SSE3-NEXT: movd %eax, %xmm2
|
||||
; SSE3-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; SSE3-NEXT: pand %xmm1, %xmm0
|
||||
; SSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE3-NEXT: pandn %xmm2, %xmm1
|
||||
; SSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_v16i8_z123456789ABCDEz:
|
||||
|
@ -503,17 +495,8 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
|
|||
; SSE2-NEXT: movd %eax, %xmm3
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
|
||||
; SSE2-NEXT: pand %xmm4, %xmm1
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm4
|
||||
; SSE2-NEXT: por %xmm4, %xmm1
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE2-NEXT: por %xmm2, %xmm0
|
||||
; SSE2-NEXT: por %xmm2, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
|
||||
|
@ -524,17 +507,8 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
|
|||
; SSE3-NEXT: movd %eax, %xmm3
|
||||
; SSE3-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE3-NEXT: por %xmm2, %xmm0
|
||||
; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; SSE3-NEXT: pand %xmm2, %xmm0
|
||||
; SSE3-NEXT: pxor %xmm3, %xmm3
|
||||
; SSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
|
||||
; SSE3-NEXT: pand %xmm4, %xmm1
|
||||
; SSE3-NEXT: pandn %xmm3, %xmm4
|
||||
; SSE3-NEXT: por %xmm4, %xmm1
|
||||
; SSE3-NEXT: pand %xmm2, %xmm1
|
||||
; SSE3-NEXT: pandn %xmm3, %xmm2
|
||||
; SSE3-NEXT: por %xmm2, %xmm0
|
||||
; SSE3-NEXT: por %xmm2, %xmm1
|
||||
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE3-NEXT: andps {{.*}}(%rip), %xmm1
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
|
||||
|
|
Loading…
Reference in New Issue