[X86] Improve parity idiom recognition to handle (and (truncate (ctpop X)), 1).

Fixes part of PR46954
This commit is contained in:
Craig Topper 2020-08-01 22:05:10 -07:00
parent f89d59a085
commit 56166a3a52
2 changed files with 49 additions and 103 deletions

View File

@ -42744,7 +42744,22 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
// Turn it into a series of XORs and a setnp.
static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// RHS needs to be 1.
if (!isOneConstant(N1))
return SDValue();
// Popcnt may be truncated.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
N0 = N0.getOperand(0);
// LHS needs to be a single use CTPOP.
if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
return SDValue();
EVT VT = N0.getValueType();
// We only support 64-bit and 32-bit. 64-bit requires special handling
// unless the 64-bit popcnt instruction is legal.
@ -42755,17 +42770,6 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT))
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// LHS needs to be a single use CTPOP.
if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse())
return SDValue();
// RHS needs to be 1.
if (!isOneConstant(N1))
return SDValue();
SDLoc DL(N);
SDValue X = N0.getOperand(0);
@ -42782,7 +42786,7 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32,
DAG.getNode(ISD::CTPOP, DL, MVT::i32, X),
DAG.getConstant(1, DL, MVT::i32));
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity);
return DAG.getZExtOrTrunc(Parity, DL, N->getValueType(0));
}
assert(VT == MVT::i32 && "Unexpected VT!");
@ -42803,8 +42807,8 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
// Copy the inverse of the parity flag into a register with setcc.
SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
// Zero extend to original type.
return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
// Extend or truncate to the original type.
return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0));
}

View File

@ -96,71 +96,35 @@ define i32 @parity_64_trunc(i64 %x) {
; X86-NOPOPCNT-LABEL: parity_64_trunc:
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOPOPCNT-NEXT: movl %ecx, %edx
; X86-NOPOPCNT-NEXT: shrl %edx
; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NOPOPCNT-NEXT: subl %edx, %ecx
; X86-NOPOPCNT-NEXT: movl %ecx, %edx
; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOPOPCNT-NEXT: shrl $2, %ecx
; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NOPOPCNT-NEXT: addl %edx, %ecx
; X86-NOPOPCNT-NEXT: movl %ecx, %edx
; X86-NOPOPCNT-NEXT: shrl $4, %edx
; X86-NOPOPCNT-NEXT: addl %ecx, %edx
; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F
; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
; X86-NOPOPCNT-NEXT: shrl $24, %ecx
; X86-NOPOPCNT-NEXT: movl %eax, %edx
; X86-NOPOPCNT-NEXT: shrl %edx
; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NOPOPCNT-NEXT: subl %edx, %eax
; X86-NOPOPCNT-NEXT: movl %eax, %edx
; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NOPOPCNT-NEXT: shrl $2, %eax
; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NOPOPCNT-NEXT: addl %edx, %eax
; X86-NOPOPCNT-NEXT: movl %eax, %edx
; X86-NOPOPCNT-NEXT: shrl $4, %edx
; X86-NOPOPCNT-NEXT: addl %eax, %edx
; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F
; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
; X86-NOPOPCNT-NEXT: shrl $24, %eax
; X86-NOPOPCNT-NEXT: addl %ecx, %eax
; X86-NOPOPCNT-NEXT: andl $1, %eax
; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %eax
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
; X86-NOPOPCNT-NEXT: setnp %al
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_64_trunc:
; X64-NOPOPCNT: # %bb.0:
; X64-NOPOPCNT-NEXT: movq %rdi, %rax
; X64-NOPOPCNT-NEXT: shrq %rax
; X64-NOPOPCNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NOPOPCNT-NEXT: andq %rax, %rcx
; X64-NOPOPCNT-NEXT: subq %rcx, %rdi
; X64-NOPOPCNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NOPOPCNT-NEXT: movq %rdi, %rcx
; X64-NOPOPCNT-NEXT: andq %rax, %rcx
; X64-NOPOPCNT-NEXT: shrq $2, %rdi
; X64-NOPOPCNT-NEXT: andq %rax, %rdi
; X64-NOPOPCNT-NEXT: addq %rcx, %rdi
; X64-NOPOPCNT-NEXT: movq %rdi, %rax
; X64-NOPOPCNT-NEXT: shrq $4, %rax
; X64-NOPOPCNT-NEXT: addq %rdi, %rax
; X64-NOPOPCNT-NEXT: movabsq $76296276040158991, %rcx # imm = 0x10F0F0F0F0F0F0F
; X64-NOPOPCNT-NEXT: andq %rax, %rcx
; X64-NOPOPCNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
; X64-NOPOPCNT-NEXT: imulq %rcx, %rax
; X64-NOPOPCNT-NEXT: shrq $56, %rax
; X64-NOPOPCNT-NEXT: andl $1, %eax
; X64-NOPOPCNT-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOPOPCNT-NEXT: shrq $32, %rax
; X64-NOPOPCNT-NEXT: xorl %edi, %eax
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %eax, %ecx
; X64-NOPOPCNT-NEXT: movl %ecx, %edx
; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
; X64-NOPOPCNT-NEXT: xorb %cl, %dl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_64_trunc:
; X86-POPCNT: # %bb.0:
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: addl %ecx, %eax
; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT: popcntl %eax, %eax
; X86-POPCNT-NEXT: andl $1, %eax
; X86-POPCNT-NEXT: retl
;
@ -181,43 +145,21 @@ define i8 @parity_32_trunc(i32 %x) {
; X86-NOPOPCNT: # %bb.0:
; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: shrl %ecx
; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NOPOPCNT-NEXT: subl %ecx, %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NOPOPCNT-NEXT: shrl $2, %eax
; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NOPOPCNT-NEXT: addl %ecx, %eax
; X86-NOPOPCNT-NEXT: movl %eax, %ecx
; X86-NOPOPCNT-NEXT: shrl $4, %ecx
; X86-NOPOPCNT-NEXT: addl %eax, %ecx
; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F
; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NOPOPCNT-NEXT: shrl $24, %eax
; X86-NOPOPCNT-NEXT: andb $1, %al
; X86-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax
; X86-NOPOPCNT-NEXT: shrl $16, %ecx
; X86-NOPOPCNT-NEXT: xorl %eax, %ecx
; X86-NOPOPCNT-NEXT: xorb %ch, %cl
; X86-NOPOPCNT-NEXT: setnp %al
; X86-NOPOPCNT-NEXT: retl
;
; X64-NOPOPCNT-LABEL: parity_32_trunc:
; X64-NOPOPCNT: # %bb.0:
; X64-NOPOPCNT-NEXT: movl %edi, %eax
; X64-NOPOPCNT-NEXT: shrl %eax
; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NOPOPCNT-NEXT: subl %eax, %edi
; X64-NOPOPCNT-NEXT: movl %edi, %eax
; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NOPOPCNT-NEXT: shrl $2, %edi
; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NOPOPCNT-NEXT: addl %eax, %edi
; X64-NOPOPCNT-NEXT: movl %edi, %eax
; X64-NOPOPCNT-NEXT: shrl $4, %eax
; X64-NOPOPCNT-NEXT: addl %edi, %eax
; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F
; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; X64-NOPOPCNT-NEXT: shrl $24, %eax
; X64-NOPOPCNT-NEXT: andb $1, %al
; X64-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-NOPOPCNT-NEXT: shrl $16, %eax
; X64-NOPOPCNT-NEXT: xorl %edi, %eax
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
; X64-NOPOPCNT-NEXT: shrl $8, %ecx
; X64-NOPOPCNT-NEXT: xorb %al, %cl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
; X86-POPCNT-LABEL: parity_32_trunc: