[X86] Teach X86TargetLowering::targetShrinkDemandedConstant to set non-demanded bits if it helps created an and mask that can be matched as a zero extend.

I had to modify the bswap recognition to allow unshrunk masks to make this work.

Fixes PR36689.

Differential Revision: https://reviews.llvm.org/D44442

llvm-svn: 327530
This commit is contained in:
Craig Topper 2018-03-14 16:55:15 +00:00
parent e392ce4315
commit b36cb20ef9
5 changed files with 37 additions and 16 deletions

View File

@ -4275,7 +4275,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01C || N01C->getZExtValue() != 0xFF00)
// Also handle 0xffff since the LHS is guaranteed to have zeros there.
// This is needed for X86.
if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
N01C->getZExtValue() != 0xFFFF))
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
@ -4322,7 +4325,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
if (!N101C || N101C->getZExtValue() != 0xFF00)
// Also allow 0xFFFF since the bits will be shifted out. This is needed
// for X86.
if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
N101C->getZExtValue() != 0xFFFF))
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
@ -4393,6 +4399,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return false;
case 0xFF: MaskByteOffset = 0; break;
case 0xFF00: MaskByteOffset = 1; break;
case 0xFFFF:
// In case demanded bits didn't clear the bits that will be shifted out.
// This is needed for X86.
if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
MaskByteOffset = 1;
break;
}
return false;
case 0xFF0000: MaskByteOffset = 2; break;
case 0xFF000000: MaskByteOffset = 3; break;
}

View File

@ -28350,9 +28350,9 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
if (ZeroExtendMask == Mask)
return true;
// Make sure the bits in the ZeroExtendMask are also set in the original mask.
// TODO: We should be able to set bits that aren't demanded too.
if (!ZeroExtendMask.isSubsetOf(Mask))
// Make sure the new mask can be represented by a combination of mask bits
// and non-demanded bits.
if (!ZeroExtendMask.isSubsetOf(Mask | ~Demanded))
return false;
// Replace the constant with the zero extend mask.

View File

@ -45,9 +45,9 @@ entry:
define zeroext i1 @f4(i32 %x) {
; CHECK-LABEL: f4:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: shrl $15, %edi
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: shrl $15, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:

View File

@ -514,8 +514,6 @@ define void @f3() #0 {
; 686-O0-NEXT: movl %ecx, %edi
; 686-O0-NEXT: xorl %esi, %edi
; 686-O0-NEXT: andl %edi, %eax
; 686-O0-NEXT: movb %al, %dl
; 686-O0-NEXT: movzbl %dl, %eax
; 686-O0-NEXT: orl %eax, %ecx
; 686-O0-NEXT: movl %ecx, (%esp)
; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp)
@ -545,9 +543,8 @@ define void @f3() #0 {
; 686-NEXT: movl var_16, %edx
; 686-NEXT: xorl %ecx, %edx
; 686-NEXT: andl %eax, %edx
; 686-NEXT: movzbl %dl, %eax
; 686-NEXT: orl %ecx, %eax
; 686-NEXT: movl %eax, (%esp)
; 686-NEXT: orl %ecx, %edx
; 686-NEXT: movl %edx, (%esp)
; 686-NEXT: movl $0, {{[0-9]+}}(%esp)
; 686-NEXT: movl %ecx, var_46
; 686-NEXT: movl %ebp, %esp

View File

@ -76,9 +76,8 @@ define i32 @test6(i32 %x) {
define i32 @test7(i32 %x) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: andl $65534, %edi # imm = 0xFFFE
; CHECK-NEXT: leal 1(%rdi), %eax
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: retq
%y = and i32 %x, 65534
%z = or i32 %y, 1
@ -138,3 +137,14 @@ define i64 @mul_neg_one(i64 %x) {
ret i64 %r
}
define i32 @PR36689(i32*) {
; CHECK-LABEL: PR36689:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: orl $255, %eax
; CHECK-NEXT: retq
%2 = load i32, i32* %0
%3 = and i32 %2, 65280
%4 = or i32 %3, 255
ret i32 %4
}