diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1415b1e37d15..5dbe29f5fbdb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3104,6 +3104,21 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                    N1.getOperand(0)));
 
+  // A - (A & (B - 1)) -> A & (0 - B)
+  if (N1.getOpcode() == ISD::AND && N1.hasOneUse()) {
+    SDValue A = N1.getOperand(0);
+    SDValue BDec = N1.getOperand(1);
+    if (A != N0)
+      std::swap(A, BDec);
+    if (A == N0 && BDec.getOpcode() == ISD::ADD &&
+        isAllOnesOrAllOnesSplat(BDec->getOperand(1))) {
+      SDValue B = BDec.getOperand(0);
+      SDValue NegB =
+          DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), B);
+      return DAG.getNode(ISD::AND, DL, VT, A, NegB);
+    }
+  }
+
   // fold (X - (-Y * Z)) -> (X + (Y * Z))
   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
diff --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll
index 23ff194908cb..4ad4d115157f 100644
--- a/llvm/test/CodeGen/AArch64/align-down.ll
+++ b/llvm/test/CodeGen/AArch64/align-down.ll
@@ -17,9 +17,8 @@
 define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 ; CHECK-LABEL: t0_32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w8, w0, w8
-; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    and w0, w0, w8
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1
   %bias = and i32 %ptr, %mask
@@ -29,9 +28,8 @@
 define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 ; CHECK-LABEL: t1_64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x8, x1, #1 // =1
-; CHECK-NEXT:    and x8, x0, x8
-; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    neg x8, x1
+; CHECK-NEXT:    and x0, x0, x8
 ; CHECK-NEXT:    ret
   %mask = add i64 %alignment, -1
   %bias = and i64 %ptr, %mask
@@ -42,9 +40,8 @@
 define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 ; CHECK-LABEL: t2_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w8, w8, w0
-; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    and w0, w0, w8
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1
   %bias = and i32 %mask, %ptr ; swapped
@@ -57,9 +54,9 @@
 define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
 ; CHECK-LABEL: t3_extrause0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w9, w1
 ; CHECK-NEXT:    sub w8, w1, #1 // =1
-; CHECK-NEXT:    and w9, w0, w8
-; CHECK-NEXT:    sub w0, w0, w9
+; CHECK-NEXT:    and w0, w0, w9
 ; CHECK-NEXT:    str w8, [x2]
 ; CHECK-NEXT:    ret
   %mask = add i32 %alignment, -1
diff --git a/llvm/test/CodeGen/X86/align-down.ll b/llvm/test/CodeGen/X86/align-down.ll
index 261740fee5f8..b546324f95e7 100644
--- a/llvm/test/CodeGen/X86/align-down.ll
+++ b/llvm/test/CodeGen/X86/align-down.ll
@@ -18,19 +18,16 @@
 define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
 ; X86-LABEL: t0_32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    decl %ecx
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t0_32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   %bias = and i32 %ptr, %mask
@@ -40,26 +37,19 @@
 define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
 ; X86-LABEL: t1_64:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    addl $-1, %ecx
-; X86-NEXT:    adcl $-1, %esi
-; X86-NEXT:    andl %edx, %esi
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    sbbl %esi, %edx
-; X86-NEXT:    popl %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t1_64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    decq %rsi
-; X64-NEXT:    andq %rdi, %rsi
-; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %mask = add i64 %alignment, -1
   %bias = and i64 %ptr, %mask
@@ -70,19 +60,16 @@
 define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
 ; X86-LABEL: t2_commutative:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    decl %ecx
-; X86-NEXT:    andl %eax, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t2_commutative:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   %bias = and i32 %mask, %ptr ; swapped
@@ -95,22 +82,22 @@
 define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
 ; X86-LABEL: t3_extrause0:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    decl %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal -1(%eax), %edx
 ; X86-NEXT:    movl %edx, (%ecx)
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t3_extrause0:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    decl %esi
-; X64-NEXT:    movl %esi, (%rdx)
-; X64-NEXT:    andl %edi, %esi
-; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    leal -1(%rax), %ecx
+; X64-NEXT:    movl %ecx, (%rdx)
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andl %edi, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %mask = add i32 %alignment, -1
   store i32 %mask, i32* %mask_storage
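
Note on correctness (not part of the patch itself): the fold needs no power-of-two check on B, which is why the combine above has none. In two's complement, 0 - B == ~(B - 1), and for any mask M, (A & M) + (A & ~M) == A; hence A - (A & (B - 1)) == A & ~(B - 1) == A & (0 - B), matching the neg/and sequences in the updated CHECK lines. A minimal standalone C++ sketch that exhaustively verifies the identity over 8-bit values (everything in this snippet is illustrative and assumes ordinary unsigned wraparound; none of it comes from the patch):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Check A - (A & (B - 1)) == A & (0 - B) for every 8-bit A and B,
  // including B == 0, where B - 1 wraps to an all-ones mask.
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      std::uint8_t Lhs = std::uint8_t(A - (A & (B - 1))); // original pattern
      std::uint8_t Rhs = std::uint8_t(A & (0u - B));      // folded form
      assert(Lhs == Rhs && "fold must hold for every A and B");
    }
  }
  std::puts("identity holds for all 8-bit A and B");
  return 0;
}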