[X86] Improve mul combine for negative multiplayer (2^c - 1)

This patch improves the mul instruction combine function (combineMul) 
by adding new layer of logic. 
In this patch, we are adding the ability to fold (mul x, -((1 << c) -1)) 
or (mul x, -((1 << c) +1)) into (neg(X << c) -x) or (neg((x << c) + x) respective.

Differential Revision: https://reviews.llvm.org/D28232

llvm-svn: 292358
This commit is contained in:
Michael Zuckerman 2017-01-18 09:31:13 +00:00
parent 03c5e69d07
commit 0c0240ce84
2 changed files with 261 additions and 16 deletions

View File

@ -30256,22 +30256,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
}
if (!NewMul) {
assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
&& "Both cases that could cause potential overflows should have "
assert(MulAmt != 0 &&
MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
if (isPowerOf2_64(MulAmt - 1))
int64_t SignMulAmt = C->getSExtValue();
if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
(SignMulAmt != -INT64_MAX)) {
int NumSign = SignMulAmt > 0 ? 1 : -1;
bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
if (IsPowerOf2_64PlusOne) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
NewMul = DAG.getNode(
ISD::ADD, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt - 1), DL,
DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
MVT::i8)));
else if (isPowerOf2_64(MulAmt + 1))
} else if (IsPowerOf2_64MinusOne) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt + 1),
DL, MVT::i8)), N->getOperand(0));
NewMul = DAG.getNode(
ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
MVT::i8)),
N->getOperand(0));
}
// To negate, subtract the number from zero
if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
NewMul =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
}
}
if (NewMul)

View File

@ -171,3 +171,233 @@ define i64 @mul18446744073709551615_64(i64 %A) {
%mul = mul i64 %A, 18446744073709551615
ret i64 %mul
}
define i32 @test(i32 %a) {
; X64-LABEL: test:
; X64: # BB#0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
;
; X86-LABEL: test:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
entry:
%tmp3 = mul i32 %a, 31
ret i32 %tmp3
}
define i32 @test1(i32 %a) {
; X64-LABEL: test1:
; X64: # BB#0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: retq
;
; X86-LABEL: test1:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: retl
entry:
%tmp3 = mul i32 %a, -31
ret i32 %tmp3
}
define i32 @test2(i32 %a) {
; X64-LABEL: test2:
; X64: # BB#0: # %entry
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: leal (%rax,%rdi), %eax
; X64-NEXT: retq
;
; X86-LABEL: test2:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
entry:
%tmp3 = mul i32 %a, 33
ret i32 %tmp3
}
define i32 @test3(i32 %a) {
; X64-LABEL: test3:
; X64: # BB#0: # %entry
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shll $5, %eax
; X64-NEXT: leal (%rax,%rdi), %eax
; X64-NEXT: negl %eax
; X64-NEXT: retq
;
; X86-LABEL: test3:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shll $5, %eax
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: retl
entry:
%tmp3 = mul i32 %a, -33
ret i32 %tmp3
}
define i64 @test4(i64 %a) {
; X64-LABEL: test4:
; X64: # BB#0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: retq
;
; X86-LABEL: test4:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $5, %ecx
; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $31, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, 31
ret i64 %tmp3
}
define i64 @test5(i64 %a) {
; X64-LABEL: test5:
; X64: # BB#0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: retq
;
; X86-LABEL: test5:
; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi0:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi1:
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: shll $5, %esi
; X86-NEXT: subl %eax, %esi
; X86-NEXT: movl $-31, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: subl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, -31
ret i64 %tmp3
}
define i64 @test6(i64 %a) {
; X64-LABEL: test6:
; X64: # BB#0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: leaq (%rax,%rdi), %rax
; X64-NEXT: retq
;
; X86-LABEL: test6:
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shll $5, %ecx
; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $33, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, 33
ret i64 %tmp3
}
define i64 @test7(i64 %a) {
; X64-LABEL: test7:
; X64: # BB#0: # %entry
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shlq $5, %rax
; X64-NEXT: leaq (%rax,%rdi), %rax
; X64-NEXT: negq %rax
; X64-NEXT: retq
;
; X86-LABEL: test7:
; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi2:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi3:
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %esi
; X86-NEXT: shll $5, %esi
; X86-NEXT: addl %eax, %esi
; X86-NEXT: movl $-33, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: subl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, -33
ret i64 %tmp3
}
define i64 @testOverflow(i64 %a) {
; X64-LABEL: testOverflow:
; X64: # BB#0: # %entry
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: imulq %rdi, %rax
; X64-NEXT: retq
;
; X86-LABEL: testOverflow:
; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi4:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi5:
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: shll $31, %esi
; X86-NEXT: subl %ecx, %esi
; X86-NEXT: addl %esi, %edx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%tmp3 = mul i64 %a, 9223372036854775807
ret i64 %tmp3
}