forked from OSchip/llvm-project
[X86] Improve mul combine for negative multiplier (2^c - 1)
This patch improves the mul instruction combine function (combineMul) by adding a new layer of logic. It adds the ability to fold (mul x, -((1 << c) - 1)) or (mul x, -((1 << c) + 1)) into (neg((x << c) - x)) or (neg((x << c) + x)), respectively. Differential Revision: https://reviews.llvm.org/D28232 llvm-svn: 292358
This commit is contained in:
parent
03c5e69d07
commit
0c0240ce84
|
@ -30256,22 +30256,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
if (!NewMul) {
|
||||
assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
|
||||
&& "Both cases that could cause potential overflows should have "
|
||||
"already been handled.");
|
||||
if (isPowerOf2_64(MulAmt - 1))
|
||||
// (mul x, 2^N + 1) => (add (shl x, N), x)
|
||||
NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
|
||||
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
|
||||
DAG.getConstant(Log2_64(MulAmt - 1), DL,
|
||||
MVT::i8)));
|
||||
|
||||
else if (isPowerOf2_64(MulAmt + 1))
|
||||
// (mul x, 2^N - 1) => (sub (shl x, N), x)
|
||||
NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
|
||||
N->getOperand(0),
|
||||
DAG.getConstant(Log2_64(MulAmt + 1),
|
||||
DL, MVT::i8)), N->getOperand(0));
|
||||
assert(MulAmt != 0 &&
|
||||
MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
|
||||
"Both cases that could cause potential overflows should have "
|
||||
"already been handled.");
|
||||
int64_t SignMulAmt = C->getSExtValue();
|
||||
if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
|
||||
(SignMulAmt != -INT64_MAX)) {
|
||||
int NumSign = SignMulAmt > 0 ? 1 : -1;
|
||||
bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
|
||||
bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
|
||||
if (IsPowerOf2_64PlusOne) {
|
||||
// (mul x, 2^N + 1) => (add (shl x, N), x)
|
||||
NewMul = DAG.getNode(
|
||||
ISD::ADD, DL, VT, N->getOperand(0),
|
||||
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
|
||||
DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
|
||||
MVT::i8)));
|
||||
} else if (IsPowerOf2_64MinusOne) {
|
||||
// (mul x, 2^N - 1) => (sub (shl x, N), x)
|
||||
NewMul = DAG.getNode(
|
||||
ISD::SUB, DL, VT,
|
||||
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
|
||||
DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
|
||||
MVT::i8)),
|
||||
N->getOperand(0));
|
||||
}
|
||||
// To negate, subtract the number from zero
|
||||
if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
|
||||
NewMul =
|
||||
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
|
||||
}
|
||||
}
|
||||
|
||||
if (NewMul)
|
||||
|
|
|
@ -171,3 +171,233 @@ define i64 @mul18446744073709551615_64(i64 %A) {
|
|||
%mul = mul i64 %A, 18446744073709551615
|
||||
ret i64 %mul
|
||||
}
|
||||
|
||||
define i32 @test(i32 %a) {
|
||||
; X64-LABEL: test:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $5, %eax
|
||||
; X64-NEXT: subl %edi, %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: shll $5, %eax
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i32 %a, 31
|
||||
ret i32 %tmp3
|
||||
}
|
||||
|
||||
define i32 @test1(i32 %a) {
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $5, %eax
|
||||
; X64-NEXT: subl %edi, %eax
|
||||
; X64-NEXT: negl %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test1:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: shll $5, %eax
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: negl %eax
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i32 %a, -31
|
||||
ret i32 %tmp3
|
||||
}
|
||||
|
||||
|
||||
define i32 @test2(i32 %a) {
|
||||
; X64-LABEL: test2:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $5, %eax
|
||||
; X64-NEXT: leal (%rax,%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test2:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: shll $5, %eax
|
||||
; X86-NEXT: addl %ecx, %eax
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i32 %a, 33
|
||||
ret i32 %tmp3
|
||||
}
|
||||
|
||||
define i32 @test3(i32 %a) {
|
||||
; X64-LABEL: test3:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shll $5, %eax
|
||||
; X64-NEXT: leal (%rax,%rdi), %eax
|
||||
; X64-NEXT: negl %eax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test3:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: shll $5, %eax
|
||||
; X86-NEXT: addl %ecx, %eax
|
||||
; X86-NEXT: negl %eax
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i32 %a, -33
|
||||
ret i32 %tmp3
|
||||
}
|
||||
|
||||
define i64 @test4(i64 %a) {
|
||||
; X64-LABEL: test4:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shlq $5, %rax
|
||||
; X64-NEXT: subq %rdi, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test4:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll $5, %ecx
|
||||
; X86-NEXT: subl %eax, %ecx
|
||||
; X86-NEXT: movl $31, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i64 %a, 31
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
||||
define i64 @test5(i64 %a) {
|
||||
; X64-LABEL: test5:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shlq $5, %rax
|
||||
; X64-NEXT: subq %rdi, %rax
|
||||
; X64-NEXT: negq %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test5:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .Lcfi0:
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .Lcfi1:
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %esi
|
||||
; X86-NEXT: shll $5, %esi
|
||||
; X86-NEXT: subl %eax, %esi
|
||||
; X86-NEXT: movl $-31, %edx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: subl %ecx, %edx
|
||||
; X86-NEXT: subl %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i64 %a, -31
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
||||
|
||||
define i64 @test6(i64 %a) {
|
||||
; X64-LABEL: test6:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shlq $5, %rax
|
||||
; X64-NEXT: leaq (%rax,%rdi), %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test6:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shll $5, %ecx
|
||||
; X86-NEXT: addl %eax, %ecx
|
||||
; X86-NEXT: movl $33, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: addl %ecx, %edx
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i64 %a, 33
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
||||
define i64 @test7(i64 %a) {
|
||||
; X64-LABEL: test7:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movq %rdi, %rax
|
||||
; X64-NEXT: shlq $5, %rax
|
||||
; X64-NEXT: leaq (%rax,%rdi), %rax
|
||||
; X64-NEXT: negq %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test7:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .Lcfi2:
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .Lcfi3:
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %eax, %esi
|
||||
; X86-NEXT: shll $5, %esi
|
||||
; X86-NEXT: addl %eax, %esi
|
||||
; X86-NEXT: movl $-33, %edx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: subl %ecx, %edx
|
||||
; X86-NEXT: subl %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i64 %a, -33
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
||||
define i64 @testOverflow(i64 %a) {
|
||||
; X64-LABEL: testOverflow:
|
||||
; X64: # BB#0: # %entry
|
||||
; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; X64-NEXT: imulq %rdi, %rax
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: testOverflow:
|
||||
; X86: # BB#0: # %entry
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: .Lcfi4:
|
||||
; X86-NEXT: .cfi_def_cfa_offset 8
|
||||
; X86-NEXT: .Lcfi5:
|
||||
; X86-NEXT: .cfi_offset %esi, -8
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl $-1, %edx
|
||||
; X86-NEXT: movl %ecx, %eax
|
||||
; X86-NEXT: mull %edx
|
||||
; X86-NEXT: movl %ecx, %esi
|
||||
; X86-NEXT: shll $31, %esi
|
||||
; X86-NEXT: subl %ecx, %esi
|
||||
; X86-NEXT: addl %esi, %edx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
entry:
|
||||
%tmp3 = mul i64 %a, 9223372036854775807
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue