forked from OSchip/llvm-project
Teach DAGCombine to fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) when c1 equals the number of bits that are truncated off.
This happens all the time when a smul is promoted to a larger type.

On x86-64 we now compile "int test(int x) { return x/10; }" into:

    movslq %edi, %rax
    imulq $1717986919, %rax, %rax
    movq %rax, %rcx
    shrq $63, %rcx
    sarq $34, %rax      <- used to be "shrq $32, %rax; sarl $2, %eax"
    addl %ecx, %eax

This fires 96 times in gcc.c on x86-64.

llvm-svn: 124559
This commit is contained in:
parent
115f0fa397
commit
946e1522b6
|
@ -3154,6 +3154,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
|
|||
}
|
||||
}
|
||||
|
||||
// fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
|
||||
// if c1 is equal to the number of bits the trunc removes
|
||||
if (N0.getOpcode() == ISD::TRUNCATE &&
|
||||
(N0.getOperand(0).getOpcode() == ISD::SRL ||
|
||||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
|
||||
N0.getOperand(0).hasOneUse() &&
|
||||
N0.getOperand(0).getOperand(1).hasOneUse() &&
|
||||
N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
|
||||
EVT LargeVT = N0.getOperand(0).getValueType();
|
||||
ConstantSDNode *LargeShiftAmt =
|
||||
cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
|
||||
|
||||
if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
|
||||
LargeShiftAmt->getZExtValue()) {
|
||||
SDValue Amt =
|
||||
DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
|
||||
getShiftAmountTy());
|
||||
SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
|
||||
N0.getOperand(0).getOperand(0), Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
|
||||
}
|
||||
}
|
||||
|
||||
// Simplify, based on bits shifted out of the LHS.
|
||||
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
|
||||
return SDValue(N, 0);
|
||||
|
|
|
@ -2274,24 +2274,3 @@ llc time when it gets inlined, because we can use smaller transfers. This also
|
|||
avoids partial register stalls in some important cases.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We miss an optimization when lowering divide by some constants. For example:
|
||||
int test(int x) { return x/10; }
|
||||
|
||||
We produce:
|
||||
|
||||
_test: ## @test
|
||||
## BB#0: ## %entry
|
||||
movslq %edi, %rax
|
||||
imulq $1717986919, %rax, %rax ## imm = 0x66666667
|
||||
movq %rax, %rcx
|
||||
shrq $63, %rcx
|
||||
** shrq $32, %rax
|
||||
** sarl $2, %eax
|
||||
addl %ecx, %eax
|
||||
ret
|
||||
|
||||
The two starred instructions could be replaced with a "sarq $34, %rax". This
|
||||
occurs in 186.crafty very frequently.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
|
@ -51,3 +51,12 @@ define i32 @test5(i32 %A) nounwind {
|
|||
; CHECK: mull 4(%esp)
|
||||
}
|
||||
|
||||
; test6: signed division of an i16 argument by the constant 10.
; The FileCheck directives below expect a multiply by 26215 (imull), a
; logical shift right by 31 (shrl $31), and an arithmetic shift right by
; 18 (sarl $18) in the generated code.
; NOTE(review): presumably this exercises the
; (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) DAG combine, so
; that only one arithmetic shift is emitted -- confirm against the
; DAGCombiner change this test accompanies.
define signext i16 @test6(i16 signext %x) nounwind {
|
||||
entry:
|
||||
%div = sdiv i16 %x, 10
|
||||
ret i16 %div
|
||||
; CHECK: test6:
|
||||
; CHECK: imull $26215, %eax, %eax
|
||||
; CHECK: shrl $31, %ecx
|
||||
; CHECK: sarl $18, %eax
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue