forked from OSchip/llvm-project
implement a readme entry, compiling the code into:
_foo: movl $12, %eax andl 4(%esp), %eax movl _array(%eax), %eax ret instead of: _foo: movl 4(%esp), %eax shrl $2, %eax andl $3, %eax movl _array(,%eax,4), %eax ret As it turns out, this triggers all the time, in a wide variety of situations, for example, I see diffs like this in various programs: - movl 8(%eax), %eax - shll $2, %eax - andl $1020, %eax - movl (%esi,%eax), %eax + movzbl 8(%eax), %eax + movl (%esi,%eax,4), %eax - shll $2, %edx - andl $1020, %edx - movl (%edi,%edx), %edx + andl $255, %edx + movl (%edi,%edx,4), %edx Unfortunately, I also see stuff like this, which can be fixed in the X86 backend: - andl $85, %ebx - addl _bit_count(,%ebx,4), %ebp + shll $2, %ebx + andl $340, %ebx + addl _bit_count(%ebx), %ebp llvm-svn: 44656
This commit is contained in:
parent
d2bbbabbfb
commit
7c709a5d08
|
@ -10,22 +10,6 @@
|
||||||
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
|
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
|
||||||
// both before and after the DAG is legalized.
|
// both before and after the DAG is legalized.
|
||||||
//
|
//
|
||||||
// FIXME: Missing folds
|
|
||||||
// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into
|
|
||||||
// a sequence of multiplies, shifts, and adds. This should be controlled by
|
|
||||||
// some kind of hint from the target that int div is expensive.
|
|
||||||
// various folds of mulh[s,u] by constants such as -1, powers of 2, etc.
|
|
||||||
//
|
|
||||||
// FIXME: select C, pow2, pow2 -> something smart
|
|
||||||
// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
|
|
||||||
// FIXME: Dead stores -> nuke
|
|
||||||
// FIXME: shr X, (and Y,31) -> shr X, Y (TRICKY!)
|
|
||||||
// FIXME: mul (x, const) -> shifts + adds
|
|
||||||
// FIXME: undef values
|
|
||||||
// FIXME: divide by zero is currently left unfolded. do we want to turn this
|
|
||||||
// into an undef?
|
|
||||||
// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#define DEBUG_TYPE "dagcombine"
|
#define DEBUG_TYPE "dagcombine"
|
||||||
|
@ -280,6 +264,8 @@ namespace {
|
||||||
SDOperand XformToShuffleWithZero(SDNode *N);
|
SDOperand XformToShuffleWithZero(SDNode *N);
|
||||||
SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
|
SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
|
||||||
|
|
||||||
|
SDOperand visitShiftByConstant(SDNode *N, unsigned Amt);
|
||||||
|
|
||||||
bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
|
bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
|
||||||
SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
|
SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
|
||||||
SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
|
SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
|
||||||
|
@ -2145,6 +2131,64 @@ SDOperand DAGCombiner::visitXOR(SDNode *N) {
|
||||||
return SDOperand();
|
return SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// visitShiftByConstant - Handle transforms common to the three shifts, when
|
||||||
|
/// the shift amount is a constant.
|
||||||
|
SDOperand DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
|
||||||
|
SDNode *LHS = N->getOperand(0).Val;
|
||||||
|
if (!LHS->hasOneUse()) return SDOperand();
|
||||||
|
|
||||||
|
// We want to pull some binops through shifts, so that we have (and (shift))
|
||||||
|
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
|
||||||
|
// thing happens with address calculations, so it's important to canonicalize
|
||||||
|
// it.
|
||||||
|
bool HighBitSet = false; // Can we transform this if the high bit is set?
|
||||||
|
|
||||||
|
switch (LHS->getOpcode()) {
|
||||||
|
default: return SDOperand();
|
||||||
|
case ISD::OR:
|
||||||
|
case ISD::XOR:
|
||||||
|
HighBitSet = false; // We can only transform sra if the high bit is clear.
|
||||||
|
break;
|
||||||
|
case ISD::AND:
|
||||||
|
HighBitSet = true; // We can only transform sra if the high bit is set.
|
||||||
|
break;
|
||||||
|
case ISD::ADD:
|
||||||
|
if (N->getOpcode() != ISD::SHL)
|
||||||
|
return SDOperand(); // only shl(add) not sr[al](add).
|
||||||
|
HighBitSet = false; // We can only transform sra if the high bit is clear.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We require the RHS of the binop to be a constant as well.
|
||||||
|
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
|
||||||
|
if (!BinOpCst) return SDOperand();
|
||||||
|
|
||||||
|
MVT::ValueType VT = N->getValueType(0);
|
||||||
|
|
||||||
|
// If this is a signed shift right, and the high bit is modified
|
||||||
|
// by the logical operation, do not perform the transformation.
|
||||||
|
// The highBitSet boolean indicates the value of the high bit of
|
||||||
|
// the constant which would cause it to be modified for this
|
||||||
|
// operation.
|
||||||
|
if (N->getOpcode() == ISD::SRA) {
|
||||||
|
uint64_t BinOpRHSSign = BinOpCst->getValue() >> MVT::getSizeInBits(VT)-1;
|
||||||
|
if ((bool)BinOpRHSSign != HighBitSet)
|
||||||
|
return SDOperand();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fold the constants, shifting the binop RHS by the shift amount.
|
||||||
|
SDOperand NewRHS = DAG.getNode(N->getOpcode(), N->getValueType(0),
|
||||||
|
LHS->getOperand(1), N->getOperand(1));
|
||||||
|
|
||||||
|
// Create the new shift.
|
||||||
|
SDOperand NewShift = DAG.getNode(N->getOpcode(), VT, LHS->getOperand(0),
|
||||||
|
N->getOperand(1));
|
||||||
|
|
||||||
|
// Create the new binop.
|
||||||
|
return DAG.getNode(LHS->getOpcode(), VT, NewShift, NewRHS);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
SDOperand DAGCombiner::visitSHL(SDNode *N) {
|
SDOperand DAGCombiner::visitSHL(SDNode *N) {
|
||||||
SDOperand N0 = N->getOperand(0);
|
SDOperand N0 = N->getOperand(0);
|
||||||
SDOperand N1 = N->getOperand(1);
|
SDOperand N1 = N->getOperand(1);
|
||||||
|
@ -2199,7 +2243,8 @@ SDOperand DAGCombiner::visitSHL(SDNode *N) {
|
||||||
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
|
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
|
||||||
return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
|
return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
|
||||||
DAG.getConstant(~0ULL << N1C->getValue(), VT));
|
DAG.getConstant(~0ULL << N1C->getValue(), VT));
|
||||||
return SDOperand();
|
|
||||||
|
return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
SDOperand DAGCombiner::visitSRA(SDNode *N) {
|
SDOperand DAGCombiner::visitSRA(SDNode *N) {
|
||||||
|
@ -2259,7 +2304,8 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) {
|
||||||
// If the sign bit is known to be zero, switch this to a SRL.
|
// If the sign bit is known to be zero, switch this to a SRL.
|
||||||
if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
|
if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
|
||||||
return DAG.getNode(ISD::SRL, VT, N0, N1);
|
return DAG.getNode(ISD::SRL, VT, N0, N1);
|
||||||
return SDOperand();
|
|
||||||
|
return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
SDOperand DAGCombiner::visitSRL(SDNode *N) {
|
SDOperand DAGCombiner::visitSRL(SDNode *N) {
|
||||||
|
@ -2353,7 +2399,7 @@ SDOperand DAGCombiner::visitSRL(SDNode *N) {
|
||||||
if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
|
if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
|
||||||
return SDOperand(N, 0);
|
return SDOperand(N, 0);
|
||||||
|
|
||||||
return SDOperand();
|
return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
|
SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
|
||||||
|
|
|
@ -464,41 +464,3 @@ entry:
|
||||||
}
|
}
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
on this code:
|
|
||||||
|
|
||||||
unsigned array[4];
|
|
||||||
unsigned foo(unsigned long x) {
|
|
||||||
return array[(x>>2)&3ul];
|
|
||||||
}
|
|
||||||
|
|
||||||
we should dag combine the left+right shift together. We currently get:
|
|
||||||
|
|
||||||
_foo:
|
|
||||||
movl 4(%esp), %eax
|
|
||||||
shrl $2, %eax
|
|
||||||
andl $3, %eax
|
|
||||||
movl _array(,%eax,4), %eax
|
|
||||||
ret
|
|
||||||
|
|
||||||
similar on ppc:
|
|
||||||
|
|
||||||
_foo:
|
|
||||||
lis r2, ha16(_array)
|
|
||||||
srwi r3, r3, 2
|
|
||||||
la r2, lo16(_array)(r2)
|
|
||||||
rlwinm r3, r3, 2, 28, 29
|
|
||||||
lwzx r3, r2, r3
|
|
||||||
blr
|
|
||||||
|
|
||||||
similar on arm:
|
|
||||||
|
|
||||||
_foo:
|
|
||||||
mov r3, #3
|
|
||||||
and r3, r3, r0, lsr #2
|
|
||||||
ldr r2, LCPI1_0
|
|
||||||
ldr r0, [r2, +r3, lsl #2]
|
|
||||||
bx lr
|
|
||||||
|
|
||||||
this is a trivial dag combine xform.
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
; RUN: llvm-as < %s | llc | not grep shrl
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||||
|
target triple = "i686-apple-darwin8"
|
||||||
|
@array = weak global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
|
||||||
|
|
||||||
|
define i32 @foo(i32 %x) {
|
||||||
|
entry:
|
||||||
|
%tmp2 = lshr i32 %x, 2 ; <i32> [#uses=1]
|
||||||
|
%tmp3 = and i32 %tmp2, 3 ; <i32> [#uses=1]
|
||||||
|
%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3 ; <i32*> [#uses=1]
|
||||||
|
%tmp5 = load i32* %tmp4, align 4 ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp5
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue