Fold AND and ROTL more often

llvm-svn: 30577
This commit is contained in:
Nate Begeman 2006-09-22 05:01:56 +00:00
parent 81c9e42bea
commit d31efd190f
3 changed files with 62 additions and 62 deletions

View File

@ -76,6 +76,17 @@ namespace {
return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
}
/// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
/// with any number of 0s on either side. The 1s are allowed to wrap from
/// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
/// 0x0F0F0000 is not, since all 1s are not contiguous.
/// MB and ME are output parameters describing the bounds of the run; the
/// MB/ME transforms in the .td file read them as a mask's start and end bit.
/// NOTE(review): whether MB/ME are written when the result is false is not
/// visible from this declaration - confirm against the definition.
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
/// isRotateAndMask - Returns true if the node N, combined with the AND mask
/// Mask, can be folded into a single rotate-and-mask operation. Only i32
/// nodes are considered. On success the caller uses SH, MB and ME as the
/// rotate amount and mask bounds of the instruction it emits (e.g. RLWINM).
/// NOTE(review): the exact meaning of IsShiftMask is not visible from this
/// declaration - confirm against the definition.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
@ -324,12 +335,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
return N->getOpcode() == Opc && isInt32Immediate(N->getOperand(1).Val, Imm);
}
// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
// any number of 0s on either side. The 1s are allowed to wrap from LSB to
// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
// not, since all 1s are not contiguous.
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
if (isShiftedMask_32(Val)) {
// look for the first non-zero bit
MB = CountLeadingZeros_32(Val);
@ -350,10 +356,9 @@ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
// isRotateAndMask - Returns true if Mask and Shift can be folded into a rotate
// and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME) {
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool IsShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
if (N->getValueType(0) != MVT::i32)
@ -378,6 +383,8 @@ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
Indeterminant = ~(0xFFFFFFFFu >> Shift);
// adjust for the left rotate
Shift = 32 - Shift;
} else if (Opcode == ISD::ROTL) {
Indeterminant = 0;
} else {
return false;
}
@ -1024,30 +1031,33 @@ SDNode *PPCDAGToDAGISel::Select(SDOperand Op) {
break;
}
case ISD::AND: {
unsigned Imm, Imm2;
unsigned Imm, Imm2, SH, MB, ME;
// If this is an and of a value rotated between 0 and 31 bits and then and'd
// with a mask, emit rlwinm
if (isInt32Immediate(N->getOperand(1), Imm) &&
(isShiftedMask_32(Imm) || isShiftedMask_32(~Imm))) {
SDOperand Val;
unsigned SH, MB, ME;
if (isRotateAndMask(N->getOperand(0).Val, Imm, false, SH, MB, ME)) {
Val = N->getOperand(0).getOperand(0);
AddToISelQueue(Val);
} else if (Imm == 0) {
// AND X, 0 -> 0, not "rlwinm 32".
AddToISelQueue(N->getOperand(1));
ReplaceUses(SDOperand(N, 0), N->getOperand(1));
return NULL;
} else {
Val = N->getOperand(0);
AddToISelQueue(Val);
isRunOfOnes(Imm, MB, ME);
SH = 0;
}
isRotateAndMask(N->getOperand(0).Val, Imm, false, SH, MB, ME)) {
SDOperand Val = N->getOperand(0).getOperand(0);
AddToISelQueue(Val);
SDOperand Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// If this is just a masked value where the input is not handled above, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
if (isInt32Immediate(N->getOperand(1), Imm) &&
isRunOfOnes(Imm, MB, ME) &&
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDOperand Val = N->getOperand(0);
AddToISelQueue(Val);
SDOperand Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
AddToISelQueue(N->getOperand(1));
ReplaceUses(SDOperand(N, 0), N->getOperand(1));
return NULL;
}
// ISD::OR doesn't get all the bitfield insertion fun.
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
if (isInt32Immediate(N->getOperand(1), Imm) &&

View File

@ -129,7 +129,27 @@ def HA16 : SDNodeXForm<imm, [{
signed int Val = N->getValue();
return getI32Imm((Val - (signed short)Val) >> 16);
}]>;
def MB : SDNodeXForm<imm, [{
  // Transformation function: yield the start bit of the mask encoded by this
  // immediate. The isRunOfOnes result is deliberately ignored here.
  unsigned MaskBegin, MaskEnd;
  (void)isRunOfOnes(static_cast<unsigned>(N->getValue()), MaskBegin, MaskEnd);
  return getI32Imm(MaskBegin);
}]>;
def ME : SDNodeXForm<imm, [{
  // Transformation function: yield the end bit of the mask encoded by this
  // immediate. The isRunOfOnes result is deliberately ignored here.
  unsigned MaskBegin, MaskEnd;
  (void)isRunOfOnes(static_cast<unsigned>(N->getValue()), MaskBegin, MaskEnd);
  return getI32Imm(MaskEnd);
}]>;
def maskimm32 : PatLeaf<(imm), [{
  // maskimm32 predicate - True only for an i32 immediate that is a run of
  // ones; any other value type is rejected up front.
  if (N->getValueType(0) != MVT::i32)
    return false;
  // The run bounds are required by isRunOfOnes but are not needed here.
  unsigned RunBegin, RunEnd;
  return isRunOfOnes(static_cast<unsigned>(N->getValue()), RunBegin, RunEnd);
}]>;
def immSExt16 : PatLeaf<(imm), [{
// immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
@ -923,6 +943,10 @@ def : Pat<(rotl GPRC:$in, GPRC:$sh),
// Rotate-left of a GPRC register by an i32 immediate selects RLWINM, using
// the immediate as the rotate amount and 0 / 31 as the two mask operands.
def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
(RLWINM GPRC:$in, imm:$imm, 0, 31)>;
// RLWNM
// Fold an AND of a register-amount rotate-left with a mask immediate into a
// single RLWNM. maskimm32 limits $imm to i32 run-of-ones immediates, and the
// MB / ME transforms convert that immediate into the last two operands.
def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
(RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
// A PPCcall whose destination is a target global address selects BL on that
// same address.
def : Pat<(PPCcall tglobaladdr:$dst),
(BL tglobaladdr:$dst)>;

View File

@ -6,40 +6,6 @@ TODO:
===-------------------------------------------------------------------------===
We only produce the rlwnm instruction for rotate instructions. We should
at least match stuff like:
unsigned rot_and(unsigned X, int Y) {
unsigned T = (X << Y) | (X >> (32-Y));
T &= 127;
return T;
}
_foo3:
rlwnm r2, r3, r4, 0, 31
rlwinm r3, r2, 0, 25, 31
blr
... which is the basic pattern that should be written in the instr. It may
also be useful for stuff like:
long long foo2(long long X, int C) {
return X << (C&~32);
}
which currently produces:
_foo2:
rlwinm r2, r5, 0, 27, 25
subfic r5, r2, 32
slw r3, r3, r2
srw r5, r4, r5
or r3, r3, r5
slw r4, r4, r2
blr
===-------------------------------------------------------------------------===
Support 'update' load/store instructions. These are cracked on the G5, but are
still a codesize win.