forked from OSchip/llvm-project
Optimize zext on PPC64.
The zero-extend (zext) IR instruction is lowered to an 'and' node with an immediate mask operand, which in turn gets legalised to a sequence of 'ori' and 'and' instructions. This can be done more efficiently using a single rldicl instruction. Patch by Tobias von Koch. llvm-svn: 162724
This commit is contained in:
parent
557a8d568b
commit
e39526a789
|
@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
||||||
|
|
||||||
case ISD::AND: {
|
case ISD::AND: {
|
||||||
unsigned Imm, Imm2, SH, MB, ME;
|
unsigned Imm, Imm2, SH, MB, ME;
|
||||||
|
uint64_t Imm64;
|
||||||
|
|
||||||
// If this is an and of a value rotated between 0 and 31 bits and then and'd
|
// If this is an and of a value rotated between 0 and 31 bits and then and'd
|
||||||
// with a mask, emit rlwinm
|
// with a mask, emit rlwinm
|
||||||
|
@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
||||||
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
|
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
|
||||||
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
|
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
|
||||||
}
|
}
|
||||||
|
// If this is a 64-bit zero-extension mask, emit rldicl.
|
||||||
|
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
|
||||||
|
isMask_64(Imm64)) {
|
||||||
|
SDValue Val = N->getOperand(0);
|
||||||
|
MB = 64 - CountTrailingOnes_64(Imm64);
|
||||||
|
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
|
||||||
|
return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
|
||||||
|
}
|
||||||
// AND X, 0 -> 0, not "rlwinm 32".
|
// AND X, 0 -> 0, not "rlwinm 32".
|
||||||
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
|
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
|
||||||
ReplaceUses(SDValue(N, 0), N->getOperand(1));
|
ReplaceUses(SDValue(N, 0), N->getOperand(1));
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
; RUN: llc < %s | FileCheck %s
|
||||||
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||||
|
target triple = "powerpc64-unknown-linux"
|
||||||
|
|
||||||
|
define i64 @fun(i32 %arg32) nounwind {
|
||||||
|
entry:
|
||||||
|
; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
|
||||||
|
%o = zext i32 %arg32 to i64
|
||||||
|
ret i64 %o
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue