forked from OSchip/llvm-project
[PowerPC] Remove zexts after i32 ctlz
The 64-bit semantics of cntlzw are not special: the 32-bit leading-zero count is stored as a 64-bit value in the range [0,32]. As a result, it is always zero extended, and it can be added to the PPCISelDAGToDAG peephole optimization as a frontier instruction for the removal of unnecessary zero extensions. llvm-svn: 225192
This commit is contained in:
parent
58d4c5a9c0
commit
49557f1b42
|
@ -3736,6 +3736,12 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
|
|||
return true;
|
||||
}
|
||||
|
||||
// CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
|
||||
if (Op32.getMachineOpcode() == PPC::CNTLZW) {
|
||||
ToPromote.insert(Op32.getNode());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Next, check for those instructions we can look through.
|
||||
|
||||
// Assuming the mask does not wrap around, then the higher-order bits are
|
||||
|
@ -3925,6 +3931,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
|
|||
case PPC::LIS: NewOpcode = PPC::LIS8; break;
|
||||
case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
|
||||
case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
|
||||
case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
|
||||
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
|
||||
case PPC::OR: NewOpcode = PPC::OR8; break;
|
||||
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
|
||||
|
|
|
@ -552,6 +552,9 @@ defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
|
|||
[(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
|
||||
|
||||
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
|
||||
defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS),
|
||||
"cntlzw", "$rA, $rS", IIC_IntGeneral, []>;
|
||||
|
||||
defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
|
||||
"extsb", "$rA, $rS", IIC_IntSimple,
|
||||
[(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
|
||||
|
|
|
@ -40,10 +40,10 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.bswap.i32(i32) #1
|
||||
declare i32 @llvm.bswap.i32(i32) #0
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define zeroext i32 @bs32(i32* nocapture readonly %x) #0 {
|
||||
define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
|
||||
entry:
|
||||
%0 = load i32* %x, align 4
|
||||
%1 = tail call i32 @llvm.bswap.i32(i32 %0)
|
||||
|
@ -55,7 +55,7 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define zeroext i16 @bs16(i16* nocapture readonly %x) #0 {
|
||||
define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
|
||||
entry:
|
||||
%0 = load i16* %x, align 2
|
||||
%1 = tail call i16 @llvm.bswap.i16(i16 %0)
|
||||
|
@ -67,7 +67,23 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i16 @llvm.bswap.i16(i16) #1
|
||||
declare i16 @llvm.bswap.i16(i16) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define zeroext i32 @ctlz32(i32 zeroext %x) #0 {
|
||||
entry:
|
||||
%0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||
ret i32 %0
|
||||
|
||||
; CHECK-LABEL: @ctlz32
|
||||
; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.ctlz.i32(i32, i1) #0
|
||||
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
||||
|
|
Loading…
Reference in New Issue