[SelectionDAG] https://reviews.llvm.org/D48278

Allow reducing redundant shift masks. For example:

    x1 = x & 0xAB00
    x2 = (x >> 8) & 0xAB

can be reduced to:

    x1 = x & 0xAB00
    x2 = x1 >> 8

The fold is only attempted when the masks and shift amounts are constants.

llvm-svn: 336426

This commit is contained in:
parent b370615451
commit 742bf1a255
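As a sanity check on the identity the message relies on (this snippet is illustrative only, not part of the patch): when the wide mask equals the narrow mask shifted into place, the second masking step is redundant and the shifted value can reuse the first AND.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Exhaustively check the identity on a small range: masking after the
      // shift equals shifting the value that was masked in the wide position.
      for (uint32_t x = 0; x <= 0xFFFF; ++x) {
        uint32_t x1 = x & 0xAB00;      // mask in the wide position
        uint32_t x2 = (x >> 8) & 0xAB; // shift, then mask
        assert(x2 == x1 >> 8);         // x2 can reuse x1
      }
      return 0;
    }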
@@ -447,6 +447,7 @@ namespace {
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
+    SDValue foldRedundantShiftedMasks(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
     SDValue TransformFPLoadStorePair(SDNode *N);
@@ -4378,6 +4379,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     }
   }

+  if (SDValue R = foldRedundantShiftedMasks(N))
+    return R;
+
   if (Level >= AfterLegalizeTypes) {
     // Attempt to propagate the AND back up to the leaves which, if they're
     // loads, can be combined to narrow loads and the AND node can be removed.
@@ -5944,6 +5948,108 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
   return SDValue();
 }

+// fold expressions x1 and x2 alike:
+// x1 = ( and, x, 0x00FF )
+// x2 = (( shl x, 8 ) and 0xFF00 )
+// into
+// x2 = shl x1, 8 ; reuse the computation of x1
+SDValue DAGCombiner::foldRedundantShiftedMasks(SDNode *AND) {
+  if (!AND)
+    return SDValue();
+
+  const SDValue &SHIFT = AND->getOperand(0);
+  if ((SHIFT.getNumOperands() != 2) || (!SHIFT.hasOneUse()))
+    return SDValue();
+
+  const ConstantSDNode *ShiftAmount =
+      dyn_cast<ConstantSDNode>(SHIFT.getOperand(1));
+  if (!ShiftAmount)
+    return SDValue();
+
+  const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AND->getOperand(1));
+  if (!Mask)
+    return SDValue();
+
+  SDValue MASKED = SHIFT.getOperand(0);
+  const auto &MaskedValue = dyn_cast<SDNode>(MASKED);
+  unsigned N0Opcode = SHIFT.getOpcode();
+  for (SDNode *OtherUser : MaskedValue->uses()) {
+    if ((&(*OtherUser) == ShiftAmount) || (OtherUser->getOpcode() != ISD::AND))
+      continue;
+
+    ConstantSDNode *OtherMask =
+        dyn_cast<ConstantSDNode>(OtherUser->getOperand(1));
+
+    if (!OtherMask)
+      continue;
+
+    bool CanReduce = false;
+
+    const APInt &MaskValue = Mask->getAPIntValue();
+    const APInt &ShiftValue = ShiftAmount->getAPIntValue();
+    const APInt &OtherMaskValue = OtherMask->getAPIntValue();
+
+    KnownBits MaskedValueBits;
+    DAG.computeKnownBits(MASKED, MaskedValueBits);
+    KnownBits ShiftedValueBits;
+    DAG.computeKnownBits(SHIFT, ShiftedValueBits);
+
+    const APInt EffectiveOtherMask = OtherMaskValue & ~MaskedValueBits.Zero;
+    const APInt EffectiveMask = MaskValue & ~ShiftedValueBits.Zero;
+
+    LLVM_DEBUG(
+        dbgs() << "\tValue being masked and shift-masked: "; MASKED.dump();
+        dbgs() << "\t\tValue zero bits: 0x"
+               << MaskedValueBits.Zero.toString(16, false)
+               << "\n\n\t\tApplied mask: 0x"
+               << OtherMaskValue.toString(16, false) << " : ";
+        OtherUser->dump();
+        dbgs() << "\t\tEffective mask: 0x"
+               << EffectiveOtherMask.toString(16, false)
+               << "\n\n\tShifted by: " << ShiftValue.getZExtValue() << " : ";
+        SHIFT.dump(); dbgs() << "\t\tAnd masked by: 0x"
+                             << MaskValue.toString(16, false) << " : ";
+        AND->dump(); dbgs() << "\t\tEffective mask to shifted value: 0x"
+                            << EffectiveMask.toString(16, false) << '\n';);
+
+    switch (N0Opcode) {
+    case ISD::SHL:
+      CanReduce = (EffectiveOtherMask.shl(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.lshr(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::SRA:
+      if (!MaskedValueBits.Zero.isSignBitSet()) {
+        CanReduce = (EffectiveOtherMask.ashr(ShiftValue) == EffectiveMask);
+        break;
+      } else // Same as SRL
+        N0Opcode = ISD::SRL;
+      LLVM_FALLTHROUGH; /* fall-through */
+    case ISD::SRL:
+      CanReduce = (EffectiveOtherMask.lshr(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.shl(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::ROTR:
+      CanReduce = (EffectiveOtherMask.rotr(ShiftValue) == EffectiveMask);
+      break;
+    default:
+      return SDValue();
+    }
+    if (CanReduce) {
+      LLVM_DEBUG(dbgs() << "\tCan just shift the masked value\n");
+
+      SDValue ShiftTheAND(OtherUser, 0);
+      const SDLoc DL(SHIFT);
+      EVT VT = AND->getValueType(0);
+      SDValue NewShift =
+          DAG.getNode(N0Opcode, DL, VT, ShiftTheAND, SHIFT.getOperand(1));
+      AddToWorklist(OtherUser);
+      return NewShift;
+    }
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSHL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
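The compatibility test in the switch above is plain mask arithmetic: the mask applied before the shift, shifted by the same amount, must reproduce the mask applied after the shift (or vice versa via the opposite shift). A standalone sketch of the ISD::SRL arm, hand-written here with fixed-width integers instead of APInt (the function name and constants are illustrative, taken from the lshr test below):

    #include <cassert>
    #include <cstdint>

    // Mirrors the ISD::SRL case: EffectiveOtherMask.lshr(ShiftValue) ==
    // EffectiveMask, or EffectiveMask.shl(ShiftValue) == EffectiveOtherMask.
    static bool canReduceSrl(uint32_t otherMask, uint32_t mask, unsigned amt) {
      return (otherMask >> amt) == mask || (mask << amt) == otherMask;
    }

    int main() {
      assert(canReduceSrl(0xAC00, 0xAC, 8));  // 44032 and 172, as in the tests
      assert(!canReduceSrl(0xAC00, 0xAB, 8)); // unrelated masks do not fold
      return 0;
    }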
@@ -0,0 +1,95 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s -check-prefix=A64
+
+define i32 @ror(i32 %a) {
+entry:
+  %m2 = and i32 %a, 3855
+  %shl = shl i32 %a, 24
+  %shr = lshr i32 %a, 8
+  %or = or i32 %shl, %shr
+  %m1 = and i32 %or, 251658255
+  %or2 = or i32 %m1, %m2
+  ret i32 %or2
+}
+; A64-LABEL: ror
+; A64: mov [[R1:w[0-9]]], #3855
+; A64-NEXT: and [[R2:w[0-9]]], w0, [[R1]]
+; A64-NEXT: orr [[R3:w[0-9]]], [[R1]], [[R1]], ror #8
+
+define i32 @shl(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: shl:
+; A64: mov w8, #172
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsl #8
+
+define i32 @lshr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = lshr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: lshr:
+; A64: mov w8, #44032
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsr #8
+
+define i32 @ashr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = ashr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: ashr:
+; A64: mov w8, #44032
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsr #8
+
+define i32 @shl_nogood(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, %1
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+
+define i32 @shl_nogood2(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, %0
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: shl_nogood: // @shl_nogood
+; A64: sxth w8, w0
+; A64-NEXT: mov w9, #172
+; A64-NEXT: and w9, w8, w9
+; A64-NEXT: lsl w8, w8, w9
+; A64-NEXT: mov w10, #44032
+; A64-NEXT: and w8, w8, w10
+; A64-NEXT: orr w0, w9, w8
+; A64-NEXT: ret
+; A64-LABEL: shl_nogood2: // @shl_nogood2
+; A64: sxth w8, w0
+; A64-NEXT: mov w9, #172
+; A64-NEXT: and w9, w8, w9
+; A64-NEXT: and w8, w8, w8, lsl #8
+; A64-NEXT: orr w0, w9, w8
+; A64-NEXT: ret
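In the @ror test above, the OR of the two opposing shifts is first matched as a rotate, so the fold sees an ISD::ROTR of 8; the constants are chosen so that the pre-rotate mask rotated right by 8 reproduces the post-rotate mask. A quick standalone check of that relationship (illustrative only, not part of the test):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t v, unsigned n) {
      return (v >> n) | (v << (32u - n)); // valid for n in 1..31
    }

    int main() {
      // 3855 = 0x00000F0F and 251658255 = 0x0F00000F: the masks from @ror.
      assert(rotr32(3855u, 8) == 251658255u);
      return 0;
    }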
@ -0,0 +1,98 @@
|
|||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv4t-arm-none-eabi"
|
||||
|
||||
; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=ARM
|
||||
|
||||
define i32 @ror(i32 %a) {
|
||||
entry:
|
||||
%m2 = and i32 %a, 3855
|
||||
%shl = shl i32 %a, 24
|
||||
%shr = lshr i32 %a, 8
|
||||
%or = or i32 %shl, %shr
|
||||
%m1 = and i32 %or, 251658255
|
||||
%or2 = or i32 %m1, %m2
|
||||
ret i32 %or2
|
||||
}
|
||||
; ARM-LABEL: ror
|
||||
; ARM: mov [[R1:r[0-9]]], #15
|
||||
; ARM-NEXT: orr [[R2:r[0-9]]], [[R1]], #3840
|
||||
; ARM-NEXT: and [[R3:r[0-9]]], r0, [[R1]]
|
||||
; ARM-NEXT: orr [[R4:r[0-9]]], [[R3]], [[R3]], ror #8
|
||||
; ARM-NEXT: mov pc, lr
|
||||
|
||||
define i32 @shl(i16 %a) {
|
||||
entry:
|
||||
%0 = sext i16 %a to i32
|
||||
%1 = and i32 %0, 172
|
||||
%2 = shl i32 %0, 8
|
||||
%3 = and i32 %2, 44032
|
||||
%4 = or i32 %1, %3
|
||||
ret i32 %4
|
||||
}
|
||||
; ARM-LABEL: shl:
|
||||
; ARM: and r0, r0, #172
|
||||
; ARM-NEXT: orr r0, r0, r0, lsl #8
|
||||
|
||||
define i32 @lshr(i16 %a) {
|
||||
entry:
|
||||
%0 = sext i16 %a to i32
|
||||
%1 = and i32 %0, 44032
|
||||
%2 = lshr i32 %0, 8
|
||||
%3 = and i32 %2, 172
|
||||
%4 = or i32 %1, %3
|
||||
ret i32 %4
|
||||
}
|
||||
; ARM-LABEL: lshr:
|
||||
; ARM: and r0, r0, #44032
|
||||
; ARM-NEXT: orr r0, r0, r0, lsr #8
|
||||
|
||||
define i32 @ashr(i16 %a) {
|
||||
entry:
|
||||
%0 = sext i16 %a to i32
|
||||
%1 = and i32 %0, 44032
|
||||
%2 = ashr i32 %0, 8
|
||||
%3 = and i32 %2, 172
|
||||
%4 = or i32 %1, %3
|
||||
ret i32 %4
|
||||
}
|
||||
; ARM-LABEL: ashr:
|
||||
; ARM: and r0, r0, #44032
|
||||
; ARM-NEXT: orr r0, r0, r0, lsr #8
|
||||
|
||||
define i32 @shl_nogood(i16 %a) {
|
||||
entry:
|
||||
%0 = sext i16 %a to i32
|
||||
%1 = and i32 %0, 172
|
||||
%2 = shl i32 %0, %1
|
||||
%3 = and i32 %2, 44032
|
||||
%4 = or i32 %1, %3
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
define i32 @shl_nogood2(i16 %a) {
|
||||
entry:
|
||||
%0 = sext i16 %a to i32
|
||||
%1 = and i32 %0, 172
|
||||
%2 = shl i32 %0, 8
|
||||
%3 = and i32 %2, %0
|
||||
%4 = or i32 %1, %3
|
||||
ret i32 %4
|
||||
}
|
||||
; ARM-LABEL:shl_nogood:
|
||||
; ARM: lsl r0, r0, #16
|
||||
; ARM-NEXT: mov r1, #172
|
||||
; ARM-NEXT: and r1, r1, r0, asr #16
|
||||
; ARM-NEXT: asr r0, r0, #16
|
||||
; ARM-NEXT: mov r2, #44032
|
||||
; ARM-NEXT: and r0, r2, r0, lsl r1
|
||||
; ARM-NEXT: orr r0, r1, r0
|
||||
; ARM-NEXT: mov pc, lr
|
||||
; ARM-LABEL:shl_nogood2:
|
||||
; ARM: lsl r0, r0, #16
|
||||
; ARM-NEXT: mov r1, #172
|
||||
; ARM-NEXT: asr r2, r0, #16
|
||||
; ARM-NEXT: and r1, r1, r0, asr #16
|
||||
; ARM-NEXT: lsl r2, r2, #8
|
||||
; ARM-NEXT: and r0, r2, r0, asr #16
|
||||
; ARM-NEXT: orr r0, r1, r0
|
||||
; ARM-NEXT: mov pc, lr
|
|
@@ -29,18 +29,18 @@ define void @foo() local_unnamed_addr {
 ; X86-NEXT: .cfi_offset %edi, -16
 ; X86-NEXT: .cfi_offset %ebx, -12
 ; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl obj, %edx
 ; X86-NEXT: movsbl var_27, %eax
 ; X86-NEXT: movzwl var_2, %esi
 ; X86-NEXT: movl var_310, %ecx
 ; X86-NEXT: imull %eax, %ecx
 ; X86-NEXT: addl var_24, %ecx
-; X86-NEXT: andl $4194303, %edx # imm = 0x3FFFFF
-; X86-NEXT: leal (%edx,%edx), %ebx
-; X86-NEXT: subl %eax, %ebx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: subl %esi, %edi
-; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: movl $4194303, %edi # imm = 0x3FFFFF
+; X86-NEXT: andl obj, %edi
+; X86-NEXT: leal (%edi,%edi), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: imull %ebx, %ecx
 ; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
 ; X86-NEXT: movl $9, %esi
 ; X86-NEXT: xorl %ebp, %ebp
@@ -50,12 +50,12 @@ define void @foo() local_unnamed_addr {
 ; X86-NEXT: cmovnel %esi, %ebp
 ; X86-NEXT: movl $0, %ecx
 ; X86-NEXT: cmovnel %ecx, %esi
-; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: cmpl %edi, %ebx
 ; X86-NEXT: movl %ebp, var_50+4
 ; X86-NEXT: movl %esi, var_50
 ; X86-NEXT: setge var_205
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: movb %bl, var_218
+; X86-NEXT: imull %eax, %edx
+; X86-NEXT: movb %dl, var_218
 ; X86-NEXT: popl %esi
 ; X86-NEXT: .cfi_def_cfa_offset 16
 ; X86-NEXT: popl %edi
@@ -68,24 +68,24 @@ define void @foo() local_unnamed_addr {
 ;
 ; X64-LABEL: foo:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: movl {{.*}}(%rip), %eax
 ; X64-NEXT: movsbl {{.*}}(%rip), %r9d
 ; X64-NEXT: movzwl {{.*}}(%rip), %r8d
 ; X64-NEXT: movl {{.*}}(%rip), %ecx
 ; X64-NEXT: imull %r9d, %ecx
 ; X64-NEXT: addl {{.*}}(%rip), %ecx
-; X64-NEXT: andl $4194303, %eax # imm = 0x3FFFFF
-; X64-NEXT: leal (%rax,%rax), %edi
+; X64-NEXT: movl $4194303, %esi
+; X64-NEXT: andl obj(%rip), %esi
+; X64-NEXT: leal (%rsi,%rsi), %edi
 ; X64-NEXT: subl %r9d, %edi
-; X64-NEXT: movl %edi, %esi
-; X64-NEXT: subl %r8d, %esi
-; X64-NEXT: imull %esi, %ecx
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: subl %r8d, %edx
+; X64-NEXT: imull %edx, %ecx
 ; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
-; X64-NEXT: movl $9, %edx
+; X64-NEXT: movl $9, %eax
 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlq %cl, %rdx
-; X64-NEXT: movq %rdx, {{.*}}(%rip)
-; X64-NEXT: cmpl %eax, %esi
+; X64-NEXT: shlq %cl, %rax
+; X64-NEXT: movq %rax, {{.*}}(%rip)
+; X64-NEXT: cmpl %esi, %edx
 ; X64-NEXT: setge {{.*}}(%rip)
 ; X64-NEXT: imull %r9d, %edi
 ; X64-NEXT: movb %dil, {{.*}}(%rip)