[DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))

In our real-world application, we found that DAGCombiner misses the following optimization:

(zext (and/or/xor (shl/shr (load x), cst), cst)) -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))

If the user of the original zext is an add, this may enable further LEA optimization on x86.
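For example, an illustrative instance of the pattern (not taken from the patch itself), widening i8 to i64:

  (zext i64 (and (lshr (load i8 x), 2), 60))
    -> (and (lshr (zextload i8 -> i64 x), 2), 60)

The zextload fills the high bits with zeros, so shifting and masking in the wide type produces the same value, and the separate zext node disappears.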

This patch adds a new function, CombineZExtLogicopShiftLoad, to perform this optimization.

Differential Revision: https://reviews.llvm.org/D44402

llvm-svn: 329516
Guozhi Wei, 2018-04-07 23:36:10 +00:00
commit 0eb86c8efc (parent e46ac5fb9d)
4 changed files, 231 insertions(+), 0 deletions(-)

File: lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -426,6 +426,7 @@ namespace {
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
@@ -7471,6 +7472,78 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);

  // The outer node must be a logic op (and/or/xor) with a constant RHS,
  // legal in the wide type if we are past legalization.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();
  // The inner node must be a shift (shl/srl) by a constant, again legal in
  // the wide type if we are past legalization.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // The shift operand must be a non-indexed, non-sign-extending load for
  // which a zextload to VT is legal.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();
  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong: bits the narrow shl would have discarded survive in the
  // high part of the widened value, and only an AND with the zero-extended
  // constant clears them (SRL is safe since it shifts in the zeros provided
  // by the zextload).
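  // e.g. loading 0xFF: (zext i32 (or (shl i8 0xFF, 2), 60)) yields 0xFC,
  // but (or (shl (zextload i8 0xFF to i32), 2), 60) yields 0x3FC.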
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();
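
  // ExtendUsesToFormExtLoad checks that the other uses of the loaded value
  // can also be fed the zero-extended value (collecting any setcc uses so
  // they can be rewritten below); otherwise forming the extload isn't
  // worthwhile.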
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation: widen the load, then rebuild the shift
  // and the logic op in the wide type with a zero-extended constant.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
                  ISD::ZERO_EXTEND);
  CombineTo(N, And);
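
  // Rewire the chain: users of the old load's chain move to the extload.
  // If the loaded value itself has other uses, replace it with a truncate
  // of the wide result so those uses stay in the narrow type.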
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
@@ -7989,6 +8062,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&

New file: AArch64 test

@@ -0,0 +1,14 @@
; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s
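; The i8 load is widened to a zero-extending load (ldrb), and the remaining
; shift+mask pair then matches a single unsigned bitfield extract (ubfx).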
define i32 @test1(i8* %p) {
; CHECK: ldrb
; CHECK-NEXT: ubfx
; CHECK-NEXT: ret
%1 = load i8, i8* %p
%2 = lshr i8 %1, 1
%3 = and i8 %2, 1
%4 = zext i8 %3 to i32
ret i32 %4
}

New file: ARM test

@@ -0,0 +1,17 @@
; RUN: llc -mtriple=armv7-linux-gnu < %s -o - | FileCheck %s
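; The i8 load is widened (ldrb zero-extends), while the shift and mask are
; performed in a 32-bit register before the halfword store.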
define void @test1(i8* %p, i16* %q) {
; CHECK: ldrb
; CHECK-NEXT: mov
; CHECK-NEXT: and
; CHECK-NEXT: strh
; CHECK-NEXT: bx
%1 = load i8, i8* %p
%2 = shl i8 %1, 2
%3 = and i8 %2, 12
%4 = zext i8 %3 to i16
store i16 %4, i16* %q
ret void
}

New file: X86 test

@@ -0,0 +1,122 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
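; Where the fold applies, the byte load becomes a zero-extending movzbl and
; the shift and logic op execute in the wide register; the negative tests
; keep the narrow operations.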
define i64 @test1(i8* %data) {
; CHECK-LABEL: test1:
; CHECK: movzbl
; CHECK-NEXT: shlq
; CHECK-NEXT: andl
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = shl i8 %bf.load, 2
%0 = and i8 %bf.clear, 60
%mul = zext i8 %0 to i64
ret i64 %mul
}
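; With the zext folded away, the add of %mul into the pointer can use leaq,
; and the shift can be absorbed into its scaled-index addressing mode.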
define i8* @test2(i8* %data) {
; CHECK-LABEL: test2:
; CHECK: movzbl
; CHECK-NEXT: andl
; CHECK-NEXT: leaq
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = shl i8 %bf.load, 2
%0 = and i8 %bf.clear, 60
%mul = zext i8 %0 to i64
%add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
ret i8* %add.ptr
}
; If the shift op is SHL, the logic op can only be AND.
define i64 @test3(i8* %data) {
; CHECK-LABEL: test3:
; CHECK: movb
; CHECK-NEXT: shlb
; CHECK-NEXT: xorb
; CHECK-NEXT: movzbl
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = shl i8 %bf.load, 2
%0 = xor i8 %bf.clear, 60
%mul = zext i8 %0 to i64
ret i64 %mul
}
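; With a right shift (tests 4-6), no bits stray into the high part of the
; widened value, so and, xor and or can all be folded.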
define i64 @test4(i8* %data) {
; CHECK-LABEL: test4:
; CHECK: movzbl
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = lshr i8 %bf.load, 2
%0 = and i8 %bf.clear, 60
%1 = zext i8 %0 to i64
ret i64 %1
}
define i64 @test5(i8* %data) {
; CHECK-LABEL: test5:
; CHECK: movzbl
; CHECK-NEXT: shrq
; CHECK-NEXT: xorq
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = lshr i8 %bf.load, 2
%0 = xor i8 %bf.clear, 60
%1 = zext i8 %0 to i64
ret i64 %1
}
define i64 @test6(i8* %data) {
; CHECK-LABEL: test6:
; CHECK: movzbl
; CHECK-NEXT: shrq
; CHECK-NEXT: orq
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = lshr i8 %bf.load, 2
%0 = or i8 %bf.clear, 60
%1 = zext i8 %0 to i64
ret i64 %1
}
; Don't do the folding if the other operand isn't a constant.
define i64 @test7(i8* %data, i8 %logop) {
; CHECK-LABEL: test7:
; CHECK: movb
; CHECK-NEXT: shrb
; CHECK-NEXT: orb
; CHECK-NEXT: movzbl
; CHECK-NEXT: retq
entry:
%bf.load = load i8, i8* %data, align 4
%bf.clear = lshr i8 %bf.load, 2
%0 = or i8 %bf.clear, %logop
%1 = zext i8 %0 to i64
ret i64 %1
}
; The load is already folded into a sign-extending load, so the zext combine
; does not apply.
define i64 @test8(i8* %data) {
; CHECK-LABEL: test8:
; CHECK: movsbl
; CHECK-NEXT: movzwl
; CHECK-NEXT: shrl
; CHECK-NEXT: orl
entry:
%bf.load = load i8, i8* %data, align 4
%ext = sext i8 %bf.load to i16
%bf.clear = lshr i16 %ext, 2
%0 = or i16 %bf.clear, 60
%1 = zext i16 %0 to i64
ret i64 %1
}