[DAGCombiner] Fold (zext (and/or/xor (shl/shr (load x), cst), cst))
In a real-world application we found that DAGCombiner misses the following optimization:

  (zext (and/or/xor (shl/shr (load x), cst), cst))
    -> (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))

If the user of the original zext is an add, the widened value may enable further lea optimization on x86. This patch adds a new function, CombineZExtLogicopShiftLoad, to perform this optimization.

Differential Revision: https://reviews.llvm.org/D44402

llvm-svn: 329516
parent e46ac5fb9d
commit 0eb86c8efc
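As an illustration of the rewrite (my own sketch, not part of the commit), here is a scalar C++ model of the legal SRL case; the function names and the i8-to-i32 widths are invented for the example:

#include <cstdint>

// Before the fold: shift and mask in the narrow type, then zero-extend.
uint32_t beforeFold(const uint8_t *P) {
  uint8_t Loaded = *P;                         // (load x)
  uint8_t Narrow = (uint8_t)(Loaded >> 1) & 1; // (and (srl ...) 1) in i8
  return Narrow;                               // (zext ... to i32)
}

// After the fold: the load itself zero-extends, and the shift and mask run
// in the wide type against zero-extended constants. The two functions
// return the same value for every input byte.
uint32_t afterFold(const uint8_t *P) {
  uint32_t Loaded = *P;      // (zextload x)
  return (Loaded >> 1) & 1u; // (and (srl ...) 1) in i32
}

Because the extension is folded into the load, the wide result can feed pointer arithmetic directly, which is what enables the lea formation mentioned above.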
@@ -426,6 +426,7 @@ namespace {
                                        unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue CombineExtLoad(SDNode *N);
+    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
     SDValue combineRepeatedFPDivisors(SDNode *N);
     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
@@ -7471,6 +7472,78 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
 }
 
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+  assert(N->getOpcode() == ISD::ZERO_EXTEND);
+  EVT VT = N->getValueType(0);
+
+  // and/or/xor
+  SDValue N0 = N->getOperand(0);
+  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+        N0.getOpcode() == ISD::XOR) ||
+      N0.getOperand(1).getOpcode() != ISD::Constant ||
+      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+    return SDValue();
+
+  // shl/shr
+  SDValue N1 = N0->getOperand(0);
+  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+      N1.getOperand(1).getOpcode() != ISD::Constant ||
+      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+    return SDValue();
+
+  // load
+  if (!isa<LoadSDNode>(N1.getOperand(0)))
+    return SDValue();
+  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
+  EVT MemVT = Load->getMemoryVT();
+  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+    return SDValue();
+
+  // If the shift op is SHL, the logic op must be AND, otherwise the result
+  // will be wrong.
+  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+
+  SmallVector<SDNode*, 4> SetCCs;
+  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+                               ISD::ZERO_EXTEND, SetCCs, TLI))
+    return SDValue();
+
+  // Actually do the transformation.
+  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+                                   Load->getChain(), Load->getBasePtr(),
+                                   Load->getMemoryVT(), Load->getMemOperand());
+
+  SDLoc DL1(N1);
+  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+                              N1.getOperand(1));
+
+  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+  Mask = Mask.zext(VT.getSizeInBits());
+  SDLoc DL0(N0);
+  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+                            DAG.getConstant(Mask, DL0, VT));
+
+  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
+                  ISD::ZERO_EXTEND);
+  CombineTo(N, And);
+  if (SDValue(Load, 0).hasOneUse()) {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+  } else {
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+                                Load->getValueType(0), ExtLoad);
+    CombineTo(Load, Trunc, ExtLoad.getValue(1));
+  }
+  return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
 /// If we're narrowing or widening the result of a vector select and the final
 /// size is the same size as a setcc (compare) feeding the select, then try to
 /// apply the cast operation to the select's operands because matching vector
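The "If the shift op is SHL, the logic op must be AND" guard above can be checked with a small standalone C++ program (an illustrative sketch, not part of the patch): a wide SHL keeps bits above the narrow width that the narrow SHL would discard, and only an AND with the zero-extended constant clears them again.

#include <cstdint>
#include <cstdio>

// Narrow computation, as in the original IR: shl and xor in i8, then zext.
static uint32_t narrowShlXor(uint8_t X) {
  return (uint8_t)((uint8_t)(X << 2) ^ 60);
}

// Widened computation the fold would produce if it allowed SHL with XOR:
// zero-extend first, then shl and xor in i32.
static uint32_t wideShlXor(uint8_t X) {
  return ((uint32_t)X << 2) ^ 60u;
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = (uint8_t)V;
    if (narrowShlXor(X) != wideShlXor(X)) {
      // First mismatch is X = 64: narrow gives 60, wide gives 316.
      std::printf("mismatch at %u: narrow=%u wide=%u\n", V,
                  narrowShlXor(X), wideShlXor(X));
      return 1;
    }
  }
  return 0;
}

Replacing the xor with an and makes the loop finish with no mismatch. SRL needs no such guard: the zero-extended value has no set bits above the narrow width, so a right shift brings in zeros either way, and AND, OR, or XOR against the widened constant all agree with the narrow computation.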
@@ -7989,6 +8062,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     }
   }
 
+  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+  // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+    return ZExtLoad;
+
   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s
+
+define i32 @test1(i8* %p) {
+; CHECK: ldrb
+; CHECK-NEXT: ubfx
+; CHECK-NEXT: ret
+
+  %1 = load i8, i8* %p
+  %2 = lshr i8 %1, 1
+  %3 = and i8 %2, 1
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
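With the zext folded into the load, the remaining lshr-plus-mask is exactly AArch64's unsigned bit-field extract, which is why a single ubfx is expected after the ldrb. A scalar model of what ubfx computes (my sketch, with invented names):

#include <cstdint>

// UBFX(X, Lsb, Width): extract Width bits of X starting at bit Lsb.
// The test above corresponds to Lsb = 1, Width = 1.
uint32_t ubfx(uint32_t X, unsigned Lsb, unsigned Width) {
  return (X >> Lsb) & ((1u << Width) - 1u);
}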
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=armv7-linux-gnu < %s -o - | FileCheck %s
+
+define void @test1(i8* %p, i16* %q) {
+; CHECK: ldrb
+; CHECK-NEXT: mov
+; CHECK-NEXT: and
+; CHECK-NEXT: strh
+; CHECK-NEXT: bx
+
+  %1 = load i8, i8* %p
+  %2 = shl i8 %1, 2
+  %3 = and i8 %2, 12
+  %4 = zext i8 %3 to i16
+  store i16 %4, i16* %q
+  ret void
+}
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+
+define i64 @test1(i8* %data) {
+; CHECK-LABEL: test1:
+; CHECK: movzbl
+; CHECK-NEXT: shlq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  ret i64 %mul
+}
+
+define i8* @test2(i8* %data) {
+; CHECK-LABEL: test2:
+; CHECK: movzbl
+; CHECK-NEXT: andl
+; CHECK-NEXT: leaq
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
+  ret i8* %add.ptr
+}
+
+; If the shift op is SHL, the logic op can only be AND.
+define i64 @test3(i8* %data) {
+; CHECK-LABEL: test3:
+; CHECK: movb
+; CHECK-NEXT: shlb
+; CHECK-NEXT: xorb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = xor i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  ret i64 %mul
+}
+
+define i64 @test4(i8* %data) {
+; CHECK-LABEL: test4:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: andl
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+define i64 @test5(i8* %data) {
+; CHECK-LABEL: test5:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: xorq
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = xor i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+define i64 @test6(i8* %data) {
+; CHECK-LABEL: test6:
+; CHECK: movzbl
+; CHECK-NEXT: shrq
+; CHECK-NEXT: orq
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = or i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+; Don't do the folding if the other operand isn't a constant.
+define i64 @test7(i8* %data, i8 %logop) {
+; CHECK-LABEL: test7:
+; CHECK: movb
+; CHECK-NEXT: shrb
+; CHECK-NEXT: orb
+; CHECK-NEXT: movzbl
+; CHECK-NEXT: retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = or i8 %bf.clear, %logop
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+; Load is folded with sext.
+define i64 @test8(i8* %data) {
+; CHECK-LABEL: test8:
+; CHECK: movsbl
+; CHECK-NEXT: movzwl
+; CHECK-NEXT: shrl
+; CHECK-NEXT: orl
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %ext = sext i8 %bf.load to i16
+  %bf.clear = lshr i16 %ext, 2
+  %0 = or i16 %bf.clear, 60
+  %1 = zext i16 %0 to i64
+  ret i64 %1
+}
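test2 above is the motivating case from the commit message: once the zext folds into the load (movzbl), the masked value is a ready-made i64 index and the getelementptr becomes a single leaq. A hypothetical C++ source for that pattern (names invented for the illustration):

#include <cstdint>

// A bit-field decoded from the first byte of a buffer is used as a byte
// offset into the same buffer, mirroring test2's shl/and/zext/gep chain.
const uint8_t *element(const uint8_t *Data) {
  unsigned Idx = (uint8_t)(Data[0] << 2) & 60u; // shl i8 2, and i8 60
  return Data + Idx;                            // zext + getelementptr -> leaq
}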