forked from OSchip/llvm-project
[DAGCombine] More diamond carry pattern optimization.
Summary: This diff improves the capability of DAGCombine to generate linear carry propagation in the presence of a diamond pattern. It is now able to match a large variety of different patterns rather than a few hardcoded ones. Arguably, the codegen in the test cases is not better, but this is to be expected. The goal of this transformation is more about canonicalisation than actual optimisation. Reviewers: hfinkel, RKSimon, craig.topper Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D57302 llvm-svn: 365051
This commit is contained in:
parent
783dbe402f
commit
bddb8c3597
|
@ -2875,6 +2875,93 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* If we are facing some sort of diamond carry propagation pattern, try to
|
||||
* break it up to generate something like:
|
||||
* (addcarry X, 0, (addcarry A, B, Z):Carry)
|
||||
*
|
||||
* The end result is usually an increase in operations required, but because the
|
||||
* carry is now linearized, other transforms can kick in and optimize the DAG.
|
||||
*
|
||||
* Patterns typically look something like
|
||||
* (uaddo A, B)
|
||||
* / \
|
||||
* Carry Sum
|
||||
* | \
|
||||
* | (addcarry *, 0, Z)
|
||||
* | /
|
||||
* \ Carry
|
||||
* | /
|
||||
* (addcarry X, *, *)
|
||||
*
|
||||
* But numerous variations exist. Our goal is to identify A, B, X and Z and
|
||||
* produce a combine with a single path for carry propagation.
|
||||
*/
|
||||
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                      SDValue X, SDValue Carry0, SDValue Carry1,
                                      SDNode *N) {
  // Both candidates have to be result #1 of their defining node, and Carry1
  // has to be produced by a UADDO; otherwise there is nothing to match.
  const bool BothAreResultOne =
      Carry0.getResNo() == 1 && Carry1.getResNo() == 1;
  if (!BothAreResultOne || Carry1.getOpcode() != ISD::UADDO)
    return SDValue();

  // Step 1: recover Z from Carry0. Two shapes are accepted:
  //   (addcarry Y, 0, Z)  -> Z is the third operand.
  //   (uaddo Y, 1)        -> treated as the Z=true form; Z is materialized
  //                          as the constant 1.
  SDValue Z;
  switch (Carry0.getOpcode()) {
  case ISD::ADDCARRY:
    if (!isNullConstant(Carry0.getOperand(1)))
      return SDValue();
    Z = Carry0.getOperand(2);
    break;
  case ISD::UADDO: {
    if (!isOneConstant(Carry0.getOperand(1)))
      return SDValue();
    EVT ZVT = Combiner.getSetCCResultType(Carry0.getValueType());
    SDLoc OneDL(Carry0.getOperand(1));
    // Note: this constant is created before the shape checks below, exactly
    // as in the straightforward formulation of this match.
    Z = DAG.getConstant(1, OneDL, ZVT);
    break;
  }
  default:
    // No usable Z.
    return SDValue();
  }

  // Step 2: figure out how the two nodes are chained through their sum
  // (result #0) and pick the operands A and B of the merged addition.
  SDValue A, B;
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
    //   (uaddo A, B) --Sum--> (addcarry *, 0, Z)
    A = Carry1.getOperand(0);
    B = Carry1.getOperand(1);
  } else if (Carry1.getOperand(0) == Carry0.getValue(0)) {
    //   (addcarry A, 0, Z) --Sum--> (uaddo *, B)
    A = Carry0.getOperand(0);
    B = Carry1.getOperand(1);
  } else if (Carry1.getOperand(1) == Carry0.getValue(0)) {
    //   Same as above with the uaddo operands swapped: (uaddo A', *), where
    //   the sum of Carry0's node feeds the second operand.
    A = Carry1.getOperand(0);
    B = Carry0.getOperand(0);
  } else {
    return SDValue();
  }

  // Step 3: emit (addcarry X, 0, (addcarry A, B, Z):Carry).
  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
  Combiner.AddToWorklist(Merged.getNode());

  SDValue Zero = DAG.getConstant(0, DL, X.getValueType());
  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X, Zero,
                     Merged.getValue(1));
}
|
||||
|
||||
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
|
||||
SDNode *N) {
|
||||
// Iff the flag result is dead:
|
||||
|
@ -2889,35 +2976,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
|
|||
* When one of the addcarry arguments is itself a carry, we may be facing
|
||||
* a diamond carry propagation. In which case we try to transform the DAG
|
||||
* to ensure linear carry propagation if that is possible.
|
||||
*
|
||||
* We are trying to get:
|
||||
* (addcarry X, 0, (addcarry A, B, Z):Carry)
|
||||
*/
|
||||
if (auto Y = getAsCarry(TLI, N1)) {
|
||||
/**
|
||||
* (uaddo A, B)
|
||||
* / \
|
||||
* Carry Sum
|
||||
* | \
|
||||
* | (addcarry *, 0, Z)
|
||||
* | /
|
||||
* \ Carry
|
||||
* | /
|
||||
* (addcarry X, *, *)
|
||||
*/
|
||||
if (Y.getOpcode() == ISD::UADDO &&
|
||||
CarryIn.getResNo() == 1 &&
|
||||
CarryIn.getOpcode() == ISD::ADDCARRY &&
|
||||
isNullConstant(CarryIn.getOperand(1)) &&
|
||||
CarryIn.getOperand(0) == Y.getValue(0)) {
|
||||
auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
|
||||
Y.getOperand(0), Y.getOperand(1),
|
||||
CarryIn.getOperand(2));
|
||||
AddToWorklist(NewY.getNode());
|
||||
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
|
||||
DAG.getConstant(0, SDLoc(N), N0.getValueType()),
|
||||
NewY.getValue(1));
|
||||
}
|
||||
// Because both are carries, Y and Z can be swapped.
|
||||
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
|
||||
return R;
|
||||
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
|
||||
return R;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
|
|
@ -390,13 +390,13 @@ define i128 @addcarry1_not(i128 %n) {
|
|||
define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: addcarry_to_subcarry:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: notq %rsi
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: movb $1, %cl
|
||||
; CHECK-NEXT: addb $-1, %cl
|
||||
; CHECK-NEXT: movq %rdi, %rcx
|
||||
; CHECK-NEXT: addq %rsi, %rcx
|
||||
; CHECK-NEXT: setb %al
|
||||
; CHECK-NEXT: addq $1, %rcx
|
||||
; CHECK-NEXT: adcq %rdi, %rax
|
||||
; CHECK-NEXT: adcq %rsi, %rcx
|
||||
; CHECK-NEXT: adcq $0, %rax
|
||||
; CHECK-NEXT: setb %cl
|
||||
; CHECK-NEXT: movzbl %cl, %edx
|
||||
; CHECK-NEXT: addq %rsi, %rax
|
||||
|
|
|
@ -90,29 +90,37 @@ entry:
|
|||
define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
|
||||
; CHECK-LABEL: sub:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pushq %rbx
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset %rbx, -16
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: movq (%rsi), %r10
|
||||
; CHECK-NEXT: movq 8(%rsi), %rdi
|
||||
; CHECK-NEXT: movq %r10, %r11
|
||||
; CHECK-NEXT: subq %rdx, %r11
|
||||
; CHECK-NEXT: notq %rdx
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: addq (%rsi), %rdx
|
||||
; CHECK-NEXT: setb %dil
|
||||
; CHECK-NEXT: addq $1, %rdx
|
||||
; CHECK-NEXT: adcq 8(%rsi), %rdi
|
||||
; CHECK-NEXT: setb %r10b
|
||||
; CHECK-NEXT: movzbl %r10b, %r10d
|
||||
; CHECK-NEXT: movb $1, %bl
|
||||
; CHECK-NEXT: addb $-1, %bl
|
||||
; CHECK-NEXT: adcq %r10, %rdx
|
||||
; CHECK-NEXT: adcq $0, %rdi
|
||||
; CHECK-NEXT: setb %dl
|
||||
; CHECK-NEXT: movzbl %dl, %edx
|
||||
; CHECK-NEXT: notq %rcx
|
||||
; CHECK-NEXT: addq %rdi, %rcx
|
||||
; CHECK-NEXT: adcq 16(%rsi), %r10
|
||||
; CHECK-NEXT: setb %dil
|
||||
; CHECK-NEXT: movzbl %dil, %edi
|
||||
; CHECK-NEXT: adcq 16(%rsi), %rdx
|
||||
; CHECK-NEXT: setb %bl
|
||||
; CHECK-NEXT: movzbl %bl, %edi
|
||||
; CHECK-NEXT: notq %r8
|
||||
; CHECK-NEXT: addq %r10, %r8
|
||||
; CHECK-NEXT: addq %rdx, %r8
|
||||
; CHECK-NEXT: adcq 24(%rsi), %rdi
|
||||
; CHECK-NEXT: notq %r9
|
||||
; CHECK-NEXT: addq %rdi, %r9
|
||||
; CHECK-NEXT: movq %rdx, (%rax)
|
||||
; CHECK-NEXT: movq %r11, (%rax)
|
||||
; CHECK-NEXT: movq %rcx, 8(%rax)
|
||||
; CHECK-NEXT: movq %r8, 16(%rax)
|
||||
; CHECK-NEXT: movq %r9, 24(%rax)
|
||||
; CHECK-NEXT: popq %rbx
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = extractvalue %S %arg.b, 0
|
||||
|
|
Loading…
Reference in New Issue