[DAGCombine] More diamong carry pattern optimization.

Summary:
This diff improve the capability of DAGCOmbine to generate linear carries propagation in presence of a diamond pattern. It is now able to match a large variety of different patterns rather than some hardcoded one.

Arguably, the codegen in test cases is not better, but this is to be expected. The goal of this transformation is more about canonicalisation than actual optimisation.

Reviewers: hfinkel, RKSimon, craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D57302

llvm-svn: 365051
This commit is contained in:
Amaury Sechet 2019-07-03 16:15:59 +00:00
parent 783dbe402f
commit bddb8c3597
3 changed files with 117 additions and 44 deletions

View File

@ -2875,6 +2875,93 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
/**
* If we are facing some sort of diamond carry propapagtion pattern try to
* break it up to generate something like:
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*
* The end result is usually an increase in operation required, but because the
* carry is now linearized, other tranforms can kick in and optimize the DAG.
*
* Patterns typically look something like
* (uaddo A, B)
* / \
* Carry Sum
* | \
* | (addcarry *, 0, Z)
* | /
* \ Carry
* | /
* (addcarry X, *, *)
*
* But numerous variation exist. Our goal is to identify A, B, X and Z and
* produce a combine with a single path for carry propagation.
*/
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
SDValue X, SDValue Carry0, SDValue Carry1,
SDNode *N) {
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
return SDValue();
if (Carry1.getOpcode() != ISD::UADDO)
return SDValue();
SDValue Z;
/**
* First look for a suitable Z. It will present itself in the form of
* (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
*/
if (Carry0.getOpcode() == ISD::ADDCARRY &&
isNullConstant(Carry0.getOperand(1))) {
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
isOneConstant(Carry0.getOperand(1))) {
EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
} else {
// We couldn't find a suitable Z.
return SDValue();
}
auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
Combiner.AddToWorklist(NewY.getNode());
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
DAG.getConstant(0, DL, X.getValueType()),
NewY.getValue(1));
};
/**
* (uaddo A, B)
* |
* Sum
* |
* (addcarry *, 0, Z)
*/
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
}
/**
* (addcarry A, 0, Z)
* |
* Sum
* |
* (uaddo *, B)
*/
if (Carry1.getOperand(0) == Carry0.getValue(0)) {
return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
}
if (Carry1.getOperand(1) == Carry0.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
}
return SDValue();
}
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// Iff the flag result is dead:
@ -2889,35 +2976,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
* When one of the addcarry argument is itself a carry, we may be facing
* a diamond carry propagation. In which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
*
* We are trying to get:
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*/
if (auto Y = getAsCarry(TLI, N1)) {
/**
* (uaddo A, B)
* / \
* Carry Sum
* | \
* | (addcarry *, 0, Z)
* | /
* \ Carry
* | /
* (addcarry X, *, *)
*/
if (Y.getOpcode() == ISD::UADDO &&
CarryIn.getResNo() == 1 &&
CarryIn.getOpcode() == ISD::ADDCARRY &&
isNullConstant(CarryIn.getOperand(1)) &&
CarryIn.getOperand(0) == Y.getValue(0)) {
auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
Y.getOperand(0), Y.getOperand(1),
CarryIn.getOperand(2));
AddToWorklist(NewY.getNode());
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
DAG.getConstant(0, SDLoc(N), N0.getValueType()),
NewY.getValue(1));
}
// Because both are carries, Y and Z can be swapped.
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
return R;
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
return R;
}
return SDValue();

View File

@ -390,13 +390,13 @@ define i128 @addcarry1_not(i128 %n) {
define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
; CHECK-LABEL: addcarry_to_subcarry:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: notq %rsi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: addb $-1, %cl
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: addq %rsi, %rcx
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq $1, %rcx
; CHECK-NEXT: adcq %rdi, %rax
; CHECK-NEXT: adcq %rsi, %rcx
; CHECK-NEXT: adcq $0, %rax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movzbl %cl, %edx
; CHECK-NEXT: addq %rsi, %rax

View File

@ -90,29 +90,37 @@ entry:
define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
; CHECK-LABEL: sub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rsi), %r10
; CHECK-NEXT: movq 8(%rsi), %rdi
; CHECK-NEXT: movq %r10, %r11
; CHECK-NEXT: subq %rdx, %r11
; CHECK-NEXT: notq %rdx
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: addq (%rsi), %rdx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: addq $1, %rdx
; CHECK-NEXT: adcq 8(%rsi), %rdi
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: movzbl %r10b, %r10d
; CHECK-NEXT: movb $1, %bl
; CHECK-NEXT: addb $-1, %bl
; CHECK-NEXT: adcq %r10, %rdx
; CHECK-NEXT: adcq $0, %rdi
; CHECK-NEXT: setb %dl
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: notq %rcx
; CHECK-NEXT: addq %rdi, %rcx
; CHECK-NEXT: adcq 16(%rsi), %r10
; CHECK-NEXT: setb %dil
; CHECK-NEXT: movzbl %dil, %edi
; CHECK-NEXT: adcq 16(%rsi), %rdx
; CHECK-NEXT: setb %bl
; CHECK-NEXT: movzbl %bl, %edi
; CHECK-NEXT: notq %r8
; CHECK-NEXT: addq %r10, %r8
; CHECK-NEXT: addq %rdx, %r8
; CHECK-NEXT: adcq 24(%rsi), %rdi
; CHECK-NEXT: notq %r9
; CHECK-NEXT: addq %rdi, %r9
; CHECK-NEXT: movq %rdx, (%rax)
; CHECK-NEXT: movq %r11, (%rax)
; CHECK-NEXT: movq %rcx, 8(%rax)
; CHECK-NEXT: movq %r8, 16(%rax)
; CHECK-NEXT: movq %r9, 24(%rax)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%0 = extractvalue %S %arg.b, 0