[AArch64] Fix sub with carry

13403a70e4 introduced a bug where we
generate the outgoing carry inverted, which in turn breaks the
lowering of @llvm.usub.sat.i128, returning the normal difference on
saturation and zero otherwise.

Note that AArch64 has peculiar semantics where the subtraction
instructions generate borrow inverted.  The problem is that we mix the
two forms of semantics -- the normal carry and inverted carry -- in
the area of extended precision subtractions.  Specifically, we have
three problems:

- lowerADDSUBCARRY takes the non-inverted incoming carry from a
  subtraction and feeds it to SBCS without inverting it first.

- lowerADDSUBCARRY makes available the outgoing carry from SBCS
  without inverting it.

- foldOverflowCheck folds:

  (SBC{S} l r (CMP (CSET LO carry) 1)) => (SBC{S} l r carry)

  When the incoming carry flag is set, CSET LO results in zero.  CMP
  in turn generates a borrow, *clearing* the carry flag.  Instead, we
  should fold:

  (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)

  When the incoming carry flag is set, CSET LO results in zero.  CMP
  does not generate a borrow, *setting* the carry flag.

IIUC, we should use the normal (that is, non-inverted) semantics for
carry everywhere.

This patch fixes the three problems above.

This patch does not add any new testcases because we have a plenty of
them covering the instruction in question.  In particular,
@u128_saturating_sub is identical to the testcase in the motivating
issue.

Fixes: #55253

Differential Revision: https://reviews.llvm.org/D124976
This commit is contained in:
Kazu Hirata 2022-05-06 11:04:17 -07:00
parent 891c3cf99e
commit fffb6e6afd
3 changed files with 35 additions and 19 deletions

View File

@ -3308,23 +3308,29 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(2));
}
// Sets 'C' bit of NZCV to 0 if value is 0, else sets 'C' bit to 1
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG) {
// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
// sets 'C' bit to 0.
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
SDLoc DL(Value);
SDValue One = DAG.getConstant(1, DL, Value.getValueType());
EVT VT = Value.getValueType();
SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL,
DAG.getVTList(Value.getValueType(), MVT::Glue), Value, One);
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
return Cmp.getValue(1);
}
// Value is 1 if 'C' bit of NZCV is 1, else 0
static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
bool Invert) {
assert(Flag.getResNo() == 1);
SDLoc DL(Flag);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue CC = DAG.getConstant(AArch64CC::HS, DL, MVT::i32);
unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
@ -3348,9 +3354,10 @@ static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
if (VT0 != MVT::i32 && VT0 != MVT::i64)
return SDValue();
bool InvertCarry = Opcode == AArch64ISD::SBCS;
SDValue OpLHS = Op.getOperand(0);
SDValue OpRHS = Op.getOperand(1);
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG);
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
SDLoc DL(Op);
SDVTList VTs = DAG.getVTList(VT0, VT1);
@ -3358,8 +3365,9 @@ static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
OpRHS, OpCarryIn);
SDValue OutFlag = IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG);
SDValue OutFlag =
IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
}
@ -15517,13 +15525,21 @@ static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
}
// (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
// (SBC{S} l r (CMP (CSET LO carry) 1)) => (SBC{S} l r carry)
// (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
SDValue CmpOp = Op->getOperand(2);
if (!(isCMP(CmpOp) && isOneConstant(CmpOp.getOperand(1))))
if (!isCMP(CmpOp))
return SDValue();
SDValue CsetOp = CmpOp->getOperand(0);
if (IsAdd) {
if (!isOneConstant(CmpOp.getOperand(1)))
return SDValue();
} else {
if (!isNullConstant(CmpOp.getOperand(0)))
return SDValue();
}
SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
auto CC = getCSETCondCode(CsetOp);
if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
return SDValue();

View File

@ -92,7 +92,7 @@ define { i128, i8 } @u128_checked_sub(i128 %x, i128 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x0, x0, x2
; CHECK-NEXT: sbcs x1, x1, x3
; CHECK-NEXT: cset w8, hs
; CHECK-NEXT: cset w8, lo
; CHECK-NEXT: eor w2, w8, #0x1
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y)
@ -110,7 +110,7 @@ define { i128, i8 } @u128_overflowing_sub(i128 %x, i128 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x0, x0, x2
; CHECK-NEXT: sbcs x1, x1, x3
; CHECK-NEXT: cset w2, hs
; CHECK-NEXT: cset w2, lo
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.usub.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@ -126,7 +126,7 @@ define i128 @u128_saturating_sub(i128 %x, i128 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x2
; CHECK-NEXT: sbcs x9, x1, x3
; CHECK-NEXT: cset w10, hs
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: csel x0, xzr, x8, ne
; CHECK-NEXT: csel x1, xzr, x9, ne

View File

@ -346,13 +346,13 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x2, x6
; CHECK-NEXT: sbcs x9, x3, x7
; CHECK-NEXT: cset w10, hs
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: csel x2, xzr, x8, ne
; CHECK-NEXT: csel x3, xzr, x9, ne
; CHECK-NEXT: subs x8, x0, x4
; CHECK-NEXT: sbcs x9, x1, x5
; CHECK-NEXT: cset w10, hs
; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: csel x8, xzr, x8, ne
; CHECK-NEXT: csel x1, xzr, x9, ne