forked from OSchip/llvm-project
[AArch64] Combine UADDVs to generate vector add
ADD(UADDV a, UADDV b) --> UADDV(ADD a, b). This partially solves the bug https://bugs.llvm.org/show_bug.cgi?id=46888 (meta ticket: https://bugs.llvm.org/show_bug.cgi?id=46929). Differential Revision: https://reviews.llvm.org/D88731
This commit is contained in:
parent
dde4e0318c
commit
159b2d8e62
|
@ -12336,6 +12336,43 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
|
|||
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
|
||||
}
|
||||
|
||||
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
|
||||
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
EVT VT = N->getValueType(0);
|
||||
// Only scalar integer and vector types.
|
||||
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
|
||||
return SDValue();
|
||||
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
|
||||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
|
||||
return SDValue();
|
||||
|
||||
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
|
||||
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
|
||||
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
|
||||
return SDValue();
|
||||
|
||||
SDValue Op1 = LHS->getOperand(0);
|
||||
SDValue Op2 = RHS->getOperand(0);
|
||||
EVT OpVT1 = Op1.getValueType();
|
||||
EVT OpVT2 = Op2.getValueType();
|
||||
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
|
||||
Op2.getOpcode() != AArch64ISD::UADDV ||
|
||||
OpVT1.getVectorElementType() != VT)
|
||||
return SDValue();
|
||||
|
||||
SDValue Val1 = Op1.getOperand(0);
|
||||
SDValue Val2 = Op2.getOperand(0);
|
||||
EVT ValVT = Val1->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
|
||||
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
}
|
||||
|
||||
// The basic add/sub long vector instructions have variants with "2" on the end
|
||||
// which act on the high-half of their inputs. They are normally matched by
|
||||
// patterns like:
|
||||
|
@ -12389,6 +12426,16 @@ static SDValue performAddSubLongCombine(SDNode *N,
|
|||
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
|
||||
}
|
||||
|
||||
// Entry point for the ADD/SUB DAG combines: the sum-of-two-reductions fold is
// attempted first, and the add/sub long combine is the fallback when that
// fold does not apply.
static SDValue performAddSubCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG) {
  SDValue Reduced = performUADDVCombine(N, DAG);
  if (Reduced)
    return Reduced;

  return performAddSubLongCombine(N, DCI, DAG);
}
|
||||
|
||||
// Massage DAGs which we can use the high-half "long" operations on into
|
||||
// something isel will recognize better. E.g.
|
||||
//
|
||||
|
@ -14739,7 +14786,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
return performABSCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::ADD:
|
||||
case ISD::SUB:
|
||||
return performAddSubLongCombine(N, DCI, DAG);
|
||||
return performAddSubCombine(N, DCI, DAG);
|
||||
case ISD::XOR:
|
||||
return performXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::MUL:
|
||||
|
|
|
@ -138,11 +138,9 @@ entry:
|
|||
define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
|
||||
; CHECK-LABEL: addv_combine_i32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: addv s0, v0.4s
|
||||
; CHECK-NEXT: addv s1, v1.4s
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: fmov w9, s1
|
||||
; CHECK-NEXT: add w0, w8, w9
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)
|
||||
|
@ -154,11 +152,9 @@ entry:
|
|||
define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
|
||||
; CHECK-LABEL: addv_combine_i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
|
||||
; CHECK-NEXT: addp d0, v0.2d
|
||||
; CHECK-NEXT: addp d1, v1.2d
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: fmov x9, d1
|
||||
; CHECK-NEXT: add x0, x8, x9
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)
|
||||
|
|
Loading…
Reference in New Issue