[AArch64] Perform first active true vector combine

Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
   ... into: "ptrue p, all" + PTEST
Test bit of lane 0 can use P register directly, and the instruction “pture all”
is loop invariant, which will beneficial to SVE after hoisting out the loop.

Reviewed By: david-arm, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D120891
This commit is contained in:
zhongyunde 2022-03-08 00:31:07 +08:00
parent f4368487aa
commit c22c8b151b
2 changed files with 55 additions and 2 deletions

View File

@ -14364,7 +14364,46 @@ static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
}
}
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond);
// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
// ... into: "ptrue p, all" + PTEST
static SDValue
performFirstTrueTestVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
// Make sure PTEST can be legalised with illegal types.
if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
SDValue SetCC = N->getOperand(0);
EVT VT = SetCC.getValueType();
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return SDValue();
// Restricted the DAG combine to only cases where we're extracting from a
// flag-setting operation
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Idx || !Idx->isZero() || SetCC.getOpcode() != ISD::SETCC)
return SDValue();
// Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
SelectionDAG &DAG = DCI.DAG;
SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all);
return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::FIRST_ACTIVE);
}
static SDValue
performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
@ -18356,7 +18395,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DAG);
return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case AArch64ISD::UADDV:

View File

@ -52,3 +52,17 @@ define <vscale x 4 x i1> @not_fcmp_uge_nxv4f32(<vscale x 4 x float> %a, <vscale
%not = xor <vscale x 4 x i1> %icmp, %ones
ret <vscale x 4 x i1> %not
}
define i1 @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: foo:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, mi
; CHECK-NEXT: ret
%vcond = fcmp oeq <vscale x 4 x float> %a, %b
%bit = extractelement <vscale x 4 x i1> %vcond, i64 0
ret i1 %bit
}