[X86][AARCH64] Improve ISD::ABS support
This patch takes some of the code from D49837 to allow us to enable ISD::ABS support for all SSE vector types.

Differential Revision: https://reviews.llvm.org/D56544

llvm-svn: 350998
commit ca0de0363b (parent 5f393eb5da)
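The expansion this patch teaches the legalizers to emit relies on the two's-complement identity abs(x) == (x + m) ^ m, where m = x >> (bits - 1) is an arithmetic-shift sign mask (all ones for negative x, zero otherwise). A minimal standalone sketch of that identity, assuming arithmetic right shift of signed values; the helper name is illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    static int64_t absViaShiftAddXor(int64_t X) {
      int64_t Mask = X >> 63;     // sign mask: -1 if X < 0, 0 otherwise
      return (X + Mask) ^ Mask;   // two's-complement negate when negative
                                  // (INT64_MIN would overflow here; the DAG
                                  // nodes simply wrap instead)
    }

    int main() {
      assert(absViaShiftAddXor(-5) == 5);
      assert(absViaShiftAddXor(7) == 7);
      assert(absViaShiftAddXor(0) == 0);
      return 0;
    }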
@@ -3787,6 +3787,14 @@ public:
   /// \returns True, if the expansion was successful, false otherwise
   bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
 
+  /// Expand ABS nodes. Expands vector/scalar ABS nodes,
+  /// vector nodes can only succeed if all operations are legal/custom.
+  /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
+  /// \param N Node to expand
+  /// \param Result output after conversion
+  /// \returns True, if the expansion was successful, false otherwise
+  bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
   /// Turn load of vector type into a load of the individual elements.
   /// \param LD load to expand
   /// \returns MERGE_VALUEs of the scalar loads with their chains.
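As a concrete instance of the documented pattern: for an i8 value x = -5, SRA by 7 (the scalar bit width minus one, which is what the implementation further down actually shifts by) yields -1, the ADD yields -6, and the XOR with -1 flips every bit back to 5; for non-negative x the shift yields 0, so the ADD and XOR are no-ops.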
@@ -2645,6 +2645,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   SDValue Tmp1, Tmp2, Tmp3, Tmp4;
   bool NeedInvert;
   switch (Node->getOpcode()) {
+  case ISD::ABS:
+    if (TLI.expandABS(Node, Tmp1, DAG))
+      Results.push_back(Tmp1);
+    break;
   case ISD::CTPOP:
     if (TLI.expandCTPOP(Node, Tmp1, DAG))
       Results.push_back(Tmp1);
@@ -117,6 +117,12 @@ class VectorLegalizer {
   /// the remaining lanes, finally bitcasting to the proper type.
   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
 
+  /// Implement expand-based legalization of ABS vector operations.
+  /// If following expanding is legal/custom then do it:
+  /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+  /// else unroll the operation.
+  SDValue ExpandABS(SDValue Op);
+
   /// Expand bswap of vectors into a shuffle if legal.
   SDValue ExpandBSWAP(SDValue Op);
 
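The comment above describes a two-way choice that the ExpandABS hunks below implement: use the three-node shift/add/xor form when the target can do SRA, ADD and XOR on the whole vector type, otherwise fall back to scalarizing each lane. A rough standalone model of the two results (the names and the fixed v4i64 shape are illustrative, not LLVM code):

    #include <array>
    #include <cstdint>

    using V4I64 = std::array<int64_t, 4>;

    // "Expand" result: three whole-vector operations (SRA, ADD, XOR).
    // The loop below only models the lane-wise math of those three nodes.
    V4I64 absExpanded(const V4I64 &X) {
      V4I64 R{};
      for (size_t I = 0; I != X.size(); ++I) {
        uint64_t U = static_cast<uint64_t>(X[I]);
        uint64_t M = static_cast<uint64_t>(X[I] >> 63); // SRA
        R[I] = static_cast<int64_t>((U + M) ^ M);       // ADD, then XOR
      }
      return R;
    }

    // "Unroll" fallback: one independent scalar abs per lane, which is the
    // shape DAG.UnrollVectorOp produces when the vector ops are unavailable.
    V4I64 absUnrolled(const V4I64 &X) {
      V4I64 R{};
      for (size_t I = 0; I != X.size(); ++I)
        R[I] = X[I] < 0 ? -X[I] : X[I]; // model only; the real lowering wraps
      return R;                         // for INT64_MIN instead of overflowing
    }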
@@ -355,6 +361,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FSHR:
   case ISD::ROTL:
   case ISD::ROTR:
+  case ISD::ABS:
   case ISD::BSWAP:
   case ISD::BITREVERSE:
   case ISD::CTLZ:
@@ -749,6 +756,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
     return ExpandFSUB(Op);
   case ISD::SETCC:
     return UnrollVSETCC(Op);
+  case ISD::ABS:
+    return ExpandABS(Op);
   case ISD::BITREVERSE:
     return ExpandBITREVERSE(Op);
   case ISD::CTPOP:
@@ -1064,6 +1073,16 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
 }
 
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+  // Attempt to expand using TargetLowering.
+  SDValue Result;
+  if (TLI.expandABS(Op.getNode(), Result, DAG))
+    return Result;
+
+  // Otherwise go ahead and unroll.
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
 SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
   // Attempt to expand using TargetLowering.
   SDValue Result;
@@ -4715,6 +4715,26 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
   return true;
 }
 
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+                               SelectionDAG &DAG) const {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+
+  // Only expand vector types if we have the appropriate vector operations.
+  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+                        !isOperationLegalOrCustom(ISD::ADD, VT) ||
+                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+    return false;
+
+  SDValue Shift =
+      DAG.getNode(ISD::SRA, dl, VT, Op,
+                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+  Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+  return true;
+}
+
 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                             SelectionDAG &DAG) const {
   SDLoc SL(LD);
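One property of the expansion implemented above that is easy to miss: for the most negative value the ADD wraps, so the result is INT_MIN again, exactly as a plain two's-complement negate would behave. A small self-contained check of that behaviour (an illustration, not code from the patch; the unsigned casts are only there to keep the C++ itself free of signed-overflow undefined behaviour):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    static int64_t expandedAbs(int64_t X) {
      uint64_t U = static_cast<uint64_t>(X);
      uint64_t M = static_cast<uint64_t>(X >> 63); // the SRA's sign mask
      return static_cast<int64_t>((U + M) ^ M);    // the ADD and XOR, mod 2^64
    }

    int main() {
      const int64_t Min = std::numeric_limits<int64_t>::min();
      assert(expandedAbs(Min) == Min); // wraps rather than producing +2^63
      assert(expandedAbs(-42) == 42);
      return 0;
    }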
@@ -867,6 +867,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SETCC, VT, Custom);
       setOperationAction(ISD::CTPOP, VT, Custom);
+      setOperationAction(ISD::ABS, VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1207,6 +1208,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
     setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
 
+    setOperationAction(ISD::ABS, MVT::v4i64, Custom);
     setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
     setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
     setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
@@ -23585,7 +23587,8 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
   return split256IntArith(Op, DAG);
 }
 
-static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
+                        SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
     // Since X86 does not have CMOV for 8-bit integer, we don't convert
@@ -23599,10 +23602,14 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
   }
 
-  assert(Op.getSimpleValueType().is256BitVector() &&
-         Op.getSimpleValueType().isInteger() &&
-         "Only handle AVX 256-bit vector integer operation");
-  return Lower256IntUnary(Op, DAG);
+  if (VT.is256BitVector() && !Subtarget.hasInt256()) {
+    assert(VT.isInteger() &&
+           "Only handle AVX 256-bit vector integer operation");
+    return Lower256IntUnary(Op, DAG);
+  }
+
+  // Default to expand.
+  return SDValue();
 }
 
 static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
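The rewritten LowerABS now distinguishes three cases: scalar i16/i32/i64 (kept on the X86ISD::CMOV path), 256-bit integer vectors without AVX2 (split into two 128-bit halves via Lower256IntUnary), and everything else, which now returns SDValue() so the generic expansion added above takes over. A rough standalone model of the first two shapes (all names and the fixed v4i64 type below are illustrative, not LLVM code):

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V2I64 = std::array<int64_t, 2>;
    using V4I64 = std::array<int64_t, 4>;

    // Scalar path: compute the negation, then conditionally pick it -- the
    // ternary stands in for the SUB + X86ISD::CMOV pair on the sign flag.
    int64_t absViaNegAndCmov(int64_t X) {
      int64_t Neg = 0 - X; // the DAG lowering wraps for INT64_MIN; plain C++
      return X < 0 ? Neg : X; // negation would overflow on that one input
    }

    // Stand-in for whatever the 128-bit vector lowering emits per lane.
    V2I64 abs128(V2I64 X) {
      for (int64_t &L : X) {
        uint64_t U = static_cast<uint64_t>(L);
        uint64_t M = static_cast<uint64_t>(L >> 63);
        L = static_cast<int64_t>((U + M) ^ M);
      }
      return X;
    }

    // AVX1-style path: a 256-bit integer ABS handled as two 128-bit halves,
    // mirroring the Lower256IntUnary call kept for !Subtarget.hasInt256().
    V4I64 absSplitInHalves(const V4I64 &X) {
      V2I64 Lo = abs128({X[0], X[1]});
      V2I64 Hi = abs128({X[2], X[3]});
      return {Lo[0], Lo[1], Hi[0], Hi[1]};
    }

    int main() {
      assert(absViaNegAndCmov(-3) == 3);
      V4I64 R = absSplitInHalves({-1, 2, -3, 4});
      assert(R[0] == 1 && R[1] == 2 && R[2] == 3 && R[3] == 4);
      return 0;
    }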
@@ -26287,7 +26294,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SMIN:
   case ISD::UMAX:
   case ISD::UMIN: return LowerMINMAX(Op, DAG);
-  case ISD::ABS: return LowerABS(Op, DAG);
+  case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
   case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
   case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
   case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -542,7 +542,8 @@ define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
 
 define i64 @abs_1d_honestly(i64 %A) nounwind {
 ; CHECK-LABEL: abs_1d_honestly:
-; CHECK: abs d0, d0
+; CHECK: cmp x0, #0
+; CHECK-NEXT: cneg x0, x0, mi
   %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A)
   ret i64 %abs
 }
@@ -67,9 +67,6 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
 ; AVX2-LABEL: combine_v4i64_abs_abs:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
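The three dropped AVX2 instructions are a second compare/add/xor round, which suggests that with ISD::ABS now a first-class node for v4i64 the abs-of-abs in this test folds down to a single abs before lowering. The underlying identity is simply that abs is idempotent; a tiny standalone check (illustrative only, not part of the test):

    #include <cassert>
    #include <cstdint>

    static int64_t absOnce(int64_t X) {
      uint64_t U = static_cast<uint64_t>(X);
      uint64_t M = static_cast<uint64_t>(X >> 63);
      return static_cast<int64_t>((U + M) ^ M);
    }

    int main() {
      for (int64_t X : {int64_t(-7), int64_t(0), int64_t(42)}) {
        int64_t Once = absOnce(X);
        assert(absOnce(Once) == Once); // abs(abs(x)) == abs(x)
      }
      return 0;
    }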
@@ -583,12 +583,12 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4
 ; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
 ; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_abs_gt_v4i64:
@@ -639,20 +639,20 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm6
 ; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpaddq %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm4
-; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %ymm6, %ymm0, %ymm0
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm5
 ; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm3
 ; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_abs_le_v8i64:
@@ -713,19 +713,19 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
 ; AVX1-NEXT: vmovdqu 48(%rdi), %xmm3
 ; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm7
 ; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpaddq %xmm6, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5
-; AVX1-NEXT: vpaddq %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vxorps %ymm7, %ymm0, %ymm0
 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm5
+; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm3
-; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_abs_le_v8i64_fold:
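In all of the AVX1/AVX2 sequences above the sign mask is produced by vpcmpgtq against zero rather than by an arithmetic shift, since neither SSE nor AVX2 has a 64-bit arithmetic right shift; the add and xor then follow the same identity as the generic expansion, and the AVX1 variants merge the two 128-bit masks with vinsertf128 so a single 256-bit vxorps can finish the job. A scalar model of one 64-bit lane (illustrative only):

    #include <cassert>
    #include <cstdint>

    static int64_t absLane(int64_t X) {
      uint64_t Mask = (0 > X) ? ~UINT64_C(0) : 0;     // vpcmpgtq zero, x
      uint64_t Sum = static_cast<uint64_t>(X) + Mask; // vpaddq
      return static_cast<int64_t>(Sum ^ Mask);        // vpxor / vxorps
    }

    int main() {
      assert(absLane(-9) == 9);
      assert(absLane(13) == 13);
      return 0;
    }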