forked from OSchip/llvm-project
[DAGCombiner] Allow operand of step_vector to be negative.
It is reasonable to relax the non-negative restriction on the operand of step_vector. This patch also adds a further combine for step_vector: (sub X, step_vector(C)) -> (add X, step_vector(-C)). Differential Revision: https://reviews.llvm.org/D100812
This commit is contained in:
parent
0724911d2a
commit
978eb3f168
|
@ -594,10 +594,10 @@ enum NodeType {
|
|||
|
||||
/// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
|
||||
/// of a linear sequence of unsigned values starting from 0 with a step of
|
||||
/// IMM, where IMM must be a vector index constant positive integer value
|
||||
/// which must fit in the vector element type.
|
||||
/// IMM, where IMM must be a vector index constant integer value which must
|
||||
/// fit in the vector element type.
|
||||
/// Note that IMM may be a smaller type than the vector element type, in
|
||||
/// which case the step is implicitly zero-extended to the vector element
|
||||
/// which case the step is implicitly sign-extended to the vector element
|
||||
/// type. IMM may also be a larger type than the vector element type, in
|
||||
/// which case the step is implicitly truncated to the vector element type.
|
||||
/// The operation does not support returning fixed-width vectors or
|
||||
|
|
|
@ -3544,6 +3544,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
|
|||
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
|
||||
}
|
||||
|
||||
// canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
|
||||
if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
|
||||
SDValue NewStep = DAG.getConstant(-N1.getConstantOperandAPInt(0), DL,
|
||||
N1.getOperand(0).getValueType());
|
||||
return DAG.getNode(ISD::ADD, DL, VT, N0,
|
||||
DAG.getStepVector(DL, VT, NewStep));
|
||||
}
|
||||
|
||||
// Prefer an add for more folding potential and possibly better codegen:
|
||||
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
|
||||
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
|
||||
|
|
|
@ -4791,7 +4791,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
|
|||
EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
|
||||
NOutVT.getVectorElementType());
|
||||
APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
|
||||
SDValue Step = DAG.getConstant(StepVal.getZExtValue(), dl, NOutElemVT);
|
||||
SDValue Step = DAG.getConstant(StepVal.getSExtValue(), dl, NOutElemVT);
|
||||
return DAG.getStepVector(dl, NOutVT, Step);
|
||||
}
|
||||
|
||||
|
|
|
@ -1655,11 +1655,10 @@ void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
|
|||
|
||||
// Hi = Lo + (EltCnt * Step)
|
||||
EVT EltVT = Step.getValueType();
|
||||
APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
|
||||
SDValue StartOfHi =
|
||||
DAG.getVScale(dl, EltVT,
|
||||
cast<ConstantSDNode>(Step)->getAPIntValue() *
|
||||
LoVT.getVectorMinNumElements());
|
||||
StartOfHi = DAG.getZExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
|
||||
DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
|
||||
StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
|
||||
StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
|
||||
|
||||
Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
|
||||
|
|
|
@ -4717,10 +4717,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
|||
"STEP_VECTOR can only be used with vectors of integers that are at "
|
||||
"least 8 bits wide");
|
||||
assert(isa<ConstantSDNode>(Operand) &&
|
||||
cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
|
||||
cast<ConstantSDNode>(Operand)->getAPIntValue().isSignedIntN(
|
||||
VT.getScalarSizeInBits()) &&
|
||||
"Expected STEP_VECTOR integer constant to be positive and fit in "
|
||||
"Expected STEP_VECTOR integer constant to fit in "
|
||||
"the vector element type");
|
||||
break;
|
||||
case ISD::FREEZE:
|
||||
|
|
|
@ -259,6 +259,69 @@ entry:
|
|||
ret <vscale x 8 x i8> %3
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @sub_multiple_use_stepvector_nxv8i16() {
|
||||
; CHECK-LABEL: sub_multiple_use_stepvector_nxv8i16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: index z0.h, #0, #1
|
||||
; CHECK-NEXT: mov z1.d, z0.d
|
||||
; CHECK-NEXT: subr z1.h, z1.h, #2 // =0x2
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
|
||||
%1 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%2 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
|
||||
%3 = sub <vscale x 8 x i16> %1, %2
|
||||
%4 = shl <vscale x 8 x i16> %2, %3
|
||||
ret <vscale x 8 x i16> %4
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @sub_stepvector_nxv8i16() {
|
||||
; CHECK-LABEL: sub_stepvector_nxv8i16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: index z0.h, #2, #-1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
|
||||
%1 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%2 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
|
||||
%3 = sub <vscale x 8 x i16> %1, %2
|
||||
ret <vscale x 8 x i16> %3
|
||||
}
|
||||
|
||||
define <vscale x 8 x i8> @promote_sub_stepvector_nxv8i8() {
|
||||
; CHECK-LABEL: promote_sub_stepvector_nxv8i8:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: index z0.h, #2, #-1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
|
||||
%1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
|
||||
%3 = sub <vscale x 8 x i8> %1, %2
|
||||
ret <vscale x 8 x i8> %3
|
||||
}
|
||||
|
||||
define <vscale x 16 x i32> @split_sub_stepvector_nxv16i32() {
|
||||
; CHECK-LABEL: split_sub_stepvector_nxv16i32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: cntw x9
|
||||
; CHECK-NEXT: cnth x8
|
||||
; CHECK-NEXT: neg x9, x9
|
||||
; CHECK-NEXT: index z0.s, #0, #-1
|
||||
; CHECK-NEXT: neg x8, x8
|
||||
; CHECK-NEXT: mov z1.s, w9
|
||||
; CHECK-NEXT: mov z3.s, w8
|
||||
; CHECK-NEXT: add z1.s, z0.s, z1.s
|
||||
; CHECK-NEXT: add z2.s, z0.s, z3.s
|
||||
; CHECK-NEXT: add z3.s, z1.s, z3.s
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
|
||||
%1 = sub <vscale x 16 x i32> zeroinitializer, %0
|
||||
ret <vscale x 16 x i32> %1
|
||||
}
|
||||
|
||||
declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
|
||||
declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
|
||||
; RUN: | FileCheck %s --check-prefixes=CHECK
|
||||
|
||||
declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
|
||||
|
||||
|
@ -271,25 +271,13 @@ define <vscale x 8 x i64> @stepvector_nxv8i64() {
|
|||
declare <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
|
||||
|
||||
define <vscale x 16 x i64> @stepvector_nxv16i64() {
|
||||
; RV32-LABEL: stepvector_nxv16i64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: csrr a0, vlenb
|
||||
; RV32-NEXT: vsetvli a1, zero, e64,m8,ta,mu
|
||||
; RV32-NEXT: vmv.v.x v8, a0
|
||||
; RV32-NEXT: addi a0, zero, 32
|
||||
; RV32-NEXT: vsll.vx v8, v8, a0
|
||||
; RV32-NEXT: vsrl.vx v16, v8, a0
|
||||
; RV32-NEXT: vid.v v8
|
||||
; RV32-NEXT: vadd.vv v16, v8, v16
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: stepvector_nxv16i64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: csrr a0, vlenb
|
||||
; RV64-NEXT: vsetvli a1, zero, e64,m8,ta,mu
|
||||
; RV64-NEXT: vid.v v8
|
||||
; RV64-NEXT: vadd.vx v16, v8, a0
|
||||
; RV64-NEXT: ret
|
||||
; CHECK-LABEL: stepvector_nxv16i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
|
||||
; CHECK-NEXT: vid.v v8
|
||||
; CHECK-NEXT: vadd.vx v16, v8, a0
|
||||
; CHECK-NEXT: ret
|
||||
%v = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
|
||||
ret <vscale x 16 x i64> %v
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue