forked from OSchip/llvm-project
Revert "[ARM] Fix conditions for lowering to S[LR]I"
This reverts commit cabfcf840a
.
The patch introduced another bug in the optimization.
This commit is contained in:
parent
6cb204eb64
commit
52474992b1
|
@ -99,6 +99,11 @@ STATISTIC(NumTailCalls, "Number of tail calls");
|
|||
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
|
||||
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
|
||||
|
||||
static cl::opt<bool>
|
||||
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
|
||||
cl::desc("Allow AArch64 SLI/SRI formation"),
|
||||
cl::init(false));
|
||||
|
||||
// FIXME: The necessary dtprel relocations don't seem to be supported
|
||||
// well in the GNU bfd and gold linkers at the moment. Therefore, by
|
||||
// default, for now, fall back to GeneralDynamic code generation.
|
||||
|
@ -1318,8 +1323,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
|
||||
case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
|
||||
case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
|
||||
case AArch64ISD::VSLI: return "AArch64ISD::VSLI";
|
||||
case AArch64ISD::VSRI: return "AArch64ISD::VSRI";
|
||||
case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
|
||||
case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
|
||||
case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
|
||||
|
@ -3146,21 +3149,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
"llvm.eh.recoverfp must take a function as the first argument");
|
||||
return IncomingFPOp;
|
||||
}
|
||||
|
||||
case Intrinsic::aarch64_neon_vsri:
|
||||
case Intrinsic::aarch64_neon_vsli: {
|
||||
EVT Ty = Op.getValueType();
|
||||
|
||||
if (!Ty.isVector())
|
||||
report_fatal_error("Unexpected type for aarch64_neon_vsli");
|
||||
|
||||
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
|
||||
|
||||
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
|
||||
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
|
||||
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
|
||||
Op.getOperand(3));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7909,9 +7897,8 @@ static unsigned getIntrinsicID(const SDNode *N) {
|
|||
|
||||
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
|
||||
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
|
||||
// BUILD_VECTORs with constant element C1, C2 is a constant, and:
|
||||
// - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2)
|
||||
// - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2)
|
||||
// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
|
||||
// Also, logical shift right -> sri, with the same structure.
|
||||
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
|
@ -7944,27 +7931,25 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
|
|||
if (!isAllConstantBuildVector(And.getOperand(1), C1))
|
||||
return SDValue();
|
||||
|
||||
// Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or
|
||||
// C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account
|
||||
// how much one can shift elements of a particular size?
|
||||
// Is C1 == ~C2, taking into account how much one can shift elements of a
|
||||
// particular size?
|
||||
uint64_t C2 = C2node->getZExtValue();
|
||||
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
|
||||
if (C2 > ElemSizeInBits)
|
||||
return SDValue();
|
||||
unsigned ElemMask = (1 << ElemSizeInBits) - 1;
|
||||
if (IsShiftRight) {
|
||||
if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask))
|
||||
if ((C1 & ElemMask) != (~C2 & ElemMask))
|
||||
return SDValue();
|
||||
} else {
|
||||
if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask))
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue X = And.getOperand(0);
|
||||
SDValue Y = Shift.getOperand(0);
|
||||
|
||||
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
|
||||
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
|
||||
unsigned Intrin =
|
||||
IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
|
||||
SDValue ResultSLI =
|
||||
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
|
||||
DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
|
||||
Shift.getOperand(1));
|
||||
|
||||
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
|
||||
LLVM_DEBUG(N->dump(&DAG));
|
||||
|
@ -7978,8 +7963,10 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
|
|||
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
|
||||
if (EnableAArch64SlrGeneration) {
|
||||
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
|
||||
return Res;
|
||||
}
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
|
|
|
@ -121,10 +121,6 @@ enum NodeType : unsigned {
|
|||
SRSHR_I,
|
||||
URSHR_I,
|
||||
|
||||
// Vector shift by constant and insert
|
||||
VSLI,
|
||||
VSRI,
|
||||
|
||||
// Vector comparisons
|
||||
CMEQ,
|
||||
CMGE,
|
||||
|
@ -200,10 +196,8 @@ enum NodeType : unsigned {
|
|||
UMULL,
|
||||
|
||||
// Reciprocal estimates and steps.
|
||||
FRECPE,
|
||||
FRECPS,
|
||||
FRSQRTE,
|
||||
FRSQRTS,
|
||||
FRECPE, FRECPS,
|
||||
FRSQRTE, FRSQRTS,
|
||||
|
||||
SUNPKHI,
|
||||
SUNPKLO,
|
||||
|
|
|
@ -231,10 +231,6 @@ def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
|||
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
|
||||
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
|
||||
|
||||
def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
|
||||
SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>;
|
||||
|
||||
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
|
||||
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
|
||||
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
|
||||
|
@ -473,8 +469,6 @@ def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
|
|||
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
|
||||
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
|
||||
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
|
||||
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
|
||||
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
|
||||
|
||||
def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
|
||||
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
|
||||
|
@ -5834,8 +5828,8 @@ defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
|
|||
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
|
||||
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
|
||||
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
|
||||
defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
|
||||
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
|
||||
defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
|
||||
def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
|
||||
(i32 vecshiftL64:$imm))),
|
||||
(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
|
||||
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
|
||||
|
@ -5848,8 +5842,8 @@ defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
|
|||
int_aarch64_neon_sqshrn>;
|
||||
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
|
||||
int_aarch64_neon_sqshrun>;
|
||||
defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
|
||||
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
|
||||
defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
|
||||
def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
|
||||
(i32 vecshiftR64:$imm))),
|
||||
(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
|
||||
defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
|
||||
; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
|
||||
|
||||
define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
|
||||
; CHECK-LABEL: testLeftGood:
|
||||
; CHECK: sli.16b v0, v1, #3
|
||||
%and.i = and <16 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
%and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
|
||||
%vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
%result = or <16 x i8> %and.i, %vshl_n
|
||||
store <16 x i8> %result, <16 x i8>* %dest, align 16
|
||||
|
@ -23,7 +23,7 @@ define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nou
|
|||
define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind {
|
||||
; CHECK-LABEL: testRightGood:
|
||||
; CHECK: sri.16b v0, v1, #3
|
||||
%and.i = and <16 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
|
||||
%and.i = and <16 x i8> %src1, <i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252>
|
||||
%vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
%result = or <16 x i8> %and.i, %vshl_n
|
||||
store <16 x i8> %result, <16 x i8>* %dest, align 16
|
||||
|
|
Loading…
Reference in New Issue