forked from OSchip/llvm-project
AArch64/ARM64: Port NEON post-increment load/store with 2/3/4 vectors to ARM64 backend.
llvm-svn: 208284
This commit is contained in:
parent
ecfe9d06eb
commit
1187a3d8db
|
@ -150,10 +150,15 @@ public:
|
||||||
|
|
||||||
SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
|
SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
|
||||||
unsigned SubRegIdx);
|
unsigned SubRegIdx);
|
||||||
|
SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
|
||||||
|
unsigned SubRegIdx);
|
||||||
SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
|
SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
|
|
||||||
SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
|
SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
|
SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
|
SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
|
||||||
|
|
||||||
SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
|
SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
|
||||||
SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
|
SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
|
||||||
|
@ -952,33 +957,43 @@ SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
|
||||||
|
|
||||||
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
||||||
SDValue SuperReg = SDValue(Ld, 0);
|
SDValue SuperReg = SDValue(Ld, 0);
|
||||||
|
for (unsigned i = 0; i < NumVecs; ++i)
|
||||||
// MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
|
ReplaceUses(SDValue(N, i),
|
||||||
// MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
|
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
|
||||||
// cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
|
|
||||||
|
|
||||||
switch (NumVecs) {
|
|
||||||
case 4:
|
|
||||||
ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl,
|
|
||||||
VT, SuperReg));
|
|
||||||
// FALLTHROUGH
|
|
||||||
case 3:
|
|
||||||
ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl,
|
|
||||||
VT, SuperReg));
|
|
||||||
// FALLTHROUGH
|
|
||||||
case 2:
|
|
||||||
ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl,
|
|
||||||
VT, SuperReg));
|
|
||||||
ReplaceUses(SDValue(N, 0),
|
|
||||||
CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg));
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
ReplaceUses(SDValue(N, 0), SuperReg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
|
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDNode *ARM64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
|
||||||
|
unsigned Opc, unsigned SubRegIdx) {
|
||||||
|
SDLoc dl(N);
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
SDValue Chain = N->getOperand(0);
|
||||||
|
|
||||||
|
SmallVector<SDValue, 6> Ops;
|
||||||
|
Ops.push_back(N->getOperand(1)); // Mem operand
|
||||||
|
Ops.push_back(N->getOperand(2)); // Incremental
|
||||||
|
Ops.push_back(Chain);
|
||||||
|
|
||||||
|
std::vector<EVT> ResTys;
|
||||||
|
ResTys.push_back(MVT::i64); // Type of the write back register
|
||||||
|
ResTys.push_back(MVT::Untyped);
|
||||||
|
ResTys.push_back(MVT::Other);
|
||||||
|
|
||||||
|
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
||||||
|
|
||||||
|
// Update uses of write back register
|
||||||
|
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
|
||||||
|
|
||||||
|
// Update uses of vector list
|
||||||
|
SDValue SuperReg = SDValue(Ld, 1);
|
||||||
|
for (unsigned i = 0; i < NumVecs; ++i)
|
||||||
|
ReplaceUses(SDValue(N, i),
|
||||||
|
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
|
||||||
|
|
||||||
|
// Update the chain
|
||||||
|
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1001,6 +1016,29 @@ SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
|
||||||
return St;
|
return St;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDNode *ARM64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
|
||||||
|
unsigned Opc) {
|
||||||
|
SDLoc dl(N);
|
||||||
|
EVT VT = N->getOperand(2)->getValueType(0);
|
||||||
|
SmallVector<EVT, 2> ResTys;
|
||||||
|
ResTys.push_back(MVT::i64); // Type of the write back register
|
||||||
|
ResTys.push_back(MVT::Other); // Type for the Chain
|
||||||
|
|
||||||
|
// Form a REG_SEQUENCE to force register allocation.
|
||||||
|
bool Is128Bit = VT.getSizeInBits() == 128;
|
||||||
|
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
|
||||||
|
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
|
||||||
|
|
||||||
|
SmallVector<SDValue, 6> Ops;
|
||||||
|
Ops.push_back(RegSeq);
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 1)); // base register
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
|
||||||
|
Ops.push_back(N->getOperand(0)); // Chain
|
||||||
|
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
||||||
|
|
||||||
|
return St;
|
||||||
|
}
|
||||||
|
|
||||||
/// WidenVector - Given a value in the V64 register class, produce the
|
/// WidenVector - Given a value in the V64 register class, produce the
|
||||||
/// equivalent value in the V128 register class.
|
/// equivalent value in the V128 register class.
|
||||||
class WidenVector {
|
class WidenVector {
|
||||||
|
@ -1065,39 +1103,13 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
|
||||||
SDValue SuperReg = SDValue(Ld, 0);
|
SDValue SuperReg = SDValue(Ld, 0);
|
||||||
|
|
||||||
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
|
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
|
||||||
switch (NumVecs) {
|
static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2,
|
||||||
case 4: {
|
ARM64::qsub3 };
|
||||||
SDValue NV3 =
|
for (unsigned i = 0; i < NumVecs; ++i) {
|
||||||
CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg);
|
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
|
||||||
if (Narrow)
|
if (Narrow)
|
||||||
ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG));
|
NV = NarrowVector(NV, *CurDAG);
|
||||||
else
|
ReplaceUses(SDValue(N, i), NV);
|
||||||
ReplaceUses(SDValue(N, 3), NV3);
|
|
||||||
}
|
|
||||||
// FALLTHROUGH
|
|
||||||
case 3: {
|
|
||||||
SDValue NV2 =
|
|
||||||
CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg);
|
|
||||||
if (Narrow)
|
|
||||||
ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG));
|
|
||||||
else
|
|
||||||
ReplaceUses(SDValue(N, 2), NV2);
|
|
||||||
}
|
|
||||||
// FALLTHROUGH
|
|
||||||
case 2: {
|
|
||||||
SDValue NV1 =
|
|
||||||
CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg);
|
|
||||||
SDValue NV0 =
|
|
||||||
CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg);
|
|
||||||
if (Narrow) {
|
|
||||||
ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG));
|
|
||||||
ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG));
|
|
||||||
} else {
|
|
||||||
ReplaceUses(SDValue(N, 1), NV1);
|
|
||||||
ReplaceUses(SDValue(N, 0), NV0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
|
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
|
||||||
|
@ -1105,6 +1117,58 @@ SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
|
||||||
return Ld;
|
return Ld;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDNode *ARM64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
|
||||||
|
unsigned Opc) {
|
||||||
|
SDLoc dl(N);
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
bool Narrow = VT.getSizeInBits() == 64;
|
||||||
|
|
||||||
|
// Form a REG_SEQUENCE to force register allocation.
|
||||||
|
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
|
||||||
|
|
||||||
|
if (Narrow)
|
||||||
|
std::transform(Regs.begin(), Regs.end(), Regs.begin(),
|
||||||
|
WidenVector(*CurDAG));
|
||||||
|
|
||||||
|
SDValue RegSeq = createQTuple(Regs);
|
||||||
|
|
||||||
|
std::vector<EVT> ResTys;
|
||||||
|
ResTys.push_back(MVT::i64); // Type of the write back register
|
||||||
|
ResTys.push_back(MVT::Untyped);
|
||||||
|
ResTys.push_back(MVT::Other);
|
||||||
|
|
||||||
|
unsigned LaneNo =
|
||||||
|
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
|
||||||
|
|
||||||
|
SmallVector<SDValue, 6> Ops;
|
||||||
|
Ops.push_back(RegSeq);
|
||||||
|
Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
|
||||||
|
Ops.push_back(N->getOperand(0));
|
||||||
|
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
||||||
|
|
||||||
|
// Update uses of the write back register
|
||||||
|
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
|
||||||
|
|
||||||
|
// Update uses of the vector list
|
||||||
|
SDValue SuperReg = SDValue(Ld, 1);
|
||||||
|
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
|
||||||
|
static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2,
|
||||||
|
ARM64::qsub3 };
|
||||||
|
for (unsigned i = 0; i < NumVecs; ++i) {
|
||||||
|
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
|
||||||
|
if (Narrow)
|
||||||
|
NV = NarrowVector(NV, *CurDAG);
|
||||||
|
ReplaceUses(SDValue(N, i), NV);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the Chain
|
||||||
|
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
|
||||||
|
|
||||||
|
return Ld;
|
||||||
|
}
|
||||||
|
|
||||||
SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
|
SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
|
||||||
unsigned Opc) {
|
unsigned Opc) {
|
||||||
SDLoc dl(N);
|
SDLoc dl(N);
|
||||||
|
@ -1138,6 +1202,44 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
|
||||||
return St;
|
return St;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDNode *ARM64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
|
||||||
|
unsigned Opc) {
|
||||||
|
SDLoc dl(N);
|
||||||
|
EVT VT = N->getOperand(2)->getValueType(0);
|
||||||
|
bool Narrow = VT.getSizeInBits() == 64;
|
||||||
|
|
||||||
|
// Form a REG_SEQUENCE to force register allocation.
|
||||||
|
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
|
||||||
|
|
||||||
|
if (Narrow)
|
||||||
|
std::transform(Regs.begin(), Regs.end(), Regs.begin(),
|
||||||
|
WidenVector(*CurDAG));
|
||||||
|
|
||||||
|
SDValue RegSeq = createQTuple(Regs);
|
||||||
|
|
||||||
|
SmallVector<EVT, 2> ResTys;
|
||||||
|
ResTys.push_back(MVT::i64); // Type of the write back register
|
||||||
|
ResTys.push_back(MVT::Other);
|
||||||
|
|
||||||
|
unsigned LaneNo =
|
||||||
|
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
|
||||||
|
|
||||||
|
SmallVector<SDValue, 6> Ops;
|
||||||
|
Ops.push_back(RegSeq);
|
||||||
|
Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
|
||||||
|
Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
|
||||||
|
Ops.push_back(N->getOperand(0));
|
||||||
|
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
|
||||||
|
|
||||||
|
// Transfer memoperands.
|
||||||
|
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
|
||||||
|
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
|
||||||
|
cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
|
||||||
|
|
||||||
|
return St;
|
||||||
|
}
|
||||||
|
|
||||||
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
|
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
|
||||||
unsigned &Opc, SDValue &Opd0,
|
unsigned &Opc, SDValue &Opd0,
|
||||||
unsigned &LSB, unsigned &MSB,
|
unsigned &LSB, unsigned &MSB,
|
||||||
|
@ -2441,6 +2543,378 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case ARM64ISD::LD2post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Twov2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD3post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Threev2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD4post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Fourv2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD1x2post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD1Twov2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD1x3post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD1Threev2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD1x4post: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD1Fourv2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD2DUPpost: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 2, ARM64::LD2Rv2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD3DUPpost: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 3, ARM64::LD3Rv2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD4DUPpost: {
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv8b_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv16b_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv4h_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv8h_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv2s_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv4s_POST, ARM64::qsub0);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv1d_POST, ARM64::dsub0);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostLoad(Node, 4, ARM64::LD4Rv2d_POST, ARM64::qsub0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD2LANEpost: {
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostLoadLane(Node, 2, ARM64::LD2i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostLoadLane(Node, 2, ARM64::LD2i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostLoadLane(Node, 2, ARM64::LD2i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostLoadLane(Node, 2, ARM64::LD2i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD3LANEpost: {
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostLoadLane(Node, 3, ARM64::LD3i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostLoadLane(Node, 3, ARM64::LD3i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostLoadLane(Node, 3, ARM64::LD3i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostLoadLane(Node, 3, ARM64::LD3i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::LD4LANEpost: {
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostLoadLane(Node, 4, ARM64::LD4i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostLoadLane(Node, 4, ARM64::LD4i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostLoadLane(Node, 4, ARM64::LD4i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostLoadLane(Node, 4, ARM64::LD4i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST2post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov4s_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST2Twov2d_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST3post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev4s_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST3Threev2d_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST4post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv4s_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST4Fourv2d_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST1x2post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov4s_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 2, ARM64::ST1Twov2d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST1x3post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev4s_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 3, ARM64::ST1Threev2d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST1x4post: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v8i8)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv8b_POST);
|
||||||
|
else if (VT == MVT::v16i8)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv16b_POST);
|
||||||
|
else if (VT == MVT::v4i16)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv4h_POST);
|
||||||
|
else if (VT == MVT::v8i16)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv8h_POST);
|
||||||
|
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv2s_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v4f32)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv4s_POST);
|
||||||
|
else if (VT == MVT::v1i64 || VT == MVT::v1f64)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v2f64)
|
||||||
|
return SelectPostStore(Node, 4, ARM64::ST1Fourv2d_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST2LANEpost: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostStoreLane(Node, 2, ARM64::ST2i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostStoreLane(Node, 2, ARM64::ST2i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostStoreLane(Node, 2, ARM64::ST2i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostStoreLane(Node, 2, ARM64::ST2i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST3LANEpost: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostStoreLane(Node, 3, ARM64::ST3i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostStoreLane(Node, 3, ARM64::ST3i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostStoreLane(Node, 3, ARM64::ST3i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostStoreLane(Node, 3, ARM64::ST3i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ARM64ISD::ST4LANEpost: {
|
||||||
|
VT = Node->getOperand(1).getValueType();
|
||||||
|
if (VT == MVT::v16i8 || VT == MVT::v8i8)
|
||||||
|
return SelectPostStoreLane(Node, 4, ARM64::ST4i8_POST);
|
||||||
|
else if (VT == MVT::v8i16 || VT == MVT::v4i16)
|
||||||
|
return SelectPostStoreLane(Node, 4, ARM64::ST4i16_POST);
|
||||||
|
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
|
||||||
|
VT == MVT::v2f32)
|
||||||
|
return SelectPostStoreLane(Node, 4, ARM64::ST4i32_POST);
|
||||||
|
else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
|
||||||
|
VT == MVT::v1f64)
|
||||||
|
return SelectPostStoreLane(Node, 4, ARM64::ST4i64_POST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case ISD::FCEIL:
|
case ISD::FCEIL:
|
||||||
case ISD::FFLOOR:
|
case ISD::FFLOOR:
|
||||||
|
|
|
@ -369,6 +369,9 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
|
||||||
setTargetDAGCombine(ISD::SELECT);
|
setTargetDAGCombine(ISD::SELECT);
|
||||||
setTargetDAGCombine(ISD::VSELECT);
|
setTargetDAGCombine(ISD::VSELECT);
|
||||||
|
|
||||||
|
setTargetDAGCombine(ISD::INTRINSIC_VOID);
|
||||||
|
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
|
||||||
|
|
||||||
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
|
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
|
||||||
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
|
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
|
||||||
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
|
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
|
||||||
|
@ -729,6 +732,27 @@ const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I";
|
case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I";
|
||||||
case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I";
|
case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I";
|
||||||
case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge";
|
case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge";
|
||||||
|
case ARM64ISD::LD2post: return "ARM64ISD::LD2post";
|
||||||
|
case ARM64ISD::LD3post: return "ARM64ISD::LD3post";
|
||||||
|
case ARM64ISD::LD4post: return "ARM64ISD::LD4post";
|
||||||
|
case ARM64ISD::ST2post: return "ARM64ISD::ST2post";
|
||||||
|
case ARM64ISD::ST3post: return "ARM64ISD::ST3post";
|
||||||
|
case ARM64ISD::ST4post: return "ARM64ISD::ST4post";
|
||||||
|
case ARM64ISD::LD1x2post: return "ARM64ISD::LD1x2post";
|
||||||
|
case ARM64ISD::LD1x3post: return "ARM64ISD::LD1x3post";
|
||||||
|
case ARM64ISD::LD1x4post: return "ARM64ISD::LD1x4post";
|
||||||
|
case ARM64ISD::ST1x2post: return "ARM64ISD::ST1x2post";
|
||||||
|
case ARM64ISD::ST1x3post: return "ARM64ISD::ST1x3post";
|
||||||
|
case ARM64ISD::ST1x4post: return "ARM64ISD::ST1x4post";
|
||||||
|
case ARM64ISD::LD2DUPpost: return "ARM64ISD::LD2DUPpost";
|
||||||
|
case ARM64ISD::LD3DUPpost: return "ARM64ISD::LD3DUPpost";
|
||||||
|
case ARM64ISD::LD4DUPpost: return "ARM64ISD::LD4DUPpost";
|
||||||
|
case ARM64ISD::LD2LANEpost: return "ARM64ISD::LD2LANEpost";
|
||||||
|
case ARM64ISD::LD3LANEpost: return "ARM64ISD::LD3LANEpost";
|
||||||
|
case ARM64ISD::LD4LANEpost: return "ARM64ISD::LD4LANEpost";
|
||||||
|
case ARM64ISD::ST2LANEpost: return "ARM64ISD::ST2LANEpost";
|
||||||
|
case ARM64ISD::ST3LANEpost: return "ARM64ISD::ST3LANEpost";
|
||||||
|
case ARM64ISD::ST4LANEpost: return "ARM64ISD::ST4LANEpost";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5683,6 +5707,9 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||||
case Intrinsic::arm64_neon_ld2:
|
case Intrinsic::arm64_neon_ld2:
|
||||||
case Intrinsic::arm64_neon_ld3:
|
case Intrinsic::arm64_neon_ld3:
|
||||||
case Intrinsic::arm64_neon_ld4:
|
case Intrinsic::arm64_neon_ld4:
|
||||||
|
case Intrinsic::arm64_neon_ld1x2:
|
||||||
|
case Intrinsic::arm64_neon_ld1x3:
|
||||||
|
case Intrinsic::arm64_neon_ld1x4:
|
||||||
case Intrinsic::arm64_neon_ld2lane:
|
case Intrinsic::arm64_neon_ld2lane:
|
||||||
case Intrinsic::arm64_neon_ld3lane:
|
case Intrinsic::arm64_neon_ld3lane:
|
||||||
case Intrinsic::arm64_neon_ld4lane:
|
case Intrinsic::arm64_neon_ld4lane:
|
||||||
|
@ -5704,6 +5731,9 @@ bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||||
case Intrinsic::arm64_neon_st2:
|
case Intrinsic::arm64_neon_st2:
|
||||||
case Intrinsic::arm64_neon_st3:
|
case Intrinsic::arm64_neon_st3:
|
||||||
case Intrinsic::arm64_neon_st4:
|
case Intrinsic::arm64_neon_st4:
|
||||||
|
case Intrinsic::arm64_neon_st1x2:
|
||||||
|
case Intrinsic::arm64_neon_st1x3:
|
||||||
|
case Intrinsic::arm64_neon_st1x4:
|
||||||
case Intrinsic::arm64_neon_st2lane:
|
case Intrinsic::arm64_neon_st2lane:
|
||||||
case Intrinsic::arm64_neon_st3lane:
|
case Intrinsic::arm64_neon_st3lane:
|
||||||
case Intrinsic::arm64_neon_st4lane: {
|
case Intrinsic::arm64_neon_st4lane: {
|
||||||
|
@ -7038,6 +7068,138 @@ static SDValue performSTORECombine(SDNode *N,
|
||||||
S->getAlignment());
|
S->getAlignment());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Target-specific DAG combine function for NEON load/store intrinsics
|
||||||
|
/// to merge base address updates.
|
||||||
|
static SDValue performNEONPostLDSTCombine(SDNode *N,
|
||||||
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
|
SelectionDAG &DAG) {
|
||||||
|
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
unsigned AddrOpIdx = N->getNumOperands() - 1;
|
||||||
|
SDValue Addr = N->getOperand(AddrOpIdx);
|
||||||
|
|
||||||
|
// Search for a use of the address operand that is an increment.
|
||||||
|
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
|
||||||
|
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
|
||||||
|
SDNode *User = *UI;
|
||||||
|
if (User->getOpcode() != ISD::ADD ||
|
||||||
|
UI.getUse().getResNo() != Addr.getResNo())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Check that the add is independent of the load/store. Otherwise, folding
|
||||||
|
// it would create a cycle.
|
||||||
|
if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Find the new opcode for the updating load/store.
|
||||||
|
bool IsStore = false;
|
||||||
|
bool IsLaneOp = false;
|
||||||
|
bool IsDupOp = false;
|
||||||
|
unsigned NewOpc = 0;
|
||||||
|
unsigned NumVecs = 0;
|
||||||
|
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||||
|
switch (IntNo) {
|
||||||
|
default: llvm_unreachable("unexpected intrinsic for Neon base update");
|
||||||
|
case Intrinsic::arm64_neon_ld2: NewOpc = ARM64ISD::LD2post;
|
||||||
|
NumVecs = 2; break;
|
||||||
|
case Intrinsic::arm64_neon_ld3: NewOpc = ARM64ISD::LD3post;
|
||||||
|
NumVecs = 3; break;
|
||||||
|
case Intrinsic::arm64_neon_ld4: NewOpc = ARM64ISD::LD4post;
|
||||||
|
NumVecs = 4; break;
|
||||||
|
case Intrinsic::arm64_neon_st2: NewOpc = ARM64ISD::ST2post;
|
||||||
|
NumVecs = 2; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st3: NewOpc = ARM64ISD::ST3post;
|
||||||
|
NumVecs = 3; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st4: NewOpc = ARM64ISD::ST4post;
|
||||||
|
NumVecs = 4; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld1x2: NewOpc = ARM64ISD::LD1x2post;
|
||||||
|
NumVecs = 2; break;
|
||||||
|
case Intrinsic::arm64_neon_ld1x3: NewOpc = ARM64ISD::LD1x3post;
|
||||||
|
NumVecs = 3; break;
|
||||||
|
case Intrinsic::arm64_neon_ld1x4: NewOpc = ARM64ISD::LD1x4post;
|
||||||
|
NumVecs = 4; break;
|
||||||
|
case Intrinsic::arm64_neon_st1x2: NewOpc = ARM64ISD::ST1x2post;
|
||||||
|
NumVecs = 2; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st1x3: NewOpc = ARM64ISD::ST1x3post;
|
||||||
|
NumVecs = 3; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st1x4: NewOpc = ARM64ISD::ST1x4post;
|
||||||
|
NumVecs = 4; IsStore = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld2r: NewOpc = ARM64ISD::LD2DUPpost;
|
||||||
|
NumVecs = 2; IsDupOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld3r: NewOpc = ARM64ISD::LD3DUPpost;
|
||||||
|
NumVecs = 3; IsDupOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld4r: NewOpc = ARM64ISD::LD4DUPpost;
|
||||||
|
NumVecs = 4; IsDupOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld2lane: NewOpc = ARM64ISD::LD2LANEpost;
|
||||||
|
NumVecs = 2; IsLaneOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld3lane: NewOpc = ARM64ISD::LD3LANEpost;
|
||||||
|
NumVecs = 3; IsLaneOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_ld4lane: NewOpc = ARM64ISD::LD4LANEpost;
|
||||||
|
NumVecs = 4; IsLaneOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st2lane: NewOpc = ARM64ISD::ST2LANEpost;
|
||||||
|
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st3lane: NewOpc = ARM64ISD::ST3LANEpost;
|
||||||
|
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
|
||||||
|
case Intrinsic::arm64_neon_st4lane: NewOpc = ARM64ISD::ST4LANEpost;
|
||||||
|
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
EVT VecTy;
|
||||||
|
if (IsStore)
|
||||||
|
VecTy = N->getOperand(2).getValueType();
|
||||||
|
else
|
||||||
|
VecTy = N->getValueType(0);
|
||||||
|
|
||||||
|
// If the increment is a constant, it must match the memory ref size.
|
||||||
|
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
|
||||||
|
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
|
||||||
|
uint32_t IncVal = CInc->getZExtValue();
|
||||||
|
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
|
||||||
|
if (IsLaneOp || IsDupOp)
|
||||||
|
NumBytes /= VecTy.getVectorNumElements();
|
||||||
|
if (IncVal != NumBytes)
|
||||||
|
continue;
|
||||||
|
Inc = DAG.getRegister(ARM64::XZR, MVT::i64);
|
||||||
|
}
|
||||||
|
SmallVector<SDValue, 8> Ops;
|
||||||
|
Ops.push_back(N->getOperand(0)); // Incoming chain
|
||||||
|
// Load lane and store have vector list as input.
|
||||||
|
if (IsLaneOp || IsStore)
|
||||||
|
for (unsigned i = 2; i < AddrOpIdx; ++i)
|
||||||
|
Ops.push_back(N->getOperand(i));
|
||||||
|
Ops.push_back(N->getOperand(AddrOpIdx)); // Base register
|
||||||
|
Ops.push_back(Inc);
|
||||||
|
|
||||||
|
// Return Types.
|
||||||
|
EVT Tys[6];
|
||||||
|
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
|
||||||
|
unsigned n;
|
||||||
|
for (n = 0; n < NumResultVecs; ++n)
|
||||||
|
Tys[n] = VecTy;
|
||||||
|
Tys[n++] = MVT::i64; // Type of write back register
|
||||||
|
Tys[n] = MVT::Other; // Type of the chain
|
||||||
|
SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
|
||||||
|
|
||||||
|
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
|
||||||
|
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
|
||||||
|
MemInt->getMemoryVT(),
|
||||||
|
MemInt->getMemOperand());
|
||||||
|
|
||||||
|
// Update the uses.
|
||||||
|
std::vector<SDValue> NewResults;
|
||||||
|
for (unsigned i = 0; i < NumResultVecs; ++i) {
|
||||||
|
NewResults.push_back(SDValue(UpdN.getNode(), i));
|
||||||
|
}
|
||||||
|
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
|
||||||
|
DCI.CombineTo(N, NewResults);
|
||||||
|
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
// Optimize compare with zero and branch.
|
// Optimize compare with zero and branch.
|
||||||
static SDValue performBRCONDCombine(SDNode *N,
|
static SDValue performBRCONDCombine(SDNode *N,
|
||||||
TargetLowering::DAGCombinerInfo &DCI,
|
TargetLowering::DAGCombinerInfo &DCI,
|
||||||
|
@ -7196,6 +7358,34 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||||
return performSTORECombine(N, DCI, DAG, Subtarget);
|
return performSTORECombine(N, DCI, DAG, Subtarget);
|
||||||
case ARM64ISD::BRCOND:
|
case ARM64ISD::BRCOND:
|
||||||
return performBRCONDCombine(N, DCI, DAG);
|
return performBRCONDCombine(N, DCI, DAG);
|
||||||
|
case ISD::INTRINSIC_VOID:
|
||||||
|
case ISD::INTRINSIC_W_CHAIN:
|
||||||
|
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
||||||
|
case Intrinsic::arm64_neon_ld2:
|
||||||
|
case Intrinsic::arm64_neon_ld3:
|
||||||
|
case Intrinsic::arm64_neon_ld4:
|
||||||
|
case Intrinsic::arm64_neon_ld1x2:
|
||||||
|
case Intrinsic::arm64_neon_ld1x3:
|
||||||
|
case Intrinsic::arm64_neon_ld1x4:
|
||||||
|
case Intrinsic::arm64_neon_ld2lane:
|
||||||
|
case Intrinsic::arm64_neon_ld3lane:
|
||||||
|
case Intrinsic::arm64_neon_ld4lane:
|
||||||
|
case Intrinsic::arm64_neon_ld2r:
|
||||||
|
case Intrinsic::arm64_neon_ld3r:
|
||||||
|
case Intrinsic::arm64_neon_ld4r:
|
||||||
|
case Intrinsic::arm64_neon_st2:
|
||||||
|
case Intrinsic::arm64_neon_st3:
|
||||||
|
case Intrinsic::arm64_neon_st4:
|
||||||
|
case Intrinsic::arm64_neon_st1x2:
|
||||||
|
case Intrinsic::arm64_neon_st1x3:
|
||||||
|
case Intrinsic::arm64_neon_st1x4:
|
||||||
|
case Intrinsic::arm64_neon_st2lane:
|
||||||
|
case Intrinsic::arm64_neon_st3lane:
|
||||||
|
case Intrinsic::arm64_neon_st4lane:
|
||||||
|
return performNEONPostLDSTCombine(N, DCI, DAG);
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,7 +160,30 @@ enum {
|
||||||
|
|
||||||
// {s|u}int to FP within a FP register.
|
// {s|u}int to FP within a FP register.
|
||||||
SITOF,
|
SITOF,
|
||||||
UITOF
|
UITOF,
|
||||||
|
|
||||||
|
// NEON Load/Store with post-increment base updates
|
||||||
|
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||||
|
LD3post,
|
||||||
|
LD4post,
|
||||||
|
ST2post,
|
||||||
|
ST3post,
|
||||||
|
ST4post,
|
||||||
|
LD1x2post,
|
||||||
|
LD1x3post,
|
||||||
|
LD1x4post,
|
||||||
|
ST1x2post,
|
||||||
|
ST1x3post,
|
||||||
|
ST1x4post,
|
||||||
|
LD2DUPpost,
|
||||||
|
LD3DUPpost,
|
||||||
|
LD4DUPpost,
|
||||||
|
LD2LANEpost,
|
||||||
|
LD3LANEpost,
|
||||||
|
LD4LANEpost,
|
||||||
|
ST2LANEpost,
|
||||||
|
ST3LANEpost,
|
||||||
|
ST4LANEpost
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace ARM64ISD
|
} // end namespace ARM64ISD
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue