forked from OSchip/llvm-project
Use pseudo instructions for VST1 and VST2.
llvm-svn: 112357
This commit is contained in:
parent
13ee795c42
commit
950882be07
|
@ -119,8 +119,9 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
|||
}
|
||||
|
||||
MIB.addReg(D0, getKillRegState(SrcIsKill))
|
||||
.addReg(D1, getKillRegState(SrcIsKill))
|
||||
.addReg(D2, getKillRegState(SrcIsKill));
|
||||
.addReg(D1, getKillRegState(SrcIsKill));
|
||||
if (NumRegs > 2)
|
||||
MIB.addReg(D2, getKillRegState(SrcIsKill));
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3, getKillRegState(SrcIsKill));
|
||||
MIB = AddDefaultPred(MIB);
|
||||
|
@ -224,6 +225,48 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
|||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
case ARM::VST1q8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q64Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q64Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
|
||||
|
||||
case ARM::VST2d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
|
||||
case ARM::VST2d16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
|
||||
case ARM::VST2d32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
|
||||
case ARM::VST2q8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
|
||||
case ARM::VST2q16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
|
||||
case ARM::VST2q32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
|
||||
case ARM::VST2d8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2d16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2d32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST2q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST2q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
|
||||
|
||||
case ARM::VST3d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d16Pseudo:
|
||||
|
|
|
@ -1256,16 +1256,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
|
||||
SmallVector<SDValue, 10> Ops;
|
||||
SmallVector<SDValue, 7> Ops;
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
|
||||
// FIXME: This is a temporary flag to distinguish VSTs that have been
|
||||
// converted to pseudo instructions.
|
||||
bool usePseudoInstrs = (NumVecs >= 3);
|
||||
|
||||
if (is64BitVector) {
|
||||
if (NumVecs >= 2) {
|
||||
if (NumVecs == 1) {
|
||||
Ops.push_back(N->getOperand(3));
|
||||
} else {
|
||||
SDValue RegSeq;
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
|
@ -1282,124 +1280,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
: N->getOperand(3+3);
|
||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
if (usePseudoInstrs)
|
||||
Ops.push_back(RegSeq);
|
||||
else {
|
||||
|
||||
// Now extract the D registers back out.
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
|
||||
RegSeq));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
|
||||
RegSeq));
|
||||
if (NumVecs > 2)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
|
||||
RegSeq));
|
||||
if (NumVecs > 3)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
|
||||
RegSeq));
|
||||
}
|
||||
} else {
|
||||
Ops.push_back(N->getOperand(3));
|
||||
Ops.push_back(RegSeq);
|
||||
}
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
unsigned Opc = DOpcodes[OpcodeIndex];
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 6 : NumVecs+5);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
|
||||
}
|
||||
|
||||
EVT RegVT = GetNEONSubregVT(VT);
|
||||
if (NumVecs <= 2) {
|
||||
// Quad registers are directly supported for VST1 and VST2,
|
||||
// storing pairs of D regs.
|
||||
// Quad registers are directly supported for VST1 and VST2.
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
if (NumVecs == 2) {
|
||||
// First extract the pair of Q registers.
|
||||
if (NumVecs == 1) {
|
||||
Ops.push_back(N->getOperand(3));
|
||||
} else {
|
||||
// Form a QQ register.
|
||||
SDValue Q0 = N->getOperand(3);
|
||||
SDValue Q1 = N->getOperand(4);
|
||||
|
||||
// Form a QQ register.
|
||||
SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
|
||||
|
||||
// Now extract the D registers back out.
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
|
||||
QQ));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
|
||||
QQ));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
|
||||
QQ));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
|
||||
QQ));
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
|
||||
} else {
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
|
||||
N->getOperand(Vec+3)));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
|
||||
N->getOperand(Vec+3)));
|
||||
}
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
|
||||
5 + 2 * NumVecs);
|
||||
Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
|
||||
}
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
|
||||
}
|
||||
|
||||
// Otherwise, quad registers are stored with two separate instructions,
|
||||
// where one stores the even registers and the other stores the odd registers.
|
||||
|
||||
// Form the QQQQ REG_SEQUENCE.
|
||||
SDValue V[8];
|
||||
for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
|
||||
V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
|
||||
N->getOperand(Vec+3));
|
||||
V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
|
||||
N->getOperand(Vec+3));
|
||||
}
|
||||
if (NumVecs == 3)
|
||||
V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
|
||||
dl, RegVT), 0);
|
||||
|
||||
SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
|
||||
V[4], V[5], V[6], V[7]), 0);
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
|
||||
|
||||
// Store the even D registers.
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
Ops.push_back(Reg0); // post-access address offset
|
||||
if (usePseudoInstrs)
|
||||
Ops.push_back(RegSeq);
|
||||
else
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
|
||||
RegVT, RegSeq));
|
||||
Ops.push_back(RegSeq);
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0); // predicate register
|
||||
Ops.push_back(Chain);
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||
MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 7 : NumVecs+6);
|
||||
MVT::Other, Ops.data(), 7);
|
||||
Chain = SDValue(VStA, 1);
|
||||
|
||||
// Store the odd D registers.
|
||||
Ops[0] = SDValue(VStA, 0); // MemAddr
|
||||
if (usePseudoInstrs)
|
||||
Ops[6] = Chain;
|
||||
else {
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
|
||||
RegVT, RegSeq);
|
||||
Ops[NumVecs+5] = Chain;
|
||||
}
|
||||
Ops[6] = Chain;
|
||||
Opc = QOpcodes1[OpcodeIndex];
|
||||
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
|
||||
MVT::Other, Ops.data(),
|
||||
usePseudoInstrs ? 7 : NumVecs+6);
|
||||
MVT::Other, Ops.data(), 7);
|
||||
Chain = SDValue(VStB, 1);
|
||||
ReplaceUses(SDValue(N, 0), Chain);
|
||||
return NULL;
|
||||
|
@ -2267,15 +2202,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
case Intrinsic::arm_neon_vst1: {
|
||||
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
|
||||
ARM::VST1d32, ARM::VST1d64 };
|
||||
unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
|
||||
ARM::VST1q32, ARM::VST1q64 };
|
||||
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
|
||||
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
|
||||
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2: {
|
||||
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
|
||||
ARM::VST2d32, ARM::VST1q64 };
|
||||
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
|
||||
unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
|
||||
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
|
||||
ARM::VST2q32Pseudo };
|
||||
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -490,6 +490,12 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
|||
|
||||
// Classes for VST* pseudo-instructions with multi-register operands.
|
||||
// These are expanded to real instructions after register allocation.
|
||||
class VSTQPseudo
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
|
||||
class VSTQWBPseudo
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
|
||||
"$addr.addr = $wb">;
|
||||
class VSTQQPseudo
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
|
||||
class VSTQQWBPseudo
|
||||
|
@ -520,6 +526,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
|
|||
def VST1q32 : VST1Q<0b1000, "32">;
|
||||
def VST1q64 : VST1Q<0b1100, "64">;
|
||||
|
||||
def VST1q8Pseudo : VSTQPseudo;
|
||||
def VST1q16Pseudo : VSTQPseudo;
|
||||
def VST1q32Pseudo : VSTQPseudo;
|
||||
def VST1q64Pseudo : VSTQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST1DWB<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
|
||||
|
@ -540,6 +551,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
|
|||
def VST1q32_UPD : VST1QWB<0b1000, "32">;
|
||||
def VST1q64_UPD : VST1QWB<0b1100, "64">;
|
||||
|
||||
def VST1q8Pseudo_UPD : VSTQWBPseudo;
|
||||
def VST1q16Pseudo_UPD : VSTQWBPseudo;
|
||||
def VST1q32Pseudo_UPD : VSTQWBPseudo;
|
||||
def VST1q64Pseudo_UPD : VSTQWBPseudo;
|
||||
|
||||
// ...with 3 registers (some of these are only for the disassembler):
|
||||
class VST1D3<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
|
||||
|
@ -610,6 +626,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
|
|||
def VST2q16 : VST2Q<0b0100, "16">;
|
||||
def VST2q32 : VST2Q<0b1000, "32">;
|
||||
|
||||
def VST2d8Pseudo : VSTQPseudo;
|
||||
def VST2d16Pseudo : VSTQPseudo;
|
||||
def VST2d32Pseudo : VSTQPseudo;
|
||||
|
||||
def VST2q8Pseudo : VSTQQPseudo;
|
||||
def VST2q16Pseudo : VSTQQPseudo;
|
||||
def VST2q32Pseudo : VSTQQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
|
@ -631,6 +655,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
|
|||
def VST2q16_UPD : VST2QWB<0b0100, "16">;
|
||||
def VST2q32_UPD : VST2QWB<0b1000, "32">;
|
||||
|
||||
def VST2d8Pseudo_UPD : VSTQWBPseudo;
|
||||
def VST2d16Pseudo_UPD : VSTQWBPseudo;
|
||||
def VST2d32Pseudo_UPD : VSTQWBPseudo;
|
||||
|
||||
def VST2q8Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST2q16Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST2q32Pseudo_UPD : VSTQQWBPseudo;
|
||||
|
||||
// ...with double-spaced registers (for disassembly only):
|
||||
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
|
||||
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
|
||||
|
|
|
@ -178,13 +178,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST1q8:
|
||||
case ARM::VST1q16:
|
||||
case ARM::VST1q32:
|
||||
case ARM::VST1q64:
|
||||
case ARM::VST2d8:
|
||||
case ARM::VST2d16:
|
||||
case ARM::VST2d32:
|
||||
case ARM::VST2LNd8:
|
||||
case ARM::VST2LNd16:
|
||||
case ARM::VST2LNd32:
|
||||
|
@ -192,13 +185,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
|||
NumRegs = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST2q8:
|
||||
case ARM::VST2q16:
|
||||
case ARM::VST2q32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VST2LNq16:
|
||||
case ARM::VST2LNq32:
|
||||
FirstOpnd = 2;
|
||||
|
|
Loading…
Reference in New Issue