Use pseudo instructions for VST1 and VST2.

llvm-svn: 112357
This commit is contained in:
Bob Wilson 2010-08-28 05:12:57 +00:00
parent 13ee795c42
commit 950882be07
4 changed files with 110 additions and 113 deletions

View File

@ -119,8 +119,9 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
}
MIB.addReg(D0, getKillRegState(SrcIsKill))
.addReg(D1, getKillRegState(SrcIsKill))
.addReg(D2, getKillRegState(SrcIsKill));
.addReg(D1, getKillRegState(SrcIsKill));
if (NumRegs > 2)
MIB.addReg(D2, getKillRegState(SrcIsKill));
if (NumRegs > 3)
MIB.addReg(D3, getKillRegState(SrcIsKill));
MIB = AddDefaultPred(MIB);
@ -224,6 +225,48 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MI.eraseFromParent();
}
case ARM::VST1q8Pseudo:
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
case ARM::VST1q16Pseudo:
ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
case ARM::VST1q32Pseudo:
ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
case ARM::VST1q64Pseudo:
ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
case ARM::VST1q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
case ARM::VST1q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
case ARM::VST1q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
case ARM::VST1q64Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
case ARM::VST2d8Pseudo:
ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
case ARM::VST2d16Pseudo:
ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
case ARM::VST2d32Pseudo:
ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
case ARM::VST2q8Pseudo:
ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
case ARM::VST2q16Pseudo:
ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
case ARM::VST2q32Pseudo:
ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
case ARM::VST2d8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
case ARM::VST2d16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
case ARM::VST2d32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
case ARM::VST2q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
case ARM::VST2q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
case ARM::VST2q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
case ARM::VST3d8Pseudo:
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
case ARM::VST3d16Pseudo:

View File

@ -1256,16 +1256,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 10> Ops;
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
// FIXME: This is a temporary flag to distinguish VSTs that have been
// converted to pseudo instructions.
bool usePseudoInstrs = (NumVecs >= 3);
if (is64BitVector) {
if (NumVecs >= 2) {
if (NumVecs == 1) {
Ops.push_back(N->getOperand(3));
} else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@ -1282,124 +1280,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
if (usePseudoInstrs)
Ops.push_back(RegSeq);
else {
// Now extract the D registers back out.
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
RegSeq));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
RegSeq));
if (NumVecs > 2)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
RegSeq));
if (NumVecs > 3)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
RegSeq));
}
} else {
Ops.push_back(N->getOperand(3));
Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
usePseudoInstrs ? 6 : NumVecs+5);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
// Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
if (NumVecs == 2) {
// First extract the pair of Q registers.
if (NumVecs == 1) {
Ops.push_back(N->getOperand(3));
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
// Form a QQ register.
SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
// Now extract the D registers back out.
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
QQ));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
QQ));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
QQ));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
QQ));
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
} else {
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
N->getOperand(Vec+3)));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
N->getOperand(Vec+3)));
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
5 + 2 * NumVecs);
Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
SDValue V[8];
for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
N->getOperand(Vec+3));
V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
N->getOperand(Vec+3));
}
if (NumVecs == 3)
V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, RegVT), 0);
SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
V[4], V[5], V[6], V[7]), 0);
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
SDValue V2 = N->getOperand(2+3);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(3+3);
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
if (usePseudoInstrs)
Ops.push_back(RegSeq);
else
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
RegVT, RegSeq));
Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
MVT::Other, Ops.data(),
usePseudoInstrs ? 7 : NumVecs+6);
MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
if (usePseudoInstrs)
Ops[6] = Chain;
else {
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
RegVT, RegSeq);
Ops[NumVecs+5] = Chain;
}
Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
MVT::Other, Ops.data(),
usePseudoInstrs ? 7 : NumVecs+6);
MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@ -2267,15 +2202,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}

View File

@ -490,6 +490,12 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Store pseudo with no outputs: takes an addrmode6 address and a single
// QPR source register.
class VSTQPseudo
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
// Writeback form of the QPR store pseudo: adds a GPR output ($wb) and an
// am6offset input ($offset) next to the address and the QPR source.
// NOTE(review): the trailing "$addr.addr = $wb" string is presumably the
// operand constraint tying the written-back register to the address base --
// confirm against the PseudoNLdSt definition.
class VSTQWBPseudo
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
"$addr.addr = $wb">;
// Store pseudo with no outputs: takes an addrmode6 address and a single
// QQPR source register.
class VSTQQPseudo
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
class VSTQQWBPseudo
@ -520,6 +526,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
// Pseudo forms of the VST1q* stores, one per element size. Each takes its
// source as a single QPR operand (see VSTQPseudo); ARMExpandPseudo rewrites
// them to the matching VST1q8/16/32/64 instruction with two registers.
def VST1q8Pseudo : VSTQPseudo;
def VST1q16Pseudo : VSTQPseudo;
def VST1q32Pseudo : VSTQPseudo;
def VST1q64Pseudo : VSTQPseudo;
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@ -540,6 +551,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
// Pseudo forms of the VST1q*_UPD (address writeback) stores, one per element
// size. Each takes its source as a single QPR operand (see VSTQWBPseudo);
// ARMExpandPseudo rewrites them to the matching VST1q*_UPD instruction with
// two registers.
def VST1q8Pseudo_UPD : VSTQWBPseudo;
def VST1q16Pseudo_UPD : VSTQWBPseudo;
def VST1q32Pseudo_UPD : VSTQWBPseudo;
def VST1q64Pseudo_UPD : VSTQWBPseudo;
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@ -610,6 +626,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
// Pseudo forms of the VST2 stores. The d-variants take their source as one
// QPR operand (VSTQPseudo) and are expanded by ARMExpandPseudo to VST2d* with
// two registers; the q-variants take one QQPR operand (VSTQQPseudo) and are
// expanded to VST2q* with four registers.
def VST2d8Pseudo : VSTQPseudo;
def VST2d16Pseudo : VSTQPseudo;
def VST2d32Pseudo : VSTQPseudo;
def VST2q8Pseudo : VSTQQPseudo;
def VST2q16Pseudo : VSTQQPseudo;
def VST2q32Pseudo : VSTQQPseudo;
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@ -631,6 +655,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
// Pseudo forms of the VST2 stores with address register writeback. The
// d-variants use VSTQWBPseudo and are expanded by ARMExpandPseudo to
// VST2d*_UPD with two registers; the q-variants use VSTQQWBPseudo and are
// expanded to VST2q*_UPD with four registers.
def VST2d8Pseudo_UPD : VSTQWBPseudo;
def VST2d16Pseudo_UPD : VSTQWBPseudo;
def VST2d32Pseudo_UPD : VSTQWBPseudo;
def VST2q8Pseudo_UPD : VSTQQWBPseudo;
def VST2q16Pseudo_UPD : VSTQQWBPseudo;
def VST2q32Pseudo_UPD : VSTQQWBPseudo;
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;

View File

@ -178,13 +178,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
case ARM::VST1q8:
case ARM::VST1q16:
case ARM::VST1q32:
case ARM::VST1q64:
case ARM::VST2d8:
case ARM::VST2d16:
case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@ -192,13 +185,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
FirstOpnd = 2;
NumRegs = 4;
return true;
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;