Add NEON VST1-lane instructions. Partial fix for Radar 8599955.

llvm-svn: 118069
This commit is contained in:
Bob Wilson 2010-11-02 21:18:25 +00:00
parent fa08e1e277
commit d80b29d6f7
5 changed files with 80 additions and 4 deletions

View File

@ -111,11 +111,11 @@ namespace {
static const NEONLdStTableEntry NEONLdStTable[] = { static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, false, EvenDblSpc, 1, 4 }, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, false, EvenDblSpc, 1, 2 }, { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, false, EvenDblSpc, 1, 8 }, { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, { ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
@ -206,6 +206,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, { ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
@ -989,6 +996,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VLD4LNd32Pseudo_UPD: case ARM::VLD4LNd32Pseudo_UPD:
case ARM::VLD4LNq16Pseudo_UPD: case ARM::VLD4LNq16Pseudo_UPD:
case ARM::VLD4LNq32Pseudo_UPD: case ARM::VLD4LNq32Pseudo_UPD:
case ARM::VST1LNq8Pseudo:
case ARM::VST1LNq16Pseudo:
case ARM::VST1LNq32Pseudo:
case ARM::VST1LNq8Pseudo_UPD:
case ARM::VST1LNq16Pseudo_UPD:
case ARM::VST1LNq32Pseudo_UPD:
case ARM::VST2LNd8Pseudo: case ARM::VST2LNd8Pseudo:
case ARM::VST2LNd16Pseudo: case ARM::VST2LNd16Pseudo:
case ARM::VST2LNd32Pseudo: case ARM::VST2LNd32Pseudo:

View File

@ -1087,6 +1087,8 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands. // Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation. // These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin> class VSTQLNPseudo<InstrItinClass itin>
@ -1112,7 +1114,36 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
nohash_imm:$lane), itin, "$addr.addr = $wb">; nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane) // VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented. class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$src[$lane]\\}, $addr", "", []>;
// Non-pseudo VST1 single-lane stores taking a DPR source, instantiated with
// the "8", "16" and "32" data-type suffixes.
// NOTE(review): these defs come after the `}` that closes the preceding
// `let mayStore = 1, neverHasSideEffects = 1, ...` block and before the `let`
// that reopens it, and VST1LN passes an empty pattern list, so nothing visible
// here marks them as stores -- confirm mayStore is set or inferred elsewhere.
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8">;
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16">;
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32">;
def VST1LNq8Pseudo : VSTQLNPseudo<IIC_VST1ln>;
def VST1LNq16Pseudo : VSTQLNPseudo<IIC_VST1ln>;
def VST1LNq32Pseudo : VSTQLNPseudo<IIC_VST1ln>;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// ...with address register writeback:
// VST1 single-lane store with address-register writeback. Uses the same
// "vst1" mnemonic and {$src[$lane]} operand syntax as VST1LN, and adds an
// am6offset operand (printed directly after $addr) plus a GPR result $wb.
// op11_8 / op7_4 and the data-type suffix Dt are supplied per instantiation.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$src[$lane]\\}, $addr$offset",
// "$addr.addr = $wb" has the form of a tied-operand constraint (presumably
// NLdSt's constraint argument -- its signature is not visible here).
// The trailing [] means no selection pattern is attached to this class.
"$addr.addr = $wb", []>;
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8">;
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16">;
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32">;
def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
// VST2LN : Vector Store (single 2-element structure from one lane) // VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>

View File

@ -158,6 +158,8 @@ def IIC_VST1u : InstrItinClass;
def IIC_VST1x2u : InstrItinClass; def IIC_VST1x2u : InstrItinClass;
def IIC_VST1x3u : InstrItinClass; def IIC_VST1x3u : InstrItinClass;
def IIC_VST1x4u : InstrItinClass; def IIC_VST1x4u : InstrItinClass;
def IIC_VST1ln : InstrItinClass;
def IIC_VST1lnu : InstrItinClass;
def IIC_VST2 : InstrItinClass; def IIC_VST2 : InstrItinClass;
def IIC_VST2x2 : InstrItinClass; def IIC_VST2x2 : InstrItinClass;
def IIC_VST2u : InstrItinClass; def IIC_VST2u : InstrItinClass;

View File

@ -601,6 +601,18 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<3, [A8_LSPipe]>], InstrStage<3, [A8_LSPipe]>],
[2, 1, 1, 1, 1, 1, 2, 2]>, [2, 1, 1, 1, 1, 1, 2, 2]>,
// //
// VST1ln
InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe], 1>,
InstrStage<2, [A8_LSPipe]>],
[1, 1, 1]>,
//
// VST1lnu
InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe], 1>,
InstrStage<2, [A8_LSPipe]>],
[2, 1, 1, 1, 1]>,
//
// VST2 // VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<2, [A8_NLSPipe], 1>, InstrStage<2, [A8_NLSPipe], 1>,

View File

@ -1005,6 +1005,24 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<3, [A9_LSUnit]>], InstrStage<3, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>, [2, 1, 1, 1, 1, 1, 2, 2]>,
// //
// VST1ln
InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
InstrStage<2, [A9_NPipe], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 1]>,
//
// VST1lnu
// NOTE(review): the A9_DRegsVFP / A9_NPipe / A9_LSUnit stages use 3 as their
// first InstrStage argument here but 2 in the IIC_VST1ln entry just above,
// whereas the Cortex-A8 itineraries use 2 for both VST1ln and VST1lnu --
// confirm the extra cycle for the writeback form is intentional.
InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
InstrStage<3, [A9_NPipe], 1>,
InstrStage<3, [A9_LSUnit]>],
[2, 1, 1, 1, 1]>,
//
// VST2 // VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_MUX0], 0>,