forked from OSchip/llvm-project
ARM NEON refactor VST2 w/ writeback instructions.
In addition to improving the representation, this adds support for assembly parsing of these instructions. llvm-svn: 146588
This commit is contained in:
parent
7234103225
commit
88ac761aa4
|
@ -308,18 +308,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
|
|||
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
|
||||
|
||||
{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
|
||||
{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,false},
|
||||
{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
|
||||
{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
|
||||
{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
|
||||
{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,false},
|
||||
// _fixed row: flag columns match the other VST2*PseudoWB_fixed entries (false, true, false).
{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false},
|
||||
{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
|
||||
{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
|
||||
{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,false},
|
||||
{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
|
||||
{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
|
||||
|
||||
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
|
||||
{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,false},
|
||||
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
|
||||
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
|
||||
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
|
||||
{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,false},
|
||||
{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
|
||||
{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
|
||||
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
|
||||
{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,false},
|
||||
{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
|
||||
{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
|
||||
|
||||
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
|
||||
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
|
||||
|
@ -1193,12 +1199,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||
case ARM::VST2q8Pseudo:
|
||||
case ARM::VST2q16Pseudo:
|
||||
case ARM::VST2q32Pseudo:
|
||||
case ARM::VST2d8Pseudo_UPD:
|
||||
case ARM::VST2d16Pseudo_UPD:
|
||||
case ARM::VST2d32Pseudo_UPD:
|
||||
case ARM::VST2q8Pseudo_UPD:
|
||||
case ARM::VST2q16Pseudo_UPD:
|
||||
case ARM::VST2q32Pseudo_UPD:
|
||||
case ARM::VST2d8PseudoWB_fixed:
|
||||
case ARM::VST2d16PseudoWB_fixed:
|
||||
case ARM::VST2d32PseudoWB_fixed:
|
||||
case ARM::VST2q8PseudoWB_fixed:
|
||||
case ARM::VST2q16PseudoWB_fixed:
|
||||
case ARM::VST2q32PseudoWB_fixed:
|
||||
case ARM::VST2d8PseudoWB_register:
|
||||
case ARM::VST2d16PseudoWB_register:
|
||||
case ARM::VST2d32PseudoWB_register:
|
||||
case ARM::VST2q8PseudoWB_register:
|
||||
case ARM::VST2q16PseudoWB_register:
|
||||
case ARM::VST2q32PseudoWB_register:
|
||||
case ARM::VST3d8Pseudo:
|
||||
case ARM::VST3d16Pseudo:
|
||||
case ARM::VST3d32Pseudo:
|
||||
|
|
|
@ -1589,6 +1589,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
|
|||
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
|
||||
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
|
||||
|
||||
case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
|
||||
case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
|
||||
case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
|
||||
case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
|
||||
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
|
||||
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
|
||||
}
|
||||
return Opc; // If not one we handle, return it unchanged.
|
||||
}
|
||||
|
@ -1806,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
|
|||
Ops.push_back(Align);
|
||||
if (isUpdating) {
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
// FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
|
||||
// FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
|
||||
// case entirely when the rest are updated to that form, too.
|
||||
if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
|
||||
if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
|
||||
Opc = getVLDSTRegisterUpdateOpcode(Opc);
|
||||
// We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
|
||||
// check for that explicitly too. Horribly hacky, but temporary.
|
||||
|
@ -2889,10 +2895,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
}
|
||||
|
||||
case ARMISD::VST2_UPD: {
|
||||
unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
|
||||
ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
|
||||
unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
|
||||
ARM::VST2q32Pseudo_UPD };
|
||||
unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
|
||||
ARM::VST2d16PseudoWB_fixed,
|
||||
ARM::VST2d32PseudoWB_fixed,
|
||||
ARM::VST1q64PseudoWB_fixed};
|
||||
unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
|
||||
ARM::VST2q16PseudoWB_fixed,
|
||||
ARM::VST2q32PseudoWB_fixed };
|
||||
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -1523,44 +1523,90 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
|
|||
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
|
||||
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
(ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
|
||||
IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
|
||||
let Inst{5-4} = Rn{5-4};
|
||||
let DecoderMethod = "DecodeVSTInstruction";
|
||||
//class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
|
||||
// : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
// (ins addrmode6:$Rn, am6offset:$Rm, VdTy:$Vd),
|
||||
// IIC_VST2u, "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
|
||||
// let Inst{5-4} = Rn{5-4};
|
||||
// let DecoderMethod = "DecodeVSTInstruction";
|
||||
//}
|
||||
// VST2 with address-register writeback, parameterized on the register-list
// operand type (VdTy). Each instantiation produces two instructions:
//   <name>_fixed    - fixed-increment form, assembly "$Vd, $Rn!"; Rm is
//                     hard-wired to 0b1101.
//   <name>_register - register-increment form, assembly "$Vd, $Rn, $Rm".
// Both use the IIC_VST2u itinerary, the same one the VST2d*PseudoWB_*
// pseudos are defined with.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VST2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    // op7_4 leaves bits 5-4 unspecified ({x,y,?,?}); both must come from the
    // addressing-mode operand or the upper alignment bit is never encoded.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    // Same two-bit alignment field as the fixed form.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
|
||||
class VST2QWB<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
|
||||
(ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
|
||||
"vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
|
||||
let Inst{5-4} = Rn{5-4};
|
||||
let DecoderMethod = "DecodeVSTInstruction";
|
||||
//class VST2QWB<bits<4> op7_4, string Dt>
|
||||
// : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
|
||||
// (ins addrmode6:$Rn, am6offset:$Rm, VecListFourD:$Vd), IIC_VST2x2u,
|
||||
// "vst2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> {
|
||||
// let Inst{5-4} = Rn{5-4};
|
||||
// let DecoderMethod = "DecodeVSTInstruction";
|
||||
//}
|
||||
// VST2 with address-register writeback for the four-D-register list form
// (VecListFourD, op11_8 fixed at 0b0011). Each instantiation produces two
// instructions:
//   <name>_fixed    - fixed-increment form, assembly "$Vd, $Rn!"; Rm is
//                     hard-wired to 0b1101.
//   <name>_register - register-increment form, assembly "$Vd, $Rn, $Rm".
// Both use the IIC_VST2x2u itinerary, the same one the VST2q*PseudoWB_*
// pseudos are defined with.
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    // op7_4 leaves bits 5-4 unspecified ({x,y,?,?}); both must come from the
    // addressing-mode operand or the upper alignment bit is never encoded.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VST2x2u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    // Same two-bit alignment field as the fixed form.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
|
||||
|
||||
def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
|
||||
def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
|
||||
def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
|
||||
defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
|
||||
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
|
||||
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
|
||||
|
||||
def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
|
||||
def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
|
||||
def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
|
||||
defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
|
||||
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
|
||||
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
|
||||
|
||||
def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
|
||||
def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
|
||||
|
||||
def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
|
||||
|
||||
// ...with double-spaced registers
|
||||
def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
|
||||
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
|
||||
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
|
||||
def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
|
||||
def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
|
||||
def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
|
||||
defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
|
||||
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
|
||||
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
|
||||
|
||||
// VST3 : Vector Store (multiple 3-element structures)
|
||||
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
|
|
|
@ -2221,15 +2221,24 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
|
|||
case ARM::VST1d16Qwb_register:
|
||||
case ARM::VST1d32Qwb_register:
|
||||
case ARM::VST1d64Qwb_register:
|
||||
case ARM::VST2d8_UPD:
|
||||
case ARM::VST2d16_UPD:
|
||||
case ARM::VST2d32_UPD:
|
||||
case ARM::VST2q8_UPD:
|
||||
case ARM::VST2q16_UPD:
|
||||
case ARM::VST2q32_UPD:
|
||||
case ARM::VST2b8_UPD:
|
||||
case ARM::VST2b16_UPD:
|
||||
case ARM::VST2b32_UPD:
|
||||
case ARM::VST2d8wb_fixed:
|
||||
case ARM::VST2d16wb_fixed:
|
||||
case ARM::VST2d32wb_fixed:
|
||||
case ARM::VST2d8wb_register:
|
||||
case ARM::VST2d16wb_register:
|
||||
case ARM::VST2d32wb_register:
|
||||
case ARM::VST2q8wb_fixed:
|
||||
case ARM::VST2q16wb_fixed:
|
||||
case ARM::VST2q32wb_fixed:
|
||||
case ARM::VST2q8wb_register:
|
||||
case ARM::VST2q16wb_register:
|
||||
case ARM::VST2q32wb_register:
|
||||
case ARM::VST2b8wb_fixed:
|
||||
case ARM::VST2b16wb_fixed:
|
||||
case ARM::VST2b32wb_fixed:
|
||||
case ARM::VST2b8wb_register:
|
||||
case ARM::VST2b16wb_register:
|
||||
case ARM::VST2b32wb_register:
|
||||
case ARM::VST3d8_UPD:
|
||||
case ARM::VST3d16_UPD:
|
||||
case ARM::VST3d32_UPD:
|
||||
|
|
Loading…
Reference in New Issue