ARM: refactor away a bunch of VLD/VST pseudo instructions.

With the new composite physical registers to represent arbitrary pairs
of DPR registers, we don't need the pseudo-instructions anymore. Get rid
of a bunch of them that use DPR register pairs and just use the real
instructions directly instead.

llvm-svn: 152045
This commit is contained in:
Jim Grosbach 2012-03-05 19:33:30 +00:00
parent efb4f8ff00
commit c988e0c521
13 changed files with 241 additions and 247 deletions

View File

@ -252,7 +252,7 @@ public:
/// Reg so its sub-register of index SubIdx is Reg.
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
const MCRegisterClass *RC) const {
for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
for (const uint16_t *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR))
return SR;
return 0;

View File

@ -760,7 +760,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::QPRRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
@ -845,7 +845,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
case ARM::VST1q64Pseudo:
case ARM::VST1q64:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@ -909,7 +909,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::QPRRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
@ -989,7 +989,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
case ARM::VLD1q64Pseudo:
case ARM::VLD1q64:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@ -2694,33 +2694,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (DefAlign < 8 && Subtarget.isCortexA9())
switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8Pseudo:
case ARM::VLD1q16Pseudo:
case ARM::VLD1q32Pseudo:
case ARM::VLD1q64Pseudo:
case ARM::VLD1q8PseudoWB_register:
case ARM::VLD1q16PseudoWB_register:
case ARM::VLD1q32PseudoWB_register:
case ARM::VLD1q64PseudoWB_register:
case ARM::VLD1q8PseudoWB_fixed:
case ARM::VLD1q16PseudoWB_fixed:
case ARM::VLD1q32PseudoWB_fixed:
case ARM::VLD1q64PseudoWB_fixed:
case ARM::VLD2d8Pseudo:
case ARM::VLD2d16Pseudo:
case ARM::VLD2d32Pseudo:
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
case ARM::VLD1q8wb_register:
case ARM::VLD1q16wb_register:
case ARM::VLD1q32wb_register:
case ARM::VLD1q64wb_register:
case ARM::VLD1q8wb_fixed:
case ARM::VLD1q16wb_fixed:
case ARM::VLD1q32wb_fixed:
case ARM::VLD1q64wb_fixed:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2d8PseudoWB_fixed:
case ARM::VLD2d16PseudoWB_fixed:
case ARM::VLD2d32PseudoWB_fixed:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2d8PseudoWB_register:
case ARM::VLD2d16PseudoWB_register:
case ARM::VLD2d32PseudoWB_register:
case ARM::VLD2d8wb_register:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_register:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:

View File

@ -148,18 +148,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,false,SingleSpc, 2, 4 ,false},
{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false, false,SingleSpc, 2, 2 ,false},
{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, false, SingleSpc, 2, 1 ,false},
{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false, false,SingleSpc, 2, 2 ,false},
{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, true, SingleSpc, 2, 1 ,false},
{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false},
{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false},
{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
@ -182,16 +170,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
@ -286,19 +264,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false},
{ ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false},
{ ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false},
{ ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false},
{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
{ ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
@ -310,16 +275,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false},
{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
@ -1094,33 +1049,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
case ARM::VLD1q8Pseudo:
case ARM::VLD1q16Pseudo:
case ARM::VLD1q32Pseudo:
case ARM::VLD1q64Pseudo:
case ARM::VLD1q8PseudoWB_register:
case ARM::VLD1q16PseudoWB_register:
case ARM::VLD1q32PseudoWB_register:
case ARM::VLD1q64PseudoWB_register:
case ARM::VLD1q8PseudoWB_fixed:
case ARM::VLD1q16PseudoWB_fixed:
case ARM::VLD1q32PseudoWB_fixed:
case ARM::VLD1q64PseudoWB_fixed:
case ARM::VLD2d8Pseudo:
case ARM::VLD2d16Pseudo:
case ARM::VLD2d32Pseudo:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
case ARM::VLD2d8PseudoWB_fixed:
case ARM::VLD2d16PseudoWB_fixed:
case ARM::VLD2d32PseudoWB_fixed:
case ARM::VLD2q8PseudoWB_fixed:
case ARM::VLD2q16PseudoWB_fixed:
case ARM::VLD2q32PseudoWB_fixed:
case ARM::VLD2d8PseudoWB_register:
case ARM::VLD2d16PseudoWB_register:
case ARM::VLD2d32PseudoWB_register:
case ARM::VLD2q8PseudoWB_register:
case ARM::VLD2q16PseudoWB_register:
case ARM::VLD2q32PseudoWB_register:
@ -1189,33 +1123,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandVLD(MBBI);
return true;
case ARM::VST1q8Pseudo:
case ARM::VST1q16Pseudo:
case ARM::VST1q32Pseudo:
case ARM::VST1q64Pseudo:
case ARM::VST1q8PseudoWB_fixed:
case ARM::VST1q16PseudoWB_fixed:
case ARM::VST1q32PseudoWB_fixed:
case ARM::VST1q64PseudoWB_fixed:
case ARM::VST1q8PseudoWB_register:
case ARM::VST1q16PseudoWB_register:
case ARM::VST1q32PseudoWB_register:
case ARM::VST1q64PseudoWB_register:
case ARM::VST2d8Pseudo:
case ARM::VST2d16Pseudo:
case ARM::VST2d32Pseudo:
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
case ARM::VST2d8PseudoWB_fixed:
case ARM::VST2d16PseudoWB_fixed:
case ARM::VST2d32PseudoWB_fixed:
case ARM::VST2q8PseudoWB_fixed:
case ARM::VST2q16PseudoWB_fixed:
case ARM::VST2q32PseudoWB_fixed:
case ARM::VST2d8PseudoWB_register:
case ARM::VST2d16PseudoWB_register:
case ARM::VST2d32PseudoWB_register:
case ARM::VST2q8PseudoWB_register:
case ARM::VST2q16PseudoWB_register:
case ARM::VST2q32PseudoWB_register:
@ -1333,10 +1246,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true;
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true;
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}

View File

@ -830,8 +830,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
ARM::QPRRegisterClass);
MBB.addLiveIn(SupReg);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
.addReg(ARM::R4).addImm(16).addReg(NextReg)
.addReg(SupReg, RegState::ImplicitKill));
.addReg(ARM::R4).addImm(16).addReg(SupReg));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
@ -944,9 +943,8 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
ARM::QPRRegisterClass);
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), NextReg)
.addReg(ARM::R4).addImm(16)
.addReg(SupReg, RegState::ImplicitDefine));
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
.addReg(ARM::R4).addImm(16));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}

View File

@ -1563,10 +1563,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register;
case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register;
case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register;
case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register;
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
@ -1576,23 +1572,19 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register;
case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
@ -1673,7 +1665,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@ -1823,7 +1815,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
if ((NumVecs > 2 && Opc != ARM::VST1q64PseudoWB_fixed) ||
if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@ -2938,18 +2930,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD1_UPD: {
unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed,
ARM::VLD1q16PseudoWB_fixed,
ARM::VLD1q32PseudoWB_fixed,
ARM::VLD1q64PseudoWB_fixed };
unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
ARM::VLD2d16PseudoWB_fixed,
ARM::VLD2d32PseudoWB_fixed,
ARM::VLD1q64PseudoWB_fixed};
unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
ARM::VLD2d16wb_fixed,
ARM::VLD2d32wb_fixed,
ARM::VLD1q64wb_fixed};
unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
@ -2958,7 +2950,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD3_UPD: {
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
@ -2970,7 +2962,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD4_UPD: {
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
@ -3007,18 +2999,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST1_UPD: {
unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed,
ARM::VST1q16PseudoWB_fixed,
ARM::VST1q32PseudoWB_fixed,
ARM::VST1q64PseudoWB_fixed };
unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
ARM::VST2d16PseudoWB_fixed,
ARM::VST2d32PseudoWB_fixed,
ARM::VST1q64PseudoWB_fixed};
unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
ARM::VST2d16wb_fixed,
ARM::VST2d32wb_fixed,
ARM::VST1q64wb_fixed};
unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
@ -3188,14 +3180,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
@ -3249,14 +3241,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
@ -3317,14 +3309,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
return SelectVTBL(N, false, 2, ARM::VTBL2);
case Intrinsic::arm_neon_vtbl3:
return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
return SelectVTBL(N, true, 2, ARM::VTBX2);
case Intrinsic::arm_neon_vtbx3:
return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
@ -3358,7 +3350,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
Ops.data(), Ops.size());
}

View File

@ -97,6 +97,15 @@ def VecListTwoDAsmOperand : AsmOperandClass {
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
let ParserMatchClass = VecListTwoDAsmOperand;
}
// FIXME: Replace all VecListTwoD with VecListDPair
def VecListDPairAsmOperand : AsmOperandClass {
let Name = "VecListDPair";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
@ -593,7 +602,7 @@ class VLD1D<bits<4> op7_4, string Dt>
let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd),
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
@ -611,11 +620,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
@ -637,7 +641,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
}
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
@ -646,7 +650,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDInstruction";
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb),
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@ -665,15 +669,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>;
def VLD1q8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
@ -767,18 +762,14 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
let DecoderMethod = "DecodeVLDInstruction";
}
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;
def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
@ -805,21 +796,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
}
}
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
@ -1597,7 +1581,7 @@ class VST1D<bits<4> op7_4, string Dt>
let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd),
: NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
@ -1614,11 +1598,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
// ...with address register writeback:
multiclass VST1DWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
@ -1642,7 +1621,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u,
(ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@ -1651,7 +1630,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
let AsmMatchConverter = "cvtVSTwbFixed";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd),
(ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@ -1671,15 +1650,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>;
def VST1q8PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@ -1779,18 +1749,14 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
let DecoderMethod = "DecodeVSTInstruction";
}
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;
def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
@ -1837,21 +1803,14 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
}
}
defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
def VST2d8PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST2u>;
def VST2d16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST2u>;
def VST2d32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST2u>;
def VST2d8PseudoWB_register : VSTQWBregisterPseudo<IIC_VST2u>;
def VST2d16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST2u>;
def VST2d32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST2u>;
def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
@ -5444,7 +5403,7 @@ def VTBL1
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
(ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
(ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
"vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
@ -5457,8 +5416,6 @@ def VTBL4
"vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
// Pseudo form of VTBL2: the table operand ($tbl) is a QPR, and the result and
// $src are DPRs.
def VTBL2Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
// Pseudo form of VTBL3: same shape, but the table operand is a QQPR.
def VTBL3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
@ -5474,7 +5431,7 @@ def VTBX1
let hasExtraSrcRegAllocReq = 1 in {
// VTBX2: the "vtbx" mnemonic with the "8" suffix, printed as "$Vd, $Vn, $Vm".
// Besides the register-list operand ($Vn) and $Vm it takes an extra DPR input
// $orig, related to the result by the "$orig = $Vd" string (presumably a
// tied-operand constraint).
// NOTE(review): two alternative `ins` lists follow, one using VecListTwoD and
// one using VecListDPair. They look like the before/after lines of a change
// shown together; confirm that only one is meant to remain.
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
(ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
(ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
"vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
@ -5489,9 +5446,6 @@ def VTBX4
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
// Pseudo form of VTBX2: takes $orig (DPR), a QPR table operand ($tbl) and
// $src (DPR). The "$orig = $dst" string relates $orig to the result
// (presumably a tied-operand constraint).
def VTBX2Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
IIC_VTBX2, "$orig = $dst", []>;
// Pseudo form of VTBX3: same shape, but the table operand is a QQPR.
def VTBX3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX3, "$orig = $dst", []>;

View File

@ -304,7 +304,11 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
// Register class representing a pair of consecutive D registers.
// Use the Q registers for the even-odd pairs.
// The member list interleaves QPR with the TuplesOE2D tuples.
// NOTE(review): DPair is defined twice below, once as a plain record and once
// with an allocation-order body. This looks like the before/after lines of a
// change shown together; confirm that only one definition is meant to remain.
def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)>;
def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> {
// Allocate starting at non-VFP2 registers D16-D31 first.
// The single alternate order is the DPair list rotated left by 16 entries.
let AltOrders = [(rotl DPair, 16)];
// The selector unconditionally returns 1, i.e. it never depends on the
// function being compiled.
let AltOrderSelect = [{ return 1; }];
}
// Pseudo-registers representing 3 consecutive D registers.
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],

View File

@ -44,6 +44,7 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
const MCRegisterInfo *MRI;
// Map of register aliases registers via the .req directive.
StringMap<unsigned> RegisterReqs;
@ -236,6 +237,9 @@ public:
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
MRI = &getContext().getRegisterInfo();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@ -1086,6 +1090,12 @@ public:
return VectorList.Count == 2;
}
bool isVecListDPair() const {
  // Matches only a single-spaced vector list whose register number is a
  // member of the DPair register class. The class lookup is skipped when the
  // operand is not a single-spaced list.
  return isSingleSpacedVectorList() &&
         ARMMCRegisterClasses[ARM::DPairRegClassID].contains(
             VectorList.RegNum);
}
bool isVecListThreeD() const {
if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 3;
@ -2969,6 +2979,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (LaneKind) {
case NoLanes:
E = Parser.getTok().getLoc();
// VLD1 wants a DPair register.
// FIXME: Make the rest of the two-reg instructions want the same
// thing.
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
@ -3138,6 +3154,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (LaneKind) {
case NoLanes:
if (Count == 2 && Spacing == 1)
// VLD1 wants a DPair register.
// FIXME: Make the rest of the two-reg instructions want the same
// thing.
FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
(Spacing == 2), S, E));
break;

View File

@ -126,6 +126,8 @@ static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
const void *Decoder);
static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
@ -987,6 +989,25 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
// Lookup table used by DecodeDPairRegisterClass: entry N is the register
// emitted for encoded register number N. Even indices hold Q registers
// (Q0..Q15) and odd indices hold the D<N>_D<N+1> composite pairs, giving 31
// entries in total (valid indices 0..30).
static const unsigned DPairDecoderTable[] = {
ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
ARM::Q15
};
/// Decode an encoded register number into a DPair register operand.
///
/// \p RegNo indexes DPairDecoderTable directly. Values above 30 have no table
/// entry and are rejected with MCDisassembler::Fail; otherwise the looked-up
/// register is appended to \p Inst and MCDisassembler::Success is returned.
/// \p Address and \p Decoder are not used.
static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
  const bool HasTableEntry = RegNo <= 30;
  if (!HasTableEntry)
    return MCDisassembler::Fail;

  Inst.addOperand(MCOperand::CreateReg(DPairDecoderTable[RegNo]));
  return MCDisassembler::Success;
}
static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xF) return MCDisassembler::Fail;
@ -1953,8 +1974,35 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
// First output register
switch (Inst.getOpcode()) {
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
case ARM::VLD1q8:
case ARM::VLD1q16wb_fixed:
case ARM::VLD1q16wb_register:
case ARM::VLD1q32wb_fixed:
case ARM::VLD1q32wb_register:
case ARM::VLD1q64wb_fixed:
case ARM::VLD1q64wb_register:
case ARM::VLD1q8wb_fixed:
case ARM::VLD1q8wb_register:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2d8:
case ARM::VLD2d16wb_fixed:
case ARM::VLD2d16wb_register:
case ARM::VLD2d32wb_fixed:
case ARM::VLD2d32wb_register:
case ARM::VLD2d8wb_fixed:
case ARM::VLD2d8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
}
// Second output register
switch (Inst.getOpcode()) {
@ -2285,8 +2333,35 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// First input register
switch (Inst.getOpcode()) {
case ARM::VST1q16:
case ARM::VST1q32:
case ARM::VST1q64:
case ARM::VST1q8:
case ARM::VST1q16wb_fixed:
case ARM::VST1q16wb_register:
case ARM::VST1q32wb_fixed:
case ARM::VST1q32wb_register:
case ARM::VST1q64wb_fixed:
case ARM::VST1q64wb_register:
case ARM::VST1q8wb_fixed:
case ARM::VST1q8wb_register:
case ARM::VST2d16:
case ARM::VST2d32:
case ARM::VST2d8:
case ARM::VST2d16wb_fixed:
case ARM::VST2d16wb_register:
case ARM::VST2d32wb_fixed:
case ARM::VST2d32wb_register:
case ARM::VST2d8wb_fixed:
case ARM::VST2d8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
}
// Second input register
switch (Inst.getOpcode()) {
@ -2652,8 +2727,16 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
switch (Inst.getOpcode()) {
case ARM::VTBL2:
case ARM::VTBX2:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
break;
default:
if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;

View File

@ -18,6 +18,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@ -1033,6 +1034,14 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}";
}
/// Print a DPair vector-list operand as "{<first>, <second>}".
///
/// Operand \p OpNum of \p MI holds one composite register. Its dsub_0 and
/// dsub_1 sub-registers are looked up through MRI and printed by name.
void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
                                          raw_ostream &O) {
  const unsigned PairReg = MI->getOperand(OpNum).getReg();
  const unsigned FirstD = MRI.getSubReg(PairReg, ARM::dsub_0);
  const unsigned SecondD = MRI.getSubReg(PairReg, ARM::dsub_1);

  O << "{" << getRegisterName(FirstD);
  O << ", " << getRegisterName(SecondD) << "}";
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with

View File

@ -134,6 +134,7 @@ public:
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,

View File

@ -185,6 +185,23 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S29: case D29: return 29;
case S30: case D30: return 30;
case S31: case D31: return 31;
// Composite registers use the regnum of the first register in the list.
// Every label names a consecutive D-register pair D<n>_D<n+1> and maps to <n>.
// The labels for 3, 5 and 7 previously read D3_D5, D5_D7 and D7_D9, which are
// not consecutive pairs; they are corrected to D3_D4, D5_D6 and D7_D8.
case D1_D2: return 1;
case D3_D4: return 3;
case D5_D6: return 5;
case D7_D8: return 7;
case D9_D10: return 9;
case D11_D12: return 11;
case D13_D14: return 13;
case D15_D16: return 15;
case D17_D18: return 17;
case D19_D20: return 19;
case D21_D22: return 21;
case D23_D24: return 23;
case D25_D26: return 25;
case D27_D28: return 27;
case D29_D30: return 29;
}
}

View File

@ -575,6 +575,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
REG("QQQQPR");
REG("VecListOneD");
REG("VecListTwoD");
REG("VecListDPair");
REG("VecListThreeD");
REG("VecListFourD");
REG("VecListTwoQ");