forked from OSchip/llvm-project
Change ARM VFP VLDM/VSTM instructions to use addressing mode #4, just like
all the other LDM/STM instructions. This fixes asm printer crashes when compiling with -O0. I've changed one of the NEON tests (vst3.ll) to run with -O0 to check this in the future. Prior to this change VLDM/VSTM used addressing mode #5, but not really. The offset field was used to hold a count of the number of registers being loaded or stored, and the AM5 opcode field was expanded to specify the IA or DB mode, instead of the standard ADD/SUB specifier. Much of the backend was not aware of these special cases. The crashes occurred when rewriting a frameindex caused the AM5 offset field to be changed so that it did not have a valid submode. I don't know exactly what changed to expose this now. Maybe we've never done much with -O0 and NEON. Regardless, there's no longer any reason to keep a count of the VLDM/VSTM registers, so we can use addressing mode #4 and clean things up in a lot of places. llvm-svn: 112322
This commit is contained in:
parent
954e9557e3
commit
13ce07fa92
|
@ -458,6 +458,7 @@ namespace ARM_AM {
|
|||
// IB - Increment before
|
||||
// DA - Decrement after
|
||||
// DB - Decrement before
|
||||
// For VFP instructions, only the IA and DB modes are valid.
|
||||
|
||||
static inline AMSubMode getAM4SubMode(unsigned Mode) {
|
||||
return (AMSubMode)(Mode & 0x7);
|
||||
|
@ -477,14 +478,6 @@ namespace ARM_AM {
|
|||
//
|
||||
// The first operand is always a Reg. The second operand encodes the
|
||||
// operation in bit 8 and the immediate in bits 0-7.
|
||||
//
|
||||
// This is also used for FP load/store multiple ops. The second operand
|
||||
// encodes the number of registers (or 2 times the number of registers
|
||||
// for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
|
||||
// following two sub-modes:
|
||||
//
|
||||
// IA - Increment after
|
||||
// DB - Decrement before
|
||||
|
||||
/// getAM5Opc - This function encodes the addrmode5 opc field.
|
||||
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
|
||||
|
@ -498,17 +491,6 @@ namespace ARM_AM {
|
|||
return ((AM5Opc >> 8) & 1) ? sub : add;
|
||||
}
|
||||
|
||||
/// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
|
||||
/// VSTM instructions.
|
||||
static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
|
||||
assert((SubMode == ia || SubMode == db) &&
|
||||
"Illegal addressing mode 5 sub-mode!");
|
||||
return ((int)SubMode << 8) | Offset;
|
||||
}
|
||||
static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
|
||||
return (AMSubMode)((AM5Opc >> 8) & 0x7);
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Addressing Mode #6
|
||||
//===--------------------------------------------------------------------===//
|
||||
|
|
|
@ -607,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
|
|||
|
||||
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
|
||||
|
||||
if (Modifier && strcmp(Modifier, "submode") == 0) {
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
|
||||
O << ARM_AM::getAMSubModeStr(Mode);
|
||||
return;
|
||||
} else if (Modifier && strcmp(Modifier, "base") == 0) {
|
||||
// Used for FSTM{D|S} and LSTM{D|S} operations.
|
||||
O << getRegisterName(MO1.getReg());
|
||||
return;
|
||||
}
|
||||
|
||||
O << "[" << getRegisterName(MO1.getReg());
|
||||
|
||||
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
|
||||
|
|
|
@ -757,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
|
||||
.addMemOperand(MMO));
|
||||
}
|
||||
break;
|
||||
|
@ -777,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
|
||||
.addMemOperand(MMO);
|
||||
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
|
||||
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
|
||||
|
@ -789,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
|
||||
.addMemOperand(MMO);
|
||||
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
|
||||
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
|
||||
|
@ -853,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
} else {
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
|
||||
.addMemOperand(MMO));
|
||||
}
|
||||
break;
|
||||
|
@ -870,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
|
||||
.addMemOperand(MMO);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
|
||||
|
@ -882,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
|||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
|
||||
.addFrameIndex(FI)
|
||||
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
|
||||
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
|
||||
.addMemOperand(MMO);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
|
||||
|
|
|
@ -1549,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
|
|||
|
||||
// Set addressing mode by modifying bits U(23) and P(24)
|
||||
const MachineOperand &MO = MI.getOperand(OpIdx++);
|
||||
Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
|
||||
Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
|
||||
|
||||
// Set bit W(21)
|
||||
if (IsUpdating)
|
||||
|
@ -1558,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
|
|||
// First register is encoded in Dd.
|
||||
Binary |= encodeVFPRd(MI, OpIdx+2);
|
||||
|
||||
// Number of registers are encoded in offset field.
|
||||
// Count the number of registers.
|
||||
unsigned NumRegs = 1;
|
||||
for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
|
|
|
@ -2052,15 +2052,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
if (ResNode)
|
||||
return ResNode;
|
||||
|
||||
// VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
|
||||
// VLDMQ must be custom-selected for "v2f64 load" to set the AM4 value.
|
||||
if (Subtarget->hasVFP2() &&
|
||||
N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue AM5Opc =
|
||||
CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
|
||||
SDValue AM4Imm =
|
||||
CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
|
||||
SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
|
||||
SDValue Ops[] = { N->getOperand(1), AM4Imm, Pred, PredReg, Chain };
|
||||
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
|
||||
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
|
||||
SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
|
||||
|
@ -2072,16 +2072,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
break;
|
||||
}
|
||||
case ISD::STORE: {
|
||||
// VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
|
||||
// VSTMQ must be custom-selected for "v2f64 store" to set the AM4 value.
|
||||
if (Subtarget->hasVFP2() &&
|
||||
N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue AM5Opc =
|
||||
CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
|
||||
SDValue AM4Imm =
|
||||
CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
|
||||
SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
|
||||
AM5Opc, Pred, PredReg, Chain };
|
||||
AM4Imm, Pred, PredReg, Chain };
|
||||
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
|
||||
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
|
||||
SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
|
||||
|
|
|
@ -129,9 +129,9 @@ let mayLoad = 1, neverHasSideEffects = 1 in {
|
|||
// This is equivalent to VLDMD except that it has a Q register operand
|
||||
// instead of a pair of D registers.
|
||||
def VLDMQ
|
||||
: AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
|
||||
: AXDI5<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
|
||||
IndexModeNone, IIC_fpLoadm,
|
||||
"vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
|
||||
"vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "", []>;
|
||||
|
||||
// Use vld1 to load a Q register as a D register pair.
|
||||
// This alternative to VLDMQ allows an alignment to be specified.
|
||||
|
@ -146,9 +146,9 @@ let mayStore = 1, neverHasSideEffects = 1 in {
|
|||
// This is equivalent to VSTMD except that it has a Q register operand
|
||||
// instead of a pair of D registers.
|
||||
def VSTMQ
|
||||
: AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
|
||||
: AXDI5<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
|
||||
IndexModeNone, IIC_fpStorem,
|
||||
"vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
|
||||
"vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "", []>;
|
||||
|
||||
// Use vst1 to store a Q register as a D register pair.
|
||||
// This alternative to VSTMQ allows an alignment to be specified.
|
||||
|
|
|
@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
|
|||
//
|
||||
|
||||
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
|
||||
def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
|
||||
def VLDMD : AXDI5<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
|
||||
variable_ops), IndexModeNone, IIC_fpLoadm,
|
||||
"vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
|
||||
"vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
|
||||
let Inst{20} = 1;
|
||||
}
|
||||
|
||||
def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
|
||||
def VLDMS : AXSI5<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
|
||||
variable_ops), IndexModeNone, IIC_fpLoadm,
|
||||
"vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
|
||||
"vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
|
||||
let Inst{20} = 1;
|
||||
}
|
||||
|
||||
def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
|
||||
def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||
reglist:$dsts, variable_ops),
|
||||
IndexModeUpd, IIC_fpLoadm,
|
||||
"vldm${addr:submode}${p}\t${addr:base}!, $dsts",
|
||||
"$addr.base = $wb", []> {
|
||||
"vldm${addr:submode}${p}\t$addr!, $dsts",
|
||||
"$addr.addr = $wb", []> {
|
||||
let Inst{20} = 1;
|
||||
}
|
||||
|
||||
def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
|
||||
def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||
reglist:$dsts, variable_ops),
|
||||
IndexModeUpd, IIC_fpLoadm,
|
||||
"vldm${addr:submode}${p}\t${addr:base}!, $dsts",
|
||||
"$addr.base = $wb", []> {
|
||||
"vldm${addr:submode}${p}\t$addr!, $dsts",
|
||||
"$addr.addr = $wb", []> {
|
||||
let Inst{20} = 1;
|
||||
}
|
||||
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
|
||||
|
||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
|
||||
def VSTMD : AXDI5<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
|
||||
variable_ops), IndexModeNone, IIC_fpStorem,
|
||||
"vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
|
||||
"vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
|
||||
let Inst{20} = 0;
|
||||
}
|
||||
|
||||
def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
|
||||
def VSTMS : AXSI5<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
|
||||
variable_ops), IndexModeNone, IIC_fpStorem,
|
||||
"vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
|
||||
"vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
|
||||
let Inst{20} = 0;
|
||||
}
|
||||
|
||||
def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
|
||||
def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||
reglist:$srcs, variable_ops),
|
||||
IndexModeUpd, IIC_fpStorem,
|
||||
"vstm${addr:submode}${p}\t${addr:base}!, $srcs",
|
||||
"$addr.base = $wb", []> {
|
||||
"vstm${addr:submode}${p}\t$addr!, $srcs",
|
||||
"$addr.addr = $wb", []> {
|
||||
let Inst{20} = 0;
|
||||
}
|
||||
|
||||
def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
|
||||
def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
|
||||
reglist:$srcs, variable_ops),
|
||||
IndexModeUpd, IIC_fpStorem,
|
||||
"vstm${addr:submode}${p}\t${addr:base}!, $srcs",
|
||||
"$addr.base = $wb", []> {
|
||||
"vstm${addr:submode}${p}\t$addr!, $srcs",
|
||||
"$addr.addr = $wb", []> {
|
||||
let Inst{20} = 0;
|
||||
}
|
||||
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
|
||||
|
|
|
@ -193,18 +193,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
|||
return false;
|
||||
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
||||
bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
|
||||
if (isAM4 && Offset == 4) {
|
||||
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
|
||||
if (isNotVFP && Offset == 4) {
|
||||
if (isThumb2)
|
||||
// Thumb2 does not support ldmib / stmib.
|
||||
return false;
|
||||
Mode = ARM_AM::ib;
|
||||
} else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
|
||||
} else if (isNotVFP && Offset == -4 * (int)NumRegs + 4) {
|
||||
if (isThumb2)
|
||||
// Thumb2 does not support ldmda / stmda.
|
||||
return false;
|
||||
Mode = ARM_AM::da;
|
||||
} else if (isAM4 && Offset == -4 * (int)NumRegs) {
|
||||
} else if (isNotVFP && Offset == -4 * (int)NumRegs) {
|
||||
Mode = ARM_AM::db;
|
||||
} else if (Offset != 0) {
|
||||
// If starting offset isn't zero, insert a MI to materialize a new base.
|
||||
|
@ -246,18 +246,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
|||
BaseKill = true; // New base is always killed right its use.
|
||||
}
|
||||
|
||||
bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
|
||||
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
|
||||
Opcode == ARM::VLDRD);
|
||||
Opcode = getLoadStoreMultipleOpcode(Opcode);
|
||||
MachineInstrBuilder MIB = (isAM4)
|
||||
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
|
||||
.addReg(Base, getKillRegState(BaseKill))
|
||||
.addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
|
||||
: BuildMI(MBB, MBBI, dl, TII->get(Opcode))
|
||||
.addReg(Base, getKillRegState(BaseKill))
|
||||
.addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
|
||||
.addReg(Base, getKillRegState(BaseKill))
|
||||
.addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
|
||||
for (unsigned i = 0; i != NumRegs; ++i)
|
||||
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
|
||||
| getKillRegState(Regs[i].second));
|
||||
|
@ -348,7 +342,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
|
|||
ARMCC::CondCodes Pred, unsigned PredReg,
|
||||
unsigned Scratch, MemOpQueue &MemOps,
|
||||
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
|
||||
bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
|
||||
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
|
||||
int Offset = MemOps[SIndex].Offset;
|
||||
int SOffset = Offset;
|
||||
unsigned insertAfter = SIndex;
|
||||
|
@ -366,12 +360,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
|
|||
unsigned Reg = MO.getReg();
|
||||
unsigned RegNum = MO.isUndef() ? UINT_MAX
|
||||
: ARMRegisterInfo::getRegisterNumbering(Reg);
|
||||
// AM4 - register numbers in ascending order.
|
||||
// AM5 - consecutive register numbers in ascending order.
|
||||
// Can only do up to 16 double-word registers per insn.
|
||||
// Register numbers must be in ascending order. For VFP, the registers
|
||||
// must also be consecutive and there is a limit of 16 double-word
|
||||
// registers per instruction.
|
||||
if (Reg != ARM::SP &&
|
||||
NewOffset == Offset + (int)Size &&
|
||||
((isAM4 && RegNum > PRegNum)
|
||||
((isNotVFP && RegNum > PRegNum)
|
||||
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
|
||||
Offset += Size;
|
||||
PRegNum = RegNum;
|
||||
|
@ -464,12 +458,11 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
|
|||
case ARM::STM:
|
||||
case ARM::t2LDM:
|
||||
case ARM::t2STM:
|
||||
return (MI->getNumOperands() - 4) * 4;
|
||||
case ARM::VLDMS:
|
||||
case ARM::VSTMS:
|
||||
case ARM::VLDMD:
|
||||
case ARM::VSTMD:
|
||||
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
|
||||
return (MI->getNumOperands() - 4) * 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -512,26 +505,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
|||
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
|
||||
int Opcode = MI->getOpcode();
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
|
||||
Opcode == ARM::STM || Opcode == ARM::t2STM);
|
||||
|
||||
bool DoMerge = false;
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::ia;
|
||||
unsigned Offset = 0;
|
||||
|
||||
if (isAM4) {
|
||||
// Can't use an updating ld/st if the base register is also a dest
|
||||
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
|
||||
for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
|
||||
if (MI->getOperand(i).getReg() == Base)
|
||||
return false;
|
||||
}
|
||||
Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
|
||||
} else {
|
||||
// VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
|
||||
Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
|
||||
Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
|
||||
// Can't use an updating ld/st if the base register is also a dest
|
||||
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
|
||||
for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
|
||||
if (MI->getOperand(i).getReg() == Base)
|
||||
return false;
|
||||
}
|
||||
Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
|
||||
|
||||
// Try merging with the previous instruction.
|
||||
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
|
||||
|
@ -539,22 +523,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
|||
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
|
||||
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
|
||||
--PrevMBBI;
|
||||
if (isAM4) {
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
Mode = ARM_AM::db;
|
||||
} else if (isAM4 && Mode == ARM_AM::ib &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
Mode = ARM_AM::da;
|
||||
}
|
||||
} else {
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
Mode = ARM_AM::db;
|
||||
DoMerge = true;
|
||||
}
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
Mode = ARM_AM::db;
|
||||
DoMerge = true;
|
||||
} else if (Mode == ARM_AM::ib &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
Mode = ARM_AM::da;
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge)
|
||||
MBB.erase(PrevMBBI);
|
||||
|
@ -566,19 +542,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
|||
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
|
||||
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
|
||||
++NextMBBI;
|
||||
if (isAM4) {
|
||||
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
|
||||
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
} else {
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
|
||||
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge) {
|
||||
if (NextMBBI == I) {
|
||||
|
@ -595,16 +564,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
|
|||
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
|
||||
.addReg(Base, getDefRegState(true)) // WB base register
|
||||
.addReg(Base, getKillRegState(BaseKill));
|
||||
if (isAM4) {
|
||||
// [t2]LDM_UPD, [t2]STM_UPD
|
||||
MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
} else {
|
||||
// VLDM[SD}_UPD, VSTM[SD]_UPD
|
||||
MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
}
|
||||
.addReg(Base, getKillRegState(BaseKill))
|
||||
.addImm(ARM_AM::getAM4ModeImm(Mode))
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
// Transfer the rest of operands.
|
||||
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
|
||||
MIB.addOperand(MI->getOperand(OpNum));
|
||||
|
@ -736,11 +698,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
|
|||
if (!DoMerge)
|
||||
return false;
|
||||
|
||||
bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
|
||||
unsigned Offset = 0;
|
||||
if (isAM5)
|
||||
Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
|
||||
(isDPR ? 2 : 1));
|
||||
Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
|
||||
ARM_AM::db : ARM_AM::ia);
|
||||
else if (isAM2)
|
||||
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
|
||||
else
|
||||
|
@ -748,6 +709,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
|
|||
|
||||
if (isAM5) {
|
||||
// VLDM[SD}_UPD, VSTM[SD]_UPD
|
||||
// (There are no base-updating versions of VLDR/VSTR instructions, but the
|
||||
// updating load/store-multiple instructions can be used with only one
|
||||
// register.)
|
||||
MachineOperand &MO = MI->getOperand(0);
|
||||
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
|
||||
.addReg(Base, getDefRegState(true)) // WB base register
|
||||
|
|
|
@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
|
|||
if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
|
||||
MI->getOperand(0).getReg() == ARM::SP) {
|
||||
const MCOperand &MO1 = MI->getOperand(2);
|
||||
if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
|
||||
if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
|
||||
O << '\t' << "vpush";
|
||||
printPredicateOperand(MI, 3, O);
|
||||
O << '\t';
|
||||
|
@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
|
|||
if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
|
||||
MI->getOperand(0).getReg() == ARM::SP) {
|
||||
const MCOperand &MO1 = MI->getOperand(2);
|
||||
if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
|
||||
if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
|
||||
O << '\t' << "vpop";
|
||||
printPredicateOperand(MI, 3, O);
|
||||
O << '\t';
|
||||
|
@ -412,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
|
|||
return;
|
||||
}
|
||||
|
||||
if (Modifier && strcmp(Modifier, "submode") == 0) {
|
||||
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
|
||||
O << ARM_AM::getAMSubModeStr(Mode);
|
||||
return;
|
||||
} else if (Modifier && strcmp(Modifier, "base") == 0) {
|
||||
// Used for FSTM{D|S} and LSTM{D|S} operations.
|
||||
O << getRegisterName(MO1.getReg());
|
||||
return;
|
||||
}
|
||||
|
||||
O << "[" << getRegisterName(MO1.getReg());
|
||||
|
||||
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
|
||||
|
|
|
@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|
|||
|
||||
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
|
||||
|
||||
bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
|
||||
bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
|
||||
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
|
||||
|
||||
// Extract Dd/Sd for operand 0.
|
||||
|
@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|
|||
|
||||
// VFP Load/Store Multiple Instructions.
|
||||
// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
|
||||
// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is
|
||||
// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
|
||||
// followed by a reglist of either DPR(s) or SPR(s).
|
||||
//
|
||||
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
|
||||
|
@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|
|||
|
||||
MI.addOperand(MCOperand::CreateReg(Base));
|
||||
|
||||
// Next comes the AM5 Opcode.
|
||||
// Next comes the AM4 Opcode.
|
||||
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
|
||||
// Must be either "ia" or "db" submode.
|
||||
if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
|
||||
DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
|
||||
DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned char Imm8 = insn & 0xFF;
|
||||
MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
|
||||
MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
|
||||
|
||||
// Handling the two predicate operands before the reglist.
|
||||
int64_t CondVal = insn >> ARMII::CondShift;
|
||||
|
@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|
|||
OpIdx += 4;
|
||||
|
||||
bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
|
||||
Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
|
||||
Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
|
||||
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
|
||||
|
||||
// Extract Dd/Sd.
|
||||
unsigned RegD = decodeVFPRd(insn, isSPVFP);
|
||||
|
||||
// Fill the variadic part of reglist.
|
||||
unsigned char Imm8 = insn & 0xFF;
|
||||
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
|
||||
for (unsigned i = 0; i < Regs; ++i) {
|
||||
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s
|
||||
|
||||
define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
|
||||
;CHECK: vst3i8:
|
||||
|
|
Loading…
Reference in New Issue