ARM: properly handle alignment for struct byval.

Factor out the expansion code into a function.
This change is to be enabled in clang.

rdar://9877866

llvm-svn: 157830
This commit is contained in:
Manman Ren 2012-06-01 19:33:18 +00:00
parent 2c3f63cbda
commit e873552091
5 changed files with 308 additions and 250 deletions

View File

@ -299,6 +299,15 @@ protected:
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
/// FastEmitInst_rrii - Emit a MachineInstr with two register operands,
/// two immediates operands, and a result register in the given register
/// class.
unsigned FastEmitInst_rrii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm1, uint64_t Imm2);
/// FastEmitInst_i - Emit a MachineInstr with a single immediate
/// operand, and a result register in the given register class.
unsigned FastEmitInst_i(unsigned MachineInstrOpcode,

View File

@ -1306,6 +1306,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm1, uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm1).addImm(Imm2);
else {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm1).addImm(Imm2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {

View File

@ -1434,9 +1434,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode};
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
@ -6239,6 +6240,270 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
MachineBasicBlock *ARMTargetLowering::
EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned src = MI->getOperand(1).getReg();
unsigned SizeVal = MI->getOperand(2).getImm();
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned ldrOpc, strOpc, UnitSize;
const TargetRegisterClass *TRC = isThumb2 ?
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
if (Align & 1) {
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
UnitSize = 4;
}
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
// Use LDR and STR to copy.
// [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
// [destOut] = STR_POST(scratch, destIn, UnitSize)
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addImm(UnitSize));
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
.addImm(UnitSize));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(UnitSize));
}
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
}
srcIn = srcOut;
destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
// Expand the pseudo op to a loop.
// thisMBB:
// ...
// movw varEnd, # --> with thumb2
// movt varEnd, #
// ldrcp varEnd, idx --> without thumb2
// fallthrough --> loopMBB
// loopMBB:
// PHI varPhi, varEnd, varLoop
// PHI srcPhi, src, srcLoop
// PHI destPhi, dst, destLoop
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
// subs varLoop, varPhi, #UnitSize
// bne loopMBB
// fallthrough --> exitMBB
// exitMBB:
// epilogue to handle left-over bytes
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
if (isThumb2) {
unsigned VReg1 = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
VReg1 = MRI.createVirtualRegister(TRC);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
.addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
.addReg(VReg1)
.addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = getTargetData()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0));
}
BB->addSuccessor(loopMBB);
// Generate the loop body:
// varPhi = PHI(varLoop, varEnd)
// srcPhi = PHI(srcLoop, src)
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
unsigned varLoop = MRI.createVirtualRegister(TRC);
unsigned varPhi = MRI.createVirtualRegister(TRC);
unsigned srcLoop = MRI.createVirtualRegister(TRC);
unsigned srcPhi = MRI.createVirtualRegister(TRC);
unsigned destLoop = MRI.createVirtualRegister(TRC);
unsigned destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
.addReg(varEnd).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
.addReg(srcLoop).addMBB(loopMBB)
.addReg(src).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
.addReg(destLoop).addMBB(loopMBB)
.addReg(dest).addMBB(entryBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
unsigned scratch = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
.addReg(scratch).addReg(destPhi)
.addImm(UnitSize));
} else {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
.addImm(UnitSize));
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
.addReg(scratch).addReg(destPhi)
.addReg(0).addImm(UnitSize));
}
// Decrement loop variable by UnitSize.
MachineInstrBuilder MIB = BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addImm(1));
} else {
AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
}
srcIn = srcOut;
destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@ -6594,252 +6859,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// return last added BB
return SinkBB;
}
case ARM::COPY_STRUCT_BYVAL_I32: {
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned src = MI->getOperand(1).getReg();
unsigned size = MI->getOperand(2).getImm();
DebugLoc dl = MI->getDebugLoc();
unsigned BytesLeft = size & 3;
unsigned LoopSize = size - BytesLeft;
bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
const TargetRegisterClass *TRC = isThumb2 ?
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
if (size <= Subtarget->getMaxInlineSizeThreshold()) {
// Use LDR and STR to copy.
// [scratch, srcOut] = LDR_POST(srcIn, 4)
// [destOut] = STR_POST(scratch, destIn, 4)
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=4) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addImm(4));
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(4));
}
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
} else {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
}
srcIn = srcOut;
destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
// Expand the pseudo op to a loop.
// thisMBB:
// ...
// movw varEnd, # --> with thumb2
// movt varEnd, #
// ldrcp varEnd, idx --> without thumb2
// fallthrough --> loopMBB
// loopMBB:
// PHI varPhi, varEnd, varLoop
// PHI srcPhi, src, srcLoop
// PHI destPhi, dst, destLoop
// [scratch, srcLoop] = LDR_POST(srcPhi, 4)
// [destLoop] = STR_POST(scratch, destPhi, 4)
// subs varLoop, varPhi, #4
// bne loopMBB
// fallthrough --> exitMBB
// exitMBB:
// epilogue to handle left-over bytes
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
if (isThumb2) {
unsigned VReg1 = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
VReg1 = MRI.createVirtualRegister(TRC);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
.addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
.addReg(VReg1)
.addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = getTargetData()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0));
}
BB->addSuccessor(loopMBB);
// Generate the loop body:
// varPhi = PHI(varLoop, varEnd)
// srcPhi = PHI(srcLoop, src)
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
unsigned varLoop = MRI.createVirtualRegister(TRC);
unsigned varPhi = MRI.createVirtualRegister(TRC);
unsigned srcLoop = MRI.createVirtualRegister(TRC);
unsigned srcPhi = MRI.createVirtualRegister(TRC);
unsigned destLoop = MRI.createVirtualRegister(TRC);
unsigned destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
.addReg(varEnd).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
.addReg(srcLoop).addMBB(loopMBB)
.addReg(src).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
.addReg(destLoop).addMBB(loopMBB)
.addReg(dest).addMBB(entryBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, 4)
// [destLoop] = STR_POST(scratch, destPhi, 4)
unsigned scratch = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4));
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
.addReg(scratch).addReg(destPhi)
.addImm(4));
} else {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4));
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
.addReg(scratch).addReg(destPhi)
.addReg(0).addImm(4));
}
// Decrement loop variable by 4.
MachineInstrBuilder MIB = BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4)));
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
if (isThumb2) {
AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addImm(1));
} else {
AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
TII->get(ldrOpc),scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
.addReg(scratch).addReg(destIn)
.addReg(0).addImm(1));
}
srcIn = srcOut;
destIn = destOut;
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
return EmitStructByval(MI, BB);
}
}

View File

@ -530,6 +530,9 @@ namespace llvm {
MachineBasicBlock *MBB) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
MachineBasicBlock *MBB) const;
};
enum NEONModImmType {

View File

@ -18,9 +18,9 @@
// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
def SDT_ARMStructByVal : SDTypeProfile<0, 3,
def SDT_ARMStructByVal : SDTypeProfile<0, 4,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
@ -4174,9 +4174,9 @@ let usesCustomInserter = 1 in {
let usesCustomInserter = 1 in {
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size),
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
NoItinerary,
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>;
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
let mayLoad = 1 in {