From e873552091786a6ea09e28987f238ba4f799d7b3 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 1 Jun 2012 19:33:18 +0000 Subject: [PATCH] ARM: properly handle alignment for struct byval. Factor out the expansion code into a function. This change is to be enabled in clang. rdar://9877866 llvm-svn: 157830 --- llvm/include/llvm/CodeGen/FastISel.h | 9 + llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 24 + llvm/lib/Target/ARM/ARMISelLowering.cpp | 514 +++++++++++---------- llvm/lib/Target/ARM/ARMISelLowering.h | 3 + llvm/lib/Target/ARM/ARMInstrInfo.td | 8 +- 5 files changed, 308 insertions(+), 250 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index e57c8b18c63e..9fe743eddbcc 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -299,6 +299,15 @@ protected: unsigned Op1, bool Op1IsKill, uint64_t Imm); + /// FastEmitInst_rrii - Emit a MachineInstr with two register operands, + /// two immediates operands, and a result register in the given register + /// class. + unsigned FastEmitInst_rrii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm1, uint64_t Imm2); + /// FastEmitInst_i - Emit a MachineInstr with a single immediate /// operand, and a result register in the given register class. unsigned FastEmitInst_i(unsigned MachineInstrOpcode, diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 07687ef8b586..6464cf4ecc6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1306,6 +1306,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 45287015769d..8c71021c6ce3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1434,9 +1434,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); + SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, Dst, Src, SizeNode}; + SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops, array_lengthof(Ops))); } @@ -6239,6 +6240,270 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } +MachineBasicBlock *ARMTargetLowering:: +EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { + // This pseudo instruction has 3 operands: dst, src, size + // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). + // Otherwise, we will generate unrolled scalar copies. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned src = MI->getOperand(1).getReg(); + unsigned SizeVal = MI->getOperand(2).getImm(); + unsigned Align = MI->getOperand(3).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + bool isThumb2 = Subtarget->isThumb2(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned ldrOpc, strOpc, UnitSize; + + const TargetRegisterClass *TRC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; + + if (Align & 1) { + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + UnitSize = 1; + } else if (Align & 2) { + ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; + strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; + UnitSize = 2; + } else { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } + unsigned BytesLeft = SizeVal % UnitSize; + unsigned LoopSize = SizeVal - BytesLeft; + + if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { + // Use LDR and STR to copy. + // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) + // [destOut] = STR_POST(scratch, destIn, UnitSize) + unsigned srcIn = src; + unsigned destIn = dest; + for (unsigned i = 0; i < LoopSize; i+=UnitSize) { + unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addImm(UnitSize)); + } else { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) + .addImm(UnitSize)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(UnitSize)); + } + srcIn = srcOut; + destIn = destOut; + } + + // Handle the leftover bytes with LDRB and STRB. + // [scratch, srcOut] = LDRB_POST(srcIn, 1) + // [destOut] = STRB_POST(scratch, destIn, 1) + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + for (unsigned i = 0; i < BytesLeft; i++) { + unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } else { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } + srcIn = srcOut; + destIn = destOut; + } + MI->eraseFromParent(); // The instruction is gone now. + return BB; + } + + // Expand the pseudo op to a loop. + // thisMBB: + // ... + // movw varEnd, # --> with thumb2 + // movt varEnd, # + // ldrcp varEnd, idx --> without thumb2 + // fallthrough --> loopMBB + // loopMBB: + // PHI varPhi, varEnd, varLoop + // PHI srcPhi, src, srcLoop + // PHI destPhi, dst, destLoop + // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) + // [destLoop] = STR_POST(scratch, destPhi, UnitSize) + // subs varLoop, varPhi, #UnitSize + // bne loopMBB + // fallthrough --> exitMBB + // exitMBB: + // epilogue to handle left-over bytes + // [scratch, srcOut] = LDRB_POST(srcLoop, 1) + // [destOut] = STRB_POST(scratch, destLoop, 1) + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Load an immediate to varEnd. + unsigned varEnd = MRI.createVirtualRegister(TRC); + if (isThumb2) { + unsigned VReg1 = varEnd; + if ((LoopSize & 0xFFFF0000) != 0) + VReg1 = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) + .addImm(LoopSize & 0xFFFF)); + + if ((LoopSize & 0xFFFF0000) != 0) + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) + .addReg(VReg1) + .addImm(LoopSize >> 16)); + } else { + MachineConstantPool *ConstantPool = MF->getConstantPool(); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + const Constant *C = ConstantInt::get(Int32Ty, LoopSize); + + // MachineConstantPool wants an explicit alignment. + unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + if (Align == 0) + Align = getTargetData()->getTypeAllocSize(C->getType()); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); + + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0)); + } + BB->addSuccessor(loopMBB); + + // Generate the loop body: + // varPhi = PHI(varLoop, varEnd) + // srcPhi = PHI(srcLoop, src) + // destPhi = PHI(destLoop, dst) + MachineBasicBlock *entryBB = BB; + BB = loopMBB; + unsigned varLoop = MRI.createVirtualRegister(TRC); + unsigned varPhi = MRI.createVirtualRegister(TRC); + unsigned srcLoop = MRI.createVirtualRegister(TRC); + unsigned srcPhi = MRI.createVirtualRegister(TRC); + unsigned destLoop = MRI.createVirtualRegister(TRC); + unsigned destPhi = MRI.createVirtualRegister(TRC); + + BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) + .addReg(varLoop).addMBB(loopMBB) + .addReg(varEnd).addMBB(entryBB); + BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) + .addReg(srcLoop).addMBB(loopMBB) + .addReg(src).addMBB(entryBB); + BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) + .addReg(destLoop).addMBB(loopMBB) + .addReg(dest).addMBB(entryBB); + + // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) + // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) + unsigned scratch = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(scratch).addReg(destPhi) + .addImm(UnitSize)); + } else { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) + .addImm(UnitSize)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(scratch).addReg(destPhi) + .addReg(0).addImm(UnitSize)); + } + + // Decrement loop variable by UnitSize. + MachineInstrBuilder MIB = BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); + AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB->getOperand(5).setReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + // loopMBB can loop back to loopMBB or fall through to exitMBB. + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // Add epilogue to handle BytesLeft. + BB = exitMBB; + MachineInstr *StartOfExit = exitMBB->begin(); + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + + // [scratch, srcOut] = LDRB_POST(srcLoop, 1) + // [destOut] = STRB_POST(scratch, destLoop, 1) + unsigned srcIn = srcLoop; + unsigned destIn = destLoop; + for (unsigned i = 0; i < BytesLeft; i++) { + unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addImm(1)); + } else { + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); + + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } + srcIn = srcOut; + destIn = destOut; + } + + MI->eraseFromParent(); // The instruction is gone now. + return BB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -6594,252 +6859,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // return last added BB return SinkBB; } - case ARM::COPY_STRUCT_BYVAL_I32: { + case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; - // This pseudo instruction has 3 operands: dst, src, size - // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). - // Otherwise, we will generate unrolled scalar copies. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned src = MI->getOperand(1).getReg(); - unsigned size = MI->getOperand(2).getImm(); - DebugLoc dl = MI->getDebugLoc(); - unsigned BytesLeft = size & 3; - unsigned LoopSize = size - BytesLeft; - - bool isThumb2 = Subtarget->isThumb2(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - unsigned strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - - if (size <= Subtarget->getMaxInlineSizeThreshold()) { - // Use LDR and STR to copy. - // [scratch, srcOut] = LDR_POST(srcIn, 4) - // [destOut] = STR_POST(scratch, destIn, 4) - unsigned srcIn = src; - unsigned destIn = dest; - for (unsigned i = 0; i < LoopSize; i+=4) { - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(4)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(4)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(4)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(4)); - } - srcIn = srcOut; - destIn = destOut; - } - - // Handle the leftover bytes with LDRB and STRB. - // [scratch, srcOut] = LDRB_POST(srcIn, 1) - // [destOut] = STRB_POST(scratch, destIn, 1) - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; - for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } - srcIn = srcOut; - destIn = destOut; - } - MI->eraseFromParent(); // The instruction is gone now. - return BB; - } - - // Expand the pseudo op to a loop. - // thisMBB: - // ... - // movw varEnd, # --> with thumb2 - // movt varEnd, # - // ldrcp varEnd, idx --> without thumb2 - // fallthrough --> loopMBB - // loopMBB: - // PHI varPhi, varEnd, varLoop - // PHI srcPhi, src, srcLoop - // PHI destPhi, dst, destLoop - // [scratch, srcLoop] = LDR_POST(srcPhi, 4) - // [destLoop] = STR_POST(scratch, destPhi, 4) - // subs varLoop, varPhi, #4 - // bne loopMBB - // fallthrough --> exitMBB - // exitMBB: - // epilogue to handle left-over bytes - // [scratch, srcOut] = LDRB_POST(srcLoop, 1) - // [destOut] = STRB_POST(scratch, destLoop, 1) - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Load an immediate to varEnd. - unsigned varEnd = MRI.createVirtualRegister(TRC); - if (isThumb2) { - unsigned VReg1 = varEnd; - if ((LoopSize & 0xFFFF0000) != 0) - VReg1 = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(LoopSize & 0xFFFF)); - - if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(VReg1) - .addImm(LoopSize >> 16)); - } else { - MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); - const Constant *C = ConstantInt::get(Int32Ty, LoopSize); - - // MachineConstantPool wants an explicit alignment. - unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); - if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); - unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) - .addReg(varEnd, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); - } - BB->addSuccessor(loopMBB); - - // Generate the loop body: - // varPhi = PHI(varLoop, varEnd) - // srcPhi = PHI(srcLoop, src) - // destPhi = PHI(destLoop, dst) - MachineBasicBlock *entryBB = BB; - BB = loopMBB; - unsigned varLoop = MRI.createVirtualRegister(TRC); - unsigned varPhi = MRI.createVirtualRegister(TRC); - unsigned srcLoop = MRI.createVirtualRegister(TRC); - unsigned srcPhi = MRI.createVirtualRegister(TRC); - unsigned destLoop = MRI.createVirtualRegister(TRC); - unsigned destPhi = MRI.createVirtualRegister(TRC); - - BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) - .addReg(varLoop).addMBB(loopMBB) - .addReg(varEnd).addMBB(entryBB); - BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) - .addReg(srcLoop).addMBB(loopMBB) - .addReg(src).addMBB(entryBB); - BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) - .addReg(destLoop).addMBB(loopMBB) - .addReg(dest).addMBB(entryBB); - - // [scratch, srcLoop] = LDR_POST(srcPhi, 4) - // [destLoop] = STR_POST(scratch, destPhi, 4) - unsigned scratch = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(4)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addImm(4)); - } else { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0).addImm(4)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addReg(0).addImm(4)); - } - - // Decrement loop variable by 4. - MachineInstrBuilder MIB = BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(4))); - MIB->getOperand(5).setReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - // loopMBB can loop back to loopMBB or fall through to exitMBB. - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // Add epilogue to handle BytesLeft. - BB = exitMBB; - MachineInstr *StartOfExit = exitMBB->begin(); - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; - - // [scratch, srcOut] = LDRB_POST(srcLoop, 1) - // [destOut] = STRB_POST(scratch, destLoop, 1) - unsigned srcIn = srcLoop; - unsigned destIn = destLoop; - for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } - srcIn = srcOut; - destIn = destOut; - } - - MI->eraseFromParent(); // The instruction is gone now. - return BB; - } + return EmitStructByval(MI, BB); } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index d11171ef110c..a11387897825 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -530,6 +530,9 @@ namespace llvm { MachineBasicBlock *MBB) const; bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitStructByval(MachineInstr *MI, + MachineBasicBlock *MBB) const; }; enum NEONModImmType { diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f700ec6eec79..cf7d07c1d3a1 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -18,9 +18,9 @@ // Type profiles. def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; -def SDT_ARMStructByVal : SDTypeProfile<0, 3, +def SDT_ARMStructByVal : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; @@ -4174,9 +4174,9 @@ let usesCustomInserter = 1 in { let usesCustomInserter = 1 in { def COPY_STRUCT_BYVAL_I32 : PseudoInst< - (outs), (ins GPR:$dst, GPR:$src, i32imm:$size), + (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), NoItinerary, - [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size)]>; + [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; } let mayLoad = 1 in {