X86: custom-lower ISD::MEMSET / ISD::MEMCPY through new X86ISD::REP_STOS /
X86ISD::REP_MOVS nodes.

  * X86ISelLowering.cpp marks MEMSET and MEMCPY (MVT::Other) as Custom.
    LowerOperation copies the element count into ECX and the destination into
    EDI; memset additionally copies the fill value into AL/AX/EAX and memcpy
    copies the source into ESI. It then emits the REP_* node carrying the
    element type (i8 / i16 / i32) selected from `Align & 3`. A non-constant
    memset value always takes the byte (AL, i8) path.
  * X86ISelLowering.h declares the two new node kinds; getTargetNodeName
    reports them as "X86ISD::REP_STOS" / "X86ISD::REP_MOVS".
  * X86InstrInfo.td adds the X86rep_stos / X86rep_movs SDNodes and attaches
    selection patterns to REP_STOS{B,W,D} / REP_MOVS{B,W,D}.

NOTE(review): the i16 / i32 paths divide the byte count by 2 / 4 (constant
division or SRL), so a remainder is dropped. Presumably the caller guarantees
the size is a multiple of the alignment -- confirm against the intrinsic's
contract.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 65622aff7b31..a6a95c77bf71 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -147,6 +147,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
     setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
     setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+    // X86 wants to expand memset / memcpy itself.
+    setOperationAction(ISD::MEMSET , MVT::Other, Custom);
+    setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
   }
 
   // We don't have line number support yet.
@@ -1614,6 +1617,109 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                        DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
   }
+  case ISD::MEMSET: {
+    SDOperand InFlag;
+    SDOperand Chain = Op.getOperand(0);
+    unsigned Align =
+      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+    if (Align == 0) Align = 1;
+
+    MVT::ValueType AVT;
+    SDOperand Count;
+    if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned ValReg;
+      unsigned Val = ValC->getValue() & 255;
+
+      // If the value is a constant, then we can potentially use larger sets.
+      switch (Align & 3) {
+      case 2: // WORD aligned
+        AVT = MVT::i16;
+        if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+          Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
+        else
+          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
+                              DAG.getConstant(1, MVT::i8));
+        Val = (Val << 8) | Val;
+        ValReg = X86::AX;
+        break;
+      case 0: // DWORD aligned
+        AVT = MVT::i32;
+        if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
+        else
+          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
+                              DAG.getConstant(2, MVT::i8));
+        Val = (Val << 8) | Val;
+        Val = (Val << 16) | Val;
+        ValReg = X86::EAX;
+        break;
+      default: // Byte aligned
+        AVT = MVT::i8;
+        Count = Op.getOperand(3);
+        ValReg = X86::AL;
+        break;
+      }
+
+      Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    } else {
+      AVT = MVT::i8;
+      Count = Op.getOperand(3);
+      Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
+      InFlag = Chain.getValue(1);
+    }
+
+    Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+    InFlag = Chain.getValue(1);
+    Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+    InFlag = Chain.getValue(1);
+
+    return DAG.getNode(X86ISD::REP_STOS, MVT::Other, Chain,
+                       DAG.getValueType(AVT), InFlag);
+  }
+  case ISD::MEMCPY: {
+    SDOperand Chain = Op.getOperand(0);
+    unsigned Align =
+      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+    if (Align == 0) Align = 1;
+
+    MVT::ValueType AVT;
+    SDOperand Count;
+    switch (Align & 3) {
+    case 2: // WORD aligned
+      AVT = MVT::i16;
+      if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
+      else
+        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
+                            DAG.getConstant(1, MVT::i8));
+      break;
+    case 0: // DWORD aligned
+      AVT = MVT::i32;
+      if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
+      else
+        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
+                            DAG.getConstant(2, MVT::i8));
+      break;
+    default: // Byte aligned
+      AVT = MVT::i8;
+      Count = Op.getOperand(3);
+      break;
+    }
+
+    SDOperand InFlag;
+    Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+    InFlag = Chain.getValue(1);
+    Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+    InFlag = Chain.getValue(1);
+    Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
+    InFlag = Chain.getValue(1);
+
+    return DAG.getNode(X86ISD::REP_MOVS, MVT::Other, Chain,
+                       DAG.getValueType(AVT), InFlag);
+  }
   case ISD::GlobalAddress: {
     GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
     SDOperand GVOp = DAG.getTargetGlobalAddress(GV, getPointerTy());
@@ -1659,6 +1765,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CMOV: return "X86ISD::CMOV";
   case X86ISD::BRCOND: return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+  case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+  case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a45c84ef7632..a4d481aca312 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -123,9 +123,15 @@ namespace llvm {
       /// or TEST instruction.
       BRCOND,
 
-      /// Return with a flag operand. Operand 1 is the number of bytes of stack
-      /// to pop, operand 2 is the chain and operand 3 is a flag operand.
+      /// Return with a flag operand. Operand 1 is the chain operand, operand
+      /// 2 is the number of bytes of stack to pop.
       RET_FLAG,
+
+      /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+      REP_STOS,
+
+      /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+      REP_MOVS,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
diff --git a/llvm/lib/Target/X86/X86ISelPattern.cpp b/llvm/lib/Target/X86/X86ISelPattern.cpp
index 563539d09030..6ca4078047d1 100644
--- a/llvm/lib/Target/X86/X86ISelPattern.cpp
+++ b/llvm/lib/Target/X86/X86ISelPattern.cpp
@@ -3558,8 +3558,8 @@ void ISel::Select(SDOperand N) {
         Opcode = X86::REP_STOSB;
       }
 
-    // No matter what the alignment is, we put the source in ESI, the
-    // destination in EDI, and the count in ECX.
+    // No matter what the alignment is, we put the destination in EDI, and the
+    // count in ECX.
     unsigned TmpReg1 = SelectExpr(Node->getOperand(1));
     BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
     BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 695dc84de84d..70baa8a3a1fb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -52,6 +52,8 @@ def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, SDTCisPtrTy<1>,
                                      SDTCisVT<2, OtherVT>]>;
 def SDTX86Fild64m : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisPtrTy<1>]>;
 
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
 def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
 
 def X86addflag : SDNode<"X86ISD::ADD_FLAG", SDTIntBinOp ,
@@ -99,6 +101,11 @@ def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
 def X86fild64m : SDNode<"X86ISD::FILD64m", SDTX86Fild64m,
                         [SDNPHasChain]>;
 
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+                        [SDNPHasChain, SDNPInFlag]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+                        [SDNPHasChain, SDNPInFlag]>;
+
 def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
                       [SDNPHasChain, SDNPOutFlag]>;
 
@@ -549,18 +556,24 @@ def LEA32r : I<0x8D, MRMSrcMem,
                "lea{l} {$src|$dst}, {$dst|$src}",
                [(set R32:$dst, leaaddr:$src)]>;
 
-def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}", []>,
+def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
+                  [(X86rep_movs i8)]>,
                 Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (ops), "{rep;movsw|rep movsw}", []>,
+def REP_MOVSW : I<0xA5, RawFrm, (ops), "{rep;movsw|rep movsw}",
+                  [(X86rep_movs i16)]>,
                 Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (ops), "{rep;movsd|rep movsd}", []>,
+def REP_MOVSD : I<0xA5, RawFrm, (ops), "{rep;movsd|rep movsd}",
+                  [(X86rep_movs i32)]>,
                 Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
-def REP_STOSB : I<0xAA, RawFrm, (ops), "{rep;stosb|rep stosb}", []>,
+def REP_STOSB : I<0xAA, RawFrm, (ops), "{rep;stosb|rep stosb}",
+                  [(X86rep_stos i8)]>,
                 Imp<[AL,ECX,EDI], [ECX,EDI]>, REP;
-def REP_STOSW : I<0xAB, RawFrm, (ops), "{rep;stosw|rep stosw}", []>,
+def REP_STOSW : I<0xAB, RawFrm, (ops), "{rep;stosw|rep stosw}",
+                  [(X86rep_stos i16)]>,
                 Imp<[AX,ECX,EDI], [ECX,EDI]>, REP, OpSize;
-def REP_STOSD : I<0xAB, RawFrm, (ops), "{rep;stosl|rep stosd}", []>,
+def REP_STOSD : I<0xAB, RawFrm, (ops), "{rep;stosl|rep stosd}",
+                  [(X86rep_stos i32)]>,
                 Imp<[EAX,ECX,EDI], [ECX,EDI]>, REP;