Expand small memmoves using inline code. Set the X86 threshold for expanding

memmove to a more plausible value, now that it's actually being used. llvm-svn: 51696
2008-05-29 19:42:22 +00:00 · 2008-05-29 19:42:22 +00:00 · 714663ab94
parent d8734cf916
commit 714663ab94
3 changed files with 86 additions and 6 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2695,8 +2695,8 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
                                         const Value *SrcSV, uint64_t SrcSVOff){
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // Expand memcpy to a series of store ops if the size operand falls below
+  // Expand memcpy to a series of load and store ops if the size operand falls
-  // a certain threshold.
+  // below a certain threshold.
  std::vector<MVT::ValueType> MemOps;
  uint64_t Limit = -1;
  if (!AlwaysInline)
@ -2743,6 +2743,63 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
                     &OutChains[0], OutChains.size());
 }
 /// getMemmoveLoadsAndStores - Try to expand a memmove of a constant Size into
 /// an inline sequence of loads followed by stores.
 ///
 /// Every load is issued on the incoming Chain, the loads' output chains are
 /// joined with a TokenFactor, and every store is chained on that TokenFactor.
 /// All loads therefore precede all stores, so no store can clobber source
 /// bytes that a later load still needs. All loaded values are held at once,
 /// so a large limit can cause heavy register pressure.
 ///
 /// Unless AlwaysInline is set, the number of memory operations is limited by
 /// TLI.getMaxStoresPerMemmove(). Returns a null SDOperand when
 /// MeetsMaxMemopRequirement rejects the expansion; otherwise returns a
 /// TokenFactor joining all of the stores.
 ///
 /// NOTE(review): callers are expected to filter out Size == 0 (getMemmove
 /// does); whether MemOps can come back empty is decided inside
 /// MeetsMaxMemopRequirement and is not checked here.
 static SDOperand getMemmoveLoadsAndStores(SelectionDAG &DAG,
                                          SDOperand Chain, SDOperand Dst,
                                          SDOperand Src, uint64_t Size,
                                          unsigned Align, bool AlwaysInline,
                                          const Value *DstSV, uint64_t DstSVOff,
                                          const Value *SrcSV, uint64_t SrcSVOff){
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Expand memmove to a series of load and store ops if the size operand falls
  // below a certain threshold.
  std::vector<MVT::ValueType> MemOps;
  uint64_t Limit = -1;  // Wraps to the maximum value: effectively no limit.
  if (!AlwaysInline)
    Limit = TLI.getMaxStoresPerMemmove();
  unsigned DstAlign = Align;  // Destination alignment can change.
  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
                                DAG, TLI))
    return SDOperand();
  uint64_t SrcOff = 0, DstOff = 0;
  SmallVector<SDOperand, 8> LoadValues;
  SmallVector<SDOperand, 8> LoadChains;
  SmallVector<SDOperand, 8> OutChains;
  unsigned NumMemOps = MemOps.size();
  // Phase 1: emit every load, all hanging off the incoming chain.
  for (unsigned i = 0; i < NumMemOps; i++) {
    MVT::ValueType VT = MemOps[i];
    unsigned VTSize = MVT::getSizeInBits(VT) / 8;
    SDOperand Value = DAG.getLoad(VT, Chain,
                                  getMemBasePlusOffset(Src, SrcOff, DAG),
                                  SrcSV, SrcSVOff + SrcOff, false, Align);
    LoadValues.push_back(Value);
    LoadChains.push_back(Value.getValue(1));  // Result 1 is the load's chain.
    SrcOff += VTSize;
  }
  // Join the load chains so each store below depends on *all* of the loads.
  Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                      &LoadChains[0], LoadChains.size());
  // Phase 2: emit the stores, walking the same value types in the same order.
  for (unsigned i = 0; i < NumMemOps; i++) {
    MVT::ValueType VT = MemOps[i];
    unsigned VTSize = MVT::getSizeInBits(VT) / 8;
    SDOperand Store = DAG.getStore(Chain, LoadValues[i],
                                   getMemBasePlusOffset(Dst, DstOff, DAG),
                                   DstSV, DstSVOff + DstOff, false, DstAlign);
    OutChains.push_back(Store);
    DstOff += VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, MVT::Other,
                     &OutChains[0], OutChains.size());
 }
 static SDOperand getMemsetStores(SelectionDAG &DAG,
                                 SDOperand Chain, SDOperand Dst,
                                 SDOperand Src, uint64_t Size,
@ -2836,9 +2893,20 @@ SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
                                   const Value *DstSV, uint64_t DstSVOff,
                                   const Value *SrcSV, uint64_t SrcSVOff) {
-  // TODO: Optimize small memmove cases with simple loads and stores,
+  // Check to see if we should lower the memmove to loads and stores first.
-  // ensuring that all loads precede all stores. This can cause severe
+  // For cases within the target-specified limits, this is the best choice.
-  // register pressure, so targets should be careful with the size limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (ConstantSize) {
    // Memmove with size zero? Just return the original chain.
    if (ConstantSize->isNullValue())
      return Chain;
    SDOperand Result =
      getMemmoveLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
                               Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
    if (Result.Val)
      return Result;
  }
  // Then check to see if we should lower the memmove with target-specific
  // code. If the target chooses to do this, this is the next best.
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -737,7 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
-  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
+  maxStoresPerMemmove = 3; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
  setPrefLoopAlignment(16);
 }
--- a/llvm/test/CodeGen/X86/memmove-4.ll
+++ b/llvm/test/CodeGen/X86/memmove-4.ll
@ -0,0 +1,12 @@
 ; RUN: llvm-as < %s | llc | not grep call
 target triple = "i686-pc-linux-gnu"
 ; Moves 12 bytes from %b to %a through the llvm.memmove.i32 intrinsic, with an
 ; alignment operand of 4. The size is a small constant, so the backend is
 ; expected to expand the move inline; the RUN line above fails the test if
 ; llc's output still contains a match for its grep pattern.
 define void @a(i8* %a, i8* %b) nounwind {
        ; The two bitcasts are i8* -> i8* and leave the pointers unchanged.
        %tmp2 = bitcast i8* %a to i8*
        %tmp3 = bitcast i8* %b to i8*
        ; Operands: destination, source, byte count (12), alignment (4).
        tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
        ret void
 }
 declare void @llvm.memmove.i32(i8*, i8*, i32, i32)