forked from OSchip/llvm-project
Expand small memmoves using inline code. Set the X86 threshold for expanding
memmove to a more plausible value, now that it's actually being used. llvm-svn: 51696
This commit is contained in:
parent
d8734cf916
commit
714663ab94
|
@ -2695,8 +2695,8 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
|
||||||
const Value *SrcSV, uint64_t SrcSVOff){
|
const Value *SrcSV, uint64_t SrcSVOff){
|
||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
|
||||||
// Expand memcpy to a series of store ops if the size operand falls below
|
// Expand memcpy to a series of load and store ops if the size operand falls
|
||||||
// a certain threshold.
|
// below a certain threshold.
|
||||||
std::vector<MVT::ValueType> MemOps;
|
std::vector<MVT::ValueType> MemOps;
|
||||||
uint64_t Limit = -1;
|
uint64_t Limit = -1;
|
||||||
if (!AlwaysInline)
|
if (!AlwaysInline)
|
||||||
|
@ -2743,6 +2743,63 @@ static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
|
||||||
&OutChains[0], OutChains.size());
|
&OutChains[0], OutChains.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getMemmoveLoadsAndStores - Try to lower a constant-size memmove into an
/// explicit sequence of loads followed by a sequence of stores.
///
/// Every load is created on the incoming chain, and the chains of all loads
/// are joined with a TokenFactor before the first store is created. Each
/// store is chained on that TokenFactor, so every store is ordered after
/// every load. All loaded values are therefore live at once before any store
/// is issued.
///
/// \param DAG          The DAG in which the nodes are created.
/// \param Chain        The incoming chain.
/// \param Dst, Src     Base addresses of the destination and the source.
/// \param Size         Number of bytes to move.
/// \param Align        Alignment passed to every load; also the initial
///                     destination alignment.
/// \param AlwaysInline If true, the target's per-memmove store limit is not
///                     consulted.
/// \param DstSV, DstSVOff, SrcSV, SrcSVOff
///                     Source-value information forwarded to the created
///                     stores and loads.
///
/// \returns A null SDOperand if MeetsMaxMemopRequirement rejects the
/// expansion, otherwise a TokenFactor joining the chains of all stores (or
/// the incoming chain if there is nothing to copy).
static SDOperand getMemmoveLoadsAndStores(SelectionDAG &DAG,
                                          SDOperand Chain, SDOperand Dst,
                                          SDOperand Src, uint64_t Size,
                                          unsigned Align, bool AlwaysInline,
                                          const Value *DstSV, uint64_t DstSVOff,
                                          const Value *SrcSV, uint64_t SrcSVOff){
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Expand memmove to a series of load and store ops if the size operand falls
  // below a certain threshold.
  std::vector<MVT::ValueType> MemOps;
  // -1 converts to the largest uint64_t value; it is only replaced by the
  // target's limit when inlining is not forced.
  uint64_t Limit = -1;
  if (!AlwaysInline)
    Limit = TLI.getMaxStoresPerMemmove();
  unsigned DstAlign = Align;  // Destination alignment can change.
  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
                                DAG, TLI))
    return SDOperand();

  // Nothing to copy: avoid taking the address of element 0 of empty vectors
  // below and simply hand back the incoming chain.
  unsigned NumMemOps = MemOps.size();
  if (NumMemOps == 0)
    return Chain;

  uint64_t SrcOff = 0, DstOff = 0;

  SmallVector<SDOperand, 8> LoadValues;
  SmallVector<SDOperand, 8> LoadChains;
  SmallVector<SDOperand, 8> OutChains;

  // Phase 1: issue every load on the original chain, remembering both the
  // loaded value and the load's output chain.
  for (unsigned i = 0; i < NumMemOps; i++) {
    MVT::ValueType VT = MemOps[i];
    unsigned VTSize = MVT::getSizeInBits(VT) / 8;

    SDOperand Value = DAG.getLoad(VT, Chain,
                                  getMemBasePlusOffset(Src, SrcOff, DAG),
                                  SrcSV, SrcSVOff + SrcOff, false, Align);
    LoadValues.push_back(Value);
    LoadChains.push_back(Value.getValue(1));
    SrcOff += VTSize;
  }

  // Join all load chains so that the stores below depend on every load.
  Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                      &LoadChains[0], LoadChains.size());

  // Phase 2: store the previously loaded values to the destination.
  for (unsigned i = 0; i < NumMemOps; i++) {
    MVT::ValueType VT = MemOps[i];
    unsigned VTSize = MVT::getSizeInBits(VT) / 8;

    SDOperand Store = DAG.getStore(Chain, LoadValues[i],
                                   getMemBasePlusOffset(Dst, DstOff, DAG),
                                   DstSV, DstSVOff + DstOff, false, DstAlign);
    OutChains.push_back(Store);
    DstOff += VTSize;
  }

  return DAG.getNode(ISD::TokenFactor, MVT::Other,
                     &OutChains[0], OutChains.size());
}
|
||||||
|
|
||||||
static SDOperand getMemsetStores(SelectionDAG &DAG,
|
static SDOperand getMemsetStores(SelectionDAG &DAG,
|
||||||
SDOperand Chain, SDOperand Dst,
|
SDOperand Chain, SDOperand Dst,
|
||||||
SDOperand Src, uint64_t Size,
|
SDOperand Src, uint64_t Size,
|
||||||
|
@ -2836,9 +2893,20 @@ SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
|
||||||
const Value *DstSV, uint64_t DstSVOff,
|
const Value *DstSV, uint64_t DstSVOff,
|
||||||
const Value *SrcSV, uint64_t SrcSVOff) {
|
const Value *SrcSV, uint64_t SrcSVOff) {
|
||||||
|
|
||||||
// TODO: Optimize small memmove cases with simple loads and stores,
|
// Check to see if we should lower the memmove to loads and stores first.
|
||||||
// ensuring that all loads precede all stores. This can cause severe
|
// For cases within the target-specified limits, this is the best choice.
|
||||||
// register pressure, so targets should be careful with the size limit.
|
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
|
||||||
|
if (ConstantSize) {
|
||||||
|
// Memmove with size zero? Just return the original chain.
|
||||||
|
if (ConstantSize->isNullValue())
|
||||||
|
return Chain;
|
||||||
|
|
||||||
|
SDOperand Result =
|
||||||
|
getMemmoveLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
|
||||||
|
Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
|
||||||
|
if (Result.Val)
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
// Then check to see if we should lower the memmove with target-specific
|
// Then check to see if we should lower the memmove with target-specific
|
||||||
// code. If the target chooses to do this, this is the next best.
|
// code. If the target chooses to do this, this is the next best.
|
||||||
|
|
|
@ -737,7 +737,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
// be smaller when we are in optimizing for size mode.
|
// be smaller when we are in optimizing for size mode.
|
||||||
maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
|
maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
|
||||||
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
|
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
|
||||||
maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
|
maxStoresPerMemmove = 3; // For %llvm.memmove -> sequence of stores
|
||||||
allowUnalignedMemoryAccesses = true; // x86 supports it!
|
allowUnalignedMemoryAccesses = true; // x86 supports it!
|
||||||
setPrefLoopAlignment(16);
|
setPrefLoopAlignment(16);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
; RUN: llvm-as < %s | llc | not grep call
|
||||||
|
|
||||||
|
target triple = "i686-pc-linux-gnu"
|
||||||
|
|
||||||
|
; Regression input for inline memmove expansion: @a forwards its two pointer
; arguments to @llvm.memmove.i32 with the constant operands 12 and 4. The RUN
; line of this test pipes the llc output through `not grep call`, so the test
; passes only if no "call" appears in the generated code.
; NOTE(review): the trailing i32 operands are presumably the byte count (12)
; and the alignment (4) - confirm against the intrinsic's definition.
; The two bitcasts convert i8* to i8* and therefore do not change the type.
define void @a(i8* %a, i8* %b) nounwind {
|
||||||
|
%tmp2 = bitcast i8* %a to i8*
|
||||||
|
%tmp3 = bitcast i8* %b to i8*
|
||||||
|
tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
|
Loading…
Reference in New Issue