forked from OSchip/llvm-project
fix memcpy/memset/memmove lowering when optimizing for size
Fixing MinSize attribute handling was discussed in D11363. This patch is a prerequisite for doing that. The handling of OptSize when lowering mem* functions was broken on Darwin because it wants to ignore -Os for these cases, but the existing logic also made it ignore -Oz (MinSize). The Linux change demonstrates a widespread problem: the backend doesn't usually recognize the MinSize attribute by itself; it assumes that if the MinSize attribute exists, then the OptSize attribute must also exist. Fixing this more generally will be a follow-on patch or two. Differential Revision: http://reviews.llvm.org/D11568 llvm-svn: 243693
This commit is contained in:
parent
0deb694d94
commit
1166f2ff9f
|
@ -4151,6 +4151,18 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
|
||||
const Function *F = MF.getFunction();
|
||||
bool HasMinSize = F->hasFnAttribute(Attribute::MinSize);
|
||||
bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
|
||||
// On Darwin, -Os means optimize for size without hurting performance, so
|
||||
// only really optimize for size when -Oz (MinSize) is used.
|
||||
if (MF.getTarget().getTargetTriple().isOSDarwin())
|
||||
return HasMinSize;
|
||||
return HasOptSize || HasMinSize;
|
||||
}
|
||||
|
||||
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
|
||||
SDValue Chain, SDValue Dst,
|
||||
SDValue Src, uint64_t Size,
|
||||
|
@ -4171,7 +4183,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
|
|||
bool DstAlignCanChange = false;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
bool OptSize = shouldLowerMemFuncForSize(MF);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
|
@ -4284,7 +4296,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
|
|||
bool DstAlignCanChange = false;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
bool OptSize = shouldLowerMemFuncForSize(MF);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
|
@ -4378,7 +4390,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
|
|||
bool DstAlignCanChange = false;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
|
||||
bool OptSize = shouldLowerMemFuncForSize(MF);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
|
|
|
@ -964,11 +964,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
|||
|
||||
//// temporary - rewrite interface to use type
|
||||
MaxStoresPerMemset = 8;
|
||||
MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
|
||||
MaxStoresPerMemsetOptSize = 4;
|
||||
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
|
||||
MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
|
||||
MaxStoresPerMemcpyOptSize = 2;
|
||||
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
|
||||
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
|
||||
MaxStoresPerMemmoveOptSize = 2;
|
||||
|
||||
// On ARM arguments smaller than 4 bytes are extended, so all arguments
|
||||
// are at least 4 bytes aligned.
|
||||
|
|
|
@ -1723,14 +1723,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
|
||||
computeRegisterProperties(Subtarget->getRegisterInfo());
|
||||
|
||||
// On Darwin, -Os means optimize for size without hurting performance,
|
||||
// do not reduce the limit.
|
||||
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
|
||||
MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
|
||||
MaxStoresPerMemsetOptSize = 8;
|
||||
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
|
||||
MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
|
||||
MaxStoresPerMemcpyOptSize = 4;
|
||||
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
|
||||
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
|
||||
MaxStoresPerMemmoveOptSize = 4;
|
||||
setPrefLoopAlignment(4); // 2^4 bytes.
|
||||
|
||||
// Predictable cmov don't hurt on atom because it's in-order.
|
||||
|
|
|
@ -59,48 +59,16 @@ entry:
|
|||
; DARWIN: movq
|
||||
}
|
||||
|
||||
; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on.
|
||||
define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
|
||||
ret void
|
||||
; LINUX-LABEL: test3_minsize:
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: memcpy
|
||||
|
||||
; DARWIN-LABEL: test3_minsize:
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: memcpy
|
||||
}
|
||||
|
||||
; FIXME: Darwin should lower to a memcpy call; minsize is on.
|
||||
define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
|
||||
ret void
|
||||
|
@ -108,22 +76,7 @@ define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind
|
|||
; LINUX: memcpy
|
||||
|
||||
; DARWIN-LABEL: test3_minsize_optsize:
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: memcpy
|
||||
}
|
||||
|
||||
; Large constant memcpy's should be inlined when not optimizing for size.
|
||||
|
|
Loading…
Reference in New Issue