forked from OSchip/llvm-project
[X86] Add support for using fast short rep mov for memcpy lowering.
The new lowering is disabled by default and is enabled with the hidden -x86-use-fsrm-for-memcpy option. Differential Revision: https://reviews.llvm.org/D86883
This commit is contained in:
parent
72e2fbde54
commit
0ab6a15698
|
@ -3109,7 +3109,7 @@ argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
|
|||
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
|
||||
SDValue Chain, ISD::ArgFlagsTy Flags,
|
||||
SelectionDAG &DAG, const SDLoc &dl) {
|
||||
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
|
||||
SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
|
||||
|
||||
return DAG.getMemcpy(
|
||||
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
|
||||
|
|
|
@ -24,6 +24,10 @@ using namespace llvm;
|
|||
|
||||
#define DEBUG_TYPE "x86-selectiondag-info"
|
||||
|
||||
// Hidden command-line flag "x86-use-fsrm-for-memcpy"; it defaults to false
// (cl::init(false)), so the fast-short-rep-mov memcpy lowering is opt-in.
static cl::opt<bool>
|
||||
UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
|
||||
cl::desc("Use fast short rep mov in memcpy lowering"));
|
||||
|
||||
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
|
||||
SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
|
||||
// We cannot use TRI->hasBasePointer() until *after* we select all basic
|
||||
|
@ -306,6 +310,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
|
|||
const X86Subtarget &Subtarget =
|
||||
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
|
||||
|
||||
// If enabled and available, use fast short rep mov.
|
||||
if (UseFSRMForMemcpy && Subtarget.hasFSRM())
|
||||
return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
|
||||
|
||||
/// Handle constant sizes,
|
||||
if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
|
||||
return emitConstantSizeRepmov(
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
|
||||
; test1: memcpy whose size (%s) is a function argument rather than a constant.
; The NOFSRM prefix expects a tail jump to memcpy; the FSRM prefix expects the
; size to be moved into %rcx followed by an inline rep;movsb.
define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
|
||||
; NOFSRM-LABEL: test1
|
||||
; NOFSRM: # %bb.0:
|
||||
; NOFSRM: jmp memcpy
|
||||
;
|
||||
; FSRM-LABEL: test1
|
||||
; FSRM: # %bb.0:
|
||||
; FSRM-NEXT: movq %rdx, %rcx
|
||||
; FSRM-NEXT: rep;movsb (%rsi), %es:(%rdi)
|
||||
; FSRM-NEXT: retq
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
|
||||
ret void
|
||||
|
|
||||
}
|
||||
|
||||
; Check that we don't crash due to a memcpy size type mismatch error ("Cannot
|
||||
; emit physreg copy instruction") in X86InstrInfo::copyPhysReg.
|
||||
%struct = type { [4096 x i8] }
|
||||
declare void @foo(%struct* byval)
|
||||
; test2: forwards %x to @foo as a byval argument of type %struct (a 4096-byte
; array). No CHECK lines are attached to this function.
define void @test2(%struct* %x) {
|
||||
call void @foo(%struct* byval %x)
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue