forked from OSchip/llvm-project
[SystemZ] Implement memcpy of variable length with MVC.
Instead of making a memcpy libcall for a variable-length memcpy, emit an MVC loop followed by an EXRL instruction, the same way as is already done for a variable-length memset of 0. Review: Ulrich Weigand. Differential Revision: https://reviews.llvm.org/D106874
This commit is contained in:
parent
be26e6ff73
commit
c6c13c58ee
|
@ -8532,6 +8532,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
|
|||
return emitAtomicCmpSwapW(MI, MBB);
|
||||
case SystemZ::MVCSequence:
|
||||
case SystemZ::MVCLoop:
|
||||
case SystemZ::MVCLoopVarLen:
|
||||
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
|
||||
case SystemZ::NCSequence:
|
||||
case SystemZ::NCLoop:
|
||||
|
|
|
@ -45,6 +45,17 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
|
|||
DAG.getConstant(Size, DL, PtrVT));
|
||||
}
|
||||
|
||||
// Build the DAG node for a memory-to-memory operation whose length is only
// known at run time.
//
// Loop is the opcode of the node to create; the callers visible in this
// change pass SystemZISD::MVC_LOOP (memcpy) and SystemZISD::XC_LOOP (memset
// of 0, with Dst used for both address operands). Chain, Dst and Src are
// forwarded unchanged as the first three operands. Size is the byte count,
// of any integer width; it is zero-extended or truncated to i64 here.
//
// Returns a node of type MVT::Other with the operands
// (Chain, Dst, Src, LenMinus1, TripC).
static SDValue emitMemMemVarLen(SelectionDAG &DAG, const SDLoc &DL,
|
||||
unsigned Loop, SDValue Chain, SDValue Dst,
|
||||
SDValue Src, SDValue Size) {
|
||||
// LenMinus1 = (i64)Size - 1, expressed as an ADD of the constant -1.
// NOTE(review): for Size == 0 this is -1; the expected output of the f17
// test compares against -1 and returns early, so that case is presumably
// handled when the node is expanded -- confirm in the custom inserter.
SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
|
||||
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
|
||||
DAG.getConstant(-1, DL, MVT::i64));
|
||||
// TripC = LenMinus1 >> 8 (logical shift), i.e. LenMinus1 / 256.
// NOTE(review): presumably the number of full 256-byte blocks handled by
// the loop, with the rest done by the EXRL-executed instruction -- confirm.
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
|
||||
DAG.getConstant(8, DL, MVT::i64));
|
||||
return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, LenMinus1, TripC);
|
||||
}
|
||||
|
||||
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
|
||||
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
|
||||
SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
|
||||
|
@ -55,7 +66,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
|
|||
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
|
||||
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
|
||||
Chain, Dst, Src, CSize->getZExtValue());
|
||||
return SDValue();
|
||||
|
||||
return emitMemMemVarLen(DAG, DL, SystemZISD::MVC_LOOP, Chain, Dst, Src, Size);
|
||||
}
|
||||
|
||||
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
|
||||
|
@ -140,16 +152,10 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
|
|||
}
|
||||
|
||||
// Variable length
|
||||
if (CByte && CByte->getZExtValue() == 0) {
|
||||
if (CByte && CByte->getZExtValue() == 0)
|
||||
// Handle the special case of a variable length memset of 0 with XC.
|
||||
SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
|
||||
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
|
||||
DAG.getConstant(-1, DL, MVT::i64));
|
||||
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
|
||||
DAG.getConstant(8, DL, MVT::i64));
|
||||
return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
|
||||
LenMinus1, TripC);
|
||||
}
|
||||
return emitMemMemVarLen(DAG, DL, SystemZISD::XC_LOOP, Chain, Dst, Dst, Size);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
|
|||
|
||||
define void @fun0(%0*) {
|
||||
; CHECK-LABEL: .LBB0_4
|
||||
; CHECK: => This Inner Loop Header: Depth=2
|
||||
; CHECK: => This Inner Loop Header
|
||||
; CHECK-NOT: 16-byte Folded Spill
|
||||
; CHECK-NOT: 16-byte Folded Reload
|
||||
|
||||
|
|
|
@ -217,3 +217,28 @@ define void @f16() {
|
|||
call void @foo(i8* %dest, i8* %src)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test a variable-length memcpy: expect a loop of 256-byte MVCs followed by an EXRL of the MVC at .Ltmp0 for the remaining bytes.
|
||||
define void @f17(i8* %dest, i8* %src, i64 %Len) {
|
||||
; CHECK-LABEL: f17:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: aghi %r4, -1
|
||||
; CHECK-NEXT: cgibe %r4, -1, 0(%r14)
|
||||
; CHECK-NEXT: .LBB16_1:
|
||||
; CHECK-NEXT: srlg %r0, %r4, 8
|
||||
; CHECK-NEXT: cgije %r0, 0, .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: pfd 2, 768(%r2)
|
||||
; CHECK-NEXT: mvc 0(256,%r2), 0(%r3)
|
||||
; CHECK-NEXT: la %r2, 256(%r2)
|
||||
; CHECK-NEXT: la %r3, 256(%r3)
|
||||
; CHECK-NEXT: brctg %r0, .LBB16_2
|
||||
; CHECK-NEXT: .LBB16_3:
|
||||
; CHECK-NEXT: exrl %r4, .Ltmp0
|
||||
; CHECK-NEXT: br %r14
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %Len, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: .Ltmp0:
|
||||
; CHECK-NEXT: mvc 0(1,%r2), 0(%r3)
|
||||
|
|
|
@ -1,13 +1,5 @@
|
|||
; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: tail_memcpy:
|
||||
; CHECK: jg memcpy
|
||||
define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: tail_memmove:
|
||||
; CHECK: jg memmove
|
||||
define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
|
||||
|
|
Loading…
Reference in New Issue