From c6c13c58eebda605a9a05f1f13cac1e46407afc7 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 20 Jul 2021 20:53:22 +0200 Subject: [PATCH] [SystemZ] Implement memcpy of variable length with MVC. Instead of making a memcpy libcall, emit an MVC loop and an EXRL instruction the same way as is already done for memset 0. Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D106874 --- .../Target/SystemZ/SystemZISelLowering.cpp | 1 + .../SystemZ/SystemZSelectionDAGInfo.cpp | 26 ++++++++++++------- llvm/test/CodeGen/SystemZ/loop-03.ll | 2 +- llvm/test/CodeGen/SystemZ/memcpy-01.ll | 25 ++++++++++++++++++ .../SystemZ/tail-call-mem-intrinsics.ll | 8 ------ 5 files changed, 43 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index e65dfea15f0c..8b649a7d7983 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -8532,6 +8532,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( return emitAtomicCmpSwapW(MI, MBB); case SystemZ::MVCSequence: case SystemZ::MVCLoop: + case SystemZ::MVCLoopVarLen: return emitMemMemWrapper(MI, MBB, SystemZ::MVC); case SystemZ::NCSequence: case SystemZ::NCLoop: diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 4a9ea69d101c..aad0180a2924 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -45,6 +45,17 @@ static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence, DAG.getConstant(Size, DL, PtrVT)); } +static SDValue emitMemMemVarLen(SelectionDAG &DAG, const SDLoc &DL, + unsigned Loop, SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size) { + SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64, + DAG.getZExtOrTrunc(Size, DL, MVT::i64), + DAG.getConstant(-1, DL, MVT::i64)); + 
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1, + DAG.getConstant(8, DL, MVT::i64)); + return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, LenMinus1, TripC); +} + SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline, @@ -55,7 +66,8 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, Chain, Dst, Src, CSize->getZExtValue()); - return SDValue(); + + return emitMemMemVarLen(DAG, DL, SystemZISD::MVC_LOOP, Chain, Dst, Src, Size); } // Handle a memset of 1, 2, 4 or 8 bytes with the operands given by @@ -140,16 +152,10 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( } // Variable length - if (CByte && CByte->getZExtValue() == 0) { + if (CByte && CByte->getZExtValue() == 0) // Handle the special case of a variable length memset of 0 with XC. 
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64, - DAG.getZExtOrTrunc(Size, DL, MVT::i64), - DAG.getConstant(-1, DL, MVT::i64)); - SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1, - DAG.getConstant(8, DL, MVT::i64)); - return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst, - LenMinus1, TripC); - } + return emitMemMemVarLen(DAG, DL, SystemZISD::XC_LOOP, Chain, Dst, Dst, Size); + return SDValue(); } diff --git a/llvm/test/CodeGen/SystemZ/loop-03.ll b/llvm/test/CodeGen/SystemZ/loop-03.ll index 7ba7165cdff1..e3230436affc 100644 --- a/llvm/test/CodeGen/SystemZ/loop-03.ll +++ b/llvm/test/CodeGen/SystemZ/loop-03.ll @@ -19,7 +19,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r define void @fun0(%0*) { ; CHECK-LABEL: .LBB0_4 -; CHECK: => This Inner Loop Header: Depth=2 +; CHECK: => This Inner Loop Header ; CHECK-NOT: 16-byte Folded Spill ; CHECK-NOT: 16-byte Folded Reload diff --git a/llvm/test/CodeGen/SystemZ/memcpy-01.ll b/llvm/test/CodeGen/SystemZ/memcpy-01.ll index ee4e71b53ceb..63f334712895 100644 --- a/llvm/test/CodeGen/SystemZ/memcpy-01.ll +++ b/llvm/test/CodeGen/SystemZ/memcpy-01.ll @@ -217,3 +217,28 @@ define void @f16() { call void @foo(i8* %dest, i8* %src) ret void } + +; Test a variable length loop. 
+define void @f17(i8* %dest, i8* %src, i64 %Len) { +; CHECK-LABEL: f17: +; CHECK: # %bb.0: +; CHECK-NEXT: aghi %r4, -1 +; CHECK-NEXT: cgibe %r4, -1, 0(%r14) +; CHECK-NEXT: .LBB16_1: +; CHECK-NEXT: srlg %r0, %r4, 8 +; CHECK-NEXT: cgije %r0, 0, .LBB16_3 +; CHECK-NEXT: .LBB16_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: pfd 2, 768(%r2) +; CHECK-NEXT: mvc 0(256,%r2), 0(%r3) +; CHECK-NEXT: la %r2, 256(%r2) +; CHECK-NEXT: la %r3, 256(%r3) +; CHECK-NEXT: brctg %r0, .LBB16_2 +; CHECK-NEXT: .LBB16_3: +; CHECK-NEXT: exrl %r4, .Ltmp0 +; CHECK-NEXT: br %r14 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %Len, i1 false) + ret void +} + +; CHECK: .Ltmp0: +; CHECK-NEXT: mvc 0(1,%r2), 0(%r3) diff --git a/llvm/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll b/llvm/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll index 4633175830a4..7ceab2795a68 100644 --- a/llvm/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll @@ -1,13 +1,5 @@ ; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s -; CHECK-LABEL: tail_memcpy: -; CHECK: jg memcpy -define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { -entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) - ret void -} - ; CHECK-LABEL: tail_memmove: ; CHECK: jg memmove define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {