[AArch64] Lower calls with rv_marker attribute.

This patch adds support for lowering function calls with the
rv_marker attribute. The goal is to expand such calls to the
following sequence of instructions:

    BL @fn
    mov x29, x29

This sequence of instructions triggers Objective-C runtime optimizations,
hence we want to ensure no instructions get moved in between them.
This patch achieves that by adding a new CALL_RVMARKER ISD node,
which gets turned into the BLR_RVMARKER pseudo, which eventually gets
expanded into the sequence mentioned above. The sequence is then marked
as an instruction bundle, to prevent anything from being moved in between.

@ahatanak is working on using this attribute in the front- & middle-end.

Together with the front- & middle-end changes, this should address
PR31925 for AArch64.

Reviewed By: t.p.northover

Differential Revision: https://reviews.llvm.org/D92569
This commit is contained in:
Florian Hahn 2020-12-11 19:37:14 +00:00
parent 32910f780d
commit a87fccb3ff
No known key found for this signature in database
GPG Key ID: 61D7554B5CECDC0D
6 changed files with 242 additions and 2 deletions

View File

@ -83,6 +83,8 @@ private:
bool expandSVESpillFill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opc,
unsigned N);
/// Expand the BLR_RVMARKER pseudo at \p MBBI into a BL/BLR call followed by
/// the `mov x29, x29` marker and bundle both instructions together.
bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
};
} // end anonymous namespace
@ -627,6 +629,46 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
return true;
}
/// Expand the BLR_RVMARKER pseudo referenced by \p MBBI into the concrete call
/// followed by the special `mov x29, x29` marker, and wrap both instructions
/// in a bundle so that later passes cannot move other code in between.
/// Always returns true.
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  MachineInstr &Pseudo = *MBBI;

  // Operand 0 is the callee: a global selects BL, a register selects BLR.
  MachineOperand &Callee = Pseudo.getOperand(0);
  assert((Callee.isGlobal() || Callee.isReg()) &&
         "invalid operand for regular call");
  const unsigned CallOpc = Callee.isGlobal() ? AArch64::BL : AArch64::BLR;
  MachineInstr *Call =
      BuildMI(MBB, MBBI, Pseudo.getDebugLoc(), TII->get(CallOpc)).getInstr();
  Call->addOperand(Callee);

  // The register arguments added during ISel are not needed on the concrete
  // branch, so step over them until the register mask is reached.
  unsigned OpIdx = 1;
  for (; !Pseudo.getOperand(OpIdx).isRegMask(); ++OpIdx) {
    assert(Pseudo.getOperand(OpIdx).isReg() &&
           "should only skip register operands");
  }

  // Carry over the register mask and every operand that follows it.
  while (OpIdx < Pseudo.getNumOperands()) {
    Call->addOperand(Pseudo.getOperand(OpIdx));
    ++OpIdx;
  }

  // The marker is built as `ORRXrs fp, xzr, fp, 0`, inserted right after the
  // call (both are placed in front of the pseudo, in this order).
  MachineInstr *Marker =
      BuildMI(MBB, MBBI, Pseudo.getDebugLoc(), TII->get(AArch64::ORRXrs))
          .addReg(AArch64::FP)
          .addReg(AArch64::XZR)
          .addReg(AArch64::FP)
          .addImm(0)
          .getInstr();

  // Hand the pseudo's call site info over before the pseudo is deleted.
  if (Pseudo.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&Pseudo, Marker);

  Pseudo.eraseFromParent();

  // Bundle [call, marker] so nothing can be scheduled between the two.
  finalizeBundle(MBB, Call->getIterator(), std::next(Marker->getIterator()));
  return true;
}
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@ -1014,6 +1056,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
case AArch64::BLR_RVMARKER:
return expandCALL_RVMARKER(MBB, MBBI);
}
return false;
}

View File

@ -1934,6 +1934,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UABD)
MAKE_CASE(AArch64ISD::SABD)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
}
#undef MAKE_CASE
return nullptr;
@ -5539,8 +5540,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls marked with "rv_marker" are special. They should be expanded to the
// call, directly followed by a special marker sequence. Use the CALL_RVMARKER
// to do that.
if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
assert(!IsTailCall && "tail calls cannot be marked with rv_marker");
CallOpc = AArch64ISD::CALL_RVMARKER;
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

View File

@ -417,7 +417,11 @@ enum NodeType : unsigned {
LDP,
STP,
STNP
STNP,
// Pseudo for an ObjC call that gets emitted together with a special `mov
// x29, x29` marker instruction.
CALL_RVMARKER
};
} // end namespace AArch64ISD

View File

@ -399,6 +399,12 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// SelectionDAG node for AArch64ISD::CALL_RVMARKER, i.e. calls that must be
// followed by the `mov x29, x29` marker. It uses the same type profile and
// node properties as AArch64call.
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
@ -2089,6 +2095,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
Sched<[WriteBrReg]>,
PseudoInstExpansion<(BLR GPR64:$Rn)>;
// Pseudo call with variable operands. AArch64ExpandPseudo expands it into a
// BL/BLR followed by the `mov x29, x29` marker.
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
@ -2098,6 +2106,10 @@ def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
// Select rv_marker calls to the BLR_RVMARKER pseudo.
// NOTE(review): this pattern is only enabled under NoSLSBLRMitigation and no
// counterpart for SLSBLRMitigation is visible here - confirm how rv_marker
// calls are selected when the mitigation is enabled.
def : Pat<(AArch64call_rvmarker GPR64:$Rn),
(BLR_RVMARKER GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

View File

@ -0,0 +1,149 @@
; RUN: llc -o - %s | FileCheck --check-prefix=SELDAG --check-prefix=CHECK %s
; RUN: llc -global-isel -o - %s | FileCheck --check-prefix=GISEL --check-prefix=CHECK %s
; TODO: support marker generation with GlobalISel
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-iphoneos"
declare i8* @foo0(i32)
declare i8* @foo1()
declare void @llvm.objc.release(i8*)
declare void @objc_object(i8*)
declare void @foo2(i8*)
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1))
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
%struct.S = type { i8 }
@g = global i8* null, align 8
@fptr = global i8* ()* null, align 8
; Direct call with the "rv_marker" return attribute: SelectionDAG must emit
; the `mov x29, x29` marker right after the `bl`; GlobalISel is expected not
; to emit the marker.
define i8* @rv_marker_1() {
; CHECK-LABEL: rv_marker_1:
; CHECK: .cfi_offset w30, -16
; CHECK-NEXT: bl foo1
; SELDAG-NEXT: mov x29, x29
; GISEL-NOT: mov x29, x29
;
entry:
%call = call "rv_marker" i8* @foo1()
ret i8* %call
}
; "rv_marker" call whose argument comes from a select and whose result is
; passed to a tail call: under SelectionDAG the marker must sit directly
; between the `bl` and the epilogue that precedes the tail call.
define void @rv_marker_2_select(i32 %c) {
; CHECK-LABEL: rv_marker_2_select:
; SELDAG: cinc w0, w8, eq
; GISEL: csinc w0, w8, wzr, eq
; CHECK-NEXT: bl foo0
; SELDAG-NEXT: mov x29, x29
; CHECK-NEXT: ldr x30, [sp], #16
; CHECK-NEXT: b foo2
;
entry:
%tobool.not = icmp eq i32 %c, 0
%.sink = select i1 %tobool.not, i32 2, i32 1
%call1 = call "rv_marker" i8* @foo0(i32 %.sink)
tail call void @foo2(i8* %call1)
ret void
}
; "rv_marker" call in a function with a personality routine, followed by an
; invoke that uses the call result: under SelectionDAG the marker must
; directly follow the `bl`.
define void @rv_marker_3() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: rv_marker_3
; CHECK: .cfi_offset w30, -32
; CHECK-NEXT: bl foo1
; SELDAG-NEXT: mov x29, x29
;
entry:
%call = call "rv_marker" i8* @foo1()
invoke void @objc_object(i8* %call) #5
to label %invoke.cont unwind label %lpad
invoke.cont: ; preds = %entry
tail call void @llvm.objc.release(i8* %call)
ret void
lpad: ; preds = %entry
%0 = landingpad { i8*, i32 }
cleanup
tail call void @llvm.objc.release(i8* %call)
resume { i8*, i32 } %0
}
; The "rv_marker" attribute is placed on an invoke here: under SelectionDAG
; the marker must directly follow the `bl` and come before the label that
; follows the call (.Ltmp4).
define void @rv_marker_4() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: rv_marker_4
; CHECK: .Ltmp3:
; CHECK-NEXT: bl foo1
; SELDAG-NEXT: mov x29, x29
; CHECK-NEXT: .Ltmp4:
;
entry:
%s = alloca %struct.S, align 1
%0 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0
call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2
%call = invoke "rv_marker" i8* @foo1()
to label %invoke.cont unwind label %lpad
invoke.cont: ; preds = %entry
invoke void @objc_object(i8* %call) #5
to label %invoke.cont2 unwind label %lpad1
invoke.cont2: ; preds = %invoke.cont
tail call void @llvm.objc.release(i8* %call)
%call3 = call %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1) %s)
call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0)
ret void
lpad: ; preds = %entry
%1 = landingpad { i8*, i32 }
cleanup
br label %ehcleanup
lpad1: ; preds = %invoke.cont
%2 = landingpad { i8*, i32 }
cleanup
tail call void @llvm.objc.release(i8* %call)
br label %ehcleanup
ehcleanup: ; preds = %lpad1, %lpad
%.pn = phi { i8*, i32 } [ %2, %lpad1 ], [ %1, %lpad ]
%call4 = call %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1) %s)
call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0)
resume { i8*, i32 } %.pn
}
; Indirect call through a loaded function pointer with the "rv_marker" return
; attribute: SelectionDAG must emit the `mov x29, x29` marker directly after
; the `blr`; GlobalISel is expected not to emit the marker.
define i8* @rv_marker_5_indirect_call() {
; CHECK-LABEL: rv_marker_5_indirect_call
; CHECK: ldr [[ADDR:x[0-9]+]], [
; CHECK-NEXT: blr [[ADDR]]
; SELDAG-NEXT: mov x29, x29
; GISEL-NOT: mov x29, x29
;
entry:
%0 = load i8* ()*, i8* ()** @fptr, align 8
%call = call "rv_marker" i8* %0()
tail call void @foo2(i8* %call)
ret i8* %call
}
declare void @foo(i64, i64, i64)
; "rv_marker" call that passes its three arguments in reversed order, so
; argument registers are shuffled before the call: under SelectionDAG the
; marker must directly follow the `bl`; GlobalISel is expected not to emit it.
define void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: rv_marker_multiarg
; CHECK: mov [[TMP:x[0-9]+]], x0
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: mov x2, [[TMP]]
; CHECK-NEXT: bl foo
; SELDAG-NEXT: mov x29, x29
; GISEL-NOT: mov x29, x29
call "rv_marker" void @foo(i64 %c, i64 %b, i64 %a)
ret void
}
declare i32 @__gxx_personality_v0(...)

View File

@ -0,0 +1,21 @@
# RUN: llc -run-pass=aarch64-expand-pseudo -mtriple=arm64-apple-ios -o - -emit-call-site-info %s | FileCheck %s
# Runs only the aarch64-expand-pseudo pass on a BLR_RVMARKER that has call site
# info attached, and expects a bundle holding the BLR and the ORRXrs marker.
# CHECK-LABEL: test_1_callsite_info
# CHECK: bb.0.entry:
# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit $x0, implicit $sp, implicit $fp, implicit $xzr {
# CHECK-NEXT: BLR $x0, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0
# CHECK-NEXT: ORRXrs $fp, $xzr, $fp, 0
# CHECK-NEXT: }
# CHECK-NEXT: RET undef $lr, implicit killed $w0
---
name: test_1_callsite_info
callSites:
- {bb: 0, offset: 0, fwdArgRegs:
- { arg: 0, reg: '$x0' } }
body: |
bb.0.entry:
liveins: $lr, $x0
BLR_RVMARKER $x0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0
RET_ReallyLR implicit killed $w0
...