[AArch64] Fix i128 cmpxchg using ldxp/stxp.
Basically two parts to this fix:

1. Stop using AtomicExpand to expand cmpxchg i128.
2. Fix AArch64ExpandPseudoInsts to use a correct expansion.

From the ARM architecture reference:

    To atomically load two 64-bit quantities, perform a Load-Exclusive
    pair/Store-Exclusive pair sequence of reading and writing the same value
    for which the Store-Exclusive pair succeeds, and use the read values from
    the Load-Exclusive pair.

Fixes https://bugs.llvm.org/show_bug.cgi?id=51102

Differential Revision: https://reviews.llvm.org/D106039
This commit is contained in:
parent d51f74acdf
commit 843c614058
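For reference, the shape of the code the fully-ordered CMP_SWAP_128 pseudo now expands to is roughly the following sketch. The register names and labels (xDestLo/xDestHi, xDesiredLo/xDesiredHi, xNewLo/xNewHi, wStatus, .Lloadcmp/.Lstore/.Lfail/.Ldone) are taken from the comments in expandCMP_SWAP_128 below; the weaker-ordered variants substitute ldxp/stxp for ldaxp/stlxp via the LdxpOp/StxpOp switch, and the exact compare sequence differs between SelectionDAG and GlobalISel, so treat this as a sketch rather than verbatim output:

    .Lloadcmp:
        ldaxp  xDestLo, xDestHi, [xAddr]          // load-exclusive the current 128-bit pair
        // compare {xDestLo, xDestHi} with {xDesiredLo, xDesiredHi} (cmp/cset or eor/orr)
        b.ne   .Lfail                              // mismatch: do not simply bail out
    .Lstore:
        stlxp  wStatus, xNewLo, xNewHi, [xAddr]    // try to install the new pair
        cbnz   wStatus, .Lloadcmp                  // exclusive monitor lost, retry
        b      .Ldone
    .Lfail:
        stlxp  wStatus, xDestLo, xDestHi, [xAddr]  // store back the values just read
        cbnz   wStatus, .Lloadcmp                  // retry until the exclusive pair succeeds
    .Ldone:

The new .Lfail block is the substance of the fix: instead of returning a possibly torn value straight from the load-exclusive pair, the expansion completes a store-exclusive of the same pair it just read, so the loaded values are only used once the Store-Exclusive pair succeeds, which is exactly what the architecture text quoted above requires.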
@@ -279,21 +279,46 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
  BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
@@ -315,23 +340,37 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
  BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

@@ -343,9 +382,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
@@ -1093,6 +1136,9 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                       AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
@@ -17008,6 +17008,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicCmpSwap on types less than 128 should be legal");

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
    // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
    // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.

@@ -17018,8 +17019,6 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
        N->getOperand(0), // Chain in
    };

    MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();

    unsigned Opcode;
    switch (MemOp->getMergedOrdering()) {
    case AtomicOrdering::Monotonic:

@@ -17056,6 +17055,25 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
    return;
  }

  unsigned Opcode;
  switch (MemOp->getMergedOrdering()) {
  case AtomicOrdering::Monotonic:
    Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    break;
  case AtomicOrdering::Acquire:
    Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
    break;
  case AtomicOrdering::Release:
    Opcode = AArch64::CMP_SWAP_128_RELEASE;
    break;
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    Opcode = AArch64::CMP_SWAP_128;
    break;
  default:
    llvm_unreachable("Unexpected ordering!");
  }

  auto Desired = splitInt128(N->getOperand(2), DAG);
  auto New = splitInt128(N->getOperand(3), DAG);
  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,

@@ -17063,8 +17081,6 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
  SDNode *CmpSwap = DAG.getMachineNode(
      AArch64::CMP_SWAP_128, SDLoc(N),
      DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,

@@ -17285,6 +17301,13 @@ AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
    return AtomicExpansionKind::None;

  // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
  // it.
  unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size > 64)
    return AtomicExpansionKind::None;

  return AtomicExpansionKind::LLSC;
}
@@ -429,11 +429,16 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
}

let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
    mayLoad = 1, mayStore = 1 in
def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
    mayLoad = 1, mayStore = 1 in {
class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
                            (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
                                 GPR64:$newLo, GPR64:$newHi), []>,
                     Sched<[WriteAtomic]>;
def CMP_SWAP_128 : cmp_swap_128;
def CMP_SWAP_128_RELEASE : cmp_swap_128;
def CMP_SWAP_128_ACQUIRE : cmp_swap_128;
def CMP_SWAP_128_MONOTONIC : cmp_swap_128;
}

// v8.1 Atomic instructions:
let Predicates = [HasLSE] in {
@@ -1210,8 +1210,26 @@ bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
  // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
  // can take arbitrary registers so it just has the normal GPR64 operands the
  // rest of AArch64 is expecting.
  auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
  unsigned Opcode;
  switch (Ordering) {
  case AtomicOrdering::Acquire:
    Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
    break;
  case AtomicOrdering::Release:
    Opcode = AArch64::CMP_SWAP_128_RELEASE;
    break;
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    Opcode = AArch64::CMP_SWAP_128;
    break;
  default:
    Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    break;
  }

  auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch},
  CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
                              {Addr, DesiredI->getOperand(0),
                               DesiredI->getOperand(1), NewI->getOperand(0),
                               NewI->getOperand(1)});
@@ -24,7 +24,7 @@ define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O0: cmp [[OLD_HI]], x3
; CHECK-LLSC-O0: cinc [[EQUAL:w[0-9]+]], [[EQUAL_TMP]], ne
; CHECK-LLSC-O0: cbnz [[EQUAL]], .LBB0_3
; CHECK-LLSC-O0: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0]
; CHECK-LLSC-O0: stxp [[STATUS:w[0-9]+]], x4, x5, [x0]
; CHECK-LLSC-O0: cbnz [[STATUS]], .LBB0_1
; CHECK-LLSC-O0: .LBB0_3:
; CHECK-LLSC-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]]
@@ -23,7 +23,7 @@ body: |
; CHECK: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
; CHECK: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
; CHECK: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
; CHECK: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16)
; CHECK: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16)
; CHECK: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16)

@@ -39,7 +39,7 @@ body: |
; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
@@ -8,11 +8,16 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldaxp [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
; CHECK-DAG: eor [[MISMATCH_LO:x[0-9]+]], [[RESULTLO]], x2
; CHECK-DAG: eor [[MISMATCH_HI:x[0-9]+]], [[RESULTHI]], x3
; CHECK: orr [[MISMATCH:x[0-9]+]], [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: cbnz [[MISMATCH]], [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: stxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
; CHECK: cmp
; CHECK: cset
; CHECK: cmp
; CHECK: cinc [[MISMATCH:w[0-9]+]]
; CHECK: cbz [[MISMATCH]], [[EQUAL:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], [[RESULTLO]], [[RESULTHI]], [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: b [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: [[EQUAL]]:
; CHECK: stlxp [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
; CHECK: cbnz [[SCRATCH_RES]], [[LABEL]]
; CHECK: [[DONE]]:
%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
@@ -234,7 +234,12 @@ define i128 @test_rmw_add_128(i128* %dst) {
; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; NOLSE-NEXT: stlxp w12, x14, x15, [x13]
; NOLSE-NEXT: cbnz w12, .LBB4_2
; NOLSE-NEXT: b .LBB4_5
; NOLSE-NEXT: .LBB4_4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; NOLSE-NEXT: stlxp w12, x10, x9, [x13]
; NOLSE-NEXT: cbnz w12, .LBB4_2
; NOLSE-NEXT: .LBB4_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
; NOLSE-NEXT: eor x11, x9, x11
; NOLSE-NEXT: eor x8, x10, x8

@@ -244,8 +249,8 @@ define i128 @test_rmw_add_128(i128* %dst) {
; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT: cbnz x8, .LBB4_1
; NOLSE-NEXT: b .LBB4_5
; NOLSE-NEXT: .LBB4_5: // %atomicrmw.end
; NOLSE-NEXT: b .LBB4_6
; NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #48 // =48

@@ -630,7 +635,12 @@ define i128 @test_rmw_nand_128(i128* %dst) {
; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
; NOLSE-NEXT: stlxp w12, x14, x15, [x13]
; NOLSE-NEXT: cbnz w12, .LBB9_2
; NOLSE-NEXT: b .LBB9_5
; NOLSE-NEXT: .LBB9_4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
; NOLSE-NEXT: stlxp w12, x10, x9, [x13]
; NOLSE-NEXT: cbnz w12, .LBB9_2
; NOLSE-NEXT: .LBB9_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1
; NOLSE-NEXT: eor x11, x9, x11
; NOLSE-NEXT: eor x8, x10, x8

@@ -640,8 +650,8 @@ define i128 @test_rmw_nand_128(i128* %dst) {
; NOLSE-NEXT: str x10, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT: cbnz x8, .LBB9_1
; NOLSE-NEXT: b .LBB9_5
; NOLSE-NEXT: .LBB9_5: // %atomicrmw.end
; NOLSE-NEXT: b .LBB9_6
; NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
; NOLSE-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload
; NOLSE-NEXT: ldr x0, [sp, #16] // 8-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #48 // =48