From 00c0c8c87d300406d8121bcdf02ae75b9b1af9c3 Mon Sep 17 00:00:00 2001 From: Shimin Cui Date: Wed, 31 Mar 2021 12:36:49 -0400 Subject: [PATCH] =?UTF-8?q?[PowerPC]=C2=A0[MLICM]=20Enable=20hoisting=20of?= =?UTF-8?q?=20caller=20preserved=20registers=20on=20AIX?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc64 Linux, MachineLICM will hoist caller-preserved registers, including TOC loads of the global variable address, out of loops. This patch enables the same hoisting on AIX for both ppc64 and ppc32. Differential Revision: https://reviews.llvm.org/D99076 --- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 16 ++++--- llvm/test/CodeGen/PowerPC/licm-tocReg.ll | 46 +++++++++++++-------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index cb3d3d200bf7..d0c40842a45c 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -403,22 +403,20 @@ bool PPCRegisterInfo::isCallerPreservedPhysReg(MCRegister PhysReg, assert(Register::isPhysicalRegister(PhysReg)); const PPCSubtarget &Subtarget = MF.getSubtarget(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (!TM.isPPC64()) - return false; - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.is64BitELFABI() && !Subtarget.isAIXABI()) return false; - if (PhysReg == PPC::X2) - // X2 is guaranteed to be preserved within a function if it is reserved. + if (PhysReg == Subtarget.getTOCPointerRegister()) + // X2/R2 is guaranteed to be preserved within a function if it is reserved. // The reason it's reserved is that it's the TOC pointer (and the function // uses the TOC). In functions where it isn't reserved (i.e. leaf functions // with no TOC access), we can't claim that it is preserved. 
- return (getReservedRegs(MF).test(PPC::X2)); - if (StackPtrConst && (PhysReg == PPC::X1) && !MFI.hasVarSizedObjects() - && !MFI.hasOpaqueSPAdjustment()) + return (getReservedRegs(MF).test(PhysReg)); + if (StackPtrConst && PhysReg == Subtarget.getStackPointerRegister() && + !MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment()) // The value of the stack pointer does not change within a function after // the prologue and before the epilogue if there are no dynamic allocations - // and no inline asm which clobbers X1. + // and no inline asm which clobbers X1/R1. return true; return false; } diff --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll index faad41a56ad0..2772e925ca25 100644 --- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll +++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll @@ -1,4 +1,6 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefixes=CHECK,CHECKLX %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECK,CHECKAIX %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECK,CHECKAIX32 %s ; The instructions ADDIStocHA8/LDtocL are used to calculate the address of ; globals. 
The ones that are in bb.3.if.end could not be hoisted by Machine @@ -65,22 +67,32 @@ define signext i32 @test(i32 (i32)* nocapture %FP) local_unnamed_addr #0 { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK: addis 4, 2, .LC0@toc@ha -; CHECK-NEXT: addis 5, 2, .LC1@toc@ha -; CHECK-NEXT: mr 12, 3 -; CHECK-NEXT: ld 4, .LC0@toc@l(4) -; CHECK-NEXT: ld 5, .LC1@toc@l(5) -; CHECK-NEXT: lwz 6, 0(4) -; CHECK-NEXT: lwz 7, 0(5) -; CHECK-NEXT: cmpw 6, 7 -; CHECK-NEXT: lwz 6, 0(4) -; CHECK-NEXT: bgt 0, .LBB0_2 -; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha -; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_1: # %if.end -; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha -; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha +; CHECKLX: addis 4, 2, .LC0@toc@ha +; CHECKLX-NEXT: addis 5, 2, .LC1@toc@ha +; CHECKLX-NEXT: mr 12, 3 +; CHECKLX-NEXT: ld 4, .LC0@toc@l(4) +; CHECKLX-NEXT: ld 5, .LC1@toc@l(5) +; CHECKLX-NEXT: lwz 6, 0(4) +; CHECKLX-NEXT: lwz 7, 0(5) +; CHECKLX-NEXT: cmpw 6, 7 +; CHECKLX-NEXT: lwz 6, 0(4) +; CHECKLX-NEXT: bgt 0, .LBB0_2 +; CHECKLX-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha +; CHECKLX-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha +; CHECKLX-NEXT: .p2align 5 +; CHECKLX-NEXT: .LBB0_1: # %if.end +; CHECKLX-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha +; CHECKLX-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha +; CHECKAIX: ld 5, L..C0(2) +; CHECKAIX-NEXT: ld 6, L..C1(2) +; CHECKAIX-NEXT: L..BB0_1: # %if.end +; CHECKAIX-NOT: ld {{[0-9]+}}, L..C0(2) +; CHECKAIX-NOT: ld {{[0-9]+}}, L..C1(2) +; CHECKAIX32: lwz 5, L..C0(2) +; CHECKAIX32-NEXT: lwz 6, L..C1(2) +; CHECKAIX32-NEXT: L..BB0_1: # %if.end +; CHECKAIX32-NOT: lwz 5, L..C0(2) +; CHECKAIX32-NOT: lwz 6, L..C1(2) ; CHECK: blr entry: %0 = load volatile i32, i32* @ga, align 4