[SystemZ] Copy Access registers and CC with the correct register class.

On SystemZ there is a set of "access registers" that can be copied in and
out of 32-bit GPRs with special instructions. These instructions can only
perform the copy using low 32-bit parts of the 64-bit GPRs. However, the
default register class for 32-bit integers is GRX32, which also contains the
high 32-bit part registers.

In order to never end up with a case of such a COPY into a high reg, this
patch adds a new simple pre-RA pass that selects such COPYs into target
instructions.

This pass also handles COPYs from CC (Condition Code register), and COPYs to
CC can now also be emitted from a high reg in copyPhysReg().

Fixes: https://bugs.llvm.org/show_bug.cgi?id=44254

Review: Ulrich Weigand.

Differential Revision: https://reviews.llvm.org/D75014
This commit is contained in:
Jonas Paulsson 2020-02-20 17:42:51 -08:00
parent 573e077699
commit ae4d39c9e4
9 changed files with 234 additions and 17 deletions

View File

@ -16,6 +16,7 @@ add_llvm_target(SystemZCodeGen
SystemZAsmPrinter.cpp
SystemZCallingConv.cpp
SystemZConstantPoolValue.cpp
SystemZCopyPhysRegs.cpp
SystemZElimCompare.cpp
SystemZFrameLowering.cpp
SystemZHazardRecognizer.cpp

View File

@ -193,6 +193,7 @@ FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm

View File

@ -0,0 +1,120 @@
//===---------- SystemZCopyPhysRegs.cpp - Handle phys reg copies ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass makes sure that a COPY of a physical register will be
// implementable after register allocation in copyPhysReg() (this could be
// done in EmitInstrWithCustomInserter() instead if COPY instructions were
// passed to it).
//
//===----------------------------------------------------------------------===//
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
// Human-readable pass name; used by getPassName() and by INITIALIZE_PASS.
#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
namespace llvm {
// Registration hook for this pass; it is invoked from the pass constructor.
void initializeSystemZCopyPhysRegsPass(PassRegistry&);
}
namespace {

/// Machine-function pass that rewrites COPYs involving CC or the access
/// registers into explicit target instructions before register allocation.
class SystemZCopyPhysRegs : public MachineFunctionPass {
  // Per-function state; both pointers are assigned at the start of
  // runOnMachineFunction() and used by visitMBB().
  const SystemZInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;

  /// Processes the COPY instructions of a single basic block and reports
  /// whether the block was changed.
  bool visitMBB(MachineBasicBlock &MBB);

public:
  /// Pass identification; its address is what identifies the pass.
  static char ID;

  SystemZCopyPhysRegs() : MachineFunctionPass(ID) {
    initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;
};

char SystemZCopyPhysRegs::ID = 0;

} // end anonymous namespace
// Registers the pass under the argument name "systemz-copy-physregs" with
// SYSTEMZ_COPYPHYSREGS_NAME as its descriptive name.
INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
SYSTEMZ_COPYPHYSREGS_NAME, false, false)
/// Factory for the SystemZ physreg-copy pass. \p TM is part of the signature
/// like for the other SystemZ pass factories, but is not consulted here.
///
/// \returns a newly allocated pass instance.
FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
  FunctionPass *Pass = new SystemZCopyPhysRegs();
  return Pass;
}
// Declares that this pass preserves the CFG (visitMBB() only inserts
// instructions and rewrites operands inside existing blocks), then defers to
// the MachineFunctionPass base implementation for everything else.
void SystemZCopyPhysRegs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
/// Rewrites the COPYs in \p MBB that move CC or an access register into a
/// virtual register, or a virtual register into an access register, so that
/// the special register is only ever transferred to/from a fresh GR32 virtual
/// register by an explicit IPM, EAR or SAR instruction. The original COPY is
/// kept and now copies between that temporary and the original virtual
/// register.
///
/// \returns true if any instruction was inserted or changed.
bool SystemZCopyPhysRegs::visitMBB(MachineBasicBlock &MBB) {
  bool Changed = false;

  // Certain special registers can only be copied from a subset of the
  // default register class of the type, so the target copy instructions are
  // created here, before regalloc, instead of in copyPhysReg().
  MachineBasicBlock::iterator Next = MBB.begin();
  MachineBasicBlock::iterator End = MBB.end();
  while (Next != End) {
    // Advance first: instructions inserted below land either before the
    // current COPY or directly in front of Next, so none of them is visited.
    MachineInstr &Copy = *Next;
    ++Next;
    if (!Copy.isCopy())
      continue;

    const DebugLoc &Loc = Copy.getDebugLoc();
    const Register Src = Copy.getOperand(1).getReg();
    const Register Dst = Copy.getOperand(0).getReg();
    const bool SrcIsCC = Src == SystemZ::CC;

    // CC or access register -> virtual register: read the special register
    // into a GR32 temporary in front of the COPY and copy from that instead.
    if (Dst.isVirtual() &&
        (SrcIsCC || SystemZ::AR32BitRegClass.contains(Src))) {
      const Register Tmp =
          MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
      MachineInstrBuilder Read =
          BuildMI(MBB, Copy, Loc,
                  TII->get(SrcIsCC ? SystemZ::IPM : SystemZ::EAR), Tmp);
      // Only EAR gets the source added as an explicit operand.
      if (!SrcIsCC)
        Read.addReg(Src);
      Copy.getOperand(1).setReg(Tmp);
      Changed = true;
      continue;
    }

    // Virtual register -> access register: let the COPY define a GR32
    // temporary and set the access register from it right after the COPY.
    if (Src.isVirtual() && SystemZ::AR32BitRegClass.contains(Dst)) {
      const Register Tmp =
          MRI->createVirtualRegister(&SystemZ::GR32BitRegClass);
      Copy.getOperand(0).setReg(Tmp);
      BuildMI(MBB, Next, Loc, TII->get(SystemZ::SAR), Dst).addReg(Tmp);
      Changed = true;
    }
  }

  return Changed;
}
/// Pass entry point: caches the register info and the SystemZ instruction
/// info of \p F, then runs visitMBB() on every basic block.
///
/// \returns true if any block was modified.
bool SystemZCopyPhysRegs::runOnMachineFunction(MachineFunction &F) {
  MRI = &F.getRegInfo();
  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());

  bool Changed = false;
  for (MachineBasicBlock &Block : F) {
    // Every block is visited regardless of whether an earlier one changed.
    if (visitMBB(Block))
      Changed = true;
  }
  return Changed;
}

View File

@ -820,18 +820,11 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
// Move CC value from/to a GR32.
if (SrcReg == SystemZ::CC) {
auto MIB = BuildMI(MBB, MBBI, DL, get(SystemZ::IPM), DestReg);
if (KillSrc) {
const MachineFunction *MF = MBB.getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MIB->addRegisterKilled(SrcReg, TRI);
}
return;
}
// Move CC value from a GR32.
if (DestReg == SystemZ::CC) {
BuildMI(MBB, MBBI, DL, get(SystemZ::TMLH))
unsigned Opcode =
SystemZ::GR32BitRegClass.contains(SrcReg) ? SystemZ::TMLH : SystemZ::TMHH;
BuildMI(MBB, MBBI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(3 << (SystemZ::IPM_CC - 16));
return;
@ -856,12 +849,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::VLR;
else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::CPYA;
else if (SystemZ::AR32BitRegClass.contains(DestReg) &&
SystemZ::GR32BitRegClass.contains(SrcReg))
Opcode = SystemZ::SAR;
else if (SystemZ::GR32BitRegClass.contains(DestReg) &&
SystemZ::AR32BitRegClass.contains(SrcReg))
Opcode = SystemZ::EAR;
else
llvm_unreachable("Impossible reg-to-reg copy");

View File

@ -222,6 +222,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
void addPreRegAlloc() override;
void addPostRewrite() override;
void addPostRegAlloc() override;
void addPreSched2() override;
@ -253,6 +254,10 @@ bool SystemZPassConfig::addILPOpts() {
return true;
}
// Adds the SystemZ physreg-copy pass to the pre-register-allocation hook.
void SystemZPassConfig::addPreRegAlloc() {
  auto &TM = getSystemZTargetMachine();
  addPass(createSystemZCopyPhysRegsPass(TM));
}
// Adds the SystemZ post-rewrite pass to the post-rewrite hook.
void SystemZPassConfig::addPostRewrite() {
  auto &TM = getSystemZTargetMachine();
  addPass(createSystemZPostRewritePass(TM));
}

View File

@ -0,0 +1,24 @@
; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 \
; RUN: -stop-before=regallocfast 2>&1 | FileCheck %s
; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O3 \
; RUN: -stop-before=livevars 2>&1 | FileCheck %s
;
; Test that copies to/from access registers are handled before regalloc with
; GR32 regs.
@x = dso_local thread_local global i32 0, align 4
; Returns the address of the thread-local @x. The checks below expect both
; $a0 and $a1 to be read with EAR into gr32bit virtual registers.
define weak_odr hidden i32* @fun0() {
; CHECK: name: fun0
; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0
; CHECK: {{%[0-9]+}}:gr32bit = EAR $a1
ret i32* @x
}
; Inline asm with its input constrained to a1 and its output to a0. The checks
; below expect the input to pass through a gr32bit COPY followed by SAR into
; $a1, and the output to be read back from $a0 with EAR.
define i32 @fun1() {
; CHECK: name: fun1
; CHECK: [[VREG0:%[0-9]+]]:gr32bit = COPY %0
; CHECK-NEXT: $a1 = SAR [[VREG0]]
; CHECK: {{%[0-9]+}}:gr32bit = EAR $a0
%val = call i32 asm "blah", "={a0}, {a1}" (i32 0)
ret i32 %val
}

View File

@ -0,0 +1,37 @@
; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0
;
; Test that a0 and a1 are copied successfully into GR32 registers.
@x = dso_local thread_local global i32 0, align 4
; Loads the thread-local @x and adds it to seven arguments that are each
; stored to and reloaded from an alloca. The RUN line has no FileCheck, so the
; test only requires llc to succeed.
; NOTE(review): the many live values presumably create enough register
; pressure at -O0 for high GR32 parts to be candidates -- confirm.
define i32 @fun0(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) {
%8 = alloca i32, align 4
%9 = alloca i32, align 4
%10 = alloca i32, align 4
%11 = alloca i32, align 4
%12 = alloca i32, align 4
%13 = alloca i32, align 4
%14 = alloca i32, align 4
%15 = load i32, i32* @x, align 4
store i32 %0, i32* %8, align 4
store i32 %1, i32* %9, align 4
store i32 %2, i32* %10, align 4
store i32 %3, i32* %11, align 4
store i32 %4, i32* %12, align 4
store i32 %5, i32* %13, align 4
store i32 %6, i32* %14, align 4
%16 = load i32, i32* %8, align 4
%17 = add nsw i32 %15, %16
%18 = load i32, i32* %9, align 4
%19 = add nsw i32 %17, %18
%20 = load i32, i32* %10, align 4
%21 = add nsw i32 %19, %20
%22 = load i32, i32* %11, align 4
%23 = add nsw i32 %21, %22
%24 = load i32, i32* %12, align 4
%25 = add nsw i32 %23, %24
%26 = load i32, i32* %13, align 4
%27 = add nsw i32 %25, %26
%28 = load i32, i32* %14, align 4
%29 = add nsw i32 %27, %28
ret i32 %29
}

View File

@ -0,0 +1,24 @@
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-after=finalize-isel \
# RUN: -stop-before=regallocfast -o - %s | FileCheck %s
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O3 -start-after=finalize-isel \
# RUN: -stop-before=livevars -o - %s | FileCheck %s
#
# Test that a COPY from CC gets implemented with an IPM to a GR32 reg.
---
name: fun0
tracksRegLiveness: true
registers:
- { id: 0, class: grx32bit }
body: |
bb.0:
liveins: $cc
; %0 is declared grx32bit, so the expected output reads CC with IPM into a
; separate gr32bit virtual register (%1) and keeps the COPY into %0.
; CHECK-LABEL: name: fun0
; CHECK: %1:gr32bit = IPM implicit $cc
; CHECK-NEXT: %0:grx32bit = COPY %1
; CHECK-NEXT: $r2l = COPY %0
; CHECK-NEXT: Return implicit $r2l
%0:grx32bit = COPY $cc
$r2l = COPY %0
Return implicit $r2l
...

View File

@ -0,0 +1,18 @@
# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z196 -O0 -start-before=prologepilog \
# RUN: -o - %s | FileCheck %s
#
# Test that a COPY to CC gets implemented with a tmlh or tmhh depending on
# the source register.
---
name: fun0
tracksRegLiveness: true
body: |
bb.0:
liveins: $r3l, $r4h
; One COPY to CC from a low register half ($r3l) and one from a high half
; ($r4h); the expected assembly uses tmlh for the first and tmhh for the
; second.
; CHECK-LABEL: fun0
; CHECK: tmlh %r3, 12288
; CHECK: tmhh %r4, 12288
$cc = COPY $r3l
$cc = COPY $r4h
...