From d1cd30b218897f41984cf3694fd19006b4799e8d Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 8 Jun 2016 16:53:32 +0000 Subject: [PATCH] [AArch64][RegisterBankInfo] G_OR are fine on either GPR or FPR. Teach AArch64RegisterBankInfo that G_OR can be mapped on either GPR or FPR for 64-bit or 32-bit values. Add test cases demonstrating how this information is used to coalesce a computation on a single register bank. llvm-svn: 272170 --- .../AArch64/AArch64RegisterBankInfo.cpp | 55 +++++++++ .../Target/AArch64/AArch64RegisterBankInfo.h | 8 ++ .../GlobalISel/arm64-regbankselect.mir | 104 +++++++++++++++++- 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 94e2d569e712..0a1831bd9a8c 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -111,3 +112,57 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass( llvm_unreachable("Register class not supported"); } } + +RegisterBankInfo::InstructionMappings +AArch64RegisterBankInfo::getInstrAlternativeMappings( + const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_OR: { + // 32 and 64-bit G_OR can be mapped on either FPR or + // GPR for the same cost. 
+ const MachineFunction &MF = *MI.getParent()->getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + if (Size != 32 && Size != 64) + break; + + // If the instruction has any implicit-defs or uses, i.e., it does not + // have exactly three operands, do not mess with it. + if (MI.getNumOperands() != 3) + break; + InstructionMappings AltMappings; + InstructionMapping GPRMapping(/*ID*/ 1, /*Cost*/ 1, /*NumOperands*/ 3); + InstructionMapping FPRMapping(/*ID*/ 2, /*Cost*/ 1, /*NumOperands*/ 3); + for (unsigned Idx = 0; Idx != 3; ++Idx) { + GPRMapping.setOperandMapping(Idx, Size, + getRegBank(AArch64::GPRRegBankID)); + FPRMapping.setOperandMapping(Idx, Size, + getRegBank(AArch64::FPRRegBankID)); + } + AltMappings.emplace_back(std::move(GPRMapping)); + AltMappings.emplace_back(std::move(FPRMapping)); + return AltMappings; + } + default: + break; + } + return RegisterBankInfo::getInstrAlternativeMappings(MI); +} + +void AArch64RegisterBankInfo::applyMappingImpl( + const OperandsMapper &OpdMapper) const { + switch (OpdMapper.getMI().getOpcode()) { + case TargetOpcode::G_OR: { + // Those IDs must match getInstrAlternativeMappings: 1 = GPR, 2 = FPR. + assert((OpdMapper.getInstrMapping().getID() == 1 || + OpdMapper.getInstrMapping().getID() == 2) && + "Don't know how to handle that ID"); + return applyDefaultMapping(OpdMapper); + } + default: + llvm_unreachable("Don't know how to handle that operation"); + } +} diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h index 69a21ec82cfa..907bcfdea161 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h @@ -31,6 +31,9 @@ enum { /// This class provides the information for the target register banks. 
class AArch64RegisterBankInfo : public RegisterBankInfo { + /// See RegisterBankInfo::applyMapping. + void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); /// Get the cost of a copy from \p B to \p A, or put differently, @@ -56,6 +59,11 @@ public: /// \todo This should be TableGen'ed. const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; + + /// Get the alternative mappings for \p MI. + /// Alternative in the sense different from getInstrMapping. + InstructionMappings + getInstrAlternativeMappings(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index 12ff22d971f9..f5d85e189d75 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -1,4 +1,5 @@ -# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -o - 2>&1 | FileCheck %s +# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FAST +# RUN: llc -O0 -run-pass=regbankselect -global-isel %s -regbankselect-greedy -o - 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY # REQUIRES: global-isel --- | @@ -45,6 +46,14 @@ entry: ret void } + define void @greedyMappingOr() { + entry: + ret void + } + define void @greedyMappingOrWithConstraints() { + entry: + ret void + } ... --- @@ -225,3 +234,96 @@ body: | %0(32) = COPY %w0 %s0 = G_ADD i32 %0, %0 ... + +--- +# Check that the greedy mode is able to switch the +# G_OR instruction from fpr to gpr. +name: greedyMappingOr +isSSA: true +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 1, class: gpr } + +# Fast mode maps vector instruction on FPR. 
+# FAST-NEXT: - { id: 2, class: fpr } +# Fast mode needs two extra copies. +# FAST-NEXT: - { id: 3, class: fpr } +# FAST-NEXT: - { id: 4, class: fpr } + +# Greedy mode coalesces the computation on the GPR register +# because it is the cheapest. +# GREEDY-NEXT: - { id: 2, class: gpr } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0.entry: + liveins: %x0, %x1 + ; CHECK: %0(64) = COPY %x0 + ; CHECK-NEXT: %1(64) = COPY %x1 + + + ; Fast mode tries to reuse the source of the copy for the destination. + ; Now, the default mapping says that %0 and %1 need to be in FPR. + ; The repairing code inserts two copies to materialize that. + ; FAST-NEXT: %3(64) = COPY %0 + ; FAST-NEXT: %4(64) = COPY %1 + ; The mapping of G_OR is on FPR. + ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4 + + ; Greedy mode remapped the instruction on the GPR bank. + ; GREEDY-NEXT: %2(64) = G_OR <2 x i32> %0, %1 + %0(64) = COPY %x0 + %1(64) = COPY %x1 + %2(64) = G_OR <2 x i32> %0, %1 +... + +--- +# Check that the greedy mode is able to switch the +# G_OR instruction from fpr to gpr, while still honoring +# the constraint on %2. +name: greedyMappingOrWithConstraints +isSSA: true +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 2, class: fpr } + +# Fast mode maps vector instruction on FPR. +# Fast mode needs two extra copies. +# FAST-NEXT: - { id: 3, class: fpr } +# FAST-NEXT: - { id: 4, class: fpr } + +# Greedy mode coalesces the computation on the GPR register because it +# is the cheapest, but will need one extra copy to materialize %2 into an FPR. +# GREEDY-NEXT: - { id: 3, class: gpr } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: fpr } +body: | + bb.0.entry: + liveins: %x0, %x1 + ; CHECK: %0(64) = COPY %x0 + ; CHECK-NEXT: %1(64) = COPY %x1 + + + ; Fast mode tries to reuse the source of the copy for the destination. 
+ ; Now, the default mapping says that %0 and %1 need to be in FPR. + ; The repairing code inserts two copies to materialize that. + ; FAST-NEXT: %3(64) = COPY %0 + ; FAST-NEXT: %4(64) = COPY %1 + ; The mapping of G_OR is on FPR. + ; FAST-NEXT: %2(64) = G_OR <2 x i32> %3, %4 + + ; Greedy mode remapped the instruction on the GPR bank. + ; GREEDY-NEXT: %3(64) = G_OR <2 x i32> %0, %1 + ; We need to keep %2 in FPR because we do not know anything about it. + ; GREEDY-NEXT: %2(64) = COPY %3 + %0(64) = COPY %x0 + %1(64) = COPY %x1 + %2(64) = G_OR <2 x i32> %0, %1 +...