From adc40baa29a1c16cde63c4bf857afea0eb7b476e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 8 Jan 2019 01:22:47 +0000 Subject: [PATCH] RegBankSelect: Fix copy insertion point for terminators If a copy was needed to handle the condition of brcond, it was being inserted before the defining instruction. Add tests for iterator edge cases. I find the existing code here suspect for the case where it's looking for terminators that modify the register. It's going to insert a copy in the middle of the terminators, which isn't allowed (it might be necessary to have a COPY_terminator if anybody actually needs this). Also legalize brcond for AMDGPU. llvm-svn: 350595 --- llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 24 ++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 26 +++ .../AMDGPU/GlobalISel/legalize-brcond.mir | 24 +++ .../GlobalISel/regbankselect-brcond.mir | 179 ++++++++++++++++++ 5 files changed, 246 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 98df7d34857e..bfd7a68a8b06 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -717,15 +717,21 @@ RegBankSelect::RepairingPlacement::RepairingPlacement( unsigned Reg = MO.getReg(); if (Before) { // Check whether Reg is defined by any terminator. - MachineBasicBlock::iterator It = MI; - for (auto Begin = MI.getParent()->begin(); - --It != Begin && It->isTerminator();) - if (It->modifiesRegister(Reg, &TRI)) { - // Insert the repairing code right after the definition. - addInsertPoint(*It, /*Before*/ false); - return; - } - addInsertPoint(*It, /*Before*/ true); + MachineBasicBlock::reverse_iterator It = MI; + auto REnd = MI.getParent()->rend(); + + for (; It != REnd && It->isTerminator(); ++It) { + assert(!It->modifiesRegister(Reg, &TRI) && + "copy insertion in middle of terminators not handled"); + } + + if (It == REnd) { + addInsertPoint(*MI.getParent()->begin(), true); + return; + } + + // We are sure to be right before the first terminator. + addInsertPoint(*It, /*Before*/ false); return; } // Make sure Reg is not redefined by other terminators, otherwise diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 820ff4b6add8..01ee4afe886f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -85,6 +85,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, PrivatePtr }; + setAction({G_BRCOND, S1}, Legal); + setAction({G_ADD, S32}, Legal); setAction({G_ASHR, S32}, Legal); setAction({G_SUB, S32}, Legal); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9d576e621600..90553425c950 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -221,6 +221,22 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( AltMappings.push_back(&VVMapping); return AltMappings; } + case AMDGPU::G_BRCOND: { + assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); + + const InstructionMapping &SMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}), + 2); // Num Operands + AltMappings.push_back(&SMapping); + + const InstructionMapping &VMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr }), + 2); // Num Operands + AltMappings.push_back(&VMapping); + return AltMappings; + } default: break; } @@ -712,6 +728,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMIC_CMPXCHG: { return getDefaultMappingAllVGPR(MI); } + case AMDGPU::G_BRCOND: { + unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1); + if (Bank != AMDGPU::SCCRegBankID) + Bank = AMDGPU::SGPRRegBankID; + + OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1); + break; + } } return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir new file mode 100644 index 000000000000..eb8b34915f50 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: legal_brcond +body: | + ; CHECK-LABEL: name: legal_brcond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK: bb.1: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + G_BRCOND %2, %bb.1 + + bb.1: +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir new file mode 100644 index 000000000000..b973943f0c6a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir @@ -0,0 +1,179 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: brcond_vcc_cond +legalized: true +body: | + ; CHECK-LABEL: name: brcond_vcc_cond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK: bb.1: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0, $vgpr1 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + G_BRCOND %2, %bb.1 + + bb.1: +... + +--- +name: brcond_scc_cond +legalized: true +body: | + ; CHECK-LABEL: name: brcond_scc_cond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK: bb.1: + bb.0.entry: + successors: %bb.1 + liveins: $sgpr0, $sgpr1 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_ICMP intpred(ne), %0, %1 + G_BRCOND %2, %bb.1 + + bb.1: +... + +--- +name: brcond_sgpr_cond +legalized: true +body: | + ; CHECK-LABEL: name: brcond_sgpr_cond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 + ; CHECK: bb.1: + bb.0.entry: + successors: %bb.1 + liveins: $sgpr0 + %0:_(s32) = COPY $sgpr0 + %1:_(s1) = G_TRUNC %0 + G_BRCOND %1, %bb.1 + + bb.1: +... + +--- +name: brcond_vgpr_cond +legalized: true +body: | + ; CHECK-LABEL: name: brcond_vgpr_cond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK: bb.1: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0 + G_BRCOND %1, %bb.1 + + bb.1: +... + + +# The terminator that needs handling is the only instruction in the +# block. + +--- +name: empty_block_vgpr_brcond +legalized: true +body: | + ; CHECK-LABEL: name: empty_block_vgpr_brcond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK: bb.2: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0 + + bb.1: + G_BRCOND %1, %bb.1 + + bb.2: +... + + +# Make sure the first instruction in the block isn't skipped. +--- +name: copy_first_inst_brcond +legalized: true +body: | + ; CHECK-LABEL: name: copy_first_inst_brcond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK: bb.2: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0 + %0:_(s32) = COPY $vgpr0 + + bb.1: + %1:_(s1) = G_TRUNC %0 + G_BRCOND %1, %bb.1 + + bb.2: +... + +# Extra instruction separates brcond from the condition def +--- +name: copy_middle_inst_brcond +legalized: true +body: | + ; CHECK-LABEL: name: copy_middle_inst_brcond + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: S_NOP 0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s1) = COPY [[TRUNC]](s1) + ; CHECK: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK: bb.2: + bb.0.entry: + successors: %bb.1 + liveins: $vgpr0 + %0:_(s32) = COPY $vgpr0 + + bb.1: + %1:_(s1) = G_TRUNC %0 + S_NOP 0 + G_BRCOND %1, %bb.1 + + bb.2: +...