From c437f6c6873fe32e99e975f4fb1fe3b3531cb93c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 25 Jan 2020 21:10:17 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Split 64-bit G_CTPOP in RegBankSelect --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 41 ++++++++++++- .../AMDGPU/GlobalISel/regbankselect-ctpop.mir | 57 ++++++++++++++++--- 2 files changed, 87 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index fd2481319570..d8e3269d3c6d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2084,6 +2084,29 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } + case AMDGPU::G_CTPOP: { + MachineIRBuilder B(MI); + MachineFunction &MF = B.getMF(); + + const RegisterBank *DstBank = + OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; + if (DstBank == &AMDGPU::SGPRRegBank) + break; + + Register SrcReg = MI.getOperand(1).getReg(); + const LLT S32 = LLT::scalar(32); + LLT Ty = MRI.getType(SrcReg); + if (Ty == S32) + break; + + ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank); + GISelObserverWrapper Observer(&ApplyVALU); + LegalizerHelper Helper(MF, Observer, B); + + if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized) + llvm_unreachable("narrowScalar should have succeeded"); + return; + } case AMDGPU::G_SEXT: case AMDGPU::G_ZEXT: { Register SrcReg = MI.getOperand(1).getReg(); @@ -3172,9 +3195,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_BITCAST: case AMDGPU::G_INTTOPTR: case AMDGPU::G_PTRTOINT: - case AMDGPU::G_CTLZ_ZERO_UNDEF: - case AMDGPU::G_CTTZ_ZERO_UNDEF: - case AMDGPU::G_CTPOP: case AMDGPU::G_BSWAP: case AMDGPU::G_BITREVERSE: case AMDGPU::G_FABS: @@ -3184,6 +3204,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = OpdsMapping[1] = 
AMDGPU::getValueMapping(BankID, Size); break; } + case AMDGPU::G_CTLZ: + case AMDGPU::G_CTLZ_ZERO_UNDEF: + case AMDGPU::G_CTTZ: + case AMDGPU::G_CTTZ_ZERO_UNDEF: + case AMDGPU::G_CTPOP: { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32); + + // This should really be getValueMappingSGPR64Only, but allowing the generic + // code to handle the register split just makes using LegalizerHelper more + // difficult. + OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); + break; + } case AMDGPU::G_TRUNC: { Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir index e4694371805a..30a367f727ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir @@ -3,29 +3,70 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: ctpop_i32_s +name: ctpop_s32_s legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: name: ctpop_i32_s + ; CHECK-LABEL: name: ctpop_s32_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]] + ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_CTPOP %0 + S_ENDPGM 0, implicit %1 ... 
--- -name: ctpop_i32_v +name: ctpop_s32_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctpop_s32_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CTPOP %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctpop_s64_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ctpop_s64_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64) + ; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CTPOP %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctpop_s64_v legalized: true body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: ctpop_i32_v - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]] - %0:_(s32) = COPY $vgpr0 + + ; CHECK-LABEL: name: ctpop_s64_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32) + ; CHECK: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32) + ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]] + ; CHECK: S_ENDPGM 0, implicit [[ADD]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CTPOP %0 + S_ENDPGM 0, implicit %1 ...