forked from OSchip/llvm-project
AMDGPU/GlobalISel: Split 64-bit G_CTPOP in RegBankSelect
This commit is contained in:
parent
6135f5eda4
commit
c437f6c687
|
@ -2084,6 +2084,29 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::G_CTPOP: {
  // Split a wide G_CTPOP whose result is not in the SGPR bank into 32-bit
  // pieces. The SGPR-result and 32-bit-source cases are left untouched and
  // simply `break` out of the switch.
  MachineIRBuilder B(MI);
  MachineFunction &MF = B.getMF();

  // Bank chosen for the result (operand 0) by the selected instruction
  // mapping.
  const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  if (DstBank == &AMDGPU::SGPRRegBank)
    break;

  Register SrcReg = MI.getOperand(1).getReg();
  const LLT S32 = LLT::scalar(32);
  LLT Ty = MRI.getType(SrcReg);
  if (Ty == S32)
    break; // Already 32 bits wide; nothing to split.

  // Instructions created while narrowing are reported to ApplyVALU through
  // the observer; presumably that is what assigns them to the VGPR bank --
  // TODO confirm against ApplyRegBankMapping.
  ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&ApplyVALU);
  LegalizerHelper Helper(MF, Observer, B);

  // Narrow type index 1 to 32 bits. The getInstrMapping case for G_CTPOP
  // deliberately keeps the source mapping unsplit so that this call can do
  // the split itself; anything but Legalized is therefore treated as a bug.
  if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
    llvm_unreachable("narrowScalar should have succeeded");
  return;
}
|
||||||
case AMDGPU::G_SEXT:
|
case AMDGPU::G_SEXT:
|
||||||
case AMDGPU::G_ZEXT: {
|
case AMDGPU::G_ZEXT: {
|
||||||
Register SrcReg = MI.getOperand(1).getReg();
|
Register SrcReg = MI.getOperand(1).getReg();
|
||||||
|
@ -3172,9 +3195,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
case AMDGPU::G_BITCAST:
|
case AMDGPU::G_BITCAST:
|
||||||
case AMDGPU::G_INTTOPTR:
|
case AMDGPU::G_INTTOPTR:
|
||||||
case AMDGPU::G_PTRTOINT:
|
case AMDGPU::G_PTRTOINT:
|
||||||
case AMDGPU::G_CTLZ_ZERO_UNDEF:
|
|
||||||
case AMDGPU::G_CTTZ_ZERO_UNDEF:
|
|
||||||
case AMDGPU::G_CTPOP:
|
|
||||||
case AMDGPU::G_BSWAP:
|
case AMDGPU::G_BSWAP:
|
||||||
case AMDGPU::G_BITREVERSE:
|
case AMDGPU::G_BITREVERSE:
|
||||||
case AMDGPU::G_FABS:
|
case AMDGPU::G_FABS:
|
||||||
|
@ -3184,6 +3204,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
|
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::G_CTLZ:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ:
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP: {
  // Bit-counting operations: both operands are mapped to the bank of the
  // source register; the result is mapped as a 32-bit value while the source
  // keeps its own width.
  const Register SrcReg = MI.getOperand(1).getReg();
  const unsigned SrcBits = MRI.getType(SrcReg).getSizeInBits();
  const unsigned SrcBankID = getRegBankID(SrcReg, MRI, *TRI);

  // The source would ideally use getValueMappingSGPR64Only. It does not,
  // because letting the generic code perform the register split makes
  // LegalizerHelper harder to use.
  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcBits);
  OpdsMapping[0] = AMDGPU::getValueMapping(SrcBankID, 32);
  break;
}
|
||||||
case AMDGPU::G_TRUNC: {
|
case AMDGPU::G_TRUNC: {
|
||||||
Register Dst = MI.getOperand(0).getReg();
|
Register Dst = MI.getOperand(0).getReg();
|
||||||
Register Src = MI.getOperand(1).getReg();
|
Register Src = MI.getOperand(1).getReg();
|
||||||
|
|
|
@ -3,29 +3,70 @@
|
||||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
||||||
|
|
||||||
---
|
---
|
||||||
name: ctpop_i32_s
|
name: ctpop_s32_s
|
||||||
legalized: true
|
legalized: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $sgpr0
|
liveins: $sgpr0
|
||||||
; CHECK-LABEL: name: ctpop_i32_s
|
; CHECK-LABEL: name: ctpop_s32_s
|
||||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]]
|
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32)
|
||||||
|
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
|
||||||
%0:_(s32) = COPY $sgpr0
|
%0:_(s32) = COPY $sgpr0
|
||||||
%1:_(s32) = G_CTPOP %0
|
%1:_(s32) = G_CTPOP %0
|
||||||
|
S_ENDPGM 0, implicit %1
|
||||||
...
|
...
|
||||||
|
|
||||||
---
|
---
|
||||||
name: ctpop_i32_v
|
name: ctpop_s32_v
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0
|
||||||
|
; CHECK-LABEL: name: ctpop_s32_v
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32)
|
||||||
|
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = G_CTPOP %0
|
||||||
|
S_ENDPGM 0, implicit %1
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: ctpop_s64_s
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: ctpop_s64_s
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||||
|
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64)
|
||||||
|
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
|
||||||
|
%0:_(s64) = COPY $sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_CTPOP %0
|
||||||
|
S_ENDPGM 0, implicit %1
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: ctpop_s64_v
|
||||||
legalized: true
|
legalized: true
|
||||||
|
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
liveins: $vgpr0_vgpr1
|
liveins: $vgpr0_vgpr1
|
||||||
; CHECK-LABEL: name: ctpop_i32_v
|
|
||||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
; CHECK-LABEL: name: ctpop_s64_v
|
||||||
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]]
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||||
%0:_(s32) = COPY $vgpr0
|
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||||
|
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32)
|
||||||
|
; CHECK: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32)
|
||||||
|
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]]
|
||||||
|
; CHECK: S_ENDPGM 0, implicit [[ADD]](s32)
|
||||||
|
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||||
%1:_(s32) = G_CTPOP %0
|
%1:_(s32) = G_CTPOP %0
|
||||||
|
S_ENDPGM 0, implicit %1
|
||||||
...
|
...
|
||||||
|
|
Loading…
Reference in New Issue