forked from OSchip/llvm-project
AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR_TRUNC
Treat this as legal on gfx9, since it can use the S_PACK_* instructions. Nothing uses this yet. The same will probably apply to 16-bit G_BUILD_VECTOR without the trunc. llvm-svn: 371423
This commit is contained in:
parent
6d7fba6aae
commit
182f9248e8
|
@ -719,6 +719,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
.legalIf(isRegisterType(0))
|
||||
.minScalarOrElt(0, S32);
|
||||
|
||||
if (ST.hasScalarPackInsts()) {
|
||||
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
|
||||
.legalFor({V2S16, S32})
|
||||
.lower();
|
||||
} else {
|
||||
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
|
||||
.lower();
|
||||
}
|
||||
|
||||
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
|
||||
.legalIf(isRegisterType(0));
|
||||
|
||||
|
|
|
@ -1305,6 +1305,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
|||
MI.eraseFromParent();
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
||||
assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
|
||||
substituteSimpleCopyRegs(OpdMapper, 1);
|
||||
substituteSimpleCopyRegs(OpdMapper, 2);
|
||||
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
|
||||
if (DstBank == &AMDGPU::SGPRRegBank)
|
||||
break; // Can use S_PACK_* instructions.
|
||||
|
||||
MachineIRBuilder B(MI);
|
||||
|
||||
Register Lo = MI.getOperand(1).getReg();
|
||||
Register Hi = MI.getOperand(2).getReg();
|
||||
|
||||
const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
|
||||
const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
|
||||
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
auto MaskLo = B.buildConstant(S32, 0xffff);
|
||||
MRI.setRegBank(MaskLo.getReg(0), *BankLo);
|
||||
|
||||
auto ShiftAmt = B.buildConstant(S32, 16);
|
||||
MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
|
||||
|
||||
auto ShiftHi = B.buildShl(S32, Hi, ShiftAmt);
|
||||
MRI.setRegBank(ShiftHi.getReg(0), *BankHi);
|
||||
|
||||
auto Masked = B.buildAnd(S32, Lo, MaskLo);
|
||||
MRI.setRegBank(Masked.getReg(0), *BankLo);
|
||||
|
||||
auto Or = B.buildOr(S32, Masked, ShiftHi);
|
||||
MRI.setRegBank(Or.getReg(0), *DstBank);
|
||||
|
||||
B.buildBitcast(DstReg, Or);
|
||||
MI.eraseFromParent();
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_EXTRACT_VECTOR_ELT:
|
||||
applyDefaultMapping(OpdMapper);
|
||||
executeInWaterfallLoop(MI, MRI, { 2 });
|
||||
|
@ -1513,6 +1551,11 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
|
|||
return Bank ? Bank->getID() : Default;
|
||||
}
|
||||
|
||||
static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
|
||||
return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
|
||||
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
|
||||
}
|
||||
|
||||
///
|
||||
/// This function must return a legal mapping, because
|
||||
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
|
||||
|
@ -1774,6 +1817,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
||||
assert(MI.getNumOperands() == 3);
|
||||
|
||||
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
||||
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
||||
unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||
unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||
unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
|
||||
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
|
||||
OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_BITCAST:
|
||||
case AMDGPU::G_INTTOPTR:
|
||||
case AMDGPU::G_PTRTOINT:
|
||||
|
|
|
@ -551,6 +551,10 @@ public:
|
|||
return GFX9Insts;
|
||||
}
|
||||
|
||||
bool hasScalarPackInsts() const {
|
||||
return GFX9Insts;
|
||||
}
|
||||
|
||||
TrapHandlerAbi getTrapHandlerAbi() const {
|
||||
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: legal_s32_to_v2s16
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX9-LABEL: name: legal_s32_to_v2s16
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
|
||||
; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||
S_NOP 0, implicit %2
|
||||
...
|
|
@ -0,0 +1,83 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
name: build_vector_trunc_v2s16_s32_ss
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_ss
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
name: build_vector_trunc_v2s16_s32_sv
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0
|
||||
|
||||
; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_sv
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[C]]
|
||||
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
name: build_vector_trunc_v2s16_s32_vs
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr0
|
||||
; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vs
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
|
||||
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
name: build_vector_trunc_v2s16_s32_vv
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vv
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535
|
||||
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]]
|
||||
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
|
||||
...
|
Loading…
Reference in New Issue