forked from OSchip/llvm-project
AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR v2s16
Handle G_BUILD_VECTOR of v2s16 the same way as G_BUILD_VECTOR_TRUNC. Arguably only G_BUILD_VECTOR_TRUNC should be legal for this, but G_BUILD_VECTOR will probably be more convenient in most cases.

llvm-svn: 371440
This commit is contained in:
parent
fc910c507e
commit
a0933e6df7
|
@ -713,11 +713,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
getActionDefinitionsBuilder(G_BUILD_VECTOR)
|
auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
|
||||||
.legalForCartesianProduct(AllS32Vectors, {S32})
|
.legalForCartesianProduct(AllS32Vectors, {S32})
|
||||||
.legalForCartesianProduct(AllS64Vectors, {S64})
|
.legalForCartesianProduct(AllS64Vectors, {S64})
|
||||||
.clampNumElements(0, V16S32, V16S32)
|
.clampNumElements(0, V16S32, V16S32)
|
||||||
.clampNumElements(0, V2S64, V8S64)
|
.clampNumElements(0, V2S64, V8S64);
|
||||||
|
|
||||||
|
if (ST.hasScalarPackInsts())
|
||||||
|
BuildVector.legalFor({V2S16, S32});
|
||||||
|
|
||||||
|
BuildVector
|
||||||
.minScalarSameAs(1, 0)
|
.minScalarSameAs(1, 0)
|
||||||
.legalIf(isRegisterType(0))
|
.legalIf(isRegisterType(0))
|
||||||
.minScalarOrElt(0, S32);
|
.minScalarOrElt(0, S32);
|
||||||
|
|
|
@ -1305,12 +1305,17 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::G_BUILD_VECTOR:
|
||||||
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
||||||
|
Register DstReg = MI.getOperand(0).getReg();
|
||||||
|
LLT DstTy = MRI.getType(DstReg);
|
||||||
|
if (DstTy != LLT::vector(2, 16))
|
||||||
|
break;
|
||||||
|
|
||||||
assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
|
assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0)));
|
||||||
substituteSimpleCopyRegs(OpdMapper, 1);
|
substituteSimpleCopyRegs(OpdMapper, 1);
|
||||||
substituteSimpleCopyRegs(OpdMapper, 2);
|
substituteSimpleCopyRegs(OpdMapper, 2);
|
||||||
|
|
||||||
Register DstReg = MI.getOperand(0).getReg();
|
|
||||||
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
|
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
|
||||||
if (DstBank == &AMDGPU::SGPRRegBank)
|
if (DstBank == &AMDGPU::SGPRRegBank)
|
||||||
break; // Can use S_PACK_* instructions.
|
break; // Can use S_PACK_* instructions.
|
||||||
|
@ -1319,24 +1324,41 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
|
|
||||||
Register Lo = MI.getOperand(1).getReg();
|
Register Lo = MI.getOperand(1).getReg();
|
||||||
Register Hi = MI.getOperand(2).getReg();
|
Register Hi = MI.getOperand(2).getReg();
|
||||||
|
const LLT S32 = LLT::scalar(32);
|
||||||
|
|
||||||
const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
|
const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
|
||||||
const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
|
const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);
|
||||||
|
|
||||||
const LLT S32 = LLT::scalar(32);
|
Register ZextLo;
|
||||||
auto MaskLo = B.buildConstant(S32, 0xffff);
|
Register ShiftHi;
|
||||||
MRI.setRegBank(MaskLo.getReg(0), *BankLo);
|
|
||||||
|
if (Opc == AMDGPU::G_BUILD_VECTOR) {
|
||||||
|
ZextLo = B.buildZExt(S32, Lo).getReg(0);
|
||||||
|
MRI.setRegBank(ZextLo, *BankLo);
|
||||||
|
|
||||||
|
Register ZextHi = B.buildZExt(S32, Hi).getReg(0);
|
||||||
|
MRI.setRegBank(ZextHi, *BankHi);
|
||||||
|
|
||||||
auto ShiftAmt = B.buildConstant(S32, 16);
|
auto ShiftAmt = B.buildConstant(S32, 16);
|
||||||
MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
|
MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
|
||||||
|
|
||||||
auto ShiftHi = B.buildShl(S32, Hi, ShiftAmt);
|
ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0);
|
||||||
MRI.setRegBank(ShiftHi.getReg(0), *BankHi);
|
MRI.setRegBank(ShiftHi, *BankHi);
|
||||||
|
} else {
|
||||||
|
Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0);
|
||||||
|
MRI.setRegBank(MaskLo, *BankLo);
|
||||||
|
|
||||||
auto Masked = B.buildAnd(S32, Lo, MaskLo);
|
auto ShiftAmt = B.buildConstant(S32, 16);
|
||||||
MRI.setRegBank(Masked.getReg(0), *BankLo);
|
MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);
|
||||||
|
|
||||||
auto Or = B.buildOr(S32, Masked, ShiftHi);
|
ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0);
|
||||||
|
MRI.setRegBank(ShiftHi, *BankHi);
|
||||||
|
|
||||||
|
ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0);
|
||||||
|
MRI.setRegBank(ZextLo, *BankLo);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto Or = B.buildOr(S32, ZextLo, ShiftHi);
|
||||||
MRI.setRegBank(Or.getReg(0), *DstBank);
|
MRI.setRegBank(Or.getReg(0), *DstBank);
|
||||||
|
|
||||||
B.buildBitcast(DstReg, Or);
|
B.buildBitcast(DstReg, Or);
|
||||||
|
@ -1804,8 +1826,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
OpdsMapping[2] = nullptr;
|
OpdsMapping[2] = nullptr;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::G_MERGE_VALUES:
|
|
||||||
case AMDGPU::G_BUILD_VECTOR:
|
case AMDGPU::G_BUILD_VECTOR:
|
||||||
|
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
||||||
|
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
|
||||||
|
if (DstTy == LLT::vector(2, 16)) {
|
||||||
|
unsigned DstSize = DstTy.getSizeInBits();
|
||||||
|
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
||||||
|
unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
|
||||||
|
unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
|
||||||
|
unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
|
||||||
|
|
||||||
|
OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
|
||||||
|
OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
|
||||||
|
OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
LLVM_FALLTHROUGH;
|
||||||
|
}
|
||||||
|
case AMDGPU::G_MERGE_VALUES:
|
||||||
case AMDGPU::G_CONCAT_VECTORS: {
|
case AMDGPU::G_CONCAT_VECTORS: {
|
||||||
unsigned Bank = isSALUMapping(MI) ?
|
unsigned Bank = isSALUMapping(MI) ?
|
||||||
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
|
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
|
||||||
|
@ -1818,20 +1857,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
|
OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::G_BUILD_VECTOR_TRUNC: {
|
|
||||||
assert(MI.getNumOperands() == 3);
|
|
||||||
|
|
||||||
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
|
||||||
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
|
|
||||||
unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
|
|
||||||
unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
|
|
||||||
unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);
|
|
||||||
|
|
||||||
OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
|
|
||||||
OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
|
|
||||||
OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::G_BITCAST:
|
case AMDGPU::G_BITCAST:
|
||||||
case AMDGPU::G_INTTOPTR:
|
case AMDGPU::G_INTTOPTR:
|
||||||
case AMDGPU::G_PTRTOINT:
|
case AMDGPU::G_PTRTOINT:
|
||||||
|
|
|
@ -0,0 +1,99 @@
|
||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: build_vector_v2s16_s32_ss
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1
|
||||||
|
; CHECK-LABEL: name: build_vector_v2s16_s32_ss
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
|
||||||
|
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
|
||||||
|
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s16) = G_TRUNC %0
|
||||||
|
%3:_(s16) = G_TRUNC %1
|
||||||
|
%4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: build_vector_v2s16_s32_sv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $vgpr0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: build_vector_v2s16_s32_sv
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
|
||||||
|
; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s16)
|
||||||
|
; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16)
|
||||||
|
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
|
||||||
|
; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||||
|
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
|
||||||
|
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr0
|
||||||
|
%2:_(s16) = G_TRUNC %0
|
||||||
|
%3:_(s16) = G_TRUNC %1
|
||||||
|
%4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: build_vector_v2s16_s32_vs
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $sgpr0
|
||||||
|
; CHECK-LABEL: name: build_vector_v2s16_s32_vs
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
|
||||||
|
; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
|
||||||
|
; CHECK: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s16)
|
||||||
|
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
|
||||||
|
; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||||
|
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
|
||||||
|
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr0
|
||||||
|
%2:_(s16) = G_TRUNC %0
|
||||||
|
%3:_(s16) = G_TRUNC %1
|
||||||
|
%4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: build_vector_v2s16_s32_vv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
; CHECK-LABEL: name: build_vector_v2s16_s32_vv
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
|
||||||
|
; CHECK: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
|
||||||
|
; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
|
||||||
|
; CHECK: [[ZEXT1:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC1]](s16)
|
||||||
|
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16
|
||||||
|
; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||||
|
; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[ZEXT]], [[SHL]]
|
||||||
|
; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr1
|
||||||
|
%2:_(s16) = G_TRUNC %0
|
||||||
|
%3:_(s16) = G_TRUNC %1
|
||||||
|
%4:_(<2 x s16>) = G_BUILD_VECTOR %2, %3
|
||||||
|
...
|
Loading…
Reference in New Issue