AMDGPU/GlobalISel: Split VALU s64 G_ZEXT/G_SEXT in RegBankSelect

Scalar extends to s64 can use S_BFE_{I64|U64}, but vector extends need to
extend to 32 bits first, and then extend that 32-bit half to 64.

I'm not sure where the line should be between what RegBankSelect handles
and what instruction selection does, but for now I'm erring on the side
of RegBankSelect to enable future post-RBS combines.

llvm-svn: 364212
Matt Arsenault 2019-06-24 17:54:12 +00:00
parent 6e04b92c89
commit 8fcd5ade3e
3 changed files with 145 additions and 26 deletions
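For orientation before the diff, here is a minimal sketch of the VGPR-side split described in the message above, written against MachineIRBuilder. The DstLo/DstHi names are placeholders for the two 32-bit halves that OpdMapper.getVRegs(0) hands back in the actual patch; the builder calls mirror those in the diff.

// Sketch only: lower a VGPR G_SEXT/G_ZEXT from a <=32-bit SrcReg to s64 by
// producing the two 32-bit halves separately. DstLo/DstHi are placeholders.
const LLT S32 = LLT::scalar(32);
if (Signed) {
  // Sign-extend into the low half, then replicate its sign bit into the
  // high half with an arithmetic shift right by 31.
  B.buildSExtOrTrunc(DstLo, SrcReg);
  auto ShiftAmt = B.buildConstant(S32, 31);
  B.buildAShr(DstHi, DstLo, ShiftAmt);
} else {
  // Zero-extend into the low half; the high half is simply zero.
  B.buildZExtOrTrunc(DstLo, SrcReg);
  B.buildConstant(DstHi, 0);
}

The register-bank bookkeeping (MRI.setRegBank) and the erase of the original instruction are omitted here; see the first hunk below for the full version.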


@@ -830,19 +830,53 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_ZEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
bool Signed = Opc == AMDGPU::G_SEXT;
MachineIRBuilder B(MI);
const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isScalar() &&
SrcBank != &AMDGPU::SGPRRegBank &&
SrcBank != &AMDGPU::SCCRegBank &&
SrcBank != &AMDGPU::VCCRegBank &&
// FIXME: Should handle any type that round to s64 when irregular
// breakdowns supported.
DstTy.getSizeInBits() == 64 &&
SrcTy.getSizeInBits() <= 32) {
const LLT S32 = LLT::scalar(32);
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
// Extend to 32-bit, and then extend the low half.
if (Signed) {
// TODO: Should really be buildSExtOrCopy
B.buildSExtOrTrunc(DefRegs[0], SrcReg);
// Replicate sign bit from 32-bit extended part.
auto ShiftAmt = B.buildConstant(S32, 31);
MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
} else {
B.buildZExtOrTrunc(DefRegs[0], SrcReg);
B.buildConstant(DefRegs[1], 0);
}
MRI.setRegBank(DstReg, *SrcBank);
MI.eraseFromParent();
return;
}
if (SrcTy != LLT::scalar(1))
return;
MachineIRBuilder B(MI);
bool Signed = Opc == AMDGPU::G_SEXT;
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
if (SrcBank->getID() == AMDGPU::SCCRegBankID ||
SrcBank->getID() == AMDGPU::VCCRegBankID) {
const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
unsigned DstSize = DstTy.getSizeInBits();
if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
&AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
unsigned DstSize = DstTy.getSizeInBits();
// 64-bit select is SGPR only
const bool UseSel64 = DstSize > 32 &&
SrcBank->getID() == AMDGPU::SCCRegBankID;
@@ -854,10 +888,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MRI.setRegBank(True.getReg(0), *DstBank);
MRI.setRegBank(False.getReg(0), *DstBank);
MRI.setRegBank(DstReg, *DstBank);
if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
B.buildMerge(DstReg, { Sel.getReg(0), Sel.getReg(0) });
B.buildSelect(DefRegs[0], SrcReg, True, False);
B.buildCopy(DefRegs[1], DefRegs[0]);
} else if (DstSize < 32) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
@@ -1313,8 +1348,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
// TODO: Should anyext be split into 32-bit part as well?
if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
} else {
// Scalar extend can use 64-bit BFE, but VGPRs require extending to
// 32-bits, and then to 64.
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
SrcSize);
}
break;
}
case AMDGPU::G_FCMP: {


@@ -16,6 +16,22 @@ body: |
%1:_(s64) = G_SEXT %0
...
---
name: sext_s16_to_s64_s
legalized: true
body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: sext_s16_to_s64_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16)
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
...
---
name: sext_s32_to_s64_v
legalized: true
@@ -25,7 +41,10 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: sext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[SEXT:%[0-9]+]]:vgpr(s64) = G_SEXT [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_SEXT %0
...
@@ -146,7 +165,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +278,30 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
...
---
name: sext_s16_to_s64_vgpr
legalized: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sext_s16_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
...


@@ -16,6 +16,22 @@ body: |
%1:_(s64) = G_ZEXT %0
...
---
name: zext_s16_to_s64_s
legalized: true
body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: zext_s16_to_s64_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16)
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
...
---
name: zext_s32_to_s64_v
legalized: true
@@ -25,7 +41,9 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: zext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_ZEXT %0
...
@@ -146,7 +164,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +277,28 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
...
---
name: zext_s16_to_s64_vgpr
legalized: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: zext_s16_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
...