forked from OSchip/llvm-project
AMDGPU/GlobalISel: Select more G_EXTRACTs correctly
This assumed a 32-bit extract size, which would produce invalid copies with 64-bit extracts. Handle the easy case, where the offset is a multiple of the result size. Ideally we would have a way to get the proper subreg index for any 32-bit-aligned offset; more generally, there should probably be a TableGen-generated way of getting the subreg index for any size and offset.
This commit is contained in:
parent
d67c4cc2eb
commit
f1c85ecdfc
|
@ -441,15 +441,29 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
|
|||
|
||||
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
Register DstReg = I.getOperand(0).getReg();
|
||||
Register SrcReg = I.getOperand(1).getReg();
|
||||
LLT DstTy = MRI->getType(DstReg);
|
||||
LLT SrcTy = MRI->getType(SrcReg);
|
||||
const unsigned SrcSize = SrcTy.getSizeInBits();
|
||||
const unsigned DstSize = DstTy.getSizeInBits();
|
||||
|
||||
// TODO: Should handle any multiple of 32 offset.
|
||||
unsigned Offset = I.getOperand(2).getImm();
|
||||
if (Offset % 32 != 0)
|
||||
if (Offset % DstSize != 0)
|
||||
return false;
|
||||
|
||||
unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
|
||||
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
|
||||
const TargetRegisterClass *SrcRC =
|
||||
TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
|
||||
if (!SrcRC)
|
||||
return false;
|
||||
|
||||
ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
|
||||
|
||||
const DebugLoc &DL = I.getDebugLoc();
|
||||
MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
|
||||
I.getOperand(0).getReg())
|
||||
.addReg(I.getOperand(1).getReg(), 0, SubReg);
|
||||
MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
|
||||
.addReg(SrcReg, 0, SubRegs[Offset / DstSize]);
|
||||
|
||||
for (const MachineOperand &MO : Copy->operands()) {
|
||||
const TargetRegisterClass *RC =
|
||||
|
|
|
@ -157,3 +157,23 @@ body: |
|
|||
|
||||
S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31, implicit %32
|
||||
...
|
||||
|
||||
# TODO: Handle offset 32 (an s64 extract at an offset that is not a multiple of the 64-bit result size)
|
||||
---
|
||||
name: extract_sgpr_s64_from_s128
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: extract_sgpr_s64_from_s128
|
||||
; CHECK: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[DEF]].sub0_sub1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[DEF]].sub2_sub3
|
||||
; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]]
|
||||
%0:sgpr(s128) = G_IMPLICIT_DEF
|
||||
%1:sgpr(s64) = G_EXTRACT %0, 0
|
||||
%2:sgpr(s64) = G_EXTRACT %0, 64
|
||||
S_ENDPGM 0, implicit %1, implicit %2
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue