[AMDGPU] Fix FoldImmediate for 16-bit operands

Differential Revision: https://reviews.llvm.org/D79362
This commit is contained in:
Stanislav Mekhanoshin 2020-05-04 12:47:23 -07:00
parent 55b9b11fea
commit 9ef166e657
3 changed files with 288 additions and 9 deletions

View File

@ -2509,15 +2509,41 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Opc = UseMI.getOpcode();
if (Opc == AMDGPU::COPY) {
bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
Register DstReg = UseMI.getOperand(0).getReg();
Register SrcReg = UseMI.getOperand(1).getReg();
bool Is16Bit = getOpSize(UseMI, 0) == 2;
bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
APInt Imm(32, ImmOp->getImm());
if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
Imm = Imm.ashr(16);
if (RI.isAGPR(*MRI, DstReg)) {
if (!isInlineConstant(Imm))
return false;
NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
}
if (Is16Bit) {
if (isVGPRCopy)
return false; // Do not clobber vgpr_hi16
if (DstReg.isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
return false;
UseMI.getOperand(0).setSubReg(0);
if (DstReg.isPhysical()) {
DstReg = RI.get32BitRegister(DstReg);
UseMI.getOperand(0).setReg(DstReg);
}
assert(SrcReg.isVirtual());
}
UseMI.setDesc(get(NewOpc));
UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
UseMI.getOperand(1).setTargetFlags(0);
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;
}

View File

@ -827,11 +827,7 @@ public:
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {
if (unsigned SubReg = MO.getSubReg()) {
assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
MI.getParent()->getParent()->getRegInfo().
getRegClass(MO.getReg()), SubReg)) >= 32 &&
"Sub-dword subregs are not supported");
return RI.getNumChannelsFromSubReg(SubReg) * 4;
return RI.getSubRegIdxSize(SubReg) / 8;
}
}
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;

View File

@ -0,0 +1,257 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass peephole-opt -o - %s | FileCheck -check-prefix=GCN %s
# The lo16 half of a 32-bit SGPR immediate (2048) is copied into a virtual
# register of class sgpr_lo16, with no subregister on the destination.
# The checks expect the COPY to be left untouched, i.e. no immediate is folded.
---
name: fold_simm_16_sub_to_lo
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_lo
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: [[COPY:%[0-9]+]]:sgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG [[COPY]]
%0:sreg_32 = S_MOV_B32 2048
%1:sgpr_lo16 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 -> lo16 copy of the immediate 2048 into a 32-bit SGPR virtual register.
# The checks expect the COPY to be replaced by an S_MOV_B32 2048 that defines
# the whole sreg_32 register (the lo16 subregister index on the def is gone).
---
name: fold_simm_16_sub_to_sub
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_sub
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:sreg_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 copy of the immediate 2048 into the physical 16-bit register $sgpr0_lo16.
# The checks expect the COPY to become S_MOV_B32 2048 with the destination
# widened to the 32-bit register $sgpr0; the later use of $sgpr0_lo16 is unchanged.
---
name: fold_simm_16_sub_to_phys
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_phys
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: $sgpr0 = S_MOV_B32 2048
; GCN: SI_RETURN_TO_EPILOG $sgpr0_lo16
%0:sreg_32 = S_MOV_B32 2048
$sgpr0_lo16 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG $sgpr0_lo16
...
# lo16 -> lo16 copy of the immediate 2048 into a 32-bit AGPR virtual register.
# The checks expect the COPY to be left untouched.
# NOTE(review): presumably not folded because 2048 is not an inline constant,
# which the AGPR path requires -- confirm against SIInstrInfo::FoldImmediate.
---
name: fold_aimm_16_sub_to_sub_2048
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_2048
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:agpr_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 -> lo16 copy of the immediate 0 into a 32-bit AGPR virtual register.
# The checks expect the COPY to be replaced by V_ACCVGPR_WRITE_B32 0 (with an
# implicit $exec use) defining the whole agpr_32 register.
---
name: fold_aimm_16_sub_to_sub_0
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
%0:sreg_32 = S_MOV_B32 0
%1.lo16:agpr_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 copy of the immediate 0 into the physical 16-bit register $agpr0_lo16.
# The checks expect the COPY to become V_ACCVGPR_WRITE_B32 0 (with an implicit
# $exec use) and the destination to be widened to the 32-bit register $agpr0.
---
name: fold_aimm_16_sub_to_phys
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_phys
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 0, implicit $exec
; GCN: SI_RETURN_TO_EPILOG $agpr0_lo16
%0:sreg_32 = S_MOV_B32 0
$agpr0_lo16 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG $agpr0_lo16
...
# lo16 copy of the immediate 2048 into a virtual register of class vgpr_lo16.
# The checks expect the COPY to be left untouched (no fold into a VGPR).
---
name: fold_vimm_16_sub_to_lo
body: |
bb.0:
; GCN-LABEL: name: fold_vimm_16_sub_to_lo
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: [[COPY:%[0-9]+]]:vgpr_lo16 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG [[COPY]]
%0:sreg_32 = S_MOV_B32 2048
%1:vgpr_lo16 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 -> lo16 copy of the immediate 2048 into a 32-bit VGPR virtual register.
# The checks expect the COPY to be left untouched.
# NOTE(review): presumably rejected so that a 32-bit move does not clobber the
# VGPR's hi16 half -- confirm against SIInstrInfo::FoldImmediate.
---
name: fold_vimm_16_sub_to_sub
body: |
bb.0:
; GCN-LABEL: name: fold_vimm_16_sub_to_sub
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:vgpr_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# lo16 copy of the immediate 2048 into the physical 16-bit register $vgpr0_lo16.
# The checks expect the COPY to be left untouched (no fold, no widening to $vgpr0).
---
name: fold_vimm_16_sub_to_phys
body: |
bb.0:
; GCN-LABEL: name: fold_vimm_16_sub_to_phys
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: $vgpr0_lo16 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG $vgpr0_lo16
%0:sreg_32 = S_MOV_B32 2048
$vgpr0_lo16 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG $vgpr0_lo16
...
# The lo16 half of the immediate 2048 is copied into the hi16 half of a 32-bit
# VGPR virtual register. The checks expect the COPY to be left untouched.
---
name: fold_vimm_16_lo_to_hi
body: |
bb.0:
; GCN-LABEL: name: fold_vimm_16_lo_to_hi
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: %1.hi16:vgpr_32 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 2048
%1.hi16:vgpr_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 2048 is copied into the lo16 half of a 32-bit
# VGPR virtual register. The checks expect the COPY to be left untouched.
---
name: fold_vimm_16_hi_to_lo
body: |
bb.0:
; GCN-LABEL: name: fold_vimm_16_hi_to_lo
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: %1.lo16:vgpr_32 = COPY killed [[S_MOV_B32_]].hi16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:vgpr_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...
# The lo16 half of the immediate 2048 is copied into the hi16 half of a 32-bit
# SGPR virtual register. The checks expect the COPY to be left untouched: a
# destination subregister other than lo16 is not folded.
---
name: fold_simm_16_sub_to_sub_lo_to_hi
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_sub_lo_to_hi
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: %1.hi16:sreg_32 = COPY killed [[S_MOV_B32_]].lo16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 2048
%1.hi16:sreg_32 = COPY killed %0.lo16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 2048 is copied into the lo16 half of a 32-bit
# SGPR virtual register. The upper 16 bits of 2048 are zero, and the checks
# expect the COPY to be replaced by S_MOV_B32 0 defining the whole register.
---
name: fold_simm_16_sub_to_sub_hi_to_lo_2048
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_2048
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:sreg_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 134217728 (2048 << 16) is copied into the lo16
# half of a 32-bit SGPR virtual register. The checks expect the COPY to be
# replaced by S_MOV_B32 2048, i.e. the immediate shifted right by 16.
---
name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
body: |
bb.0:
; GCN-LABEL: name: fold_simm_16_sub_to_sub_hi_to_lo_shifted_2048
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: SI_RETURN_TO_EPILOG [[S_MOV_B32_1]]
%0:sreg_32 = S_MOV_B32 134217728
%1.lo16:sreg_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 2048 is copied into the lo16 half of a 32-bit
# AGPR virtual register. The upper 16 bits of 2048 are zero, and the checks
# expect the COPY to be replaced by V_ACCVGPR_WRITE_B32 0 (implicit $exec use).
---
name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_2048
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2048
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
%0:sreg_32 = S_MOV_B32 2048
%1.lo16:agpr_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 65536 (1 << 16) is copied into the lo16 half
# of a 32-bit AGPR virtual register. The checks expect the COPY to be replaced
# by V_ACCVGPR_WRITE_B32 1 (implicit $exec use), i.e. the immediate shifted
# right by 16.
---
name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65536
; GCN: [[V_ACCVGPR_WRITE_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32 1, implicit $exec
; GCN: SI_RETURN_TO_EPILOG [[V_ACCVGPR_WRITE_B32_]]
%0:sreg_32 = S_MOV_B32 65536
%1.lo16:agpr_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...
# The hi16 half of the immediate 134217728 (2048 << 16) is copied into the lo16
# half of a 32-bit AGPR virtual register. The checks expect the COPY to be left
# untouched.
# NOTE(review): presumably not folded because the shifted value 2048 is not an
# inline constant, which the AGPR path requires -- confirm against
# SIInstrInfo::FoldImmediate.
---
name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
body: |
bb.0:
; GCN-LABEL: name: fold_aimm_16_sub_to_sub_hi_to_lo_shifted_2048
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 134217728
; GCN: %1.lo16:agpr_32 = COPY killed [[S_MOV_B32_]].hi16
; GCN: SI_RETURN_TO_EPILOG %1
%0:sreg_32 = S_MOV_B32 134217728
%1.lo16:agpr_32 = COPY killed %0.hi16
SI_RETURN_TO_EPILOG %1
...