forked from OSchip/llvm-project
[TwoAddressInstruction] Tweak constraining of tied operands
In collectTiedOperands, when handling an undef use that is tied to a def, constrain the dst reg with the actual register class of the src reg, instead of with the register class from the instruction's MCInstrDesc.

This makes a difference in some AMDGPU test cases like this.

Before:
  %16:sgpr_96 = INSERT_SUBREG undef %15:sgpr_96_with_sub0_sub1(tied-def 0), killed %11:sreg_64_xexec, %subreg.sub0_sub1

After, without this patch:
  undef %16.sub0_sub1:sgpr_96 = COPY killed %11:sreg_64_xexec

This fails machine verification if you force it to run after TwoAddressInstruction (currently it is disabled) with:
  *** Bad machine code: Invalid register class for subregister index ***
  - function: s_load_constant_v3i32_align4
  - basic block: %bb.0 (0xa011a88)
  - instruction: undef %16.sub0_sub1:sgpr_96 = COPY killed %11:sreg_64_xexec
  - operand 0: undef %16.sub0_sub1:sgpr_96
  Register class SGPR_96 does not fully support subreg index 4

After, with this patch:
  undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed %11:sreg_64_xexec

See also svn r159120, which introduced the code to handle tied undef uses.

Differential Revision: https://reviews.llvm.org/D110944
This commit is contained in:
parent
61ecfc6f9d
commit
dff3454bda
|
@ -1335,7 +1335,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
|
||||||
// Return true if any tied operands where found, including the trivial ones.
|
// Return true if any tied operands where found, including the trivial ones.
|
||||||
bool TwoAddressInstructionPass::
|
bool TwoAddressInstructionPass::
|
||||||
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
|
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
|
||||||
const MCInstrDesc &MCID = MI->getDesc();
|
|
||||||
bool AnyOps = false;
|
bool AnyOps = false;
|
||||||
unsigned NumOps = MI->getNumOperands();
|
unsigned NumOps = MI->getNumOperands();
|
||||||
|
|
||||||
|
@ -1357,10 +1356,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
|
||||||
// Deal with undef uses immediately - simply rewrite the src operand.
|
// Deal with undef uses immediately - simply rewrite the src operand.
|
||||||
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
|
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
|
||||||
// Constrain the DstReg register class if required.
|
// Constrain the DstReg register class if required.
|
||||||
if (DstReg.isVirtual())
|
if (DstReg.isVirtual()) {
|
||||||
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
|
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
|
||||||
TRI, *MF))
|
MRI->constrainRegClass(DstReg, RC);
|
||||||
MRI->constrainRegClass(DstReg, RC);
|
}
|
||||||
SrcMO.setReg(DstReg);
|
SrcMO.setReg(DstReg);
|
||||||
SrcMO.setSubReg(0);
|
SrcMO.setSubReg(0);
|
||||||
LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
|
LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after twoaddressinstruction < %s | FileCheck %s
|
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after twoaddressinstruction < %s | FileCheck %s
|
||||||
|
|
||||||
; FIXME: the operand "undef %16.sub0_sub1:sgpr_96" will fail machine
|
; Check that %16 gets constrained to register class sgpr_96_with_sub0_sub1.
|
||||||
; verification because sgpr_96 does not fully support sub0_sub1.
|
|
||||||
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
|
define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
|
||||||
; CHECK-LABEL: name: s_load_constant_v3i32_align4
|
; CHECK-LABEL: name: s_load_constant_v3i32_align4
|
||||||
; CHECK: bb.0 (%ir-block.0):
|
; CHECK: bb.0 (%ir-block.0):
|
||||||
|
@ -14,9 +13,9 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)*
|
||||||
; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]]
|
; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]]
|
||||||
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
|
||||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4)
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4)
|
||||||
; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96 = COPY killed [[S_LOAD_DWORDX2_IMM]]
|
; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]]
|
||||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY killed %16
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16
|
||||||
; CHECK-NEXT: [[COPY2]].sub2:sgpr_96 = COPY undef [[S_LOAD_DWORD_IMM]]
|
; CHECK-NEXT: [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]]
|
||||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0
|
||||||
; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]]
|
; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]]
|
||||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1
|
||||||
|
|
Loading…
Reference in New Issue