From dbda30e2947b0c1339bb080b21475b2a44ca5fd5 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 28 Jul 2022 11:43:46 +0900 Subject: [PATCH] [AMDGPU][SIFoldOperands] Clear kills when folding COPY Clear all kill flags on source register when folding a COPY. This is necessary because the kills may now be out of order with the uses. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D130622 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 + .../CodeGen/AMDGPU/si-fold-copy-kills.mir | 66 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 99aa8a60b04f..2e76679f400d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -737,6 +737,9 @@ void SIFoldOperands::foldOperand( CopiesToReplace.push_back(UseMI); OpToFold.setIsKill(false); + // Remove kill flags as kills may now be out of order with uses. + MRI->clearKillFlags(OpToFold.getReg()); + // That is very tricky to store a value into an AGPR. v_accvgpr_write_b32 // can only accept VGPR or inline immediate. Recreate a reg_sequence with // its initializers right here, so we will rematerialize immediates and diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir b/llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir new file mode 100644 index 000000000000..51f6a11ae56f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir @@ -0,0 +1,66 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +--- +name: fold_reg_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: fold_reg_kill + ; GCN: liveins: $sgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY]], implicit-def $scc + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[COPY1]] + %0:sreg_32 = COPY $sgpr0 + %1:sreg_32 = COPY %0 + %2:sreg_32 = S_ADD_U32 killed %0, killed %0, implicit-def $scc + %3:sreg_32 = COPY %1 + S_ENDPGM 0, implicit %2, implicit %3 +... + +--- +name: fold_subreg_kill +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: fold_subreg_kill + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (load (s128), align 4, addrspace 4) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub1 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[COPY5]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET [[DEF]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) + bb.0: + liveins: $sgpr0_sgpr1 + + %0:sgpr_64(p4) = COPY $sgpr0_sgpr1 + %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0:sgpr_64(p4), 9, 0 :: (load (s128), align 4, addrspace 4) + %2:sreg_64_xexec = COPY %1.sub0_sub1:sgpr_128 + %3:sreg_64_xexec = COPY killed %1.sub2_sub3:sgpr_128 + %4:sreg_64 = COPY %2:sreg_64_xexec + %5:sreg_32 = COPY %3.sub1:sreg_64_xexec + + bb.1: + %6:sreg_32 = COPY %4.sub1:sreg_64 + %7:sreg_32 = COPY %4.sub0:sreg_64 + %8:sreg_32 = COPY %5:sreg_32 + %9:sreg_32 = S_MOV_B32 -1 + %10:sgpr_128 = REG_SEQUENCE killed %6:sreg_32, %subreg.sub0, killed %7:sreg_32, %subreg.sub1, killed %8:sreg_32, %subreg.sub2, killed %9:sreg_32, %subreg.sub3 + %11:vgpr_32 = IMPLICIT_DEF + BUFFER_STORE_DWORD_OFFSET %11:vgpr_32, killed %10:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) +... +