diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 35594ecbde16..00a9ea445bcd 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1596,6 +1596,10 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
 
   LLVM_DEBUG(dbgs() << "Folded " << *RS << " into " << *UseMI);
 
+  // Erase the REG_SEQUENCE eagerly, unless we followed a chain of COPY users,
+  // in which case we can erase them all later in runOnMachineFunction.
+  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
+    MI.eraseFromParentAndMarkDBGValuesForRemoval();
   return true;
 }
 
@@ -1786,8 +1790,24 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 
       // If we managed to fold all uses of this copy then we might as well
       // delete it now.
-      if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
-        MI.eraseFromParentAndMarkDBGValuesForRemoval();
+      // The only reason we need to follow chains of copies here is that
+      // tryFoldRegSequence looks forward through copies before folding a
+      // REG_SEQUENCE into its eventual users.
+      auto *InstToErase = &MI;
+      while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
+        auto &SrcOp = InstToErase->getOperand(1);
+        auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
+        InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
+        InstToErase = nullptr;
+        if (!SrcReg || SrcReg.isPhysical())
+          break;
+        InstToErase = MRI->getVRegDef(SrcReg);
+        if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
+          break;
+      }
+      if (InstToErase && InstToErase->isRegSequence() &&
+          MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg()))
+        InstToErase->eraseFromParentAndMarkDBGValuesForRemoval();
     }
   }
   return true;
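
For reviewers who want to see the new erasure walk in isolation: the sketch below models it with hypothetical stand-in types (Op, Inst, Function) instead of LLVM's MachineInstr/MachineRegisterInfo, so it compiles on its own. It is an illustration of the loop's shape under those simplifying assumptions, not the pass itself: a use count reaching zero stands in for use_nodbg_empty(), Op::Copy stands in for TII->isFoldableCopy(), and SrcReg < 0 stands in for the non-register / physical-register bailout.

// Standalone sketch of the copy-chain erasure walk added above. All types
// here (Op, Inst, Function) are hypothetical stand-ins for illustration;
// the real pass operates on MachineInstr via MachineRegisterInfo.
#include <cassert>
#include <map>

enum class Op { Copy, RegSequence, Other };

struct Inst {
  Op Opcode;
  int DefReg;        // virtual register this instruction defines
  int SrcReg;        // source register, or -1 if the source is not a register
  bool Erased = false;
};

struct Function {
  std::map<int, Inst *> DefOf;   // reg -> defining instruction
  std::map<int, int> UseCount;   // reg -> number of remaining (non-debug) uses

  void erase(Inst &I) {
    I.Erased = true;
    if (I.SrcReg >= 0)
      --UseCount[I.SrcReg];      // erasing the user frees one use of its source
  }
};

// Mirrors the new loop in runOnMachineFunction: erase MI if its result is
// dead, then walk up through the chain of foldable copies feeding it,
// erasing each one that becomes dead in turn. If the walk stops at a
// REG_SEQUENCE whose result is now dead, erase that too -- the case
// tryFoldRegSequence deliberately leaves behind.
void eraseDeadCopyChain(Function &F, Inst &MI) {
  Inst *InstToErase = &MI;
  while (F.UseCount[InstToErase->DefReg] == 0) {
    int SrcReg = InstToErase->SrcReg;
    F.erase(*InstToErase);
    InstToErase = nullptr;
    if (SrcReg < 0)                      // cf. !SrcReg || SrcReg.isPhysical()
      break;
    InstToErase = F.DefOf[SrcReg];
    if (!InstToErase || InstToErase->Opcode != Op::Copy)  // cf. isFoldableCopy
      break;
  }
  if (InstToErase && InstToErase->Opcode == Op::RegSequence &&
      F.UseCount[InstToErase->DefReg] == 0)
    F.erase(*InstToErase);
}

int main() {
  // %0 = REG_SEQUENCE ...; %1 = COPY %0 -- and the fold has just removed
  // the last real use of %1.
  Function F;
  Inst RS{Op::RegSequence, /*DefReg=*/0, /*SrcReg=*/-1};
  Inst Cp{Op::Copy, /*DefReg=*/1, /*SrcReg=*/0};
  F.DefOf[0] = &RS;
  F.DefOf[1] = &Cp;
  F.UseCount[0] = 1;   // %0 is used only by the COPY
  F.UseCount[1] = 0;   // all uses of %1 were folded away
  eraseDeadCopyChain(F, Cp);
  assert(Cp.Erased && RS.Erased);  // both the COPY and the REG_SEQUENCE die
}

Note the pattern the real loop also relies on: InstToErase is reset to nullptr immediately after each erase, so the trailing REG_SEQUENCE check can only fire when the walk actually advanced to a live defining instruction, never on an already-erased copy.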