Improve sub-register def handling in ProcessImplicitDefs.

This boils down to using MachineOperand::readsReg() more.

This fixes PR11829 where a use ended up after the first def when
lowering REG_SEQUENCE instructions involving IMPLICIT_DEFs.

llvm-svn: 148996
This commit is contained in:
Jakob Stoklund Olesen 2012-01-25 23:36:27 +00:00
parent 9562f39e2f
commit 4864a81aa3
2 changed files with 34 additions and 9 deletions

View File

@ -50,10 +50,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
SmallSet<unsigned, 8> &ImpDefRegs) {
switch(OpIdx) {
case 1:
return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
case 2:
return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
default: return false;
}
@ -66,7 +66,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
MachineOperand &MO1 = MI->getOperand(1);
if (MO1.getReg() != Reg)
return false;
if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
return true;
return false;
}
@ -105,7 +105,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *MI = &*I;
++I;
if (MI->isImplicitDef()) {
if (MI->getOperand(0).getSubReg())
ImpDefMIs.push_back(MI);
// Is this a sub-register read-modify-write?
if (MI->getOperand(0).readsReg())
continue;
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
@ -113,12 +115,11 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
ImpDefRegs.insert(*SS);
}
ImpDefMIs.push_back(MI);
continue;
}
// Eliminate %reg1032:sub<def> = COPY undef.
if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
if (MI->isCopy() && MI->getOperand(0).readsReg()) {
MachineOperand &MO = MI->getOperand(1);
if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
if (MO.isKill()) {
@ -140,7 +141,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
if (!MO.isReg() || !MO.readsReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@ -172,10 +173,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
// Make sure other uses of
// Make sure other reads of Reg are also marked <undef>.
for (unsigned j = i+1; j != e; ++j) {
MachineOperand &MOJ = MI->getOperand(j);
if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
MOJ.setIsUndef();
}
ImpDefRegs.erase(Reg);

View File

@ -1,5 +1,6 @@
; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
; PR11841
; PR11829
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-eabi"
@ -37,7 +38,30 @@ bb:
ret void
}
define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
entry:
br i1 undef, label %for.end, label %cond.end295
cond.end295: ; preds = %entry
%shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
%shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
%shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
%0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
%1 = bitcast <4 x float> undef to <2 x i64>
%shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
%shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
%shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
%2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
unreachable
for.end: ; preds = %entry
ret void
}
declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}