Remove the restriction that MachineSinking is now stopped by "insert_subreg,

subreg_to_reg, and reg_sequence" instructions.

This is to fix PR28852. The restriction was added in 2010 to improve register
coalescing. We assumed that it was no longer necessary, and testing results on x86
supported that assumption.

We will look closely at any performance impact this change brings and are prepared
to help analyze performance problems found on other architectures.

Differential Revision: https://reviews.llvm.org/D23210

llvm-svn: 278384
This commit is contained in:
Wei Mi 2016-08-11 18:42:56 +00:00
parent a003b76391
commit ec19b35179
2 changed files with 38 additions and 4 deletions

View File

@ -270,11 +270,8 @@ public:
/// MachineSink determines on its own whether the instruction is safe to sink;
/// this gives the target a hook to override the default behavior with regards
/// to which instructions should be sunk.
/// The default behavior is to not sink insert_subreg, subreg_to_reg, and
/// reg_sequence. These are meant to be close to the source to make it easier
/// to coalesce.
virtual bool shouldSink(const MachineInstr &MI) const {
return !MI.isInsertSubreg() && !MI.isSubregToReg() && !MI.isRegSequence();
return true;
}
/// Re-issue the specified 'original' instruction at the

View File

@ -0,0 +1,37 @@
; PR28852: Check machine code sinking is not stopped by SUBREG_TO_REG.
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK: foo
; CHECK-NOT: imull
; CHECK: retq
; CHECK: imull
; Test body for PR28852. The chain %mul -> %add -> %conv -> %mul2 is computed
; in the entry block, but its only users (%add7 and %conv9) are in if.then,
; which makes the chain a candidate for sinking out of the entry block.
define void @foo(i64 %value, i32 %kLengthBits, i32* nocapture %bits, i64* nocapture %bit_buffer_64, i32 %x) local_unnamed_addr {
entry:
; 32-bit multiply whose result is consumed only through %mul2 in if.then.
%mul = mul i32 %x, %kLengthBits
%add = add i32 %mul, 3
; NOTE(review): presumably this zext is what lowers to SUBREG_TO_REG on
; x86-64 and used to block sinking of the chain -- confirm against llc output.
%conv = zext i32 %add to i64
%mul2 = mul nuw nsw i64 %conv, 5
; The comparison feeding the branch does not depend on the multiply chain.
%sub = sub i64 64, %value
%conv4 = trunc i64 %sub to i32
%tmp0 = load i32, i32* %bits, align 4
%cmp = icmp ult i32 %tmp0, %conv4
; !prof !0 attaches branch weights of 1 and 2000 to this branch; presumably
; this marks if.then as the rarely taken successor -- verify weight ordering.
br i1 %cmp, label %if.then, label %if.end, !prof !0
if.then: ; preds = %entry
; Only block that uses %mul2: once in a 64-bit add, once truncated to 32 bits.
%add7 = add i64 %mul2, %value
%tmp1 = load i64, i64* %bit_buffer_64, align 8
%add8 = add i64 %add7, %tmp1
store i64 %add8, i64* %bit_buffer_64, align 8
%conv9 = trunc i64 %mul2 to i32
store i32 %conv9, i32* %bits, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
!0 = !{!"branch_weights", i32 1, i32 2000}