forked from OSchip/llvm-project
[X86] Enable RRL part of the LEA optimization pass for -O2.
Enable "Remove Redundant LEAs" part of the LEA optimization pass for -O2. This gives a 6.4% performance improvement on Broadwell on the nnet benchmark from Coremark-pro. There is no significant effect on other benchmarks (Geekbench, Spec2000, Spec2006). Differential Revision: http://reviews.llvm.org/D19659 llvm-svn: 270036
This commit is contained in:
parent
3f64bb9618
commit
45b22a4aff
|
@ -8,7 +8,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the pass that performs some optimizations with LEA
|
||||
// instructions in order to improve code size.
|
||||
// instructions in order to improve performance and code size.
|
||||
// Currently, it does two things:
|
||||
// 1) If there are two LEA instructions calculating addresses which only differ
|
||||
// by displacement inside a basic block, one of them is removed.
|
||||
|
@ -614,9 +614,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
|
|||
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool Changed = false;
|
||||
|
||||
// Perform this optimization only if we care about code size.
|
||||
if (DisableX86LEAOpt || skipFunction(*MF.getFunction()) ||
|
||||
!MF.getFunction()->optForSize())
|
||||
if (DisableX86LEAOpt || skipFunction(*MF.getFunction()))
|
||||
return false;
|
||||
|
||||
MRI = &MF.getRegInfo();
|
||||
|
@ -635,13 +633,13 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (LEAs.empty())
|
||||
continue;
|
||||
|
||||
// Remove redundant LEA instructions. The optimization may have a negative
|
||||
// effect on performance, so do it only for -Oz.
|
||||
if (MF.getFunction()->optForMinSize())
|
||||
Changed |= removeRedundantLEAs(LEAs);
|
||||
// Remove redundant LEA instructions.
|
||||
Changed |= removeRedundantLEAs(LEAs);
|
||||
|
||||
// Remove redundant address calculations.
|
||||
Changed |= removeRedundantAddrCalc(LEAs);
|
||||
// Remove redundant address calculations. Do it only for -Os/-Oz since only
|
||||
// a code size gain is expected from this part of the pass.
|
||||
if (MF.getFunction()->optForSize())
|
||||
Changed |= removeRedundantAddrCalc(LEAs);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=ENABLED
|
||||
; RUN: llc --disable-x86-lea-opt < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=DISABLED
|
||||
|
||||
%struct.anon1 = type { i32, i32, i32 }
|
||||
%struct.anon2 = type { i32, [32 x i32], i32 }
|
||||
|
@ -38,12 +39,14 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
|
|||
; CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}}
|
||||
; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
|
||||
; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
|
||||
; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
|
||||
; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
|
||||
; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
}
|
||||
|
||||
define void @test2(i64 %x) nounwind optsize {
|
||||
|
@ -75,15 +78,20 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
|
|||
ret void
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: shlq $2, [[REG1:%[a-z]+]]
|
||||
; DISABLED: movl arr1([[REG1]],[[REG1]],2), {{.*}}
|
||||
; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
|
||||
; CHECK: movl -4([[REG2]]), {{.*}}
|
||||
; CHECK: subl ([[REG2]]), {{.*}}
|
||||
; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
|
||||
; CHECK: addl ([[REG3]]), {{.*}}
|
||||
; ENABLED: movl -4([[REG2]]), {{.*}}
|
||||
; ENABLED: subl ([[REG2]]), {{.*}}
|
||||
; ENABLED: addl 4([[REG2]]), {{.*}}
|
||||
; DISABLED: subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
|
||||
; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
|
||||
; DISABLED: addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
}
|
||||
|
||||
; Check that LEA optimization pass takes into account a resultant address
|
||||
|
@ -109,7 +117,9 @@ sw.bb.1: ; preds = %entry
|
|||
|
||||
sw.bb.2: ; preds = %entry
|
||||
store i32 333, i32* %a, align 4
|
||||
store i32 444, i32* %b, align 4
|
||||
; Make sure the REG3's definition LEA won't be removed as redundant.
|
||||
%cvt = ptrtoint i32* %b to i32
|
||||
store i32 %cvt, i32* %b, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
|
||||
|
@ -122,12 +132,14 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
|
|||
; REG3's definition is closer to movl than REG2's, but the pass still chooses
|
||||
; REG2 because it provides the resultant address displacement fitting 1 byte.
|
||||
|
||||
; CHECK: movl ([[REG2]]), {{.*}}
|
||||
; CHECK: addl ([[REG3]]), {{.*}}
|
||||
; ENABLED: movl ([[REG2]]), {{.*}}
|
||||
; ENABLED: addl ([[REG3]]), {{.*}}
|
||||
; DISABLED: movl arr2+132([[REG1]]), {{.*}}
|
||||
; DISABLED: addl arr2([[REG1]]), {{.*}}
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; CHECK: movl {{.*}}, ([[REG3]])
|
||||
}
|
||||
|
||||
define void @test4(i64 %x) nounwind minsize {
|
||||
|
@ -158,12 +170,19 @@ sw.bb.2: ; preds = %entry
|
|||
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
|
||||
ret void
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
|
||||
; CHECK: movl -4([[REG2]]), {{.*}}
|
||||
; CHECK: subl ([[REG2]]), {{.*}}
|
||||
; CHECK: addl 4([[REG2]]), {{.*}}
|
||||
; CHECK: imulq {{.*}}, [[REG1:%[a-z]+]]
|
||||
; DISABLED: movl arr1([[REG1]]), {{.*}}
|
||||
; CHECK: leaq arr1+4([[REG1]]), [[REG2:%[a-z]+]]
|
||||
; ENABLED: movl -4([[REG2]]), {{.*}}
|
||||
; ENABLED: subl ([[REG2]]), {{.*}}
|
||||
; ENABLED: addl 4([[REG2]]), {{.*}}
|
||||
; DISABLED: subl arr1+4([[REG1]]), {{.*}}
|
||||
; DISABLED: leaq arr1+8([[REG1]]), [[REG3:%[a-z]+]]
|
||||
; DISABLED: addl arr1+8([[REG1]]), {{.*}}
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue