[X86] Enable RRL part of the LEA optimization pass for -O2.

Enable "Remove Redundant LEAs" part of the LEA optimization pass for -O2.
This gives a 6.4% performance improvement on Broadwell on the nnet benchmark from Coremark-pro.
There is no significant effect on other benchmarks (Geekbench, Spec2000, Spec2006).

Differential Revision: http://reviews.llvm.org/D19659

llvm-svn: 270036
This commit is contained in:
Andrey Turetskiy 2016-05-19 10:18:29 +00:00
parent 3f64bb9618
commit 45b22a4aff
2 changed files with 47 additions and 30 deletions

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines the pass that performs some optimizations with LEA
// instructions in order to improve code size.
// instructions in order to improve performance and code size.
// Currently, it does two things:
// 1) If there are two LEA instructions calculating addresses which only differ
// by displacement inside a basic block, one of them is removed.
@ -614,9 +614,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
// Perform this optimization only if we care about code size.
if (DisableX86LEAOpt || skipFunction(*MF.getFunction()) ||
!MF.getFunction()->optForSize())
if (DisableX86LEAOpt || skipFunction(*MF.getFunction()))
return false;
MRI = &MF.getRegInfo();
@ -635,13 +633,13 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (LEAs.empty())
continue;
// Remove redundant LEA instructions. The optimization may have a negative
// effect on performance, so do it only for -Oz.
if (MF.getFunction()->optForMinSize())
Changed |= removeRedundantLEAs(LEAs);
// Remove redundant LEA instructions.
Changed |= removeRedundantLEAs(LEAs);
// Remove redundant address calculations.
Changed |= removeRedundantAddrCalc(LEAs);
// Remove redundant address calculations. Do it only for -Os/-Oz since only
// a code size gain is expected from this part of the pass.
if (MF.getFunction()->optForSize())
Changed |= removeRedundantAddrCalc(LEAs);
}
return Changed;

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=ENABLED
; RUN: llc --disable-x86-lea-opt < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=DISABLED
%struct.anon1 = type { i32, i32, i32 }
%struct.anon2 = type { i32, [32 x i32], i32 }
@ -38,12 +39,14 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
; CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}}
; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
}
define void @test2(i64 %x) nounwind optsize {
@ -75,15 +78,20 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
ret void
; CHECK-LABEL: test2:
; CHECK: shlq $2, [[REG1:%[a-z]+]]
; DISABLED: movl arr1([[REG1]],[[REG1]],2), {{.*}}
; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
; CHECK: movl -4([[REG2]]), {{.*}}
; CHECK: subl ([[REG2]]), {{.*}}
; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
; CHECK: addl ([[REG3]]), {{.*}}
; ENABLED: movl -4([[REG2]]), {{.*}}
; ENABLED: subl ([[REG2]]), {{.*}}
; ENABLED: addl 4([[REG2]]), {{.*}}
; DISABLED: subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
; DISABLED: addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
}
; Check that LEA optimization pass takes into account a resultant address
@ -109,7 +117,9 @@ sw.bb.1: ; preds = %entry
sw.bb.2: ; preds = %entry
store i32 333, i32* %a, align 4
store i32 444, i32* %b, align 4
; Make sure the REG3's definition LEA won't be removed as redundant.
%cvt = ptrtoint i32* %b to i32
store i32 %cvt, i32* %b, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
@ -122,12 +132,14 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
; REG3's definition is closer to movl than REG2's, but the pass still chooses
; REG2 because it provides the resultant address displacement fitting 1 byte.
; CHECK: movl ([[REG2]]), {{.*}}
; CHECK: addl ([[REG3]]), {{.*}}
; ENABLED: movl ([[REG2]]), {{.*}}
; ENABLED: addl ([[REG3]]), {{.*}}
; DISABLED: movl arr2+132([[REG1]]), {{.*}}
; DISABLED: addl arr2([[REG1]]), {{.*}}
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
; CHECK: movl {{.*}}, ([[REG3]])
}
define void @test4(i64 %x) nounwind minsize {
@ -158,12 +170,19 @@ sw.bb.2: ; preds = %entry
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
ret void
; CHECK-LABEL: test4:
; CHECK: leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
; CHECK: movl -4([[REG2]]), {{.*}}
; CHECK: subl ([[REG2]]), {{.*}}
; CHECK: addl 4([[REG2]]), {{.*}}
; CHECK: imulq {{.*}}, [[REG1:%[a-z]+]]
; DISABLED: movl arr1([[REG1]]), {{.*}}
; CHECK: leaq arr1+4([[REG1]]), [[REG2:%[a-z]+]]
; ENABLED: movl -4([[REG2]]), {{.*}}
; ENABLED: subl ([[REG2]]), {{.*}}
; ENABLED: addl 4([[REG2]]), {{.*}}
; DISABLED: subl arr1+4([[REG1]]), {{.*}}
; DISABLED: leaq arr1+8([[REG1]]), [[REG3:%[a-z]+]]
; DISABLED: addl arr1+8([[REG1]]), {{.*}}
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
; ENABLED: movl ${{[1-4]+}}, 4([[REG2]])
; DISABLED: movl ${{[1-4]+}}, ([[REG3]])
}