From aa036eedb85412edcb1c0c37c27227608c29cb9c Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Fri, 4 Mar 2011 20:46:46 +0000
Subject: [PATCH] When declining to reuse existing expressions that involve
 casts, ignore bitcasts, which are really no-ops here. This fixes slowdowns
 on MultiSource/Applications/aha and others.

llvm-svn: 127031
---
 llvm/lib/Analysis/ScalarEvolutionExpander.cpp |  2 +-
 .../CodeGen/X86/lsr-redundant-addressing.ll   | 45 +++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/lsr-redundant-addressing.ll

diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index f2d23c4be708..76a94ea27467 100644
--- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -859,7 +859,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   // to be.
   do {
     if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
-        isa<CastInst>(IncV)) {
+        (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) {
       IncV = 0;
       break;
     }
diff --git a/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll b/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll
new file mode 100644
index 000000000000..aaa1426918f2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=x86-64 < %s | fgrep {addq $-16,} | count 1
+; rdar://9081094
+
+; LSR shouldn't create lots of redundant address computations.
+
+%0 = type { i32, [3 x i32] }
+%1 = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
+
+@pgm = external hidden unnamed_addr global [5 x %0], align 32
+@isa = external hidden unnamed_addr constant [13 x %1], align 32
+
+define void @main_bb.i() nounwind {
+bb:
+  br label %bb38
+
+bb38:                                             ; preds = %bb200, %bb
+  %tmp39 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb ]
+  %tmp40 = sub i64 0, %tmp39
+  %tmp47 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0
+  %tmp34 = load i32* %tmp47, align 16
+  %tmp203 = icmp slt i32 %tmp34, 12
+  br i1 %tmp203, label %bb215, label %bb200
+
+bb200:                                            ; preds = %bb38
+  %tmp201 = add i64 %tmp39, 1
+  br label %bb38
+
+bb215:                                            ; preds = %bb38
+  %tmp50 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 2
+  %tmp49 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 1
+  %tmp48 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 0
+  %tmp216 = add nsw i32 %tmp34, 1
+  store i32 %tmp216, i32* %tmp47, align 16
+  %tmp217 = sext i32 %tmp216 to i64
+  %tmp218 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0
+  %tmp219 = load i32* %tmp218, align 8
+  store i32 %tmp219, i32* %tmp48, align 4
+  %tmp220 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1
+  %tmp221 = load i32* %tmp220, align 4
+  store i32 %tmp221, i32* %tmp49, align 4
+  %tmp222 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2
+  %tmp223 = load i32* %tmp222, align 8
+  store i32 %tmp223, i32* %tmp50, align 4
+  ret void
+}