Avoid creating canonical induction variables for non-native types.

For example, on 32-bit architecture, don't promote all uses of the IV
to 64-bits just because one use is a 64-bit cast.
Alternate implementation of the patch by Arnaud de Grandmaison.

llvm-svn: 127884
This commit is contained in:
Andrew Trick 2011-03-18 16:50:32 +00:00
parent 6e31408af1
commit 1c4b42d00f
20 changed files with 30 additions and 19 deletions

View File

@ -28,6 +28,7 @@ class IVUsers;
class ScalarEvolution;
class SCEV;
class IVUsers;
class TargetData;
/// IVStrideUse - Keep track of one use of a strided induction variable.
/// The Expr member keeps track of the expression, User is the actual user
@ -122,6 +123,7 @@ class IVUsers : public LoopPass {
LoopInfo *LI;
DominatorTree *DT;
ScalarEvolution *SE;
TargetData *TD;
SmallPtrSet<Instruction*,16> Processed;
/// IVUses - A list of all tracked IV uses of induction variable expressions

View File

@ -21,6 +21,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@ -83,7 +84,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return false; // Void and FP expressions cannot be reduced.
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
if (SE->getTypeSizeInBits(I->getType()) > 64)
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
if (!Processed.insert(I))
@ -167,6 +171,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
TD = getAnalysisIfAvailable<TargetData>();
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for

View File

@ -1,6 +1,6 @@
; RUN: opt < %s -indvars -S | not grep {sext}
; ModuleID = '<stdin>'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
target triple = "x86_64-apple-darwin9.6"
@a = external global i32* ; <i32**> [#uses=3]
@b = external global i32* ; <i32**> [#uses=3]

View File

@ -13,7 +13,7 @@
; d[(i+2)&15] = e[(i+2)&15]+f[(i+2)&15]+K[i+2];
; }
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
target triple = "x86_64-apple-darwin9.6"
@a = external global i32* ; <i32**> [#uses=3]
@b = external global i32* ; <i32**> [#uses=3]

View File

@ -11,7 +11,7 @@
; count without casting.
; ModuleID = 'ada.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
target triple = "i686-pc-linux-gnu"
define void @kinds__sbytezero([256 x i32]* nocapture %a) nounwind {

View File

@ -9,7 +9,7 @@
; be able to reconstruct the full getelementptr, despite it having a few
; obstacles set in its way.
target datalayout = "e-p:64:64:64"
target datalayout = "e-p:64:64:64-n:32:64"
define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
entry:

View File

@ -4,7 +4,7 @@
; Indvars should be able to eliminate all of the sign extensions
; inside the loop.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
@pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
@pow_2_025_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]
@i_pow_2_tab = external constant [0 x float] ; <[0 x float]*> [#uses=1]

View File

@ -6,7 +6,7 @@
; inner loop to i64.
; TODO: it should promote hiPart to i64 in the outer loop too.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
define void @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
entry:

View File

@ -2,7 +2,7 @@
; RUN: not grep and %t
; RUN: not grep zext %t
target datalayout = "-p:64:64:64"
target datalayout = "-p:64:64:64-n:32:64"
define void @foo(double* %d, i64 %n) nounwind {
entry:

View File

@ -2,7 +2,7 @@
; RUN: grep {icmp ugt i8\\\*} %t | count 1
; RUN: grep {icmp sgt i8\\\*} %t | count 1
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
%struct.CKenCodeCodec = type <{ i8 }>

View File

@ -9,7 +9,7 @@
; Indvars should be able to expand the pointer-arithmetic
; IV into an integer IV indexing into a simple getelementptr.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-n:32:64"
define void @foo(i8* %A, i64 %n) nounwind {
entry:

View File

@ -2,7 +2,7 @@
; RUN: not grep inttoptr %t
; RUN: not grep ptrtoint %t
; RUN: grep scevgep %t
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.

View File

@ -13,7 +13,7 @@
; FIXME: This test should pass with or without TargetData. Until opt
; supports running tests without targetdata, just hardware this in.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
%struct.Q = type { [10 x %struct.N] }
%struct.N = type { %struct.S }

View File

@ -1,6 +1,6 @@
; RUN: opt < %s -indvars -S \
; RUN: | grep {\[%\]p.2.ip.1 = getelementptr \\\[3 x \\\[3 x double\\\]\\\]\\* \[%\]p, i64 2, i64 \[%\]tmp, i64 1}
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n:32:64"
; Indvars shouldn't expand this to
; %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19

View File

@ -6,7 +6,7 @@
; Indvars shouldn't leave getelementptrs expanded out as
; inttoptr+ptrtoint in its output in common cases.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
target triple = "x86_64-unknown-linux-gnu"
%struct.Foo = type { i32, i32, [10 x i32], i32 }

View File

@ -7,7 +7,7 @@
; that indvars can promote the induction variable to i64
; without needing casts.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n:32:64"
define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
entry:

View File

@ -1,7 +1,8 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
target datalayout = "e-p:32:32:32-n:8:16:32"
declare i1 @pred()
declare i32 @foo()

View File

@ -1,7 +1,8 @@
; Check that the index of 'P[outer]' is pulled out of the loop.
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | \
; RUN: opt < %s -loop-reduce -S | \
; RUN: not grep {getelementptr.*%outer.*%INDVAR}
target datalayout = "e-p:32:32:32-n:32"
declare i1 @pred()
define void @test([10000 x i32]* %P, i32 %outer) {

View File

@ -1,7 +1,9 @@
; Check that this test makes INDVAR and related stuff dead, because P[indvar]
; gets reduced, making INDVAR dead.
; RUN: opt < %s -loop-reduce -S -default-data-layout="e-p:32:32:32" | not grep INDVAR
; RUN: opt < %s -loop-reduce -S | not grep INDVAR
target datalayout = "e-p:32:32:32-n:32"
declare i1 @pred()

View File

@ -9,7 +9,7 @@
; mul uint %i, 3
target datalayout = "e-p:32:32"
target datalayout = "e-p:32:32-n:32"
target triple = "i686-apple-darwin8"
@flags2 = external global [8193 x i8], align 32 ; <[8193 x i8]*> [#uses=1]