From 4e55044ff52029c2345639008aab5c7a1f21cfb8 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 29 Mar 2012 17:22:39 +0000 Subject: [PATCH] Don't PRE compares. CodeGenPrepare sinks compare instructions down to their uses to prevent live flags and predicate registers across basic blocks. PRE of a compare instruction prevents that, forcing the i1 compare result into a general purpose register. That is usually more expensive than the redundant compare PRE was trying to eliminate in the first place. llvm-svn: 153657 --- llvm/lib/Transforms/Scalar/GVN.cpp | 9 +++- llvm/test/Transforms/GVN/pre-compare.ll | 68 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/GVN/pre-compare.ll diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 8b79d27b9521..38afe1b83d11 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2328,7 +2328,14 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa(CurInst)) continue; - + + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from + // sinking the compare again, and it would force the code generator to + // move the i1 from processor flags or predicate registers into a general + // purpose register. + if (isa<CmpInst>(CurInst)) + continue; + // We don't currently value number ANY inline asm calls. if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) if (CallI->isInlineAsm()) diff --git a/llvm/test/Transforms/GVN/pre-compare.ll b/llvm/test/Transforms/GVN/pre-compare.ll new file mode 100644 index 000000000000..18d0c2e1085c --- /dev/null +++ b/llvm/test/Transforms/GVN/pre-compare.ll @@ -0,0 +1,68 @@ +; RUN: opt -gvn -S < %s | FileCheck %s + +; C source: +; +; void f(int x) { +; if (x != 1) +; puts (x == 2 ? 
"a" : "b"); +; for (;;) { +; puts("step 1"); +; if (x == 2) +; continue; +; printf("step 2: %d\n", x); +; } +; } +; +; If we PRE %cmp3, CodeGenPrepare won't be able to sink the compare down to its +; uses, and we are forced to keep both %x and %cmp3 in registers in the loop. +; +; It is just as cheap to recompute the icmp against %x as it is to compare a +; GPR against 0. On x86-64, the br i1 %cmp3 becomes: +; +; testb %r12b, %r12b +; jne LBB0_3 +; +; The sunk icmp is: +; +; cmpl $2, %ebx +; je LBB0_3 +; +; This is just as good, and it doesn't require a separate register. +; +; CHECK-NOT: phi i1 + +@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1 +@.str1 = private unnamed_addr constant [2 x i8] c"b\00", align 1 +@.str2 = private unnamed_addr constant [7 x i8] c"step 1\00", align 1 +@.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1 + +define void @f(i32 %x) noreturn nounwind uwtable ssp { +entry: + %cmp = icmp eq i32 %x, 1 + br i1 %cmp, label %for.cond.preheader, label %if.then + +if.then: ; preds = %entry + %cmp1 = icmp eq i32 %x, 2 + %cond = select i1 %cmp1, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0) + %call = tail call i32 @puts(i8* %cond) nounwind + br label %for.cond.preheader + +for.cond.preheader: ; preds = %entry, %if.then + %cmp3 = icmp eq i32 %x, 2 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %for.cond.preheader + %call2 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) nounwind + br i1 %cmp3, label %for.cond.backedge, label %if.end5 + +if.end5: ; preds = %for.cond + %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind + br label %for.cond.backedge + +for.cond.backedge: ; preds = %if.end5, %for.cond + br label %for.cond +} + +declare i32 @puts(i8* nocapture) nounwind + +declare i32 @printf(i8* nocapture, ...) nounwind