instcombine: Migrate ffs* optimizations

This patch migrates the ffs* optimizations from the simplify-libcalls
pass into the instcombine library call simplifier.

llvm-svn: 168571
This commit is contained in:
Meador Inge 2012-11-25 20:45:27 +00:00
parent ee7ede76f4
commit 7415f8403d
4 changed files with 180 additions and 86 deletions

View File

@ -19,7 +19,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@ -100,42 +99,6 @@ namespace {
// Integer Optimizations
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
// 'ffs*' Optimizations
// Simplifies calls to the 'ffs' family of functions: a constant argument is
// folded to a constant result, and any other argument is rewritten in terms
// of the llvm.cttz intrinsic.
struct FFSOpt : public LibCallOptimization {
// Returns the value that replaces the call CI to Callee (emitted through B),
// or 0 when Callee's prototype is not the one handled here.
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
FunctionType *FT = Callee->getFunctionType();
// Only handle the expected prototype: exactly one integer parameter and an
// i32 return type.
if (FT->getNumParams() != 1 ||
!FT->getReturnType()->isIntegerTy(32) ||
!FT->getParamType(0)->isIntegerTy())
return 0;
Value *Op = CI->getArgOperand(0);
// Constant fold.
// NOTE: the 'CI' declared in this condition is the constant argument; it
// shadows the CallInst parameter of the same name inside the if-body.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->isZero()) // ffs(0) -> 0.
return B.getInt32(0);
// ffs(c) -> cttz(c)+1
return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
Type *ArgType = Op->getType();
// Declare (or reuse) llvm.cttz overloaded on the argument's integer type.
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
// Trailing-zero count is 0-based; ffs results are 1-based.
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
// Bring the result to the i32 return type (unsigned cast).
V = B.CreateIntCast(V, B.getInt32Ty(), false);
// A zero argument has no set bit, so select 0 in that case.
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
//===---------------------------------------===//
// 'isdigit' Optimizations
@ -579,7 +542,7 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
@ -640,9 +603,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// we know.
void SimplifyLibCalls::InitOptimizations() {
// Integer Optimizations
Optimizations["ffs"] = &FFS;
Optimizations["ffsl"] = &FFS;
Optimizations["ffsll"] = &FFS;
Optimizations["abs"] = &Abs;
Optimizations["labs"] = &Abs;
Optimizations["llabs"] = &Abs;

View File

@ -20,6 +20,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetLibraryInfo.h"
@ -1212,6 +1213,43 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
}
};
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
/// Simplifies calls to the 'ffs' family of functions: a constant argument is
/// folded to a constant result, and any other argument is rewritten in terms
/// of the llvm.cttz intrinsic.
struct FFSOpt : public LibCallOptimization {
  /// \param Callee the function being called; its prototype is validated.
  /// \param CI the call site whose single argument is inspected.
  /// \param B builder used to emit the replacement instructions.
  /// \returns the value replacing the call, or 0 when the prototype of
  /// \p Callee is not the one handled here.
  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
    FunctionType *FT = Callee->getFunctionType();
    // Only handle the expected prototype: exactly one integer parameter and
    // an i32 return type.
    if (FT->getNumParams() != 1 ||
        !FT->getReturnType()->isIntegerTy(32) ||
        !FT->getParamType(0)->isIntegerTy())
      return 0;

    Value *Op = CI->getArgOperand(0);

    // Constant fold. The constant is named 'C' so that it does not shadow the
    // CallInst parameter 'CI'.
    if (ConstantInt *C = dyn_cast<ConstantInt>(Op)) {
      if (C->isZero()) // ffs(0) -> 0.
        return B.getInt32(0);
      // ffs(c) -> cttz(c)+1
      return B.getInt32(C->getValue().countTrailingZeros() + 1);
    }

    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
    Type *ArgType = Op->getType();
    // Declare (or reuse) llvm.cttz overloaded on the argument's integer type.
    Value *F = Intrinsic::getDeclaration(Callee->getParent(),
                                         Intrinsic::cttz, ArgType);
    Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
    // Trailing-zero count is 0-based; ffs results are 1-based.
    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
    // Bring the result to the i32 return type (unsigned cast).
    V = B.CreateIntCast(V, B.getInt32Ty(), false);
    // A zero argument has no set bit, so select 0 in that case.
    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
    return B.CreateSelect(Cond, V, B.getInt32(0));
  }
};
} // End anonymous namespace.
namespace llvm {
@ -1258,6 +1296,9 @@ class LibCallSimplifierImpl {
UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
// Integer library call optimizations.
FFSOpt FFS;
void initOptimizations();
void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
@ -1367,6 +1408,11 @@ void LibCallSimplifierImpl::initOptimizations() {
Optimizations["llvm.exp2.f80"] = &Exp2;
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
// Integer library call optimizations.
addOpt(LibFunc::ffs, &FFS);
addOpt(LibFunc::ffsl, &FFS);
addOpt(LibFunc::ffsll, &FFS);
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {

View File

@ -0,0 +1,133 @@
; Test that the ffs* library call simplifier works correctly.
; RUN: opt < %s -instcombine -S | FileCheck %s
; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
declare i32 @ffs(i32)
declare i32 @ffsl(i32)
declare i32 @ffsll(i64)
; Check ffs(0) -> 0.
; ffs with a constant zero argument: the call should fold to 'ret i32 0'.
define i32 @test_simplify1() {
; CHECK: @test_simplify1
%ret = call i32 @ffs(i32 0)
ret i32 %ret
; CHECK-NEXT: ret i32 0
}
; ffsl with a constant zero argument: expected to fold to 'ret i32 0'
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify2() {
; CHECK-LINUX: @test_simplify2
%ret = call i32 @ffsl(i32 0)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 0
}
; ffsll with a constant zero i64 argument: expected to fold to 'ret i32 0'
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify3() {
; CHECK-LINUX: @test_simplify3
%ret = call i32 @ffsll(i64 0)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 0
}
; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
; ffs(1): the lowest set bit is bit 0, so the 1-based result is 1.
define i32 @test_simplify4() {
; CHECK: @test_simplify4
%ret = call i32 @ffs(i32 1)
ret i32 %ret
; CHECK-NEXT: ret i32 1
}
; ffs(2048): 2048 = 2^11, so the 1-based result is 12.
define i32 @test_simplify5() {
; CHECK: @test_simplify5
%ret = call i32 @ffs(i32 2048)
ret i32 %ret
; CHECK-NEXT: ret i32 12
}
; ffs(65536): 65536 = 2^16, so the 1-based result is 17.
define i32 @test_simplify6() {
; CHECK: @test_simplify6
%ret = call i32 @ffs(i32 65536)
ret i32 %ret
; CHECK-NEXT: ret i32 17
}
; ffsl(65536): 65536 = 2^16, so the 1-based result is 17
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify7() {
; CHECK-LINUX: @test_simplify7
%ret = call i32 @ffsl(i32 65536)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 17
}
; ffsll(1024): 1024 = 2^10, so the 1-based result is 11
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify8() {
; CHECK-LINUX: @test_simplify8
%ret = call i32 @ffsll(i64 1024)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 11
}
; ffsll(65536): 65536 = 2^16, so the 1-based result is 17
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify9() {
; CHECK-LINUX: @test_simplify9
%ret = call i32 @ffsll(i64 65536)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 17
}
; ffsll(17179869184): the constant is 2^34 (beyond 32 bits), so the 1-based
; result is 35 (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify10() {
; CHECK-LINUX: @test_simplify10
%ret = call i32 @ffsll(i64 17179869184)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 35
}
; ffsll(281474976710656): the constant is 2^48, so the 1-based result is 49
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify11() {
; CHECK-LINUX: @test_simplify11
%ret = call i32 @ffsll(i64 281474976710656)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 49
}
; ffsll(1152921504606846976): the constant is 2^60, so the 1-based result is
; 61 (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify12() {
; CHECK-LINUX: @test_simplify12
%ret = call i32 @ffsll(i64 1152921504606846976)
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 61
}
; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
; Non-constant i32 argument to ffs: the call is expected to become
; cttz(%x) + 1 guarded by a select on %x != 0. No cast appears in the
; expected output because the cttz result is already i32.
define i32 @test_simplify13(i32 %x) {
; CHECK: @test_simplify13
%ret = call i32 @ffs(i32 %x)
; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
; CHECK-NEXT: ret i32 [[RET]]
}
; Non-constant i32 argument to ffsl: same expected expansion as for ffs,
; cttz(%x) + 1 guarded by a select on %x != 0
; (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify14(i32 %x) {
; CHECK-LINUX: @test_simplify14
%ret = call i32 @ffsl(i32 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 [[RET]]
}
; Non-constant i64 argument to ffsll: cttz and the +1 are done in i64, the
; sum is truncated to the i32 return type, and the select on %x != 0 picks
; between that and 0 (expectations are tagged for the i386-pc-linux RUN line).
define i32 @test_simplify15(i64 %x) {
; CHECK-LINUX: @test_simplify15
%ret = call i32 @ffsll(i64 %x)
; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
ret i32 %ret
; CHECK-LINUX-NEXT: ret i32 [[RET]]
}

View File

@ -1,45 +0,0 @@
; Test that FFSOpt works correctly
; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
; CHECK-NOT: call{{.*}}@ffs
@non_const = external global i32 ; <i32*> [#uses=1]
declare i32 @ffs(i32)
declare i32 @ffsl(i32)
declare i32 @ffsll(i64)
; Calls ffs once with a value loaded from @non_const and ffs/ffsl/ffsll with
; several power-of-two constants, then sums the results. Only %rslt5 (the sum
; of the constant-argument calls) is returned; %rslt6, which adds in the
; non-constant result %val0, has no uses.
define i32 @main() {
%arg = load i32* @non_const ; <i32> [#uses=1]
%val0 = call i32 @ffs( i32 %arg ) ; <i32> [#uses=1]
%val1 = call i32 @ffs( i32 1 ) ; <i32> [#uses=1]
%val2 = call i32 @ffs( i32 2048 ) ; <i32> [#uses=1]
%val3 = call i32 @ffsl( i32 65536 ) ; <i32> [#uses=1]
%val4 = call i32 @ffsll( i64 1024 ) ; <i32> [#uses=1]
%val5 = call i32 @ffsll( i64 17179869184 ) ; <i32> [#uses=1]
%val6 = call i32 @ffsll( i64 1152921504606846976 ) ; <i32> [#uses=1]
%rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
%rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
%rslt3 = add i32 %val5, %val6 ; <i32> [#uses=1]
%rslt4 = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
%rslt5 = add i32 %rslt4, %rslt3 ; <i32> [#uses=2]
%rslt6 = add i32 %rslt5, %val0 ; <i32> [#uses=0]
ret i32 %rslt5
}
; PR4206
; Passes the unnamed i64 parameter (%0) straight to ffsll and returns the
; result, i.e. the non-constant 64-bit argument case.
define i32 @a(i64) nounwind {
%2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1]
ret i32 %2
}
; PR13028
; ffsll with a constant zero i64 argument: the whole body is expected to
; reduce to 'ret i32 0'.
define i32 @b() nounwind {
%ffs = call i32 @ffsll(i64 0)
ret i32 %ffs
; CHECK: @b
; CHECK-NEXT: ret i32 0
}