TLI: Add addVectorizableFunctionsFromVecLib.

Also add several entries to the vectorizable-functions table, along with
corresponding tests. The table isn't complete yet; it will be populated later.

Review: http://reviews.llvm.org/D8131
llvm-svn: 232531
This commit is contained in:
Michael Zolotukhin 2015-03-17 19:50:55 +00:00
parent 9b3cf604ce
commit 6d8a2aa976
3 changed files with 232 additions and 0 deletions

View File

@ -71,6 +71,18 @@ class TargetLibraryInfoImpl {
std::vector<VecDesc> ScalarDescs;
public:
/// \brief List of known vector-functions libraries.
///
/// A vector-functions library determines which functions are vectorizable
/// and with which vectorization factor. The library can be selected either
/// by the frontend or by a command-line option, and is then passed to
/// addVectorizableFunctionsFromVecLib to fill in the tables of
/// vectorizable functions.
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate // Use the Accelerate framework.
};
TargetLibraryInfoImpl();
explicit TargetLibraryInfoImpl(const Triple &T);
@ -117,6 +129,10 @@ public:
/// queryable via getVectorizedFunction and getScalarizedFunction.
void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
/// Calls addVectorizableFunctions with a known preset of functions for the
/// given vector library.
///
/// \param VecLib The library whose preset table should be registered;
/// NoLibrary registers nothing.
void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib);
/// isFunctionVectorizable - Return true if the function F has a
/// vector equivalent with vectorization factor VF.
bool isFunctionVectorizable(StringRef F, unsigned VF) const {

View File

@ -13,8 +13,18 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// Hidden "-vector-library" option selecting which vector functions library
// to use. It defaults to NoLibrary ("none"); initialize() passes its value
// to addVectorizableFunctionsFromVecLib.
static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"vector-library", cl::Hidden, cl::desc("Vector functions library"),
cl::init(TargetLibraryInfoImpl::NoLibrary),
cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
clEnumValEnd));
const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
@ -345,6 +355,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::statvfs64);
TLI.setUnavailable(LibFunc::tmpfile64);
}
TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
@ -452,6 +464,28 @@ void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
enum VectorLibrary VecLib) {
switch (VecLib) {
case Accelerate: {
const VecDesc VecFuncs[] = {
{"expf", "vexpf", 4},
{"llvm.exp.f32", "vexpf", 4},
{"logf", "vlogf", 4},
{"llvm.log.f32", "vlogf", 4},
{"sqrtf", "vsqrtf", 4},
{"llvm.sqrt.f32", "vsqrtf", 4},
{"fabsf", "vfabsf", 4},
{"llvm.fabs.f32", "vfabsf", 4},
};
addVectorizableFunctions(VecFuncs);
break;
}
case NoLibrary:
break;
}
}
bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())

View File

@ -0,0 +1,182 @@
; Runs the loop vectorizer with -vector-library=Accelerate and verifies which
; scalar calls in the loops below are (or are not) turned into <4 x float>
; vector calls.
; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Loop computing x[i] = sqrtf(y[i]) for i in [0, n); the loop is skipped when
; n <= 0. The expected output contains a vsqrtf call on <4 x float>.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @sqrtf(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Loop computing x[i] = expf(y[i]) for i in [0, n); the loop is skipped when
; n <= 0. The expected output contains a vexpf call on <4 x float>.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @expf(float) nounwind readnone
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @expf(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Loop computing x[i] = logf(y[i]) for i in [0, n); the loop is skipped when
; n <= 0. The expected output contains a vlogf call on <4 x float>.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>
;CHECK: ret void
declare float @logf(float) nounwind readnone
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @logf(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; For fabs the vectorizer is expected to generate the vector intrinsic, since
; that is cheaper than a library call.
; The loop computes x[i] = fabsf(y[i]) for i in [0, n) and is skipped when
; n <= 0.
;CHECK-LABEL: @fabs_f32(
;CHECK: fabs{{.*}}<4 x float>
;CHECK: ret void
declare float @fabsf(float) nounwind readnone
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @fabsf(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Test that we can vectorize an intrinsic into a vector call.
; The loop computes x[i] = llvm.exp.f32(y[i]) for i in [0, n) and is skipped
; when n <= 0. The expected output contains a vexpf call on <4 x float>.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @llvm.exp.f32(float) nounwind readnone
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @llvm.exp.f32(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Test that we don't vectorize arbitrary functions.
; The loop computes x[i] = foo(y[i]) for i in [0, n) and is skipped when
; n <= 0. The output must not contain a foo call on <4 x float>.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>
;CHECK: ret void
declare float @foo(float) nounwind readnone
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @foo(float %0) nounwind readnone
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}
; Test that we don't vectorize calls with nobuiltin attribute.
; Same loop shape as the other tests, x[i] = sqrtf(y[i]) for i in [0, n), but
; the call site carries nobuiltin. The output must not contain a vsqrtf call
; on <4 x float>.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}