forked from OSchip/llvm-project
TLI: Add addVectorizableFunctionsFromVecLib.
Also, add several entries to the vectorizable functions table, along with corresponding tests. The table isn't complete yet; it will be populated later. Review: http://reviews.llvm.org/D8131 llvm-svn: 232531
This commit is contained in:
parent
9b3cf604ce
commit
6d8a2aa976
|
@ -71,6 +71,18 @@ class TargetLibraryInfoImpl {
|
|||
std::vector<VecDesc> ScalarDescs;
|
||||
|
||||
public:
|
||||
/// \brief Known vector-function libraries.
///
/// A vector-function library determines which functions are vectorizable
/// and with which vectorization factor. The library can be selected either
/// by the frontend or through a command-line option; the choice is then
/// handed to addVectorizableFunctionsFromVecLib, which fills in the tables
/// of vectorizable functions.
enum VectorLibrary {
  NoLibrary, // Don't use any vector library.
  Accelerate // Use Accelerate framework.
};
|
||||
|
||||
TargetLibraryInfoImpl();
|
||||
explicit TargetLibraryInfoImpl(const Triple &T);
|
||||
|
||||
|
@ -117,6 +129,10 @@ public:
|
|||
/// queryable via getVectorizedFunction and getScalarizedFunction.
|
||||
void addVectorizableFunctions(ArrayRef<VecDesc> Fns);
|
||||
|
||||
/// Calls addVectorizableFunctions with a known preset of functions for the
|
||||
/// given vector library.
|
||||
void addVectorizableFunctionsFromVecLib(enum VectorLibrary VecLib);
|
||||
|
||||
/// isFunctionVectorizable - Return true if the function F has a
|
||||
/// vector equivalent with vectorization factor VF.
|
||||
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
|
||||
|
|
|
@ -13,8 +13,18 @@
|
|||
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Hidden -vector-library=<none|Accelerate> option selecting the vector
// functions library. It defaults to NoLibrary; its value is forwarded to
// addVectorizableFunctionsFromVecLib when the library info is initialized.
static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
    "vector-library",
    cl::Hidden,
    cl::desc("Vector functions library"),
    cl::init(TargetLibraryInfoImpl::NoLibrary),
    cl::values(
        clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
                   "No vector functions library"),
        clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
                   "Accelerate framework"),
        clEnumValEnd));
|
||||
|
||||
const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
|
||||
#define TLI_DEFINE_STRING
|
||||
#include "llvm/Analysis/TargetLibraryInfo.def"
|
||||
|
@ -345,6 +355,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
|
|||
TLI.setUnavailable(LibFunc::statvfs64);
|
||||
TLI.setUnavailable(LibFunc::tmpfile64);
|
||||
}
|
||||
|
||||
TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
|
||||
}
|
||||
|
||||
TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
|
||||
|
@ -452,6 +464,28 @@ void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
|
|||
std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
|
||||
}
|
||||
|
||||
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
|
||||
enum VectorLibrary VecLib) {
|
||||
switch (VecLib) {
|
||||
case Accelerate: {
|
||||
const VecDesc VecFuncs[] = {
|
||||
{"expf", "vexpf", 4},
|
||||
{"llvm.exp.f32", "vexpf", 4},
|
||||
{"logf", "vlogf", 4},
|
||||
{"llvm.log.f32", "vlogf", 4},
|
||||
{"sqrtf", "vsqrtf", 4},
|
||||
{"llvm.sqrt.f32", "vsqrtf", 4},
|
||||
{"fabsf", "vfabsf", 4},
|
||||
{"llvm.fabs.f32", "vfabsf", 4},
|
||||
};
|
||||
addVectorizableFunctions(VecFuncs);
|
||||
break;
|
||||
}
|
||||
case NoLibrary:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
|
||||
funcName = sanitizeFunctionName(funcName);
|
||||
if (funcName.empty())
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; A loop calling sqrtf must be vectorized into a <4 x float> call to vsqrtf.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @sqrtf(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; A loop calling expf must be vectorized into a <4 x float> call to vexpf.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @expf(float) nounwind readnone
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @expf(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; A loop calling logf must be vectorized into a <4 x float> call to vlogf.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>
;CHECK: ret void
declare float @logf(float) nounwind readnone
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @logf(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; For fabs we expect the vector intrinsic to be generated rather than a
; library call, because the intrinsic is cheaper than a lib call.
;CHECK-LABEL: @fabs_f32(
;CHECK: fabs{{.*}}<4 x float>
;CHECK: ret void
declare float @fabsf(float) nounwind readnone
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @fabsf(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; Test that a scalar intrinsic call (llvm.exp.f32) can be vectorized into a
; call to the vector library function.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @llvm.exp.f32(float) nounwind readnone
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @llvm.exp.f32(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; Test that a function missing from the vector library table is not turned
; into a vector call.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>
;CHECK: ret void
declare float @foo(float) nounwind readnone
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @foo(float %src.val) nounwind readnone
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
||||
|
||||
; Test that a call carrying the nobuiltin attribute is not replaced by the
; vector library function, even though sqrtf itself is in the table.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
  %src.addr = getelementptr inbounds float, float* %y, i64 %iv
  %src.val = load float, float* %src.addr, align 4
  %result = tail call float @sqrtf(float %src.val) nounwind readnone nobuiltin
  %dst.addr = getelementptr inbounds float, float* %x, i64 %iv
  store float %result, float* %dst.addr, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
|
Loading…
Reference in New Issue