forked from OSchip/llvm-project
Allow vectorization of intrinsics such as powi, cttz and ctlz in the Loop and SLP Vectorizers.
This patch adds support for vectorizing intrinsics such as powi, cttz and ctlz in the Loop and SLP Vectorizers. These intrinsics differ from other intrinsics in that the second argument to each of these functions must be the same across all the calls being vectorized, and it must be represented as a scalar in the vectorized call. Review: http://reviews.llvm.org/D3851#inline-32769 and http://reviews.llvm.org/D3937#inline-32857 llvm-svn: 209873
This commit is contained in:
parent
6cd3ebb223
commit
5ab7795649
|
@ -48,12 +48,27 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
|
|||
case Intrinsic::pow:
|
||||
case Intrinsic::fma:
|
||||
case Intrinsic::fmuladd:
|
||||
case Intrinsic::ctlz:
|
||||
case Intrinsic::cttz:
|
||||
case Intrinsic::powi:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether operand number ScalarOpdIdx of intrinsic ID is the operand
// that must be kept as a scalar (not widened) when the call is vectorized.
// Only ctlz, cttz and powi are recognised here, and only for operand index 1
// (their second argument); every other intrinsic/index pair yields false.
static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
|
||||
unsigned ScalarOpdIdx) {
|
||||
switch (ID) {
|
||||
case Intrinsic::ctlz:
|
||||
case Intrinsic::cttz:
|
||||
case Intrinsic::powi:
|
||||
// For these three intrinsics only index 1 is the scalar operand.
return (ScalarOpdIdx == 1);
|
||||
default:
|
||||
// Any other intrinsic has no scalar-only operand as far as this helper knows.
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
|
||||
Intrinsic::ID ValidIntrinsicID) {
|
||||
if (I.getNumArgOperands() != 1 ||
|
||||
|
|
|
@ -3123,9 +3123,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
|
|||
scalarizeInstruction(it);
|
||||
break;
|
||||
default:
|
||||
bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
SmallVector<Value *, 4> Args;
|
||||
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
|
||||
if (HasScalarOpd && i == 1) {
|
||||
Args.push_back(CI->getArgOperand(i));
|
||||
continue;
|
||||
}
|
||||
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
|
||||
Args.push_back(Arg[Part]);
|
||||
}
|
||||
|
@ -3474,6 +3479,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
|
||||
// second argument is the same (i.e. loop invariant)
|
||||
if (CI &&
|
||||
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
|
||||
if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
|
||||
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the instruction return type is vectorizable.
|
||||
// Also, we can't vectorize extractelement instructions.
|
||||
if ((!VectorType::isValidElementType(it->getType()) &&
|
||||
|
|
|
@ -961,9 +961,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
Function *Int = CI->getCalledFunction();
|
||||
|
||||
Value *A1I = nullptr;
|
||||
if (hasVectorInstrinsicScalarOpd(ID, 1))
|
||||
A1I = CI->getArgOperand(1);
|
||||
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
|
||||
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
|
||||
if (!CI2 || CI2->getCalledFunction() != Int ||
|
||||
|
@ -973,6 +974,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
<< "\n");
|
||||
return;
|
||||
}
|
||||
// ctlz,cttz and powi are special intrinsics whose second argument
|
||||
// should be same in order for them to be vectorized.
|
||||
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
|
||||
Value *A1J = CI2->getArgOperand(1);
|
||||
if (A1I != A1J) {
|
||||
newTreeEntry(VL, false);
|
||||
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
|
||||
<< " argument "<< A1I<<"!=" << A1J
|
||||
<< "\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
newTreeEntry(VL, true);
|
||||
|
@ -1652,9 +1665,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
case Instruction::Call: {
|
||||
CallInst *CI = cast<CallInst>(VL0);
|
||||
setInsertPointAfterBundle(E->Scalars);
|
||||
Function *FI;
|
||||
Intrinsic::ID IID = Intrinsic::not_intrinsic;
|
||||
if (CI && (FI = CI->getCalledFunction())) {
|
||||
IID = (Intrinsic::ID) FI->getIntrinsicID();
|
||||
}
|
||||
std::vector<Value *> OpVecs;
|
||||
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
|
||||
ValueList OpVL;
|
||||
// ctlz,cttz and powi are special intrinsics whose second argument is
|
||||
// a scalar. This argument should not be vectorized.
|
||||
if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
|
||||
CallInst *CEI = cast<CallInst>(E->Scalars[0]);
|
||||
OpVecs.push_back(CEI->getArgOperand(j));
|
||||
continue;
|
||||
}
|
||||
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
|
||||
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
|
||||
OpVL.push_back(CEI->getArgOperand(j));
|
||||
|
|
|
@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body
|
|||
ret void
|
||||
}
|
||||
|
||||
declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
|
||||
|
||||
;CHECK-LABEL: @powi_f64(
|
||||
;CHECK: llvm.powi.v4f64
|
||||
;CHECK: ret void
|
||||
define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
|
||||
entry:
|
||||
%cmp9 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp9, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
|
||||
%0 = load double* %arrayidx, align 8
|
||||
%call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
|
||||
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
|
||||
store double %call, double* %arrayidx4, align 8
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: @powi_f64_neg(
|
||||
;CHECK-NOT: llvm.powi.v4f64
|
||||
;CHECK: ret void
|
||||
define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
|
||||
entry:
|
||||
%cmp9 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp9, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
|
||||
%0 = load double* %arrayidx, align 8
|
||||
%1 = trunc i64 %indvars.iv to i32
|
||||
%call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
|
||||
%arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
|
||||
store double %call, double* %arrayidx4, align 8
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone
|
||||
|
||||
;CHECK-LABEL: @cttz_f64(
|
||||
;CHECK: llvm.cttz.v4i64
|
||||
;CHECK: ret void
|
||||
define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
|
||||
entry:
|
||||
%cmp9 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp9, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
|
||||
%0 = load i64* %arrayidx, align 8
|
||||
%call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
|
||||
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
|
||||
store i64 %call, i64* %arrayidx4, align 8
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone
|
||||
|
||||
;CHECK-LABEL: @ctlz_f64(
|
||||
;CHECK: llvm.ctlz.v4i64
|
||||
;CHECK: ret void
|
||||
define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
|
||||
entry:
|
||||
%cmp9 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp9, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
|
||||
%0 = load i64* %arrayidx, align 8
|
||||
%call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
|
||||
%arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
|
||||
store i64 %call, i64* %arrayidx4, align 8
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -117,3 +117,270 @@ entry:
|
|||
; CHECK: store <4 x i32>
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
|
||||
|
||||
define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
|
||||
entry:
|
||||
%i0 = load i32* %a, align 4
|
||||
%i1 = load i32* %b, align 4
|
||||
%add1 = add i32 %i0, %i1
|
||||
%call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
|
||||
%i2 = load i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
|
||||
%i3 = load i32* %arrayidx3, align 4
|
||||
%add2 = add i32 %i2, %i3
|
||||
%call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
|
||||
%i4 = load i32* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
|
||||
%i5 = load i32* %arrayidx5, align 4
|
||||
%add3 = add i32 %i4, %i5
|
||||
%call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
|
||||
%i6 = load i32* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
|
||||
%i7 = load i32* %arrayidx7, align 4
|
||||
%add4 = add i32 %i6, %i7
|
||||
%call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
|
||||
|
||||
store i32 %call1, i32* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
|
||||
store i32 %call2, i32* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
|
||||
store i32 %call3, i32* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
|
||||
store i32 %call4, i32* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_ctlz_i32(
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: call <4 x i32> @llvm.ctlz.v4i32
|
||||
; CHECK: store <4 x i32>
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
|
||||
entry:
|
||||
%i0 = load i32* %a, align 4
|
||||
%i1 = load i32* %b, align 4
|
||||
%add1 = add i32 %i0, %i1
|
||||
%call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
|
||||
%i2 = load i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
|
||||
%i3 = load i32* %arrayidx3, align 4
|
||||
%add2 = add i32 %i2, %i3
|
||||
%call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
|
||||
%i4 = load i32* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
|
||||
%i5 = load i32* %arrayidx5, align 4
|
||||
%add3 = add i32 %i4, %i5
|
||||
%call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
|
||||
%i6 = load i32* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
|
||||
%i7 = load i32* %arrayidx7, align 4
|
||||
%add4 = add i32 %i6, %i7
|
||||
%call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
|
||||
|
||||
store i32 %call1, i32* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
|
||||
store i32 %call2, i32* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
|
||||
store i32 %call3, i32* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
|
||||
store i32 %call4, i32* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_ctlz_i32_neg(
|
||||
; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
|
||||
|
||||
}
|
||||
|
||||
|
||||
declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
|
||||
|
||||
define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
|
||||
entry:
|
||||
%i0 = load i32* %a, align 4
|
||||
%i1 = load i32* %b, align 4
|
||||
%add1 = add i32 %i0, %i1
|
||||
%call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
|
||||
%i2 = load i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
|
||||
%i3 = load i32* %arrayidx3, align 4
|
||||
%add2 = add i32 %i2, %i3
|
||||
%call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
|
||||
%i4 = load i32* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
|
||||
%i5 = load i32* %arrayidx5, align 4
|
||||
%add3 = add i32 %i4, %i5
|
||||
%call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
|
||||
%i6 = load i32* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
|
||||
%i7 = load i32* %arrayidx7, align 4
|
||||
%add4 = add i32 %i6, %i7
|
||||
%call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
|
||||
|
||||
store i32 %call1, i32* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
|
||||
store i32 %call2, i32* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
|
||||
store i32 %call3, i32* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
|
||||
store i32 %call4, i32* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_cttz_i32(
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: call <4 x i32> @llvm.cttz.v4i32
|
||||
; CHECK: store <4 x i32>
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
|
||||
entry:
|
||||
%i0 = load i32* %a, align 4
|
||||
%i1 = load i32* %b, align 4
|
||||
%add1 = add i32 %i0, %i1
|
||||
%call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds i32* %a, i32 1
|
||||
%i2 = load i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32* %b, i32 1
|
||||
%i3 = load i32* %arrayidx3, align 4
|
||||
%add2 = add i32 %i2, %i3
|
||||
%call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds i32* %a, i32 2
|
||||
%i4 = load i32* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds i32* %b, i32 2
|
||||
%i5 = load i32* %arrayidx5, align 4
|
||||
%add3 = add i32 %i4, %i5
|
||||
%call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds i32* %a, i32 3
|
||||
%i6 = load i32* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds i32* %b, i32 3
|
||||
%i7 = load i32* %arrayidx7, align 4
|
||||
%add4 = add i32 %i6, %i7
|
||||
%call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
|
||||
|
||||
store i32 %call1, i32* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds i32* %c, i32 1
|
||||
store i32 %call2, i32* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds i32* %c, i32 2
|
||||
store i32 %call3, i32* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds i32* %c, i32 3
|
||||
store i32 %call4, i32* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_cttz_i32_neg(
|
||||
; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
|
||||
}
|
||||
|
||||
|
||||
declare float @llvm.powi.f32(float, i32)
|
||||
define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
|
||||
entry:
|
||||
%i0 = load float* %a, align 4
|
||||
%i1 = load float* %b, align 4
|
||||
%add1 = fadd float %i0, %i1
|
||||
%call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds float* %a, i32 1
|
||||
%i2 = load float* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds float* %b, i32 1
|
||||
%i3 = load float* %arrayidx3, align 4
|
||||
%add2 = fadd float %i2, %i3
|
||||
%call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds float* %a, i32 2
|
||||
%i4 = load float* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds float* %b, i32 2
|
||||
%i5 = load float* %arrayidx5, align 4
|
||||
%add3 = fadd float %i4, %i5
|
||||
%call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds float* %a, i32 3
|
||||
%i6 = load float* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds float* %b, i32 3
|
||||
%i7 = load float* %arrayidx7, align 4
|
||||
%add4 = fadd float %i6, %i7
|
||||
%call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
|
||||
|
||||
store float %call1, float* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds float* %c, i32 1
|
||||
store float %call2, float* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds float* %c, i32 2
|
||||
store float %call3, float* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds float* %c, i32 3
|
||||
store float %call4, float* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_powi_f32(
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: call <4 x float> @llvm.powi.v4f32
|
||||
; CHECK: store <4 x float>
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
|
||||
define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
|
||||
entry:
|
||||
%i0 = load float* %a, align 4
|
||||
%i1 = load float* %b, align 4
|
||||
%add1 = fadd float %i0, %i1
|
||||
%call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
|
||||
|
||||
%arrayidx2 = getelementptr inbounds float* %a, i32 1
|
||||
%i2 = load float* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds float* %b, i32 1
|
||||
%i3 = load float* %arrayidx3, align 4
|
||||
%add2 = fadd float %i2, %i3
|
||||
%call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
|
||||
|
||||
%arrayidx4 = getelementptr inbounds float* %a, i32 2
|
||||
%i4 = load float* %arrayidx4, align 4
|
||||
%arrayidx5 = getelementptr inbounds float* %b, i32 2
|
||||
%i5 = load float* %arrayidx5, align 4
|
||||
%add3 = fadd float %i4, %i5
|
||||
%call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
|
||||
|
||||
%arrayidx6 = getelementptr inbounds float* %a, i32 3
|
||||
%i6 = load float* %arrayidx6, align 4
|
||||
%arrayidx7 = getelementptr inbounds float* %b, i32 3
|
||||
%i7 = load float* %arrayidx7, align 4
|
||||
%add4 = fadd float %i6, %i7
|
||||
%call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
|
||||
|
||||
store float %call1, float* %c, align 4
|
||||
%arrayidx8 = getelementptr inbounds float* %c, i32 1
|
||||
store float %call2, float* %arrayidx8, align 4
|
||||
%arrayidx9 = getelementptr inbounds float* %c, i32 2
|
||||
store float %call3, float* %arrayidx9, align 4
|
||||
%arrayidx10 = getelementptr inbounds float* %c, i32 3
|
||||
store float %call4, float* %arrayidx10, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @vec_powi_f32_neg(
|
||||
; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue