[Vectorizer] Add vectorization support for fixed smul/umul intrinsics
This requires a couple of tweaks to the existing vectorization functions, which assumed that only the second call argument (as for ctlz/cttz/powi) could ever be the 'always scalar' argument; for smul.fix and umul.fix it is the third argument.

Differential Revision: https://reviews.llvm.org/D58616

llvm-svn: 354790
commit a066f1f9e6 (parent 3e34150009)
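For context, here is a minimal sketch (not part of this patch; the helper names are illustrative) of how the two kinds of intrinsic calls are built with IRBuilder::CreateIntrinsic. For smul.fix/umul.fix the scalar argument is the fixed-point scale at operand index 2, whereas powi/ctlz/cttz keep their scalar operand at index 1, which is what the vectorizers used to hard-code.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Illustrative helper: multiply two Q16.16 fixed-point scalars. The scale
// (operand index 2) is the 'always scalar' argument this patch teaches the
// vectorizers about.
static Value *emitFixedMul(IRBuilder<> &B, Value *A, Value *C) {
  return B.CreateIntrinsic(Intrinsic::smul_fix, {A->getType()},
                           {A, C, B.getInt32(16)});
}

// For comparison, powi's scalar operand is the i32 exponent at index 1.
static Value *emitPowi(IRBuilder<> &B, Value *X, Value *ExpI32) {
  return B.CreateIntrinsic(Intrinsic::powi, {X->getType()}, {X, ExpI32});
}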
llvm/lib/Analysis/VectorUtils.cpp

@@ -52,6 +52,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::ssub_sat:
   case Intrinsic::uadd_sat:
   case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::umul_fix:
   case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::sin:
   case Intrinsic::cos:
@@ -92,6 +94,9 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
   case Intrinsic::cttz:
   case Intrinsic::powi:
     return (ScalarOpdIdx == 1);
+  case Intrinsic::smul_fix:
+  case Intrinsic::umul_fix:
+    return (ScalarOpdIdx == 2);
   default:
     return false;
   }
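A few illustrative sanity checks of the mapping added above (assumed usage, not part of the patch): the fixed-point scale is reported as the scalar operand at index 2, while the older special cases keep theirs at index 1.

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>

using namespace llvm;

// Spot-check the scalar-operand table as extended by this patch.
static void checkScalarOperandMapping() {
  assert(isTriviallyVectorizable(Intrinsic::smul_fix));
  assert(hasVectorInstrinsicScalarOpd(Intrinsic::smul_fix, 2));  // scale stays scalar
  assert(!hasVectorInstrinsicScalarOpd(Intrinsic::smul_fix, 0)); // multiplicands get widened
  assert(!hasVectorInstrinsicScalarOpd(Intrinsic::umul_fix, 1));
  assert(hasVectorInstrinsicScalarOpd(Intrinsic::powi, 1));      // unchanged behaviour
}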
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

@@ -713,18 +713,21 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         return false;
       }
 
-      // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
-      // second argument is the same (i.e. loop invariant)
-      if (CI && hasVectorInstrinsicScalarOpd(
-                    getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+      // Some intrinsics have scalar arguments and should be same in order for
+      // them to be vectorized (i.e. loop invariant).
+      if (CI) {
         auto *SE = PSE.getSE();
-        if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
-          ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
-                    << "intrinsic instruction cannot be vectorized");
-          LLVM_DEBUG(dbgs()
-                     << "LV: Found unvectorizable intrinsic " << *CI << "\n");
-          return false;
-        }
+        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+          if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
+              ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
+                        << "intrinsic instruction cannot be vectorized");
+              LLVM_DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI
+                                << "\n");
+              return false;
+            }
+          }
       }
 
       // Check that the instruction return type is vectorizable.
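The rule enforced here, distilled into a standalone sketch (a hypothetical helper, not LLVM API, and using plain ScalarEvolution instead of PredicatedScalarEvolution for brevity): every operand that must stay scalar after widening has to be loop invariant, because the widened call carries only one copy of it.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Returns true if every 'always scalar' operand of the intrinsic call is
// invariant in loop L, i.e. the same value would be used by all vector lanes.
static bool scalarOperandsAreLoopInvariant(CallInst *CI, Intrinsic::ID ID,
                                           ScalarEvolution &SE, const Loop *L) {
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    if (hasVectorInstrinsicScalarOpd(ID, i) &&
        !SE.isLoopInvariant(SE.getSCEV(CI->getOperand(i)), L))
      return false;
  return true;
}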
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -437,8 +437,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
   case Instruction::Call: {
     CallInst *CI = cast<CallInst>(UserInst);
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-    if (hasVectorInstrinsicScalarOpd(ID, 1)) {
-      return (CI->getArgOperand(1) == Scalar);
+    for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+      if (hasVectorInstrinsicScalarOpd(ID, i))
+        return (CI->getArgOperand(i) == Scalar);
     }
     LLVM_FALLTHROUGH;
   }
@@ -1860,9 +1861,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         return;
       }
       Function *Int = CI->getCalledFunction();
-      Value *A1I = nullptr;
-      if (hasVectorInstrinsicScalarOpd(ID, 1))
-        A1I = CI->getArgOperand(1);
+      unsigned NumArgs = CI->getNumArgOperands();
+      SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
+      for (unsigned j = 0; j != NumArgs; ++j)
+        if (hasVectorInstrinsicScalarOpd(ID, j))
+          ScalarArgs[j] = CI->getArgOperand(j);
       for (unsigned i = 1, e = VL.size(); i != e; ++i) {
         CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
         if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -1874,16 +1877,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                             << "\n");
           return;
         }
-        // ctlz,cttz and powi are special intrinsics whose second argument
-        // should be same in order for them to be vectorized.
-        if (hasVectorInstrinsicScalarOpd(ID, 1)) {
-          Value *A1J = CI2->getArgOperand(1);
-          if (A1I != A1J) {
-            BS.cancelScheduling(VL, VL0);
-            newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
-            LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
-                              << " argument " << A1I << "!=" << A1J << "\n");
-            return;
+        // Some intrinsics have scalar arguments and should be same in order for
+        // them to be vectorized.
+        for (unsigned j = 0; j != NumArgs; ++j) {
+          if (hasVectorInstrinsicScalarOpd(ID, j)) {
+            Value *A1J = CI2->getArgOperand(j);
+            if (ScalarArgs[j] != A1J) {
+              BS.cancelScheduling(VL, VL0);
+              newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+              LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+                                << " argument " << ScalarArgs[j] << "!=" << A1J
+                                << "\n");
+              return;
+            }
           }
         }
         // Verify that the bundle operands are identical between the two calls.
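The same idea in the SLP case, distilled into a sketch (a hypothetical helper, not LLVM API): every call in the bundle must pass identical values for the operands that stay scalar, since the single vectorized call can only carry one copy of each.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Returns true if all calls in the bundle agree on their 'always scalar'
// operands, using the first call as the reference.
static bool bundleScalarArgsMatch(ArrayRef<CallInst *> Bundle, Intrinsic::ID ID) {
  CallInst *CI0 = Bundle.front();
  for (unsigned j = 0, e = CI0->getNumArgOperands(); j != e; ++j) {
    if (!hasVectorInstrinsicScalarOpd(ID, j))
      continue;
    Value *Expected = CI0->getArgOperand(j);
    for (CallInst *CI : Bundle)
      if (CI->getArgOperand(j) != Expected)
        return false;
  }
  return true;
}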
@@ -3443,9 +3449,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       std::vector<Value *> OpVecs;
       for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
         ValueList OpVL;
-        // ctlz,cttz and powi are special intrinsics whose second argument is
-        // a scalar. This argument should not be vectorized.
-        if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+        // Some intrinsics have scalar arguments. This argument should not be
+        // vectorized.
+        if (hasVectorInstrinsicScalarOpd(IID, j)) {
           CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(j);
           OpVecs.push_back(CEI->getArgOperand(j));
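For illustration, a sketch of the shape of call that vectorization ends up producing once a bundle of i32 smul.fix calls is accepted (this is an assumed example, not the code path above): the multiplicands are widened to vectors while the scale is passed through as the original scalar.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// e.g. <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 16)
static Value *emitWidenedFixedMul(IRBuilder<> &B, Value *VecA, Value *VecB,
                                  Value *ScalarScale) {
  return B.CreateIntrinsic(Intrinsic::smul_fix, {VecA->getType()},
                           {VecA, VecB, ScalarScale});
}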
File diff suppressed because it is too large