[SLP] Support unary FNeg vectorization

Differential Revision: https://reviews.llvm.org/D63609

llvm-svn: 364219

commit fe3f15cf90 (parent 4412d83959)
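Roughly speaking, a bundle of scalar unary fneg instructions can now be packed into a single vector fneg. A minimal sketch of the transform with hypothetical value names (the real output in the test updates below uses numbered temporaries):

    ; before SLP: two independent scalar negations
    %a = fneg fast double %x
    %b = fneg fast double %y

    ; after SLP: the operands are packed and negated as one <2 x double> op
    %vec0 = insertelement <2 x double> undef, double %x, i32 0
    %vec1 = insertelement <2 x double> %vec0, double %y, i32 1
    %neg  = fneg fast <2 x double> %vec1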
@@ -2390,6 +2390,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       return;
     }
     case Instruction::Select:
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -2409,7 +2410,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     case Instruction::Or:
     case Instruction::Xor: {
       auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
-      LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+      LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
 
       // Sort operands of the instructions so that each side is more likely to
       // have the same opcode.
@@ -2881,6 +2882,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
+    case Instruction::FNeg:
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
@@ -2918,7 +2920,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       ConstantInt *CInt0 = nullptr;
       for (unsigned i = 0, e = VL.size(); i < e; ++i) {
         const Instruction *I = cast<Instruction>(VL[i]);
-        ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(1));
+        unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
+        ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
         if (!CInt) {
           Op2VK = TargetTransformInfo::OK_AnyValue;
           Op2VP = TargetTransformInfo::OP_None;
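The OpIdx tweak above is needed because the constant operand that this cost-model check looks for sits in a different slot for unary and binary instructions. A small illustration in IR (hypothetical values, not taken from the tests):

    %shl = shl i32 %x, 3      ; binary op: the ConstantInt is operand 1
    %neg = fneg double %f     ; unary op: operand 0 is the only operand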
@@ -3698,6 +3701,31 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       ++NumVectorInstructions;
       return V;
     }
+    case Instruction::FNeg: {
+      setInsertPointAfterBundle(E->Scalars, S);
+
+      Value *Op = vectorizeTree(E->getOperand(0));
+
+      if (E->VectorizedValue) {
+        LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+        return E->VectorizedValue;
+      }
+
+      Value *V = Builder.CreateUnOp(
+          static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+      propagateIRFlags(V, E->Scalars, VL0);
+      if (auto *I = dyn_cast<Instruction>(V))
+        V = propagateMetadata(I, E->Scalars);
+
+      if (NeedToShuffleReuses) {
+        V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
+                                        E->ReuseShuffleIndices, "shuffle");
+      }
+      E->VectorizedValue = V;
+      ++NumVectorInstructions;
+
+      return V;
+    }
     case Instruction::Add:
     case Instruction::FAdd:
     case Instruction::Sub:
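Note that the new FNeg case calls propagateIRFlags on the vector instruction, so the emitted fneg keeps only the fast-math flags that all of the bundled scalars agree on; the two fcmp test updates below exercise exactly this (one fast variant, one without flags). A rough sketch with hypothetical names:

    ; every scalar carries 'fast' -> the vector fneg keeps it
    %neg.fast  = fneg fast <2 x double> %v
    ; the scalars have no flags -> neither does the vectorized op
    %neg.plain = fneg <2 x double> %v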
@@ -54,13 +54,11 @@ if.end7: ; preds = %if.then6, %if.then,
 define void @Rf_GReset_unary_fneg() {
 ; CHECK-LABEL: @Rf_GReset_unary_fneg(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB:%.*]] = fneg double undef
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
-; CHECK-NEXT:    [[SUB1:%.*]] = fneg double [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
 ; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[SUB]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[SUB1]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
 ; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
@@ -559,15 +559,10 @@ define void @fcmp_fast_unary_fneg(double* %x) #1 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    [[SUB1:%.*]] = fneg fast double [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
-; CHECK-NEXT:    [[SUB2:%.*]] = fneg fast double [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %idx1 = getelementptr inbounds double, double* %x, i64 0
@@ -632,15 +627,10 @@ define void @fcmp_no_fast_unary_fneg(double* %x) #1 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    [[SUB1:%.*]] = fneg double [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
-; CHECK-NEXT:    [[SUB2:%.*]] = fneg double [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %idx1 = getelementptr inbounds double, double* %x, i64 0