forked from OSchip/llvm-project
[LoopVectorize] Add FNeg instruction support
Differential Revision: https://reviews.llvm.org/D62510
llvm-svn: 362124
This commit is contained in:
parent
5d5f629922
commit
778e445c58
|
@ -1383,6 +1383,24 @@ public:
|
||||||
return Insert(UnOp, Name);
|
return Insert(UnOp, Name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
|
||||||
|
/// Correct number of operands must be passed accordingly.
|
||||||
|
Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
|
||||||
|
const Twine &Name = "",
|
||||||
|
MDNode *FPMathTag = nullptr) {
|
||||||
|
if (Instruction::isBinaryOp(Opc)) {
|
||||||
|
assert(Ops.size() == 2 && "Invalid number of operands!");
|
||||||
|
return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
|
||||||
|
Ops[0], Ops[1], Name, FPMathTag);
|
||||||
|
}
|
||||||
|
if (Instruction::isUnaryOp(Opc)) {
|
||||||
|
assert(Ops.size() == 1 && "Invalid number of operands!");
|
||||||
|
return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
|
||||||
|
Ops[0], Name, FPMathTag);
|
||||||
|
}
|
||||||
|
llvm_unreachable("Unexpected opcode!");
|
||||||
|
}
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
// Instruction creation methods: Memory Instructions
|
// Instruction creation methods: Memory Instructions
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
|
|
|
@ -3969,6 +3969,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
case Instruction::FSub:
|
case Instruction::FSub:
|
||||||
|
case Instruction::FNeg:
|
||||||
case Instruction::Mul:
|
case Instruction::Mul:
|
||||||
case Instruction::FMul:
|
case Instruction::FMul:
|
||||||
case Instruction::FDiv:
|
case Instruction::FDiv:
|
||||||
|
@ -3979,21 +3980,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
||||||
case Instruction::And:
|
case Instruction::And:
|
||||||
case Instruction::Or:
|
case Instruction::Or:
|
||||||
case Instruction::Xor: {
|
case Instruction::Xor: {
|
||||||
// Just widen binops.
|
// Just widen unops and binops.
|
||||||
auto *BinOp = cast<BinaryOperator>(&I);
|
setDebugLocFromInst(Builder, &I);
|
||||||
setDebugLocFromInst(Builder, BinOp);
|
|
||||||
|
|
||||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||||
Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
|
SmallVector<Value *, 2> Ops;
|
||||||
Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
|
for (Value *Op : I.operands())
|
||||||
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
|
Ops.push_back(getOrCreateVectorValue(Op, Part));
|
||||||
|
|
||||||
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
|
Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
|
||||||
VecOp->copyIRFlags(BinOp);
|
|
||||||
|
if (auto *VecOp = dyn_cast<Instruction>(V))
|
||||||
|
VecOp->copyIRFlags(&I);
|
||||||
|
|
||||||
// Use this vector value for all users of the original instruction.
|
// Use this vector value for all users of the original instruction.
|
||||||
VectorLoopValueMap.setVectorValue(&I, Part, V);
|
VectorLoopValueMap.setVectorValue(&I, Part, V);
|
||||||
addMetadata(V, BinOp);
|
addMetadata(V, &I);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -5960,6 +5962,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||||
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
|
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
|
||||||
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
|
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
|
||||||
}
|
}
|
||||||
|
case Instruction::FNeg: {
|
||||||
|
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
|
||||||
|
return N * TTI.getArithmeticInstrCost(
|
||||||
|
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
|
||||||
|
TargetTransformInfo::OK_AnyValue,
|
||||||
|
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
|
||||||
|
I->getOperand(0));
|
||||||
|
}
|
||||||
case Instruction::Select: {
|
case Instruction::Select: {
|
||||||
SelectInst *SI = cast<SelectInst>(I);
|
SelectInst *SI = cast<SelectInst>(I);
|
||||||
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
|
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
|
||||||
|
@ -6589,6 +6599,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
||||||
case Instruction::FCmp:
|
case Instruction::FCmp:
|
||||||
case Instruction::FDiv:
|
case Instruction::FDiv:
|
||||||
case Instruction::FMul:
|
case Instruction::FMul:
|
||||||
|
case Instruction::FNeg:
|
||||||
case Instruction::FPExt:
|
case Instruction::FPExt:
|
||||||
case Instruction::FPToSI:
|
case Instruction::FPToSI:
|
||||||
case Instruction::FPToUI:
|
case Instruction::FPToUI:
|
||||||
|
|
|
@ -5,9 +5,9 @@
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-apple-macosx10.8.0"
|
target triple = "x86_64-apple-macosx10.8.0"
|
||||||
|
|
||||||
; CHECK: Found an estimated cost of 2 for VF 1 For instruction: %neg = fneg float %{{.*}}
|
; CHECK: Found an estimated cost of 4 for VF 1 For instruction: %neg = fneg float %{{.*}}
|
||||||
; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %neg = fneg float %{{.*}}
|
; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %neg = fneg float %{{.*}}
|
||||||
; CHECK: Found an estimated cost of 14 for VF 4 For instruction: %neg = fneg float %{{.*}}
|
; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %neg = fneg float %{{.*}}
|
||||||
define void @fneg_cost(float* %a, i64 %n) {
|
define void @fneg_cost(float* %a, i64 %n) {
|
||||||
entry:
|
entry:
|
||||||
br label %for.body
|
br label %for.body
|
||||||
|
|
|
@ -3,19 +3,8 @@
|
||||||
define void @foo(float* %a, i64 %n) {
|
define void @foo(float* %a, i64 %n) {
|
||||||
; CHECK: vector.body:
|
; CHECK: vector.body:
|
||||||
; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
|
; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
|
; CHECK-NEXT: [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
|
; CHECK: store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
|
|
||||||
; CHECK-NEXT: [[TMP7:%.*]] = fneg float [[TMP6]]
|
|
||||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
|
|
||||||
; CHECK-NEXT: [[TMP9:%.*]] = fneg float [[TMP8]]
|
|
||||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
|
|
||||||
; CHECK-NEXT: [[TMP11:%.*]] = fneg float [[TMP10]]
|
|
||||||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
|
|
||||||
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
|
|
||||||
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
|
|
||||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
|
|
||||||
; CHECK: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
|
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
br label %for.body
|
br label %for.body
|
||||||
|
|
Loading…
Reference in New Issue