forked from OSchip/llvm-project
Annotate imprecise FP division with fpaccuracy metadata
The OpenCL single-precision division operation is only required to be accurate to 2.5 ulp. Annotate the fdiv instruction with metadata that signals to the backend that an imprecise divide instruction may be used. llvm-svn: 143136
This commit is contained in:
parent
f7d1e7be55
commit
95fd2ca69f
|
@ -23,6 +23,7 @@
|
|||
#include "clang/AST/DeclObjC.h"
|
||||
#include "clang/Frontend/CodeGenOptions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/LLVMContext.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
using namespace clang;
|
||||
using namespace CodeGen;
|
||||
|
@ -2752,3 +2753,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
|
|||
return RValue::get(0);
|
||||
return ConvertTempToRValue(*this, E->getType(), OrigDest);
|
||||
}
|
||||
|
||||
/// Attach MD_fpaccuracy metadata to the instruction \p Val.
///
/// The metadata node holds two Int32Ty constants, \p AccuracyN followed by
/// \p AccuracyD. \p Val must have a floating-point or floating-point vector
/// type (asserted). The call is a no-op when \p AccuracyN is zero or when
/// \p Val is not an llvm::Instruction.
void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, unsigned AccuracyN,
|
||||
unsigned AccuracyD) {
|
||||
assert(Val->getType()->isFPOrFPVectorTy());
|
||||
// Metadata is only set on instructions; presumably a non-instruction value
// here is a folded constant -- TODO confirm.
if (!AccuracyN || !isa<llvm::Instruction>(Val))
|
||||
return;
|
||||
|
||||
// Build the two-operand node: { AccuracyN, AccuracyD }.
llvm::Value *Vals[2];
|
||||
Vals[0] = llvm::ConstantInt::get(Int32Ty, AccuracyN);
|
||||
Vals[1] = llvm::ConstantInt::get(Int32Ty, AccuracyD);
|
||||
llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), Vals);
|
||||
|
||||
// The isa<> guard above makes this cast safe.
cast<llvm::Instruction>(Val)->setMetadata(llvm::LLVMContext::MD_fpaccuracy,
|
||||
Node);
|
||||
}
|
||||
|
|
|
@ -1772,8 +1772,18 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
|
|||
Builder.SetInsertPoint(DivCont);
|
||||
}
|
||||
}
|
||||
if (Ops.LHS->getType()->isFPOrFPVectorTy())
|
||||
return Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
|
||||
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
|
||||
llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
|
||||
if (CGF.getContext().getLangOptions().OpenCL) {
|
||||
// OpenCL 1.1 7.4: minimum accuracy of single precision / is 2.5ulp
|
||||
llvm::Type *ValTy = Val->getType();
|
||||
if (ValTy->isFloatTy() ||
|
||||
(isa<llvm::VectorType>(ValTy) &&
|
||||
cast<llvm::VectorType>(ValTy)->getElementType()->isFloatTy()))
|
||||
CGF.SetFPAccuracy(Val, 5, 2);
|
||||
}
|
||||
return Val;
|
||||
}
|
||||
else if (Ops.Ty->hasUnsignedIntegerRepresentation())
|
||||
return Builder.CreateUDiv(Ops.LHS, Ops.RHS, "div");
|
||||
else
|
||||
|
|
|
@ -2382,6 +2382,11 @@ public:
|
|||
/// a r-value suitable for passing the given parameter.
|
||||
void EmitDelegateCallArg(CallArgList &args, const VarDecl *param);
|
||||
|
||||
/// SetFPAccuracy - Set the minimum required accuracy of the given floating
|
||||
/// point operation, expressed as the maximum relative error in ulp.
/// The error bound is passed as a numerator/denominator pair, so a bound of
/// 2.5 ulp is requested as AccuracyN = 5, AccuracyD = 2. AccuracyD defaults
/// to 1 for whole-ulp bounds.
|
||||
void SetFPAccuracy(llvm::Value *Val, unsigned AccuracyN,
|
||||
unsigned AccuracyD = 1);
|
||||
|
||||
private:
|
||||
void EmitReturnOfRValue(RValue RV, QualType Ty);
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
|
||||
|
||||
typedef __attribute__(( ext_vector_type(4) )) float float4;
|
||||
|
||||
// Scalar single-precision division: the emitted fdiv is expected to carry
// fpaccuracy metadata; the node id is captured as MD for later matches.
float spscalardiv(float a, float b) {
|
||||
// CHECK: @spscalardiv
|
||||
// CHECK: fdiv{{.*}}, !fpaccuracy ![[MD:[0-9]+]]
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// Vector-of-float division: the emitted fdiv is expected to reference the
// same metadata node (MD) that the scalar case captured.
float4 spvectordiv(float4 a, float4 b) {
|
||||
// CHECK: @spvectordiv
|
||||
// CHECK: fdiv{{.*}}, !fpaccuracy ![[MD]]
|
||||
return a / b;
|
||||
}
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
// Double-precision division: no fpaccuracy metadata is expected on the
// fdiv, since the relaxed bound is only applied to float operands.
double dpscalardiv(double a, double b) {
|
||||
// CHECK: @dpscalardiv
|
||||
// CHECK-NOT: !fpaccuracy
|
||||
return a / b;
|
||||
}
|
||||
|
||||
// CHECK: ![[MD]] = metadata !{i{{[0-9]+}} 5, i{{[0-9]+}} 2}
|
Loading…
Reference in New Issue