diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 224468bfc03c..b1652bed07f2 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -72,6 +72,15 @@ namespace clang { }; } + /// \brief R600 builtins + namespace R600 { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, + #define BUILTIN(ID, TYPE, ATTRS) BI##ID, + #include "clang/Basic/BuiltinsR600.def" + LastTSBuiltin + }; + } /// \brief X86 builtins namespace X86 { diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index 2a4bb75ac293..03806016cc48 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -36,6 +36,7 @@ module Clang_Basic { exclude header "Basic/BuiltinsNEON.def" exclude header "Basic/BuiltinsNVPTX.def" exclude header "Basic/BuiltinsPPC.def" + exclude header "Basic/BuiltinsR600.def" exclude header "Basic/BuiltinsX86.def" exclude header "Basic/BuiltinsXCore.def" exclude header "Basic/DiagnosticOptions.def" diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 4fdbc2447719..a71e8b0ca32f 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -1458,6 +1458,8 @@ static const char *DescriptionStringSI = "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; class R600TargetInfo : public TargetInfo { + static const Builtin::Info BuiltinInfo[]; + /// \brief The GPU profiles supported by the R600 target. enum GPUKind { GK_NONE, @@ -1504,11 +1506,10 @@ public: void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const override { - Records = nullptr; - NumRecords = 0; + Records = BuiltinInfo; + NumRecords = clang::R600::LastTSBuiltin - Builtin::FirstTSBuiltin; } - void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { Builder.defineMacro("__R600__"); @@ -1584,6 +1585,12 @@ public: } }; +const Builtin::Info R600TargetInfo::BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) \ + { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, +#include "clang/Basic/BuiltinsR600.def" +}; + } // end anonymous namespace namespace { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 608e9d9a16dc..5bc03f3b949b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1689,6 +1689,8 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, case llvm::Triple::ppc64: case llvm::Triple::ppc64le: return EmitPPCBuiltinExpr(BuiltinID, E); + case llvm::Triple::r600: + return EmitR600BuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -5922,3 +5924,38 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } } + +Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + switch (BuiltinID) { + case R600::BI__builtin_amdgpu_div_scale: + case R600::BI__builtin_amdgpu_div_scalef: { + // Translate from the intrinsics's struct return to the builtin's out + // argument. + + std::pair FlagOutPtr + = EmitPointerWithAlignment(E->getArg(3)); + + llvm::Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Value *Y = EmitScalarExpr(E->getArg(1)); + llvm::Value *Z = EmitScalarExpr(E->getArg(2)); + + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + X->getType()); + + llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z); + + llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); + llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); + + llvm::Type *RealFlagType + = FlagOutPtr.first->getType()->getPointerElementType(); + + llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); + llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first); + FlagStore->setAlignment(FlagOutPtr.second); + return Result; + } default: + return nullptr; + } +} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 1ce89642ff0b..e859a54118e9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2249,6 +2249,7 @@ public: llvm::Value *BuildVector(ArrayRef Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); diff --git a/clang/test/CodeGenOpenCL/builtins-r600.cl b/clang/test/CodeGenOpenCL/builtins-r600.cl new file mode 100644 index 000000000000..053103814e94 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-r600.cl @@ -0,0 +1,30 @@ +// REQUIRES: r600-registered-target +// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// CHECK-LABEL: @test_div_scale_f64 +// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 +// CHECK: store i32 [[FLAGEXT]] +void test_div_scale_f64(global double* out, global int* flagout, double a, double b) +{ + bool flag; + *out = __builtin_amdgpu_div_scale(a, b, true, &flag); + *flagout = flag; +} + +// CHECK-LABEL: @test_div_scale_f32 +// CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 +// CHECK: store i32 [[FLAGEXT]] +void test_div_scale_f32(global float* out, global int* flagout, float a, float b) +{ + bool flag; + *out = __builtin_amdgpu_div_scalef(a, b, true, &flag); + *flagout = flag; +}