AMDGPU: Rename builtins to use amdgcn prefix

Keep the ones still used by libclc around for now.

Emit the new amdgcn intrinsic name if not targeting r600,
in which case the old AMDGPU name is still used.
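
At the source level the rename looks like this (a minimal illustration, not taken from the commit; it assumes an amdgcn target, and the function name is ours — the OpenCL tests below exercise the full list):

    float example(float x) {
      float r = __builtin_amdgcn_rcpf(x);  // renamed builtin: lowers to llvm.amdgcn.rcp.f32
      float s = __builtin_amdgpu_rsqf(x);  // legacy name kept for libclc: lowers to
                                           // llvm.amdgcn.rsq.f32 here, and to
                                           // llvm.r600.rsq.f32 when targeting r600
      return r + s;
    }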

llvm-svn: 258560
Matt Arsenault 2016-01-22 21:30:53 +00:00
parent 7766b951d6
commit 8a4078c741
4 changed files with 241 additions and 159 deletions

View File

@@ -7,30 +7,36 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the R600-specific builtin function database. Users of this
-// file must define the BUILTIN macro to make use of this information.
+// This file defines the AMDGPU-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
 //
 //===----------------------------------------------------------------------===//
 
 // The format of this database matches clang/Basic/Builtins.def.
 
-BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n")
-BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n")
-BUILTIN(__builtin_amdgpu_div_fmas, "ddddb", "nc")
-BUILTIN(__builtin_amdgpu_div_fmasf, "ffffb", "nc")
-BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc")
-BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc")
-BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc")
-BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc")
-BUILTIN(__builtin_amdgpu_rcp, "dd", "nc")
-BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
+BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
+BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc")
+BUILTIN(__builtin_amdgcn_div_fmasf, "ffffb", "nc")
+BUILTIN(__builtin_amdgcn_div_fixup, "dddd", "nc")
+BUILTIN(__builtin_amdgcn_div_fixupf, "ffff", "nc")
+BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc")
+BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc")
+BUILTIN(__builtin_amdgcn_rcp, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_rsq, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_rsq_clamped, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rsq_clampedf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc")
+BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc")
+BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
+BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
+
+// Legacy names with amdgpu prefix
 BUILTIN(__builtin_amdgpu_rsq, "dd", "nc")
 BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc")
-BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc")
-BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc")
 BUILTIN(__builtin_amdgpu_ldexp, "ddi", "nc")
 BUILTIN(__builtin_amdgpu_ldexpf, "ffi", "nc")
-BUILTIN(__builtin_amdgpu_class, "bdi", "nc")
-BUILTIN(__builtin_amdgpu_classf, "bfi", "nc")
 
 #undef BUILTIN
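
The TYPE strings above follow the clang/Basic/Builtins.def encoding the comment points at: the first character is the return type, 'd' is double, 'f' float, 'b' bool, 'i' int, and '*' turns the preceding type into a pointer, so "dddbb*" reads double(double, double, bool, bool*); in the attribute string, "n" marks the builtin nothrow and "c" const. Because the file ends with #undef BUILTIN, each consumer defines the macro before including it. A minimal consumer sketch (the record type and array name here are ours; clang's target code expands the same macro into its own Builtin::Info records):

    struct BuiltinRecord { const char *Name, *Type, *Attrs; };
    static const BuiltinRecord AMDGPUBuiltins[] = {
    #define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS },
    #include "clang/Basic/BuiltinsAMDGPU.def"
    };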

View File

@@ -6887,8 +6887,8 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   switch (BuiltinID) {
-  case AMDGPU::BI__builtin_amdgpu_div_scale:
-  case AMDGPU::BI__builtin_amdgpu_div_scalef: {
+  case AMDGPU::BI__builtin_amdgcn_div_scale:
+  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
     // Translate from the intrinsics's struct return to the builtin's out
     // argument.

@@ -6898,7 +6898,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
     llvm::Value *Z = EmitScalarExpr(E->getArg(2));

-    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                            X->getType());

     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
@@ -6913,40 +6913,54 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Builder.CreateStore(FlagExt, FlagOutPtr);
     return Result;
   }
-  case AMDGPU::BI__builtin_amdgpu_div_fmas:
-  case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
+  case AMDGPU::BI__builtin_amdgcn_div_fmas:
+  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

-    llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
+    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                       Src0->getType());
     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
   }
-  case AMDGPU::BI__builtin_amdgpu_div_fixup:
-  case AMDGPU::BI__builtin_amdgpu_div_fixupf:
-    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
-  case AMDGPU::BI__builtin_amdgpu_trig_preop:
-  case AMDGPU::BI__builtin_amdgpu_trig_preopf:
-    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
-  case AMDGPU::BI__builtin_amdgpu_rcp:
-  case AMDGPU::BI__builtin_amdgpu_rcpf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
+  case AMDGPU::BI__builtin_amdgcn_div_fixup:
+  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+    return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
+  case AMDGPU::BI__builtin_amdgcn_trig_preop:
+  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
+  case AMDGPU::BI__builtin_amdgcn_rcp:
+  case AMDGPU::BI__builtin_amdgcn_rcpf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+  case AMDGPU::BI__builtin_amdgcn_rsq:
+  case AMDGPU::BI__builtin_amdgcn_rsqf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+  case AMDGPU::BI__builtin_amdgcn_rsq_clamped:
+  case AMDGPU::BI__builtin_amdgcn_rsq_clampedf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamped);
+  case AMDGPU::BI__builtin_amdgcn_ldexp:
+  case AMDGPU::BI__builtin_amdgcn_ldexpf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
+  case AMDGPU::BI__builtin_amdgcn_class:
+  case AMDGPU::BI__builtin_amdgcn_classf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
+
+  // Legacy amdgpu prefix
   case AMDGPU::BI__builtin_amdgpu_rsq:
-  case AMDGPU::BI__builtin_amdgpu_rsqf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
-  case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
-  case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
+  case AMDGPU::BI__builtin_amdgpu_rsqf: {
+    if (getTarget().getTriple().getArch() == Triple::amdgcn)
+      return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::r600_rsq);
+  }
   case AMDGPU::BI__builtin_amdgpu_ldexp:
-  case AMDGPU::BI__builtin_amdgpu_ldexpf:
+  case AMDGPU::BI__builtin_amdgpu_ldexpf: {
+    if (getTarget().getTriple().getArch() == Triple::amdgcn)
+      return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
-  case AMDGPU::BI__builtin_amdgpu_class:
-  case AMDGPU::BI__builtin_amdgpu_classf:
-    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
-  default:
+  }
+  default:
     return nullptr;
   }
 }
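
The middle of the div_scale case falls between the two hunks above, so the extractvalue steps that the "struct return to out argument" comment promises are not visible here. A hedged reconstruction of what that case does, written as a standalone helper over the same CodeGen primitives (the helper name and exact line order are assumptions, not the verbatim elided lines):

    // Hedged sketch, not the verbatim elided code from this commit.
    static llvm::Value *emitDivScale(clang::CodeGen::CodeGenFunction &CGF,
                                     const clang::CallExpr *E,
                                     unsigned IntrinsicID) {
      // x, y, and the i1 operand-select flag feed the intrinsic directly;
      // arg 3 is the builtin's bool* out-argument.
      llvm::Value *X = CGF.EmitScalarExpr(E->getArg(0));
      llvm::Value *Y = CGF.EmitScalarExpr(E->getArg(1));
      llvm::Value *Z = CGF.EmitScalarExpr(E->getArg(2));
      clang::CodeGen::Address FlagOutPtr = CGF.EmitPointerWithAlignment(E->getArg(3));

      // llvm.amdgcn.div.scale returns { value, i1 }; the builtin returns the
      // value and stores the flag through the pointer instead.
      llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
      llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y, Z});
      llvm::Value *Result = CGF.Builder.CreateExtractValue(Tmp, 0);
      llvm::Value *Flag = CGF.Builder.CreateExtractValue(Tmp, 1);

      // Widen the i1 flag to the pointee type before storing; this is the
      // zext to i32 the tests below check for.
      llvm::Value *FlagExt = CGF.Builder.CreateZExt(Flag, FlagOutPtr.getElementType());
      CGF.Builder.CreateStore(FlagExt, FlagOutPtr);
      return Result;
    }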

View File

@@ -0,0 +1,173 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// CHECK-LABEL: @test_div_scale_f64
+// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scale(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_scale_f32
+// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_fmas_f32
+// CHECK: call float @llvm.amdgcn.div.fmas.f32
+void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
+{
+  *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fmas_f64
+// CHECK: call double @llvm.amdgcn.div.fmas.f64
+void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
+{
+  *out = __builtin_amdgcn_div_fmas(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fixup_f32
+// CHECK: call float @llvm.amdgcn.div.fixup.f32
+void test_div_fixup_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CHECK-LABEL: @test_div_fixup_f64
+// CHECK: call double @llvm.amdgcn.div.fixup.f64
+void test_div_fixup_f64(global double* out, double a, double b, double c)
+{
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CHECK-LABEL: @test_trig_preop_f32
+// CHECK: call float @llvm.amdgcn.trig.preop.f32
+void test_trig_preop_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_trig_preopf(a, b);
+}
+
+// CHECK-LABEL: @test_trig_preop_f64
+// CHECK: call double @llvm.amdgcn.trig.preop.f64
+void test_trig_preop_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_trig_preop(a, b);
+}
+
+// CHECK-LABEL: @test_rcp_f32
+// CHECK: call float @llvm.amdgcn.rcp.f32
+void test_rcp_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CHECK-LABEL: @test_rcp_f64
+// CHECK: call double @llvm.amdgcn.rcp.f64
+void test_rcp_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rcp(a);
+}
+
+// CHECK-LABEL: @test_rsq_f32
+// CHECK: call float @llvm.amdgcn.rsq.f32
+void test_rsq_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsqf(a);
+}
+
+// CHECK-LABEL: @test_rsq_f64
+// CHECK: call double @llvm.amdgcn.rsq.f64
+void test_rsq_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamped_f32
+// CHECK: call float @llvm.amdgcn.rsq.clamped.f32
+void test_rsq_clamped_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsq_clampedf(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamped_f64
+// CHECK: call double @llvm.amdgcn.rsq.clamped.f64
+void test_rsq_clamped_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq_clamped(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f32
+// CHECK: call float @llvm.amdgcn.ldexp.f32
+void test_ldexp_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_ldexpf(a, b);
+}
+
+// CHECK-LABEL: @test_ldexp_f64
+// CHECK: call double @llvm.amdgcn.ldexp.f64
+void test_ldexp_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_ldexp(a, b);
+}
+
+// CHECK-LABEL: @test_class_f32
+// CHECK: call i1 @llvm.amdgcn.class.f32
+void test_class_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_classf(a, b);
+}
+
+// CHECK-LABEL: @test_class_f64
+// CHECK: call i1 @llvm.amdgcn.class.f64
+void test_class_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_class(a, b);
+}
+
+// Legacy intrinsics with AMDGPU prefix
+
+// CHECK-LABEL: @test_legacy_rsq_f32
+// CHECK: call float @llvm.amdgcn.rsq.f32
+void test_legacy_rsq_f32(global float* out, float a)
+{
+  *out = __builtin_amdgpu_rsqf(a);
+}
+
+// CHECK-LABEL: @test_legacy_rsq_f64
+// CHECK: call double @llvm.amdgcn.rsq.f64
+void test_legacy_rsq_f64(global double* out, double a)
+{
+  *out = __builtin_amdgpu_rsq(a);
+}
+
+// CHECK-LABEL: @test_legacy_ldexp_f32
+// CHECK: call float @llvm.amdgcn.ldexp.f32
+void test_legacy_ldexp_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgpu_ldexpf(a, b);
+}
+
+// CHECK-LABEL: @test_legacy_ldexp_f64
+// CHECK: call double @llvm.amdgcn.ldexp.f64
+void test_legacy_ldexp_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgpu_ldexp(a, b);
+}

View File

@@ -1,143 +1,32 @@
-// REQUIRES: r600-registered-target
+// REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
-// CHECK-LABEL: @test_div_scale_f64
-// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true)
-// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
-// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
-// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
-// CHECK: store i32 [[FLAGEXT]]
-void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
-{
-  bool flag;
-  *out = __builtin_amdgpu_div_scale(a, b, true, &flag);
-  *flagout = flag;
-}
-
-// CHECK-LABEL: @test_div_scale_f32
-// CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true)
-// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
-// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
-// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
-// CHECK: store i32 [[FLAGEXT]]
-void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
-{
-  bool flag;
-  *out = __builtin_amdgpu_div_scalef(a, b, true, &flag);
-  *flagout = flag;
-}
-
-// CHECK-LABEL: @test_div_fmas_f32
-// CHECK: call float @llvm.AMDGPU.div.fmas.f32
-void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
-{
-  *out = __builtin_amdgpu_div_fmasf(a, b, c, d);
-}
-
-// CHECK-LABEL: @test_div_fmas_f64
-// CHECK: call double @llvm.AMDGPU.div.fmas.f64
-void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
-{
-  *out = __builtin_amdgpu_div_fmas(a, b, c, d);
-}
-
-// CHECK-LABEL: @test_div_fixup_f32
-// CHECK: call float @llvm.AMDGPU.div.fixup.f32
-void test_div_fixup_f32(global float* out, float a, float b, float c)
-{
-  *out = __builtin_amdgpu_div_fixupf(a, b, c);
-}
-
-// CHECK-LABEL: @test_div_fixup_f64
-// CHECK: call double @llvm.AMDGPU.div.fixup.f64
-void test_div_fixup_f64(global double* out, double a, double b, double c)
-{
-  *out = __builtin_amdgpu_div_fixup(a, b, c);
-}
-
-// CHECK-LABEL: @test_trig_preop_f32
-// CHECK: call float @llvm.AMDGPU.trig.preop.f32
-void test_trig_preop_f32(global float* out, float a, int b)
-{
-  *out = __builtin_amdgpu_trig_preopf(a, b);
-}
-
-// CHECK-LABEL: @test_trig_preop_f64
-// CHECK: call double @llvm.AMDGPU.trig.preop.f64
-void test_trig_preop_f64(global double* out, double a, int b)
-{
-  *out = __builtin_amdgpu_trig_preop(a, b);
-}
-
-// CHECK-LABEL: @test_rcp_f32
-// CHECK: call float @llvm.AMDGPU.rcp.f32
-void test_rcp_f32(global float* out, float a)
-{
-  *out = __builtin_amdgpu_rcpf(a);
-}
-
-// CHECK-LABEL: @test_rcp_f64
-// CHECK: call double @llvm.AMDGPU.rcp.f64
-void test_rcp_f64(global double* out, double a)
-{
-  *out = __builtin_amdgpu_rcp(a);
-}
-
 // CHECK-LABEL: @test_rsq_f32
-// CHECK: call float @llvm.AMDGPU.rsq.f32
+// CHECK: call float @llvm.r600.rsq.f32
 void test_rsq_f32(global float* out, float a)
 {
   *out = __builtin_amdgpu_rsqf(a);
 }
 
 // CHECK-LABEL: @test_rsq_f64
-// CHECK: call double @llvm.AMDGPU.rsq.f64
+// CHECK: call double @llvm.r600.rsq.f64
 void test_rsq_f64(global double* out, double a)
 {
   *out = __builtin_amdgpu_rsq(a);
 }
 
-// CHECK-LABEL: @test_rsq_clamped_f32
-// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32
-void test_rsq_clamped_f32(global float* out, float a)
-{
-  *out = __builtin_amdgpu_rsq_clampedf(a);
-}
-
-// CHECK-LABEL: @test_rsq_clamped_f64
-// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64
-void test_rsq_clamped_f64(global double* out, double a)
-{
-  *out = __builtin_amdgpu_rsq_clamped(a);
-}
-
-// CHECK-LABEL: @test_ldexp_f32
+// CHECK-LABEL: @test_legacy_ldexp_f32
 // CHECK: call float @llvm.AMDGPU.ldexp.f32
-void test_ldexp_f32(global float* out, float a, int b)
+void test_legacy_ldexp_f32(global float* out, float a, int b)
 {
   *out = __builtin_amdgpu_ldexpf(a, b);
 }
 
-// CHECK-LABEL: @test_ldexp_f64
+// CHECK-LABEL: @test_legacy_ldexp_f64
 // CHECK: call double @llvm.AMDGPU.ldexp.f64
-void test_ldexp_f64(global double* out, double a, int b)
+void test_legacy_ldexp_f64(global double* out, double a, int b)
 {
   *out = __builtin_amdgpu_ldexp(a, b);
 }
-
-// CHECK-LABEL: @test_class_f32
-// CHECK: call i1 @llvm.AMDGPU.class.f32
-void test_class_f32(global float* out, float a, int b)
-{
-  *out = __builtin_amdgpu_classf(a, b);
-}
-
-// CHECK-LABEL: @test_class_f64
-// CHECK: call i1 @llvm.AMDGPU.class.f64
-void test_class_f64(global double* out, double a, int b)
-{
-  *out = __builtin_amdgpu_class(a, b);
-}