AMDGPU: Rename builtins to use amdgcn prefix

Keep the ones still used by libclc around for now.

Emit the new amdgcn intrinsic name if not targeting r600,
in which case the old AMDGPU name is still used.
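
At the source level the rename looks like this (a minimal illustration, not taken from the commit; it assumes an amdgcn target, and the function name is ours — the OpenCL tests below exercise the full list):

    float example(float x) {
      float r = __builtin_amdgcn_rcpf(x);  // renamed builtin: lowers to llvm.amdgcn.rcp.f32
      float s = __builtin_amdgpu_rsqf(x);  // legacy name kept for libclc: lowers to
                                           // llvm.amdgcn.rsq.f32 here, and to
                                           // llvm.r600.rsq.f32 when targeting r600
      return r + s;
    }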

llvm-svn: 258560
Matt Arsenault 2016-01-22 21:30:53 +00:00
parent 7766b951d6
commit 8a4078c741
4 changed files with 241 additions and 159 deletions

View File

@@ -7,30 +7,36 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the R600-specific builtin function database. Users of this
-// file must define the BUILTIN macro to make use of this information.
+// This file defines the AMDGPU-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
 //
 //===----------------------------------------------------------------------===//
 
 // The format of this database matches clang/Basic/Builtins.def.
 
-BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n")
-BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n")
-BUILTIN(__builtin_amdgpu_div_fmas, "ddddb", "nc")
-BUILTIN(__builtin_amdgpu_div_fmasf, "ffffb", "nc")
-BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc")
-BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc")
-BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc")
-BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc")
-BUILTIN(__builtin_amdgpu_rcp, "dd", "nc")
-BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
+BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
+BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc")
+BUILTIN(__builtin_amdgcn_div_fmasf, "ffffb", "nc")
+BUILTIN(__builtin_amdgcn_div_fixup, "dddd", "nc")
+BUILTIN(__builtin_amdgcn_div_fixupf, "ffff", "nc")
+BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc")
+BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc")
+BUILTIN(__builtin_amdgcn_rcp, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_rsq, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_rsq_clamped, "dd", "nc")
+BUILTIN(__builtin_amdgcn_rsq_clampedf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc")
+BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc")
+BUILTIN(__builtin_amdgcn_class, "bdi", "nc")
+BUILTIN(__builtin_amdgcn_classf, "bfi", "nc")
+
+// Legacy names with amdgpu prefix
 BUILTIN(__builtin_amdgpu_rsq, "dd", "nc")
 BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc")
-BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc")
-BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc")
 BUILTIN(__builtin_amdgpu_ldexp, "ddi", "nc")
 BUILTIN(__builtin_amdgpu_ldexpf, "ffi", "nc")
-BUILTIN(__builtin_amdgpu_class, "bdi", "nc")
-BUILTIN(__builtin_amdgpu_classf, "bfi", "nc")
 
 #undef BUILTIN
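
The TYPE strings above follow the clang/Basic/Builtins.def encoding the comment points at: the first character is the return type, 'd' is double, 'f' float, 'b' bool, 'i' int, and '*' turns the preceding type into a pointer, so "dddbb*" reads double(double, double, bool, bool*); in the attribute string, "n" marks the builtin nothrow and "c" const. Because the file ends with #undef BUILTIN, each consumer defines the macro before including it. A minimal consumer sketch (the record type and array name here are ours; clang's target code expands the same macro into its own Builtin::Info records):

    struct BuiltinRecord { const char *Name, *Type, *Attrs; };
    static const BuiltinRecord AMDGPUBuiltins[] = {
    #define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS },
    #include "clang/Basic/BuiltinsAMDGPU.def"
    };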

View File

@@ -6887,8 +6887,8 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   switch (BuiltinID) {
-  case AMDGPU::BI__builtin_amdgpu_div_scale:
-  case AMDGPU::BI__builtin_amdgpu_div_scalef: {
+  case AMDGPU::BI__builtin_amdgcn_div_scale:
+  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
     // Translate from the intrinsics's struct return to the builtin's out
     // argument.

@@ -6898,7 +6898,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
     llvm::Value *Z = EmitScalarExpr(E->getArg(2));

-    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
                                            X->getType());

     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
@@ -6913,40 +6913,54 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Builder.CreateStore(FlagExt, FlagOutPtr);
     return Result;
   }
-  case AMDGPU::BI__builtin_amdgpu_div_fmas:
-  case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
+  case AMDGPU::BI__builtin_amdgcn_div_fmas:
+  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

-    llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
+    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                       Src0->getType());
     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
   }
-  case AMDGPU::BI__builtin_amdgpu_div_fixup:
-  case AMDGPU::BI__builtin_amdgpu_div_fixupf:
-    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
-  case AMDGPU::BI__builtin_amdgpu_trig_preop:
-  case AMDGPU::BI__builtin_amdgpu_trig_preopf:
-    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
-  case AMDGPU::BI__builtin_amdgpu_rcp:
-  case AMDGPU::BI__builtin_amdgpu_rcpf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
+  case AMDGPU::BI__builtin_amdgcn_div_fixup:
+  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+    return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
+  case AMDGPU::BI__builtin_amdgcn_trig_preop:
+  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
+  case AMDGPU::BI__builtin_amdgcn_rcp:
+  case AMDGPU::BI__builtin_amdgcn_rcpf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+  case AMDGPU::BI__builtin_amdgcn_rsq:
+  case AMDGPU::BI__builtin_amdgcn_rsqf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+  case AMDGPU::BI__builtin_amdgcn_rsq_clamped:
+  case AMDGPU::BI__builtin_amdgcn_rsq_clampedf:
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamped);
+  case AMDGPU::BI__builtin_amdgcn_ldexp:
+  case AMDGPU::BI__builtin_amdgcn_ldexpf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
+  case AMDGPU::BI__builtin_amdgcn_class:
+  case AMDGPU::BI__builtin_amdgcn_classf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
+
+  // Legacy amdgpu prefix
   case AMDGPU::BI__builtin_amdgpu_rsq:
-  case AMDGPU::BI__builtin_amdgpu_rsqf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
-  case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
-  case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
-    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
+  case AMDGPU::BI__builtin_amdgpu_rsqf: {
+    if (getTarget().getTriple().getArch() == Triple::amdgcn)
+      return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+    return emitUnaryFPBuiltin(*this, E, Intrinsic::r600_rsq);
+  }
   case AMDGPU::BI__builtin_amdgpu_ldexp:
-  case AMDGPU::BI__builtin_amdgpu_ldexpf:
+  case AMDGPU::BI__builtin_amdgpu_ldexpf: {
+    if (getTarget().getTriple().getArch() == Triple::amdgcn)
+      return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
-  case AMDGPU::BI__builtin_amdgpu_class:
-  case AMDGPU::BI__builtin_amdgpu_classf:
-    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
-  default:
+  }
+  default:
     return nullptr;
   }
 }
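
The middle of the div_scale case falls between the two hunks above, so the extractvalue steps that the "struct return to out argument" comment promises are not visible here. A hedged reconstruction of what that case does, written as a standalone helper over the same CodeGen primitives (the helper name and exact line order are assumptions, not the verbatim elided lines):

    // Hedged sketch, not the verbatim elided code from this commit.
    static llvm::Value *emitDivScale(clang::CodeGen::CodeGenFunction &CGF,
                                     const clang::CallExpr *E,
                                     unsigned IntrinsicID) {
      // x, y, and the i1 operand-select flag feed the intrinsic directly;
      // arg 3 is the builtin's bool* out-argument.
      llvm::Value *X = CGF.EmitScalarExpr(E->getArg(0));
      llvm::Value *Y = CGF.EmitScalarExpr(E->getArg(1));
      llvm::Value *Z = CGF.EmitScalarExpr(E->getArg(2));
      clang::CodeGen::Address FlagOutPtr = CGF.EmitPointerWithAlignment(E->getArg(3));

      // llvm.amdgcn.div.scale returns { value, i1 }; the builtin returns the
      // value and stores the flag through the pointer instead.
      llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
      llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y, Z});
      llvm::Value *Result = CGF.Builder.CreateExtractValue(Tmp, 0);
      llvm::Value *Flag = CGF.Builder.CreateExtractValue(Tmp, 1);

      // Widen the i1 flag to the pointee type before storing; this is the
      // zext to i32 the tests below check for.
      llvm::Value *FlagExt = CGF.Builder.CreateZExt(Flag, FlagOutPtr.getElementType());
      CGF.Builder.CreateStore(FlagExt, FlagOutPtr);
      return Result;
    }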

View File

@@ -0,0 +1,173 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// CHECK-LABEL: @test_div_scale_f64
+// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scale(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_scale_f32
+// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
+// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
+// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
+// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
+// CHECK: store i32 [[FLAGEXT]]
+void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CHECK-LABEL: @test_div_fmas_f32
+// CHECK: call float @llvm.amdgcn.div.fmas.f32
+void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
+{
+  *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fmas_f64
+// CHECK: call double @llvm.amdgcn.div.fmas.f64
+void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
+{
+  *out = __builtin_amdgcn_div_fmas(a, b, c, d);
+}
+
+// CHECK-LABEL: @test_div_fixup_f32
+// CHECK: call float @llvm.amdgcn.div.fixup.f32
+void test_div_fixup_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CHECK-LABEL: @test_div_fixup_f64
+// CHECK: call double @llvm.amdgcn.div.fixup.f64
+void test_div_fixup_f64(global double* out, double a, double b, double c)
+{
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CHECK-LABEL: @test_trig_preop_f32
+// CHECK: call float @llvm.amdgcn.trig.preop.f32
+void test_trig_preop_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_trig_preopf(a, b);
+}
+
+// CHECK-LABEL: @test_trig_preop_f64
+// CHECK: call double @llvm.amdgcn.trig.preop.f64
+void test_trig_preop_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_trig_preop(a, b);
+}
+
+// CHECK-LABEL: @test_rcp_f32
+// CHECK: call float @llvm.amdgcn.rcp.f32
+void test_rcp_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CHECK-LABEL: @test_rcp_f64
+// CHECK: call double @llvm.amdgcn.rcp.f64
+void test_rcp_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rcp(a);
+}
+
+// CHECK-LABEL: @test_rsq_f32
+// CHECK: call float @llvm.amdgcn.rsq.f32
+void test_rsq_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsqf(a);
+}
+
+// CHECK-LABEL: @test_rsq_f64
+// CHECK: call double @llvm.amdgcn.rsq.f64
+void test_rsq_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamped_f32
+// CHECK: call float @llvm.amdgcn.rsq.clamped.f32
+void test_rsq_clamped_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_rsq_clampedf(a);
+}
+
+// CHECK-LABEL: @test_rsq_clamped_f64
+// CHECK: call double @llvm.amdgcn.rsq.clamped.f64
+void test_rsq_clamped_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_rsq_clamped(a);
+}
+
+// CHECK-LABEL: @test_ldexp_f32
+// CHECK: call float @llvm.amdgcn.ldexp.f32
+void test_ldexp_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_ldexpf(a, b);
+}
+
+// CHECK-LABEL: @test_ldexp_f64
+// CHECK: call double @llvm.amdgcn.ldexp.f64
+void test_ldexp_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_ldexp(a, b);
+}
+
+// CHECK-LABEL: @test_class_f32
+// CHECK: call i1 @llvm.amdgcn.class.f32
+void test_class_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgcn_classf(a, b);
+}
+
+// CHECK-LABEL: @test_class_f64
+// CHECK: call i1 @llvm.amdgcn.class.f64
+void test_class_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgcn_class(a, b);
+}
+
+// Legacy intrinsics with AMDGPU prefix
+
+// CHECK-LABEL: @test_legacy_rsq_f32
+// CHECK: call float @llvm.amdgcn.rsq.f32
+void test_legacy_rsq_f32(global float* out, float a)
+{
+  *out = __builtin_amdgpu_rsqf(a);
+}
+
+// CHECK-LABEL: @test_legacy_rsq_f64
+// CHECK: call double @llvm.amdgcn.rsq.f64
+void test_legacy_rsq_f64(global double* out, double a)
+{
+  *out = __builtin_amdgpu_rsq(a);
+}
+
+// CHECK-LABEL: @test_legacy_ldexp_f32
+// CHECK: call float @llvm.amdgcn.ldexp.f32
+void test_legacy_ldexp_f32(global float* out, float a, int b)
+{
+  *out = __builtin_amdgpu_ldexpf(a, b);
+}
+
+// CHECK-LABEL: @test_legacy_ldexp_f64
+// CHECK: call double @llvm.amdgcn.ldexp.f64
+void test_legacy_ldexp_f64(global double* out, double a, int b)
+{
+  *out = __builtin_amdgpu_ldexp(a, b);
+}

View File

@@ -1,143 +1,32 @@
-// REQUIRES: r600-registered-target
+// REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
-// CHECK-LABEL: @test_div_scale_f64
-// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true)
-// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1
-// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0
-// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
-// CHECK: store i32 [[FLAGEXT]]
-void test_div_scale_f64(global double* out, global int* flagout, double a, double b)
-{
-  bool flag;
-  *out = __builtin_amdgpu_div_scale(a, b, true, &flag);
-  *flagout = flag;
-}
-
-// CHECK-LABEL: @test_div_scale_f32
-// CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true)
-// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1
-// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0
-// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32
-// CHECK: store i32 [[FLAGEXT]]
-void test_div_scale_f32(global float* out, global int* flagout, float a, float b)
-{
-  bool flag;
-  *out = __builtin_amdgpu_div_scalef(a, b, true, &flag);
-  *flagout = flag;
-}
-
-// CHECK-LABEL: @test_div_fmas_f32
-// CHECK: call float @llvm.AMDGPU.div.fmas.f32
-void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
-{
-  *out = __builtin_amdgpu_div_fmasf(a, b, c, d);
-}
-
-// CHECK-LABEL: @test_div_fmas_f64
-// CHECK: call double @llvm.AMDGPU.div.fmas.f64
-void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
-{
-  *out = __builtin_amdgpu_div_fmas(a, b, c, d);
-}
-
-// CHECK-LABEL: @test_div_fixup_f32
-// CHECK: call float @llvm.AMDGPU.div.fixup.f32
-void test_div_fixup_f32(global float* out, float a, float b, float c)
-{
-  *out = __builtin_amdgpu_div_fixupf(a, b, c);
-}
-
-// CHECK-LABEL: @test_div_fixup_f64
-// CHECK: call double @llvm.AMDGPU.div.fixup.f64
-void test_div_fixup_f64(global double* out, double a, double b, double c)
-{
-  *out = __builtin_amdgpu_div_fixup(a, b, c);
-}
-
-// CHECK-LABEL: @test_trig_preop_f32
-// CHECK: call float @llvm.AMDGPU.trig.preop.f32
-void test_trig_preop_f32(global float* out, float a, int b)
-{
-  *out = __builtin_amdgpu_trig_preopf(a, b);
-}
-
-// CHECK-LABEL: @test_trig_preop_f64
-// CHECK: call double @llvm.AMDGPU.trig.preop.f64
-void test_trig_preop_f64(global double* out, double a, int b)
-{
-  *out = __builtin_amdgpu_trig_preop(a, b);
-}
-
-// CHECK-LABEL: @test_rcp_f32
-// CHECK: call float @llvm.AMDGPU.rcp.f32
-void test_rcp_f32(global float* out, float a)
-{
-  *out = __builtin_amdgpu_rcpf(a);
-}
-
-// CHECK-LABEL: @test_rcp_f64
-// CHECK: call double @llvm.AMDGPU.rcp.f64
-void test_rcp_f64(global double* out, double a)
-{
-  *out = __builtin_amdgpu_rcp(a);
-}
-
 // CHECK-LABEL: @test_rsq_f32
-// CHECK: call float @llvm.AMDGPU.rsq.f32
+// CHECK: call float @llvm.r600.rsq.f32
 void test_rsq_f32(global float* out, float a)
 {
   *out = __builtin_amdgpu_rsqf(a);
 }
 
 // CHECK-LABEL: @test_rsq_f64
-// CHECK: call double @llvm.AMDGPU.rsq.f64
+// CHECK: call double @llvm.r600.rsq.f64
 void test_rsq_f64(global double* out, double a)
 {
   *out = __builtin_amdgpu_rsq(a);
 }
 
-// CHECK-LABEL: @test_rsq_clamped_f32
-// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32
-void test_rsq_clamped_f32(global float* out, float a)
-{
-  *out = __builtin_amdgpu_rsq_clampedf(a);
-}
-
-// CHECK-LABEL: @test_rsq_clamped_f64
-// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64
-void test_rsq_clamped_f64(global double* out, double a)
-{
-  *out = __builtin_amdgpu_rsq_clamped(a);
-}
-
-// CHECK-LABEL: @test_ldexp_f32
+// CHECK-LABEL: @test_legacy_ldexp_f32
 // CHECK: call float @llvm.AMDGPU.ldexp.f32
-void test_ldexp_f32(global float* out, float a, int b)
+void test_legacy_ldexp_f32(global float* out, float a, int b)
 {
   *out = __builtin_amdgpu_ldexpf(a, b);
 }
 
-// CHECK-LABEL: @test_ldexp_f64
+// CHECK-LABEL: @test_legacy_ldexp_f64
 // CHECK: call double @llvm.AMDGPU.ldexp.f64
-void test_ldexp_f64(global double* out, double a, int b)
+void test_legacy_ldexp_f64(global double* out, double a, int b)
 {
   *out = __builtin_amdgpu_ldexp(a, b);
 }
-
-// CHECK-LABEL: @test_class_f32
-// CHECK: call i1 @llvm.AMDGPU.class.f32
-void test_class_f32(global float* out, float a, int b)
-{
-  *out = __builtin_amdgpu_classf(a, b);
-}
-
-// CHECK-LABEL: @test_class_f64
-// CHECK: call i1 @llvm.AMDGPU.class.f64
-void test_class_f64(global double* out, double a, int b)
-{
-  *out = __builtin_amdgpu_class(a, b);
-}