llvm-project/clang/lib/Basic/Targets/NVPTX.cpp

//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

// Table of NVPTX target builtins. The entries are generated by including
// BuiltinsNVPTX.def with the three macros below defined; each macro expands to
// one brace-initialized Builtin::Info element followed by a comma.
//
// All three forms stringify ID and pass TYPE and ATTRS through unchanged, and
// all of them use ALL_LANGUAGES. They differ only in the remaining two slots:
//   - BUILTIN:        no header (nullptr) and no feature (nullptr).
//   - LIBBUILTIN:     HEADER in the header slot, no feature.
//   - TARGET_BUILTIN: no header, FEATURE in the last slot.
// NOTE(review): the slot meanings (header / required feature) are inferred from
// the macro parameter names; confirm against the Builtin::Info declaration.
const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
  {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsNVPTX.def"
};

// Register-name list handed out by getGCCRegNames(). It contains a single
// entry, "r0".
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

/// Build the NVPTX target description.
///
/// \param Triple             Target triple forwarded to the TargetInfo base.
/// \param Opts               Target options; Opts.HostTriple names the host
///                           whose type layout the device side tries to match.
/// \param TargetPointerWidth Device pointer width in bits; must be 32 or 64.
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  // Device-side settings that do not depend on the host.
  TLSSupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  NoAsmVariants = true;
  GPU = CudaArch::SM_20;

  // Only the 32-bit layout carries an explicit pointer specification.
  resetDataLayout(TargetPointerWidth == 32
                      ? "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
                      : "e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // Try to obtain a TargetInfo for the host so that our types can mirror it.
  // An NVPTX "host" is skipped on purpose.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget.reset(AllocateTarget(HostTriple, Opts));

  if (!HostTarget) {
    // No host to mirror: derive long/pointer properties from the requested
    // pointer width and stop here.
    LongWidth = TargetPointerWidth;
    LongAlign = TargetPointerWidth;
    PointerWidth = TargetPointerWidth;
    PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }
    return;
  }

  // From here on, mirror the host's properties.
  TargetInfo &Host = *HostTarget;

  // Widths and alignments of the fundamental types.
  PointerWidth = Host.getPointerWidth(/* AddrSpace = */ 0);
  PointerAlign = Host.getPointerAlign(/* AddrSpace = */ 0);
  BoolWidth = Host.getBoolWidth();
  BoolAlign = Host.getBoolAlign();
  IntWidth = Host.getIntWidth();
  IntAlign = Host.getIntAlign();
  HalfWidth = Host.getHalfWidth();
  HalfAlign = Host.getHalfAlign();
  FloatWidth = Host.getFloatWidth();
  FloatAlign = Host.getFloatAlign();
  DoubleWidth = Host.getDoubleWidth();
  DoubleAlign = Host.getDoubleAlign();
  LongWidth = Host.getLongWidth();
  LongAlign = Host.getLongAlign();
  LongLongWidth = Host.getLongLongWidth();
  LongLongAlign = Host.getLongLongAlign();

  // Miscellaneous alignment defaults.
  MinGlobalAlign = Host.getMinGlobalAlign();
  NewAlign = Host.getNewAlign();
  DefaultAlignForAttributeAligned = Host.getDefaultAlignForAttributeAligned();

  // Which integer kinds back the standard typedef-like types.
  SizeType = Host.getSizeType();
  IntMaxType = Host.getIntMaxType();
  PtrDiffType = Host.getPtrDiffType(/* AddrSpace = */ 0);
  IntPtrType = Host.getIntPtrType();
  WCharType = Host.getWCharType();
  WIntType = Host.getWIntType();
  Char16Type = Host.getChar16Type();
  Char32Type = Host.getChar32Type();
  Int64Type = Host.getInt64Type();
  SigAtomicType = Host.getSigAtomicType();
  ProcessIDType = Host.getProcessIDType();

  // Bit-field layout rules.
  UseBitFieldTypeAlignment = Host.useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = Host.useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = Host.useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = Host.getZeroLengthBitfieldBoundary();

  // Not strictly truthful for the device, but this value drives the
  // __GCC_ATOMIC_XXX_LOCK_FREE macros. Those must agree between host and
  // device because, among other things, they decide which standard library
  // classes get defined, and every class has to exist on both sides.
  MaxAtomicInlineWidth = Host.getMaxAtomicInlineWidth();

  // Deliberately left at the device's own values:
  // - LargeArrayMinWidth, LargeArrayAlign: not observable across the
  //   host/device boundary.
  // - SuitableAlign: not observable across the boundary either, and it may
  //   legitimately differ, e.g. when the host has wider vector types than the
  //   device.
  // - LongDoubleWidth, LongDoubleAlign: on nvptx long double is the same type
  //   as double, which need not hold on the host.
  //   TODO: nvcc warns when long double is used on the device; we should do
  //   the same.
}

/// Return a non-owning view over the static GCCRegNames table.
ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  // Build the view straight from the array; its extent supplies the length.
  return ArrayRef<const char *>(GCCRegNames);
}

/// Report whether the named target feature is available.
///
/// "ptx" and "nvptx" are always reported as present. "satom" (atomics with
/// scope) is reported only when the selected GPU is SM_60 or newer. Any other
/// name yields false.
bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  if (Feature == "ptx" || Feature == "nvptx")
    return true;

  if (Feature == "satom")
    return GPU >= CudaArch::SM_60;

  return false;
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");
  if (Opts.CUDAIsDevice) {
    // Set __CUDA_ARCH__ for the GPU specified.
    std::string CUDAArchCode = [this] {
      switch (GPU) {
      case CudaArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case CudaArch::SM_20:
        return "200";
      case CudaArch::SM_21:
        return "210";
      case CudaArch::SM_30:
        return "300";
      case CudaArch::SM_32:
        return "320";
      case CudaArch::SM_35:
        return "350";
      case CudaArch::SM_37:
        return "370";
      case CudaArch::SM_50:
        return "500";
      case CudaArch::SM_52:
        return "520";
      case CudaArch::SM_53:
        return "530";
      case CudaArch::SM_60:
        return "600";
      case CudaArch::SM_61:
        return "610";
      case CudaArch::SM_62:
        return "620";
      case CudaArch::SM_70:
        return "700";
      }
      llvm_unreachable("unhandled CudaArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
  }
}

/// Return a non-owning view over the NVPTX builtin table.
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  // Length of the view: the distance from the first target-specific builtin ID
  // to NVPTX's last one.
  const auto NumTargetBuiltins =
      clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;
  return llvm::makeArrayRef(BuiltinInfo, NumTargetBuiltins);
}
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file implements NVPTX TargetInfo objects.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "NVPTX.h"`
			`#include "Targets.h"`
			`#include "clang/Basic/Builtins.h"`
			`#include "clang/Basic/MacroBuilder.h"`
			`#include "clang/Basic/TargetBuiltins.h"`
			`#include "llvm/ADT/StringSwitch.h"`

			`using namespace clang;`
			`using namespace clang::targets;`

			`const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = {`
			`#define BUILTIN(ID, TYPE, ATTRS) \`
			`{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},`
			`#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \`
			`{#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr},`
			`#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \`
			`{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},`
			`#include "clang/Basic/BuiltinsNVPTX.def"`
			`};`

			`const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};`

			`NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,`
			`const TargetOptions &Opts,`
			`unsigned TargetPointerWidth)`
			`: TargetInfo(Triple) {`
			`assert((TargetPointerWidth == 32 \|\| TargetPointerWidth == 64) &&`
			`"NVPTX only supports 32- and 64-bit modes.");`

			`TLSSupported = false;`
			`AddrSpaceMap = &NVPTXAddrSpaceMap;`
			`UseAddrSpaceMapMangling = true;`

			`// Define available target features`
			`// These must be defined in sorted order!`
			`NoAsmVariants = true;`
			`GPU = CudaArch::SM_20;`

			`if (TargetPointerWidth == 32)`
			`resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");`
			`else`
			`resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");`

			`// If possible, get a TargetInfo for our host triple, so we can match its`
			`// types.`
			`llvm::Triple HostTriple(Opts.HostTriple);`
			`if (!HostTriple.isNVPTX())`
			`HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));`

			`// If no host target, make some guesses about the data layout and return.`
			`if (!HostTarget) {`
			`LongWidth = LongAlign = TargetPointerWidth;`
			`PointerWidth = PointerAlign = TargetPointerWidth;`
			`switch (TargetPointerWidth) {`
			`case 32:`
			`SizeType = TargetInfo::UnsignedInt;`
			`PtrDiffType = TargetInfo::SignedInt;`
			`IntPtrType = TargetInfo::SignedInt;`
			`break;`
			`case 64:`
			`SizeType = TargetInfo::UnsignedLong;`
			`PtrDiffType = TargetInfo::SignedLong;`
			`IntPtrType = TargetInfo::SignedLong;`
			`break;`
			`default:`
			`llvm_unreachable("TargetPointerWidth must be 32 or 64");`
			`}`
			`return;`
			`}`

			`// Copy properties from host target.`
			`PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);`
			`PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);`
			`BoolWidth = HostTarget->getBoolWidth();`
			`BoolAlign = HostTarget->getBoolAlign();`
			`IntWidth = HostTarget->getIntWidth();`
			`IntAlign = HostTarget->getIntAlign();`
			`HalfWidth = HostTarget->getHalfWidth();`
			`HalfAlign = HostTarget->getHalfAlign();`
			`FloatWidth = HostTarget->getFloatWidth();`
			`FloatAlign = HostTarget->getFloatAlign();`
			`DoubleWidth = HostTarget->getDoubleWidth();`
			`DoubleAlign = HostTarget->getDoubleAlign();`
			`LongWidth = HostTarget->getLongWidth();`
			`LongAlign = HostTarget->getLongAlign();`
			`LongLongWidth = HostTarget->getLongLongWidth();`
			`LongLongAlign = HostTarget->getLongLongAlign();`
			`MinGlobalAlign = HostTarget->getMinGlobalAlign();`
			`NewAlign = HostTarget->getNewAlign();`
			`DefaultAlignForAttributeAligned =`
			`HostTarget->getDefaultAlignForAttributeAligned();`
			`SizeType = HostTarget->getSizeType();`
			`IntMaxType = HostTarget->getIntMaxType();`
			`PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0);`
			`IntPtrType = HostTarget->getIntPtrType();`
			`WCharType = HostTarget->getWCharType();`
			`WIntType = HostTarget->getWIntType();`
			`Char16Type = HostTarget->getChar16Type();`
			`Char32Type = HostTarget->getChar32Type();`
			`Int64Type = HostTarget->getInt64Type();`
			`SigAtomicType = HostTarget->getSigAtomicType();`
			`ProcessIDType = HostTarget->getProcessIDType();`

			`UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();`
			`UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();`
			`UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();`
			`ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();`

			`// This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and`
			`// we need those macros to be identical on host and device, because (among`
			`// other things) they affect which standard library classes are defined, and`
			`// we need all classes to be defined on both the host and device.`
			`MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();`

			`// Properties intentionally not copied from host:`
			`// - LargeArrayMinWidth, LargeArrayAlign: Not visible across the`
			`// host/device boundary.`
			`// - SuitableAlign: Not visible across the host/device boundary, and may`
			`// correctly be different on host/device, e.g. if host has wider vector`
			`// types than device.`
			`// - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same`
			`// as its double type, but that's not necessarily true on the host.`
			`// TODO: nvcc emits a warning when using long double on device; we should`
			`// do the same.`
			`}`

			`ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {`
			`return llvm::makeArrayRef(GCCRegNames);`
			`}`

			`bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {`
			`return llvm::StringSwitch<bool>(Feature)`
			`.Cases("ptx", "nvptx", true)`
			`.Case("satom", GPU >= CudaArch::SM_60) // Atomics w/ scope.`
			`.Default(false);`
			`}`

			`void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,`
			`MacroBuilder &Builder) const {`
			`Builder.defineMacro("__PTX__");`
			`Builder.defineMacro("__NVPTX__");`
			`if (Opts.CUDAIsDevice) {`
			`// Set __CUDA_ARCH__ for the GPU specified.`
			`std::string CUDAArchCode = [this] {`
			`switch (GPU) {`
			`case CudaArch::UNKNOWN:`
			`assert(false && "No GPU arch when compiling CUDA device code.");`
			`return "";`
			`case CudaArch::SM_20:`
			`return "200";`
			`case CudaArch::SM_21:`
			`return "210";`
			`case CudaArch::SM_30:`
			`return "300";`
			`case CudaArch::SM_32:`
			`return "320";`
			`case CudaArch::SM_35:`
			`return "350";`
			`case CudaArch::SM_37:`
			`return "370";`
			`case CudaArch::SM_50:`
			`return "500";`
			`case CudaArch::SM_52:`
			`return "520";`
			`case CudaArch::SM_53:`
			`return "530";`
			`case CudaArch::SM_60:`
			`return "600";`
			`case CudaArch::SM_61:`
			`return "610";`
			`case CudaArch::SM_62:`
			`return "620";`
[CUDA] Added rudimentary support for CUDA-9 and sm_70. For now CUDA-9 is not included in the list of CUDA versions clang searches for, so the path to CUDA-9 must be explicitly passed via --cuda-path=. On LLVM side NVPTX added sm_70 GPU type which bumps required PTX version to 6.0, but otherwise is equivalent to sm_62 at the moment. Differential Revision: https://reviews.llvm.org/D37576 llvm-svn: 312734 2017-09-08 02:14:32 +08:00			`case CudaArch::SM_70:`
			`return "700";`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`}`
			`llvm_unreachable("unhandled CudaArch");`
			`}();`
			`Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);`
			`}`
			`}`

			`ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {`
			`return llvm::makeArrayRef(BuiltinInfo, clang::NVPTX::LastTSBuiltin -`
			`Builtin::FirstTSBuiltin);`
			`}`