llvm-project/clang/lib/Basic/Targets/NVPTX.h

//===--- NVPTX.h - Declare NVPTX target feature support ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H

#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"

namespace clang {
namespace targets {

/// Positional table of NVPTX address-space numbers. Each slot is labelled with
/// the language address space it stands for; the order of the slots is
/// significant and must not be changed.
static const unsigned NVPTXAddrSpaceMap[] = {
    /* Default         */ 0,
    /* opencl_global   */ 1,
    /* opencl_local    */ 3,
    /* opencl_constant */ 4,
    /* opencl_private  */ 0,
    // FIXME: generic has to be added to the target
    /* opencl_generic  */ 0,
    /* cuda_device     */ 1,
    /* cuda_constant   */ 4,
    /* cuda_shared     */ 3,
    /* ptr32_sptr      */ 0,
    /* ptr32_uptr      */ 0,
    /* ptr64           */ 0
};

/// DWARF address classes, indexed by the address-space number handed to
/// NVPTXTargetInfo::getDWARFAddressSpace(). A negative entry means that no
/// address class is defined for that slot. The values are taken from
/// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
static const int NVPTXDWARFAddrSpaceMap[] = {
    /* 0: Default, opencl_private or opencl_generic - not defined */ -1,
    /* 1: opencl_global */ 5,
    /* 2: no address class given */ -1,
    /* 3: opencl_local or cuda_shared */ 8,
    /* 4: opencl_constant or cuda_constant */ 4,
};

/// TargetInfo implementation for NVPTX.
///
/// Tracks a GPU architecture and a PTX version and publishes both as target
/// features. When a host TargetInfo is attached, calling-convention checks are
/// delegated to it.
class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
  static const char *const GCCRegNames[];
  static const Builtin::Info BuiltinInfo[];
  /// GPU architecture; updated by setCPU().
  CudaArch GPU;
  /// Number appended to "ptx" to form the PTX feature name.
  uint32_t PTXVersion;
  /// Host-side target consulted by checkCallingConvention(); may be null.
  std::unique_ptr<TargetInfo> HostTarget;

public:
  NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts,
                  unsigned TargetPointerWidth);

  void getTargetDefines(const LangOptions &Opts,
                        MacroBuilder &Builder) const override;

  ArrayRef<Builtin::Info> getTargetBuiltins() const override;

  /// Enables the feature named after the current GPU architecture and the
  /// "ptx<PTXVersion>" feature, then lets the base class finish the map.
  ///
  /// \returns whatever TargetInfo::initFeatureMap returns.
  bool
  initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                 StringRef CPU,
                 const std::vector<std::string> &FeaturesVec) const override {
    const std::string PTXFeature = "ptx" + std::to_string(PTXVersion);
    Features[CudaArchToString(GPU)] = true;
    Features[PTXFeature] = true;
    return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
  }

  bool hasFeature(StringRef Feature) const override;

  ArrayRef<const char *> getGCCRegNames() const override;

  /// \returns an empty list: this target declares no register aliases.
  ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
    return None;
  }

  /// Accepts the single-letter constraints 'c', 'h', 'r', 'l', 'f' and 'd',
  /// marking each of them as allowing a register. Any other leading character
  /// is rejected.
  ///
  /// \returns true if the constraint was recognised, false otherwise.
  bool validateAsmConstraint(const char *&Name,
                             TargetInfo::ConstraintInfo &Info) const override {
    switch (*Name) {
    case 'c':
    case 'h':
    case 'r':
    case 'l':
    case 'f':
    case 'd':
      break;
    default:
      // Not one of the recognised constraint letters.
      return false;
    }
    Info.setAllowsRegister();
    return true;
  }

  /// \returns an empty clobber string.
  const char *getClobbers() const override {
    // FIXME: Is this really right?
    return "";
  }

  /// \returns the char-pointer flavour of va_list.
  BuiltinVaListKind getBuiltinVaListKind() const override {
    // FIXME: implement
    return TargetInfo::CharPtrBuiltinVaList;
  }

  /// \returns true if \p Name maps to a CudaArch other than UNKNOWN.
  bool isValidCPUName(StringRef Name) const override {
    const CudaArch Arch = StringToCudaArch(Name);
    return Arch != CudaArch::UNKNOWN;
  }

  /// Appends to \p Values the name of every CudaArch enumerator from SM_20 up
  /// to, but not including, LAST.
  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
    const int FirstArch = static_cast<int>(CudaArch::SM_20);
    const int EndArch = static_cast<int>(CudaArch::LAST);
    for (int ArchIdx = FirstArch; ArchIdx < EndArch; ++ArchIdx) {
      const CudaArch Arch = static_cast<CudaArch>(ArchIdx);
      Values.emplace_back(CudaArchToString(Arch));
    }
  }

  /// Stores the architecture named by \p Name in GPU; the field is
  /// overwritten even when the name is not recognised.
  ///
  /// \returns true if \p Name mapped to something other than UNKNOWN.
  bool setCPU(const std::string &Name) override {
    GPU = StringToCudaArch(Name);
    const bool Recognized = GPU != CudaArch::UNKNOWN;
    return Recognized;
  }

  /// Marks the OpenCL extensions listed below as supported.
  void setSupportedOpenCLOpts() override {
    // Registered in exactly this order.
    static const char *const SupportedExtensions[] = {
        "cl_clang_storage_class_specifiers",
        "cl_khr_gl_sharing",
        "cl_khr_icd",
        "cl_khr_fp64",
        "cl_khr_byte_addressable_store",
        "cl_khr_global_int32_base_atomics",
        "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics",
        "cl_khr_local_int32_extended_atomics",
    };

    auto &Opts = getSupportedOpenCLOpts();
    for (const char *Extension : SupportedExtensions)
      Opts.support(Extension);
  }

  /// Looks up the DWARF address space for the target specific address space
  /// \p AddressSpace in NVPTXDWARFAddrSpaceMap.
  ///
  /// \returns the table entry when \p AddressSpace is inside the table and the
  /// entry is non-negative.
  ///
  /// \returns None otherwise, in which case no conversion will be emitted in
  /// the DWARF.
  Optional<unsigned>
  getDWARFAddressSpace(unsigned AddressSpace) const override {
    // Address spaces beyond the end of the table have no mapping.
    if (AddressSpace >= llvm::array_lengthof(NVPTXDWARFAddrSpaceMap))
      return llvm::None;

    // A negative entry marks a slot without a DWARF address class.
    const int DWARFClass = NVPTXDWARFAddrSpaceMap[AddressSpace];
    if (DWARFClass < 0)
      return llvm::None;
    return DWARFClass;
  }

  /// Checks \p CC against the host target when one is attached.
  ///
  /// \returns the host target's verdict, or CCCR_Warning when there is no host
  /// target.
  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
    if (!HostTarget)
      return CCCR_Warning;

    // CUDA compilations support all of the host's calling conventions.
    //
    // TODO: We should warn if you apply a non-default CC to anything other than
    // a host function.
    return HostTarget->checkCallingConvention(CC);
  }
};
} // namespace targets
} // namespace clang
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`//===--- NVPTX.h - Declare NVPTX target feature support ---------- C++ --===//`
			`//`
Update the file headers across all of the LLVM projects in the monorepo to reflect the new license. We understand that people may be surprised that we're moving the header entirely to discuss the new license. We checked this carefully with the Foundation's lawyer and we believe this is the correct approach. Essentially, all code in the project is now made available by the LLVM project under our new license, so you will see that the license headers include that license only. Some of our contributors have contributed code under our old license, and accordingly, we have retained a copy of our old license notice in the top-level files in each project and repository. llvm-svn: 351636 2019-01-19 16:50:56 +08:00			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file declares NVPTX TargetInfo objects.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H`
			`#define LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H`

			`#include "clang/Basic/Cuda.h"`
			`#include "clang/Basic/TargetInfo.h"`
			`#include "clang/Basic/TargetOptions.h"`
			`#include "llvm/ADT/Triple.h"`
			`#include "llvm/Support/Compiler.h"`

			`namespace clang {`
			`namespace targets {`

			`static const unsigned NVPTXAddrSpaceMap[] = {`
			`0, // Default`
			`1, // opencl_global`
			`3, // opencl_local`
			`4, // opencl_constant`
[OpenCL] Add LangAS::opencl_private to represent private address space in AST Currently Clang uses default address space (0) to represent private address space for OpenCL in AST. There are two issues with this: Multiple address spaces including private address space cannot be diagnosed. There is no mangling for default address space. For example, if private int* is emitted as i32 addrspace(5)* in IR. It is supposed to be mangled as PUAS5i but it is mangled as Pi instead. This patch attempts to represent OpenCL private address space explicitly in AST. It adds a new enum LangAS::opencl_private and adds it to the variable types which are implicitly private: automatic variables without address space qualifier function parameter pointee type without address space qualifier (OpenCL 1.2 and below) Differential Revision: https://reviews.llvm.org/D35082 llvm-svn: 315668 2017-10-13 11:37:48 +08:00			`0, // opencl_private`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`// FIXME: generic has to be added to the target`
			`0, // opencl_generic`
			`1, // cuda_device`
			`4, // cuda_constant`
			`3, // cuda_shared`
Add support for the MS qualifiers __ptr32, __ptr64, __sptr, __uptr. Summary: This adds parsing of the qualifiers __ptr32, __ptr64, __sptr, and __uptr and lowers them to the corresponding address space pointer for 32-bit and 64-bit pointers. (32/64-bit pointers added in https://reviews.llvm.org/D69639) A large part of this patch is making these pointers ignore the address space when doing things like overloading and casting. https://bugs.llvm.org/show_bug.cgi?id=42359 Reviewers: rnk, rsmith Subscribers: jholewinski, jvesely, nhaehnle, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D71039 2019-10-25 07:34:25 +08:00			`0, // ptr32_sptr`
			`0, // ptr32_uptr`
			`0 // ptr64`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`};`

[DEBUG_INFO][NVPTX] Generate correct data about variable address class. Summary: Added ability to generate correct debug info data about the variable address class. Currently, for all the locals and globals the default values are used, ADDR_local_space(6) for locals and ADDR_global_space(5) for globals. The values are taken from the table in https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf. We need to emit correct data for address classes of, at least, shared and constant globals. Currently, all these variables are treated by the cuda-gdb debugger as the variables in the global address space and, thus, it require manual data type casting. Reviewers: echristo, probinson Subscribers: jholewinski, aprantl, cfe-commits Differential Revision: https://reviews.llvm.org/D57162 llvm-svn: 353204 2019-02-06 03:45:57 +08:00			`/// The DWARF address class. Taken from`
			`/// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf`
			`static const int NVPTXDWARFAddrSpaceMap[] = {`
			`-1, // Default, opencl_private or opencl_generic - not defined`
			`5, // opencl_global`
			`-1,`
			`8, // opencl_local or cuda_shared`
			`4, // opencl_constant or cuda_constant`
			`};`

Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {`
			`static const char *const GCCRegNames[];`
			`static const Builtin::Info BuiltinInfo[];`
			`CudaArch GPU;`
[NVPTX, CUDA] Improved feature constraints on NVPTX target builtins. When NVPTX TARGET_BUILTIN specifies sm_XX or ptxYY as required feature, consider those features available if we're compiling for GPU >= sm_XX or have enabled PTX version >= ptxYY. Differential Revision: https://reviews.llvm.org/D45061 llvm-svn: 329829 2018-04-12 01:51:19 +08:00			`uint32_t PTXVersion;`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`std::unique_ptr<TargetInfo> HostTarget;`

			`public:`
			`NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts,`
			`unsigned TargetPointerWidth);`

			`void getTargetDefines(const LangOptions &Opts,`
			`MacroBuilder &Builder) const override;`

			`ArrayRef<Builtin::Info> getTargetBuiltins() const override;`

			`bool`
			`initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,`
			`StringRef CPU,`
			`const std::vector<std::string> &FeaturesVec) const override {`
[NVPTX, CUDA] Improved feature constraints on NVPTX target builtins. When NVPTX TARGET_BUILTIN specifies sm_XX or ptxYY as required feature, consider those features available if we're compiling for GPU >= sm_XX or have enabled PTX version >= ptxYY. Differential Revision: https://reviews.llvm.org/D45061 llvm-svn: 329829 2018-04-12 01:51:19 +08:00			`Features[CudaArchToString(GPU)] = true;`
			`Features["ptx" + std::to_string(PTXVersion)] = true;`
Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);`
			`}`

			`bool hasFeature(StringRef Feature) const override;`

			`ArrayRef<const char *> getGCCRegNames() const override;`

			`ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {`
			`// No aliases.`
			`return None;`
			`}`

			`bool validateAsmConstraint(const char *&Name,`
			`TargetInfo::ConstraintInfo &Info) const override {`
			`switch (*Name) {`
			`default:`
			`return false;`
			`case 'c':`
			`case 'h':`
			`case 'r':`
			`case 'l':`
			`case 'f':`
			`case 'd':`
			`Info.setAllowsRegister();`
			`return true;`
			`}`
			`}`

			`const char *getClobbers() const override {`
			`// FIXME: Is this really right?`
			`return "";`
			`}`

			`BuiltinVaListKind getBuiltinVaListKind() const override {`
			`// FIXME: implement`
			`return TargetInfo::CharPtrBuiltinVaList;`
			`}`

			`bool isValidCPUName(StringRef Name) const override {`
			`return StringToCudaArch(Name) != CudaArch::UNKNOWN;`
			`}`

Add NVPTX Support to ValidCPUList (enabling march notes) A followup to: https://reviews.llvm.org/D42978 This patch adds NVPTX support for enabling the march notes. Differential Revision: https://reviews.llvm.org/D43045 llvm-svn: 324675 2018-02-09 07:16:00 +08:00			`void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {`
			`for (int i = static_cast<int>(CudaArch::SM_20);`
			`i < static_cast<int>(CudaArch::LAST); ++i)`
			`Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));`
			`}`

Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`bool setCPU(const std::string &Name) override {`
			`GPU = StringToCudaArch(Name);`
			`return GPU != CudaArch::UNKNOWN;`
			`}`

			`void setSupportedOpenCLOpts() override {`
			`auto &Opts = getSupportedOpenCLOpts();`
			`Opts.support("cl_clang_storage_class_specifiers");`
			`Opts.support("cl_khr_gl_sharing");`
			`Opts.support("cl_khr_icd");`

			`Opts.support("cl_khr_fp64");`
			`Opts.support("cl_khr_byte_addressable_store");`
			`Opts.support("cl_khr_global_int32_base_atomics");`
			`Opts.support("cl_khr_global_int32_extended_atomics");`
			`Opts.support("cl_khr_local_int32_base_atomics");`
			`Opts.support("cl_khr_local_int32_extended_atomics");`
			`}`

[DEBUG_INFO][NVPTX] Generate correct data about variable address class. Summary: Added ability to generate correct debug info data about the variable address class. Currently, for all the locals and globals the default values are used, ADDR_local_space(6) for locals and ADDR_global_space(5) for globals. The values are taken from the table in https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf. We need to emit correct data for address classes of, at least, shared and constant globals. Currently, all these variables are treated by the cuda-gdb debugger as the variables in the global address space and, thus, it require manual data type casting. Reviewers: echristo, probinson Subscribers: jholewinski, aprantl, cfe-commits Differential Revision: https://reviews.llvm.org/D57162 llvm-svn: 353204 2019-02-06 03:45:57 +08:00			`/// \returns If a target requires an address within a target specific address`
			`/// space \p AddressSpace to be converted in order to be used, then return the`
			`/// corresponding target specific DWARF address space.`
			`///`
			`/// \returns Otherwise return None and no conversion will be emitted in the`
			`/// DWARF.`
			`Optional<unsigned>`
			`getDWARFAddressSpace(unsigned AddressSpace) const override {`
			`if (AddressSpace >= llvm::array_lengthof(NVPTXDWARFAddrSpaceMap) \|\|`
			`NVPTXDWARFAddrSpaceMap[AddressSpace] < 0)`
			`return llvm::None;`
			`return NVPTXDWARFAddrSpaceMap[AddressSpace];`
			`}`

Break up Targets.cpp into a header/impl pair per target type[NFCI] Targets.cpp is getting unwieldy, and even minor changes cause the entire thing to cause recompilation for everyone. This patch bites the bullet and breaks it up into a number of files. I tended to keep function definitions in the class declaration unless it caused additional includes to be necessary. In those cases, I pulled it over into the .cpp file. Content is copy/paste for the most part, besides includes/format/etc. Differential Revision: https://reviews.llvm.org/D35701 llvm-svn: 308791 2017-07-22 06:37:03 +08:00			`CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {`
			`// CUDA compilations support all of the host's calling conventions.`
			`//`
			`// TODO: We should warn if you apply a non-default CC to anything other than`
			`// a host function.`
			`if (HostTarget)`
			`return HostTarget->checkCallingConvention(CC);`
			`return CCCR_Warning;`
			`}`
			`};`
			`} // namespace targets`
			`} // namespace clang`
			`#endif // LLVM_CLANG_LIB_BASIC_TARGETS_NVPTX_H`