llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeature...

//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

/// Module pass that tags functions with string attributes describing the
/// kernel features they use: callers of selected intrinsics, and functions
/// containing address space casts that require the queue pointer.
class AMDGPUAnnotateKernelFeatures : public ModulePass {
public:
  /// Pass identification; handed to the ModulePass constructor.
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  bool runOnModule(Module &M) override;

  /// Returns true if \p CE is an addrspacecast whose source address space
  /// requires the queue pointer.
  static bool visitConstantExpr(const ConstantExpr *CE);

  /// Walks \p EntryC and all constants reachable through its operands,
  /// returning true as soon as visitConstantExpr() accepts one of them.
  /// \p ConstantExprVisited records constants already queued so that each is
  /// examined at most once across calls sharing the same set.
  static bool visitConstantExprsRecursively(
      const Constant *EntryC,
      SmallPtrSet<const Constant *, 8> &ConstantExprVisited);

private:
  /// Returns true if \p F uses an addrspacecast that requires the queue ptr.
  static bool hasAddrSpaceCast(const Function &F);

  /// Adds the attribute \p AttrName to every function that calls \p Intrin.
  void addAttrToCallers(Function *Intrin, StringRef AttrName);

  /// For each { intrinsic name, attribute name } pair, annotates the callers
  /// of the intrinsic if the module declares it. Returns true if any listed
  /// intrinsic was found in \p M.
  bool addAttrsForIntrinsics(Module &M,
                             ArrayRef<StringRef[2]> IntrinsicToAttr);
};

}

// Out-of-class definition of the static ID member; the pass constructor passes
// this object to ModulePass.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// llvm-namespace reference bound to the ID above. The pass class itself lives
// in an anonymous namespace, so this is presumably how other translation units
// name the pass -- confirm against the declaration in AMDGPU.h.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

// Registers the pass under the DEBUG_TYPE name
// ("amdgpu-annotate-kernel-features") with a human-readable description.
// NOTE(review): the two trailing `false` arguments are believed to be the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// Decide from the source address space alone whether an addrspacecast needs
// the queue ptr. Only casts *to* flat need it, never casts from flat, so only
// the local and private source address spaces qualify.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
    return true;

  return SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Scans every instruction of F for an addrspacecast that requires the queue
// ptr, either as an instruction in its own right or buried inside a constant
// operand. Returns true on the first one found, false if there is none.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
  // Shared across the whole function so each constant is walked only once.
  SmallPtrSet<const Constant *, 8> SeenConstants;

  for (const BasicBlock &Block : F) {
    for (const Instruction &Inst : Block) {
      // Direct addrspacecast instruction.
      const auto *Cast = dyn_cast<AddrSpaceCastInst>(&Inst);
      if (Cast && castRequiresQueuePtr(Cast))
        return true;

      // Constant operands may hide addrspacecast constant expressions.
      for (const Use &Operand : Inst.operands()) {
        if (const auto *ConstOperand = dyn_cast<Constant>(Operand)) {
          if (visitConstantExprsRecursively(ConstOperand, SeenConstants))
            return true;
        }
      }
    }
  }

  return false;
}

// Adds the string attribute AttrName to every function containing a call to
// Intrin. Each calling function is annotated once, however many call sites it
// contains.
void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> AnnotatedCallers;

  for (User *IntrinUser : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic, hence the unchecked
    // cast.
    auto *Call = cast<CallInst>(IntrinUser);
    Function *Caller = Call->getParent()->getParent();

    // Skip functions already handled through an earlier call site.
    if (!AnnotatedCallers.insert(Caller).second)
      continue;

    Caller->addFnAttr(AttrName);
  }
}

// For each { intrinsic name, attribute name } pair in IntrinsicToAttr, looks
// the intrinsic up in M and, when it is present, annotates all of its callers
// with the attribute. Returns true if at least one of the listed intrinsics
// exists in the module.
bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool FoundAny = false;

  for (const auto &Entry : IntrinsicToAttr) {
    // Entry[0] is the intrinsic name, Entry[1] the attribute to attach.
    Function *Intrin = M.getFunction(Entry[0]);
    if (!Intrin)
      continue;

    addAttrToCallers(Intrin, Entry[1]);
    FoundAny = true;
  }

  return FoundAny;
}

// Pass entry point. Annotates functions in M with attributes describing the
// kernel features they use:
//   * callers of the y/z workitem and workgroup id intrinsics, for every
//     target OS;
//   * on AMDHSA and Mesa3D only, callers of the dispatch ptr, queue ptr and
//     dispatch id intrinsics, plus every function containing an addrspacecast
//     that requires the queue ptr.
// Returns true if the module may have been modified.
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      // Already marked, e.g. because it calls llvm.amdgcn.queue.ptr directly.
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      if (hasAddrSpaceCast(F)) {
        F.addFnAttr("amdgpu-queue-ptr");
        // Adding the attribute modifies the module, so it must be reflected
        // in the return value even when none of the intrinsics were found.
        Changed = true;
      }
    }
  }

  return Changed;
}

// Factory for the pass: returns a newly allocated
// AMDGPUAnnotateKernelFeatures instance as a ModulePass pointer.
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  ModulePass *NewPass = new AMDGPUAnnotateKernelFeatures();
  return NewPass;
}
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`/// \file This pass adds target attributes to functions which use intrinsics`
			`/// which will impact calling convention lowering.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "AMDGPU.h"`
AMDGPU: Prune includes llvm-svn: 278391 2016-08-12 03:18:50 +08:00			`#include "llvm/ADT/Triple.h"`
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00			`#include "llvm/IR/Constants.h"`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`#include "llvm/IR/Instructions.h"`
			`#include "llvm/IR/Module.h"`

			`#define DEBUG_TYPE "amdgpu-annotate-kernel-features"`

			`using namespace llvm;`

			`namespace {`

			`class AMDGPUAnnotateKernelFeatures : public ModulePass {`
			`private:`
AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`static bool hasAddrSpaceCast(const Function &F);`

AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`void addAttrToCallers(Function *Intrin, StringRef AttrName);`
			`bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);`

			`public:`
			`static char ID;`

			`AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }`
			`bool runOnModule(Module &M) override;`
Use StringRef in Pass/PassManager APIs (NFC) llvm-svn: 283004 2016-10-01 10:56:57 +08:00			`StringRef getPassName() const override {`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`return "AMDGPU Annotate Kernel Features";`
			`}`

			`void getAnalysisUsage(AnalysisUsage &AU) const override {`
			`AU.setPreservesAll();`
			`ModulePass::getAnalysisUsage(AU);`
			`}`
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00
			`static bool visitConstantExpr(const ConstantExpr *CE);`
			`static bool visitConstantExprsRecursively(`
			`const Constant *EntryC,`
			`SmallPtrSet<const Constant *, 8> &ConstantExprVisited);`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`};`

			`}`

			`char AMDGPUAnnotateKernelFeatures::ID = 0;`

			`char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;`

AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,`
			`"Add AMDGPU function attributes", false, false)`

AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00			`// The queue ptr is only needed when casting to flat, not from it.`
			`static bool castRequiresQueuePtr(unsigned SrcAS) {`
AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`return SrcAS == AMDGPUAS::LOCAL_ADDRESS \|\| SrcAS == AMDGPUAS::PRIVATE_ADDRESS;`
			`}`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00			`static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {`
			`return castRequiresQueuePtr(ASC->getSrcAddressSpace());`
			`}`

			`bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {`
			`if (CE->getOpcode() == Instruction::AddrSpaceCast) {`
			`unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();`
			`return castRequiresQueuePtr(SrcAS);`
			`}`

			`return false;`
			`}`

			`bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(`
			`const Constant *EntryC,`
			`SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {`

			`if (!ConstantExprVisited.insert(EntryC).second)`
			`return false;`

			`SmallVector<const Constant *, 16> Stack;`
			`Stack.push_back(EntryC);`

			`while (!Stack.empty()) {`
			`const Constant *C = Stack.pop_back_val();`

			`// Check this constant expression.`
			`if (const auto *CE = dyn_cast<ConstantExpr>(C)) {`
			`if (visitConstantExpr(CE))`
			`return true;`
			`}`

			`// Visit all sub-expressions.`
			`for (const Use &U : C->operands()) {`
			`const auto *OpC = dyn_cast<Constant>(U);`
			`if (!OpC)`
			`continue;`

			`if (!ConstantExprVisited.insert(OpC).second)`
			`continue;`

			`Stack.push_back(OpC);`
			`}`
			`}`

			`return false;`
			`}`

AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`// Return true if an addrspacecast is used that requires the queue ptr.`
			`bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {`
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00			`SmallPtrSet<const Constant *, 8> ConstantExprVisited;`

AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`for (const BasicBlock &BB : F) {`
			`for (const Instruction &I : BB) {`
			`if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {`
			`if (castRequiresQueuePtr(ASC))`
			`return true;`
			`}`
AMDGPU: Fix constantexpr addrspacecasts If we had a constant group address space cast the queue pointer wasn't enabled for the function, resulting in a crash on noreg later. llvm-svn: 271935 2016-06-07 04:03:31 +08:00
			`for (const Use &U : I.operands()) {`
			`const auto *OpC = dyn_cast<Constant>(U);`
			`if (!OpC)`
			`continue;`

			`if (visitConstantExprsRecursively(OpC, ConstantExprVisited))`
			`return true;`
			`}`
AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`}`
			`}`

			`return false;`
			`}`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00
			`void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,`
			`StringRef AttrName) {`
			`SmallPtrSet<Function *, 4> SeenFuncs;`

			`for (User *U : Intrin->users()) {`
			`// CallInst is the only valid user for an intrinsic.`
			`CallInst *CI = cast<CallInst>(U);`

			`Function *CallingFunction = CI->getParent()->getParent();`
			`if (SeenFuncs.insert(CallingFunction).second)`
			`CallingFunction->addFnAttr(AttrName);`
			`}`
			`}`

			`bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(`
			`Module &M,`
			`ArrayRef<StringRef[2]> IntrinsicToAttr) {`
			`bool Changed = false;`

			`for (const StringRef *Arr : IntrinsicToAttr) {`
			`if (Function *Fn = M.getFunction(Arr[0])) {`
			`addAttrToCallers(Fn, Arr[1]);`
			`Changed = true;`
			`}`
			`}`

			`return Changed;`
			`}`

			`bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {`
			`Triple TT(M.getTargetTriple());`

			`static const StringRef IntrinsicToAttr[][2] = {`
			`// .x omitted`
AMDGPU: Add new amdgcn workitem intrinsics These use the correct prefix and follow the HSA naming convention rather than the config register option names. llvm-svn: 259293 2016-01-30 12:25:19 +08:00			`{ "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },`
			`{ "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },`

			`{ "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },`
			`{ "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },`

AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`{ "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },`
			`{ "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },`

			`// .x omitted`
			`{ "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },`
			`{ "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }`
			`};`

			`static const StringRef HSAIntrinsicToAttr[][2] = {`
AMDGPU: Add queue ptr intrinsic llvm-svn: 267451 2016-04-26 03:27:18 +08:00			`{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },`
AMDGPU: Add HSA dispatch id intrinsic llvm-svn: 276437 2016-07-23 01:01:30 +08:00			`{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },`
			`{ "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`};`

AMDGPU: Stop checking intrinsics not used by HSA for dispatch-ptr Only the dispatch.ptr intrinsic is supposed to be used now to get the workgroup size, and the read.local.size intrinsics do not work correctly. llvm-svn: 259296 2016-01-30 13:10:59 +08:00			`// TODO: We should not add the attributes if the known compile time workgroup`
			`// size is 1 for y/z.`

AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`// TODO: Intrinsics that require queue ptr.`

			`// We do not need to note the x workitem or workgroup id because they are`
			`// always initialized.`

			`bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);`
AMDGPU/SI: Add support for triples with the mesa3d operating system Summary: mesa3d will use the same kernel calling convention as amdhsa, but it will handle everything else like the default 'unknown' OS type. Reviewers: arsenm Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: https://reviews.llvm.org/D22783 llvm-svn: 281779 2016-09-17 05:34:26 +08:00			`if (TT.getOS() == Triple::AMDHSA \|\| TT.getOS() == Triple::Mesa3D) {`
AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`Changed \|= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);`

AMDGPU: Implement addrspacecast llvm-svn: 267452 2016-04-26 03:27:24 +08:00			`for (Function &F : M) {`
			`if (F.hasFnAttribute("amdgpu-queue-ptr"))`
			`continue;`

			`if (hasAddrSpaceCast(F))`
			`F.addFnAttr("amdgpu-queue-ptr");`
			`}`
			`}`

AMDGPU: Add pass to detect used kernel features Mark kernels that use certain features that require user SGPRs to support with kernel attributes. We need to know before instruction selection begins because it impacts the kernel calling convention lowering. For now this only detects the workitem intrinsics. llvm-svn: 252323 2015-11-07 02:01:57 +08:00			`return Changed;`
			`}`

			`ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {`
			`return new AMDGPUAnnotateKernelFeatures();`
			`}`