llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp

//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
// The body is intentionally empty: the function exists only so that it has an
// out-of-line definition in this translation unit.
void AMDGPUInstrInfo::anchor() {}

// Constructs the common AMDGPU instruction info and stores a reference to the
// subtarget \p ST, which pseudoToMCOpcode() later uses to pick the encoding
// family.
// NOTE(review): the two -1 arguments are forwarded to the TableGen-generated
// base class; presumably they mean "no call-frame setup/destroy opcode" --
// confirm against AMDGPUGenInstrInfo.inc.
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}

// Returns true when two loads should be scheduled next to each other: at most
// 16 loads have been clustered so far and the two offsets are less than one
// cacheline apart. \p Load0 and \p Load1 are not inspected; only the offsets
// and the running load count take part in the decision. \p Offset1 must be
// strictly greater than \p Offset0 (asserted).
//
// FIXME: The resulting clustering is odd. With, for example, 32 load + store
// pairs, the first 16 loads end up interleaved with the stores while the
// remaining 16 are clustered as expected. It should really split into two
// batches of 16.
//
// Loads keep being clustered until this hook returns false, rather than being
// scheduled in groups. As a consequence this hook also has to decide whether
// loads from different address spaces should be clustered, and whether
// clustering might cause bank conflicts.
//
// This hook might be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  // Upper bound on how many loads may be placed in a single cluster.
  const unsigned MaxClusteredLoads = 16;
  // A cacheline is 64 bytes (for global memory).
  const int64_t CacheLineBytes = 64;

  // Too many loads in a row already: stop clustering.
  if (NumLoads > MaxClusteredLoads)
    return false;

  // Cluster only when both accesses are less than one cacheline apart.
  const int64_t Distance = Offset1 - Offset0;
  return Distance < CacheLineBytes;
}

// Looks up the variant of the MIMG instruction \p Opcode for the given number
// of \p Channels using the generated AMDGPU::getMaskedMIMGOp mapping.
// Channel counts of 1, 2 and 3 are looked up in the table; any other value
// returns \p Opcode unchanged.
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  if (Channels == 1)
    return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);

  if (Channels == 2)
    return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);

  if (Channels == 3)
    return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);

  // No table lookup for this channel count: keep the opcode as it is.
  return Opcode;
}

// Encoding family selector. subtargetEncodingFamily() picks one of these
// values from the subtarget generation, and pseudoToMCOpcode() passes it as
// the `Gen` argument of AMDGPU::getMCOpcode().
//
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  // Returned for SOUTHERN_ISLANDS and SEA_ISLANDS (and, for now, for the r600
  // generations as well).
  SI = 0,
  // Returned for VOLCANIC_ISLANDS.
  VI = 1
};

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
}
}

// Translates the generation reported by \p ST into the encoding family that
// is used to select a row of the generated MC opcode table.
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  const auto Generation = ST.getGeneration();

  // Every generation is listed explicitly (no default label) so that each one
  // has a deliberate mapping.
  switch (Generation) {
  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
  // Generations that really use the SI encoding.
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;

  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;
  }

  // Reached only if the generation value matches none of the labels above.
  llvm_unreachable("Unknown subtarget generation!");
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`/// \file`
			`/// \brief Implementation of the TargetInstrInfo class that is common to all`
			`/// AMD GPUs.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#include "AMDGPUInstrInfo.h"`
			`#include "AMDGPURegisterInfo.h"`
			`#include "AMDGPUTargetMachine.h"`
			`#include "llvm/CodeGen/MachineFrameInfo.h"`
			`#include "llvm/CodeGen/MachineInstrBuilder.h"`
			`#include "llvm/CodeGen/MachineRegisterInfo.h"`

[cleanup] Lift using directives, DEBUG_TYPE definitions, and even some system headers above the includes of generated '.inc' files that actually contain code. In a few targets this was already done pretty consistently, but it wasn't done really consistently anywhere. It is strictly cleaner IMO and necessary in a bunch of places where the DEBUG_TYPE is referenced from the generated code. Consistency with the necessary places trumps. Hopefully the build bots are OK with the movement of intrin.h... llvm-svn: 206838 2014-04-22 10:03:14 +08:00			`using namespace llvm;`

[weak vtables] Remove a bunch of weak vtables This patch removes most of the trivial cases of weak vtables by pinning them to a single object file. The memory leaks in this version have been fixed. Thanks Alexey for pointing them out. Differential Revision: http://llvm-reviews.chandlerc.com/D2068 Reviewed by Andy llvm-svn: 195064 2013-11-19 08:57:56 +08:00			`#define GET_INSTRINFO_CTOR_DTOR`
R600/SI: add VOP mapping functions Make it possible to map between e32 and e64 encoding opcodes. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> llvm-svn: 176104 2013-02-27 01:52:42 +08:00			`#define GET_INSTRMAP_INFO`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`#include "AMDGPUGenInstrInfo.inc"`

[weak vtables] Remove a bunch of weak vtables This patch removes most of the trivial cases of weak vtables by pinning them to a single object file. The memory leaks in this version have been fixed. Thanks Alexey for pointing them out. Differential Revision: http://llvm-reviews.chandlerc.com/D2068 Reviewed by Andy llvm-svn: 195064 2013-11-19 08:57:56 +08:00			`// Pin the vtable to this file.`
			`void AMDGPUInstrInfo::anchor() {}`

AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)`
			`: AMDGPUGenInstrInfo(-1, -1), ST(ST) {}`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00
R600: Increase nearby load scheduling threshold. This partially fixes weird looking load scheduling in memcpy test. The load clustering doesn't seem particularly smart, but this method seems to be partially deprecated so it might not be worth trying to fix. llvm-svn: 214943 2014-08-06 08:29:49 +08:00			`// FIXME: This behaves strangely. If, for example, you have 32 load + stores,`
			`// the first 16 loads will be interleaved with the stores, and the next 16 will`
			`// be clustered as expected. It should really split into 2 16 store batches.`
			`//`
			`// Loads are clustered until this returns false, rather than trying to schedule`
			`// groups of stores. This also means we have to deal with saying different`
			`// address space loads should be clustered, and ones which might cause bank`
			`// conflicts.`
			`//`
			`// This might be deprecated so it might not be worth that much effort to fix.`
			`bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,`
			`int64_t Offset0, int64_t Offset1,`
			`unsigned NumLoads) const {`
			`assert(Offset1 > Offset0 &&`
			`"Second offset should be larger than first offset!");`
			`// If we have less than 16 loads in a row, and the offsets are within 64`
			`// bytes, then schedule together.`

			`// A cacheline is 64 bytes (for global memory).`
			`return (NumLoads <= 16 && (Offset1 - Offset0) < 64);`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`}`

R600/SI: Define a separate MIMG instruction for each possible output value type During instruction selection, we rewrite the destination register class for MIMG instructions based on their writemasks. This creates machine verifier errors since the new register class does not match the register class in the MIMG instruction definition. We can avoid this by defining different MIMG instructions for each possible destination type and then switching to the correct instruction when we change the register class. llvm-svn: 192365 2013-10-11 01:11:24 +08:00			`int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {`
			`switch (Channels) {`
			`default: return Opcode;`
			`case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);`
			`case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);`
			`case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);`
			`}`
			`}`
R600/SI: Refactor the VOP3_32 tablegen class This will allow us to use a single MachineInstr to represent instructions which behave the same but have different encodings on some subtargets. llvm-svn: 209028 2014-05-17 04:56:47 +08:00
AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td`
			`enum SIEncodingFamily {`
			`SI = 0,`
			`VI = 1`
			`};`

R600/SI: Refactor the VOP3_32 tablegen class This will allow us to use a single MachineInstr to represent instructions which behave the same but have different encodings on some subtargets. llvm-svn: 209028 2014-05-17 04:56:47 +08:00			`// Wrapper for Tablegen'd function. enum Subtarget is not defined in any`
R600: Remove dead code llvm-svn: 219242 2014-10-08 05:29:56 +08:00			`// header files, so we need to wrap it in a function that takes unsigned`
R600/SI: Refactor the VOP3_32 tablegen class This will allow us to use a single MachineInstr to represent instructions which behave the same but have different encodings on some subtargets. llvm-svn: 209028 2014-05-17 04:56:47 +08:00			`// instead.`
			`namespace llvm {`
			`namespace AMDGPU {`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00			`static int getMCOpcode(uint16_t Opcode, unsigned Gen) {`
AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));`
R600/SI: Refactor the VOP3_32 tablegen class This will allow us to use a single MachineInstr to represent instructions which behave the same but have different encodings on some subtargets. llvm-svn: 209028 2014-05-17 04:56:47 +08:00			`}`
			`}`
			`}`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00
AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {`
			`switch (ST.getGeneration()) {`
			`case AMDGPUSubtarget::SOUTHERN_ISLANDS:`
			`case AMDGPUSubtarget::SEA_ISLANDS:`
			`return SIEncodingFamily::SI;`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00			`case AMDGPUSubtarget::VOLCANIC_ISLANDS:`
AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`return SIEncodingFamily::VI;`

			`// FIXME: This should never be called for r600 GPUs.`
			`case AMDGPUSubtarget::R600:`
			`case AMDGPUSubtarget::R700:`
			`case AMDGPUSubtarget::EVERGREEN:`
			`case AMDGPUSubtarget::NORTHERN_ISLANDS:`
			`return SIEncodingFamily::SI;`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00			`}`
Fix "not all control paths return a value" warning on MSVC llvm-svn: 273872 2016-06-27 20:58:10 +08:00
			`llvm_unreachable("Unknown subtarget generation!");`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00			`}`

			`int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {`
AMDGPU: Cleanup subtarget handling. Split AMDGPUSubtarget into amdgcn/r600 specific subclasses. This removes most of the static_casting of the basic codegen classes everywhere, and tries to restrict the features visible on the wrong target. llvm-svn: 273652 2016-06-24 14:30:11 +08:00			`int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));`
R600/SI: Don't shrink instructions whose e32 encoding doesn't exist v2: modify hasVALU32BitEncoding instead v3: - add pseudoToMCOpcode helper to AMDGPUInstInfo, which is used by both hasVALU32BitEncoding and AMDGPUMCInstLower::lower - report an error if a pseudo can't be lowered llvm-svn: 226188 2015-01-16 02:42:51 +08:00
			`// -1 means that Opcode is already a native instruction.`
			`if (MCOp == -1)`
			`return Opcode;`

			`// (uint16_t)-1 means that Opcode is a pseudo instruction that has`
			`// no encoding in the given subtarget generation.`
			`if (MCOp == (uint16_t)-1)`
			`return -1;`

			`return MCOp;`
			`}`