2017-02-14 08:33:36 +08:00
|
|
|
//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
|
2015-06-27 05:15:07 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
|
|
|
|
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
|
|
|
|
|
2017-03-27 22:04:01 +08:00
|
|
|
#include "AMDGPU.h"
|
2015-06-27 05:15:07 +08:00
|
|
|
#include "AMDKernelCodeT.h"
|
2016-12-10 08:39:12 +08:00
|
|
|
#include "SIDefines.h"
|
2017-02-14 08:33:36 +08:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/IR/CallingConv.h"
|
|
|
|
#include "llvm/MC/MCInstrDesc.h"
|
|
|
|
#include "llvm/Support/Compiler.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include <cstdint>
|
|
|
|
#include <utility>
|
2016-12-10 08:39:12 +08:00
|
|
|
|
2015-06-27 05:15:07 +08:00
|
|
|
namespace llvm {
|
|
|
|
|
|
|
|
class FeatureBitset;
|
2015-12-16 00:26:16 +08:00
|
|
|
class Function;
|
2015-12-03 01:00:42 +08:00
|
|
|
class GlobalValue;
|
2017-01-28 02:41:14 +08:00
|
|
|
class MachineMemOperand;
|
2015-09-26 05:41:28 +08:00
|
|
|
class MCContext;
|
2016-10-20 01:40:36 +08:00
|
|
|
class MCRegisterClass;
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
class MCRegisterInfo;
|
2015-09-26 05:41:28 +08:00
|
|
|
class MCSection;
|
2015-12-22 02:44:27 +08:00
|
|
|
class MCSubtargetInfo;
|
2017-02-14 08:33:36 +08:00
|
|
|
class Triple;
|
2015-06-27 05:15:07 +08:00
|
|
|
|
|
|
|
namespace AMDGPU {
|
2017-02-08 22:05:23 +08:00
|
|
|
namespace IsaInfo {
|
2016-10-07 22:46:06 +08:00
|
|
|
|
2017-02-08 22:05:23 +08:00
|
|
|
enum {
  // The closed Vulkan driver uses 96 here: that caps the wave count at 8, but
  // spills SGPRs less than a setting of 80 would.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
};
|
|
|
|
|
|
|
|
/// \brief Instruction set architecture version.
///
/// Plain aggregate of three unsigned components; it declares no constructors,
/// so it can be brace-initialized in (Major, Minor, Stepping) order.
struct IsaVersion {
  /// Major component of the version.
  unsigned Major;
  /// Minor component of the version.
  unsigned Minor;
  /// Stepping component of the version.
  unsigned Stepping;
};
|
|
|
|
|
2017-02-08 22:05:23 +08:00
|
|
|
/// \returns Isa version for given subtarget \p Features.
|
2015-06-27 05:15:07 +08:00
|
|
|
IsaVersion getIsaVersion(const FeatureBitset &Features);
|
2017-02-08 22:05:23 +08:00
|
|
|
|
|
|
|
/// \returns Wavefront size for given subtarget \p Features.
|
|
|
|
unsigned getWavefrontSize(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Local memory size in bytes for given subtarget \p Features.
|
|
|
|
unsigned getLocalMemorySize(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Number of execution units per compute unit for given subtarget \p
|
|
|
|
/// Features.
|
|
|
|
unsigned getEUsPerCU(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Maximum number of work groups per compute unit for given subtarget
|
|
|
|
/// \p Features and limited by given \p FlatWorkGroupSize.
|
|
|
|
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
|
|
|
|
unsigned FlatWorkGroupSize);
|
|
|
|
|
|
|
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
|
|
|
/// Features without any kind of limitation.
|
|
|
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Maximum number of waves per compute unit for given subtarget \p
|
|
|
|
/// Features and limited by given \p FlatWorkGroupSize.
|
|
|
|
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
|
|
|
|
unsigned FlatWorkGroupSize);
|
|
|
|
|
|
|
|
/// \returns Minimum number of waves per execution unit for given subtarget \p
|
|
|
|
/// Features.
|
|
|
|
unsigned getMinWavesPerEU(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
|
|
|
/// Features without any kind of limitation.
|
|
|
|
unsigned getMaxWavesPerEU(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Maximum number of waves per execution unit for given subtarget \p
|
|
|
|
/// Features and limited by given \p FlatWorkGroupSize.
|
|
|
|
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
|
|
|
|
unsigned FlatWorkGroupSize);
|
|
|
|
|
|
|
|
/// \returns Minimum flat work group size for given subtarget \p Features.
|
|
|
|
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Maximum flat work group size for given subtarget \p Features.
|
|
|
|
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Number of waves per work group for given subtarget \p Features and
|
|
|
|
/// limited by given \p FlatWorkGroupSize.
|
|
|
|
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
|
|
|
|
unsigned FlatWorkGroupSize);
|
|
|
|
|
|
|
|
/// \returns SGPR allocation granularity for given subtarget \p Features.
|
|
|
|
unsigned getSGPRAllocGranule(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns SGPR encoding granularity for given subtarget \p Features.
|
|
|
|
unsigned getSGPREncodingGranule(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Total number of SGPRs for given subtarget \p Features.
|
|
|
|
unsigned getTotalNumSGPRs(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Addressable number of SGPRs for given subtarget \p Features.
|
|
|
|
unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Minimum number of SGPRs that meets the given number of waves per
|
|
|
|
/// execution unit requirement for given subtarget \p Features.
|
|
|
|
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
|
|
|
|
|
|
|
/// \returns Maximum number of SGPRs that meets the given number of waves per
|
|
|
|
/// execution unit requirement for given subtarget \p Features.
|
|
|
|
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
|
|
|
|
bool Addressable);
|
|
|
|
|
|
|
|
/// \returns VGPR allocation granularity for given subtarget \p Features.
|
|
|
|
unsigned getVGPRAllocGranule(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns VGPR encoding granularity for given subtarget \p Features.
|
|
|
|
unsigned getVGPREncodingGranule(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Total number of VGPRs for given subtarget \p Features.
|
|
|
|
unsigned getTotalNumVGPRs(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Addressable number of VGPRs for given subtarget \p Features.
|
|
|
|
unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
|
|
|
|
|
|
|
|
/// \returns Minimum number of VGPRs that meets given number of waves per
|
|
|
|
/// execution unit requirement for given subtarget \p Features.
|
|
|
|
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
|
|
|
|
|
|
|
/// \returns Maximum number of VGPRs that meets given number of waves per
|
|
|
|
/// execution unit requirement for given subtarget \p Features.
|
|
|
|
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
|
|
|
|
|
2017-02-14 08:33:36 +08:00
|
|
|
} // end namespace IsaInfo
|
2017-02-08 22:05:23 +08:00
|
|
|
|
|
|
|
LLVM_READONLY
|
|
|
|
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
|
|
|
|
|
2015-06-27 05:58:31 +08:00
|
|
|
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
|
|
|
const FeatureBitset &Features);
|
2015-09-26 05:41:28 +08:00
|
|
|
MCSection *getHSATextSection(MCContext &Ctx);
|
2015-06-27 05:15:07 +08:00
|
|
|
|
2015-12-03 03:47:57 +08:00
|
|
|
MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
|
|
|
|
|
|
|
|
MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
|
|
|
|
|
2015-12-03 11:34:32 +08:00
|
|
|
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
|
|
|
|
|
2017-03-27 22:04:01 +08:00
|
|
|
bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
|
|
|
|
bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
|
|
|
|
bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
|
2015-12-03 01:00:42 +08:00
|
|
|
|
2016-10-21 02:12:38 +08:00
|
|
|
/// \returns True if constants should be emitted to .text section for given
|
|
|
|
/// target triple \p TT, false otherwise.
|
|
|
|
bool shouldEmitConstantsToTextSection(const Triple &TT);
|
|
|
|
|
2016-09-07 04:22:28 +08:00
|
|
|
/// \returns Integer value requested using \p F's \p Name attribute.
|
|
|
|
///
|
|
|
|
/// \returns \p Default if attribute is not present.
|
|
|
|
///
|
|
|
|
/// \returns \p Default and emits error if requested value cannot be converted
|
|
|
|
/// to integer.
|
2016-05-12 10:45:18 +08:00
|
|
|
int getIntegerAttribute(const Function &F, StringRef Name, int Default);
|
|
|
|
|
2016-09-07 04:22:28 +08:00
|
|
|
/// \returns A pair of integer values requested using \p F's \p Name attribute
|
|
|
|
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
|
|
|
|
/// is false).
|
|
|
|
///
|
|
|
|
/// \returns \p Default if attribute is not present.
|
|
|
|
///
|
|
|
|
/// \returns \p Default and emits error if one of the requested values cannot be
|
|
|
|
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
|
|
|
|
/// not present.
|
|
|
|
std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
|
|
|
StringRef Name,
|
|
|
|
std::pair<int, int> Default,
|
|
|
|
bool OnlyFirstRequired = false);
|
|
|
|
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \returns Vmcnt bit mask for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
|
2016-10-01 01:01:40 +08:00
|
|
|
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \returns Expcnt bit mask for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
|
2016-10-01 01:01:40 +08:00
|
|
|
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
|
|
|
|
|
|
|
|
/// \returns Waitcnt bit mask for given isa \p Version.
|
|
|
|
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
|
2016-10-01 01:01:40 +08:00
|
|
|
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
2016-10-01 01:01:40 +08:00
|
|
|
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
2016-10-12 02:58:22 +08:00
|
|
|
|
|
|
|
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
|
2016-10-12 02:58:22 +08:00
|
|
|
|
|
|
|
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
|
|
|
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
|
|
|
/// \p Lgkmcnt respectively.
|
|
|
|
///
|
|
|
|
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
|
2017-02-19 02:29:53 +08:00
|
|
|
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
|
|
|
|
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
|
2016-10-12 02:58:22 +08:00
|
|
|
/// \p Expcnt = \p Waitcnt[6:4]
|
|
|
|
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
2017-02-08 22:05:23 +08:00
|
|
|
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
2016-10-12 02:58:22 +08:00
|
|
|
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
|
|
|
|
|
|
|
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
|
|
|
unsigned Vmcnt);
|
2016-10-12 02:58:22 +08:00
|
|
|
|
|
|
|
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
|
|
|
unsigned Expcnt);
|
2016-10-12 02:58:22 +08:00
|
|
|
|
|
|
|
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
|
|
|
|
unsigned Lgkmcnt);
|
2016-10-12 02:58:22 +08:00
|
|
|
|
|
|
|
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
|
|
|
/// \p Version.
|
|
|
|
///
|
|
|
|
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
|
2017-02-19 02:29:53 +08:00
|
|
|
/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
|
|
|
|
/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
|
|
|
|
/// Waitcnt[6:4] = \p Expcnt
|
|
|
|
/// Waitcnt[11:8] = \p Lgkmcnt
|
|
|
|
/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
|
2016-10-12 02:58:22 +08:00
|
|
|
///
|
|
|
|
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
|
|
|
/// isa \p Version.
|
2017-02-08 22:05:23 +08:00
|
|
|
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
|
2016-10-12 02:58:22 +08:00
|
|
|
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
2016-10-01 01:01:40 +08:00
|
|
|
|
2016-01-13 19:45:36 +08:00
|
|
|
unsigned getInitialPSInputAddr(const Function &F);
|
|
|
|
|
2017-04-12 06:29:24 +08:00
|
|
|
LLVM_READNONE
|
|
|
|
bool isShader(CallingConv::ID CC);
|
|
|
|
|
|
|
|
LLVM_READNONE
|
|
|
|
bool isCompute(CallingConv::ID CC);
|
|
|
|
|
|
|
|
LLVM_READNONE
|
|
|
|
bool isEntryFunctionCC(CallingConv::ID CC);
|
|
|
|
|
2017-04-12 06:29:28 +08:00
|
|
|
// FIXME: Remove this when calling conventions cleaned up
|
|
|
|
LLVM_READNONE
|
|
|
|
inline bool isKernel(CallingConv::ID CC) {
|
|
|
|
switch (CC) {
|
|
|
|
case CallingConv::AMDGPU_KERNEL:
|
|
|
|
case CallingConv::SPIR_KERNEL:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2015-12-16 00:26:16 +08:00
|
|
|
|
2015-12-22 02:44:27 +08:00
|
|
|
bool isSI(const MCSubtargetInfo &STI);
|
|
|
|
bool isCI(const MCSubtargetInfo &STI);
|
|
|
|
bool isVI(const MCSubtargetInfo &STI);
|
|
|
|
|
|
|
|
/// If \p Reg is a pseudo reg, return the correct hardware register given
|
|
|
|
/// \p STI otherwise return \p Reg.
|
|
|
|
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
|
|
|
|
|
2017-03-03 22:31:06 +08:00
|
|
|
/// \brief Convert hardware register \p Reg to a pseudo register
|
|
|
|
LLVM_READNONE
|
|
|
|
unsigned mc2PseudoReg(unsigned Reg);
|
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
/// \brief Can this operand also contain immediate values?
|
|
|
|
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
|
|
|
|
|
|
|
|
/// \brief Is this a floating-point operand?
|
|
|
|
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
|
|
|
|
|
|
|
|
/// \brief Does this operand support only inlinable literals?
|
|
|
|
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
|
|
|
|
|
2016-10-28 07:05:31 +08:00
|
|
|
/// \brief Get the size in bits of a register from the register class
/// identified by \p RCID.
|
|
|
|
unsigned getRegBitWidth(unsigned RCID);
|
|
|
|
|
2016-10-20 01:40:36 +08:00
|
|
|
/// \brief Get the size in bits of a register from the register class \p RC.
|
|
|
|
unsigned getRegBitWidth(const MCRegisterClass &RC);
|
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
/// \brief Get size of register operand
|
|
|
|
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
|
|
|
|
unsigned OpNo);
|
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
/// \returns Size in bytes of the operand described by \p OpInfo, selected
/// from its AMDGPU operand type: 4 for the 32-bit types, 8 for the 64-bit
/// types and 2 for the 16-bit and packed V2 16-bit types.
///
/// Any operand type not listed below reaches llvm_unreachable.
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
|
|
|
|
|
|
|
|
/// \returns Size in bytes of operand number \p OpNo of \p Desc, obtained by
/// forwarding Desc.OpInfo[OpNo] to the MCOperandInfo overload. \p OpNo is
/// used to index the operand info array without a bounds check here.
LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}
|
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
/// \brief Is this literal inlinable
|
2016-12-06 06:26:17 +08:00
|
|
|
LLVM_READNONE
|
|
|
|
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
|
|
|
|
|
|
|
|
LLVM_READNONE
|
|
|
|
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
|
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
LLVM_READNONE
|
|
|
|
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
LLVM_READNONE
|
|
|
|
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
|
|
|
|
|
2017-01-28 02:41:14 +08:00
|
|
|
bool isUniformMMO(const MachineMemOperand *MMO);
|
|
|
|
|
|
|
|
/// \returns The encoding that will be used for \p ByteOffset in the SMRD
|
|
|
|
/// offset field.
|
|
|
|
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
|
|
|
|
|
|
|
|
/// \returns true if this offset is small enough to fit in the SMRD
|
|
|
|
/// offset field. \p ByteOffset should be the offset in bytes and
|
|
|
|
/// not the encoded offset.
|
|
|
|
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
|
|
|
|
|
2015-06-27 05:15:07 +08:00
|
|
|
} // end namespace AMDGPU
|
|
|
|
} // end namespace llvm
|
|
|
|
|
2017-02-14 08:33:36 +08:00
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
|