2013-02-21 23:16:44 +08:00
|
|
|
//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-06-14 03:18:29 +08:00
|
|
|
// Predicate that holds when the subtarget reports a wavefront size of 32.
// The assembler-side condition is keyed on "FeatureWavefrontSize32".
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
|
|
|
|
AssemblerPredicate <"FeatureWavefrontSize32">;
|
|
|
|
// Predicate that holds when the subtarget reports a wavefront size of 64.
// The assembler-side condition is keyed on "FeatureWavefrontSize64".
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
|
|
|
|
AssemblerPredicate <"FeatureWavefrontSize64">;
|
|
|
|
|
2015-04-24 03:33:54 +08:00
|
|
|
// Predicate whose code-generation condition is the constant "false"; the
// assembler-side condition is keyed on "FeatureDisable".
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
// PredicateControl subclass carrying two Predicate fields:
//   SIAssemblerPredicate, set to isGFX6GFX7, and
//   VIAssemblerPredicate, set to isGFX8GFX9.
class GCNPredicateControl : PredicateControl {
|
2019-04-06 02:24:34 +08:00
|
|
|
Predicate SIAssemblerPredicate = isGFX6GFX7;
|
2019-04-06 17:20:48 +08:00
|
|
|
Predicate VIAssemblerPredicate = isGFX8GFX9;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
}
|
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
// Except for the NONE field, this must be kept in sync with the
|
|
|
|
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
|
|
|
|
// Named integer constants, one per encoding family. NONE is -1; the
// remaining entries are numbered consecutively from 0 (SI) to 7 (SDWA10).
def SIEncodingFamily {
|
2014-05-17 04:56:47 +08:00
|
|
|
int NONE = -1;
|
|
|
|
int SI = 0;
|
2014-12-07 20:18:57 +08:00
|
|
|
int VI = 1;
|
2017-06-21 16:53:38 +08:00
|
|
|
int SDWA = 2;
|
|
|
|
int SDWA9 = 3;
|
2018-01-13 05:12:19 +08:00
|
|
|
int GFX80 = 4;
|
|
|
|
int GFX9 = 5;
|
2019-04-25 01:03:15 +08:00
|
|
|
int GFX10 = 6;
|
|
|
|
int SDWA10 = 7;
|
2014-05-17 04:56:47 +08:00
|
|
|
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SI DAG Nodes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-05-24 13:28:34 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::CLAMP, typed by the SDTFPUnaryOp profile.
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
|
|
|
|
|
[AMDGPU] Add support for multi-dword s.buffer.load intrinsic
Summary:
Patch by Marek Olsak and David Stuttard, both of AMD.
This adds a new amdgcn intrinsic supporting s.buffer.load, in particular
multiple dword variants. These are convenient to use from some front-end
implementations.
Also modified the existing llvm.SI.load.const intrinsic to common up the
underlying implementation.
This modification also requires that we can lower to non-uniform loads correctly
by splitting larger dword variants into sizes supported by the non-uniform
versions of the load.
V2: Addressed minor review comments.
V3: i1 glc is now i32 cachepolicy for consistency with buffer and
tbuffer intrinsics, plus fixed formatting issue.
V4: Added glc test.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D51098
Change-Id: I83a6e00681158bb243591a94a51c7baa445f169b
llvm-svn: 340684
2018-08-25 22:53:17 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::SBUFFER_LOAD. The inline type profile
// declares 1 result and 4 operands, constrained to v4i32, i32, i1 and i1
// (indices 1-4). The node is marked as possibly loading and as carrying a
// memory operand; no chain property is listed.
def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
|
2019-06-17 01:14:12 +08:00
|
|
|
SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
|
|
|
|
SDTCisVT<4, i1>]>,
|
[AMDGPU] Add support for multi-dword s.buffer.load intrinsic
Summary:
Patch by Marek Olsak and David Stuttard, both of AMD.
This adds a new amdgcn intrinsic supporting s.buffer.load, in particular
multiple dword variants. These are convenient to use from some front-end
implementations.
Also modified the existing llvm.SI.load.const intrinsic to common up the
underlying implementation.
This modification also requires that we can lower to non-uniform loads correctly
by splitting larger dword variants into sizes supported by the non-uniform
versions of the load.
V2: Addressed minor review comments.
V3: i1 glc is now i32 cachepolicy for consistency with buffer and
tbuffer intrinsics, plus fixed formatting issue.
V4: Added glc test.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D51098
Change-Id: I83a6e00681158bb243591a94a51c7baa445f169b
llvm-svn: 340684
2018-08-25 22:53:17 +08:00
|
|
|
[SDNPMayLoad, SDNPMemOperand]
|
2013-08-15 07:24:45 +08:00
|
|
|
>;
|
|
|
|
|
2019-01-16 23:43:53 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::DS_ORDERED_COUNT. The inline type profile
// declares 1 result and 2 operands: index 0 is i32, index 1 is i32 and
// index 2 is i16. The node may load and store, carries a memory operand,
// has a chain, and takes an incoming glue value.
def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
|
|
|
|
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
|
|
|
|
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
|
|
|
|
>;
|
|
|
|
|
2016-04-12 22:05:04 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::ATOMIC_INC, typed by SDTAtomic2.
// Marked as possibly loading and storing, with a memory operand and a chain.
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
|
|
|
|
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
|
|
|
>;
|
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::ATOMIC_DEC, typed by SDTAtomic2.
// Marked as possibly loading and storing, with a memory operand and a chain.
def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
|
|
|
|
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
|
|
|
>;
|
|
|
|
|
2018-01-17 22:05:05 +08:00
|
|
|
// Type profile with 1 result and 2 operands, used by the SIatomic_fmin and
// SIatomic_fmax nodes. Constraints: index 0 has the same type as index 2,
// index 0 is a floating-point type, and index 1 is a pointer type.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
|
|
|
|
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
|
|
|
|
]>;
|
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::ATOMIC_LOAD_FMIN, typed by SDTAtomic2_f32.
// Marked as possibly loading and storing, with a memory operand and a chain.
def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
|
|
|
|
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
|
|
|
>;
|
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::ATOMIC_LOAD_FMAX, typed by SDTAtomic2_f32.
// Marked as possibly loading and storing, with a memory operand and a chain.
def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
|
|
|
|
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
|
|
|
>;
|
|
|
|
|
2019-03-09 04:58:11 +08:00
|
|
|
// load_d16_{lo|hi} ptr, tied_input
|
|
|
|
// Type profile (not an SDNode, despite the SI prefix) with 1 result and
// 2 operands: index 1 is a pointer type, and index 0 has the same type as
// index 2.
def SIload_d16 : SDTypeProfile<1, 2, [
|
|
|
|
SDTCisPtrTy<1>,
|
|
|
|
SDTCisSameAs<0, 2>
|
|
|
|
]>;
|
|
|
|
|
|
|
|
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
// Type profile shared by the SItbuffer_load and SItbuffer_load_d16 nodes:
// 1 result and 8 operands. Index 0 (vdata) is left unconstrained; the role
// of each constrained index is given by the trailing comment on its line.
def SDTtbuffer_load : SDTypeProfile<1, 8,
|
2018-01-13 05:12:19 +08:00
|
|
|
[ // vdata
|
|
|
|
SDTCisVT<1, v4i32>, // rsrc
|
|
|
|
SDTCisVT<2, i32>, // vindex(VGPR)
|
|
|
|
SDTCisVT<3, i32>, // voffset(VGPR)
|
|
|
|
SDTCisVT<4, i32>, // soffset(SGPR)
|
|
|
|
SDTCisVT<5, i32>, // offset(imm)
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
SDTCisVT<6, i32>, // format(imm)
|
|
|
|
SDTCisVT<7, i32>, // cachecontrol(imm)
|
|
|
|
SDTCisVT<8, i1> // idxen(imm)
|
2018-01-13 05:12:19 +08:00
|
|
|
]>;
|
|
|
|
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::TBUFFER_LOAD_FORMAT, typed by
// SDTtbuffer_load. Marked as possibly loading, with a memory operand and a
// chain.
def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
|
2018-01-13 05:12:19 +08:00
|
|
|
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::TBUFFER_LOAD_FORMAT_D16. It uses the same
// type profile (SDTtbuffer_load) and the same properties as SItbuffer_load:
// may load, memory operand, chain.
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
SDTtbuffer_load,
|
2018-01-13 05:12:19 +08:00
|
|
|
[SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
|
2013-09-12 10:55:14 +08:00
|
|
|
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
// Type profile shared by the SItbuffer_store and SItbuffer_store_d16 nodes:
// 0 results and 9 operands. Index 0 (vdata) is left unconstrained; the role
// of each constrained index is given by the trailing comment on its line.
def SDTtbuffer_store : SDTypeProfile<0, 9,
|
2017-06-23 00:29:22 +08:00
|
|
|
[ // vdata
|
|
|
|
SDTCisVT<1, v4i32>, // rsrc
|
|
|
|
SDTCisVT<2, i32>, // vindex(VGPR)
|
|
|
|
SDTCisVT<3, i32>, // voffset(VGPR)
|
|
|
|
SDTCisVT<4, i32>, // soffset(SGPR)
|
|
|
|
SDTCisVT<5, i32>, // offset(imm)
|
[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
2018-08-21 19:06:05 +08:00
|
|
|
SDTCisVT<6, i32>, // format(imm)
|
|
|
|
SDTCisVT<7, i32>, // cachecontrol(imm)
|
|
|
|
SDTCisVT<8, i1> // idxen(imm)
|
2017-06-23 00:29:22 +08:00
|
|
|
]>;
|
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT, typed by
// SDTtbuffer_store. Marked as possibly storing, with a memory operand and a
// chain.
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
2018-01-13 05:12:19 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::TBUFFER_STORE_FORMAT_D16. It uses the same
// type profile (SDTtbuffer_store) and the same properties as SItbuffer_store:
// may store, memory operand, chain.
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
|
|
|
|
SDTtbuffer_store,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
2017-06-23 00:29:22 +08:00
|
|
|
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
// Type profile shared by the SIbuffer_load* nodes: 1 result and 7 operands.
// Index 0 (vdata) is left unconstrained; the role of each constrained index
// is given by the trailing comment on its line.
def SDTBufferLoad : SDTypeProfile<1, 7,
|
2016-12-21 01:19:44 +08:00
|
|
|
[ // vdata
|
|
|
|
SDTCisVT<1, v4i32>, // rsrc
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
SDTCisVT<2, i32>, // vindex(VGPR)
|
|
|
|
SDTCisVT<3, i32>, // voffset(VGPR)
|
|
|
|
SDTCisVT<4, i32>, // soffset(SGPR)
|
|
|
|
SDTCisVT<5, i32>, // offset(imm)
|
|
|
|
SDTCisVT<6, i32>, // cachepolicy(imm)
|
|
|
|
SDTCisVT<7, i1>]>; // idxen(imm)
|
2016-12-21 01:19:44 +08:00
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_UBYTE, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_USHORT, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_BYTE, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_SHORT, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
2016-12-21 01:19:44 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_FORMAT, typed by SDTBufferLoad.
// Carries a memory operand and a chain, and is marked as possibly loading.
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
2018-01-13 05:12:19 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_LOAD_FORMAT_D16, typed by
// SDTBufferLoad. Carries a memory operand and a chain, and is marked as
// possibly loading.
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
|
|
|
|
SDTBufferLoad,
|
|
|
|
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
|
2016-12-21 01:19:44 +08:00
|
|
|
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
// Type profile shared by the SIbuffer_store* nodes: 0 results and 8 operands.
// Index 0 (vdata) is left unconstrained; the role of each constrained index
// is given by the trailing comment on its line.
def SDTBufferStore : SDTypeProfile<0, 8,
|
2017-11-09 09:52:48 +08:00
|
|
|
[ // vdata
|
|
|
|
SDTCisVT<1, v4i32>, // rsrc
|
[AMDGPU] New buffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.buffer.load
llvm.amdgcn.raw.buffer.load.format
llvm.amdgcn.raw.buffer.load.format.d16
llvm.amdgcn.struct.buffer.load
llvm.amdgcn.struct.buffer.load.format
llvm.amdgcn.struct.buffer.load.format.d16
llvm.amdgcn.raw.buffer.store
llvm.amdgcn.raw.buffer.store.format
llvm.amdgcn.raw.buffer.store.format.d16
llvm.amdgcn.struct.buffer.store
llvm.amdgcn.struct.buffer.store.format
llvm.amdgcn.struct.buffer.store.format.d16
llvm.amdgcn.raw.buffer.atomic.*
llvm.amdgcn.struct.buffer.atomic.*
with the following changes from the llvm.amdgcn.buffer.*
intrinsics:
* there are separate raw and struct versions: raw does not have an
index arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::BUFFER_* SD nodes always have an index operand, all three
offset operands, combined cachepolicy operand, and an extra idxen
operand.
The obsolescent llvm.amdgcn.buffer.* intrinsics continue to work.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50306
Change-Id: If897ea7dc34fcbf4d5496e98cc99a934f62fc205
llvm-svn: 340269
2018-08-21 19:07:10 +08:00
|
|
|
SDTCisVT<2, i32>, // vindex(VGPR)
|
|
|
|
SDTCisVT<3, i32>, // voffset(VGPR)
|
|
|
|
SDTCisVT<4, i32>, // soffset(SGPR)
|
|
|
|
SDTCisVT<5, i32>, // offset(imm)
|
|
|
|
SDTCisVT<6, i32>, // cachepolicy(imm)
|
|
|
|
SDTCisVT<7, i1>]>; // idxen(imm)
|
2017-11-09 09:52:48 +08:00
|
|
|
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_STORE, typed by SDTBufferStore.
// Marked as possibly storing, with a memory operand and a chain.
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
|
2018-01-13 05:12:19 +08:00
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_STORE_BYTE, typed by SDTBufferStore.
// Marked as possibly storing, with a memory operand and a chain.
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
|
|
|
|
SDTBufferStore,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_STORE_SHORT, typed by
// SDTBufferStore. Marked as possibly storing, with a memory operand and a
// chain.
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
|
|
|
|
SDTBufferStore,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
2018-01-13 05:12:19 +08:00
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_STORE_FORMAT, typed by
// SDTBufferStore. Marked as possibly storing, with a memory operand and a
// chain.
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
|
|
|
|
SDTBufferStore,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
|
|
|
// Selection DAG node for AMDGPUISD::BUFFER_STORE_FORMAT_D16, typed by
// SDTBufferStore. Marked as possibly storing, with a memory operand and a
// chain.
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
|
|
|
|
SDTBufferStore,
|
|
|
|
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
|
2017-11-09 09:52:48 +08:00
|
|
|
|
|
|
|
// Common shape of a buffer atomic node that produces a value.
//
// The profile declares one result and eight operands. Only type indices 2..8
// are constrained here; indices 0 and 1 (presumably the returned value and
// vdata - TODO confirm) are deliberately left without a type constraint.
// The node is chained, has a memory operand, and may both load and store.
class SDBufferAtomic<string opcode> : SDNode<
  opcode,
  SDTypeProfile<1, 8, [
    SDTCisVT<2, v4i32>,  // rsrc
    SDTCisVT<3, i32>,    // vindex(VGPR)
    SDTCisVT<4, i32>,    // voffset(VGPR)
    SDTCisVT<5, i32>,    // soffset(SGPR)
    SDTCisVT<6, i32>,    // offset(imm)
    SDTCisVT<7, i32>,    // cachepolicy(imm)
    SDTCisVT<8, i1>      // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
|
|
|
|
|
2019-07-11 08:10:17 +08:00
|
|
|
// Common shape of a buffer atomic node that produces no value.
//
// The profile declares zero results and eight operands; `ty` is the value type
// required for the vdata operand. The node is chained, has a memory operand,
// and may both load and store.
class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode<
  opcode,
  SDTypeProfile<0, 8, [
    SDTCisVT<0, ty>,     // vdata
    SDTCisVT<1, v4i32>,  // rsrc
    SDTCisVT<2, i32>,    // vindex(VGPR)
    SDTCisVT<3, i32>,    // voffset(VGPR)
    SDTCisVT<4, i32>,    // soffset(SGPR)
    SDTCisVT<5, i32>,    // offset(imm)
    SDTCisVT<6, i32>,    // cachepolicy(imm)
    SDTCisVT<7, i1>      // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
|
|
|
|
|
2017-11-09 09:52:48 +08:00
|
|
|
// Value-returning buffer atomics, one node per AMDGPUISD opcode.
def SIbuffer_atomic_swap : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
def SIbuffer_atomic_smax : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
def SIbuffer_atomic_umax : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_XOR">;

// Floating-point add atomics that do not return a value; the second argument
// is the vdata type.
def SIbuffer_atomic_fadd
  : SDBufferAtomicNoRtn<"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
def SIbuffer_atomic_pk_fadd
  : SDBufferAtomicNoRtn<"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
|
2017-11-09 09:52:48 +08:00
|
|
|
|
|
|
|
// Buffer atomic compare-and-swap node.
//
// One i32 result and nine operands: the src and cmp values, followed by the
// same rsrc/vindex/voffset/soffset/offset/cachepolicy/idxen operands used by
// the other buffer atomic nodes. Chained, has a memory operand, and may both
// load and store.
def SIbuffer_atomic_cmpswap : SDNode<
  "AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9, [
    SDTCisVT<0, i32>,    // dst
    SDTCisVT<1, i32>,    // src
    SDTCisVT<2, i32>,    // cmp
    SDTCisVT<3, v4i32>,  // rsrc
    SDTCisVT<4, i32>,    // vindex(VGPR)
    SDTCisVT<5, i32>,    // voffset(VGPR)
    SDTCisVT<6, i32>,    // soffset(SGPR)
    SDTCisVT<7, i32>,    // offset(imm)
    SDTCisVT<8, i32>,    // cachepolicy(imm)
    SDTCisVT<9, i1>      // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
|
|
|
|
|
2019-07-11 08:10:17 +08:00
|
|
|
// Common shape of a global atomic node that produces no value: zero results
// and two operands, a pointer-typed address followed by data of type `ty`.
// Chained, has a memory operand, and may both load and store.
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode<
  opcode,
  SDTypeProfile<0, 2, [
    SDTCisPtrTy<0>,   // vaddr
    SDTCisVT<1, ty>   // vdata
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
|
|
|
|
|
|
|
|
// No-return global floating-point add atomics; the second argument is the
// vdata type.
def SIglobal_atomic_fadd
  : SDGlobalAtomicNoRtn<"AMDGPUISD::ATOMIC_FADD", f32>;
def SIglobal_atomic_pk_fadd
  : SDGlobalAtomicNoRtn<"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
|
|
|
|
|
2016-06-15 04:29:59 +08:00
|
|
|
// Node for AMDGPUISD::PC_ADD_REL_OFFSET: one pointer-typed result and two
// operands, both constrained to have the same type as the result.
def SIpc_add_rel_offset : SDNode<
  "AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [
    SDTCisVT<0, iPTR>,
    SDTCisSameAs<0,1>,
    SDTCisSameAs<0,2>
  ]>
>;
|
|
|
|
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
// Node for AMDGPUISD::LDS: one pointer-typed result and a single operand of
// the same type.
def SIlds : SDNode<
  "AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [
    SDTCisVT<0, iPTR>,
    SDTCisSameAs<0,1>
  ]>
>;
|
|
|
|
|
2019-03-09 04:58:11 +08:00
|
|
|
// d16 load nodes. All six share the SIload_d16 type profile and are chained
// loads carrying a memory operand; they differ only in the AMDGPUISD opcode
// (lo/hi, with optional _U8 / _I8 suffixes).
def SIload_d16_lo : SDNode<
  "AMDGPUISD::LOAD_D16_LO", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<
  "AMDGPUISD::LOAD_D16_LO_U8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<
  "AMDGPUISD::LOAD_D16_LO_I8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<
  "AMDGPUISD::LOAD_D16_HI", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<
  "AMDGPUISD::LOAD_D16_HI_U8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<
  "AMDGPUISD::LOAD_D16_HI_I8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// ValueType helpers
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Sets `ret` to 1 when SrcVT is one of the floating-point value types f16,
// f32, f64, v2f16 or v4f16, and to 0 for every other type.
// NOTE(review): the previous comment here read "Returns 1 if the source
// arguments have modifiers, 0 if they do not", which describes a use of this
// predicate rather than what it computes - confirm against callers.
// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, f16.Value),   1,
    !if(!eq(SrcVT.Value, f32.Value),   1,
    !if(!eq(SrcVT.Value, f64.Value),   1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v4f16.Value), 1,
    0)))));
}
|
|
|
|
|
|
|
|
// Sets `ret` to 1 when SrcVT is one of the scalar integer types i16, i32 or
// i64, and to 0 for every other type.
class isIntType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, i16.Value), 1,
    !if(!eq(SrcVT.Value, i32.Value), 1,
    !if(!eq(SrcVT.Value, i64.Value), 1,
    0)));
}
|
|
|
|
|
|
|
|
// Sets `ret` to 1 when SrcVT is one of the vector types v2i16, v2f16 or v4f16,
// and to 0 for every other type.
class isPackedType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, v2i16.Value), 1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v4f16.Value), 1,
    0)));
}
|
|
|
|
|
2016-04-12 22:05:04 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// PatFrags for global memory operations
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-12-23 23:34:51 +08:00
|
|
|
// Global-address fragments for the target-specific inc/dec atomics.
defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;

// Local-address fragments: inc/dec, then the floating-point add/min/max.
def atomic_inc_local       : local_binary_atomic_op<SIatomic_inc>;
def atomic_dec_local       : local_binary_atomic_op<SIatomic_dec>;
def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>;
def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
|
2017-11-29 08:55:57 +08:00
|
|
|
|
2015-05-12 23:00:49 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2017-09-20 11:43:35 +08:00
|
|
|
// SDNodes PatFrags for loads/stores with a glue input.
|
|
|
|
// This is for SDNodes and PatFrag for local loads and stores to
|
|
|
|
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
|
|
|
|
//
|
|
|
|
// These mirror the regular load/store PatFrags and rely on special
|
|
|
|
// processing during Select() to add the glued copy.
|
|
|
|
//
|
2015-05-12 23:00:49 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// ISD::LOAD with an additional glue input (SDNPInGlue).
def AMDGPUld_glue : SDNode<
  "ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// ISD::ATOMIC_LOAD with an additional glue input (SDNPInGlue).
def AMDGPUatomic_ld_glue : SDNode<
  "ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;
|
|
|
|
|
2019-07-09 06:08:23 +08:00
|
|
|
// Glued load restricted to the unindexed addressing form.
// NOTE(review): IsLoad is not set on this fragment or on load_glue below;
// confirm that TableGen still applies IsUnindexed / IsNonExtLoad without it.
def unindexedload_glue : PatFrag<
  (ops node:$ptr),
  (AMDGPUld_glue node:$ptr)
> {
  let IsUnindexed = 1;
}

// Glued, unindexed load marked as non-extending.
def load_glue : PatFrag<
  (ops node:$ptr),
  (unindexedload_glue node:$ptr)
> {
  let IsNonExtLoad = 1;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2018-06-22 16:39:52 +08:00
|
|
|
// Glued atomic loads, split by the in-memory value type.
def atomic_load_32_glue : PatFrag<
  (ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)
> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<
  (ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)
> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
|
|
|
|
|
2019-07-09 06:08:23 +08:00
|
|
|
// Any-extending load with a glue input.
//
// Built directly on unindexedload_glue, exactly like sextload_glue and
// zextload_glue. It previously wrapped load_glue, which is declared with
// IsNonExtLoad = 1: a load cannot be both non-extending and any-extending, so
// stacking IsAnyExtLoad on top of that fragment is contradictory and would
// never match once the inner predicate is enforced.
def extload_glue : PatFrag<
  (ops node:$ptr),
  (unindexedload_glue node:$ptr)
> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2019-07-20 05:01:30 +08:00
|
|
|
// Sign-extending glued load.
def sextload_glue : PatFrag<
  (ops node:$ptr),
  (unindexedload_glue node:$ptr)
> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

// Zero-extending glued load.
def zextload_glue : PatFrag<
  (ops node:$ptr),
  (unindexedload_glue node:$ptr)
> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2019-07-09 06:08:23 +08:00
|
|
|
// Extending glued loads narrowed to a specific memory type. Each wraps the
// matching any-/zero-/sign-extending fragment and pins MemoryVT to i8 or i16.
def extloadi8_glue : PatFrag<
  (ops node:$ptr), (extload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<
  (ops node:$ptr), (zextload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<
  (ops node:$ptr), (extload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<
  (ops node:$ptr), (zextload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<
  (ops node:$ptr), (sextload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<
  (ops node:$ptr), (sextload_glue node:$ptr)
> {
  let IsLoad = 1;
  let MemoryVT = i16;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// load_glue wrapped in the Aligned8Bytes / Aligned16Bytes helper classes
// (defined elsewhere; presumably they add a minimum-alignment predicate).
def load_glue_align8 : Aligned8Bytes<
  (ops node:$ptr),
  (load_glue node:$ptr)
>;

def load_glue_align16 : Aligned16Bytes<
  (ops node:$ptr),
  (load_glue node:$ptr)
>;
|
2017-09-20 11:43:35 +08:00
|
|
|
|
|
|
|
|
|
|
|
// Local-address versions of the glued load fragments. Each one combines
// LoadFrag over a *_glue fragment with the LocalAddress mix-in.

// Plain and sign-extending loads.
def load_local_m0        : LoadFrag<load_glue>, LocalAddress;
def sextloadi8_local_m0  : LoadFrag<sextloadi8_glue>, LocalAddress;
def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;

// Any- and zero-extending loads.
def extloadi8_local_m0   : LoadFrag<extloadi8_glue>, LocalAddress;
def zextloadi8_local_m0  : LoadFrag<zextloadi8_glue>, LocalAddress;
def extloadi16_local_m0  : LoadFrag<extloadi16_glue>, LocalAddress;
def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress;

// Alignment-restricted loads.
def load_align8_local_m0  : LoadFrag<load_glue_align8>, LocalAddress;
def load_align16_local_m0 : LoadFrag<load_glue_align16>, LocalAddress;

// Atomic loads.
def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress;
|
2017-09-20 11:43:35 +08:00
|
|
|
|
|
|
|
|
|
|
|
// ISD::STORE with an additional glue input (SDNPInGlue).
def AMDGPUst_glue : SDNode<
  "ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

// ISD::ATOMIC_STORE with an additional glue input (SDNPInGlue).
def AMDGPUatomic_st_glue : SDNode<
  "ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;
|
|
|
|
|
|
|
|
// Thin fragment over AMDGPUatomic_st_glue that forwards (ptr, val) unchanged
// and sets no predicate fields.
def atomic_store_glue : PatFrag<
  (ops node:$ptr, node:$val),
  (AMDGPUatomic_st_glue node:$ptr, node:$val)
>;
|
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// Glued store restricted to the unindexed addressing form.
def unindexedstore_glue : PatFrag<
  (ops node:$val, node:$ptr),
  (AMDGPUst_glue node:$val, node:$ptr)
> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

// Glued, unindexed store that is not a truncating store.
def store_glue : PatFrag<
  (ops node:$val, node:$ptr),
  (unindexedstore_glue node:$val, node:$ptr)
> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// Glued, unindexed truncating store.
def truncstore_glue : PatFrag<
  (ops node:$val, node:$ptr),
  (unindexedstore_glue node:$val, node:$ptr)
> {
  let IsStore = 1;
  let IsTruncStore = 1;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// Truncating glued stores narrowed to a specific memory type.
def truncstorei8_glue : PatFrag<
  (ops node:$val, node:$ptr),
  (truncstore_glue node:$val, node:$ptr)
> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_glue : PatFrag<
  (ops node:$val, node:$ptr),
  (truncstore_glue node:$val, node:$ptr)
> {
  let IsStore = 1;
  let MemoryVT = i16;
}
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// store_glue wrapped in the Aligned8Bytes / Aligned16Bytes helper classes
// (defined elsewhere; presumably they add a minimum-alignment predicate).
def store_glue_align8 : Aligned8Bytes<
  (ops node:$value, node:$ptr),
  (store_glue node:$value, node:$ptr)
>;

def store_glue_align16 : Aligned16Bytes<
  (ops node:$value, node:$ptr),
  (store_glue node:$value, node:$ptr)
>;
|
|
|
|
|
2017-09-20 11:43:35 +08:00
|
|
|
// Local-address versions of the glued store fragments. Each one combines
// StoreFrag over a glued fragment or node with the LocalAddress mix-in.
def store_local_m0         : StoreFrag<store_glue>, LocalAddress;
def truncstorei8_local_m0  : StoreFrag<truncstorei8_glue>, LocalAddress;
def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress;

// Note: this wraps the AMDGPUatomic_st_glue node directly rather than the
// atomic_store_glue fragment.
def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress;

// Alignment-restricted stores.
def store_align8_local_m0  : StoreFrag<store_glue_align8>, LocalAddress;
def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress;
|
2017-09-20 11:43:35 +08:00
|
|
|
|
2016-02-13 07:45:29 +08:00
|
|
|
// Matches a setcc only when every user of the node is an unselected
// ISD::CopyToReg whose destination register (operand 1) is AMDGPU::SCC.
// A setcc with no users also matches, since the loop body never runs.
def si_setcc_uniform : PatFrag<
  (ops node:$lhs, node:$rhs, node:$cond),
  (setcc node:$lhs, node:$rhs, node:$cond), [{
  for (SDNode *User : N->uses()) {
    // Reject anything that is already a machine node or is not a CopyToReg.
    const bool IsPlainCopyToReg =
        !User->isMachineOpcode() && User->getOpcode() == ISD::CopyToReg;
    if (!IsPlainCopyToReg)
      return false;

    // The copy must target SCC.
    if (cast<RegisterSDNode>(User->getOperand(1))->getReg() != AMDGPU::SCC)
      return false;
  }
  return true;
}]>;
|
|
|
|
|
2019-03-09 04:58:11 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SDNodes PatFrags for d16 loads
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Fragment wrapper for d16 loads: forwards a pointer and a tied-in value to
// the given operator unchanged.
class LoadD16Frag<SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)
>;

// Address-space specific variants, formed by adding the corresponding
// address mix-in class.
class LocalLoadD16<SDPatternOperator op>
  : LoadD16Frag<op>, LocalAddress;
class GlobalLoadD16<SDPatternOperator op>
  : LoadD16Frag<op>, GlobalLoadAddress;
class PrivateLoadD16<SDPatternOperator op>
  : LoadD16Frag<op>, PrivateAddress;
class FlatLoadD16<SDPatternOperator op>
  : LoadD16Frag<op>, FlatLoadAddress;
|
|
|
|
|
|
|
|
// d16 "hi" load fragments, one triple (plain, _u8, _i8 node) per address
// space.
def load_d16_hi_local           : LocalLoadD16<SIload_d16_hi>;
def az_extloadi8_d16_hi_local   : LocalLoadD16<SIload_d16_hi_u8>;
def sextloadi8_d16_hi_local     : LocalLoadD16<SIload_d16_hi_i8>;

def load_d16_hi_global          : GlobalLoadD16<SIload_d16_hi>;
def az_extloadi8_d16_hi_global  : GlobalLoadD16<SIload_d16_hi_u8>;
def sextloadi8_d16_hi_global    : GlobalLoadD16<SIload_d16_hi_i8>;

def load_d16_hi_private         : PrivateLoadD16<SIload_d16_hi>;
def az_extloadi8_d16_hi_private : PrivateLoadD16<SIload_d16_hi_u8>;
def sextloadi8_d16_hi_private   : PrivateLoadD16<SIload_d16_hi_i8>;

def load_d16_hi_flat            : FlatLoadD16<SIload_d16_hi>;
def az_extloadi8_d16_hi_flat    : FlatLoadD16<SIload_d16_hi_u8>;
def sextloadi8_d16_hi_flat      : FlatLoadD16<SIload_d16_hi_i8>;


// d16 "lo" load fragments, same layout as above.
def load_d16_lo_local           : LocalLoadD16<SIload_d16_lo>;
def az_extloadi8_d16_lo_local   : LocalLoadD16<SIload_d16_lo_u8>;
def sextloadi8_d16_lo_local     : LocalLoadD16<SIload_d16_lo_i8>;

def load_d16_lo_global          : GlobalLoadD16<SIload_d16_lo>;
def az_extloadi8_d16_lo_global  : GlobalLoadD16<SIload_d16_lo_u8>;
def sextloadi8_d16_lo_global    : GlobalLoadD16<SIload_d16_lo_i8>;

def load_d16_lo_private         : PrivateLoadD16<SIload_d16_lo>;
def az_extloadi8_d16_lo_private : PrivateLoadD16<SIload_d16_lo_u8>;
def sextloadi8_d16_lo_private   : PrivateLoadD16<SIload_d16_lo_i8>;

def load_d16_lo_flat            : FlatLoadD16<SIload_d16_lo>;
def az_extloadi8_d16_lo_flat    : FlatLoadD16<SIload_d16_lo_u8>;
def sextloadi8_d16_lo_flat      : FlatLoadD16<SIload_d16_lo_i8>;
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-02-28 06:15:25 +08:00
|
|
|
// Shift fragments with reversed operand order: the fragment takes
// ($src1, $src0) and expands to the generic shift applied as ($src0, $src1).
def lshr_rev : PatFrag<
  (ops node:$src1, node:$src0), (srl $src0, $src1)
>;

def ashr_rev : PatFrag<
  (ops node:$src1, node:$src0), (sra $src0, $src1)
>;

def lshl_rev : PatFrag<
  (ops node:$src1, node:$src0), (shl $src0, $src1)
>;
|
|
|
|
|
2018-01-17 22:05:05 +08:00
|
|
|
// Defines, for one two-operand atomic operation:
//   <NAME>_glue      - the atomic node with an extra glue input;
//   <NAME>_local_m0  - local-address fragment over that node;
//   <NAME>_region_m0 - region-address fragment over that node.
//
// op_name   : suffix appended to "<namespace>::ATOMIC_" to form the opcode.
// is_amdgpu : selects the "AMDGPUISD" namespace when set, "ISD" otherwise.
// tc        : type profile of the node (defaults to SDTAtomic2).
multiclass SIAtomicM0Glue2<string op_name, bit is_amdgpu = 0,
                           SDTypeProfile tc = SDTAtomic2> {

  def _glue : SDNode<
    !strconcat(!if(is_amdgpu, "AMDGPUISD", "ISD"), "::ATOMIC_", op_name),
    tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  def _local_m0  : local_binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
  def _region_m0 : region_binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
}
|
|
|
|
|
2017-10-24 01:16:43 +08:00
|
|
|
// Glued atomic operations. A second argument of 1 picks the AMDGPUISD
// namespace for the opcode; a third argument overrides the type profile.
defm atomic_load_add  : SIAtomicM0Glue2<"LOAD_ADD">;
defm atomic_load_sub  : SIAtomicM0Glue2<"LOAD_SUB">;
defm atomic_inc       : SIAtomicM0Glue2<"INC", 1>;
defm atomic_dec       : SIAtomicM0Glue2<"DEC", 1>;
defm atomic_load_and  : SIAtomicM0Glue2<"LOAD_AND">;
defm atomic_load_min  : SIAtomicM0Glue2<"LOAD_MIN">;
defm atomic_load_max  : SIAtomicM0Glue2<"LOAD_MAX">;
defm atomic_load_or   : SIAtomicM0Glue2<"LOAD_OR">;
defm atomic_load_xor  : SIAtomicM0Glue2<"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2<"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2<"LOAD_UMAX">;
defm atomic_swap      : SIAtomicM0Glue2<"SWAP">;

// Floating-point variants use the SDTAtomic2_f32 profile.
defm atomic_load_fadd : SIAtomicM0Glue2<"LOAD_FADD", 0, SDTAtomic2_f32>;
defm atomic_load_fmin : SIAtomicM0Glue2<"LOAD_FMIN", 1, SDTAtomic2_f32>;
defm atomic_load_fmax : SIAtomicM0Glue2<"LOAD_FMAX", 1, SDTAtomic2_f32>;
|
2017-10-24 01:16:43 +08:00
|
|
|
|
|
|
|
// ISD::ATOMIC_CMP_SWAP with an additional glue input (SDNPInGlue).
def atomic_cmp_swap_glue : SDNode<
  "ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Local- and region-address fragments over the glued compare-and-swap node.
def atomic_cmp_swap_local_m0  : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
|
2017-10-24 01:16:43 +08:00
|
|
|
|
2015-05-12 23:00:49 +08:00
|
|
|
|
2013-09-12 10:55:14 +08:00
|
|
|
// Transforms that re-emit an integer immediate as a target constant of a
// fixed type. The i1 and i8 forms read the value zero-extended; the i16, i32
// and i64 forms read it sign-extended.
def as_i1imm : SDNodeXForm<imm, [{
  uint64_t Val = N->getZExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  uint64_t Val = N->getZExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i8);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i16);
}]>;

def as_i32imm : SDNodeXForm<imm, [{
  int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;

def as_i64imm : SDNodeXForm<imm, [{
  int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i64);
}]>;
|
|
|
|
|
2017-10-24 18:27:13 +08:00
|
|
|
// Re-emits a condition-code node as an i32 target constant holding the value
// returned by N->get().
def cond_as_i32imm : SDNodeXForm<cond, [{
  const auto CondCode = N->get();
  return CurDAG->getTargetConstant(CondCode, SDLoc(N), MVT::i32);
}]>;
|
|
|
|
|
2015-01-14 06:59:41 +08:00
|
|
|
// Copied from the AArch64 backend:
// Re-emits a floating-point immediate as an i32 target constant carrying the
// immediate's raw bit pattern.
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i32);
}]>;
|
|
|
|
|
2016-09-18 00:09:55 +08:00
|
|
|
// Converts a frame-index node into an i32 target frame index with the same
// index value.
def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  return CurDAG->getTargetFrameIndex(cast<FrameIndexSDNode>(N)->getIndex(),
                                     MVT::i32);
}]>;
|
|
|
|
|
2015-01-14 06:59:41 +08:00
|
|
|
// Copied from the AArch64 backend:
// Re-emits a floating-point immediate as an i64 target constant carrying the
// immediate's raw bit pattern.
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i64);
}]>;
|
|
|
|
|
AMDGPU: Dimension-aware image intrinsics
Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.
This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.
Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.
v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI
Change-Id: I099f309e0a394082a5901ea196c3967afb867f04
Reviewers: arsenm, rampitec, b-sumner
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D44939
llvm-svn: 329166
2018-04-04 18:58:54 +08:00
|
|
|
// Produces an i1 target constant equal to bit `bitnum` of the matched
// immediate's zero-extended value. The template argument is pasted directly
// into the C++ fragment as the shift amount.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  const unsigned Selected = (N->getZExtValue() >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Selected, SDLoc(N), MVT::i1);
}]>;
|
|
|
|
|
2019-07-12 07:42:57 +08:00
|
|
|
// Matches i32 immediates for which isInt<16>(Imm) holds.
def SIMM16bit : ImmLeaf<i32, [{
  return isInt<16>(Imm);
}]>;
|
|
|
|
|
|
|
|
// Matches i32 immediates for which isUInt<16>(Imm) holds.
def UIMM16bit : ImmLeaf<i32, [{
  return isUInt<16>(Imm);
}]>;
|
|
|
|
|
2013-02-27 01:52:23 +08:00
|
|
|
// Leaf over an integer immediate of type `vt`; it matches when
// isInlineImmediate(N) returns true.
class InlineImm<ValueType vt> : PatLeaf<(vt imm), [{
  return isInlineImmediate(N);
}]>;
|
|
|
|
|
2014-12-18 05:04:08 +08:00
|
|
|
// Leaf over a floating-point immediate of type `vt`; it matches when
// isInlineImmediate(N) returns true.
class InlineFPImm<ValueType vt> : PatLeaf<(vt fpimm), [{
  return isInlineImmediate(N);
}]>;
|
|
|
|
|
2016-11-02 06:55:07 +08:00
|
|
|
// Leaf over the caller-supplied fragment `frag`; it matches when
// isVGPRImm(N) returns true.
class VGPRImm<dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;
|
|
|
|
|
2017-01-31 03:30:24 +08:00
|
|
|
// Replaces an immediate with an i32 constant holding the negation of its
// sign-extended value.
def NegateImm : SDNodeXForm<imm, [{
  const int64_t Negated = -N->getSExtValue();
  return CurDAG->getConstant(Negated, SDLoc(N), MVT::i32);
}]>;
|
|
|
|
|
|
|
|
// TODO: When FP inline imm values work?
|
|
|
|
// Matches i32 immediates in the range [-64, -17]; matched values are
// transformed through NegateImm.
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm >= -64 && Imm < -16;
}], NegateImm>;
|
|
|
|
|
|
|
|
// Matches i16 immediates in the range [-64, -17]; matched values are
// transformed through NegateImm.
def NegSubInlineConst16 : ImmLeaf<i16, [{
  return Imm >= -64 && Imm < -16;
}], NegateImm>;
|
|
|
|
|
2017-06-28 10:52:39 +08:00
|
|
|
// Matches immediates whose zero-extended value is below 32.
def ShiftAmt32Imm : PatLeaf<(imm), [{
  return N->getZExtValue() < 32;
}]>;
|
|
|
|
|
2019-06-20 07:37:43 +08:00
|
|
|
// Transforms a build_vector node into result 0 of the node returned by
// packNegConstantV2I16(N, *CurDAG).
def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;
|
|
|
|
|
|
|
|
// Matches a two-operand build_vector of 16-bit elements when either
//   * both operands are the same value and isNegInlineImmediate accepts it, or
//   * one operand is a null constant or undef and isNegInlineImmediate
//     accepts the other.
// Matched nodes are transformed through getNegV2I16Imm.
def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);

  SDValue Elt0 = N->getOperand(0);
  SDValue Elt1 = N->getOperand(1);

  // Identical operands: a single check decides the match.
  if (Elt0 == Elt1)
    return isNegInlineImmediate(Elt0.getNode());

  if (isNullConstantOrUndef(Elt0) && isNegInlineImmediate(Elt1.getNode()))
    return true;
  return isNullConstantOrUndef(Elt1) && isNegInlineImmediate(Elt0.getNode());
}], getNegV2I16Imm>;
|
|
|
|
|
2014-07-21 22:01:08 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Custom Operands
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-04-08 09:09:26 +08:00
|
|
|
// Assembler operand class "SoppBrTarget"; operands of this class are parsed
// by the method named parseSOppBrTarget.
def SoppBrTarget : AsmOperandClass {
  let ParserMethod = "parseSOppBrTarget";
  let Name         = "SoppBrTarget";
}
|
|
|
|
|
2014-07-21 22:01:08 +08:00
|
|
|
// Operand of type OtherVT tagged OPERAND_PCREL. It is matched through the
// SoppBrTarget class and names dedicated encoder and decoder methods.
def sopp_brtarget : Operand<OtherVT> {
  let OperandType      = "OPERAND_PCREL";
  let ParserMatchClass = SoppBrTarget;
  let EncoderMethod    = "getSOPPBrEncoding";
  let DecoderMethod    = "decodeSoppBrTarget";
}
|
|
|
|
|
2016-06-15 04:29:59 +08:00
|
|
|
// Operand of pointer value type (iPTR); no custom methods are attached.
def si_ga : Operand<iPTR>;
|
2015-12-10 10:13:01 +08:00
|
|
|
|
2016-12-16 04:40:20 +08:00
|
|
|
// Assembler operand class "InterpSlot", naming its predicate, parser and
// render methods.
def InterpSlotMatchClass : AsmOperandClass {
  let Name            = "InterpSlot";
  let ParserMethod    = "parseInterpSlot";
  let PredicateMethod = "isInterpSlot";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
2016-05-07 03:32:38 +08:00
|
|
|
// i32 immediate operand matched through InterpSlotMatchClass and printed by
// the method named printInterpSlot.
def InterpSlot : Operand<i32> {
  let OperandType      = "OPERAND_IMMEDIATE";
  let ParserMatchClass = InterpSlotMatchClass;
  let PrintMethod      = "printInterpSlot";
}
|
|
|
|
|
|
|
|
// Assembler operand class "Attr", naming its predicate, parser and render
// methods.
def AttrMatchClass : AsmOperandClass {
  let Name            = "Attr";
  let ParserMethod    = "parseInterpAttr";
  let PredicateMethod = "isInterpAttr";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
2016-12-15 00:36:12 +08:00
|
|
|
// It appears to be necessary to create a separate operand for this to
|
|
|
|
// be able to parse attr<num> with no space.
|
|
|
|
// i32 immediate operand matched through AttrMatchClass and printed by the
// method named printInterpAttr.
def Attr : Operand<i32> {
  let OperandType      = "OPERAND_IMMEDIATE";
  let ParserMatchClass = AttrMatchClass;
  let PrintMethod      = "printInterpAttr";
}
|
|
|
|
|
2016-12-16 04:40:20 +08:00
|
|
|
// Assembler operand class "AttrChan". It names a predicate and a render
// method but no dedicated parser method.
def AttrChanMatchClass : AsmOperandClass {
  let Name            = "AttrChan";
  let RenderMethod    = "addImmOperands";
  let PredicateMethod = "isAttrChan";
}
|
|
|
|
|
2016-12-15 00:36:12 +08:00
|
|
|
// i32 immediate operand matched through AttrChanMatchClass and printed by
// the method named printInterpAttrChan.
def AttrChan : Operand<i32> {
  let OperandType      = "OPERAND_IMMEDIATE";
  let ParserMatchClass = AttrChanMatchClass;
  let PrintMethod      = "printInterpAttrChan";
}
|
|
|
|
|
2016-05-07 03:32:38 +08:00
|
|
|
// Assembler operand class "SendMsg", naming its predicate, parser and render
// methods.
def SendMsgMatchClass : AsmOperandClass {
  let Name            = "SendMsg";
  let ParserMethod    = "parseSendMsgOp";
  let PredicateMethod = "isSendMsg";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
2017-06-01 00:26:47 +08:00
|
|
|
// Assembler operand class "Swizzle". The operand is marked optional.
def SwizzleMatchClass : AsmOperandClass {
  let Name            = "Swizzle";
  let IsOptional      = 1;
  let ParserMethod    = "parseSwizzleOp";
  let PredicateMethod = "isSwizzle";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
// Assembler operand class "EndpgmImm". The operand is marked optional.
def EndpgmMatchClass : AsmOperandClass {
  let Name            = "EndpgmImm";
  let IsOptional      = 1;
  let ParserMethod    = "parseEndpgmOp";
  let PredicateMethod = "isEndpgm";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
2016-12-06 04:42:41 +08:00
|
|
|
// Assembler operand class "ExpTgt", naming its predicate, parser and render
// methods.
//
// RenderMethod names the hook that appends a parsed operand to the MCInst.
// It used to be "printExpTgt", which is the name the exp_tgt operand derives
// for its PrintMethod ("print" # "ExpTgt"), i.e. an instruction-printer hook
// rather than a render hook. "addImmOperands" is what every other immediate
// match class in this file uses.
// NOTE(review): assumes no user of this class relied on the old value;
// confirm against the asm parser's operand class.
def ExpTgtMatchClass : AsmOperandClass {
  let Name            = "ExpTgt";
  let PredicateMethod = "isExpTgt";
  let ParserMethod    = "parseExpTgt";
  let RenderMethod    = "addImmOperands";
}
|
|
|
|
|
2016-05-07 03:32:38 +08:00
|
|
|
// i32 operand matched through SendMsgMatchClass and printed by the method
// named printSendMsg.
def SendMsgImm : Operand<i32> {
  let ParserMatchClass = SendMsgMatchClass;
  let PrintMethod      = "printSendMsg";
}
|
|
|
|
|
2017-06-01 00:26:47 +08:00
|
|
|
// i16 operand matched through SwizzleMatchClass and printed by the method
// named printSwizzle.
def SwizzleImm : Operand<i16> {
  let ParserMatchClass = SwizzleMatchClass;
  let PrintMethod      = "printSwizzle";
}
|
|
|
|
|
[AMDGPU] Add support for immediate operand for S_ENDPGM
Summary:
Add support for immediate operand in S_ENDPGM
Change-Id: I0c56a076a10980f719fb2a8f16407e9c301013f6
Reviewers: alexshap
Subscribers: qcolombet, arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, eraman, arphaman, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59213
llvm-svn: 355902
2019-03-12 17:52:58 +08:00
|
|
|
// i16 operand matched through EndpgmMatchClass and printed by the method
// named printEndpgm.
def EndpgmImm : Operand<i16> {
  let ParserMatchClass = EndpgmMatchClass;
  let PrintMethod      = "printEndpgm";
}
|
|
|
|
|
2016-05-07 03:32:38 +08:00
|
|
|
// Assembler operand class "SWaitCnt". It names a parser and a render method
// but no predicate method.
def SWaitMatchClass : AsmOperandClass {
  let Name         = "SWaitCnt";
  let ParserMethod = "parseSWaitCntOps";
  let RenderMethod = "addImmOperands";
}
|
|
|
|
|
2016-12-06 04:42:41 +08:00
|
|
|
// Assembler operand class "VReg32OrOff"; only a parser method is named.
def VReg32OrOffClass : AsmOperandClass {
  let ParserMethod = "parseVReg32OrOff";
  let Name         = "VReg32OrOff";
}
|
|
|
|
|
2016-05-07 03:32:38 +08:00
|
|
|
// i32 immediate operand matched through SWaitMatchClass and printed by the
// method named printWaitFlag.
def WAIT_FLAG : Operand<i32> {
  let OperandType      = "OPERAND_IMMEDIATE";
  let PrintMethod      = "printWaitFlag";
  let ParserMatchClass = SWaitMatchClass;
}
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
include "SIInstrFormats.td"
|
2014-12-07 20:18:57 +08:00
|
|
|
include "VIInstrFormats.td"
|
2014-08-01 08:32:39 +08:00
|
|
|
|
2019-06-15 00:25:46 +08:00
|
|
|
// Assembler operand class "BoolReg", parsed by parseBoolReg and rendered as
// a register operand (addRegOperands).
def BoolReg : AsmOperandClass {
  let Name         = "BoolReg";
  let RenderMethod = "addRegOperands";
  let ParserMethod = "parseBoolReg";
}
|
|
|
|
|
|
|
|
// Register operand over SReg_1 that is matched through BoolReg and decoded
// by the method named decodeBoolReg.
class BoolRC : RegisterOperand<SReg_1> {
  let DecoderMethod    = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
|
|
|
|
|
|
|
|
// Register operand over SReg_1_XEXEC that is matched through BoolReg and
// decoded by the method named decodeBoolReg.
def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let DecoderMethod    = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
|
|
|
|
|
|
|
|
// BoolRC operand printed by the method named printVOPDst.
def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}
|
|
|
|
|
2019-06-20 23:08:34 +08:00
|
|
|
// SCSrc_i1 is the operand for pseudo instructions only.
|
|
|
|
// Boolean immediates shall not be exposed to codegen instructions.
|
|
|
|
// Register operand over SReg_1_XEXEC tagged with the AMDGPU operand type
// OPERAND_REG_IMM_INT32. It is matched through BoolReg and decoded by the
// method named decodeBoolReg.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType      = "OPERAND_REG_IMM_INT32";
  let DecoderMethod    = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
|
|
|
|
|
2016-12-06 04:42:41 +08:00
|
|
|
// ===----------------------------------------------------------------------===//
|
|
|
|
// ExpSrc* Special cases for exp src operands which are printed as
|
|
|
|
// "off" depending on en operand.
|
|
|
|
// ===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// VGPR_32 register operand matched through VReg32OrOffClass and printed by
// the method named printExpSrc0.
def ExpSrc0 : RegisterOperand<VGPR_32> {
  let ParserMatchClass = VReg32OrOffClass;
  let PrintMethod      = "printExpSrc0";
}
|
|
|
|
|
|
|
|
// VGPR_32 register operand matched through VReg32OrOffClass and printed by
// the method named printExpSrc1.
def ExpSrc1 : RegisterOperand<VGPR_32> {
  let ParserMatchClass = VReg32OrOffClass;
  let PrintMethod      = "printExpSrc1";
}
|
|
|
|
|
|
|
|
// VGPR_32 register operand matched through VReg32OrOffClass and printed by
// the method named printExpSrc2.
def ExpSrc2 : RegisterOperand<VGPR_32> {
  let ParserMatchClass = VReg32OrOffClass;
  let PrintMethod      = "printExpSrc2";
}
|
|
|
|
|
|
|
|
// VGPR_32 register operand matched through VReg32OrOffClass and printed by
// the method named printExpSrc3.
def ExpSrc3 : RegisterOperand<VGPR_32> {
  let ParserMatchClass = VReg32OrOffClass;
  let PrintMethod      = "printExpSrc3";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// Register operand over VS_32, parameterised by value type `vt`.
//
// Type is "FP" when isFloatType<vt>.ret is set and "INT" otherwise. The
// operand type and the decoder method name are both built from vt.Size, so
// each instantiation gets size-specific names; the encoder method is shared.
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  string Type = !if(isFloatType<vt>.ret, "FP", "INT");

  let OperandNamespace = "AMDGPU";
  let OperandType      = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let EncoderMethod    = "getSDWASrcEncoding";
  let DecoderMethod    = "decodeSDWASrc"#vt.Size;
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// SDWASrc instantiations for the i32, i16, f32 and f16 value types.
def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;
|
2017-05-26 23:52:00 +08:00
|
|
|
|
2019-06-20 23:08:34 +08:00
|
|
|
// BoolRC operand tagged with the AMDGPU operand type OPERAND_SDWA_VOPC_DST.
// It names dedicated encoder and decoder methods and is printed by the
// method named printVOPDst.
def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType      = "OPERAND_SDWA_VOPC_DST";
  let PrintMethod      = "printVOPDst";
  let DecoderMethod    = "decodeSDWAVopcDst";
  let EncoderMethod    = "getSDWAVopcDstEncoding";
}
|
|
|
|
|
2016-04-29 17:02:30 +08:00
|
|
|
// Assembler operand class whose names are all derived from `CName`:
//   Name            = "Imm" # CName
//   PredicateMethod = "is"  # CName
//
// `Optional` (default 1) selects between two configurations:
//   * optional: parsed by "parseOptionalOperand", with DefaultMethod set to
//     "default" # CName;
//   * required: parsed by "parse" # CName, with DefaultMethod left unset (?).
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
  let Name            = "Imm"#CName;
  let IsOptional      = Optional;
  let PredicateMethod = "is"#CName;
  let RenderMethod    = "addImmOperands";
  let ParserMethod    = !if(Optional, "parseOptionalOperand", "parse"#CName);
  let DefaultMethod   = !if(Optional, "default"#CName, ?);
}
|
|
|
|
|
2016-04-29 17:02:30 +08:00
|
|
|
// i1 operand printed by "print" # Name and matched through `MatchClass`.
class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
  let ParserMatchClass = MatchClass;
  let PrintMethod      = "print"#Name;
}
|
2016-05-28 08:50:51 +08:00
|
|
|
|
2016-04-29 17:02:30 +08:00
|
|
|
// i8 operand printed by "print" # Name and matched through `MatchClass`.
class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
  let ParserMatchClass = MatchClass;
  let PrintMethod      = "print"#Name;
}
|
|
|
|
|
2016-04-29 17:02:30 +08:00
|
|
|
// i16 operand printed by "print" # Name and matched through `MatchClass`.
class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
  let ParserMatchClass = MatchClass;
  let PrintMethod      = "print"#Name;
}
|
|
|
|
|
2016-04-29 17:02:30 +08:00
|
|
|
// i32 operand printed by "print" # Name and matched through `MatchClass`.
class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
  let ParserMatchClass = MatchClass;
  let PrintMethod      = "print"#Name;
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// i32 operand with default ops (i32 0), printed by "print" # Name and
// matched through `MatchClass`.
class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass>
    : OperandWithDefaultOps<i32, (ops (i32 0))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod      = "print"#Name;
}
|
|
|
|
|
2014-08-05 22:48:12 +08:00
|
|
|
// Every operand defined in this scope has OperandType "OPERAND_IMMEDIATE".
// Each def pairs a NamedOperand* class (which fixes the value type and the
// "print<Name>" method) with a NamedMatchClass (which fixes the assembler
// hooks). A trailing `0` argument on NamedMatchClass makes the operand
// non-optional.
let OperandType = "OPERAND_IMMEDIATE" in {

// i1 operands.
def offen  : NamedOperandBit<"Offen",  NamedMatchClass<"Offen">>;
def idxen  : NamedOperandBit<"Idxen",  NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;

// i16 and i8 offset operands.
def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset      : NamedOperandU16<"Offset",     NamedMatchClass<"Offset">>;
def offset0     : NamedOperandU8<"Offset0",     NamedMatchClass<"Offset0">>;
def offset1     : NamedOperandU8<"Offset1",     NamedMatchClass<"Offset1">>;

def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;

def omod     : NamedOperandU32<"OModSI",  NamedMatchClass<"OModSI">>;
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod  : NamedOperandBit<"High",    NamedMatchClass<"High">>;

// i1 operands.
def DLC       : NamedOperandBit<"DLC",      NamedMatchClass<"DLC">>;
def GLC       : NamedOperandBit<"GLC",      NamedMatchClass<"GLC">>;
def SLC       : NamedOperandBit<"SLC",      NamedMatchClass<"SLC">>;
def TFE       : NamedOperandBit<"TFE",      NamedMatchClass<"TFE">>;
def UNorm     : NamedOperandBit<"UNorm",    NamedMatchClass<"UNorm">>;
def DA        : NamedOperandBit<"DA",       NamedMatchClass<"DA">>;
def R128A16   : NamedOperandBit<"R128A16",  NamedMatchClass<"R128A16">>;
def D16       : NamedOperandBit<"D16",      NamedMatchClass<"D16">>;
def LWE       : NamedOperandBit<"LWE",      NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm    : NamedOperandBit<"ExpVM",    NamedMatchClass<"ExpVM">>;

def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;

def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim   : NamedOperandU8<"Dim",    NamedMatchClass<"Dim", 0>>;

def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;

def dpp_ctrl   : NamedOperandU32<"DPPCtrl",   NamedMatchClass<"DPPCtrl", 0>>;
def row_mask   : NamedOperandU32<"RowMask",   NamedMatchClass<"RowMask">>;
def bank_mask  : NamedOperandU32<"BankMask",  NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
def FI         : NamedOperandU32<"FI",        NamedMatchClass<"FI">>;

def dst_sel    : NamedOperandU32<"SDWADstSel",    NamedMatchClass<"SDWADstSel">>;
def src0_sel   : NamedOperandU32<"SDWASrc0Sel",   NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel   : NamedOperandU32<"SDWASrc1Sel",   NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;

// i32 operands whose default ops are (i32 0).
def op_sel    : NamedOperandU32Default0<"OpSel",   NamedMatchClass<"OpSel">>;
def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo    : NamedOperandU32Default0<"NegLo",   NamedMatchClass<"NegLo">>;
def neg_hi    : NamedOperandU32Default0<"NegHi",   NamedMatchClass<"NegHi">>;

def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;

def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;

def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>>;

} // End OperandType = "OPERAND_IMMEDIATE"
|
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
// Assembler operand class for a KImm operand of `size` bits. The class name
// and the predicate and render method names all embed `size`; parsing goes
// through the shared method named parseImm.
class KImmMatchClass<int size> : AsmOperandClass {
  let Name            = "KImmFP"#size;
  let ParserMethod    = "parseImm";
  let PredicateMethod = "isKImmFP"#size;
  let RenderMethod    = "addKImmFP"#size#"Operands";
}
|
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
// Operand of value type `vt` in the AMDGPU operand namespace. The operand
// type, print method and match class are all selected by vt.Size; the match
// class is looked up by name as "KImmFP<Size>MatchClass".
class kimmOperand<ValueType vt> : Operand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType      = "OPERAND_KIMM"#vt.Size;
  let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
  let PrintMethod      = "printU"#vt.Size#"ImmOperand";
}
|
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
// 32-bit VALU immediate operand that uses the constant bus.
|
|
|
|
// Match class for the operand described above: KImmMatchClass instantiated
// with 32. KImmMatchClass is defined outside this excerpt.
def KImmFP32MatchClass : KImmMatchClass<32>;
|
|
|
|
// Operand record built from kimmOperand with value type i32.
// NOTE(review): presumably kimmOperand is what ties this operand to
// KImmFP32MatchClass; its definition is not visible here -- confirm.
def f32kimm : kimmOperand<i32>;
|
|
|
|
|
|
|
|
// 32-bit VALU immediate operand with a 16-bit value that uses the
|
|
|
|
// constant bus.
|
|
|
|
// Match class for the operand described above: KImmMatchClass instantiated
// with 16. KImmMatchClass is defined outside this excerpt.
def KImmFP16MatchClass : KImmMatchClass<16>;
|
|
|
|
// Operand record built from kimmOperand with value type i16.
// NOTE(review): presumably kimmOperand is what ties this operand to
// KImmFP16MatchClass; its definition is not visible here -- confirm.
def f16kimm : kimmOperand<i16>;
|
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Assembler operand class parameterised by an integer operand size.
// Name and PredicateMethod embed opSize via string concatenation (for
// opSize = 32 they become "RegOrImmWithFP32InputMods" and
// "isRegOrImmWithFP32InputMods"); ParserMethod is the same for every size.
// NOTE(review): the "|" lines, timestamps and commit-message text inside this
// body are not TableGen; they look like blame-view residue from extraction --
// confirm against the upstream file.
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
|
|
|
|
// Size-specific operand class name.
let Name = "RegOrImmWithFP"#opSize#"InputMods";
|
2016-06-10 17:57:59 +08:00
|
|
|
// One parser method shared by all operand sizes.
let ParserMethod = "parseRegOrImmWithFPInputMods";
|
AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Prevously assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Size-specific predicate method name.
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
|
2016-06-10 17:57:59 +08:00
|
|
|
}
|
2017-02-28 02:49:11 +08:00
|
|
|
|
2016-12-10 08:39:12 +08:00
|
|
|
// FPInputModsMatchClass instantiated with opSize = 16.
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// FPInputModsMatchClass instantiated with opSize = 32.
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
|
|
|
|
// FPInputModsMatchClass instantiated with opSize = 64.
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
|
2016-06-10 17:57:59 +08:00
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Base class for input-modifier operands: an i32 Operand whose assembler
// match class is supplied by the template argument.
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
|
|
|
|
// Namespace that qualifies the OperandType string below.
let OperandNamespace = "AMDGPU";
|
|
|
|
let OperandType = "OPERAND_INPUT_MODS";
|
|
|
|
// Forward the match class passed in by the instantiating record.
let ParserMatchClass = matchClass;
|
|
|
|
}
|
|
|
|
|
|
|
|
// InputMods specialisation whose match class must be an FPInputModsMatchClass;
// the only thing it adds is the print method name.
class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
|
2016-06-10 17:57:59 +08:00
|
|
|
let PrintMethod = "printOperandAndFPInputMods";
|
|
|
|
}
|
2016-12-10 08:39:12 +08:00
|
|
|
|
|
|
|
// FPInputMods operand using the 16-bit FP match class.
def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// FPInputMods operand using the 32-bit FP match class.
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
|
|
|
|
// FPInputMods operand using the 64-bit FP match class.
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
|
2016-06-10 17:57:59 +08:00
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Assembler operand class parameterised by an integer operand size.
// Name and PredicateMethod embed opSize via string concatenation (for
// opSize = 32 they become "RegOrImmWithInt32InputMods" and
// "isRegOrImmWithInt32InputMods"); ParserMethod is the same for every size.
// NOTE(review): the "|" lines, timestamps and commit-message text inside this
// body are not TableGen; they look like blame-view residue from extraction --
// confirm against the upstream file.
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
|
|
|
|
// Size-specific operand class name.
let Name = "RegOrImmWithInt"#opSize#"InputMods";
|
2016-06-10 17:57:59 +08:00
|
|
|
// One parser method shared by all operand sizes.
let ParserMethod = "parseRegOrImmWithIntInputMods";
|
AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Prevously assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow lose of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Size-specific predicate method name.
let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
|
2016-06-10 17:57:59 +08:00
|
|
|
}
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// IntInputModsMatchClass instantiated with opSize = 32.
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
|
|
|
|
// IntInputModsMatchClass instantiated with opSize = 64.
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
|
2016-06-10 17:57:59 +08:00
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Input-modifiers operand built on the given integer match class.
// Printing is delegated to printOperandAndIntInputMods.
class IntInputMods <IntInputModsMatchClass matchClass>
  : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing with this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Integer input-modifier operands, one per match class.
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
|
2016-06-10 17:57:59 +08:00
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
// Assembler operand class named "OpSelMods".
// Parsing goes through parseRegOrImm; matching is checked with isRegOrImm.
class OpSelModsMatchClass : AsmOperandClass {
  let Name            = "OpSelMods";
  let ParserMethod    = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}
|
|
|
|
|
|
|
|
// Concrete op_sel match class and the input-modifiers operand that uses it.
def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// Assembler operand class for an SDWA source with FP input modifiers.
// opSize is spliced into both the class name and the predicate method name.
class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name            = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod    = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// FP SDWA match classes instantiated for opSize 16 and 32.
def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
|
|
|
|
|
|
|
|
// Input-modifiers operand for an FP SDWA source, built on the given match
// class and printed with printOperandAndFPInputMods.
class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass>
  : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// FP SDWA input-modifier operands, one per match class.
def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
|
|
|
|
|
2017-01-11 19:46:30 +08:00
|
|
|
// Assembler operand class "VRegWithFPInputMods": parsed with
// parseRegWithFPInputMods and accepted when isVReg32 holds.
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name            = "VRegWithFPInputMods";
  let ParserMethod    = "parseRegWithFPInputMods";
  let PredicateMethod = "isVReg32";
}
|
|
|
|
|
|
|
|
// Input-modifiers operand using FPVRegInputModsMatchClass; printed with
// printOperandAndFPInputMods.
def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// Assembler operand class for an SDWA source with integer input modifiers.
// opSize is spliced into both the class name and the predicate method name.
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name            = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod    = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// Integer SDWA match classes instantiated for opSize 16 and 32.
def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
|
|
|
|
|
|
|
|
// Input-modifiers operand for an integer SDWA source, built on the given
// match class and printed with printOperandAndIntInputMods.
class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass>
  : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
|
|
|
|
|
2018-01-17 22:00:48 +08:00
|
|
|
// Integer SDWA input-modifier operands, one per match class.
def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
|
|
|
|
|
2017-01-11 19:46:30 +08:00
|
|
|
// Assembler operand class "VRegWithIntInputMods": parsed with
// parseRegWithIntInputMods and accepted when isVReg32 holds.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name            = "VRegWithIntInputMods";
  let ParserMethod    = "parseRegWithIntInputMods";
  let PredicateMethod = "isVReg32";
}
|
|
|
|
|
|
|
|
// Input-modifiers operand using IntVRegInputModsMatchClass; printed with
// printOperandAndIntInputMods.
def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// Assembler operand class for packed FP input modifiers; opSize is spliced
// into the class name. Currently uses the generic parseRegOrImm / isRegOrImm
// pair; a dedicated predicate is left disabled below.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name            = "PackedFP"#opSize#"InputMods";
  let ParserMethod    = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}
|
|
|
|
|
|
|
|
// Assembler operand class for packed integer input modifiers; opSize is
// spliced into the class name. Currently uses the generic parseRegOrImm /
// isRegOrImm pair; a dedicated predicate is left disabled below.
class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name            = "PackedInt"#opSize#"InputMods";
  let ParserMethod    = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}
|
|
|
|
|
|
|
|
// Packed match classes instantiated for 16-bit elements.
def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
|
|
|
|
|
|
|
|
// Input-modifiers operand for packed FP sources. No custom print method is
// set; the candidate one is left disabled below.
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass>
  : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}
|
|
|
|
|
|
|
|
// Input-modifiers operand for packed integer sources. No custom print method
// is set; the candidate one is left disabled below.
class PackedIntInputMods <PackedIntInputModsMatchClass matchClass>
  : InputMods <matchClass> {
//  let PrintMethod = "printPackedIntInputMods";
}
|
|
|
|
|
|
|
|
// Packed 16-bit input-modifier operands, one per match class.
def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
|
2017-01-11 19:46:30 +08:00
|
|
|
|
2014-06-25 07:33:07 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Complex patterns
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-23 02:49:33 +08:00
|
|
|
// Each ComplexPattern below names its result type, the number of operands it
// produces, and the selector function that implements the match.

// DS addressing.
def DS1Addr1Offset      : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;

def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;

// VOP3 source-modifier patterns.
def VOP3Mods0           : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods0Clamp      : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods            : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods          : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan       : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
|
[AMDGPU] Restrict v_cndmask_b32 abs/neg modifiers to f32
Summary:
D64497 allowed abs/neg source modifiers on v_cndmask_b32 but it doesn't
make any sense to apply them to f16 operands; they would interpret the
bits of the value as an f32, giving nonsensical results. This patch
restricts them to f32 operands.
Reviewers: arsenm, hakzsam
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64636
llvm-svn: 365904
2019-07-12 23:02:59 +08:00
|
|
|
// VOP3Mods, but only allowed for f32 operands.
def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

// Packed (VOP3P) modifier patterns.
def VOP3PMods  : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;

// op_sel patterns, without and with source modifiers.
def VOP3OpSel  : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSel0 : ComplexPattern<untyped, 3, "SelectVOP3OpSel0">;

def VOP3OpSelMods  : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3OpSelMods0 : ComplexPattern<untyped, 3, "SelectVOP3OpSelMods0">;

def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;
|
|
|
|
|
2013-02-21 23:16:44 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SI assembler operands
|
|
|
|
//===----------------------------------------------------------------------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2013-02-21 23:16:49 +08:00
|
|
|
// Named integer constants for selected operands.
// NOTE(review): presumably these are source-operand encodings - confirm.
def SIOperand {
  int ZERO     = 0x80;
  int VCC      = 0x6A;
  int FLAT_SCR = 0x68;
}
|
|
|
|
|
2017-02-28 06:15:25 +08:00
|
|
|
// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE    = 0;
  int NEG     = 1;
  int ABS     = 2;
  int NEG_ABS = 3;

  // NEG_HI deliberately shares its value with ABS.
  int NEG_HI   = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  // Same value as OP_SEL_1.
  int DST_OP_SEL = 8;
}
|
|
|
|
|
|
|
|
// Values for the destination clamp field.
def DSTCLAMP {
  int NONE   = 0;
  int ENABLE = 1;
}
|
|
|
|
|
|
|
|
// Values for the destination output-modifier field; only NONE is defined.
def DSTOMOD {
  int NONE = 0;
}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-02-23 07:22:19 +08:00
|
|
|
// Trap ids used for LLVM's trap and debug-trap.
def TRAPID {
  int LLVM_TRAP       = 2;
  int LLVM_DEBUG_TRAP = 3;
}
|
|
|
|
|
2019-07-09 11:20:33 +08:00
|
|
|
// Named integer ids, suitable as the Reg argument of getHwRegImm.
// NOTE(review): presumably hardware register ids - confirm against the ISA
// documentation. Ids 8-14, 23 and 24 are not defined here.
def HWREG {
  int MODE        = 1;
  int STATUS      = 2;
  int TRAPSTS     = 3;
  int HW_ID       = 4;
  int GPR_ALLOC   = 5;
  int LDS_ALLOC   = 6;
  int IB_STS      = 7;
  int MEM_BASES   = 15;
  int TBA_LO      = 16;
  int TBA_HI      = 17;
  int TMA_LO      = 18;
  int TMA_HI      = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK  = 22;
  int POPS_PACKER = 25;
}
|
|
|
|
|
|
|
|
// Packs a register id, a bit offset and a bit-field size into one integer:
//   ret = Reg | (Offset << 6) | ((Size - 1) << 11)
// Offset defaults to 0 and Size to 32.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !or(Reg,
                !or(!shl(Offset, 6),
                    !shl(!add(Size, -1), 11)));
}
|
|
|
|
|
2013-02-21 23:16:44 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// SI Instruction multiclass helpers.
|
|
|
|
//
|
|
|
|
// Instructions with _32 take 32-bit operands.
|
|
|
|
// Instructions with _64 take 64-bit operands.
|
|
|
|
//
|
|
|
|
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
|
|
|
|
// encoding is the standard encoding, but instructions that make use of
|
|
|
|
// any of the instruction modifiers must use the 64-bit encoding.
|
|
|
|
//
|
|
|
|
// Instructions with _e32 use the 32-bit encoding.
|
|
|
|
// Instructions with _e64 use the 64-bit encoding.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2014-10-01 22:44:42 +08:00
|
|
|
// Mix-in that records the pseudo instruction name and the subtarget value
// for an instruction definition.
class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int    Subtarget   = subtarget;
}
|
|
|
|
|
2014-10-01 22:44:45 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// EXP classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-12-06 04:23:10 +08:00
|
|
|
// Common shape of an export instruction: no outputs; a target, four sources
// and the vm / compr / en operands as inputs. The asm string gains " done"
// when `done` is set. `node` is the selection pattern operator and defaults
// to null_frag. Assembly matching is converted by cvtExp.
class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
  (outs),
  (ins exp_tgt:$tgt,
       ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
       exp_vm:$vm, exp_compr:$compr, i8imm:$en),
  "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
  [(node (i8 timm:$tgt), (i8 timm:$en),
         f32:$src0, f32:$src1, f32:$src2, f32:$src3,
         (i1 timm:$compr), (i1 timm:$vm))]> {
  let AsmMatchConverter = "cvtExp";
}
|
2014-12-07 20:18:57 +08:00
|
|
|
|
2016-12-06 04:23:10 +08:00
|
|
|
// Split EXP instruction into EXP and EXP_DONE so we can set
// mayLoad for done=1.
//
// Emits one codegen-only pseudo (the only def carrying the selection
// pattern `node`) plus one real encoding each for SI, VI and GFX10.
multiclass EXP_m<bit done, SDPatternOperator node> {
  let mayLoad = done, DisableWQM = 1 in {

    // Pseudo used during code generation.
    let isPseudo = 1, isCodeGenOnly = 1 in {
      def "" : EXP_Helper<done, node>,
               SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
    }

    // Real encodings; each sets its `done` field from the multiclass argument.
    let done = done in {
      def _si : EXP_Helper<done>,
                SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
                EXPe {
        let AssemblerPredicates = [isGFX6GFX7];
        let DecoderNamespace    = "GFX6GFX7";
        let DisableDecoder      = DisableSIDecoder;
      }

      def _vi : EXP_Helper<done>,
                SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
                EXPe_vi {
        let AssemblerPredicates = [isGFX8GFX9];
        let DecoderNamespace    = "GFX8";
        let DisableDecoder      = DisableVIDecoder;
      }

      // GFX10 uses the EXPe encoding class and DisableSIDecoder, as _si does.
      def _gfx10 : EXP_Helper<done>,
                   SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
                   EXPe {
        let AssemblerPredicates = [isGFX10Plus];
        let DecoderNamespace    = "GFX10";
        let DisableDecoder      = DisableSIDecoder;
      }
    }
  }
}
|
|
|
|
|
2013-02-21 23:16:44 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Vector ALU classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2015-10-06 23:57:53 +08:00
|
|
|
// Counts the source operands: the index of the first SrcN whose type is
// `untyped`, or 3 when none of them is.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value), 0,
      !if (!eq(Src1.Value, untyped.Value), 1,   // VOP1
        !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
          3)));                                 // VOP3
}
|
|
|
|
|
|
|
|
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
// Chosen by VT.Size; sizes other than 16/32/64/128 fall through to
// VOPDstS64orS32.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret =
    !if(!eq(VT.Size, 32),  VOPDstOperand<VGPR_32>,
    !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
    !if(!eq(VT.Size, 64),  VOPDstOperand<VReg_64>,
    !if(!eq(VT.Size, 16),  VOPDstOperand<VGPR_32>,
                           VOPDstS64orS32)))); // else VT == i1
}
|
|
|
|
|
2019-06-22 00:30:14 +08:00
|
|
|
// Returns true if VT is floating point.
// Recognized types: f16, v2f16, v4f16, f32, v2f32, f64, v2f64; any other
// type yields 0.
class getIsFP<ValueType VT> {
  bit ret =
    !if(!eq(VT.Value, f16.Value),   1,
    !if(!eq(VT.Value, v2f16.Value), 1,
    !if(!eq(VT.Value, v4f16.Value), 1,
    !if(!eq(VT.Value, f32.Value),   1,
    !if(!eq(VT.Value, v2f32.Value), 1,
    !if(!eq(VT.Value, f64.Value),   1,
    !if(!eq(VT.Value, v2f64.Value), 1,
                                    0)))))));
}
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
// A 1-bit VT selects SDWAVopcDst; everything else gets a VGPR_32 destination.
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret =
    !if(!eq(VT.Size, 1),
        SDWAVopcDst,             // VOPC
        VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
// FP and integer types are handled in separate arms; within each arm the
// 64-bit size test is made before any test on a specific type.
class getVOPSrc0ForVT<ValueType VT> {
  bit isFP = getIsFP<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
      // Floating-point types.
      // NOTE(review): v4f16 is presumably 64 bits wide, in which case the
      // Size == 64 test wins and the AVSrc_64 arm is never reached - confirm.
      !if(!eq(VT.Size, 64),
        VSrc_f64,
        !if(!eq(VT.Value, f16.Value),
          VSrc_f16,
          !if(!eq(VT.Value, v2f16.Value),
            VSrc_v2f16,
            !if(!eq(VT.Value, v4f16.Value),
              AVSrc_64,
              VSrc_f32
            )
          )
        )
      ),
      // Integer (non-FP) types.
      !if(!eq(VT.Size, 64),
        VSrc_b64,
        !if(!eq(VT.Value, i16.Value),
          VSrc_b16,
          !if(!eq(VT.Value, v2i16.Value),
            VSrc_v2b16,
            VSrc_b32
          )
        )
      )
    );
}
|
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
// Returns the vreg register class to use for source operand given VT
// 128-bit types use VReg_128, 64-bit types VReg_64, everything else VGPR_32.
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret =
    !if(!eq(VT.Size, 128), VReg_128,
    !if(!eq(VT.Size, 64),  VReg_64,
                           VGPR_32));
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Picks the SDWA source operand for VT. retFlt and retInt hold the FP and
// integer candidates (the 16-bit variant when VT.Size is 16, else the 32-bit
// one); ret selects between them based on whether VT is floating point.
class getSDWASrcForVT <ValueType VT> {
  bit isFP = getIsFP<VT>.ret;

  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);

  RegisterOperand ret = !if(isFP, retFlt, retInt);
}
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
// Size tests (128, then 64) run first, then i1, then the per-type FP and
// integer tests.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = getIsFP<VT>.ret;

  RegisterOperand ret =
    !if(!eq(VT.Size, 128),
      VSrc_128,
      !if(!eq(VT.Size, 64),
        // 64-bit operands.
        !if(isFP,
          VSrc_f64,
          VSrc_b64),
        !if(!eq(VT.Value, i1.Value),
          SSrc_i1,
          !if(isFP,
            // Remaining floating-point types.
            // NOTE(review): v4f16 is presumably 64 bits wide, in which case
            // the Size == 64 test above wins and the AVSrc_64 arm is never
            // reached - confirm.
            !if(!eq(VT.Value, f16.Value),
              VSrc_f16,
              !if(!eq(VT.Value, v2f16.Value),
                VSrc_v2f16,
                !if(!eq(VT.Value, v4f16.Value),
                  AVSrc_64,
                  VSrc_f32
                )
              )
            ),
            // Remaining integer types.
            !if(!eq(VT.Value, i16.Value),
              VSrc_b16,
              !if(!eq(VT.Value, v2i16.Value),
                VSrc_v2b16,
                VSrc_b32
              )
            )
          )
        )
      )
    );
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
// Float or packed int
// Yields 1 for f16, f32, f64, v2f16 and v2i16; 0 for every other type.
class isModifierType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, f16.Value),   1,
    !if(!eq(SrcVT.Value, f32.Value),   1,
    !if(!eq(SrcVT.Value, f64.Value),   1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v2i16.Value), 1,
                                       0)))));
}
|
2016-09-19 22:39:49 +08:00
|
|
|
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing with this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
// Return type of input modifiers operand for specified input operand
|
2019-03-19 03:25:39 +08:00
|
|
|
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
  // Classification of VT as reported by the getIsFP / isPackedType helpers.
  // Only isFP takes part in the selection below; isPacked is merely exposed
  // as a field of this class.
  bit isFP = getIsFP<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;

  // Selection of the input-modifiers operand:
  //   64-bit VT            -> FP64InputMods for FP, Int64InputMods otherwise
  //   other FP VT          -> FP16InputMods for f16, FP32InputMods otherwise
  //   other non-FP VT      -> FP32InputMods when EnableF32SrcMods is set,
  //                           Int32InputMods otherwise
  Operand ret =
    !if(!eq(VT.Size, 64),
        !if(isFP, FP64InputMods, Int64InputMods),
        !if(isFP,
            !if(!eq(VT.Value, f16.Value), FP16InputMods, FP32InputMods),
            !if(EnableF32SrcMods, FP32InputMods, Int32InputMods)));
}
|
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
class getOpSelMod <ValueType VT> {
  // f16 sources use FP16InputMods; every other value type falls back to
  // IntOpSelMods.
  Operand ret = !if(!eq(VT.Value, f16.Value),
                    FP16InputMods,
                    IntOpSelMods);
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Return type of input modifiers operand for the specified input operand for DPP
|
2017-01-11 19:46:30 +08:00
|
|
|
class getSrcModExt <ValueType VT> {
  // Whether VT is a floating-point type, as reported by getIsFP.
  bit isFP = getIsFP<VT>.ret;

  // FP types get FPVRegInputMods, all other types get IntVRegInputMods.
  Operand ret = !if(isFP,
                    FPVRegInputMods,
                    IntVRegInputMods);
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Return type of input modifiers operand for the specified input operand for SDWA
|
|
|
|
class getSrcModSDWA <ValueType VT> {
  // Checked in order: f16, f32, i16. Any value type that matches none of
  // them ends up with Int32SDWAInputMods.
  Operand ret =
    !if(!eq(VT.Value, f16.Value),
        FP16SDWAInputMods,
        !if(!eq(VT.Value, f32.Value),
            FP32SDWAInputMods,
            !if(!eq(VT.Value, i16.Value),
                Int16SDWAInputMods,
                Int32SDWAInputMods)));
}
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
// Returns the input arguments for VOP[12C] instructions for the given source
// register classes and number of source arguments.
|
2015-01-13 03:33:18 +08:00
|
|
|
class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
  // One source  -> VOP1 form with just $src0.
  // Two sources -> VOP2 form with $src0 and $src1.
  // Any other NumSrcArgs yields an empty operand list.
  dag ret =
    !if(!eq(NumSrcArgs, 1),
        (ins Src0RC:$src0),
        !if(!eq(NumSrcArgs, 2),
            (ins Src0RC:$src0, Src1RC:$src1),
            (ins)));
}
|
|
|
|
|
|
|
|
// Returns the input arguments for VOP3 instructions for the given source
// operands, modifier operands and modifier flags.
|
2015-01-13 03:33:18 +08:00
|
|
|
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  // The operand list is chosen first by NumSrcArgs (0, 1, 2, otherwise 3) and
  // then by the modifier flags:
  //   * HasModifiers - sources are preceded by their $srcN_modifiers operand.
  //   * HasSrc2Mods  - (3 sources only) whether src2 also gets a modifiers
  //                    operand.
  //   * HasOMod      - append clamp and omod (2 and 3 sources with modifiers).
  //   * HasIntClamp  - append clamp alone where omod is not appended.
  dag ret =
    !if(!eq(NumSrcArgs, 0),
      // No inputs at all: VOP1 such as V_NOP, V_CLREXCP.
      (ins),
      !if(!eq(NumSrcArgs, 1),
        // ---- one source (VOP1) ----
        !if(HasModifiers,
          // With modifiers: clamp and omod are always present here.
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod:$clamp, omod:$omod),
          // Without modifiers: clamp only if requested.
          !if(HasIntClamp,
            (ins Src0RC:$src0, clampmod:$clamp),
            (ins Src0RC:$src0))),
        !if(!eq(NumSrcArgs, 2),
          // ---- two sources (VOP2) ----
          !if(HasModifiers,
            // With modifiers: clamp is always present, omod is optional.
            !if(HasOMod,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod:$clamp, omod:$omod),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod:$clamp)),
            // Without modifiers: clamp only if requested.
            !if(HasIntClamp,
              (ins Src0RC:$src0, Src1RC:$src1, clampmod:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1))),
          // ---- three sources (VOP3) ----
          !if(HasModifiers,
            !if(HasSrc2Mods,
              // Modifiers on all three sources.
              !if(HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2Mod:$src2_modifiers, Src2RC:$src2,
                     clampmod:$clamp, omod:$omod),
                !if(HasIntClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2,
                       clampmod:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2))),
              // Modifiers on src0 and src1 only; src2 is a plain operand.
              !if(HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2RC:$src2, clampmod:$clamp, omod:$omod),
                !if(HasIntClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2, clampmod:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2)))),
            // Without modifiers: clamp only if requested.
            !if(HasIntClamp,
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))))));
}
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
/// XXX - src1 may only allow VGPRs?
|
|
|
|
|
|
|
|
// The modifiers (except clamp) are dummy operands for the benefit of
|
|
|
|
// printing and parsing. They defer their values to looking at the
|
|
|
|
// srcN_modifiers for what to print.
|
|
|
|
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs,
                   bit HasClamp,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  // The operand list is assembled from three pieces, concatenated in order:
  //   1. the sources, each preceded by its $srcN_modifiers operand
  //      (two sources when NumSrcArgs == 2, three sources otherwise),
  //   2. clamp, only when HasClamp is set,
  //   3. the op_sel / op_sel_hi / neg_lo / neg_hi operands.
  dag ret =
    !con(
      !con(
        !if(!eq(NumSrcArgs, 2),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               Src2Mod:$src2_modifiers, Src2RC:$src2)),
        !if(HasClamp, (ins clampmod:$clamp), (ins))),
      (ins op_sel:$op_sel, op_sel_hi:$op_sel_hi,
           neg_lo:$neg_lo, neg_hi:$neg_hi));
}
|
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
class getInsVOP3OpSel <RegisterOperand Src0RC,
                       RegisterOperand Src1RC,
                       RegisterOperand Src2RC,
                       int NumSrcArgs,
                       bit HasClamp,
                       Operand Src0Mod,
                       Operand Src1Mod,
                       Operand Src2Mod> {
  // The operand list is assembled from three pieces, concatenated in order:
  //   1. the sources, each preceded by its $srcN_modifiers operand
  //      (two sources when NumSrcArgs == 2, three sources otherwise),
  //   2. clamp, only when HasClamp is set,
  //   3. the trailing op_sel operand.
  dag ret =
    !con(
      !con(
        !if(!eq(NumSrcArgs, 2),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               Src2Mod:$src2_modifiers, Src2RC:$src2)),
        !if(HasClamp, (ins clampmod:$clamp), (ins))),
      (ins op_sel:$op_sel));
}
|
|
|
|
|
[AMDGPU] Add pseudo "old" source to all DPP instructions
Summary:
All instructions with the DPP modifier may not write to certain lanes of
the output if bound_ctrl=1 is set or any bits in bank_mask or row_mask
aren't set, so the destination register may be both defined and modified.
The right way to handle this is to add a constraint that the destination
register is the same as one of the inputs. We could tie the destination
to the first source, but that would be too restrictive for some use-cases
where we want the destination to be some other value before the
instruction executes. Instead, add a fake "old" source and tie it to the
destination. Effectively, the "old" source defines what value unwritten
lanes will get. We'll expose this functionality to users with a new
intrinsic later.
Also, we want to use DPP instructions for computing derivatives, which
means we need to set WQM for them. We also need to enable the entire
wavefront when using DPP intrinsics to implement nonuniform subgroup
reductions, since otherwise we'll get incorrect results in some cases.
To accommodate this, add a new operand to all DPP instructions which will
be interpreted by the SI WQM pass. This will be exposed with a new
intrinsic later. We'll also add support for Whole Wavefront Mode later.
I also fixed llvm.amdgcn.mov.dpp to overwrite the source and fixed up
the test. However, I could also keep the old behavior (where lanes that
aren't written are undefined) if people want it.
Reviewers: tstellar, arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D34716
llvm-svn: 310283
2017-08-08 03:10:56 +08:00
|
|
|
class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {
  // Every variant ends with the four DPP control operands (dpp_ctrl,
  // row_mask, bank_mask, bound_ctrl). Variants that have sources start with
  // the $old operand of class DstRC, followed by the sources; with
  // HasModifiers each source is preceded by its $srcN_modifiers operand.
  dag ret =
    !if(!eq(NumSrcArgs, 0),
      // No inputs (V_NOP): only the DPP controls, no $old operand.
      (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
           bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
      !if(!eq(NumSrcArgs, 1),
        // ---- one source (VOP1_DPP) ----
        !if(HasModifiers,
          (ins DstRC:$old,
               Src0Mod:$src0_modifiers, Src0RC:$src0,
               dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
               bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
          (ins DstRC:$old,
               Src0RC:$src0,
               dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
               bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)),
        // ---- otherwise two sources (VOP2_DPP) ----
        !if(HasModifiers,
          (ins DstRC:$old,
               Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
               bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
          (ins DstRC:$old,
               Src0RC:$src0, Src1RC:$src1,
               dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
               bank_mask:$bank_mask, bound_ctrl:$bound_ctrl))));
}
|
|
|
|
|
2019-06-13 02:02:41 +08:00
|
|
|
// Input operand list for the DPP16 form of an instruction.
// Takes the same parameters as getInsDPP, instantiates it with them unchanged,
// and appends an FI:$fi operand to the resulting dag with !con.
class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
|
|
|
|
int NumSrcArgs, bit HasModifiers,
|
|
|
|
Operand Src0Mod, Operand Src1Mod> {
|
|
|
|
dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
|
|
|
|
HasModifiers, Src0Mod, Src1Mod>.ret,
|
|
|
|
(ins FI:$fi));
|
|
|
|
}
|
|
|
|
|
|
|
|
class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
|
|
|
|
int NumSrcArgs, bit HasModifiers,
|
|
|
|
Operand Src0Mod, Operand Src1Mod> {
|
|
|
|
dag ret = !if (!eq(NumSrcArgs, 0),
|
|
|
|
// VOP1 without input operands (V_NOP)
|
|
|
|
(ins dpp8:$dpp8, FI:$fi),
|
|
|
|
!if (!eq(NumSrcArgs, 1),
|
|
|
|
!if (!eq(HasModifiers, 1),
|
|
|
|
// VOP1_DPP with modifiers
|
|
|
|
(ins DstRC:$old, Src0Mod:$src0_modifiers,
|
|
|
|
Src0RC:$src0, dpp8:$dpp8, FI:$fi)
|
|
|
|
/* else */,
|
|
|
|
// VOP1_DPP without modifiers
|
|
|
|
(ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
|
|
|
|
/* endif */)
|
|
|
|
/* NumSrcArgs == 2 */,
|
|
|
|
!if (!eq(HasModifiers, 1),
|
|
|
|
// VOP2_DPP with modifiers
|
|
|
|
(ins DstRC:$old,
|
|
|
|
Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
|
|
|
dpp8:$dpp8, FI:$fi)
|
|
|
|
/* else */,
|
|
|
|
// VOP2_DPP without modifiers
|
|
|
|
(ins DstRC:$old,
|
|
|
|
Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
|
|
|
|
/* endif */)));
|
|
|
|
}
|
2016-07-01 17:59:21 +08:00
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Ins for SDWA
|
|
|
|
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
|
|
|
|
bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
|
|
|
|
ValueType DstVT> {
|
2017-05-23 18:08:55 +08:00
|
|
|
|
|
|
|
dag ret = !if(!eq(NumSrcArgs, 0),
|
|
|
|
// VOP1 without input operands (V_NOP)
|
|
|
|
(ins),
|
|
|
|
!if(!eq(NumSrcArgs, 1),
|
|
|
|
// VOP1
|
|
|
|
!if(!eq(HasSDWAOMod, 0),
|
2017-06-21 16:53:38 +08:00
|
|
|
// VOP1_SDWA without omod
|
2017-05-23 18:08:55 +08:00
|
|
|
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
clampmod:$clamp,
|
|
|
|
dst_sel:$dst_sel, dst_unused:$dst_unused,
|
|
|
|
src0_sel:$src0_sel),
|
2017-06-21 16:53:38 +08:00
|
|
|
// VOP1_SDWA with omod
|
2017-05-23 18:08:55 +08:00
|
|
|
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
clampmod:$clamp, omod:$omod,
|
|
|
|
dst_sel:$dst_sel, dst_unused:$dst_unused,
|
|
|
|
src0_sel:$src0_sel)),
|
|
|
|
!if(!eq(NumSrcArgs, 2),
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
2017-06-21 16:53:38 +08:00
|
|
|
// VOPC_SDWA
|
2017-05-23 18:08:55 +08:00
|
|
|
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
2017-06-21 16:53:38 +08:00
|
|
|
clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
|
|
|
|
// VOP2_SDWA
|
2017-05-23 18:08:55 +08:00
|
|
|
!if(!eq(HasSDWAOMod, 0),
|
2017-06-21 16:53:38 +08:00
|
|
|
// VOP2_SDWA without omod
|
2017-05-23 18:08:55 +08:00
|
|
|
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
|
|
|
clampmod:$clamp,
|
|
|
|
dst_sel:$dst_sel, dst_unused:$dst_unused,
|
|
|
|
src0_sel:$src0_sel, src1_sel:$src1_sel),
|
2017-06-21 16:53:38 +08:00
|
|
|
// VOP2_SDWA with omod
|
2017-05-23 18:08:55 +08:00
|
|
|
(ins Src0Mod:$src0_modifiers, Src0RC:$src0,
|
|
|
|
Src1Mod:$src1_modifiers, Src1RC:$src1,
|
|
|
|
clampmod:$clamp, omod:$omod,
|
|
|
|
dst_sel:$dst_sel, dst_unused:$dst_unused,
|
|
|
|
src0_sel:$src0_sel, src1_sel:$src1_sel))),
|
|
|
|
(ins)/* endif */)));
|
|
|
|
}
|
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
// Outs for DPP and SDWA
|
2017-05-23 18:08:55 +08:00
|
|
|
// Output operand list shared by the DPP and SDWA forms.
//   HasDst     - whether the instruction has a destination at all.
//   DstVT      - destination value type; a Size of 1 selects the VOPC case.
//   DstRCExt   - register operand used for $vdst.
// ret is (outs DstRCExt:$vdst) only when HasDst is set and DstVT.Size is not 1;
// in both other cases it is an empty (outs) list.
class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
|
2016-04-06 21:29:59 +08:00
|
|
|
dag ret = !if(HasDst,
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
2016-07-01 17:59:21 +08:00
|
|
|
(outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
|
2017-05-23 18:08:55 +08:00
|
|
|
(outs DstRCExt:$vdst)),
|
|
|
|
(outs)); // V_NOP
|
|
|
|
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Outs for SDWA
|
|
|
|
// Output operand list for the SDWA form.
//   HasDst     - whether the instruction has a destination at all.
//   DstVT      - destination value type; a Size of 1 selects the $sdst name.
//   DstRCSDWA  - register operand used for the destination.
// With a destination, the operand is named $sdst when DstVT.Size is 1 and
// $vdst otherwise; without one the list is empty.
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
|
2017-05-23 18:08:55 +08:00
|
|
|
dag ret = !if(HasDst,
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
2017-06-21 16:53:38 +08:00
|
|
|
(outs DstRCSDWA:$sdst),
|
|
|
|
(outs DstRCSDWA:$vdst)),
|
2016-04-06 21:29:59 +08:00
|
|
|
(outs)); // V_NOP
|
2016-02-13 08:51:31 +08:00
|
|
|
}
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
// Returns the assembly string for the inputs and outputs of a VOP[12C]
|
|
|
|
// instruction. This does not add the _e32 suffix, so it can be reused
|
|
|
|
// by getAsm64.
|
2016-02-17 02:14:56 +08:00
|
|
|
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
|
|
|
|
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
|
2015-10-06 23:57:53 +08:00
|
|
|
string src0 = ", $src0";
|
2014-08-01 08:32:39 +08:00
|
|
|
string src1 = ", $src1";
|
|
|
|
string src2 = ", $src2";
|
2015-10-06 23:57:53 +08:00
|
|
|
string ret = !if(HasDst, dst, "") #
|
|
|
|
!if(!eq(NumSrcArgs, 1), src0, "") #
|
|
|
|
!if(!eq(NumSrcArgs, 2), src0#src1, "") #
|
|
|
|
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
|
2014-08-01 08:32:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the assembly string for the inputs and outputs of a VOP3
|
|
|
|
// instruction.
|
2017-08-16 21:51:56 +08:00
|
|
|
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
|
2017-02-28 02:49:11 +08:00
|
|
|
bit HasOMod, ValueType DstVT = i32> {
|
2016-02-17 02:14:56 +08:00
|
|
|
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
|
2015-01-16 07:17:03 +08:00
|
|
|
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
2014-10-01 03:49:48 +08:00
|
|
|
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
|
|
|
" $src1_modifiers,"));
|
|
|
|
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
|
2017-08-16 21:51:56 +08:00
|
|
|
string iclamp = !if(HasIntClamp, "$clamp", "");
|
2014-08-01 08:32:39 +08:00
|
|
|
string ret =
|
|
|
|
!if(!eq(HasModifiers, 0),
|
2017-08-16 21:51:56 +08:00
|
|
|
getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
|
2017-02-28 02:49:11 +08:00
|
|
|
dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the assembly string for the inputs and outputs of a VOP3P
|
|
|
|
// instruction.
|
|
|
|
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
|
|
|
|
bit HasClamp, ValueType DstVT = i32> {
|
|
|
|
string dst = " $vdst";
|
|
|
|
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
|
|
|
|
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1",
|
|
|
|
" $src1,"));
|
|
|
|
string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
|
|
|
|
|
|
|
|
string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
|
|
|
|
string clamp = !if(HasClamp, "$clamp", "");
|
|
|
|
|
|
|
|
// Each modifier is printed as an array of bits for each operand, so
|
|
|
|
// all operands are printed as part of src0_modifiers.
|
|
|
|
string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
|
2014-08-01 08:32:39 +08:00
|
|
|
}
|
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
class getAsmVOP3OpSel <int NumSrcArgs,
|
|
|
|
bit HasClamp,
|
|
|
|
bit Src0HasMods,
|
|
|
|
bit Src1HasMods,
|
|
|
|
bit Src2HasMods> {
|
|
|
|
string dst = " $vdst";
|
|
|
|
|
|
|
|
string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
|
|
|
|
string isrc1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1",
|
|
|
|
" $src1,"));
|
|
|
|
string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
|
|
|
|
|
|
|
|
string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
|
|
|
string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
|
|
|
" $src1_modifiers,"));
|
|
|
|
string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
|
|
|
|
|
|
|
|
string src0 = !if(Src0HasMods, fsrc0, isrc0);
|
|
|
|
string src1 = !if(Src1HasMods, fsrc1, isrc1);
|
|
|
|
string src2 = !if(Src2HasMods, fsrc2, isrc2);
|
|
|
|
|
|
|
|
string clamp = !if(HasClamp, "$clamp", "");
|
|
|
|
|
|
|
|
string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
|
|
|
|
}
|
|
|
|
|
2016-02-17 02:14:56 +08:00
|
|
|
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
|
2016-04-06 21:29:59 +08:00
|
|
|
string dst = !if(HasDst,
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
|
|
|
"$sdst",
|
|
|
|
"$vdst"),
|
|
|
|
""); // use $sdst for VOPC
|
2016-02-13 08:51:31 +08:00
|
|
|
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
|
|
|
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
|
|
|
" $src1_modifiers,"));
|
|
|
|
string args = !if(!eq(HasModifiers, 0),
|
2016-02-17 02:14:56 +08:00
|
|
|
getAsm32<0, NumSrcArgs, DstVT>.ret,
|
2016-03-09 20:29:31 +08:00
|
|
|
", "#src0#src1);
|
2016-04-29 17:02:30 +08:00
|
|
|
string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
|
2016-02-13 08:51:31 +08:00
|
|
|
}
|
|
|
|
|
2019-06-13 02:02:41 +08:00
|
|
|
// Assembly operand string for the DPP16 form: the string produced by
// getAsmDPP for the same parameters with "$fi" appended directly (no
// separator is inserted here).
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
|
|
|
|
string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
|
|
|
|
}
|
|
|
|
|
|
|
|
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
|
|
|
|
string dst = !if(HasDst,
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
|
|
|
"$sdst",
|
|
|
|
"$vdst"),
|
|
|
|
""); // use $sdst for VOPC
|
|
|
|
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
|
|
|
|
string src1 = !if(!eq(NumSrcArgs, 1), "",
|
|
|
|
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
|
|
|
|
" $src1_modifiers,"));
|
|
|
|
string args = !if(!eq(HasModifiers, 0),
|
|
|
|
getAsm32<0, NumSrcArgs, DstVT>.ret,
|
|
|
|
", "#src0#src1);
|
|
|
|
string ret = dst#args#"$dpp8$fi";
|
|
|
|
}
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
|
2016-06-24 14:30:11 +08:00
|
|
|
string dst = !if(HasDst,
|
2016-04-26 21:33:56 +08:00
|
|
|
!if(!eq(DstVT.Size, 1),
|
2016-07-01 17:59:21 +08:00
|
|
|
" vcc", // use vcc token as dst for VOPC instructions
|
2016-04-26 21:33:56 +08:00
|
|
|
"$vdst"),
|
2016-06-10 17:57:59 +08:00
|
|
|
"");
|
2016-09-23 17:08:07 +08:00
|
|
|
string src0 = "$src0_modifiers";
|
|
|
|
string src1 = "$src1_modifiers";
|
2016-06-10 17:57:59 +08:00
|
|
|
string args = !if(!eq(NumSrcArgs, 0),
|
2016-06-24 14:30:11 +08:00
|
|
|
"",
|
|
|
|
!if(!eq(NumSrcArgs, 1),
|
2016-06-10 17:57:59 +08:00
|
|
|
", "#src0#"$clamp",
|
|
|
|
", "#src0#", "#src1#"$clamp"
|
|
|
|
)
|
|
|
|
);
|
2016-04-26 21:33:56 +08:00
|
|
|
string sdwa = !if(!eq(NumSrcArgs, 0),
|
|
|
|
"",
|
|
|
|
!if(!eq(NumSrcArgs, 1),
|
|
|
|
" $dst_sel $dst_unused $src0_sel",
|
2016-07-01 17:59:21 +08:00
|
|
|
!if(!eq(DstVT.Size, 1),
|
|
|
|
" $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
|
|
|
|
" $dst_sel $dst_unused $src0_sel $src1_sel"
|
|
|
|
)
|
2016-04-26 21:33:56 +08:00
|
|
|
)
|
|
|
|
);
|
|
|
|
string ret = dst#args#sdwa;
|
|
|
|
}
|
|
|
|
|
2017-05-23 18:08:55 +08:00
|
|
|
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
|
|
|
|
ValueType DstVT = i32> {
|
|
|
|
string dst = !if(HasDst,
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
|
|
|
"$sdst", // VOPC
|
|
|
|
"$vdst"), // VOP1/2
|
|
|
|
"");
|
|
|
|
string src0 = "$src0_modifiers";
|
|
|
|
string src1 = "$src1_modifiers";
|
|
|
|
string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
|
|
|
|
string args = !if(!eq(NumSrcArgs, 0), "",
|
|
|
|
!if(!eq(NumSrcArgs, 1),
|
|
|
|
", "#src0,
|
|
|
|
", "#src0#", "#src1
|
|
|
|
)
|
|
|
|
);
|
|
|
|
string sdwa = !if(!eq(NumSrcArgs, 0), "",
|
|
|
|
!if(!eq(NumSrcArgs, 1),
|
|
|
|
out_mods#" $dst_sel $dst_unused $src0_sel",
|
|
|
|
!if(!eq(DstVT.Size, 1),
|
|
|
|
" $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
|
|
|
|
out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
);
|
|
|
|
string ret = dst#args#sdwa;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
// Function that checks if instruction supports DPP and SDWA
|
|
|
|
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
|
2016-04-06 21:29:59 +08:00
|
|
|
ValueType Src1VT = i32> {
|
|
|
|
bit ret = !if(!eq(NumSrcArgs, 3),
|
2016-04-26 21:33:56 +08:00
|
|
|
0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
|
|
|
|
!if(!eq(DstVT.Size, 64),
|
|
|
|
0, // 64-bit dst - No DPP or SDWA for 64-bit operands
|
|
|
|
!if(!eq(Src0VT.Size, 64),
|
|
|
|
0, // 64-bit src0
|
2018-11-30 22:21:56 +08:00
|
|
|
!if(!eq(Src1VT.Size, 64),
|
2016-04-26 21:33:56 +08:00
|
|
|
0, // 64-bit src1
|
|
|
|
1
|
2016-04-06 21:29:59 +08:00
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2018-11-30 22:21:56 +08:00
|
|
|
// Checks whether an instruction supports DPP.
// ret is 0 when there are no source operands (NumSrcArgs == 0); otherwise it
// is whatever getHasExt returns for the same NumSrcArgs and value types.
class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
|
|
|
|
ValueType Src1VT = i32> {
|
|
|
|
bit ret = !if(!eq(NumSrcArgs, 0), 0,
|
|
|
|
getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
|
|
|
|
}
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// Logical OR of two bits, spelled with nested !if:
// ret is 1 if a is set, otherwise 1 if b is set, otherwise 0.
class BitOr<bit a, bit b> {
|
|
|
|
bit ret = !if(a, 1, !if(b, 1, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Logical AND of two bits, spelled with nested !if:
// ret is 1 only when both a and b are set, otherwise 0.
class BitAnd<bit a, bit b> {
|
|
|
|
bit ret = !if(a, !if(b, 1, 0), 0);
|
|
|
|
}
|
|
|
|
|
2018-09-21 18:31:22 +08:00
|
|
|
// Named integer constants for the NeedPatGen field of VOPProfile.
// VOPProfile initialises NeedPatGen to NoPattern; VOP_PAT_GEN uses Pattern as
// the default for its mode argument.
def PatGenMode {
|
|
|
|
int NoPattern = 0;
|
|
|
|
int Pattern = 1;
|
|
|
|
}
|
|
|
|
|
2019-03-19 03:35:44 +08:00
|
|
|
class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
|
|
|
|
bit _EnableClamp = 0> {
|
2014-08-01 08:32:39 +08:00
|
|
|
|
|
|
|
field list<ValueType> ArgVT = _ArgVT;
|
2019-03-19 03:25:39 +08:00
|
|
|
field bit EnableF32SrcMods = _EnableF32SrcMods;
|
2019-03-19 03:35:44 +08:00
|
|
|
field bit EnableClamp = _EnableClamp;
|
2014-08-01 08:32:39 +08:00
|
|
|
|
|
|
|
field ValueType DstVT = ArgVT[0];
|
|
|
|
field ValueType Src0VT = ArgVT[1];
|
|
|
|
field ValueType Src1VT = ArgVT[2];
|
|
|
|
field ValueType Src2VT = ArgVT[3];
|
2015-03-13 05:34:22 +08:00
|
|
|
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
|
2016-03-09 20:29:31 +08:00
|
|
|
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
|
2017-06-21 16:53:38 +08:00
|
|
|
field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
|
2015-01-13 03:33:18 +08:00
|
|
|
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
|
2016-04-26 21:33:56 +08:00
|
|
|
field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
|
2015-01-13 03:33:18 +08:00
|
|
|
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
|
|
|
|
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
|
|
|
|
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
|
2016-04-26 21:33:56 +08:00
|
|
|
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
|
|
|
|
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
|
2017-06-21 16:53:38 +08:00
|
|
|
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
|
|
|
|
// Src1's SDWA operand class follows Src1's own value type, matching how
// Src1DPP and Src1ModSDWA are derived (it was previously keyed on Src0VT).
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src1VT>.ret;
|
2019-03-19 03:25:39 +08:00
|
|
|
field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
|
|
|
|
field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
|
|
|
|
field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
|
2017-01-11 19:46:30 +08:00
|
|
|
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
|
|
|
|
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
|
2017-06-21 16:53:38 +08:00
|
|
|
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
|
|
|
|
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
|
2017-01-17 15:26:53 +08:00
|
|
|
|
2016-05-28 08:50:51 +08:00
|
|
|
|
2015-10-06 23:57:53 +08:00
|
|
|
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
|
|
|
|
field bit HasDst32 = HasDst;
|
2016-09-23 17:08:07 +08:00
|
|
|
field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
|
2015-10-06 23:57:53 +08:00
|
|
|
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
|
2016-09-19 22:39:49 +08:00
|
|
|
field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
|
|
|
|
field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
|
|
|
|
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
|
|
|
|
|
2016-09-23 17:08:07 +08:00
|
|
|
// TODO: Modifiers logic is somewhat ad hoc here, to be refined later
|
2019-03-19 03:25:39 +08:00
|
|
|
// HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
|
|
|
|
// enables modifiers for i32 type.
|
|
|
|
field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
|
2016-09-23 17:08:07 +08:00
|
|
|
|
2019-03-19 03:25:39 +08:00
|
|
|
// HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
|
2016-09-23 17:08:07 +08:00
|
|
|
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
|
|
|
|
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
|
|
|
|
field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
|
|
|
|
|
2019-03-19 03:25:39 +08:00
|
|
|
// HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
|
2016-09-23 17:08:07 +08:00
|
|
|
field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
|
|
|
|
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
|
|
|
|
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
|
|
|
|
|
|
|
|
field bit HasSrc0Mods = HasModifiers;
|
|
|
|
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
|
|
|
|
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
|
2016-09-19 22:39:49 +08:00
|
|
|
|
2019-03-19 03:35:44 +08:00
|
|
|
field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
|
2017-05-23 18:08:55 +08:00
|
|
|
field bit HasSDWAClamp = EmitDst;
|
2017-02-23 07:27:53 +08:00
|
|
|
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
|
2017-08-16 21:51:56 +08:00
|
|
|
field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
|
2017-09-01 07:53:50 +08:00
|
|
|
field bit HasClampLo = HasClamp;
|
|
|
|
field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
|
2017-08-07 21:14:12 +08:00
|
|
|
field bit HasHigh = 0;
|
2014-08-01 08:32:39 +08:00
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
field bit IsPacked = isPackedType<Src0VT>.ret;
|
|
|
|
field bit HasOpSel = IsPacked;
|
2017-07-18 22:23:26 +08:00
|
|
|
field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
|
2017-05-23 18:08:55 +08:00
|
|
|
field bit HasSDWAOMod = isFloatType<DstVT>.ret;
|
2017-02-28 02:49:11 +08:00
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
|
2018-11-30 22:21:56 +08:00
|
|
|
field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
|
2018-09-28 04:49:00 +08:00
|
|
|
field bit HasExtSDWA = HasExt;
|
|
|
|
field bit HasExtSDWA9 = HasExt;
|
2018-09-21 18:31:22 +08:00
|
|
|
field int NeedPatGen = PatGenMode.NoPattern;
|
2016-05-28 08:50:51 +08:00
|
|
|
|
2019-07-10 05:43:09 +08:00
|
|
|
field bit IsMAI = 0;
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
|
|
|
|
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
|
|
|
|
field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
|
|
|
|
|
2016-02-17 02:14:56 +08:00
|
|
|
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
|
2014-08-01 08:32:39 +08:00
|
|
|
|
2015-08-29 15:16:50 +08:00
|
|
|
// VOP3b instructions are a special case with a second explicit
|
|
|
|
// output. This is manually overridden for them.
|
|
|
|
field dag Outs32 = Outs;
|
|
|
|
field dag Outs64 = Outs;
|
2016-04-26 21:33:56 +08:00
|
|
|
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
|
2019-06-13 02:02:41 +08:00
|
|
|
field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
|
2017-06-21 16:53:38 +08:00
|
|
|
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
|
2015-08-29 15:16:50 +08:00
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
|
|
|
|
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
|
2019-03-19 03:25:39 +08:00
|
|
|
HasIntClamp, HasModifiers, HasSrc2Mods,
|
|
|
|
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
|
2017-02-28 02:49:11 +08:00
|
|
|
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
|
|
|
|
NumSrcArgs, HasClamp,
|
|
|
|
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
|
2017-07-21 21:54:11 +08:00
|
|
|
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
|
|
|
|
NumSrcArgs,
|
|
|
|
HasClamp,
|
|
|
|
getOpSelMod<Src0VT>.ret,
|
|
|
|
getOpSelMod<Src1VT>.ret,
|
|
|
|
getOpSelMod<Src2VT>.ret>.ret;
|
2018-11-30 22:21:56 +08:00
|
|
|
field dag InsDPP = !if(HasExtDPP,
|
|
|
|
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
|
|
|
|
HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
|
|
|
|
(ins));
|
2019-06-13 02:02:41 +08:00
|
|
|
field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
|
|
|
|
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
|
|
|
|
field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
|
|
|
|
Src0ModDPP, Src1ModDPP>.ret;
|
[AMDGPU] Assembler: better support for immediate literals in assembler.
Summary:
Previously assembler parsed all literals as either 32-bit integers or 32-bit floating-point values. Because of this we couldn't support f64 literals.
E.g. in instruction "v_fract_f64 v[0:1], 0.5", literal 0.5 was encoded as 32-bit literal 0x3f000000, which is incorrect and will be interpreted as 3.0517578125E-5 instead of 0.5. Correct encoding is inline constant 240 (optimal) or 32-bit literal 0x3FE00000 at least.
With this change the way immediate literals are parsed is changed. All literals are always parsed as 64-bit values either integer or floating-point. Then we convert parsed literals to correct form based on information about type of operand parsed (was literal floating or binary) and type of expected instruction operands (is this f32/64 or b32/64 instruction).
Here are rules how we convert literals:
- We parsed fp literal:
- Instruction expects 64-bit operand:
- If parsed literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
- then we do nothing with this literal
- Else if literal is not-inlinable but instruction requires to inline it (e.g. this is e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
- report error
- Else literal is not-inlinable but we can encode it as additional 32-bit literal constant
- If instruction expect fp operand type (f64)
- Check if low 32 bits of literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
- If so then do nothing
- Else (e.g. v_fract_f64 v[0:1], 3.1415)
- report warning that low 32 bits will be set to zeroes and precision will be lost
- set low 32 bits of literal to zeroes
- Instruction expects integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
- report error as it is unclear how to encode this literal
- Instruction expects 32-bit operand:
- Convert parsed 64 bit fp literal to 32 bit fp. Allow loss of precision but not overflow or underflow
- Is this literal inlinable and are we required to inline literal (e.g. v_trunc_f32_e64 v0, 0.5)
- do nothing
- Else report error
- Do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
- Parsed binary literal:
- Is this literal inlinable (e.g. v_trunc_f32_e32 v0, 35)
- do nothing
- Else, are we required to inline this literal (e.g. v_trunc_f32_e64 v0, 35)
- report error
- Else, literal is not-inlinable and we are not required to inline it
- Are high 32 bit of literal zeroes or same as sign bit (32 bit)
- do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
- Else
- report error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
For this change it is required that we know operand types of instruction (are they f32/64 or b32/64). I added several new register operands (they extend previous register operands) and set operand types to corresponding types:
'''
enum OperandType {
OPERAND_REG_IMM32_INT,
OPERAND_REG_IMM32_FP,
OPERAND_REG_INLINE_C_INT,
OPERAND_REG_INLINE_C_FP,
}
'''
This is not working yet:
- Several tests are failing
- Problems with predicate methods for inline immediates
- LLVM generated assembler parts try to select e64 encoding before e32.
More changes are required for several AsmOperands.
Reviewers: vpykhtin, tstellarAMD
Subscribers: arsenm, kzhuravl, artem.tamazov
Differential Revision: https://reviews.llvm.org/D22922
llvm-svn: 281050
2016-09-09 22:44:04 +08:00
|
|
|
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
|
2017-06-21 16:53:38 +08:00
|
|
|
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
|
2017-01-11 19:46:30 +08:00
|
|
|
DstVT>.ret;
|
2017-06-21 16:53:38 +08:00
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
|
2016-02-17 02:14:56 +08:00
|
|
|
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
|
2017-08-16 21:51:56 +08:00
|
|
|
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
|
2017-02-28 02:49:11 +08:00
|
|
|
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
|
2017-07-21 21:54:11 +08:00
|
|
|
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
|
|
|
|
HasClamp,
|
|
|
|
HasSrc0FloatMods,
|
|
|
|
HasSrc1FloatMods,
|
|
|
|
HasSrc2FloatMods>.ret;
|
2018-11-30 22:21:56 +08:00
|
|
|
field string AsmDPP = !if(HasExtDPP,
|
|
|
|
getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
|
2019-06-13 02:02:41 +08:00
|
|
|
field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
|
|
|
|
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
|
2017-05-23 18:08:55 +08:00
|
|
|
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
|
|
|
|
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
|
2019-04-26 03:01:51 +08:00
|
|
|
|
|
|
|
field string TieRegDPP = "$old";
|
2014-08-01 08:32:39 +08:00
|
|
|
}
|
|
|
|
|
2016-04-26 21:33:56 +08:00
|
|
|
// Wraps an existing profile p and disables every extended encoding
// (HasExt, DPP, SDWA and SDWA9) for it.
// The new profile is rebuilt from p's template arguments, so p's
// EnableF32SrcMods and EnableClamp flags are forwarded along with ArgVT;
// passing only ArgVT would silently reset both flags to 0.
// Fields that p overrides with `let` are not carried over.
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT, p.EnableF32SrcMods, p.EnableClamp> {
|
|
|
|
let HasExt = 0;
|
2018-09-28 04:49:00 +08:00
|
|
|
let HasExtDPP = 0;
|
|
|
|
let HasExtSDWA = 0;
|
|
|
|
let HasExtSDWA9 = 0;
|
2016-04-06 21:29:59 +08:00
|
|
|
}
|
|
|
|
|
2018-09-21 18:31:22 +08:00
|
|
|
// Wraps an existing profile p and sets its NeedPatGen field to `mode`
// (PatGenMode.Pattern unless the caller passes something else).
// The new profile is rebuilt from p's template arguments, so p's
// EnableF32SrcMods and EnableClamp flags are forwarded along with ArgVT;
// passing only ArgVT would silently reset both flags to 0.
// Fields that p overrides with `let` are not carried over.
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT, p.EnableF32SrcMods, p.EnableClamp> {
|
|
|
|
let NeedPatGen = mode;
|
|
|
|
}
|
|
|
|
|
2015-08-22 07:49:51 +08:00
|
|
|
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
|
2016-11-13 15:01:11 +08:00
|
|
|
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
|
|
|
|
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
|
2015-04-24 03:33:54 +08:00
|
|
|
|
2015-08-22 07:49:51 +08:00
|
|
|
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
|
2016-11-13 15:01:11 +08:00
|
|
|
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
|
|
|
|
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
|
2017-03-01 05:31:45 +08:00
|
|
|
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
|
2015-05-26 23:55:52 +08:00
|
|
|
|
2017-02-28 06:40:39 +08:00
|
|
|
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
|
2016-06-17 00:50:04 +08:00
|
|
|
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
|
|
|
|
|
2017-07-21 21:54:11 +08:00
|
|
|
def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
|
|
|
|
|
2017-02-28 02:49:11 +08:00
|
|
|
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
|
|
|
|
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
|
|
|
|
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
|
|
|
|
|
|
|
|
def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
|
|
|
|
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
|
2018-08-02 04:13:58 +08:00
|
|
|
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
|
|
|
|
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
|
2017-02-28 02:49:11 +08:00
|
|
|
|
2017-07-07 22:29:06 +08:00
|
|
|
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
|
|
|
|
|
2015-10-06 23:57:53 +08:00
|
|
|
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
|
|
|
|
|
2014-08-01 08:32:39 +08:00
|
|
|
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
|
|
|
|
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
|
|
|
|
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
|
|
|
|
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
|
|
|
|
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
|
|
|
|
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
|
|
|
|
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
|
|
|
|
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
|
|
|
|
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
|
2017-02-02 10:27:04 +08:00
|
|
|
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
|
|
|
|
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
|
2014-08-01 08:32:39 +08:00
|
|
|
|
2016-11-13 15:01:11 +08:00
|
|
|
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
|
2014-08-01 08:32:39 +08:00
|
|
|
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
|
|
|
|
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
|
|
|
|
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
|
|
|
|
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
|
|
|
|
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
|
2015-02-04 01:38:01 +08:00
|
|
|
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
|
2014-08-01 08:32:39 +08:00
|
|
|
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
|
2019-03-19 03:35:44 +08:00
|
|
|
// VOPProfile instances. Every record name spells out the same value types
// that are passed in the type list; `untyped` pads the list to four entries
// when the name carries fewer than four types.
// NOTE(review): presumably the list is ordered result type first, then the
// source operands -- confirm against the VOPProfile class definition.

// Extra template arguments: a literal 0 followed by EnableClamp = 1.
// NOTE(review): the meaning of the 0 is not visible here -- check VOPProfile.
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

// 64-bit integer profiles with one unused slot.
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

// Profiles with all four type slots populated.
def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

// Packed 16-bit sources with a 32-bit scalar in the first and last slots.
def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

// Profiles whose first and last slots are 4-, 16- or 32-element vectors.
def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
|
|
|
|
|
2016-09-17 05:41:16 +08:00
|
|
|
// Mixin that records an instruction's place in the commute tables.
//   RevOp  - copy of the `revOp` template argument.
//   IsOrig - copy of the `isOrig` template argument.
// InstrMappings whose FilterClass is "Commutable_REV" use RevOp as the row
// key and IsOrig as the column.
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}
|
|
|
|
|
2014-09-08 23:07:27 +08:00
|
|
|
// Mixin that records an atomic instruction's place in the ret/no-ret tables.
//   NoRetOp - copy of the `noRetOp` template argument.
//   IsRet   - copy of the `isRet` template argument.
// InstrMappings whose FilterClass is "AtomicNoRet" use NoRetOp as the row
// key and IsRet as the column.
class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}
|
|
|
|
|
2014-12-07 20:18:57 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Interpolation opcodes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2018-03-17 00:38:04 +08:00
|
|
|
// Register operand over register class `rc` that is printed with the
// "printVINTRPDst" print method.
class VINTRPDstOperand <RegisterClass rc>
  : RegisterOperand <rc, "printVINTRPDst">;
|
|
|
|
|
2015-01-28 01:25:11 +08:00
|
|
|
// Pseudo form of a VINTRP instruction. It carries the selection `pattern`,
// has an empty asm string, and is registered with SIMCInstr under
// SIEncodingFamily.NONE.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern>
  : VINTRPCommon <outs, ins, "", pattern>,
    SIMCInstr <opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}
|
|
|
|
|
2019-05-10 02:38:55 +08:00
|
|
|
// FIXME-GFX10: WIP.
// Real VINTRP instruction using the VINTRPe encoding. It has no selection
// pattern (empty list) and is registered with SIMCInstr under the
// caller-supplied `encodingFamily`. The decoder is gated by DisableSIDecoder.
class VINTRP_Real_si <bits<2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily>
  : VINTRPCommon <outs, ins, asm, []>,
    VINTRPe <op>,
    SIMCInstr <opName, encodingFamily> {
  let DisableDecoder = DisableSIDecoder;
}
|
2014-12-07 20:18:57 +08:00
|
|
|
|
|
|
|
// Real VINTRP instruction using the VINTRPe_vi encoding. It has no selection
// pattern, is registered with SIMCInstr under SIEncodingFamily.VI, is
// assembled under VIAssemblerPredicate, and is decoded in the "GFX8"
// namespace (gated by DisableVIDecoder).
class VINTRP_Real_vi <bits<2> op, string opName, dag outs, dag ins,
                      string asm>
  : VINTRPCommon <outs, ins, asm, []>,
    VINTRPe_vi <op>,
    SIMCInstr <opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
  let DisableDecoder = DisableVIDecoder;
}
|
2014-12-07 20:18:57 +08:00
|
|
|
|
2019-05-10 02:38:55 +08:00
|
|
|
// FIXME-GFX10: WIP.
// Emits the four records for one VINTRP opcode:
//   NAME        - pseudo carrying `pattern`
//   NAME_si     - SI encoding family, GFX6/GFX7 assembler + decoder namespace
//   NAME_vi     - VI encoding family
//   NAME_gfx10  - GFX10 encoding family, GFX10+ assembler + decoder namespace
// All records are keyed to the pseudo through NAME.
multiclass VINTRP_m <bits<2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in
  def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  // The GFX10 record reuses VINTRP_Real_si, so it also inherits
  // DisableDecoder = DisableSIDecoder from that class.
  let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in
  def _gfx10 : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
}
|
2013-02-27 01:52:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Vector instruction mappings
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Maps an opcode in e32 form to its e64 equivalent.
// Rows group "VOP" records that share OpName. The key entry has
// (Size, VOP3) = ("4", "0") and the value entry has ("8", "1").
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}
|
|
|
|
|
2014-07-22 00:55:33 +08:00
|
|
|
// Maps an opcode in e64 form to its e32 equivalent.
// Rows group "VOP" records that share OpName. The key entry has
// (Size, VOP3) = ("8", "1") and the value entry has ("4", "0").
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}
|
|
|
|
|
[AMDGPU] SDWA peephole optimization pass.
Summary:
First iteration of SDWA peephole.
This pass tries to combine several instructions into one SDWA instruction. E.g. it converts:
'''
V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
'''
Into:
'''
V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
'''
Pass structure:
1. Iterate over machine instruction in basic block and try to apply "SDWA patterns" to each of them. SDWA patterns match machine instruction into either source or destination SDWA operand. E.g. ''' V_LSHRREV_B32_e32 %vreg0, 16, %vreg1''' is matched to source SDWA operand '''%vreg1 src_sel:WORD_1'''.
2. Iterate over the found SDWA operands and find instructions that could potentially be converted into SDWA. E.g. for a source SDWA operand the potential instructions are all instructions in this basic block that use '''%vreg0'''
3. Iterate over all potential instructions and check if they can be converted into SDWA.
4. Convert instructions to SDWA.
This review contains a basic implementation of the SDWA peephole pass. This pass requires additional testing for both correctness and performance (no performance testing done).
There are several ways this pass can be improved:
1. Make this pass work on whole function not only basic block. As I can see this can be done right now without changes to pass.
2. Introduce more SDWA patterns
3. Introduce mnemonics to limit when SDWA patterns should apply
Reviewers: vpykhtin, alex-t, arsenm, rampitec
Subscribers: wdng, nhaehnle, mgorny
Differential Revision: https://reviews.llvm.org/D30038
llvm-svn: 298365
2017-03-21 20:51:34 +08:00
|
|
|
// Maps ordinary instructions to their SDWA counterparts.
// Rows group "VOP" records that share OpName. The key entry has
// AsmVariantName = "Default" and the value entry has AsmVariantName = "SDWA".
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}
|
|
|
|
|
2017-06-21 16:53:38 +08:00
|
|
|
// Maps SDWA instructions to their ordinary counterparts.
// Rows group "VOP" records that share OpName. The key entry has
// AsmVariantName = "SDWA" and the value entry has AsmVariantName = "Default".
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}
|
|
|
|
|
2018-11-30 22:21:56 +08:00
|
|
|
// Maps ordinary instructions to their DPP counterparts.
// Rows group "VOP" records that share OpName. The key entry has
// AsmVariantName = "Default" and the value entry has AsmVariantName = "DPP".
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}
|
|
|
|
|
2013-03-27 17:12:59 +08:00
|
|
|
// Maps a commuted opcode to its original version.
// Rows group "Commutable_REV" records that share RevOp. The key entry has
// IsOrig = "0" and the value entry has IsOrig = "1".
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
2015-03-24 02:45:30 +08:00
|
|
|
// Maps an original opcode to its commuted version.
// Rows group "Commutable_REV" records that share RevOp. The key entry has
// IsOrig = "1" and the value entry has IsOrig = "0".
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
|
|
|
|
|
2014-12-07 20:18:57 +08:00
|
|
|
// Maps a pseudo instruction to its real opcode for a given encoding family.
// Rows group "SIMCInstr" records that share PseudoInstr. The key entry is
// the one whose Subtarget is SIEncodingFamily.NONE, and there is one value
// column per encoding family listed below.
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple
                   // matching issue for buffer instructions with unpacked
                   // d16 data. This does not actually change the encoding,
                   // and thus may be removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)]];
}
|
|
|
|
|
2016-09-17 05:41:16 +08:00
|
|
|
// Get equivalent SOPK instruction.
// Rows group "SOPKInstTable" records that share BaseCmpOp. The key entry has
// IsSOPK = "0" and the value entry has IsSOPK = "1".
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
2014-08-12 06:18:17 +08:00
|
|
|
// Maps a "MUBUFAddr64Table" entry with IsAddr64 = "0" to the entry that has
// the same OpName and IsAddr64 = "1".
def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
2018-10-09 02:47:01 +08:00
|
|
|
// Same table as getAddr64Inst ("MUBUFAddr64Table" rows keyed by OpName), but
// the key column and the value column are both IsAddr64 = "1".
// NOTE(review): presumably this serves as an "is this an Addr64 opcode"
// lookup -- confirm against the callers of the generated function.
def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
2018-02-21 21:13:48 +08:00
|
|
|
// Maps a "MUBUFLdsTable" entry with IsLds = "1" to the entry that has the
// same OpName and IsLds = "0".
def getMUBUFNoLdsInst : InstrMapping {
  let FilterClass = "MUBUFLdsTable";
  let RowFields = ["OpName"];
  let ColFields = ["IsLds"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
|
|
|
|
|
2014-09-08 23:07:27 +08:00
|
|
|
// Maps an atomic opcode to its version with a return value.
// Rows group "AtomicNoRet" records that share NoRetOp. The key entry has
// IsRet = "0" and the value entry has IsRet = "1".
def getAtomicRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
|
|
|
// Maps an atomic opcode to its returnless version.
// Rows group "AtomicNoRet" records that share NoRetOp. The key entry has
// IsRet = "1" and the value entry has IsRet = "0".
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
|
|
|
|
|
2018-11-16 09:13:34 +08:00
|
|
|
// Maps a GLOBAL to its SADDR form.
// Rows group "GlobalSaddrTable" records that share SaddrOp. The key entry has
// IsSaddr = "0" and the value entry has IsSaddr = "1".
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
|
|
|
|
|
2019-04-27 07:16:16 +08:00
|
|
|
// Maps a v_cmpx opcode with sdst to opcode without sdst.
// Rows group "VCMPXNoSDstTable" records that share NoSDstOp. The key entry
// has HasSDst = "1" and the value entry has HasSDst = "0".
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
|
|
|
|
|
2019-06-27 01:34:57 +08:00
|
|
|
// Maps a SOPP to a SOPP with S_NOP.
// Rows group "Base_SOPP" records that share AsmString. The key entry has
// Size = "4" and the value entry has Size = "8".
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "Base_SOPP";
  let RowFields = ["AsmString"];
  let ColFields = ["Size"];
  let KeyCol = ["4"];
  let ValueCols = [["8"]];
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
include "SIInstructions.td"
|
2016-08-01 22:21:30 +08:00
|
|
|
|
|
|
|
include "DSInstructions.td"
|
2016-09-02 01:54:54 +08:00
|
|
|
include "MIMGInstructions.td"
|